// Copyright 2020 The Gitea Authors. // All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package archiver import ( "io" "io/ioutil" "os" "path" "regexp" "strings" "sync" "time" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" ) // ArchiveRequest defines the parameters of an archive request, which notably // includes the specific repository being archived as well as the commit, the // name by which it was requested, and the kind of archive being requested. // This is entirely opaque to external entities, though, and mostly used as a // handle elsewhere. type ArchiveRequest struct { uri string repo *git.Repository refName string ext string archivePath string archiveType git.ArchiveType archiveComplete bool commit *git.Commit cchan chan struct{} } var archiveInProgress []*ArchiveRequest var archiveMutex sync.Mutex // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all // the way to 64. var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`) // These facilitate testing, by allowing the unit tests to control (to some extent) // the goroutine used for processing the queue. var archiveQueueMutex *sync.Mutex var archiveQueueStartCond *sync.Cond var archiveQueueReleaseCond *sync.Cond // GetArchivePath returns the path from which we can serve this archive. func (aReq *ArchiveRequest) GetArchivePath() string { return aReq.archivePath } // GetArchiveName returns the name of the caller, based on the ref used by the // caller to create this request. func (aReq *ArchiveRequest) GetArchiveName() string { return aReq.refName + aReq.ext } // IsComplete returns the completion status of this request. 
func (aReq *ArchiveRequest) IsComplete() bool {
	return aReq.archiveComplete
}

// WaitForCompletion will wait for this request to complete, with no timeout.
// It stops waiting when either the request's completion channel is closed or
// the HTTP request context of ctx is done. It returns whether the archive was
// actually completed, as the channel could have also been closed due to an
// error.
func (aReq *ArchiveRequest) WaitForCompletion(ctx *context.Context) bool {
	select {
	case <-aReq.cchan:
	case <-ctx.Req.Context().Done():
	}

	return aReq.IsComplete()
}

// TimedWaitForCompletion will wait for this request to complete, with timeout
// happening after the specified Duration. It returns whether the archive is
// now complete and whether we hit the timeout or not. The latter may not be
// useful if the request is complete or we started to shutdown.
func (aReq *ArchiveRequest) TimedWaitForCompletion(ctx *context.Context, dur time.Duration) (bool, bool) {
	// Use an explicit timer rather than time.After so that it can be stopped
	// as soon as we return; otherwise the timer would linger until it fires
	// whenever the request completes (or is cancelled) before the timeout.
	timer := time.NewTimer(dur)
	defer timer.Stop()

	timeout := false
	select {
	case <-timer.C:
		timeout = true
	case <-aReq.cchan:
	case <-ctx.Req.Context().Done():
	}

	return aReq.IsComplete(), timeout
}

// getArchiveRequest looks through archiveInProgress for a request targeting
// the same repository path, commit ID and archive type, returning it if found
// and nil otherwise.
//
// The caller must hold the archiveMutex across calls to getArchiveRequest.
func getArchiveRequest(repo *git.Repository, commit *git.Commit, archiveType git.ArchiveType) *ArchiveRequest {
	for _, r := range archiveInProgress {
		// Need to be referring to the same repository.
		if r.repo.Path == repo.Path && r.commit.ID == commit.ID && r.archiveType == archiveType {
			return r
		}
	}
	return nil
}

// DeriveRequestFrom creates an archival request, based on the URI. The
// resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
// if it's determined that the request still needs to be satisfied.
func DeriveRequestFrom(ctx *context.Context, uri string) *ArchiveRequest { if ctx.Repo == nil || ctx.Repo.GitRepo == nil { log.Trace("Repo not initialized") return nil } r := &ArchiveRequest{ uri: uri, repo: ctx.Repo.GitRepo, } switch { case strings.HasSuffix(uri, ".zip"): r.ext = ".zip" r.archivePath = path.Join(r.repo.Path, "archives/zip") r.archiveType = git.ZIP case strings.HasSuffix(uri, ".tar.gz"): r.ext = ".tar.gz" r.archivePath = path.Join(r.repo.Path, "archives/targz") r.archiveType = git.TARGZ default: log.Trace("Unknown format: %s", uri) return nil } r.refName = strings.TrimSuffix(r.uri, r.ext) isDir, err := util.IsDir(r.archivePath) if err != nil { ctx.ServerError("Download -> util.IsDir(archivePath)", err) return nil } if !isDir { if err := os.MkdirAll(r.archivePath, os.ModePerm); err != nil { ctx.ServerError("Download -> os.MkdirAll(archivePath)", err) return nil } } // Get corresponding commit. if r.repo.IsBranchExist(r.refName) { r.commit, err = r.repo.GetBranchCommit(r.refName) if err != nil { ctx.ServerError("GetBranchCommit", err) return nil } } else if r.repo.IsTagExist(r.refName) { r.commit, err = r.repo.GetTagCommit(r.refName) if err != nil { ctx.ServerError("GetTagCommit", err) return nil } } else if shaRegex.MatchString(r.refName) { r.commit, err = r.repo.GetCommit(r.refName) if err != nil { ctx.NotFound("GetCommit", nil) return nil } } else { ctx.NotFound("DeriveRequestFrom", nil) return nil } archiveMutex.Lock() defer archiveMutex.Unlock() if rExisting := getArchiveRequest(r.repo, r.commit, r.archiveType); rExisting != nil { return rExisting } r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext) r.archiveComplete, err = util.IsFile(r.archivePath) if err != nil { ctx.ServerError("util.IsFile", err) return nil } return r } func doArchive(r *ArchiveRequest) { var ( err error tmpArchive *os.File destArchive *os.File ) // Close the channel to indicate to potential waiters that this request // has finished. 
defer close(r.cchan) // It could have happened that we enqueued two archival requests, due to // race conditions and difficulties in locking. Do one last check that // the archive we're referring to doesn't already exist. If it does exist, // then just mark the request as complete and move on. isFile, err := util.IsFile(r.archivePath) if err != nil { log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err) } if isFile { r.archiveComplete = true return } // Create a temporary file to use while the archive is being built. We // will then copy it into place (r.archivePath) once it's fully // constructed. tmpArchive, err = ioutil.TempFile("", "archive") if err != nil { log.Error("Unable to create a temporary archive file! Error: %v", err) return } defer func() { tmpArchive.Close() os.Remove(tmpArchive.Name()) }() if err = r.commit.CreateArchive(graceful.GetManager().ShutdownContext(), tmpArchive.Name(), git.CreateArchiveOpts{ Format: r.archiveType, Prefix: setting.Repository.PrefixArchiveFiles, }); err != nil { log.Error("Download -> CreateArchive "+tmpArchive.Name(), err) return } // Now we copy it into place if destArchive, err = os.Create(r.archivePath); err != nil { log.Error("Unable to open archive " + r.archivePath) return } _, err = io.Copy(destArchive, tmpArchive) destArchive.Close() if err != nil { log.Error("Unable to write archive " + r.archivePath) return } // Block any attempt to finalize creating a new request if we're marking r.archiveComplete = true } // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing // will occur in a separate goroutine, as this phase may take a while to // complete. If the archive already exists, ArchiveRepository will not do // anything. In all cases, the caller should be examining the *ArchiveRequest // being returned for completion, as it may be different than the one they passed // in. 
func ArchiveRepository(request *ArchiveRequest) *ArchiveRequest { // We'll return the request that's already been enqueued if it has been // enqueued, or we'll immediately enqueue it if it has not been enqueued // and it is not marked complete. archiveMutex.Lock() defer archiveMutex.Unlock() if rExisting := getArchiveRequest(request.repo, request.commit, request.archiveType); rExisting != nil { return rExisting } if request.archiveComplete { return request } request.cchan = make(chan struct{}) archiveInProgress = append(archiveInProgress, request) go func() { // Wait to start, if we have the Cond for it. This is currently only // useful for testing, so that the start and release of queued entries // can be controlled to examine the queue. if archiveQueueStartCond != nil { archiveQueueMutex.Lock() archiveQueueStartCond.Wait() archiveQueueMutex.Unlock() } // Drop the mutex while we process the request. This may take a long // time, and it's not necessary now that we've added the reequest to // archiveInProgress. doArchive(request) if archiveQueueReleaseCond != nil { archiveQueueMutex.Lock() archiveQueueReleaseCond.Wait() archiveQueueMutex.Unlock() } // Purge this request from the list. To do so, we'll just take the // index at which we ended up at and swap the final element into that // position, then chop off the now-redundant final element. The slice // may have change in between these two segments and we may have moved, // so we search for it here. We could perhaps avoid this search // entirely if len(archiveInProgress) == 1, but we should verify // correctness. archiveMutex.Lock() defer archiveMutex.Unlock() idx := -1 for _idx, req := range archiveInProgress { if req == request { idx = _idx break } } if idx == -1 { log.Error("ArchiveRepository: Failed to find request for removal.") return } archiveInProgress = append(archiveInProgress[:idx], archiveInProgress[idx+1:]...) }() return request }