From 7d434376f1504ff64f249b879634ac2eec9440da Mon Sep 17 00:00:00 2001
From: zeripath
Date: Wed, 23 Jan 2019 08:56:51 +0000
Subject: [PATCH] Pooled and buffered gzip implementation (#5722)

* Pooled and buffered gzip implementation

* Add test for gzip

* Add integration test

* Ensure LFS check occurs within a transaction

The previous code made a race condition possible whereby an
LFSMetaObject could be inserted into the database twice. We should
check whether the LFSMetaObject is already in the database and insert
it if not, all within one transaction.

* Try to avoid primary key problem in postgres

The integration tests are affected by
https://github.com/go-testfixtures/testfixtures/issues/39. If we set
the primary key high enough, keep a count of the objects we create,
and remove them at the end of each test, we shouldn't be affected by
this.
---
 integrations/lfs_getobject_test.go | 129 ++++++++++++
 integrations/sqlite.ini            |   1 +
 models/lfs.go                      |  16 +-
 modules/gzip/gzip.go               | 327 +++++++++++++++++++++++++++++
 modules/gzip/gzip_test.go          | 131 ++++++++++++
 routers/routes/routes.go           |   4 +-
 6 files changed, 598 insertions(+), 10 deletions(-)
 create mode 100644 integrations/lfs_getobject_test.go
 create mode 100644 modules/gzip/gzip.go
 create mode 100644 modules/gzip/gzip_test.go

diff --git a/integrations/lfs_getobject_test.go b/integrations/lfs_getobject_test.go
new file mode 100644
index 000000000..8f01d712a
--- /dev/null
+++ b/integrations/lfs_getobject_test.go
@@ -0,0 +1,129 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package integrations
+
+import (
+	"archive/zip"
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"testing"
+
+	"code.gitea.io/gitea/models"
+	"code.gitea.io/gitea/modules/gzip"
+	"code.gitea.io/gitea/modules/lfs"
+	"code.gitea.io/gitea/modules/setting"
+	"github.com/stretchr/testify/assert"
+
+	gzipp "github.com/klauspost/compress/gzip"
+)
+
+// GenerateLFSOid returns the SHA-256 hex digest of content, which is how
+// LFS object IDs are derived.
+func GenerateLFSOid(content io.Reader) (string, error) {
+	h := sha256.New()
+	if _, err := io.Copy(h, content); err != nil {
+		return "", err
+	}
+	sum := h.Sum(nil)
+	return hex.EncodeToString(sum), nil
+}
+
+var lfsID = int64(20000)
+
+func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string {
+	oid, err := GenerateLFSOid(bytes.NewReader(*content))
+	assert.NoError(t, err)
+	var lfsMetaObject *models.LFSMetaObject
+
+	if setting.UsePostgreSQL {
+		lfsMetaObject = &models.LFSMetaObject{ID: lfsID, Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
+	} else {
+		lfsMetaObject = &models.LFSMetaObject{Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
+	}
+
+	lfsID++
+	lfsMetaObject, err = models.NewLFSMetaObject(lfsMetaObject)
+	assert.NoError(t, err)
+	contentStore := &lfs.ContentStore{BasePath: setting.LFS.ContentPath}
+	if !contentStore.Exists(lfsMetaObject) {
+		err := contentStore.Put(lfsMetaObject, bytes.NewReader(*content))
+		assert.NoError(t, err)
+	}
+	return oid
+}
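+
+// doLfs stores content as an LFS object in user2/repo1, requests it back
+// over HTTP with "Accept-Encoding: gzip", and asserts that the body comes
+// back gzipped only when expectGzip is set (and ENABLE_GZIP is on).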
+func doLfs(t *testing.T, content *[]byte, expectGzip bool) {
+	prepareTestEnv(t)
+	repo, err := models.GetRepositoryByOwnerAndName("user2", "repo1")
+	assert.NoError(t, err)
+	oid := storeObjectInRepo(t, repo.ID, content)
+	defer repo.RemoveLFSMetaObjectByOid(oid)
+
+	session := loginUser(t, "user2")
+
+	// Request the LFS object, advertising gzip support
+	req := NewRequest(t, "GET", "/user2/repo1.git/info/lfs/objects/"+oid+"/test")
+	req.Header.Set("Accept-Encoding", "gzip")
+	resp := session.MakeRequest(t, req, http.StatusOK)
+
+	contentEncoding := resp.Header().Get("Content-Encoding")
+	if !expectGzip || !setting.EnableGzip {
+		assert.NotContains(t, contentEncoding, "gzip")
+
+		result := resp.Body.Bytes()
+		assert.Equal(t, *content, result)
+	} else {
+		assert.Contains(t, contentEncoding, "gzip")
+		gzippReader, err := gzipp.NewReader(resp.Body)
+		assert.NoError(t, err)
+		result, err := ioutil.ReadAll(gzippReader)
+		assert.NoError(t, err)
+		assert.Equal(t, *content, result)
+	}
+}
+
+func TestGetLFSSmall(t *testing.T) {
+	content := []byte("A very small file\n")
+	doLfs(t, &content, false)
+}
+
+func TestGetLFSLarge(t *testing.T) {
+	content := make([]byte, gzip.MinSize*10)
+	for i := range content {
+		content[i] = byte(i % 256)
+	}
+	doLfs(t, &content, true)
+}
+
+func TestGetLFSGzip(t *testing.T) {
+	b := make([]byte, gzip.MinSize*10)
+	for i := range b {
+		b[i] = byte(i % 256)
+	}
+	outputBuffer := bytes.NewBuffer([]byte{})
+	gzippWriter := gzipp.NewWriter(outputBuffer)
+	gzippWriter.Write(b)
+	gzippWriter.Close()
+	content := outputBuffer.Bytes()
+	doLfs(t, &content, false)
+}
+
+func TestGetLFSZip(t *testing.T) {
+	b := make([]byte, gzip.MinSize*10)
+	for i := range b {
+		b[i] = byte(i % 256)
+	}
+	outputBuffer := bytes.NewBuffer([]byte{})
+	zipWriter := zip.NewWriter(outputBuffer)
+	fileWriter, err := zipWriter.Create("default")
+	assert.NoError(t, err)
+	fileWriter.Write(b)
+	zipWriter.Close()
+	content := outputBuffer.Bytes()
+	doLfs(t, &content, false)
+}
diff --git a/integrations/sqlite.ini b/integrations/sqlite.ini
index 857cfe461..33c1015ec 100644
--- a/integrations/sqlite.ini
+++ b/integrations/sqlite.ini
@@ -30,6 +30,7 @@ LFS_CONTENT_PATH = data/lfs-sqlite
 OFFLINE_MODE = false
 LFS_JWT_SECRET = Tv_MjmZuHqpIY6GFl12ebgkRAMt4RlWt0v4EHKSXO0w
 APP_DATA_PATH = integrations/gitea-integration-sqlite/data
+ENABLE_GZIP = true
 
 [mailer]
 ENABLED = false
diff --git a/models/lfs.go b/models/lfs.go
index 711e5b049..39b0b2dd6 100644
--- a/models/lfs.go
+++ b/models/lfs.go
@@ -44,20 +44,20 @@ const (
 func NewLFSMetaObject(m *LFSMetaObject) (*LFSMetaObject, error) {
 	var err error
 
-	has, err := x.Get(m)
+	sess := x.NewSession()
+	defer sess.Close()
+	if err = sess.Begin(); err != nil {
+		return nil, err
+	}
+
+	has, err := sess.Get(m)
 	if err != nil {
 		return nil, err
 	}
 
 	if has {
 		m.Existing = true
-		return m, nil
-	}
-
-	sess := x.NewSession()
-	defer sess.Close()
-	if err = sess.Begin(); err != nil {
-		return nil, err
+		return m, sess.Commit()
 	}
 
 	if _, err = sess.Insert(m); err != nil {
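
To make the race that this models/lfs.go change closes concrete, here is a
minimal sketch of the interleaving the old check-then-insert allowed; the
goroutine labels are hypothetical, not code from this patch:

    // goroutine A: x.Get(m)       -> not found
    // goroutine B: x.Get(m)       -> not found
    // goroutine A: sess.Insert(m) -> LFSMetaObject row created
    // goroutine B: sess.Insert(m) -> the same OID inserted a second time
    //
    // With Get and Insert inside one transaction, the window between the
    // check and the insert is closed.
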
diff --git a/modules/gzip/gzip.go b/modules/gzip/gzip.go
new file mode 100644
index 000000000..4a4a797c7
--- /dev/null
+++ b/modules/gzip/gzip.go
@@ -0,0 +1,327 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/klauspost/compress/gzip"
+	"gopkg.in/macaron.v1"
+)
+
+const (
+	acceptEncodingHeader  = "Accept-Encoding"
+	contentEncodingHeader = "Content-Encoding"
+	contentLengthHeader   = "Content-Length"
+	contentTypeHeader     = "Content-Type"
+	rangeHeader           = "Range"
+	varyHeader            = "Vary"
+)
+
+const (
+	// MinSize is the minimum size of content we will compress
+	MinSize = 1400
+)
+
+// noopCloser is an io.Writer with a no-op Close method, shielding the
+// underlying writer from early closure
+type noopCloser struct {
+	io.Writer
+}
+
+func (noopCloser) Close() error { return nil }
+
+// WriterPool is a pool of gzip writers, reducing the cost of creating
+// a new writer for every response
+type WriterPool struct {
+	pool             sync.Pool
+	compressionLevel int
+}
+
+// NewWriterPool creates a new pool
+func NewWriterPool(compressionLevel int) *WriterPool {
+	return &WriterPool{pool: sync.Pool{
+		// New will return nil, we'll manage the creation of new
+		// writers in the middleware
+		New: func() interface{} { return nil },
+	},
+		compressionLevel: compressionLevel}
+}
+
+// Get a writer from the pool - or create one if not available
+func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer {
+	ret := wp.pool.Get()
+	if ret == nil {
+		ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel)
+	} else {
+		ret.(*gzip.Writer).Reset(rw)
+	}
+	return ret.(*gzip.Writer)
+}
+
+// Put returns a writer to the pool
+func (wp *WriterPool) Put(w *gzip.Writer) {
+	wp.pool.Put(w)
+}
+
+var writerPool WriterPool
+
+// Options represents the configuration for the gzip middleware
+type Options struct {
+	CompressionLevel int
+}
+
+func validateCompressionLevel(level int) bool {
+	return level == gzip.DefaultCompression ||
+		level == gzip.ConstantCompression ||
+		(level >= gzip.BestSpeed && level <= gzip.BestCompression)
+}
+
+func validate(options []Options) Options {
+	// Default to level 4 compression (best results seem to be between 4 and 6)
+	opt := Options{CompressionLevel: 4}
+	if len(options) > 0 {
+		opt = options[0]
+	}
+	if !validateCompressionLevel(opt.CompressionLevel) {
+		opt.CompressionLevel = 4
+	}
+	return opt
+}
+
+// Middleware creates a macaron.Handler that proxies the response writer
+// and gzips the response when the client accepts it
+func Middleware(options ...Options) macaron.Handler {
+	opt := validate(options)
+	writerPool = *NewWriterPool(opt.CompressionLevel)
+	regex := regexp.MustCompile(`bytes=(\d+)\-.*`)
+
+	return func(ctx *macaron.Context) {
+		// If the client won't accept gzip or x-gzip don't compress
+		if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") &&
+			!strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") {
+			return
+		}
+
+		// If the client is asking for a specific range of bytes - don't compress
+		if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" {
+			match := regex.FindStringSubmatch(rangeHdr)
+			if match != nil && len(match) > 1 {
+				return
+			}
+		}
+
+		// OK, we should proxy the response writer
+		// We are still not necessarily going to compress...
+		proxyWriter := &ProxyResponseWriter{
+			ResponseWriter: ctx.Resp,
+		}
+		defer proxyWriter.Close()
+
+		ctx.Resp = proxyWriter
+		ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil))
+
+		// Check if render middleware has been registered;
+		// if yes, we need to modify its ResponseWriter as well.
+		if _, ok := ctx.Render.(*macaron.DummyRender); !ok {
+			ctx.Render.SetResponseWriter(proxyWriter)
+		}
+
+		ctx.Next()
+	}
+}
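+
+// The proxy below delays its compression decision: writes are buffered until
+// MinSize bytes (or a declared Content-Length) are available, so small
+// responses are sent uncompressed and never pay the gzip overhead.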
+
+// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
+type ProxyResponseWriter struct {
+	writer io.WriteCloser
+	macaron.ResponseWriter
+	stopped bool
+
+	code int
+	buf  []byte
+}
+
+// Write appends data to the proxied gzip writer.
+func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
+	// if writer is initialized, use the writer
+	if proxy.writer != nil {
+		return proxy.writer.Write(b)
+	}
+
+	proxy.buf = append(proxy.buf, b...)
+
+	var (
+		contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
+		contentType      = proxy.Header().Get(contentTypeHeader)
+		contentEncoding  = proxy.Header().Get(contentEncodingHeader)
+	)
+
+	// Only consider compressing if no encoding has been chosen, the content
+	// length is unknown or at least MinSize, and the content type isn't a
+	// compressed type
+	if contentEncoding == "" &&
+		(contentLength == 0 || contentLength >= MinSize) &&
+		(contentType == "" || !compressedContentType(contentType)) {
+		// If the current buffer is less than MinSize and a Content-Length isn't set, then wait
+		if len(proxy.buf) < MinSize && contentLength == 0 {
+			return len(b), nil
+		}
+
+		// If the Content-Length is at least MinSize or the current buffer has reached MinSize, then continue.
+		if contentLength >= MinSize || len(proxy.buf) >= MinSize {
+			// if we don't know the content type, infer it
+			if contentType == "" {
+				contentType = http.DetectContentType(proxy.buf)
+				proxy.Header().Set(contentTypeHeader, contentType)
+			}
+			// If the Content-Type is not compressed - compress!
+			if !compressedContentType(contentType) {
+				if err := proxy.startGzip(); err != nil {
+					return 0, err
+				}
+				return len(b), nil
+			}
+		}
+	}
+	// If we got here, we should not gzip this response.
+	if err := proxy.startPlain(); err != nil {
+		return 0, err
+	}
+	return len(b), nil
+}
+
+func (proxy *ProxyResponseWriter) startGzip() error {
+	// Set the Content-Encoding and Vary headers.
+	proxy.Header().Set(contentEncodingHeader, "gzip")
+	proxy.Header().Set(varyHeader, acceptEncodingHeader)
+
+	// If the Content-Length is already set, then calls to Write on gzip
+	// will fail to set the Content-Length header since it's already set
+	// See: https://github.com/golang/go/issues/14975.
+	proxy.Header().Del(contentLengthHeader)
+
+	// Write the header to the gzip response.
+	if proxy.code != 0 {
+		proxy.ResponseWriter.WriteHeader(proxy.code)
+		// Ensure that no other WriteHeader calls happen
+		proxy.code = 0
+	}
+
+	// Initialize and flush the buffer into the gzip response if there are any bytes.
+	// If there aren't any, we shouldn't initialize it yet because on Close it will
+	// write the gzip header even if nothing was ever written.
+	if len(proxy.buf) > 0 {
+		// Initialize the gzip response.
+		proxy.writer = writerPool.Get(proxy.ResponseWriter)
+
+		return proxy.writeBuf()
+	}
+	return nil
+}
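+
+// startPlain passes the buffered bytes through to the underlying
+// ResponseWriter without compressing them.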
+func (proxy *ProxyResponseWriter) startPlain() error {
+	if proxy.code != 0 {
+		proxy.ResponseWriter.WriteHeader(proxy.code)
+		proxy.code = 0
+	}
+	proxy.stopped = true
+	proxy.writer = noopCloser{proxy.ResponseWriter}
+	return proxy.writeBuf()
+}
+
+func (proxy *ProxyResponseWriter) writeBuf() error {
+	if proxy.buf == nil {
+		return nil
+	}
+
+	n, err := proxy.writer.Write(proxy.buf)
+
+	// This should never happen (per io.Writer docs), but if the write didn't
+	// accept the entire buffer but returned no specific error, we have no clue
+	// what's going on, so abort just to be safe.
+	if err == nil && n < len(proxy.buf) {
+		err = io.ErrShortWrite
+	}
+	proxy.buf = nil
+	return err
+}
+
+// WriteHeader will ensure that we have set up the writer before we write the header
+func (proxy *ProxyResponseWriter) WriteHeader(code int) {
+	if proxy.code == 0 {
+		proxy.code = code
+	}
+}
+
+// Close the writer
+func (proxy *ProxyResponseWriter) Close() error {
+	if proxy.stopped {
+		return nil
+	}
+
+	if proxy.writer == nil {
+		if err := proxy.startPlain(); err != nil {
+			return fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error())
+		}
+	}
+
+	err := proxy.writer.Close()
+
+	if poolWriter, ok := proxy.writer.(*gzip.Writer); ok {
+		writerPool.Put(poolWriter)
+	}
+
+	proxy.writer = nil
+	proxy.stopped = true
+	return err
+}
+
+// Flush the writer
+func (proxy *ProxyResponseWriter) Flush() {
+	if proxy.writer == nil {
+		return
+	}
+
+	if gw, ok := proxy.writer.(*gzip.Writer); ok {
+		gw.Flush()
+	}
+
+	proxy.ResponseWriter.Flush()
+}
+
+// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
+// Hijacker, its Hijack method is returned. Otherwise an error is returned.
+func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+	hijacker, ok := proxy.ResponseWriter.(http.Hijacker)
+	if !ok {
+		return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface")
+	}
+	return hijacker.Hijack()
+}
+
+// verify Hijacker interface implementation
+var _ http.Hijacker = &ProxyResponseWriter{}
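+
+// compressedContentType reports whether contentType already denotes a
+// compressed format that would not benefit from being gzipped again.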
+func compressedContentType(contentType string) bool {
+	switch contentType {
+	case "application/zip":
+		return true
+	case "application/x-gzip":
+		return true
+	case "application/gzip":
+		return true
+	default:
+		return false
+	}
+}
diff --git a/modules/gzip/gzip_test.go b/modules/gzip/gzip_test.go
new file mode 100644
index 000000000..d131e240a
--- /dev/null
+++ b/modules/gzip/gzip_test.go
@@ -0,0 +1,131 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gzip
+
+import (
+	"archive/zip"
+	"bytes"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	gzipp "github.com/klauspost/compress/gzip"
+	"github.com/stretchr/testify/assert"
+	macaron "gopkg.in/macaron.v1"
+)
+
+func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) {
+	m := macaron.New()
+	m.Use(Middleware())
+	m.Get("/", func() *[]byte { return &sampleResponse })
+	return m, &sampleResponse
+}
+
+func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) {
+	// Request without Accept-Encoding: gzip - should not gzip
+	resp := httptest.NewRecorder()
+	req, err := http.NewRequest("GET", "/", nil)
+	assert.NoError(t, err)
+	m.ServeHTTP(resp, req)
+
+	_, ok := resp.HeaderMap[contentEncodingHeader]
+	assert.False(t, ok)
+
+	contentEncoding := resp.Header().Get(contentEncodingHeader)
+	assert.NotContains(t, contentEncoding, "gzip")
+
+	result := resp.Body.Bytes()
+	assert.Equal(t, *sampleResponse, result)
+}
+
+func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) {
+	// Request with Accept-Encoding: gzip - should gzip only when expectGzip is set
+	resp := httptest.NewRecorder()
+	req, err := http.NewRequest("GET", "/", nil)
+	assert.NoError(t, err)
+	req.Header.Set(acceptEncodingHeader, "gzip")
+	m.ServeHTTP(resp, req)
+
+	_, ok := resp.HeaderMap[contentEncodingHeader]
+	assert.Equal(t, expectGzip, ok)
+
+	contentEncoding := resp.Header().Get(contentEncodingHeader)
+	if expectGzip {
+		assert.Contains(t, contentEncoding, "gzip")
+		gzippReader, err := gzipp.NewReader(resp.Body)
+		assert.NoError(t, err)
+		result, err := ioutil.ReadAll(gzippReader)
+		assert.NoError(t, err)
+		assert.Equal(t, *sampleResponse, result)
+	} else {
+		assert.NotContains(t, contentEncoding, "gzip")
+		result := resp.Body.Bytes()
+		assert.Equal(t, *sampleResponse, result)
+	}
+}
+
+func TestMiddlewareSmall(t *testing.T) {
+	m, sampleResponse := setup([]byte("Small response"))
+
+	reqNoAcceptGzip(t, m, sampleResponse)
+
+	reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareLarge(t *testing.T) {
+	b := make([]byte, MinSize+1)
+	for i := range b {
+		b[i] = byte(i % 256)
+	}
+	m, sampleResponse := setup(b)
+
+	reqNoAcceptGzip(t, m, sampleResponse)
+
+	// This should be gzipped as we accept gzip
+	reqAcceptGzip(t, m, sampleResponse, true)
+}
+
+func TestMiddlewareGzip(t *testing.T) {
+	b := make([]byte, MinSize*10)
+	for i := range b {
+		b[i] = byte(i % 256)
+	}
+	outputBuffer := bytes.NewBuffer([]byte{})
+	gzippWriter := gzipp.NewWriter(outputBuffer)
+	gzippWriter.Write(b)
+	gzippWriter.Flush()
+	gzippWriter.Close()
+	output := outputBuffer.Bytes()
+
+	m, sampleResponse := setup(output)
+
+	reqNoAcceptGzip(t, m, sampleResponse)
+
+	// This should not be gzipped even though we accept gzip
+	reqAcceptGzip(t, m, sampleResponse, false)
+}
+
+func TestMiddlewareZip(t *testing.T) {
+	b := make([]byte, MinSize*10)
+	for i := range b {
+		b[i] = byte(i % 256)
+	}
+	outputBuffer := bytes.NewBuffer([]byte{})
+	zipWriter := zip.NewWriter(outputBuffer)
+	fileWriter, err := zipWriter.Create("default")
+	assert.NoError(t, err)
+	fileWriter.Write(b)
+	zipWriter.Close()
+	output := outputBuffer.Bytes()
+
+	m, sampleResponse := setup(output)
+
+	reqNoAcceptGzip(t, m, sampleResponse)
+
+	// This should not be gzipped even though we accept gzip
+	reqAcceptGzip(t, m, sampleResponse, false)
+}
diff --git a/routers/routes/routes.go b/routers/routes/routes.go
index a3bf3f753..c012d5c3c 100644
--- a/routers/routes/routes.go
+++ b/routers/routes/routes.go
@@ -14,6 +14,7 @@ import (
 	"code.gitea.io/gitea/models"
 	"code.gitea.io/gitea/modules/auth"
 	"code.gitea.io/gitea/modules/context"
+	"code.gitea.io/gitea/modules/gzip"
 	"code.gitea.io/gitea/modules/lfs"
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/metrics"
@@ -36,7 +37,6 @@ import (
 	"github.com/go-macaron/cache"
 	"github.com/go-macaron/captcha"
 	"github.com/go-macaron/csrf"
-	"github.com/go-macaron/gzip"
 	"github.com/go-macaron/i18n"
 	"github.com/go-macaron/session"
 	"github.com/go-macaron/toolbox"
@@ -54,7 +54,7 @@ func NewMacaron() *macaron.Macaron {
 	}
 	m.Use(macaron.Recovery())
 	if setting.EnableGzip {
-		m.Use(gzip.Gziper())
+		m.Use(gzip.Middleware())
 	}
 	if setting.Protocol == setting.FCGI {
 		m.SetURLPrefix(setting.AppSubURL)
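
For reference, the middleware can be used in any macaron application, not
just via NewMacaron above. A minimal sketch, assuming only the import path
introduced in this patch:

    package main

    import (
    	"code.gitea.io/gitea/modules/gzip"
    	macaron "gopkg.in/macaron.v1"
    )

    func main() {
    	m := macaron.New()
    	// Level 4 is also the middleware's default; invalid levels fall back to 4.
    	m.Use(gzip.Middleware(gzip.Options{CompressionLevel: 4}))
    	// Responses under gzip.MinSize (1400 bytes) are sent uncompressed.
    	m.Get("/", func() string { return "hello" })
    	m.Run()
    }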