From 9d652002c63d03d44083c4410881a457a9390e2f Mon Sep 17 00:00:00 2001 From: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> Date: Sun, 31 May 2020 00:58:55 +0200 Subject: [PATCH] Fix language stat calculation (#11692) * Fix language stat calculation * Group languages and ignore 0 size files * remove unneeded code --- models/repo_language_stats.go | 35 +-------------------------- modules/git/repo_language_stats.go | 31 +++++++++++++++++++++--- modules/indexer/stats/indexer_test.go | 4 +-- 3 files changed, 29 insertions(+), 41 deletions(-) diff --git a/models/repo_language_stats.go b/models/repo_language_stats.go index d08782eaf..a15063e25 100644 --- a/models/repo_language_stats.go +++ b/models/repo_language_stats.go @@ -26,22 +26,6 @@ type LanguageStat struct { CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"` } -// specialLanguages defines list of languages that are excluded from the calculation -// unless they are the only language present in repository. Only languages which under -// normal circumstances are not considered to be code should be listed here. -var specialLanguages = map[string]struct{}{ - "XML": {}, - "JSON": {}, - "TOML": {}, - "YAML": {}, - "INI": {}, - "SQL": {}, - "SVG": {}, - "Text": {}, - "Markdown": {}, - "other": {}, -} - // LanguageStatList defines a list of language statistics type LanguageStatList []*LanguageStat @@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 { langPerc := make(map[string]float32) var otherPerc float32 = 100 var total int64 - // Check that repository has at least one non-special language - var skipSpecial bool - for _, stat := range stats { - if _, ok := specialLanguages[stat.Language]; !ok { - skipSpecial = true - break - } - } + for _, stat := range stats { - // Exclude specific languages from percentage calculation - if _, ok := specialLanguages[stat.Language]; ok && skipSpecial { - continue - } total += stat.Size } if total > 0 { for _, stat := range stats { - // Exclude specific languages from percentage calculation - if _, ok := specialLanguages[stat.Language]; ok && skipSpecial { - continue - } perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10) if perc <= 0.1 { continue @@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 { langPerc[stat.Language] = perc } otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) - } else { - otherPerc = 100 } if otherPerc > 0 { langPerc["other"] = otherPerc diff --git a/modules/git/repo_language_stats.go b/modules/git/repo_language_stats.go index d623d6f57..06d7d6aba 100644 --- a/modules/git/repo_language_stats.go +++ b/modules/git/repo_language_stats.go @@ -19,6 +19,20 @@ import ( const fileSizeLimit int64 = 16 * 1024 * 1024 +// specialLanguages defines list of languages that are excluded from the calculation +// unless they are the only language present in repository. Only languages which under +// normal circumstances are not considered to be code should be listed here. +var specialLanguages = []string{ + "XML", + "JSON", + "TOML", + "YAML", + "INI", + "SVG", + "Text", + "Markdown", +} + // GetLanguageStats calculates language stats for git repository at specified commit func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { r, err := git.PlainOpen(repo.Path) @@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err sizes := make(map[string]int64) err = tree.Files().ForEach(func(f *object.File) error { - if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || + if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { return nil } @@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language := analyze.GetCodeLanguage(f.Name, content) if language == enry.OtherLanguage || language == "" { - language = "other" + return nil + } + + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if group != "" { + language = group } sizes[language] += f.Size @@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } - if len(sizes) == 0 { - sizes["other"] = 0 + // filter special languages unless they are the only language + if len(sizes) > 1 { + for _, language := range specialLanguages { + delete(sizes, language) + } } return sizes, nil diff --git a/modules/indexer/stats/indexer_test.go b/modules/indexer/stats/indexer_test.go index b60c6d9bb..4bcbaa942 100644 --- a/modules/indexer/stats/indexer_test.go +++ b/modules/indexer/stats/indexer_test.go @@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) { assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha) langs, err := repo.GetTopLanguageStats(5) assert.NoError(t, err) - assert.Len(t, langs, 1) - assert.Equal(t, "other", langs[0].Language) - assert.Equal(t, float32(100), langs[0].Percentage) + assert.Empty(t, langs) }