You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
majority-judgment-cli/reader/profiles_csv.go

139 lines
3.7 KiB

package reader
import (
"encoding/csv"
"errors"
"fmt"
"github.com/csimplestring/go-csv/detector"
"io"
"strings"
)
// ProfilesCsvReader reads a poll's tally in a CSV like so:
//
//	Nutriscore, G, F, E, D, C, B, A
//	Pizza, 4, 2, 3, 4, 5, 4, 1
//	Chips, 5, 3, 2, 4, 4, 3, 2
//	Pasta, 4, 4, 2, 4, 4, 3, 2
//
// Each data row holds the tally of one proposal, one value per grade.
// The first row (grade names) and the first column (proposal names) are both
// optional: Read detects whether they are present and generates placeholder
// names when they are not.
//
// The type carries no state; its zero value is ready to use.
type ProfilesCsvReader struct{}
// Read parses the CSV tally found in input and returns as much data as it can.
//
// The whole input is loaded in memory and passed through sanitizeInput, then
// its delimiter is detected before the rows are parsed. Grade names are taken
// from the first row and proposal names from the first column when detectShape
// reports them as present; otherwise placeholder names are generated.
//
// Read never fills judgments, because this data is absent from the profiles.
// When err is non-nil the other results may be only partially filled.
func (r ProfilesCsvReader) Read(input *io.Reader) (
	judgments [][]int,
	tallies [][]float64,
	proposals []string,
	grades []string,
	err error,
) {
	// Dereferencing a nil pointer (or reading from a nil reader) would panic.
	if input == nil || *input == nil {
		err = errors.New("no input to read")
		return
	}

	// NOTE(review): the fallback delimiter is a single space as found in the
	// source — confirm this is intended rather than a comma or a tab.
	csvDelimiter := ' ' // default value if our detector below fails
	csvQuote := '"'

	// I. Read the whole input at once, since it is consumed twice below:
	// once by the delimiter detector and once by the CSV parser.
	allDataBytes, errInput := io.ReadAll(*input)
	if errInput != nil {
		err = fmt.Errorf("cannot read input: %w", errInput)
		return
	}
	allData := sanitizeInput(string(allDataBytes))

	// I.a Detect the delimiter between values in the input
	delimiters := detector.New().DetectDelimiter(strings.NewReader(allData), byte(csvQuote))
	if 0 < len(delimiters) {
		csvDelimiter = readFirstRune(delimiters[0])
	}
	if 1 < len(delimiters) {
		// An ambiguous delimiter is rejected instead of guessed.
		err = fmt.Errorf("too many delimiters: found `%s` and `%s`", delimiters[0], delimiters[1])
		return
	}

	// I.b Read the actual CSV contents
	csvReader := csv.NewReader(strings.NewReader(allData))
	csvReader.Comma = csvDelimiter
	csvRows, errReader := csvReader.ReadAll()
	if errReader != nil {
		err = fmt.Errorf("Failed to read input CSV: %w", errReader)
		return
	}

	// II. Detect the shape/structure of the input file
	hasGradesNamesRow, hasProposalNamesColumn := r.detectShape(csvRows)

	// III. Read the tallies, proposals, grades
	for rowIndex, row := range csvRows {
		rowLen := len(row)
		if rowLen < 2 {
			// Rows with fewer than two cells are ignored.
			continue
		}

		// III.a Read the grades names on the first row, or generate some if missing
		if rowIndex == 0 {
			if hasGradesNamesRow {
				grades = ReadNamesRow(row, hasProposalNamesColumn)
				// The header row holds no tally; move on to the next row.
				continue
			}

			// One grade per cell, minus the cell holding the proposal name.
			amountOfGrades := rowLen
			if hasProposalNamesColumn {
				amountOfGrades--
			}
			var errGradesGen error
			grades, errGradesGen = GenerateDummyGradeNames(amountOfGrades)
			if errGradesGen != nil {
				err = fmt.Errorf("Failed to generate default grades names: %w", errGradesGen)
				return
			}
		}

		// III.b Read the proposals' names
		if hasProposalNamesColumn {
			proposals = append(proposals, strings.TrimSpace(row[0]))
		} else if j := len(proposals); j < len(alphabet) {
			proposals = append(proposals, "Proposal "+alphabet[j:j+1])
		} else {
			// Out of letters: fall back to a 1-based number instead of
			// slicing past the end of alphabet, which would panic.
			proposals = append(proposals, fmt.Sprintf("Proposal %d", j+1))
		}

		// III.c Read the actual tallies
		proposalTallyOfFloats, tallyErr := ReadTallyRow(row, hasProposalNamesColumn)
		if tallyErr != nil {
			err = fmt.Errorf("Failed to read input tally: %w", tallyErr)
			return
		}
		tallies = append(tallies, proposalTallyOfFloats)
	}

	return
}
// detectShape gathers metadata about the CSV structure.
//
// hasGradesNamesRow is true when any non-blank cell of the first row, the
// first cell excepted, is rejected by ReadNumber.
//
// hasProposalNamesColumn is true when the first cell of any row is non-blank
// and rejected by ReadNumber. The first row is left out of that check when it
// was recognized as the grades names row.
//
// Rows without any cell are skipped, so empty or nil input yields false, false.
func (r ProfilesCsvReader) detectShape(rows [][]string) (hasGradesNamesRow bool, hasProposalNamesColumn bool) {
	for rowIndex, row := range rows {
		if len(row) == 0 {
			// Nothing to inspect, and row[0] below would panic.
			continue
		}

		if rowIndex == 0 {
			// Scan backwards and skip the first cell: it may be a mere label
			// for the names column and says nothing about the grades.
			for i := len(row) - 1; i >= 1; i-- {
				cell := row[i]
				if strings.TrimSpace(cell) == "" {
					continue
				}
				if _, errDetection := ReadNumber(cell); errDetection != nil {
					hasGradesNamesRow = true
					break
				}
			}
			if hasGradesNamesRow {
				// A header row tells nothing about the proposal names column.
				continue
			}
		}

		firstCell := row[0]
		if strings.TrimSpace(firstCell) == "" {
			continue
		}
		if _, errDetection := ReadNumber(firstCell); errDetection != nil {
			hasProposalNamesColumn = true
			// Both answers are final from here on: the grades row can only
			// be detected on the first row, which has already been seen.
			break
		}
	}

	return
}