diff --git a/io/transforms/gsheet2csv/LICENSE b/io/transforms/gsheet2csv/LICENSE new file mode 100644 index 0000000..e55db06 --- /dev/null +++ b/io/transforms/gsheet2csv/LICENSE @@ -0,0 +1,7 @@ +Authored in 2025 by AJ ONeal +To the extent possible under law, the author(s) have dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +You should have received a copy of the CC0 Public Domain Dedication along with +this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. diff --git a/io/transforms/gsheet2csv/README.md b/io/transforms/gsheet2csv/README.md new file mode 100644 index 0000000..1796636 --- /dev/null +++ b/io/transforms/gsheet2csv/README.md @@ -0,0 +1,101 @@ +# gsheet2csv + +[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv) + +A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader. + +This does surprisingly little - you should probably just handle the boilerplate yourself. 
However, these are the problems it solves for us: + +- works with Google Sheet URLs, regardless of URL format + - Edit URL: `https://docs.google.com/spreadsheets/d/{docid}/edit?gid={gid}#gid={gid}` + - Share URL (Sheet 1): `https://docs.google.com/spreadsheets/d/{docid}/edit?usp=sharing` + - CSV Export URL: `https://docs.google.com/spreadsheets/d/{docid}/export?format=csv&usp=sharing&gid={gid}` + - anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}` +- can ignore quoted comments (if all other fields in the row are empty) +- can preserve comments +- swaps `\r\n` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`) + +## Usage + +Same as `encoding/csv` (embedded), but with two extra options: + +```go +package main + +import ( + "fmt" + "os" + + "github.com/therootcompany/golib/io/transform/gsheet2csv" +) + +func main() { + switch len(os.Args) { + case 2: + break + case 1: + fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0]) + os.Exit(1) + } + url := os.Args[1] + + gsr := gsheet2csv.NewReaderFromURL(url) + records, err := gsr.ReadAll() + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL) + os.Exit(1) + } + + // distinguishes between comments and quoted fields + csvw := gsheet2csv.NewWriter(os.Stdout) + csvw.Comment = gsr.Comment + if err := csvw.WriteAll(records); err != nil { + fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) + os.Exit(1) + } +} +``` + +## CLI + +There are two convenience utilities: + +- `gsheet2csv` +- `gsheet2tsv` + +They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines. 
+ +### Flags & Options + +```text +--raw download without processing +--print-ids print ids to stdout without download +--print-url print url to stdout without downloading +-o write records to file +-d field delimiter +--comment '#' treat lines starting with # as comments +--crlf use CRLF (\r\n) as record separator +``` + +### Installation + +```sh +go get github.com/therootcompany/golib/io/transform/gsheet2csv +``` + +### ASCII Delimiters + +``` +, comma +\t tab (or a normal tab) + space (just a normal space) +: colon +; semicolon +| pipe +^_ unit separator +^^ record separator +^] group separator +^\ file separator +\f form feed (also ^L) +\v vertical tab (also ^K) +``` diff --git a/io/transforms/gsheet2csv/cmd/gsheet2csv/main.go b/io/transforms/gsheet2csv/cmd/gsheet2csv/main.go new file mode 100644 index 0000000..be51af9 --- /dev/null +++ b/io/transforms/gsheet2csv/cmd/gsheet2csv/main.go @@ -0,0 +1,176 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "io" + "os" + "strings" + "unicode/utf8" + + "github.com/therootcompany/golib/io/transform/gsheet2csv" +) + +const ( + fileSeparator = "\x1c" + groupSeparator = "\x1d" + recordSeparator = "\x1e" + unitSeparator = "\x1f" +) + +func main() { + var commentArg string + format := "CSV" + delim := ',' + if strings.Contains(os.Args[0], "tsv") { + delim = '\t' + format = "TSV" + } + + // Parse command-line flags + flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments") + outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") + delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") + useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") + urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") + parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") + rawOnly := 
flag.Bool("raw", false, "don't parse, just download") + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s [flags] \n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) + fmt.Fprintf(os.Stderr, "Flags:\n") + flag.PrintDefaults() + fmt.Fprintf(os.Stderr, "\nExample:\n") + fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0]) + } + flag.Parse() + + // Check for URL argument + if len(flag.Args()) != 1 { + fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") + flag.Usage() + os.Exit(1) + } + url := flag.Args()[0] + + // Prepare output writer + var out *os.File + if *outputFile != "" { + var err error + out, err = os.Create(*outputFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) + os.Exit(1) + } + defer func() { _ = out.Close() }() + } else { + out = os.Stdout + } + + switch *delimString { + case "^_", "\\x1f": + *delimString = unitSeparator + case "^^", "\\x1e": + *delimString = recordSeparator + case "^]", "\\x1d": + *delimString = groupSeparator + case "^\\", "\\x1c": + *delimString = fileSeparator + case "^L", "\\f": + *delimString = "\f" + case "^K", "\\v": + *delimString = "\v" + case "^I", "\\t": + *delimString = "\t" + } + delim, _ = utf8.DecodeRuneInString(*delimString) + + var rc io.ReadCloser + if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { + docid, gid := gsheet2csv.ParseIDs(url) + if *parseOnly { + fmt.Printf("docid=%s\ngid=%s\n", docid, gid) + } else { + fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) + } + + sheetURL := gsheet2csv.ToCSVURL(docid, gid) + if *urlOnly { + fmt.Printf("%s\n", sheetURL) + } else { + fmt.Fprintf(os.Stderr, 
"downloading %s\n", sheetURL) + } + + if !*urlOnly { + resp, err := gsheet2csv.GetSheet(docid, gid) + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) + os.Exit(1) + } + defer func() { _ = resp.Body.Close() }() + rc = resp.Body + } + } else { + url = strings.TrimPrefix(url, "file://") + fmt.Fprintf(os.Stderr, "opening %s\n", url) + f, err := os.Open(url) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) + os.Exit(1) + } + rc = f + } + fmt.Fprintf(os.Stderr, "\n") + + if *urlOnly || *parseOnly { + os.Exit(0) + return + } + + if *rawOnly { + if _, err := io.Copy(out, rc); err != nil { + fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) + os.Exit(1) + } + return + } + + comment, _ := utf8.DecodeRuneInString(commentArg) + + // Create a reader for the Google Sheet + gsr := gsheet2csv.NewReader(rc) + gsr.QuotedComments = false + gsr.Comment = 0 + gsr.ReuseRecord = true + + // Create CSV writer + csvw := gsheet2csv.NewWriter(out) + csvw.Comma = delim // Set delimiter to tab for TSV + csvw.Comment = comment + csvw.UseCRLF = *useCRLF + for { + // Convert each record + record, err := gsr.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) + os.Exit(1) + } + + if err := csvw.Write(record); err != nil { + fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) + os.Exit(1) + } + } + // Flush the writer to ensure all data is written + csvw.Flush() + if err := csvw.Error(); err != nil { + fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) + os.Exit(1) + } +} diff --git a/io/transforms/gsheet2csv/cmd/gsheet2tsv/main.go b/io/transforms/gsheet2csv/cmd/gsheet2tsv/main.go new file mode 100644 index 0000000..be51af9 --- /dev/null +++ b/io/transforms/gsheet2csv/cmd/gsheet2tsv/main.go @@ -0,0 +1,176 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "io" + "os" + "strings" + "unicode/utf8" + + 
"github.com/therootcompany/golib/io/transform/gsheet2csv" +) + +const ( + fileSeparator = "\x1c" + groupSeparator = "\x1d" + recordSeparator = "\x1e" + unitSeparator = "\x1f" +) + +func main() { + var commentArg string + format := "CSV" + delim := ',' + if strings.Contains(os.Args[0], "tsv") { + delim = '\t' + format = "TSV" + } + + // Parse command-line flags + flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments") + outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") + delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") + useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") + urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") + parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") + rawOnly := flag.Bool("raw", false, "don't parse, just download") + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s [flags] \n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) + fmt.Fprintf(os.Stderr, "Flags:\n") + flag.PrintDefaults() + fmt.Fprintf(os.Stderr, "\nExample:\n") + fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0]) + } + flag.Parse() + + // Check for URL argument + if len(flag.Args()) != 1 { + fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") + flag.Usage() + os.Exit(1) + } + url := flag.Args()[0] + + // Prepare output writer + var out *os.File + if *outputFile != "" { + var err error + out, err = os.Create(*outputFile) + if err != nil { + 
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) + os.Exit(1) + } + defer func() { _ = out.Close() }() + } else { + out = os.Stdout + } + + switch *delimString { + case "^_", "\\x1f": + *delimString = unitSeparator + case "^^", "\\x1e": + *delimString = recordSeparator + case "^]", "\\x1d": + *delimString = groupSeparator + case "^\\", "\\x1c": + *delimString = fileSeparator + case "^L", "\\f": + *delimString = "\f" + case "^K", "\\v": + *delimString = "\v" + case "^I", "\\t": + *delimString = "\t" + } + delim, _ = utf8.DecodeRuneInString(*delimString) + + var rc io.ReadCloser + if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { + docid, gid := gsheet2csv.ParseIDs(url) + if *parseOnly { + fmt.Printf("docid=%s\ngid=%s\n", docid, gid) + } else { + fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) + } + + sheetURL := gsheet2csv.ToCSVURL(docid, gid) + if *urlOnly { + fmt.Printf("%s\n", sheetURL) + } else { + fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL) + } + + if !*urlOnly { + resp, err := gsheet2csv.GetSheet(docid, gid) + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) + os.Exit(1) + } + defer func() { _ = resp.Body.Close() }() + rc = resp.Body + } + } else { + url = strings.TrimPrefix(url, "file://") + fmt.Fprintf(os.Stderr, "opening %s\n", url) + f, err := os.Open(url) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) + os.Exit(1) + } + rc = f + } + fmt.Fprintf(os.Stderr, "\n") + + if *urlOnly || *parseOnly { + os.Exit(0) + return + } + + if *rawOnly { + if _, err := io.Copy(out, rc); err != nil { + fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) + os.Exit(1) + } + return + } + + comment, _ := utf8.DecodeRuneInString(commentArg) + + // Create a reader for the Google Sheet + gsr := gsheet2csv.NewReader(rc) + gsr.QuotedComments = false + gsr.Comment = 0 + gsr.ReuseRecord = true + + // Create CSV writer + csvw := gsheet2csv.NewWriter(out) + csvw.Comma 
= delim // Set delimiter to tab for TSV + csvw.Comment = comment + csvw.UseCRLF = *useCRLF + for { + // Convert each record + record, err := gsr.Read() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) + os.Exit(1) + } + + if err := csvw.Write(record); err != nil { + fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) + os.Exit(1) + } + } + // Flush the writer to ensure all data is written + csvw.Flush() + if err := csvw.Error(); err != nil { + fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) + os.Exit(1) + } +} diff --git a/io/transforms/gsheet2csv/fixtures/example.go b/io/transforms/gsheet2csv/fixtures/example.go new file mode 100644 index 0000000..6bbc48c --- /dev/null +++ b/io/transforms/gsheet2csv/fixtures/example.go @@ -0,0 +1,33 @@ +package main + +import ( + "fmt" + "os" + + "github.com/therootcompany/golib/io/transform/gsheet2csv" +) + +func main() { + switch len(os.Args) { + case 2: + break + case 1: + fmt.Fprintf(os.Stderr, "Usage: %s \n", os.Args[0]) + os.Exit(1) + } + url := os.Args[1] + + gsr := gsheet2csv.NewReaderFromURL(url) + records, err := gsr.ReadAll() + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL) + os.Exit(1) + } + + csvw := gsheet2csv.NewWriter(os.Stdout) + csvw.Comment = gsr.Comment + if err := csvw.WriteAll(records); err != nil { + fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) + os.Exit(1) + } +} diff --git a/io/transforms/gsheet2csv/fixtures/gsheet.csv b/io/transforms/gsheet2csv/fixtures/gsheet.csv new file mode 100644 index 0000000..571b4ae --- /dev/null +++ b/io/transforms/gsheet2csv/fixtures/gsheet.csv @@ -0,0 +1,18 @@ +# this is a comment,, +"# this is, well, a quoted comment",, +"# this is a ""super""-quoted comment",, +Key,Value, +Name,55, +Girlfriend's Age,55, +,, +My IQ,55, +,55, +"Key,with,Comma",, +,"Value,with,Comma", +"Quoted ""Key""",Normal Value, +Normal Key,"Quoted ""Value""", 
+"Quoted ""Key""",, +,"Quoted ""Value""", +x,y,z +"# comment with trailing comma,",, +#1,2,#3 \ No newline at end of file diff --git a/io/transforms/gsheet2csv/go.mod b/io/transforms/gsheet2csv/go.mod new file mode 100644 index 0000000..f57d483 --- /dev/null +++ b/io/transforms/gsheet2csv/go.mod @@ -0,0 +1,3 @@ +module github.com/therootcompany/golib/io/transform/gsheet2csv + +go 1.24.6 diff --git a/io/transforms/gsheet2csv/gsheet2csv.go b/io/transforms/gsheet2csv/gsheet2csv.go new file mode 100644 index 0000000..b5841bc --- /dev/null +++ b/io/transforms/gsheet2csv/gsheet2csv.go @@ -0,0 +1,241 @@ +// Authored in 2025 by AJ ONeal (https://therootcompany.com) +// +// To the extent possible under law, the author(s) have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. +// +// You should have received a copy of the CC0 Public Domain Dedication along with +// this software. If not, see . 
// ErrHTTPGet is returned by GetSheet when the server answers with any status
// other than 200 OK.
var ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

// httpGet is the HTTP entry point used by GetSheet; tests replace it.
var httpGet = http.Get

// Reader wraps an encoding/csv Reader and can additionally skip rows that
// consist of a single comment field followed only by empty fields.
type Reader struct {
	*csv.Reader
	DocID          string // document id the data was downloaded from, if any
	GID            string // sheet id (gid) the data was downloaded from, if any
	URL            string // CSV export URL the data was downloaded from, if any
	QuotedComments bool   // when true, Read skips comment-only rows
	Comment        rune   // rune that marks the first field of a comment row
	r              io.Reader
	resp           *http.Response
	close          bool // true while resp.Body is owned by the Reader and still open
	err            error
}

// NewReaderFromURL extracts the document and sheet ids from any Google Sheets
// URL and returns a Reader over that sheet's CSV export.
func NewReaderFromURL(url string) *Reader {
	docid, gid := ParseIDs(url)

	return NewReaderFromIDs(docid, gid)
}

// NewReaderFromIDs downloads the CSV export of the given sheet and returns a
// Reader over the response body. A download failure is not returned here; it
// is stored and reported by the first call to Read or ReadAll. URL, DocID and
// GID are populated in both cases so callers can say what failed.
func NewReaderFromIDs(docid, gid string) *Reader {
	resp, err := GetSheet(docid, gid)

	var body io.Reader // stays a nil interface when the download failed
	if err == nil {
		body = resp.Body
	}

	r := NewReader(body)
	r.URL = ToCSVURL(docid, gid)
	r.DocID = docid
	r.GID = gid
	if err != nil {
		r.err = err
		return r
	}
	r.resp = resp
	r.close = true
	return r
}

// ToCSVURL builds the CSV export URL for a document id and sheet id.
func ToCSVURL(docid, gid string) string {
	return fmt.Sprintf("https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s", docid, gid)
}

// GetSheet requests the CSV export of the given sheet. On success the caller
// owns resp.Body and must close it. A non-200 answer closes the body and
// yields ErrHTTPGet.
func GetSheet(docid, gid string) (*http.Response, error) {
	downloadURL := ToCSVURL(docid, gid)

	resp, err := httpGet(downloadURL)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode != http.StatusOK {
		_ = resp.Body.Close() // the status is the error that matters here
		return nil, ErrHTTPGet
	}

	return resp, nil
}

// NewReader returns a Reader over r. The embedded csv.Reader has its own
// comment handling switched off so that comment rows reach Read as records;
// skipping them is controlled by QuotedComments and Comment instead.
func NewReader(r io.Reader) *Reader {
	csvr := csv.NewReader(r)
	csvr.Comma = ','
	csvr.Comment = 0         // to allow distinguishing between quoted comments and fields
	csvr.FieldsPerRecord = 0 // Google Sheets is consistent
	csvr.LazyQuotes = false  // fields that need quotes use them correctly
	csvr.TrimLeadingSpace = false
	csvr.ReuseRecord = false
	return &Reader{
		Reader:         csvr,
		QuotedComments: true,
		Comment:        '#',
		r:              r,
	}
}

// Close releases the HTTP response body when the Reader was created from a
// URL or ids. It is safe to call more than once and does nothing for a Reader
// built with NewReader. Read calls it automatically on any error, including
// io.EOF, so it is only needed when reading stops early.
func (r *Reader) Close() error {
	if !r.close || r.resp == nil {
		return nil
	}
	r.close = false
	return r.resp.Body.Close()
}

// Read returns the next record. When QuotedComments is set, rows whose first
// field starts with Comment and whose remaining fields are all empty are
// skipped. A stored download error is returned before anything is read.
func (r *Reader) Read() ([]string, error) {
	if r.err != nil {
		return nil, r.err
	}

	for {
		record, err := r.Reader.Read()
		if err != nil {
			_ = r.Close() // the read error is the one worth reporting
			return nil, err
		}

		if r.QuotedComments && r.isCommentRow(record) {
			continue
		}
		return record, nil
	}
}

// isCommentRow reports whether record is a comment followed only by empty
// fields.
func (r *Reader) isCommentRow(record []string) bool {
	if len(record) == 0 || record[0] == "" {
		return false
	}
	first, _ := utf8.DecodeRuneInString(record[0])
	return first == r.Comment && lastNonEmptyIndex(record) == 0
}

// ReadAll reads records through Read until io.EOF. On any other error it
// returns the records read so far together with that error.
func (r *Reader) ReadAll() ([][]string, error) {
	var records [][]string

	for {
		record, err := r.Read()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return records, nil
			}
			return records, err
		}
		records = append(records, record)
	}
}

// ParseIDs extracts the document id and sheet id from a URL containing
// /spreadsheets/d/{docid}. The document id ends at the next '/', '?' or '#'
// (or the end of the string), so URLs without a trailing path segment work
// too. The sheet id is taken from the first "gid=" and defaults to "0".
// Both results are empty when no document id is found.
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"
	_, rest, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	docid = rest
	if end := strings.IndexAny(rest, "/?#"); end != -1 {
		docid = rest[:end]
	}
	if docid == "" {
		return "", ""
	}

	if _, after, ok := strings.Cut(urlStr, "gid="); ok {
		gid = after
		if end := strings.IndexAny(after, "#&?/"); end != -1 {
			gid = after[:end]
		}
	}

	if gid == "" {
		gid = "0"
	}
	return docid, gid
}

// Writer wraps an encoding/csv Writer and renders comment rows so that a
// reader can tell a comment line from a data row whose first field merely
// starts with the comment rune.
type Writer struct {
	*csv.Writer
	Comment rune // rune that marks a comment; 0 switches comment handling off
	w       io.Writer
}

// NewWriter returns a Writer that writes to w and treats '#' as the comment
// rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes one record. A record with more than one field whose first
// field starts with Comment is handled specially:
//
//   - if every other field is empty, the comment is written alone and
//     unquoted on its own line;
//   - otherwise it is a data row, so its first field is always quoted and
//     the remaining fields are quoted only when they need it.
//
// Every other record is passed to the embedded csv.Writer. The record is
// never modified. Special rows bypass the csv.Writer's buffer, so pending
// output is flushed first to keep lines in order.
func (w *Writer) Write(record []string) error {
	if !w.isCommentLed(record) {
		return w.Writer.Write(record)
	}

	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	fields := record[:1]
	if lastNonEmptyIndex(record) != 0 {
		// Build a copy: the caller may reuse or inspect its slice.
		fields = make([]string, len(record))
		for i, f := range record {
			if i == 0 || w.needsQuotes(f) {
				f = `"` + strings.ReplaceAll(f, `"`, `""`) + `"`
			}
			fields[i] = f
		}
	}

	eol := "\n"
	if w.UseCRLF {
		eol = "\r\n"
	}
	_, err := io.WriteString(w.w, strings.Join(fields, string(w.Comma))+eol)
	return err
}

// isCommentLed reports whether record needs the special comment rendering.
// The empty-field check matters because decoding "" yields utf8.RuneError,
// which must never be mistaken for a configured comment rune.
func (w *Writer) isCommentLed(record []string) bool {
	if len(record) < 2 || record[0] == "" || w.Comment == 0 {
		return false
	}
	first, _ := utf8.DecodeRuneInString(record[0])
	return first == w.Comment
}

// needsQuotes reports whether field would break the row if written bare.
func (w *Writer) needsQuotes(field string) bool {
	return strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n")
}

// WriteAll writes every record with Write, flushes, and returns the first
// error encountered.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}

// lastNonEmptyIndex returns the index of the last non-empty field, or -1 when
// the record has no non-empty field.
func lastNonEmptyIndex(record []string) int {
	for i := len(record) - 1; i >= 0; i-- {
		if record[i] != "" {
			return i
		}
	}
	return -1
}
+func TestParseIDs(t *testing.T) { + tests := []struct { + name string + url string + wantDoc string + wantGid string + }{ + { + name: "Google Sheets Edit / Share URL with gid", + url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238", + wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", + wantGid: "559037238", + }, + { + name: "Google Sheets CSV URL with gid", + url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238", + wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", + wantGid: "559037238", + }, + { + name: "URL without gid", + url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit", + wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", + wantGid: "0", + }, + { + name: "Invalid URL", + url: "https://example.com/invalid", + wantDoc: "", + wantGid: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotDoc, gotGid := ParseIDs(tt.url) + if gotDoc != tt.wantDoc { + t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc) + } + if gotGid != tt.wantGid { + t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid) + } + }) + } +} + +// TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL. 
+func TestNewReaderFromURL(t *testing.T) { + originalGet := httpGet + defer func() { httpGet = originalGet }() + + url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" + + // Test successful HTTP response + mockResp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(sampleCSV)), + } + client := &mockHTTPClient{resp: mockResp} + httpGet = client.Get + + reader := NewReaderFromURL(url) + if reader.err != nil { + t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) + } + if reader.resp != mockResp { + t.Error("NewReaderFromURL() did not set response correctly") + } + if !reader.close { + t.Error("NewReaderFromURL() did not set close flag") + } + + // Test HTTP failure + client = &mockHTTPClient{resp: mockResp} + client.err = errors.New("network error") + httpGet = client.Get + + reader = NewReaderFromURL(url) + if reader.err == nil { + t.Error("NewReaderFromURL() expected error, got nil") + } + + // Test non-200 status + client = &mockHTTPClient{resp: &http.Response{ + StatusCode: http.StatusNotFound, + Body: io.NopCloser(strings.NewReader("these aren't the droids you're looking for")), + }} + httpGet = client.Get + + reader = NewReaderFromURL(url) + if reader.err == nil { + t.Error("NewReaderFromURL() expected error for non-200 status, got nil") + } +} + +// TestRead tests the Read method for comment handling. 
+func TestRead(t *testing.T) { + tests := []struct { + name string + quotedComments bool + expected [][]string + }{ + { + name: "Skip comments", + quotedComments: true, + expected: [][]string{ + {"NAME", "ID", "SIZE", "MODIFIED"}, + {"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, + {"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, + {"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, + }, + }, + { + name: "Don't skip quoted comments", + quotedComments: false, + expected: [][]string{ + {"# Sample Quoted Comment, with \"quotes\" itself"}, + {"NAME", "ID", "SIZE", "MODIFIED"}, + {"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, + {"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, + {"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := NewReader(strings.NewReader(sampleCSV)) + reader.QuotedComments = tt.quotedComments + + for i, want := range tt.expected { + got, err := reader.Read() + if err != nil { + t.Errorf("Read() error at record %d: %v", i, err) + } + if !slices.Equal(got, want) { + t.Errorf("Read() record %d = %v, want %v", i, got, want) + } + } + + // Verify EOF + _, err := reader.Read() + if !errors.Is(err, io.EOF) { + t.Errorf("Read() expected EOF, got %v", err) + } + }) + } +} + +// TestReadAll tests the ReadAll method for different configurations. 
+func TestReadAll(t *testing.T) { + tests := []struct { + name string + quotedComments bool + expected [][]string + }{ + { + name: "Skip comments", + quotedComments: true, + expected: [][]string{ + {"NAME", "ID", "SIZE", "MODIFIED"}, + {"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, + {"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, + {"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := NewReader(strings.NewReader(sampleCSV)) + reader.QuotedComments = tt.quotedComments + + got, err := reader.ReadAll() + if err != nil { + t.Errorf("ReadAll() error: %v", err) + } + if len(got) != len(tt.expected) { + t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected)) + } + for i, want := range tt.expected { + if !slices.Equal(got[i], want) { + t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want) + } + } + }) + } +} + +// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV. +func TestNewReaderFromURLWithMalformedCSV(t *testing.T) { + mockResp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(malformedCSV)), + } + client := &mockHTTPClient{resp: mockResp} + originalGet := httpGet + httpGet = client.Get + defer func() { httpGet = originalGet }() + + url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" + reader := NewReaderFromURL(url) + if reader.err != nil { + t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) + } + + // Reading should fail due to malformed CSV + _, err := reader.Read() + if err == nil { + t.Error("Read() expected error for malformed CSV, got nil") + } +}