feat(gsheet2csv): parse URLs and CSVs with comments

This commit is contained in:
AJ ONeal 2025-10-10 18:13:12 -06:00
parent dc951ce388
commit cd00d85968
No known key found for this signature in database
9 changed files with 1005 additions and 0 deletions

View File

@ -0,0 +1,7 @@
Authored in 2025 by AJ ONeal <aj@therootcompany.com>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.

View File

@ -0,0 +1,101 @@
# gsheet2csv
[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv)
A simple wrapper around `encoding/csv` that reads Google Sheets CSV exports from a URL or from any given `io.Reader`.
This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us:
- works with Google Sheet URLs, regardless of URL format
- Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000>
- Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing>
- CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000>
- anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}`
- can ignore quoted comments (if all other fields in the row are empty)
- can preserve comments
- swaps `\r\n` (Windows) for `\n` (Unix) and ensures a trailing newline (a la `encoding/csv`)
## Usage
Same as `encoding/csv` (embedded), but with two extra options:
```go
package main
import (
"fmt"
"os"
"github.com/therootcompany/golib/io/transform/gsheet2csv"
)
func main() {
switch len(os.Args) {
case 2:
break
case 1:
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
os.Exit(1)
}
url := os.Args[1]
gsr := gsheet2csv.NewReaderFromURL(url)
records, err := gsr.ReadAll()
if err != nil {
fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL)
os.Exit(1)
}
// distinguishes between comments and quoted fields
csvw := gsheet2csv.NewWriter(os.Stdout)
csvw.Comment = gsr.Comment
if err := csvw.WriteAll(records); err != nil {
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
os.Exit(1)
}
}
```
## CLI
There are two convenience utilities:
- `gsheet2csv`
- `gsheet2tsv`
They're only slightly different from a direct CSV export of a Google Sheet, in that they reformat comments and newlines.
### Flags & Options
```text
--raw download without processing
--print-ids print ids to stdout without download
--print-url print url to stdout without downloading
-o <filepath> write records to file
-d field delimiter
--comment '#' treat lines starting with # as comments
--crlf use CRLF (\r\n) as record separator
```
### Installation
```sh
go get github.com/therootcompany/golib/io/transform/gsheet2csv
```
### ASCII Delimiters
```
, comma
\t tab (or a normal tab)
space (just a normal space)
: colon
; semicolon
| pipe
^_ unit separator
^^ record separator
^] group separator
^\ file separator
\f form feed (also ^L)
\v vertical tab (also ^K)
```

View File

@ -0,0 +1,176 @@
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
"strings"
"unicode/utf8"
"github.com/therootcompany/golib/io/transform/gsheet2csv"
)
// ASCII separator control characters. The -d flag accepts caret notation
// (e.g. "^_") or a "\x1f"-style escape for each of them; main maps those
// spellings onto these values.
const (
	unitSeparator   = "\x1f" // ^_
	recordSeparator = "\x1e" // ^^
	groupSeparator  = "\x1d" // ^]
	fileSeparator   = "\x1c" // ^\
)
// main converts a Google Sheet (or a local CSV file) to CSV/TSV.
//
// The single positional argument is an http(s) Google Sheets URL, a file://
// URL, or a plain file path. Diagnostics are written to stderr; records (or
// the output of --print-ids / --print-url) go to stdout or to the -o file.
func main() {
	var commentArg string

	// A program name containing "tsv" switches the default delimiter to tab.
	format := "CSV"
	delim := ','
	if strings.Contains(os.Args[0], "tsv") {
		delim = '\t'
		format = "TSV"
	}

	// Parse command-line flags
	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments")
	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
	rawOnly := flag.Bool("raw", false, "don't parse, just download")
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
		fmt.Fprintf(os.Stderr, "Flags:\n")
		flag.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExample:\n")
		fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
	}
	flag.Parse()

	// Check for URL argument
	if len(flag.Args()) != 1 {
		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
		flag.Usage()
		os.Exit(1)
	}
	url := flag.Args()[0]

	// Translate caret notation and backslash escapes into the actual
	// delimiter character; anything else is used as given.
	switch *delimString {
	case "^_", "\\x1f":
		*delimString = unitSeparator
	case "^^", "\\x1e":
		*delimString = recordSeparator
	case "^]", "\\x1d":
		*delimString = groupSeparator
	case "^\\", "\\x1c":
		*delimString = fileSeparator
	case "^L", "\\f":
		*delimString = "\f"
	case "^K", "\\v":
		*delimString = "\v"
	case "^I", "\\t":
		*delimString = "\t"
	}
	delim, _ = utf8.DecodeRuneInString(*delimString)

	// --print-ids and --print-url only report what would be fetched; neither
	// downloads anything nor touches the output file.
	printOnly := *urlOnly || *parseOnly

	var rc io.ReadCloser
	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
		docid, gid := gsheet2csv.ParseIDs(url)
		if *parseOnly {
			fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
		} else {
			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
		}

		sheetURL := gsheet2csv.ToCSVURL(docid, gid)
		switch {
		case *urlOnly:
			fmt.Printf("%s\n", sheetURL)
		case !*parseOnly:
			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
		}

		if !printOnly {
			resp, err := gsheet2csv.GetSheet(docid, gid)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
				os.Exit(1)
			}
			defer func() { _ = resp.Body.Close() }()
			rc = resp.Body
		}
	} else {
		url = strings.TrimPrefix(url, "file://")
		fmt.Fprintf(os.Stderr, "opening %s\n", url)
		f, err := os.Open(url)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
			os.Exit(1)
		}
		defer func() { _ = f.Close() }()
		rc = f
	}
	fmt.Fprintf(os.Stderr, "\n")

	if printOnly {
		// Returning (rather than os.Exit) lets the deferred closers run.
		return
	}

	// Prepare output writer. This happens only once there is something to
	// write, so the print-only modes never truncate an existing -o file.
	out := os.Stdout
	if *outputFile != "" {
		f, err := os.Create(*outputFile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
			os.Exit(1)
		}
		defer func() { _ = f.Close() }()
		out = f
	}

	if *rawOnly {
		if _, err := io.Copy(out, rc); err != nil {
			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
			os.Exit(1)
		}
		return
	}

	// An empty --comment leaves the comment rune at 0. Decoding "" would
	// yield utf8.RuneError, which also matches empty first fields and would
	// make the writer treat blank rows as comments.
	var comment rune
	if commentArg != "" {
		comment, _ = utf8.DecodeRuneInString(commentArg)
	}

	// Create a reader for the Google Sheet. Comment rows are passed through
	// unfiltered here; the writer below is what reformats them.
	gsr := gsheet2csv.NewReader(rc)
	gsr.QuotedComments = false
	gsr.Comment = 0
	gsr.ReuseRecord = true

	// Create CSV writer
	csvw := gsheet2csv.NewWriter(out)
	csvw.Comma = delim // delimiter chosen via -d (or the program-name default)
	csvw.Comment = comment
	csvw.UseCRLF = *useCRLF

	for {
		// Convert each record
		record, err := gsr.Read()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
			os.Exit(1)
		}
		if err := csvw.Write(record); err != nil {
			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
			os.Exit(1)
		}
	}

	// Flush the writer to ensure all data is written
	csvw.Flush()
	if err := csvw.Error(); err != nil {
		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
		os.Exit(1)
	}
}

View File

@ -0,0 +1,176 @@
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
"strings"
"unicode/utf8"
"github.com/therootcompany/golib/io/transform/gsheet2csv"
)
// ASCII separator control characters. The -d flag accepts caret notation
// (e.g. "^_") or a "\x1f"-style escape for each of them; main maps those
// spellings onto these values.
const (
	unitSeparator   = "\x1f" // ^_
	recordSeparator = "\x1e" // ^^
	groupSeparator  = "\x1d" // ^]
	fileSeparator   = "\x1c" // ^\
)
// main converts a Google Sheet (or a local CSV file) to CSV/TSV.
//
// The single positional argument is an http(s) Google Sheets URL, a file://
// URL, or a plain file path. Diagnostics are written to stderr; records (or
// the output of --print-ids / --print-url) go to stdout or to the -o file.
func main() {
	var commentArg string

	// A program name containing "tsv" switches the default delimiter to tab.
	format := "CSV"
	delim := ','
	if strings.Contains(os.Args[0], "tsv") {
		delim = '\t'
		format = "TSV"
	}

	// Parse command-line flags
	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments")
	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
	rawOnly := flag.Bool("raw", false, "don't parse, just download")
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
		fmt.Fprintf(os.Stderr, "Flags:\n")
		flag.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExample:\n")
		fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
	}
	flag.Parse()

	// Check for URL argument
	if len(flag.Args()) != 1 {
		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
		flag.Usage()
		os.Exit(1)
	}
	url := flag.Args()[0]

	// Translate caret notation and backslash escapes into the actual
	// delimiter character; anything else is used as given.
	switch *delimString {
	case "^_", "\\x1f":
		*delimString = unitSeparator
	case "^^", "\\x1e":
		*delimString = recordSeparator
	case "^]", "\\x1d":
		*delimString = groupSeparator
	case "^\\", "\\x1c":
		*delimString = fileSeparator
	case "^L", "\\f":
		*delimString = "\f"
	case "^K", "\\v":
		*delimString = "\v"
	case "^I", "\\t":
		*delimString = "\t"
	}
	delim, _ = utf8.DecodeRuneInString(*delimString)

	// --print-ids and --print-url only report what would be fetched; neither
	// downloads anything nor touches the output file.
	printOnly := *urlOnly || *parseOnly

	var rc io.ReadCloser
	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
		docid, gid := gsheet2csv.ParseIDs(url)
		if *parseOnly {
			fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
		} else {
			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
		}

		sheetURL := gsheet2csv.ToCSVURL(docid, gid)
		switch {
		case *urlOnly:
			fmt.Printf("%s\n", sheetURL)
		case !*parseOnly:
			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
		}

		if !printOnly {
			resp, err := gsheet2csv.GetSheet(docid, gid)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
				os.Exit(1)
			}
			defer func() { _ = resp.Body.Close() }()
			rc = resp.Body
		}
	} else {
		url = strings.TrimPrefix(url, "file://")
		fmt.Fprintf(os.Stderr, "opening %s\n", url)
		f, err := os.Open(url)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
			os.Exit(1)
		}
		defer func() { _ = f.Close() }()
		rc = f
	}
	fmt.Fprintf(os.Stderr, "\n")

	if printOnly {
		// Returning (rather than os.Exit) lets the deferred closers run.
		return
	}

	// Prepare output writer. This happens only once there is something to
	// write, so the print-only modes never truncate an existing -o file.
	out := os.Stdout
	if *outputFile != "" {
		f, err := os.Create(*outputFile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
			os.Exit(1)
		}
		defer func() { _ = f.Close() }()
		out = f
	}

	if *rawOnly {
		if _, err := io.Copy(out, rc); err != nil {
			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
			os.Exit(1)
		}
		return
	}

	// An empty --comment leaves the comment rune at 0. Decoding "" would
	// yield utf8.RuneError, which also matches empty first fields and would
	// make the writer treat blank rows as comments.
	var comment rune
	if commentArg != "" {
		comment, _ = utf8.DecodeRuneInString(commentArg)
	}

	// Create a reader for the Google Sheet. Comment rows are passed through
	// unfiltered here; the writer below is what reformats them.
	gsr := gsheet2csv.NewReader(rc)
	gsr.QuotedComments = false
	gsr.Comment = 0
	gsr.ReuseRecord = true

	// Create CSV writer
	csvw := gsheet2csv.NewWriter(out)
	csvw.Comma = delim // delimiter chosen via -d (or the program-name default)
	csvw.Comment = comment
	csvw.UseCRLF = *useCRLF

	for {
		// Convert each record
		record, err := gsr.Read()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
			os.Exit(1)
		}
		if err := csvw.Write(record); err != nil {
			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
			os.Exit(1)
		}
	}

	// Flush the writer to ensure all data is written
	csvw.Flush()
	if err := csvw.Error(); err != nil {
		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
		os.Exit(1)
	}
}

View File

@ -0,0 +1,33 @@
package main
import (
"fmt"
"os"
"github.com/therootcompany/golib/io/transform/gsheet2csv"
)
// main downloads the Google Sheet named by the single command-line argument
// and writes its records to stdout as CSV.
func main() {
	// Exactly one argument (the sheet URL) is expected.
	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
		os.Exit(1)
	}
	url := os.Args[1]

	gsr := gsheet2csv.NewReaderFromURL(url)
	records, err := gsr.ReadAll()
	if err != nil {
		// Report the URL the user passed plus the underlying cause.
		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", url, err)
		os.Exit(1)
	}

	// Use the reader's comment rune so comment rows are written back as
	// comments rather than as ordinary quoted fields.
	csvw := gsheet2csv.NewWriter(os.Stdout)
	csvw.Comment = gsr.Comment
	if err := csvw.WriteAll(records); err != nil {
		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
		os.Exit(1)
	}
}

View File

@ -0,0 +1,18 @@
# this is a comment,,
"# this is, well, a quoted comment",,
"# this is a ""super""-quoted comment",,
Key,Value,
Name,55,
Girlfriend's Age,55,
,,
My IQ,55,
,55,
"Key,with,Comma",,
,"Value,with,Comma",
"Quoted ""Key""",Normal Value,
Normal Key,"Quoted ""Value""",
"Quoted ""Key""",,
,"Quoted ""Value""",
x,y,z
"# comment with trailing comma,",,
#1,2,#3
1 # this is a comment
2 # this is, well, a quoted comment
3 # this is a "super"-quoted comment
4 Key Value
5 Name 55
6 Girlfriend's Age 55
7
8 My IQ 55
9 55
10 Key,with,Comma
11 Value,with,Comma
12 Quoted "Key" Normal Value
13 Normal Key Quoted "Value"
14 Quoted "Key"
15 Quoted "Value"
16 x y z
17 # comment with trailing comma,
18 #1 2 #3

View File

@ -0,0 +1,3 @@
module github.com/therootcompany/golib/io/transform/gsheet2csv
go 1.24.6

View File

@ -0,0 +1,241 @@
// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
//
// SPDX-License-Identifier: CC0-1.0
package gsheet2csv
import (
"encoding/csv"
"errors"
"fmt"
"io"
"net/http"
"strings"
"unicode/utf8"
)
var (
	// ErrHTTPGet is returned by GetSheet when the download responds with a
	// status other than 200 OK.
	ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

	// httpGet performs the actual download. It is a variable so that tests
	// can substitute a stub for http.Get.
	httpGet = http.Get
)
// Reader wraps an encoding/csv Reader for Google Sheets CSV exports and can
// skip comment rows that encoding/csv would return as ordinary records.
type Reader struct {
	*csv.Reader

	// DocID, GID and URL identify the sheet; NewReaderFromIDs fills them in.
	DocID string
	GID   string
	URL   string

	// QuotedComments, when true, makes Read skip records whose first field
	// begins with Comment and whose remaining fields are all empty.
	QuotedComments bool
	// Comment is the rune that marks a comment row (NewReader sets '#').
	Comment rune

	r     io.Reader      // source passed to NewReader
	resp  *http.Response // HTTP response backing the reader, if any
	close bool           // whether Read closes resp.Body once reading fails or ends
	err   error          // construction error, returned by every Read call
}
// NewReaderFromURL extracts the document ID and gid from url with ParseIDs
// and returns the Reader that NewReaderFromIDs builds for them.
func NewReaderFromURL(url string) *Reader {
	return NewReaderFromIDs(ParseIDs(url))
}
// NewReaderFromIDs downloads the CSV export for the given document ID and gid
// and returns a Reader over the response body.
//
// It never returns nil. If the download fails, the error is stored on the
// Reader and returned by every subsequent Read (and therefore ReadAll). URL,
// DocID and GID are populated in both cases so callers can report which sheet
// was requested.
func NewReaderFromIDs(docid, gid string) *Reader {
	var r *Reader

	resp, err := GetSheet(docid, gid)
	if err != nil {
		// No body to read; Read short-circuits on r.err before touching
		// the underlying csv.Reader.
		r = NewReader(nil)
		r.err = err
	} else {
		r = NewReader(resp.Body)
		r.resp = resp
		r.close = true // Read closes resp.Body when reading ends or fails
	}

	r.URL = ToCSVURL(docid, gid)
	r.DocID = docid
	r.GID = gid
	return r
}
// ToCSVURL builds the Google Sheets CSV export URL for the given document ID
// and sheet gid. The IDs are inserted verbatim, without escaping.
func ToCSVURL(docid, gid string) string {
	const exportFormat = "https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s"

	csvURL := fmt.Sprintf(exportFormat, docid, gid)
	return csvURL
}
func GetSheet(docid, gid string) (*http.Response, error) {
downloadURL := ToCSVURL(docid, gid)
resp, err := httpGet(downloadURL)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
_ = resp.Body.Close()
return nil, ErrHTTPGet
}
return resp, nil
}
// NewReader returns a Reader over r that skips comment rows ('#') by default.
//
// The embedded csv.Reader is configured with its own comment handling turned
// off, so that commented rows reach Read and can be told apart from ordinary
// fields there.
func NewReader(r io.Reader) *Reader {
	parser := csv.NewReader(r)
	parser.ReuseRecord = false
	parser.TrimLeadingSpace = false
	parser.LazyQuotes = false  // fields that need quotes use them correctly
	parser.FieldsPerRecord = 0 // Google Sheets is consistent
	parser.Comment = 0         // to allow distinguishing between quoted comments and fields
	parser.Comma = ','

	gsr := new(Reader)
	gsr.Reader = parser
	gsr.QuotedComments = true
	gsr.Comment = '#'
	gsr.r = r
	return gsr
}
// Read returns the next record, skipping comment rows when QuotedComments is
// set. A comment row is one whose first field starts with r.Comment and whose
// other fields are all empty.
//
// A construction error stored on the Reader is returned immediately. Any
// error from the underlying csv.Reader (including io.EOF) is returned with a
// nil record, after closing the HTTP body if the Reader owns one.
func (r *Reader) Read() ([]string, error) {
	if r.err != nil {
		return nil, r.err
	}

	for {
		fields, err := r.Reader.Read()
		if err != nil {
			if r.close {
				_ = r.resp.Body.Close()
			}
			return nil, err
		}

		// Only rows with a non-empty first field can be comments.
		if !r.QuotedComments || fields[0] == "" {
			return fields, nil
		}
		if first, _ := utf8.DecodeRuneInString(fields[0]); first != r.Comment {
			return fields, nil
		}

		// The first field looks like a comment; it only counts as one when
		// nothing else on the row carries data.
		hasData := false
		for _, f := range fields[1:] {
			if f != "" {
				hasData = true
				break
			}
		}
		if hasData {
			return fields, nil
		}
	}
}
// ReadAll reads records via Read until io.EOF and returns them. It replaces
// the embedded csv.Reader's ReadAll so that comment filtering applies.
//
// io.EOF is not reported as an error. On any other error the records read so
// far are returned together with that error.
func (r *Reader) ReadAll() ([][]string, error) {
	var records [][]string
	for {
		record, err := r.Read()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return records, nil
			}
			return records, err
		}
		if r.ReuseRecord {
			// With ReuseRecord the csv.Reader hands back the same backing
			// slice on every call; copy it so earlier rows are not
			// overwritten by later ones.
			record = append([]string(nil), record...)
		}
		records = append(records, record)
	}
}
// ParseIDs extracts the document ID and sheet gid from a Google Sheets URL.
//
// The document ID is the path segment following "/spreadsheets/d/"; it ends
// at the next '/', '?', '#' or '&', or at the end of the string. The gid is
// taken from the first non-empty "gid=" parameter that follows the document
// ID and is introduced by '?', '&', '#' or '/'. When no such parameter exists
// the gid defaults to "0".
//
// If urlStr holds no document ID, both results are empty strings.
func ParseIDs(urlStr string) (docid string, gid string) {
	const (
		prefix   = "/spreadsheets/d/"
		gidKey   = "gid="
		endChars = "#&?/"
	)

	_, tail, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	// Split the document ID from whatever follows it.
	end := strings.IndexAny(tail, endChars)
	if end == -1 {
		end = len(tail)
	}
	docid, tail = tail[:end], tail[end:]
	if docid == "" {
		return "", ""
	}

	for {
		i := strings.Index(tail, gidKey)
		if i == -1 {
			return docid, "0"
		}
		// Require a separator before "gid=" so that parameters merely
		// ending in "gid" (e.g. "bigid=") are not mistaken for it.
		atBoundary := i > 0 && strings.IndexByte(endChars, tail[i-1]) != -1
		tail = tail[i+len(gidKey):]
		if !atBoundary {
			continue
		}

		value := tail
		if stop := strings.IndexAny(tail, endChars); stop != -1 {
			value = tail[:stop]
		}
		if value != "" {
			return docid, value
		}
		// Empty value (e.g. "?gid=#gid=123"): keep looking.
	}
}
// Writer wraps an encoding/csv Writer and adds comment-aware output, so that
// comment rows stay distinguishable from ordinary fields that merely start
// with the comment rune.
type Writer struct {
	*csv.Writer

	// Comment is the rune that marks a comment row; NewWriter sets '#'.
	Comment rune

	// w is the destination given to NewWriter. Comment-like rows are written
	// to it directly, bypassing the csv.Writer's own quoting.
	w io.Writer
}

// NewWriter returns a Writer that writes to w and treats '#' as the comment
// rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes one record.
//
// A record with more than one field whose first field begins with w.Comment
// is handled specially:
//   - if every other field is empty, the first field alone is written as a
//     bare, unquoted comment line;
//   - otherwise the first field is always quoted (so it cannot be read back
//     as a comment) and the remaining fields are quoted only when required.
//
// Every other record is passed to the embedded csv.Writer unchanged. The
// caller's record slice is never modified.
func (w *Writer) Write(record []string) error {
	if len(record) > 1 {
		if first, _ := utf8.DecodeRuneInString(record[0]); first == w.Comment {
			return w.writeCommentLike(record)
		}
	}
	return w.Writer.Write(record)
}

// writeCommentLike writes a record whose first field starts with the comment
// rune straight to the underlying writer.
func (w *Writer) writeCommentLike(record []string) error {
	// Push out anything the csv.Writer has buffered so rows keep their order,
	// and surface a failed flush instead of writing after it.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	// Index of the last non-empty field. The lower bound guards the case
	// where the first field is itself empty (possible when Comment is
	// utf8.RuneError).
	last := len(record) - 1
	for last > 0 && record[last] == "" {
		last--
	}

	var line string
	if last == 0 {
		line = record[0]
	} else {
		out := make([]string, len(record))
		for i, f := range record {
			if i == 0 || w.needsQuoting(f) {
				f = `"` + strings.ReplaceAll(f, `"`, `""`) + `"`
			}
			out[i] = f
		}
		line = strings.Join(out, string(w.Comma))
	}

	eol := "\n"
	if w.UseCRLF {
		eol = "\r\n"
	}
	_, err := io.WriteString(w.w, line+eol)
	return err
}

// needsQuoting reports whether f contains a character that would break the
// row if written unquoted: a quote, the field delimiter, or a line break.
func (w *Writer) needsQuoting(f string) bool {
	return strings.ContainsRune(f, w.Comma) || strings.ContainsAny(f, "\"\r\n")
}

// WriteAll writes every record with Write, then flushes and returns any
// error recorded by the embedded csv.Writer.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}

View File

@ -0,0 +1,250 @@
package gsheet2csv
import (
"errors"
"io"
"net/http"
"slices"
"strings"
"testing"
)
// mockHTTPClient is a canned stand-in for an HTTP GET function: it hands
// back whatever response and error it was configured with.
type mockHTTPClient struct {
	resp *http.Response // response returned by every Get call
	err  error          // error returned by every Get call
}

// Get ignores url and returns the configured response and error.
func (c *mockHTTPClient) Get(url string) (*http.Response, error) {
	resp, err := c.resp, c.err
	return resp, err
}
// sampleCSV mimics the structure of ai-models.csv from the project README.
// It starts with an unquoted comment row and a quoted comment row, followed
// by a header row and three data rows.
//
// NOTE(review): both comment rows parse as single-field records while the
// remaining rows have four fields, and NewReader sets FieldsPerRecord = 0.
// Confirm that encoding/csv accepts this mix of field counts as the tests
// using this fixture expect.
const sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"
"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`
// malformedCSV for testing error handling: the last field of the header row
// opens a quote that is not closed before the end of that line.
const malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
// TestParseIDs checks the document ID and gid that ParseIDs extracts from
// several URL shapes, including one that is not a Google Sheets URL.
func TestParseIDs(t *testing.T) {
	type testCase struct {
		name    string
		url     string
		wantDoc string
		wantGid string
	}

	cases := []testCase{
		{
			name:    "Google Sheets Edit / Share URL with gid",
			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238",
			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
			wantGid: "559037238",
		},
		{
			name:    "Google Sheets CSV URL with gid",
			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238",
			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
			wantGid: "559037238",
		},
		{
			name:    "URL without gid",
			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit",
			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
			wantGid: "0",
		},
		{
			name:    "Invalid URL",
			url:     "https://example.com/invalid",
			wantDoc: "",
			wantGid: "",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			docid, gid := ParseIDs(tc.url)
			if docid != tc.wantDoc {
				t.Errorf("ParseIDs() docid = %q, want %q", docid, tc.wantDoc)
			}
			if gid != tc.wantGid {
				t.Errorf("ParseIDs() gid = %q, want %q", gid, tc.wantGid)
			}
		})
	}
}
// TestNewReaderFromURL exercises NewReaderFromURL against a stubbed httpGet
// for three outcomes: 200 OK, a transport error, and a non-200 status.
func TestNewReaderFromURL(t *testing.T) {
	savedGet := httpGet
	defer func() { httpGet = savedGet }()

	const sheetURL = "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"

	// stub points httpGet at a mock that returns the given pair.
	stub := func(resp *http.Response, err error) {
		httpGet = (&mockHTTPClient{resp: resp, err: err}).Get
	}

	// Successful HTTP response: the reader keeps the response and is marked
	// as responsible for closing it.
	okResp := &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(strings.NewReader(sampleCSV)),
	}
	stub(okResp, nil)
	reader := NewReaderFromURL(sheetURL)
	if reader.err != nil {
		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
	}
	if reader.resp != okResp {
		t.Error("NewReaderFromURL() did not set response correctly")
	}
	if !reader.close {
		t.Error("NewReaderFromURL() did not set close flag")
	}

	// Transport failure: the error must be recorded on the reader.
	stub(okResp, errors.New("network error"))
	reader = NewReaderFromURL(sheetURL)
	if reader.err == nil {
		t.Error("NewReaderFromURL() expected error, got nil")
	}

	// Non-200 status is reported as an error as well.
	notFound := &http.Response{
		StatusCode: http.StatusNotFound,
		Body:       io.NopCloser(strings.NewReader("these aren't the droids you're looking for")),
	}
	stub(notFound, nil)
	reader = NewReaderFromURL(sheetURL)
	if reader.err == nil {
		t.Error("NewReaderFromURL() expected error for non-200 status, got nil")
	}
}
// TestRead reads sampleCSV record by record, with and without comment
// skipping, and then expects io.EOF.
//
// NOTE(review): the "Don't skip quoted comments" case expects the unquoted
// "# Generated by ollama list" row to be absent, yet NewReader sets the
// csv.Reader's Comment to 0 and Read only filters when QuotedComments is
// true. Confirm the intended behavior.
func TestRead(t *testing.T) {
	type testCase struct {
		name           string
		quotedComments bool
		expected       [][]string
	}

	cases := []testCase{
		{
			name:           "Skip comments",
			quotedComments: true,
			expected: [][]string{
				{"NAME", "ID", "SIZE", "MODIFIED"},
				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
			},
		},
		{
			name:           "Don't skip quoted comments",
			quotedComments: false,
			expected: [][]string{
				{"# Sample Quoted Comment, with \"quotes\" itself"},
				{"NAME", "ID", "SIZE", "MODIFIED"},
				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			rd := NewReader(strings.NewReader(sampleCSV))
			rd.QuotedComments = tc.quotedComments

			for idx := range tc.expected {
				want := tc.expected[idx]
				got, err := rd.Read()
				if err != nil {
					t.Errorf("Read() error at record %d: %v", idx, err)
				}
				if !slices.Equal(got, want) {
					t.Errorf("Read() record %d = %v, want %v", idx, got, want)
				}
			}

			// Verify EOF
			if _, err := rd.Read(); !errors.Is(err, io.EOF) {
				t.Errorf("Read() expected EOF, got %v", err)
			}
		})
	}
}
// TestReadAll tests the ReadAll method for different configurations.
func TestReadAll(t *testing.T) {
	tests := []struct {
		name           string
		quotedComments bool
		expected       [][]string
	}{
		{
			name:           "Skip comments",
			quotedComments: true,
			expected: [][]string{
				{"NAME", "ID", "SIZE", "MODIFIED"},
				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			reader := NewReader(strings.NewReader(sampleCSV))
			reader.QuotedComments = tt.quotedComments
			got, err := reader.ReadAll()
			if err != nil {
				t.Errorf("ReadAll() error: %v", err)
			}
			// Stop here on a length mismatch: the per-record comparison
			// below indexes got by position and would otherwise panic.
			if len(got) != len(tt.expected) {
				t.Fatalf("ReadAll() returned %d records, want %d", len(got), len(tt.expected))
			}
			for i, want := range tt.expected {
				if !slices.Equal(got[i], want) {
					t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want)
				}
			}
		})
	}
}
// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV.
func TestNewReaderFromURLWithMalformedCSV(t *testing.T) {
mockResp := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(strings.NewReader(malformedCSV)),
}
client := &mockHTTPClient{resp: mockResp}
originalGet := httpGet
httpGet = client.Get
defer func() { httpGet = originalGet }()
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
reader := NewReaderFromURL(url)
if reader.err != nil {
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
}
// Reading should fail due to malformed CSV
_, err := reader.Read()
if err == nil {
t.Error("Read() expected error for malformed CSV, got nil")
}
}