mirror of
https://github.com/therootcompany/golib.git
synced 2025-10-30 12:42:51 +00:00
feat(gsheet2csv): parse URLs and CSVs with comments
This commit is contained in:
parent
dc951ce388
commit
24ec3f021d
7
io/transform/gsheet2csv/LICENSE
Normal file
7
io/transform/gsheet2csv/LICENSE
Normal file
@ -0,0 +1,7 @@
|
||||
Authored in 2025 by AJ ONeal <aj@therootcompany.com>
|
||||
To the extent possible under law, the author(s) have dedicated all copyright
|
||||
and related and neighboring rights to this software to the public domain
|
||||
worldwide. This software is distributed without any warranty.
|
||||
|
||||
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
125
io/transform/gsheet2csv/README.md
Normal file
125
io/transform/gsheet2csv/README.md
Normal file
@ -0,0 +1,125 @@
|
||||
# gsheet2csv
|
||||
|
||||
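[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv)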
|
||||
|
||||
A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader.
|
||||
|
||||
This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us:
|
||||
|
||||
- works with Google Sheet URLs, regardless of URL format
|
||||
- Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000>
|
||||
- Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing>
|
||||
- CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000>
|
||||
- anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}`
|
||||
- can write out for import to gsheet (comments containing quotes or commas are quoted), \
|
||||
or in RFC form (comments are never quoted, but values beginning with a comment character are)
|
||||
- swaps `\r\n` (Windows) for `\n` (Unix) and ensures a trailing newline (a la `encoding/csv`)
|
||||
|
||||
Note:
|
||||
|
||||
- The Google Sheet must be shared to **Anyone with the link**.
|
||||
- Read and write in 'gsheet' style for reciprocity of comment handling
|
||||
- Be careful about single-column CSVs \
|
||||
(all comment-like lines are comments, same as with `encoding/csv` and empty lines)
|
||||
|
||||
# Usage
|
||||
|
||||
Same as `encoding/csv` (embedded), but with two extra options:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
switch len(os.Args) {
|
||||
case 2:
|
||||
break
|
||||
case 1:
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||
os.Exit(1)
|
||||
}
|
||||
urlOrPath := os.Args[1]
|
||||
|
||||
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||
records, err := gsr.ReadAll()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||
csvw.Comment = gsr.Comment
|
||||
if err := csvw.WriteAll(records); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# CLI
|
||||
|
||||
There are a few convenience utilities:
|
||||
|
||||
- `gsheet2csv` (also `gsheet2tsv`)
|
||||
- `gsheet2env`
|
||||
|
||||
## gsheet2csv
|
||||
|
||||
They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines.
|
||||
|
||||
The alterable behavior is almost exclusively for testing.
|
||||
|
||||
### Installation
|
||||
|
||||
```sh
|
||||
go install github.com/therootcompany/golib/io/transform/gsheet2csv/cmd/gsheet2csv@latest
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
```sh
|
||||
gsheet2csv -raw -o ./gsheet.csv 'https://docs.google.com/spreadsheets/...'
|
||||
|
||||
gsheet2csv -d '\t' --write-style 'gsheet' ./gsheet.csv > ./gsheet.tsv
|
||||
|
||||
gsheet2csv --strip-comments ./gsheet.csv > ./sheet.csv
|
||||
```
|
||||
|
||||
```text
|
||||
--raw download without processing
|
||||
--print-ids print ids to stdout without download
|
||||
--print-url print url to stdout without downloading
|
||||
-o <filepath> write records to file (default: stdout)
|
||||
-d field delimiter (for output)
|
||||
--read-delimiter input field delimiter (for testing reciprocity)
|
||||
--crlf write using CRLF (\r\n) as the record separator
|
||||
--comment '#' treat lines starting with # as comments
|
||||
--strip-comments ignore single-field data beginning with a comment character
|
||||
--read-style 'gsheet' (preserves comments as single-field records)
|
||||
or 'rfc' (ignore lines starting with comment character)
|
||||
--write-style 'gsheet' (quote single-field comments containing quotes or commas)
|
||||
or 'rfc' (only quote values starting with a comment character)
|
||||
```
|
||||
|
||||
### ASCII Delimiters
|
||||
|
||||
```
|
||||
, comma
|
||||
\t tab (or a normal tab)
|
||||
space (just a normal space)
|
||||
: colon
|
||||
; semicolon
|
||||
| pipe
|
||||
^_ unit separator
|
||||
^^ record separator
|
||||
^] group separator
|
||||
^\ file separator
|
||||
\f form feed (also ^L)
|
||||
\v vertical tab (also ^K)
|
||||
```
|
||||
220
io/transform/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
220
io/transform/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
@ -0,0 +1,220 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||
)
|
||||
|
||||
// CSVReader is the set of read methods main needs from a CSV reader. main
// assigns either a *csv.Reader ("rfc" read style) or the reader returned by
// gsheet2csv.NewReader ("gsheet" read style) to a variable of this type.
type CSVReader interface {
	// Read returns the next record, or an error (io.EOF at end of input).
	Read() ([]string, error)
	// ReadAll returns all remaining records.
	ReadAll() ([][]string, error)
}
|
||||
|
||||
// CSVWriter is the set of write methods main needs from a CSV writer. main
// assigns the writer returned by gsheet2csv.NewWriter to a variable of this
// type.
type CSVWriter interface {
	// Write writes a single record.
	Write([]string) error
	// WriteAll writes multiple records.
	WriteAll([][]string) error
	// Flush pushes buffered data to the underlying writer.
	Flush()
	// Error reports an error from a previous Write or Flush.
	Error() error
}
|
||||
|
||||
func main() {
|
||||
var commentArg string
|
||||
format := "CSV"
|
||||
delim := ','
|
||||
if strings.Contains(os.Args[0], "tsv") {
|
||||
delim = '\t'
|
||||
format = "TSV"
|
||||
}
|
||||
|
||||
// Parse command-line flags
|
||||
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
|
||||
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||
readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||
noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
|
||||
readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
|
||||
writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
|
||||
flag.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||
flag.PrintDefaults()
|
||||
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
// Check for URL argument
|
||||
if len(flag.Args()) != 1 {
|
||||
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
url := flag.Args()[0]
|
||||
|
||||
// Prepare output writer
|
||||
var out *os.File
|
||||
if *outputFile != "" {
|
||||
var err error
|
||||
out, err = os.Create(*outputFile)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer func() { _ = out.Close() }()
|
||||
} else {
|
||||
out = os.Stdout
|
||||
}
|
||||
|
||||
inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
delim, err = gsheet2csv.DecodeDelimiter(*delimString)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var rc io.ReadCloser
|
||||
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||
docid, gid := gsheet2csv.ParseIDs(url)
|
||||
if *parseOnly {
|
||||
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||
}
|
||||
|
||||
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||
if *urlOnly {
|
||||
fmt.Printf("%s\n", sheetURL)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||
}
|
||||
|
||||
if !*urlOnly {
|
||||
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
rc = resp.Body
|
||||
}
|
||||
} else {
|
||||
url = strings.TrimPrefix(url, "file://")
|
||||
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||
f, err := os.Open(url)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
rc = f
|
||||
}
|
||||
|
||||
if out == os.Stdout {
|
||||
fmt.Fprintf(os.Stderr, "\n")
|
||||
}
|
||||
|
||||
if *urlOnly || *parseOnly {
|
||||
os.Exit(0)
|
||||
return
|
||||
}
|
||||
|
||||
if *rawOnly {
|
||||
if _, err := io.Copy(out, rc); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var comment rune
|
||||
if commentArg == "0" {
|
||||
comment = 0
|
||||
} else {
|
||||
comment, _ = utf8.DecodeRuneInString(commentArg)
|
||||
}
|
||||
|
||||
// Create a reader for the Google Sheet
|
||||
var csvr CSVReader
|
||||
if *readStyle == "rfc" {
|
||||
rfcr := csv.NewReader(rc)
|
||||
rfcr.Comma = inputDelim
|
||||
rfcr.Comment = comment
|
||||
rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||
csvr = rfcr
|
||||
} else {
|
||||
gsr := gsheet2csv.NewReader(rc)
|
||||
gsr.Comma = inputDelim
|
||||
if *noReadComments {
|
||||
gsr.Comment = comment
|
||||
} else {
|
||||
gsr.Comment = 0
|
||||
}
|
||||
gsr.ReuseRecord = true
|
||||
csvr = gsr
|
||||
}
|
||||
|
||||
// Create CSV writer
|
||||
var csvw CSVWriter
|
||||
// if *writeStyle == "gsheet"
|
||||
{
|
||||
gsw := gsheet2csv.NewWriter(out)
|
||||
gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
|
||||
gsw.Comment = comment
|
||||
gsw.Comma = delim // Set delimiter to tab for TSV
|
||||
gsw.UseCRLF = *useCRLF
|
||||
csvw = gsw
|
||||
}
|
||||
// else {
|
||||
// rfcw := csv.NewWriter(out)
|
||||
// rfcw.Comma = delim
|
||||
// rfcw.UseCRLF = *useCRLF
|
||||
// csvw = rfcw
|
||||
// }
|
||||
|
||||
for {
|
||||
// Convert each record
|
||||
record, err := csvr.Read()
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
|
||||
if err := csvw.Write(record); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
}
|
||||
csvw.Flush()
|
||||
if err := csvw.Error(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if out != os.Stdout {
|
||||
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
220
io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
220
io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
@ -0,0 +1,220 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||
)
|
||||
|
||||
// CSVReader is the set of read methods main needs from a CSV reader. main
// assigns either a *csv.Reader ("rfc" read style) or the reader returned by
// gsheet2csv.NewReader ("gsheet" read style) to a variable of this type.
type CSVReader interface {
	// Read returns the next record, or an error (io.EOF at end of input).
	Read() ([]string, error)
	// ReadAll returns all remaining records.
	ReadAll() ([][]string, error)
}
|
||||
|
||||
// CSVWriter is the set of write methods main needs from a CSV writer. main
// assigns the writer returned by gsheet2csv.NewWriter to a variable of this
// type.
type CSVWriter interface {
	// Write writes a single record.
	Write([]string) error
	// WriteAll writes multiple records.
	WriteAll([][]string) error
	// Flush pushes buffered data to the underlying writer.
	Flush()
	// Error reports an error from a previous Write or Flush.
	Error() error
}
|
||||
|
||||
func main() {
|
||||
var commentArg string
|
||||
format := "CSV"
|
||||
delim := ','
|
||||
if strings.Contains(os.Args[0], "tsv") {
|
||||
delim = '\t'
|
||||
format = "TSV"
|
||||
}
|
||||
|
||||
// Parse command-line flags
|
||||
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
|
||||
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||
readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||
noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
|
||||
readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
|
||||
writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
|
||||
flag.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||
flag.PrintDefaults()
|
||||
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||
}
|
||||
flag.Parse()
|
||||
|
||||
// Check for URL argument
|
||||
if len(flag.Args()) != 1 {
|
||||
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||
flag.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
url := flag.Args()[0]
|
||||
|
||||
// Prepare output writer
|
||||
var out *os.File
|
||||
if *outputFile != "" {
|
||||
var err error
|
||||
out, err = os.Create(*outputFile)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer func() { _ = out.Close() }()
|
||||
} else {
|
||||
out = os.Stdout
|
||||
}
|
||||
|
||||
inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
delim, err = gsheet2csv.DecodeDelimiter(*delimString)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var rc io.ReadCloser
|
||||
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||
docid, gid := gsheet2csv.ParseIDs(url)
|
||||
if *parseOnly {
|
||||
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||
}
|
||||
|
||||
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||
if *urlOnly {
|
||||
fmt.Printf("%s\n", sheetURL)
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||
}
|
||||
|
||||
if !*urlOnly {
|
||||
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
rc = resp.Body
|
||||
}
|
||||
} else {
|
||||
url = strings.TrimPrefix(url, "file://")
|
||||
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||
f, err := os.Open(url)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
rc = f
|
||||
}
|
||||
|
||||
if out == os.Stdout {
|
||||
fmt.Fprintf(os.Stderr, "\n")
|
||||
}
|
||||
|
||||
if *urlOnly || *parseOnly {
|
||||
os.Exit(0)
|
||||
return
|
||||
}
|
||||
|
||||
if *rawOnly {
|
||||
if _, err := io.Copy(out, rc); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var comment rune
|
||||
if commentArg == "0" {
|
||||
comment = 0
|
||||
} else {
|
||||
comment, _ = utf8.DecodeRuneInString(commentArg)
|
||||
}
|
||||
|
||||
// Create a reader for the Google Sheet
|
||||
var csvr CSVReader
|
||||
if *readStyle == "rfc" {
|
||||
rfcr := csv.NewReader(rc)
|
||||
rfcr.Comma = inputDelim
|
||||
rfcr.Comment = comment
|
||||
rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||
csvr = rfcr
|
||||
} else {
|
||||
gsr := gsheet2csv.NewReader(rc)
|
||||
gsr.Comma = inputDelim
|
||||
if *noReadComments {
|
||||
gsr.Comment = comment
|
||||
} else {
|
||||
gsr.Comment = 0
|
||||
}
|
||||
gsr.ReuseRecord = true
|
||||
csvr = gsr
|
||||
}
|
||||
|
||||
// Create CSV writer
|
||||
var csvw CSVWriter
|
||||
// if *writeStyle == "gsheet"
|
||||
{
|
||||
gsw := gsheet2csv.NewWriter(out)
|
||||
gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
|
||||
gsw.Comment = comment
|
||||
gsw.Comma = delim // Set delimiter to tab for TSV
|
||||
gsw.UseCRLF = *useCRLF
|
||||
csvw = gsw
|
||||
}
|
||||
// else {
|
||||
// rfcw := csv.NewWriter(out)
|
||||
// rfcw.Comma = delim
|
||||
// rfcw.UseCRLF = *useCRLF
|
||||
// csvw = rfcw
|
||||
// }
|
||||
|
||||
for {
|
||||
// Convert each record
|
||||
record, err := csvr.Read()
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
|
||||
if err := csvw.Write(record); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||
os.Exit(1)
|
||||
return
|
||||
}
|
||||
}
|
||||
csvw.Flush()
|
||||
if err := csvw.Error(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if out != os.Stdout {
|
||||
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
33
io/transform/gsheet2csv/fixtures/example.go
Normal file
33
io/transform/gsheet2csv/fixtures/example.go
Normal file
@ -0,0 +1,33 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
switch len(os.Args) {
|
||||
case 2:
|
||||
break
|
||||
case 1:
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||
os.Exit(1)
|
||||
}
|
||||
urlOrPath := os.Args[1]
|
||||
|
||||
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||
records, err := gsr.ReadAll()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||
csvw.Comment = gsr.Comment
|
||||
if err := csvw.WriteAll(records); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
18
io/transform/gsheet2csv/fixtures/gsheet-raw.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-raw.csv
Normal file
@ -0,0 +1,18 @@
|
||||
# this is a comment,,
|
||||
"# this is, well, a quoted comment",,
|
||||
"# this is a ""super""-quoted comment",,
|
||||
Key,Value,
|
||||
Name,55,
|
||||
Girlfriend's Age,55,
|
||||
,,
|
||||
My IQ,55,
|
||||
,55,
|
||||
"Key,with,Comma",,
|
||||
,"Value,with,Comma",
|
||||
"Quoted ""Key""",Normal Value,
|
||||
Normal Key,"Quoted ""Value""",
|
||||
"Quoted ""Key""",,
|
||||
,"Quoted ""Value""",
|
||||
x,y,z
|
||||
"# comment with trailing comma,",,
|
||||
#1,2,#3
|
||||
|
14
io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
Normal file
14
io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
Normal file
@ -0,0 +1,14 @@
|
||||
Key,Value,
|
||||
Name,55,
|
||||
Girlfriend's Age,55,
|
||||
,,
|
||||
My IQ,55,
|
||||
,55,
|
||||
"Key,with,Comma",,
|
||||
,"Value,with,Comma",
|
||||
"Quoted ""Key""",Normal Value,
|
||||
Normal Key,"Quoted ""Value""",
|
||||
"Quoted ""Key""",,
|
||||
,"Quoted ""Value""",
|
||||
x,y,z
|
||||
"#1",2,#3
|
||||
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
Normal file
@ -0,0 +1,18 @@
|
||||
# this is a comment
|
||||
"# this is, well, a quoted comment"
|
||||
"# this is a ""super""-quoted comment"
|
||||
Key,Value,
|
||||
Name,55,
|
||||
Girlfriend's Age,55,
|
||||
,,
|
||||
My IQ,55,
|
||||
,55,
|
||||
"Key,with,Comma",,
|
||||
,"Value,with,Comma",
|
||||
"Quoted ""Key""",Normal Value,
|
||||
Normal Key,"Quoted ""Value""",
|
||||
"Quoted ""Key""",,
|
||||
,"Quoted ""Value""",
|
||||
x,y,z
|
||||
"# comment with trailing comma,"
|
||||
"#1",2,#3
|
||||
|
Can't render this file because it contains an unexpected character in line 10 and column 16.
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
Normal file
@ -0,0 +1,18 @@
|
||||
# this is a comment
|
||||
# this is, well, a quoted comment
|
||||
# this is a "super"-quoted comment
|
||||
Key,Value,
|
||||
Name,55,
|
||||
Girlfriend's Age,55,
|
||||
,,
|
||||
My IQ,55,
|
||||
,55,
|
||||
"Key,with,Comma",,
|
||||
,"Value,with,Comma",
|
||||
"Quoted ""Key""",Normal Value,
|
||||
Normal Key,"Quoted ""Value""",
|
||||
"Quoted ""Key""",,
|
||||
,"Quoted ""Value""",
|
||||
x,y,z
|
||||
# comment with trailing comma,
|
||||
"#1",2,#3
|
||||
|
Can't render this file because it contains an unexpected character in line 3 and column 13.
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
Normal file
@ -0,0 +1,18 @@
|
||||
# this is a comment
|
||||
# this is, well, a quoted comment
|
||||
# this is a "super"-quoted comment
|
||||
Key Value
|
||||
Name 55
|
||||
Girlfriend's Age 55
|
||||
|
||||
My IQ 55
|
||||
55
|
||||
Key,with,Comma
|
||||
Value,with,Comma
|
||||
"Quoted ""Key""" Normal Value
|
||||
Normal Key "Quoted ""Value"""
|
||||
"Quoted ""Key"""
|
||||
"Quoted ""Value"""
|
||||
x y z
|
||||
# comment with trailing comma,
|
||||
"#1" 2 #3
|
||||
|
Can't render this file because it contains an unexpected character in line 3 and column 13.
|
3
io/transform/gsheet2csv/go.mod
Normal file
3
io/transform/gsheet2csv/go.mod
Normal file
@ -0,0 +1,3 @@
|
||||
module github.com/therootcompany/golib/io/transform/gsheet2csv
|
||||
|
||||
go 1.24.6
|
||||
309
io/transform/gsheet2csv/gsheet2csv.go
Normal file
309
io/transform/gsheet2csv/gsheet2csv.go
Normal file
@ -0,0 +1,309 @@
|
||||
// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
|
||||
//
|
||||
// To the extent possible under law, the author(s) have dedicated all copyright
|
||||
// and related and neighboring rights to this software to the public domain
|
||||
// worldwide. This software is distributed without any warranty.
|
||||
//
|
||||
// You should have received a copy of the CC0 Public Domain Dedication along with
|
||||
// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
//
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
package gsheet2csv
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ASCII separator control characters that DecodeDelimiter maps its caret
// ("^_") and hex-escape ("\x1f") spellings to.
const (
	fileSeparator   = '\x1c'
	groupSeparator  = '\x1d'
	recordSeparator = '\x1e'
	unitSeparator   = '\x1f'
)

// ErrHTTPGet is returned by GetSheet when the response status is not 200 OK.
var ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

// httpGet performs the download in GetSheet. It is a variable so that tests
// can substitute a mock.
var httpGet = http.Get
|
||||
|
||||
// Reader reads CSV records through an embedded *csv.Reader and can skip
// single-field comment records (see Read).
type Reader struct {
	*csv.Reader
	// DocID and GID are the document and sheet IDs; NewReaderFromIDs sets them.
	DocID string
	GID   string
	// URL is the CSV export URL (NewReaderFromIDs) or the local file path
	// (NewReaderFrom).
	URL string
	// Comment is the rune that marks a comment record; NewReader sets it to
	// '#'. Read only skips comments when it is greater than zero. It shadows
	// the embedded csv.Reader's Comment field, which NewReader sets to 0.
	Comment rune
	// r is the reader passed to NewReader.
	r io.Reader
	// resp is the HTTP response whose Body Read closes on error when close
	// is true.
	resp  *http.Response
	close bool
	// err, when set by a constructor, is returned by every call to Read.
	err error
}
|
||||
|
||||
func NewReaderFrom(urlOrPath string) *Reader {
|
||||
if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") {
|
||||
return NewReaderFromURL(urlOrPath)
|
||||
}
|
||||
|
||||
urlOrPath = strings.TrimPrefix(urlOrPath, "file://")
|
||||
f, err := os.Open(urlOrPath)
|
||||
r := NewReader(f)
|
||||
r.URL = urlOrPath
|
||||
if err != nil {
|
||||
r.err = err
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
func NewReaderFromURL(url string) *Reader {
|
||||
docid, gid := ParseIDs(url)
|
||||
|
||||
return NewReaderFromIDs(docid, gid)
|
||||
}
|
||||
|
||||
func NewReaderFromIDs(docid, gid string) *Reader {
|
||||
resp, err := GetSheet(docid, gid)
|
||||
if err != nil {
|
||||
r := NewReader(nil)
|
||||
r.err = err
|
||||
return r
|
||||
}
|
||||
|
||||
r := NewReader(resp.Body)
|
||||
r.URL = ToCSVURL(docid, gid)
|
||||
r.DocID = docid
|
||||
r.GID = gid
|
||||
r.resp = resp
|
||||
r.close = true
|
||||
return r
|
||||
}
|
||||
|
||||
// ToCSVURL builds the Google Sheets CSV export URL for the given document ID
// and sheet ID. The IDs are inserted as-is, without escaping.
func ToCSVURL(docid, gid string) string {
	const exportURLFormat = "https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s"

	return fmt.Sprintf(exportURLFormat, docid, gid)
}
|
||||
|
||||
func GetSheet(docid, gid string) (*http.Response, error) {
|
||||
downloadURL := ToCSVURL(docid, gid)
|
||||
|
||||
resp, err := httpGet(downloadURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_ = resp.Body.Close()
|
||||
return nil, ErrHTTPGet
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
csvr := csv.NewReader(r)
|
||||
csvr.Comma = ','
|
||||
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
||||
csvr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
||||
csvr.TrimLeadingSpace = false
|
||||
csvr.ReuseRecord = false
|
||||
return &Reader{
|
||||
Reader: csvr,
|
||||
Comment: '#',
|
||||
r: r,
|
||||
}
|
||||
}
|
||||
|
||||
func DecodeDelimiter(delimString string) (rune, error) {
|
||||
switch delimString {
|
||||
case "^_", "\\x1f":
|
||||
delimString = string(unitSeparator)
|
||||
case "^^", "\\x1e":
|
||||
delimString = string(recordSeparator)
|
||||
case "^]", "\\x1d":
|
||||
delimString = string(groupSeparator)
|
||||
case "^\\", "\\x1c":
|
||||
delimString = string(fileSeparator)
|
||||
case "^L", "\\f":
|
||||
delimString = "\f"
|
||||
case "^K", "\\v":
|
||||
delimString = "\v"
|
||||
case "^I", "\\t":
|
||||
delimString = " "
|
||||
}
|
||||
delim, _ := utf8.DecodeRuneInString(delimString)
|
||||
return delim, nil
|
||||
}
|
||||
|
||||
func (r *Reader) Read() ([]string, error) {
|
||||
if r.err != nil {
|
||||
return nil, r.err
|
||||
}
|
||||
|
||||
for {
|
||||
record, err := r.Reader.Read()
|
||||
if err != nil {
|
||||
if r.close {
|
||||
_ = r.resp.Body.Close()
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if r.Comment > 0 {
|
||||
if rv, _ := utf8.DecodeRuneInString(record[0]); rv == r.Comment {
|
||||
last := len(record) - 1
|
||||
for len(record[last]) == 0 {
|
||||
last -= 1
|
||||
}
|
||||
if last == 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
return record, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Reader) ReadAll() ([][]string, error) {
|
||||
var records [][]string
|
||||
|
||||
for {
|
||||
record, err := r.Read()
|
||||
if nil != err {
|
||||
if errors.Is(err, io.EOF) {
|
||||
return records, nil
|
||||
}
|
||||
return records, err
|
||||
}
|
||||
records = append(records, record)
|
||||
}
|
||||
}
|
||||
|
||||
// ParseIDs extracts the document ID and sheet ID (gid) from a Google Sheets
// URL.
//
// The document ID is the path segment after "/spreadsheets/d/", ending at the
// next '/', '?', '#' or '&', or at the end of the string. The gid is the value
// of the first non-empty "gid=" parameter in the query or fragment, and
// defaults to "0" when there is none. If no document ID is found, both results
// are empty.
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"
	_, rest, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	// The ID may be followed directly by a query or fragment with no
	// "/edit"-style path segment in between.
	docid = rest
	if end := strings.IndexAny(rest, "/?#&"); end != -1 {
		docid = rest[:end]
	}
	if docid == "" {
		return "", ""
	}

	const key = "gid="
	for pos := 0; pos < len(urlStr); {
		i := strings.Index(urlStr[pos:], key)
		if i == -1 {
			break
		}
		start := pos + i
		pos = start + len(key)

		// Only accept "gid" as a whole parameter name, not as the tail of
		// another name.
		if start > 0 && strings.IndexByte("?&#", urlStr[start-1]) == -1 {
			continue
		}

		value := urlStr[pos:]
		if end := strings.IndexAny(value, "#&?/"); end != -1 {
			value = value[:end]
		}
		if value != "" {
			return docid, value
		}
	}

	return docid, "0"
}
|
||||
|
||||
// Writer writes CSV records through an embedded *csv.Writer, with special
// handling for records that begin with the comment character.
type Writer struct {
	*csv.Writer
	// Comment is the rune that marks a comment record; 0 disables all
	// comment handling. NewWriter sets it to '#'.
	Comment rune
	// QuoteAmbiguousComments selects the "gsheet" style: single-field
	// comments are quoted when they contain a quote, delimiter or newline.
	// When false (the "rfc" style) comments are written out verbatim.
	QuoteAmbiguousComments bool
	// w is the destination, written to directly for comment-led records.
	w io.Writer
}

// NewWriter returns a Writer to w that uses '#' as its comment character and
// the "rfc" comment style.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes one record.
//
// Records whose first field does not begin with w.Comment are passed to the
// embedded csv.Writer. Otherwise:
//
//   - If the first field is the only non-empty one, the record is a comment.
//     It is written as that single field: verbatim in "rfc" style, or quoted
//     only when it contains a quote, delimiter or newline in "gsheet" style.
//   - If other fields have content, the record is data. Its first field is
//     always quoted so that it cannot be read back as a comment, and the
//     remaining fields are quoted when they contain a quote, delimiter or
//     newline.
//
// The caller's record is never modified. Buffered output is flushed before a
// comment-led record is written so that ordering is preserved.
func (w *Writer) Write(record []string) error {
	// Not handling comments? Move along. An empty first field can never be a
	// comment; checking it explicitly matters because decoding "" yields
	// utf8.RuneError, which would otherwise match a Comment of utf8.RuneError.
	if w.Comment == 0 || len(record) == 0 || record[0] == "" {
		return w.Writer.Write(record)
	}

	// First char not a comment char? Move along.
	if lead, _ := utf8.DecodeRuneInString(record[0]); lead != w.Comment {
		return w.Writer.Write(record)
	}

	// Is this a true comment, or data that begins with the comment char?
	lastNonEmpty := len(record) - 1
	for lastNonEmpty > 0 && record[lastNonEmpty] == "" {
		lastNonEmpty--
	}
	isComment := lastNonEmpty == 0

	// We write directly to the destination below, so anything buffered by the
	// csv.Writer has to go out first.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	newline := "\n"
	if w.UseCRLF {
		newline = "\r\n"
	}

	// Write true comments out plain.
	if isComment && !w.QuoteAmbiguousComments {
		_, err := io.WriteString(w.w, record[0]+newline)
		return err
	}

	fields := record
	if isComment {
		// Drop the empty padding fields of a comment row.
		fields = record[:1]
	}

	comma := string(w.Comma)
	var line strings.Builder
	for i, f := range fields {
		if i > 0 {
			line.WriteString(comma)
		}
		// A leading data field that looks like a comment must be quoted.
		// A comment itself is quoted only when it would otherwise be
		// ambiguous.
		quote := (i == 0 && !isComment) ||
			strings.ContainsAny(f, "\"\r\n") ||
			strings.Contains(f, comma)
		if !quote {
			line.WriteString(f)
			continue
		}
		line.WriteByte('"')
		line.WriteString(strings.ReplaceAll(f, `"`, `""`))
		line.WriteByte('"')
	}
	line.WriteString(newline)

	_, err := io.WriteString(w.w, line.String())
	return err
}

// WriteAll writes every record with Write, then flushes and returns any
// error reported by the embedded csv.Writer.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
|
||||
249
io/transform/gsheet2csv/gsheet2csv_test.go
Normal file
249
io/transform/gsheet2csv/gsheet2csv_test.go
Normal file
@ -0,0 +1,249 @@
|
||||
package gsheet2csv
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// mockHTTPClient is a test double whose Get method hands back a canned
// response and error, regardless of the URL requested.
type mockHTTPClient struct {
	resp *http.Response // returned by every Get call
	err  error          // returned by every Get call
}

// Get ignores its argument and returns the configured response and error.
func (c *mockHTTPClient) Get(_ string) (*http.Response, error) {
	return c.resp, c.err
}
|
||||
|
||||
// sampleCSV mimics the structure of ai-models.csv from the project README.
// It holds an unquoted comment line, a quoted comment that itself contains a
// comma and escaped quotes, a header row, and three data rows with one blank
// line between the second and third.
const sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"

"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`
|
||||
|
||||
// malformedCSV for testing error handling: the last field of the header row
// ("MODIFIED) is missing its closing quote.
const malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
|
||||
|
||||
// TestParseIDs verifies the ParseIDs function for various URL formats.
|
||||
func TestParseIDs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
wantDoc string
|
||||
wantGid string
|
||||
}{
|
||||
{
|
||||
name: "Google Sheets Edit / Share URL with gid",
|
||||
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238",
|
||||
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||
wantGid: "559037238",
|
||||
},
|
||||
{
|
||||
name: "Google Sheets CSV URL with gid",
|
||||
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238",
|
||||
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||
wantGid: "559037238",
|
||||
},
|
||||
{
|
||||
name: "URL without gid",
|
||||
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit",
|
||||
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||
wantGid: "0",
|
||||
},
|
||||
{
|
||||
name: "Invalid URL",
|
||||
url: "https://example.com/invalid",
|
||||
wantDoc: "",
|
||||
wantGid: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotDoc, gotGid := ParseIDs(tt.url)
|
||||
if gotDoc != tt.wantDoc {
|
||||
t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc)
|
||||
}
|
||||
if gotGid != tt.wantGid {
|
||||
t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL.
|
||||
func TestNewReaderFromURL(t *testing.T) {
|
||||
originalGet := httpGet
|
||||
defer func() { httpGet = originalGet }()
|
||||
|
||||
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||
|
||||
// Test successful HTTP response
|
||||
mockResp := &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: io.NopCloser(strings.NewReader(sampleCSV)),
|
||||
}
|
||||
client := &mockHTTPClient{resp: mockResp}
|
||||
httpGet = client.Get
|
||||
|
||||
reader := NewReaderFromURL(url)
|
||||
if reader.err != nil {
|
||||
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||
}
|
||||
if reader.resp != mockResp {
|
||||
t.Error("NewReaderFromURL() did not set response correctly")
|
||||
}
|
||||
if !reader.close {
|
||||
t.Error("NewReaderFromURL() did not set close flag")
|
||||
}
|
||||
|
||||
// Test HTTP failure
|
||||
client = &mockHTTPClient{resp: mockResp}
|
||||
client.err = errors.New("network error")
|
||||
httpGet = client.Get
|
||||
|
||||
reader = NewReaderFromURL(url)
|
||||
if reader.err == nil {
|
||||
t.Error("NewReaderFromURL() expected error, got nil")
|
||||
}
|
||||
|
||||
// Test non-200 status
|
||||
client = &mockHTTPClient{resp: &http.Response{
|
||||
StatusCode: http.StatusNotFound,
|
||||
Body: io.NopCloser(strings.NewReader("these aren't the droids you're looking for")),
|
||||
}}
|
||||
httpGet = client.Get
|
||||
|
||||
reader = NewReaderFromURL(url)
|
||||
if reader.err == nil {
|
||||
t.Error("NewReaderFromURL() expected error for non-200 status, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRead tests the Read method for comment handling.
|
||||
func TestRead(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
preserveComments bool
|
||||
expected [][]string
|
||||
}{
|
||||
{
|
||||
name: "Skip comments",
|
||||
expected: [][]string{
|
||||
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Don't skip comments",
|
||||
preserveComments: true,
|
||||
expected: [][]string{
|
||||
{"# Generated by ollama list"},
|
||||
{"# Sample Quoted Comment, with \"quotes\" itself"},
|
||||
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
reader := NewReader(strings.NewReader(sampleCSV))
|
||||
if tt.preserveComments {
|
||||
reader.Comment = 0
|
||||
}
|
||||
|
||||
for i, want := range tt.expected {
|
||||
got, err := reader.Read()
|
||||
if err != nil {
|
||||
t.Errorf("Read() error at record %d: %v", i, err)
|
||||
}
|
||||
if !slices.Equal(got, want) {
|
||||
t.Errorf("Read() record %d = %v, want %v", i, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify EOF
|
||||
_, err := reader.Read()
|
||||
if !errors.Is(err, io.EOF) {
|
||||
t.Errorf("Read() expected EOF, got %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadAll tests the ReadAll method for different configurations.
|
||||
func TestReadAll(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
expected [][]string
|
||||
}{
|
||||
{
|
||||
name: "Skip comments",
|
||||
expected: [][]string{
|
||||
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
reader := NewReader(strings.NewReader(sampleCSV))
|
||||
|
||||
got, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
t.Errorf("ReadAll() error: %v", err)
|
||||
}
|
||||
if len(got) != len(tt.expected) {
|
||||
t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected))
|
||||
}
|
||||
for i, want := range tt.expected {
|
||||
if !slices.Equal(got[i], want) {
|
||||
t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV.
|
||||
func TestNewReaderFromURLWithMalformedCSV(t *testing.T) {
|
||||
mockResp := &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: io.NopCloser(strings.NewReader(malformedCSV)),
|
||||
}
|
||||
client := &mockHTTPClient{resp: mockResp}
|
||||
originalGet := httpGet
|
||||
httpGet = client.Get
|
||||
defer func() { httpGet = originalGet }()
|
||||
|
||||
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||
reader := NewReaderFromURL(url)
|
||||
if reader.err != nil {
|
||||
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||
}
|
||||
|
||||
// Reading should fail due to malformed CSV
|
||||
_, err := reader.Read()
|
||||
if err == nil {
|
||||
t.Error("Read() expected error for malformed CSV, got nil")
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user