mirror of
https://github.com/therootcompany/golib.git
synced 2025-10-12 20:18:16 +00:00
feat(gsheet2csv): parse URLs and CSVs with comments
This commit is contained in:
parent
dc951ce388
commit
cd00d85968
7
io/transforms/gsheet2csv/LICENSE
Normal file
7
io/transforms/gsheet2csv/LICENSE
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
Authored in 2025 by AJ ONeal <aj@therootcompany.com>
|
||||||
|
To the extent possible under law, the author(s) have dedicated all copyright
|
||||||
|
and related and neighboring rights to this software to the public domain
|
||||||
|
worldwide. This software is distributed without any warranty.
|
||||||
|
|
||||||
|
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||||
|
this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
101
io/transforms/gsheet2csv/README.md
Normal file
101
io/transforms/gsheet2csv/README.md
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# gsheet2csv
|
||||||
|
|
||||||
|
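[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv)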
|
||||||
|
|
||||||
|
A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader.
|
||||||
|
|
||||||
|
This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us:
|
||||||
|
|
||||||
|
- works with Google Sheet URLs, regardless of URL format
|
||||||
|
- Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000>
|
||||||
|
- Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing>
|
||||||
|
- CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000>
|
||||||
|
- anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}`
|
||||||
|
- can ignore quoted comments (if all other fields in the row are empty)
|
||||||
|
- can preserve comments
|
||||||
|
- swaps `\r\n` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Same as `encoding/csv` (embedded), but with two extra options:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
switch len(os.Args) {
|
||||||
|
case 2:
|
||||||
|
break
|
||||||
|
case 1:
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := os.Args[1]
|
||||||
|
|
||||||
|
gsr := gsheet2csv.NewReaderFromURL(url)
|
||||||
|
records, err := gsr.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// distinguishes between comments and quoted fields
|
||||||
|
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||||
|
csvw.Comment = gsr.Comment
|
||||||
|
if err := csvw.WriteAll(records); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI
|
||||||
|
|
||||||
|
There are two convenience utilities:
|
||||||
|
|
||||||
|
- `gsheet2csv`
|
||||||
|
- `gsheet2tsv`
|
||||||
|
|
||||||
|
They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines.
|
||||||
|
|
||||||
|
### Flags & Options
|
||||||
|
|
||||||
|
```text
|
||||||
|
--raw download without processing
|
||||||
|
--print-ids print ids to stdout without download
|
||||||
|
--print-url print url to stdout without downloading
|
||||||
|
-o <filepath> write records to file
|
||||||
|
-d field delimiter
|
||||||
|
--comment '#' treat lines starting with # as comments
|
||||||
|
--crlf use CRLF (\r\n) as record separator
|
||||||
|
```
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
```sh
|
||||||
|
go get github.com/therootcompany/golib/io/transform/gsheet2csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### ASCII Delimiters
|
||||||
|
|
||||||
|
```
|
||||||
|
, comma
|
||||||
|
\t tab (or a normal tab)
|
||||||
|
space (just a normal space)
|
||||||
|
: colon
|
||||||
|
; semicolon
|
||||||
|
| pipe
|
||||||
|
^_ unit separator
|
||||||
|
^^ record separator
|
||||||
|
^] group separator
|
||||||
|
^\ file separator
|
||||||
|
\f form feed (also ^L)
|
||||||
|
\v vertical tab (also ^K)
|
||||||
|
```
|
176
io/transforms/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
176
io/transforms/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ASCII information-separator control characters that the -d flag accepts
// in caret ("^_") or backslash ("\x1f") notation.
const (
	fileSeparator   = "\x1c" // ^\
	groupSeparator  = "\x1d" // ^]
	recordSeparator = "\x1e" // ^^
	unitSeparator   = "\x1f" // ^_
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var commentArg string
|
||||||
|
format := "CSV"
|
||||||
|
delim := ','
|
||||||
|
if strings.Contains(os.Args[0], "tsv") {
|
||||||
|
delim = '\t'
|
||||||
|
format = "TSV"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse command-line flags
|
||||||
|
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments")
|
||||||
|
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||||
|
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||||
|
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||||
|
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||||
|
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||||
|
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||||
|
flag.PrintDefaults()
|
||||||
|
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||||
|
}
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Check for URL argument
|
||||||
|
if len(flag.Args()) != 1 {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||||
|
flag.Usage()
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := flag.Args()[0]
|
||||||
|
|
||||||
|
// Prepare output writer
|
||||||
|
var out *os.File
|
||||||
|
if *outputFile != "" {
|
||||||
|
var err error
|
||||||
|
out, err = os.Create(*outputFile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = out.Close() }()
|
||||||
|
} else {
|
||||||
|
out = os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
|
switch *delimString {
|
||||||
|
case "^_", "\\x1f":
|
||||||
|
*delimString = unitSeparator
|
||||||
|
case "^^", "\\x1e":
|
||||||
|
*delimString = recordSeparator
|
||||||
|
case "^]", "\\x1d":
|
||||||
|
*delimString = groupSeparator
|
||||||
|
case "^\\", "\\x1c":
|
||||||
|
*delimString = fileSeparator
|
||||||
|
case "^L", "\\f":
|
||||||
|
*delimString = "\f"
|
||||||
|
case "^K", "\\v":
|
||||||
|
*delimString = "\v"
|
||||||
|
case "^I", "\\t":
|
||||||
|
*delimString = "\t"
|
||||||
|
}
|
||||||
|
delim, _ = utf8.DecodeRuneInString(*delimString)
|
||||||
|
|
||||||
|
var rc io.ReadCloser
|
||||||
|
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||||
|
docid, gid := gsheet2csv.ParseIDs(url)
|
||||||
|
if *parseOnly {
|
||||||
|
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||||
|
if *urlOnly {
|
||||||
|
fmt.Printf("%s\n", sheetURL)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !*urlOnly {
|
||||||
|
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
rc = resp.Body
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
url = strings.TrimPrefix(url, "file://")
|
||||||
|
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||||
|
f, err := os.Open(url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rc = f
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
|
||||||
|
if *urlOnly || *parseOnly {
|
||||||
|
os.Exit(0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *rawOnly {
|
||||||
|
if _, err := io.Copy(out, rc); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
comment, _ := utf8.DecodeRuneInString(commentArg)
|
||||||
|
|
||||||
|
// Create a reader for the Google Sheet
|
||||||
|
gsr := gsheet2csv.NewReader(rc)
|
||||||
|
gsr.QuotedComments = false
|
||||||
|
gsr.Comment = 0
|
||||||
|
gsr.ReuseRecord = true
|
||||||
|
|
||||||
|
// Create CSV writer
|
||||||
|
csvw := gsheet2csv.NewWriter(out)
|
||||||
|
csvw.Comma = delim // Set delimiter to tab for TSV
|
||||||
|
csvw.Comment = comment
|
||||||
|
csvw.UseCRLF = *useCRLF
|
||||||
|
for {
|
||||||
|
// Convert each record
|
||||||
|
record, err := gsr.Read()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := csvw.Write(record); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Flush the writer to ensure all data is written
|
||||||
|
csvw.Flush()
|
||||||
|
if err := csvw.Error(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
176
io/transforms/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
176
io/transforms/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ASCII information-separator control characters that the -d flag accepts
// in caret ("^_") or backslash ("\x1f") notation.
const (
	fileSeparator   = "\x1c" // ^\
	groupSeparator  = "\x1d" // ^]
	recordSeparator = "\x1e" // ^^
	unitSeparator   = "\x1f" // ^_
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var commentArg string
|
||||||
|
format := "CSV"
|
||||||
|
delim := ','
|
||||||
|
if strings.Contains(os.Args[0], "tsv") {
|
||||||
|
delim = '\t'
|
||||||
|
format = "TSV"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse command-line flags
|
||||||
|
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments")
|
||||||
|
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||||
|
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||||
|
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||||
|
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||||
|
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||||
|
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||||
|
flag.PrintDefaults()
|
||||||
|
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||||
|
}
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Check for URL argument
|
||||||
|
if len(flag.Args()) != 1 {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||||
|
flag.Usage()
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := flag.Args()[0]
|
||||||
|
|
||||||
|
// Prepare output writer
|
||||||
|
var out *os.File
|
||||||
|
if *outputFile != "" {
|
||||||
|
var err error
|
||||||
|
out, err = os.Create(*outputFile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = out.Close() }()
|
||||||
|
} else {
|
||||||
|
out = os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
|
switch *delimString {
|
||||||
|
case "^_", "\\x1f":
|
||||||
|
*delimString = unitSeparator
|
||||||
|
case "^^", "\\x1e":
|
||||||
|
*delimString = recordSeparator
|
||||||
|
case "^]", "\\x1d":
|
||||||
|
*delimString = groupSeparator
|
||||||
|
case "^\\", "\\x1c":
|
||||||
|
*delimString = fileSeparator
|
||||||
|
case "^L", "\\f":
|
||||||
|
*delimString = "\f"
|
||||||
|
case "^K", "\\v":
|
||||||
|
*delimString = "\v"
|
||||||
|
case "^I", "\\t":
|
||||||
|
*delimString = "\t"
|
||||||
|
}
|
||||||
|
delim, _ = utf8.DecodeRuneInString(*delimString)
|
||||||
|
|
||||||
|
var rc io.ReadCloser
|
||||||
|
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||||
|
docid, gid := gsheet2csv.ParseIDs(url)
|
||||||
|
if *parseOnly {
|
||||||
|
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||||
|
if *urlOnly {
|
||||||
|
fmt.Printf("%s\n", sheetURL)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !*urlOnly {
|
||||||
|
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
rc = resp.Body
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
url = strings.TrimPrefix(url, "file://")
|
||||||
|
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||||
|
f, err := os.Open(url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rc = f
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
|
||||||
|
if *urlOnly || *parseOnly {
|
||||||
|
os.Exit(0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *rawOnly {
|
||||||
|
if _, err := io.Copy(out, rc); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
comment, _ := utf8.DecodeRuneInString(commentArg)
|
||||||
|
|
||||||
|
// Create a reader for the Google Sheet
|
||||||
|
gsr := gsheet2csv.NewReader(rc)
|
||||||
|
gsr.QuotedComments = false
|
||||||
|
gsr.Comment = 0
|
||||||
|
gsr.ReuseRecord = true
|
||||||
|
|
||||||
|
// Create CSV writer
|
||||||
|
csvw := gsheet2csv.NewWriter(out)
|
||||||
|
csvw.Comma = delim // Set delimiter to tab for TSV
|
||||||
|
csvw.Comment = comment
|
||||||
|
csvw.UseCRLF = *useCRLF
|
||||||
|
for {
|
||||||
|
// Convert each record
|
||||||
|
record, err := gsr.Read()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := csvw.Write(record); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Flush the writer to ensure all data is written
|
||||||
|
csvw.Flush()
|
||||||
|
if err := csvw.Error(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
33
io/transforms/gsheet2csv/fixtures/example.go
Normal file
33
io/transforms/gsheet2csv/fixtures/example.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
switch len(os.Args) {
|
||||||
|
case 2:
|
||||||
|
break
|
||||||
|
case 1:
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := os.Args[1]
|
||||||
|
|
||||||
|
gsr := gsheet2csv.NewReaderFromURL(url)
|
||||||
|
records, err := gsr.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||||
|
csvw.Comment = gsr.Comment
|
||||||
|
if err := csvw.WriteAll(records); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
18
io/transforms/gsheet2csv/fixtures/gsheet.csv
Normal file
18
io/transforms/gsheet2csv/fixtures/gsheet.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment,,
|
||||||
|
"# this is, well, a quoted comment",,
|
||||||
|
"# this is a ""super""-quoted comment",,
|
||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
"# comment with trailing comma,",,
|
||||||
|
#1,2,#3
|
|
3
io/transforms/gsheet2csv/go.mod
Normal file
3
io/transforms/gsheet2csv/go.mod
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module github.com/therootcompany/golib/io/transform/gsheet2csv
|
||||||
|
|
||||||
|
go 1.24.6
|
241
io/transforms/gsheet2csv/gsheet2csv.go
Normal file
241
io/transforms/gsheet2csv/gsheet2csv.go
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
|
||||||
|
//
|
||||||
|
// To the extent possible under law, the author(s) have dedicated all copyright
|
||||||
|
// and related and neighboring rights to this software to the public domain
|
||||||
|
// worldwide. This software is distributed without any warranty.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the CC0 Public Domain Dedication along with
|
||||||
|
// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
package gsheet2csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// ErrHTTPGet is returned by GetSheet when the download responds with a
	// status other than 200 OK.
	ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

	// httpGet performs the download; it is a variable so tests can replace it.
	httpGet = http.Get
)
|
||||||
|
|
||||||
|
// Reader wraps an encoding/csv Reader and can skip comment rows as they
// appear in Google Sheets CSV exports.
type Reader struct {
	*csv.Reader

	// DocID, GID and URL identify the sheet; NewReaderFromIDs fills them in.
	DocID string
	GID   string
	URL   string

	// QuotedComments, when true, makes Read skip records whose first field
	// begins with Comment and whose remaining fields are all empty.
	QuotedComments bool
	// Comment is the rune that marks a comment; NewReader sets it to '#'.
	// It shadows the embedded csv.Reader's Comment field.
	Comment rune

	r     io.Reader      // source handed to NewReader
	resp  *http.Response // response backing the reader, set by NewReaderFromIDs
	close bool           // when true, Read closes resp.Body once the csv reader returns an error
	err   error          // constructor failure, returned by every Read
}
|
||||||
|
|
||||||
|
func NewReaderFromURL(url string) *Reader {
|
||||||
|
docid, gid := ParseIDs(url)
|
||||||
|
|
||||||
|
return NewReaderFromIDs(docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderFromIDs(docid, gid string) *Reader {
|
||||||
|
resp, err := GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
r := NewReader(nil)
|
||||||
|
r.err = err
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
r := NewReader(resp.Body)
|
||||||
|
r.URL = ToCSVURL(docid, gid)
|
||||||
|
r.DocID = docid
|
||||||
|
r.GID = gid
|
||||||
|
r.resp = resp
|
||||||
|
r.close = true
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToCSVURL returns the Google Sheets CSV export URL for the given document ID
// and sheet ID (gid). The IDs are inserted as given, without escaping.
func ToCSVURL(docid, gid string) string {
	const exportFormat = "https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s"

	return fmt.Sprintf(exportFormat, docid, gid)
}
|
||||||
|
|
||||||
|
func GetSheet(docid, gid string) (*http.Response, error) {
|
||||||
|
downloadURL := ToCSVURL(docid, gid)
|
||||||
|
|
||||||
|
resp, err := httpGet(downloadURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
return nil, ErrHTTPGet
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReader(r io.Reader) *Reader {
|
||||||
|
csvr := csv.NewReader(r)
|
||||||
|
csvr.Comma = ','
|
||||||
|
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
||||||
|
csvr.FieldsPerRecord = 0 // Google Sheets is consistent
|
||||||
|
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
||||||
|
csvr.TrimLeadingSpace = false
|
||||||
|
csvr.ReuseRecord = false
|
||||||
|
return &Reader{
|
||||||
|
Reader: csvr,
|
||||||
|
QuotedComments: true,
|
||||||
|
Comment: '#',
|
||||||
|
r: r,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reader) Read() ([]string, error) {
|
||||||
|
if r.err != nil {
|
||||||
|
return nil, r.err
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
record, err := r.Reader.Read()
|
||||||
|
if err != nil {
|
||||||
|
if r.close {
|
||||||
|
_ = r.resp.Body.Close()
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.QuotedComments && len(record[0]) > 0 {
|
||||||
|
runeValue, _ := utf8.DecodeRuneInString(record[0])
|
||||||
|
if runeValue == r.Comment {
|
||||||
|
last := len(record) - 1
|
||||||
|
for len(record[last]) == 0 {
|
||||||
|
last -= 1
|
||||||
|
}
|
||||||
|
if last == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return record, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reader) ReadAll() ([][]string, error) {
|
||||||
|
var records [][]string
|
||||||
|
|
||||||
|
for {
|
||||||
|
record, err := r.Read()
|
||||||
|
if nil != err {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
return records, nil
|
||||||
|
}
|
||||||
|
return records, err
|
||||||
|
}
|
||||||
|
records = append(records, record)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseIDs extracts the document ID and sheet ID (gid) from a Google Sheets URL.
//
// The document ID is the path segment that follows "/spreadsheets/d/"; it ends
// at the next '/', '?' or '#', or at the end of the string. The gid is the
// value of the first "gid" query or fragment parameter and defaults to "0"
// when there is none or it is empty. If no document ID can be found, both
// results are empty.
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"

	_, rest, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	// Stop at '?' and '#' as well as '/', so that URLs without a trailing
	// path segment (".../d/{docid}?gid=1") do not leak the query into the ID.
	docid = rest
	if end := strings.IndexAny(rest, "/?#"); end != -1 {
		docid = rest[:end]
	}
	if docid == "" {
		return "", ""
	}

	gid = findGID(urlStr)
	if gid == "" {
		gid = "0"
	}
	return docid, gid
}

// findGID returns the value of the first "gid=" that starts a query or
// fragment parameter (that is, directly follows '?', '&' or '#'), or "" if
// there is none. Matches inside longer names such as "orgid=" are ignored.
func findGID(urlStr string) string {
	const key = "gid="

	for from := 0; ; {
		i := strings.Index(urlStr[from:], key)
		if i == -1 {
			return ""
		}
		keyStart := from + i
		valueStart := keyStart + len(key)

		if keyStart > 0 && strings.IndexByte("?&#", urlStr[keyStart-1]) != -1 {
			value := urlStr[valueStart:]
			if end := strings.IndexAny(value, "#&?/"); end != -1 {
				value = value[:end]
			}
			return value
		}
		from = valueStart
	}
}
|
||||||
|
|
||||||
|
// Writer wraps an encoding/csv Writer and writes comment rows so that they
// stay distinguishable from ordinary quoted fields.
type Writer struct {
	*csv.Writer

	// Comment is the rune that marks a comment; NewWriter sets it to '#'.
	Comment rune

	// w is the destination given to NewWriter. Rows that start with Comment
	// are written to it directly, bypassing the embedded csv.Writer.
	w io.Writer
}

// NewWriter returns a Writer that writes to w and treats '#' as the comment rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes a single record.
//
// A record with more than one field whose first field begins with w.Comment
// is handled specially:
//   - if every other field is empty, only the first field is written, unquoted,
//     as a comment line;
//   - otherwise the first field is always quoted (so it cannot be mistaken
//     for a comment) and the remaining fields are quoted only when needed.
//
// Such lines are written straight to the destination after flushing the
// embedded csv.Writer, so output order is preserved. All other records are
// passed to the embedded csv.Writer. The record is never modified.
func (w *Writer) Write(record []string) error {
	if len(record) < 2 || record[0] == "" {
		return w.Writer.Write(record)
	}
	if first, _ := utf8.DecodeRuneInString(record[0]); first != w.Comment {
		return w.Writer.Write(record)
	}

	// Push out anything the csv.Writer still buffers before writing around it.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	bareComment := true
	for _, field := range record[1:] {
		if field != "" {
			bareComment = false
			break
		}
	}

	var line strings.Builder
	if bareComment {
		line.WriteString(record[0])
	} else {
		for i, field := range record {
			if i > 0 {
				line.WriteRune(w.Comma)
			}
			// Besides the forced quoting of the first field, quote whatever
			// would otherwise break the row: delimiter, quote, CR or LF.
			if i == 0 || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") {
				line.WriteByte('"')
				line.WriteString(strings.ReplaceAll(field, `"`, `""`))
				line.WriteByte('"')
			} else {
				line.WriteString(field)
			}
		}
	}

	// Use the same line terminator as the embedded csv.Writer.
	if w.UseCRLF {
		line.WriteString("\r\n")
	} else {
		line.WriteByte('\n')
	}

	_, err := io.WriteString(w.w, line.String())
	return err
}

// WriteAll writes every record with Write, then flushes and returns any
// error recorded by the embedded csv.Writer.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
|
250
io/transforms/gsheet2csv/gsheet2csv_test.go
Normal file
250
io/transforms/gsheet2csv/gsheet2csv_test.go
Normal file
@ -0,0 +1,250 @@
|
|||||||
|
package gsheet2csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockHTTPClient is a canned stand-in for an HTTP client: Get always answers
// with the stored response and error.
type mockHTTPClient struct {
	resp *http.Response // returned by Get
	err  error          // returned by Get
}

// Get ignores the requested URL and returns the configured response and error.
func (m *mockHTTPClient) Get(_ string) (*http.Response, error) {
	return m.resp, m.err
}
|
||||||
|
|
||||||
|
// sampleCSV is the well-formed fixture: an unquoted comment line, a quoted
// comment line, a header row and three data rows, with a blank line before
// the last data row.
const sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"

"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`
|
||||||
|
|
||||||
|
// malformedCSV is the broken fixture for error handling: the last field of
// the header row is missing its closing quote.
const malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
|
||||||
|
|
||||||
|
// TestParseIDs verifies the ParseIDs function for various URL formats.
|
||||||
|
func TestParseIDs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
url string
|
||||||
|
wantDoc string
|
||||||
|
wantGid string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Google Sheets Edit / Share URL with gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "559037238",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Google Sheets CSV URL with gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "559037238",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL without gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "0",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid URL",
|
||||||
|
url: "https://example.com/invalid",
|
||||||
|
wantDoc: "",
|
||||||
|
wantGid: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
gotDoc, gotGid := ParseIDs(tt.url)
|
||||||
|
if gotDoc != tt.wantDoc {
|
||||||
|
t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc)
|
||||||
|
}
|
||||||
|
if gotGid != tt.wantGid {
|
||||||
|
t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL.
|
||||||
|
func TestNewReaderFromURL(t *testing.T) {
|
||||||
|
originalGet := httpGet
|
||||||
|
defer func() { httpGet = originalGet }()
|
||||||
|
|
||||||
|
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||||
|
|
||||||
|
// Test successful HTTP response
|
||||||
|
mockResp := &http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: io.NopCloser(strings.NewReader(sampleCSV)),
|
||||||
|
}
|
||||||
|
client := &mockHTTPClient{resp: mockResp}
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader := NewReaderFromURL(url)
|
||||||
|
if reader.err != nil {
|
||||||
|
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||||
|
}
|
||||||
|
if reader.resp != mockResp {
|
||||||
|
t.Error("NewReaderFromURL() did not set response correctly")
|
||||||
|
}
|
||||||
|
if !reader.close {
|
||||||
|
t.Error("NewReaderFromURL() did not set close flag")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test HTTP failure
|
||||||
|
client = &mockHTTPClient{resp: mockResp}
|
||||||
|
client.err = errors.New("network error")
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader = NewReaderFromURL(url)
|
||||||
|
if reader.err == nil {
|
||||||
|
t.Error("NewReaderFromURL() expected error, got nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test non-200 status
|
||||||
|
client = &mockHTTPClient{resp: &http.Response{
|
||||||
|
StatusCode: http.StatusNotFound,
|
||||||
|
Body: io.NopCloser(strings.NewReader("these aren't the droids you're looking for")),
|
||||||
|
}}
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader = NewReaderFromURL(url)
|
||||||
|
if reader.err == nil {
|
||||||
|
t.Error("NewReaderFromURL() expected error for non-200 status, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRead tests the Read method for comment handling.
|
||||||
|
func TestRead(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
quotedComments bool
|
||||||
|
expected [][]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Skip comments",
|
||||||
|
quotedComments: true,
|
||||||
|
expected: [][]string{
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Don't skip quoted comments",
|
||||||
|
quotedComments: false,
|
||||||
|
expected: [][]string{
|
||||||
|
{"# Sample Quoted Comment, with \"quotes\" itself"},
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
reader := NewReader(strings.NewReader(sampleCSV))
|
||||||
|
reader.QuotedComments = tt.quotedComments
|
||||||
|
|
||||||
|
for i, want := range tt.expected {
|
||||||
|
got, err := reader.Read()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Read() error at record %d: %v", i, err)
|
||||||
|
}
|
||||||
|
if !slices.Equal(got, want) {
|
||||||
|
t.Errorf("Read() record %d = %v, want %v", i, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify EOF
|
||||||
|
_, err := reader.Read()
|
||||||
|
if !errors.Is(err, io.EOF) {
|
||||||
|
t.Errorf("Read() expected EOF, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestReadAll tests the ReadAll method for different configurations.
|
||||||
|
func TestReadAll(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
quotedComments bool
|
||||||
|
expected [][]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Skip comments",
|
||||||
|
quotedComments: true,
|
||||||
|
expected: [][]string{
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
reader := NewReader(strings.NewReader(sampleCSV))
|
||||||
|
reader.QuotedComments = tt.quotedComments
|
||||||
|
|
||||||
|
got, err := reader.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("ReadAll() error: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != len(tt.expected) {
|
||||||
|
t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected))
|
||||||
|
}
|
||||||
|
for i, want := range tt.expected {
|
||||||
|
if !slices.Equal(got[i], want) {
|
||||||
|
t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV.
|
||||||
|
func TestNewReaderFromURLWithMalformedCSV(t *testing.T) {
|
||||||
|
mockResp := &http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: io.NopCloser(strings.NewReader(malformedCSV)),
|
||||||
|
}
|
||||||
|
client := &mockHTTPClient{resp: mockResp}
|
||||||
|
originalGet := httpGet
|
||||||
|
httpGet = client.Get
|
||||||
|
defer func() { httpGet = originalGet }()
|
||||||
|
|
||||||
|
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||||
|
reader := NewReaderFromURL(url)
|
||||||
|
if reader.err != nil {
|
||||||
|
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reading should fail due to malformed CSV
|
||||||
|
_, err := reader.Read()
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Read() expected error for malformed CSV, got nil")
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user