feat(gsheet2csv): parse URLs and CSVs with comments

2026-01-01 18:28:48 +00:00 · 2025-10-10 18:13:12 -06:00 · 2025-10-10 18:13:12 -06:00 · 24ec3f021d
commit 24ec3f021d
parent dc951ce388
13 changed files with 1252 additions and 0 deletions
--- a/io/transform/gsheet2csv/LICENSE
+++ b/io/transform/gsheet2csv/LICENSE
@ -0,0 +1,7 @@
+Authored in 2025 by AJ ONeal <aj@therootcompany.com>
+To the extent possible under law, the author(s) have dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+
+You should have received a copy of the CC0 Public Domain Dedication along with
+this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
--- a/io/transform/gsheet2csv/README.md
+++ b/io/transform/gsheet2csv/README.md
@ -0,0 +1,125 @@
+# gsheet2csv
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv)
+
+A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader.
+
+This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us:
+
+- works with Google Sheet URLs, regardless of URL format
+   - Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000>
+   - Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing>
+   - CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000>
+   - anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}`
+- can write out for import to gsheet (comments containing quotes or commas are quoted), \
+  or in RFC form (comments are never quoted, but values beginning with a comment character are)
+- swaps `\r\n` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`)
+
+Note:
+
+- The Google Sheet must be shared to **Anyone with the link**.
+- Read and write in 'gsheet' style for reciprocity of comment handling
+- Be careful about single-column CSVs \
+  (all comment-like lines are comments, same as with `encoding/csv` and empty lines)
+
+# Usage
+
+Same as `encoding/csv` (embedded), but with two extra options: `Comment` (on both the reader and the writer) and `QuoteAmbiguousComments` (on the writer):
+
+```go
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/therootcompany/golib/io/transform/gsheet2csv"
+)
+
+func main() {
+	switch len(os.Args) {
+	case 2:
+		break
+	case 1:
+		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
+		os.Exit(1)
+	}
+	urlOrPath := os.Args[1]
+
+	gsr := gsheet2csv.NewReaderFrom(urlOrPath)
+	records, err := gsr.ReadAll()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
+		os.Exit(1)
+	}
+
+	csvw := gsheet2csv.NewWriter(os.Stdout)
+	csvw.Comment = gsr.Comment
+	if err := csvw.WriteAll(records); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
+		os.Exit(1)
+	}
+}
+```
+
+# CLI
+
+There are a few convenience utilities:
+
+- `gsheet2csv` (also `gsheet2tsv`)
+- `gsheet2env`
+
+## gsheet2csv
+
+They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines.
+
+The alterable behavior is almost exclusively for testing.
+
+### Installation
+
+```sh
+go install github.com/therootcompany/golib/io/transform/gsheet2csv/cmd/gsheet2csv@latest
+```
+
+### Usage
+
+```sh
+gsheet2csv -raw -o ./gsheet.csv 'https://docs.google.com/spreadsheets/...'
+
+gsheet2csv -d '\t' --write-style 'gsheet' ./gsheet.csv > ./gsheet.tsv
+
+gsheet2csv --strip-comments ./gsheet.csv > ./sheet.csv
+```
+
+```text
+--raw               download without processing
+--print-ids         print ids to stdout without download
+--print-url         print url to stdout without downloading
+-o <filepath>       write records to file (default: stdout)
+-d                  field delimiter (for output)
+--read-delimiter    input field delimiter (for testing reciprocity)
+--crlf              write using CRLF (\r\n) as the record separator
+--comment '#'       treat lines starting with # as comments
+--strip-comments    ignore single-field data beginning with a comment character
+--read-style        'gsheet' (preserves comments as single-field records)
+                    or 'rfc' (ignore lines starting with comment character)
+--write-style       'gsheet' (quote single-field comments containing quotes or commas)
+                    or 'rfc' (only quote values starting with a comment character)
+```
+
+### ASCII Delimiters
+
+```
+,   comma
+\t  tab (or a normal tab)
+    space (just a normal space)
+:   colon
+;   semicolon
+|   pipe
+^_  unit separator
+^^  record separator
+^]  group separator
+^\  file separator
+\f  form feed (also ^L)
+\v  vertical tab (also ^K)
+```
--- a/io/transform/gsheet2csv/cmd/gsheet2csv/main.go
+++ b/io/transform/gsheet2csv/cmd/gsheet2csv/main.go
@ -0,0 +1,220 @@
+package main
+
+import (
+	"encoding/csv"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/therootcompany/golib/io/transform/gsheet2csv"
+)
+
+// CSVReader is the reader behavior this command relies on. It lets main use
+// either an encoding/csv Reader or a gsheet2csv Reader through one variable.
+type CSVReader interface {
+	// Read returns the next record; main stops when the error is io.EOF.
+	Read() ([]string, error)
+	// ReadAll returns all remaining records.
+	ReadAll() ([][]string, error)
+}
+
+// CSVWriter is the writer behavior this command relies on.
+type CSVWriter interface {
+	// Write emits a single record.
+	Write([]string) error
+	// WriteAll emits many records.
+	WriteAll([][]string) error
+	// Flush is called by main once all records have been written.
+	Flush()
+	// Error is checked by main after Flush to detect write failures.
+	Error() error
+}
+
+// main implements the gsheet2csv / gsheet2tsv command line tool.
+//
+// It takes exactly one positional argument: an http(s) Google Sheet URL or a
+// local file path (optionally prefixed with file://). The input is read as
+// CSV and re-written to stdout, or to the file named by -o. Progress messages
+// and errors go to stderr, and any failure exits with status 1.
+//
+// --print-url and --print-ids only print to stdout: they never download the
+// sheet and never create or truncate the -o file.
+func main() {
+	var commentArg string
+	format := "CSV"
+	delim := ','
+	// The binary name selects the defaults, so a build named *tsv* emits TSV.
+	if strings.Contains(os.Args[0], "tsv") {
+		delim = '\t'
+		format = "TSV"
+	}
+
+	// Parse command-line flags
+	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
+	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
+	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
+	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
+	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
+	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
+	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
+	rawOnly := flag.Bool("raw", false, "don't parse, just download")
+	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
+	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
+	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
+		fmt.Fprintf(os.Stderr, "Flags:\n")
+		flag.PrintDefaults()
+		fmt.Fprintf(os.Stderr, "\nExample:\n")
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0])
+	}
+	flag.Parse()
+
+	// Check for URL argument
+	if flag.NArg() != 1 {
+		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
+		flag.Usage()
+		os.Exit(1)
+	}
+	url := flag.Arg(0)
+
+	// Neither print flag produces records, so nothing below may download or
+	// touch the output file when one of them is set.
+	printOnly := *urlOnly || *parseOnly
+
+	// Prepare output writer
+	out := os.Stdout
+	if *outputFile != "" && !printOnly {
+		f, err := os.Create(*outputFile)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
+			os.Exit(1)
+		}
+		defer func() { _ = f.Close() }()
+		out = f
+	}
+
+	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
+		os.Exit(1)
+	}
+
+	delim, err = gsheet2csv.DecodeDelimiter(*delimString)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
+		os.Exit(1)
+	}
+
+	var rc io.ReadCloser
+	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
+		docid, gid := gsheet2csv.ParseIDs(url)
+		if *parseOnly {
+			fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
+		} else {
+			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
+		}
+
+		sheetURL := gsheet2csv.ToCSVURL(docid, gid)
+		if *urlOnly {
+			fmt.Printf("%s\n", sheetURL)
+		}
+
+		if !printOnly {
+			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
+			resp, err := gsheet2csv.GetSheet(docid, gid)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
+				os.Exit(1)
+			}
+			defer func() { _ = resp.Body.Close() }()
+			rc = resp.Body
+		}
+	} else {
+		url = strings.TrimPrefix(url, "file://")
+		fmt.Fprintf(os.Stderr, "opening %s\n", url)
+		f, err := os.Open(url)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
+			os.Exit(1)
+		}
+		defer func() { _ = f.Close() }()
+		rc = f
+	}
+
+	if out == os.Stdout {
+		fmt.Fprintf(os.Stderr, "\n")
+	}
+
+	if printOnly {
+		os.Exit(0)
+	}
+
+	if *rawOnly {
+		if _, err := io.Copy(out, rc); err != nil {
+			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	// "0" disables comment handling. An empty value is treated the same way,
+	// because decoding "" would yield utf8.RuneError rather than a usable rune.
+	var comment rune
+	if commentArg != "0" && commentArg != "" {
+		comment, _ = utf8.DecodeRuneInString(commentArg)
+	}
+
+	// Create a reader for the Google Sheet
+	var csvr CSVReader
+	if *readStyle == "rfc" {
+		rfcr := csv.NewReader(rc)
+		rfcr.Comma = inputDelim
+		rfcr.Comment = comment
+		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
+		csvr = rfcr
+	} else {
+		gsr := gsheet2csv.NewReader(rc)
+		gsr.Comma = inputDelim
+		// In gsheet style comments are kept as single-field records unless
+		// --strip-comments asks the reader to drop them.
+		if *noReadComments {
+			gsr.Comment = comment
+		} else {
+			gsr.Comment = 0
+		}
+		gsr.ReuseRecord = true
+		csvr = gsr
+	}
+
+	// Create CSV writer. The gsheet2csv writer serves both write styles; only
+	// its comment quoting differs between them.
+	var csvw CSVWriter
+	{
+		gsw := gsheet2csv.NewWriter(out)
+		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
+		gsw.Comment = comment
+		gsw.Comma = delim // output field delimiter (-d)
+		gsw.UseCRLF = *useCRLF
+		csvw = gsw
+	}
+
+	for {
+		// Convert each record
+		record, err := csvr.Read()
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			fmt.Fprintf(os.Stderr, "Error reading %s: %v\n", format, err)
+			os.Exit(1)
+		}
+
+		if err := csvw.Write(record); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", format, err)
+			os.Exit(1)
+		}
+	}
+	csvw.Flush()
+	if err := csvw.Error(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error flushing %s writer: %v\n", format, err)
+		os.Exit(1)
+	}
+
+	if out != os.Stdout {
+		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
+	}
+}
--- a/io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
+++ b/io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
@ -0,0 +1,220 @@
+package main
+
+import (
+	"encoding/csv"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"unicode/utf8"
+
+	"github.com/therootcompany/golib/io/transform/gsheet2csv"
+)
+
+// CSVReader is the reader behavior this command relies on. It lets main use
+// either an encoding/csv Reader or a gsheet2csv Reader through one variable.
+type CSVReader interface {
+	// Read returns the next record; main stops when the error is io.EOF.
+	Read() ([]string, error)
+	// ReadAll returns all remaining records.
+	ReadAll() ([][]string, error)
+}
+
+// CSVWriter is the writer behavior this command relies on.
+type CSVWriter interface {
+	// Write emits a single record.
+	Write([]string) error
+	// WriteAll emits many records.
+	WriteAll([][]string) error
+	// Flush is called by main once all records have been written.
+	Flush()
+	// Error is checked by main after Flush to detect write failures.
+	Error() error
+}
+
+// main implements the gsheet2csv / gsheet2tsv command line tool.
+//
+// It takes exactly one positional argument: an http(s) Google Sheet URL or a
+// local file path (optionally prefixed with file://). The input is read as
+// CSV and re-written to stdout, or to the file named by -o. Progress messages
+// and errors go to stderr, and any failure exits with status 1.
+//
+// --print-url and --print-ids only print to stdout: they never download the
+// sheet and never create or truncate the -o file.
+func main() {
+	var commentArg string
+	format := "CSV"
+	delim := ','
+	// The binary name selects the defaults, so a build named *tsv* emits TSV.
+	if strings.Contains(os.Args[0], "tsv") {
+		delim = '\t'
+		format = "TSV"
+	}
+
+	// Parse command-line flags
+	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
+	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
+	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
+	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
+	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
+	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
+	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
+	rawOnly := flag.Bool("raw", false, "don't parse, just download")
+	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
+	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
+	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
+		fmt.Fprintf(os.Stderr, "Flags:\n")
+		flag.PrintDefaults()
+		fmt.Fprintf(os.Stderr, "\nExample:\n")
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0])
+	}
+	flag.Parse()
+
+	// Check for URL argument
+	if flag.NArg() != 1 {
+		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
+		flag.Usage()
+		os.Exit(1)
+	}
+	url := flag.Arg(0)
+
+	// Neither print flag produces records, so nothing below may download or
+	// touch the output file when one of them is set.
+	printOnly := *urlOnly || *parseOnly
+
+	// Prepare output writer
+	out := os.Stdout
+	if *outputFile != "" && !printOnly {
+		f, err := os.Create(*outputFile)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
+			os.Exit(1)
+		}
+		defer func() { _ = f.Close() }()
+		out = f
+	}
+
+	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
+		os.Exit(1)
+	}
+
+	delim, err = gsheet2csv.DecodeDelimiter(*delimString)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
+		os.Exit(1)
+	}
+
+	var rc io.ReadCloser
+	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
+		docid, gid := gsheet2csv.ParseIDs(url)
+		if *parseOnly {
+			fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
+		} else {
+			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
+		}
+
+		sheetURL := gsheet2csv.ToCSVURL(docid, gid)
+		if *urlOnly {
+			fmt.Printf("%s\n", sheetURL)
+		}
+
+		if !printOnly {
+			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
+			resp, err := gsheet2csv.GetSheet(docid, gid)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
+				os.Exit(1)
+			}
+			defer func() { _ = resp.Body.Close() }()
+			rc = resp.Body
+		}
+	} else {
+		url = strings.TrimPrefix(url, "file://")
+		fmt.Fprintf(os.Stderr, "opening %s\n", url)
+		f, err := os.Open(url)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
+			os.Exit(1)
+		}
+		defer func() { _ = f.Close() }()
+		rc = f
+	}
+
+	if out == os.Stdout {
+		fmt.Fprintf(os.Stderr, "\n")
+	}
+
+	if printOnly {
+		os.Exit(0)
+	}
+
+	if *rawOnly {
+		if _, err := io.Copy(out, rc); err != nil {
+			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	// "0" disables comment handling. An empty value is treated the same way,
+	// because decoding "" would yield utf8.RuneError rather than a usable rune.
+	var comment rune
+	if commentArg != "0" && commentArg != "" {
+		comment, _ = utf8.DecodeRuneInString(commentArg)
+	}
+
+	// Create a reader for the Google Sheet
+	var csvr CSVReader
+	if *readStyle == "rfc" {
+		rfcr := csv.NewReader(rc)
+		rfcr.Comma = inputDelim
+		rfcr.Comment = comment
+		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
+		csvr = rfcr
+	} else {
+		gsr := gsheet2csv.NewReader(rc)
+		gsr.Comma = inputDelim
+		// In gsheet style comments are kept as single-field records unless
+		// --strip-comments asks the reader to drop them.
+		if *noReadComments {
+			gsr.Comment = comment
+		} else {
+			gsr.Comment = 0
+		}
+		gsr.ReuseRecord = true
+		csvr = gsr
+	}
+
+	// Create CSV writer. The gsheet2csv writer serves both write styles; only
+	// its comment quoting differs between them.
+	var csvw CSVWriter
+	{
+		gsw := gsheet2csv.NewWriter(out)
+		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
+		gsw.Comment = comment
+		gsw.Comma = delim // output field delimiter (-d)
+		gsw.UseCRLF = *useCRLF
+		csvw = gsw
+	}
+
+	for {
+		// Convert each record
+		record, err := csvr.Read()
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			fmt.Fprintf(os.Stderr, "Error reading %s: %v\n", format, err)
+			os.Exit(1)
+		}
+
+		if err := csvw.Write(record); err != nil {
+			fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", format, err)
+			os.Exit(1)
+		}
+	}
+	csvw.Flush()
+	if err := csvw.Error(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error flushing %s writer: %v\n", format, err)
+		os.Exit(1)
+	}
+
+	if out != os.Stdout {
+		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
+	}
+}
--- a/io/transform/gsheet2csv/fixtures/example.go
+++ b/io/transform/gsheet2csv/fixtures/example.go
@ -0,0 +1,33 @@
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/therootcompany/golib/io/transform/gsheet2csv"
+)
+
+// main reads the sheet named by the first argument (URL or file path) and
+// writes its records to stdout as CSV, using the reader's comment character
+// for the writer as well.
+func main() {
+	// Only the no-argument case prints usage; extra arguments are ignored.
+	if len(os.Args) == 1 {
+		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
+		os.Exit(1)
+	}
+
+	reader := gsheet2csv.NewReaderFrom(os.Args[1])
+	rows, readErr := reader.ReadAll()
+	if readErr != nil {
+		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", reader.URL, readErr)
+		os.Exit(1)
+	}
+
+	writer := gsheet2csv.NewWriter(os.Stdout)
+	writer.Comment = reader.Comment
+	writeErr := writer.WriteAll(rows)
+	if writeErr != nil {
+		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", writeErr)
+		os.Exit(1)
+	}
+}
--- a/io/transform/gsheet2csv/fixtures/gsheet-raw.csv
+++ b/io/transform/gsheet2csv/fixtures/gsheet-raw.csv
@ -0,0 +1,18 @@
+# this is a comment,,
+"# this is, well, a quoted comment",,
+"# this is a ""super""-quoted comment",,
+Key,Value,
+Name,55,
+Girlfriend's Age,55,
+,,
+My IQ,55,
+,55,
+"Key,with,Comma",,
+,"Value,with,Comma",
+"Quoted ""Key""",Normal Value,
+Normal Key,"Quoted ""Value""",
+"Quoted ""Key""",,
+,"Quoted ""Value""",
+x,y,z
+"# comment with trailing comma,",,
+#1,2,#3
--- a/io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
+++ b/io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
@ -0,0 +1,14 @@
+Key,Value,
+Name,55,
+Girlfriend's Age,55,
+,,
+My IQ,55,
+,55,
+"Key,with,Comma",,
+,"Value,with,Comma",
+"Quoted ""Key""",Normal Value,
+Normal Key,"Quoted ""Value""",
+"Quoted ""Key""",,
+,"Quoted ""Value""",
+x,y,z
+"#1",2,#3
--- a/io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
+++ b/io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
@ -0,0 +1,18 @@
+# this is a comment
+"# this is, well, a quoted comment"
+"# this is a ""super""-quoted comment"
+Key,Value,
+Name,55,
+Girlfriend's Age,55,
+,,
+My IQ,55,
+,55,
+"Key,with,Comma",,
+,"Value,with,Comma",
+"Quoted ""Key""",Normal Value,
+Normal Key,"Quoted ""Value""",
+"Quoted ""Key""",,
+,"Quoted ""Value""",
+x,y,z
+"# comment with trailing comma,"
+"#1",2,#3
--- a/io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
+++ b/io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
@ -0,0 +1,18 @@
+# this is a comment
+# this is, well, a quoted comment
+# this is a "super"-quoted comment
+Key,Value,
+Name,55,
+Girlfriend's Age,55,
+,,
+My IQ,55,
+,55,
+"Key,with,Comma",,
+,"Value,with,Comma",
+"Quoted ""Key""",Normal Value,
+Normal Key,"Quoted ""Value""",
+"Quoted ""Key""",,
+,"Quoted ""Value""",
+x,y,z
+# comment with trailing comma,
+"#1",2,#3
--- a/io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
+++ b/io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
@ -0,0 +1,18 @@
+# this is a comment
+# this is, well, a quoted comment
+# this is a "super"-quoted comment
+Key	Value	
+Name	55	
+Girlfriend's Age	55	
+		
+My IQ	55	
+	55	
+Key,with,Comma		
+	Value,with,Comma	
+"Quoted ""Key"""	Normal Value	
+Normal Key	"Quoted ""Value"""	
+"Quoted ""Key"""		
+	"Quoted ""Value"""	
+x	y	z
+# comment with trailing comma,
+"#1"	2	#3
--- a/io/transform/gsheet2csv/go.mod
+++ b/io/transform/gsheet2csv/go.mod
@ -0,0 +1,3 @@
+module github.com/therootcompany/golib/io/transform/gsheet2csv
+
+go 1.24.6
--- a/io/transform/gsheet2csv/gsheet2csv.go
+++ b/io/transform/gsheet2csv/gsheet2csv.go
@ -0,0 +1,309 @@
+// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along with
+// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
+//
+// SPDX-License-Identifier: CC0-1.0
+
+package gsheet2csv
+
+import (
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strings"
+	"unicode/utf8"
+)
+
+// ASCII separator control characters that DecodeDelimiter can return.
+const (
+	fileSeparator   = '\x1c' // selected by "^\" or "\x1c"
+	groupSeparator  = '\x1d' // selected by "^]" or "\x1d"
+	recordSeparator = '\x1e' // selected by "^^" or "\x1e"
+	unitSeparator   = '\x1f' // selected by "^_" or "\x1f"
+)
+
+// ErrHTTPGet is returned by GetSheet when the response status is not 200 OK.
+var ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")
+
+// httpGet performs the download in GetSheet. It is a package variable so that
+// tests can replace it with a mock.
+var httpGet = http.Get
+
+// Reader wraps an embedded csv.Reader and adds comment skipping plus, when
+// created from a Google Sheet, the identifiers of the sheet being read.
+type Reader struct {
+	*csv.Reader
+	// DocID and GID identify the sheet; set by NewReaderFromIDs on success.
+	DocID string
+	GID   string
+	// URL is the CSV export URL (NewReaderFromIDs) or the file path
+	// (NewReaderFrom).
+	URL string
+	// Comment is this Reader's own comment rune (it shadows the embedded
+	// csv.Reader field). When greater than 0, Read skips records whose first
+	// field begins with it and whose remaining fields are all empty.
+	// NewReader sets it to '#'.
+	Comment rune
+	r       io.Reader      // the reader passed to NewReader
+	resp    *http.Response // response whose Body Read closes when close is set
+	close   bool           // whether Read closes resp.Body on a read error (including io.EOF)
+	err     error          // construction error; returned by every Read call
+}
+
+// NewReaderFrom returns a Reader for urlOrPath.
+//
+// Strings beginning with http:// or https:// are handled by NewReaderFromURL.
+// Anything else is opened as a local file after stripping an optional file://
+// prefix, and the Reader's URL field is set to that path.
+//
+// Failure to open the file is not reported here; the error is stored and
+// returned by every subsequent Read.
+func NewReaderFrom(urlOrPath string) *Reader {
+	if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") {
+		return NewReaderFromURL(urlOrPath)
+	}
+
+	path := strings.TrimPrefix(urlOrPath, "file://")
+	f, err := os.Open(path)
+	if err != nil {
+		// Do not wrap the nil *os.File: a typed-nil io.Reader is a trap for
+		// anything that later inspects it.
+		r := NewReader(nil)
+		r.URL = path
+		r.err = err
+		return r
+	}
+
+	r := NewReader(f)
+	r.URL = path
+	// Read closes resp.Body when it hits an error or io.EOF. Route the file
+	// through that same path so the descriptor is released instead of leaked.
+	r.resp = &http.Response{Body: f}
+	r.close = true
+	return r
+}
+
+// NewReaderFromURL extracts the document and sheet IDs from url with ParseIDs
+// and returns the Reader that NewReaderFromIDs builds for them.
+func NewReaderFromURL(url string) *Reader {
+	return NewReaderFromIDs(ParseIDs(url))
+}
+
+// NewReaderFromIDs downloads the CSV export of the sheet identified by docid
+// and gid and returns a Reader over the response body.
+//
+// URL, DocID and GID are populated whether or not the download succeeds, so
+// callers can name the sheet in their error messages. A download failure is
+// stored and returned by every subsequent Read. On success the response body
+// is closed by Read once it reports an error or io.EOF.
+func NewReaderFromIDs(docid, gid string) *Reader {
+	resp, err := GetSheet(docid, gid)
+	if err != nil {
+		r := NewReader(nil)
+		r.URL = ToCSVURL(docid, gid)
+		r.DocID = docid
+		r.GID = gid
+		r.err = err
+		return r
+	}
+
+	r := NewReader(resp.Body)
+	r.URL = ToCSVURL(docid, gid)
+	r.DocID = docid
+	r.GID = gid
+	r.resp = resp
+	r.close = true
+	return r
+}
+
+func ToCSVURL(docid, gid string) string {
+	return fmt.Sprintf("https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s", docid, gid)
+}
+
+// GetSheet downloads the CSV export of the sheet identified by docid and gid.
+//
+// On success the caller owns the response and must close its Body. A
+// transport error is returned unchanged. Any status other than 200 OK closes
+// the body and returns an error that wraps ErrHTTPGet (match it with
+// errors.Is) and names the status received.
+func GetSheet(docid, gid string) (*http.Response, error) {
+	downloadURL := ToCSVURL(docid, gid)
+
+	resp, err := httpGet(downloadURL)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		_ = resp.Body.Close() // the body is discarded; only the status matters
+		return nil, fmt.Errorf("%w: got %d %s", ErrHTTPGet, resp.StatusCode, http.StatusText(resp.StatusCode))
+	}
+
+	return resp, nil
+}
+
+// NewReader returns a Reader over r with '#' as its comment rune.
+//
+// The embedded csv.Reader uses ',' as the delimiter, accepts any number of
+// fields per record, and has its own comment handling disabled so that quoted
+// comments can be told apart from fields.
+func NewReader(r io.Reader) *Reader {
+	gsr := &Reader{
+		Reader:  csv.NewReader(r),
+		Comment: '#',
+		r:       r,
+	}
+
+	gsr.Comma = ','
+	gsr.Reader.Comment = 0      // to allow distinguishing between quoted comments and fields
+	gsr.FieldsPerRecord = -1    // Google Sheets is consistent, but our commented files are not
+	gsr.LazyQuotes = false      // fields that need quotes use them correctly
+	gsr.TrimLeadingSpace = false
+	gsr.ReuseRecord = false
+	return gsr
+}
+
+// DecodeDelimiter converts a command-line delimiter spelling into a rune.
+//
+// Caret notation and backslash escapes are recognized for the ASCII
+// separators (^_ ^^ ^] ^\ and \x1f \x1e \x1d \x1c), form feed (^L, \f),
+// vertical tab (^K, \v) and tab (^I, \t). Any other input must be exactly one
+// valid UTF-8 character, which is returned as-is.
+//
+// An error is returned for empty input, invalid UTF-8, or more than one
+// character, rather than silently using the first rune.
+func DecodeDelimiter(delimString string) (rune, error) {
+	switch delimString {
+	case "^_", "\\x1f":
+		return unitSeparator, nil
+	case "^^", "\\x1e":
+		return recordSeparator, nil
+	case "^]", "\\x1d":
+		return groupSeparator, nil
+	case "^\\", "\\x1c":
+		return fileSeparator, nil
+	case "^L", "\\f":
+		return '\f', nil
+	case "^K", "\\v":
+		return '\v', nil
+	case "^I", "\\t":
+		return '\t', nil
+	}
+
+	delim, size := utf8.DecodeRuneInString(delimString)
+	switch {
+	case size == 0:
+		return 0, errors.New("delimiter must not be empty")
+	case delim == utf8.RuneError && size == 1:
+		return 0, fmt.Errorf("delimiter %q is not valid UTF-8", delimString)
+	case size != len(delimString):
+		return 0, fmt.Errorf("delimiter %q must be a single character", delimString)
+	}
+	return delim, nil
+}
+
+// Read returns the next record, skipping comments when r.Comment is set.
+//
+// A record counts as a comment when its first field begins with r.Comment and
+// every other field is empty. A record that starts with the comment rune but
+// has other non-empty fields is data and is returned.
+//
+// If the Reader was constructed with an error, that error is returned on
+// every call. On any read error, including io.EOF, the underlying response
+// body is closed (once) when the Reader owns it.
+func (r *Reader) Read() ([]string, error) {
+	if r.err != nil {
+		return nil, r.err
+	}
+
+	for {
+		record, err := r.Reader.Read()
+		if err != nil {
+			if r.close {
+				_ = r.resp.Body.Close()
+				// Further Reads keep failing; do not close the body again.
+				r.close = false
+			}
+			return nil, err
+		}
+
+		// The empty-field guard matters when Comment is utf8.RuneError:
+		// decoding "" also yields RuneError, which would otherwise treat an
+		// all-empty record as a comment and run the scan below off the front.
+		if r.Comment > 0 && len(record) > 0 && record[0] != "" {
+			if first, _ := utf8.DecodeRuneInString(record[0]); first == r.Comment {
+				last := len(record) - 1
+				for last > 0 && record[last] == "" {
+					last--
+				}
+				if last == 0 {
+					continue
+				}
+			}
+		}
+		return record, nil
+	}
+}
+
+// ReadAll reads records with Read until io.EOF and returns them.
+//
+// io.EOF is not reported as an error. On any other error the records read so
+// far are returned together with that error.
+//
+// Each returned record has its own backing slice even when ReuseRecord is
+// set; without the copy every element would alias the last record read.
+func (r *Reader) ReadAll() ([][]string, error) {
+	var records [][]string
+
+	for {
+		record, err := r.Read()
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				return records, nil
+			}
+			return records, err
+		}
+		if r.ReuseRecord {
+			record = append([]string(nil), record...)
+		}
+		records = append(records, record)
+	}
+}
+
+// ParseIDs extracts the document ID and sheet ID (gid) from a Google Sheets
+// URL.
+//
+// The document ID is the path segment following "/spreadsheets/d/"; it ends
+// at the next '/', '?' or '#', or at the end of the string. The gid is the
+// value of the first "gid=" parameter that directly follows '?', '&' or '#',
+// so longer names such as "ogid=" are not mistaken for it. When no gid is
+// present, or it is empty, "0" is returned.
+//
+// If the URL has no "/spreadsheets/d/" segment, or the document ID is empty,
+// both results are empty strings.
+func ParseIDs(urlStr string) (docid string, gid string) {
+	const prefix = "/spreadsheets/d/"
+	_, afterPrefix, found := strings.Cut(urlStr, prefix)
+	if !found {
+		return "", ""
+	}
+
+	docid = afterPrefix
+	if end := strings.IndexAny(afterPrefix, "/?#"); end != -1 {
+		docid = afterPrefix[:end]
+	}
+	if docid == "" {
+		return "", ""
+	}
+
+	const key = "gid="
+	remaining := urlStr
+	for {
+		i := strings.Index(remaining, key)
+		if i == -1 {
+			break
+		}
+		value := remaining[i+len(key):]
+		// Accept the match only when it starts a query or fragment parameter.
+		if i > 0 && strings.IndexByte("?&#", remaining[i-1]) != -1 {
+			if end := strings.IndexAny(value, "#&?/"); end != -1 {
+				value = value[:end]
+			}
+			gid = value
+			break
+		}
+		remaining = value
+	}
+
+	if gid == "" {
+		gid = "0"
+	}
+	return docid, gid
+}
+
+// Writer wraps an embedded csv.Writer and adds comment-aware output.
+type Writer struct {
+	*csv.Writer
+	// Comment is the rune that marks comment lines; 0 disables the special
+	// handling in Write. NewWriter sets it to '#'.
+	Comment rune
+	// QuoteAmbiguousComments, when true, makes Write quote single-field
+	// comments that contain a quote or the delimiter instead of writing them
+	// verbatim.
+	QuoteAmbiguousComments bool
+	w                      io.Writer // destination for the lines Write emits directly
+}
+
+// NewWriter returns a Writer that writes to w through a new csv.Writer and
+// uses '#' as its comment rune.
+func NewWriter(w io.Writer) *Writer {
+	gsw := new(Writer)
+	gsw.Writer = csv.NewWriter(w)
+	gsw.Comment = '#'
+	gsw.w = w
+	return gsw
+}
+
+// Write writes one record, giving special treatment to records whose first
+// field begins with the comment rune.
+//
+// Records that do not start with w.Comment (or any record when w.Comment is
+// 0) are passed to the embedded csv.Writer unchanged.
+//
+// A record whose only non-empty field is the first one is a true comment.
+// Trailing empty fields are dropped and it is written verbatim, or, when
+// QuoteAmbiguousComments is set, quoted only if it contains a quote, the
+// delimiter, or a line break. A comment written verbatim is not escaped, so a
+// line break inside it starts a new line in the output.
+//
+// A record that starts with the comment rune but has other non-empty fields
+// is data: its first field is always quoted so it cannot be read back as a
+// comment, and the other fields are quoted when they contain a quote, the
+// delimiter, or a line break.
+//
+// These lines are written directly to the destination after flushing the
+// embedded writer, to keep output in order. The caller's record is never
+// modified.
+func (w *Writer) Write(record []string) error {
+	// Not handling comments? Move along. The empty first-field check also
+	// covers Comment == utf8.RuneError, which decoding "" would match.
+	if w.Comment == 0 || len(record) == 0 || record[0] == "" {
+		return w.Writer.Write(record)
+	}
+
+	// First char not a comment char? Move along.
+	if first, _ := utf8.DecodeRuneInString(record[0]); first != w.Comment {
+		return w.Writer.Write(record)
+	}
+
+	// Is this a true comment? Or data that begins with the comment char?
+	lastNonEmpty := len(record) - 1
+	for lastNonEmpty > 0 && record[lastNonEmpty] == "" {
+		lastNonEmpty--
+	}
+
+	// We write directly below, so buffered records must go out first, and a
+	// failure to flush them must not be lost.
+	w.Flush()
+	if err := w.Error(); err != nil {
+		return err
+	}
+	newline := "\n"
+	if w.UseCRLF {
+		newline = "\r\n"
+	}
+
+	fields := record
+	alwaysQuote := 0 // index of the field that is quoted unconditionally
+	if lastNonEmpty == 0 {
+		// Write true comments out plain
+		if !w.QuoteAmbiguousComments {
+			_, err := io.WriteString(w.w, record[0]+newline)
+			return err
+		}
+		// Quote the comment iff it needs it, not universally
+		fields = record[:1]
+		alwaysQuote = -1
+	}
+
+	comma := string(w.Comma)
+	var line strings.Builder
+	for i, f := range fields {
+		if i > 0 {
+			line.WriteString(comma)
+		}
+		if i == alwaysQuote || strings.ContainsAny(f, "\"\r\n") || strings.Contains(f, comma) {
+			line.WriteByte('"')
+			line.WriteString(strings.ReplaceAll(f, `"`, `""`))
+			line.WriteByte('"')
+			continue
+		}
+		line.WriteString(f)
+	}
+	line.WriteString(newline)
+
+	_, err := io.WriteString(w.w, line.String())
+	return err
+}
+
+// WriteAll writes every record with Write, stopping at the first error, then
+// flushes the embedded csv.Writer and returns its error state.
+func (w *Writer) WriteAll(records [][]string) error {
+	for i := range records {
+		err := w.Write(records[i])
+		if err != nil {
+			return err
+		}
+	}
+
+	w.Flush()
+	return w.Error()
+}
--- a/io/transform/gsheet2csv/gsheet2csv_test.go
+++ b/io/transform/gsheet2csv/gsheet2csv_test.go
@ -0,0 +1,249 @@
+package gsheet2csv
+
+import (
+	"errors"
+	"io"
+	"net/http"
+	"slices"
+	"strings"
+	"testing"
+)
+
+// mockHTTPClient allows controlling HTTP responses for testing. Its Get
+// method is assigned to the package-level httpGet variable by the tests.
+type mockHTTPClient struct {
+	resp *http.Response // response returned by Get
+	err  error          // error returned by Get
+}
+
+// Get ignores the requested URL and returns the canned response and error.
+func (m *mockHTTPClient) Get(_ string) (*http.Response, error) {
+	resp, err := m.resp, m.err
+	return resp, err
+}
+
+// sampleCSV mimics the structure of ai-models.csv from the project README.
+// It holds a plain comment, a quoted comment containing a comma and escaped
+// quotes, a header row, and three data rows with a blank line before the last.
+const sampleCSV = `# Generated by ollama list
+"# Sample Quoted Comment, with ""quotes"" itself"
+"NAME","ID","SIZE","MODIFIED"
+"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
+"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"
+
+"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
+`
+
+// malformedCSV for testing error handling: the last header field is missing
+// its closing quote.
+const malformedCSV = `# Comment
+"NAME","ID","SIZE","MODIFIED
+"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
+`
+
+// TestParseIDs verifies the ParseIDs function for various URL formats.
+func TestParseIDs(t *testing.T) {
+	tests := []struct {
+		name    string
+		url     string
+		wantDoc string
+		wantGid string
+	}{
+		{
+			name:    "Google Sheets Edit / Share URL with gid",
+			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238",
+			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
+			wantGid: "559037238",
+		},
+		{
+			name:    "Google Sheets CSV URL with gid",
+			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238",
+			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
+			wantGid: "559037238",
+		},
+		{
+			name:    "URL without gid",
+			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit",
+			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
+			wantGid: "0",
+		},
+		{
+			name:    "Invalid URL",
+			url:     "https://example.com/invalid",
+			wantDoc: "",
+			wantGid: "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotDoc, gotGid := ParseIDs(tt.url)
+			if gotDoc != tt.wantDoc {
+				t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc)
+			}
+			if gotGid != tt.wantGid {
+				t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid)
+			}
+		})
+	}
+}
+
+// TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL
+// for a 200 response, a transport error, and a non-200 response. httpGet is
+// mocked for the duration of the test and restored afterwards.
+func TestNewReaderFromURL(t *testing.T) {
+	originalGet := httpGet
+	defer func() { httpGet = originalGet }()
+
+	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
+
+	// Test successful HTTP response
+	mockResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(sampleCSV)),
+	}
+	client := &mockHTTPClient{resp: mockResp}
+	httpGet = client.Get
+
+	reader := NewReaderFromURL(url)
+	if reader.err != nil {
+		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
+	}
+	if reader.resp != mockResp {
+		t.Error("NewReaderFromURL() did not set response correctly")
+	}
+	if !reader.close {
+		t.Error("NewReaderFromURL() did not set close flag")
+	}
+
+	// Test HTTP failure
+	networkErr := errors.New("network error")
+	client = &mockHTTPClient{resp: mockResp}
+	client.err = networkErr
+	httpGet = client.Get
+
+	reader = NewReaderFromURL(url)
+	if reader.err == nil {
+		t.Error("NewReaderFromURL() expected error, got nil")
+	} else if !errors.Is(reader.err, networkErr) {
+		t.Errorf("NewReaderFromURL() error = %v, want the transport error", reader.err)
+	}
+
+	// Test non-200 status
+	client = &mockHTTPClient{resp: &http.Response{
+		StatusCode: http.StatusNotFound,
+		Body:       io.NopCloser(strings.NewReader("these aren't the droids you're looking for")),
+	}}
+	httpGet = client.Get
+
+	reader = NewReaderFromURL(url)
+	if reader.err == nil {
+		t.Error("NewReaderFromURL() expected error for non-200 status, got nil")
+	} else if !errors.Is(reader.err, ErrHTTPGet) {
+		// Callers rely on the sentinel to recognize a bad status.
+		t.Errorf("NewReaderFromURL() error = %v, want ErrHTTPGet", reader.err)
+	}
+}
+
+// TestRead tests the Read method for comment handling.
+func TestRead(t *testing.T) {
+	tests := []struct {
+		name             string
+		preserveComments bool
+		expected         [][]string
+	}{
+		{
+			name: "Skip comments",
+			expected: [][]string{
+				{"NAME", "ID", "SIZE", "MODIFIED"},
+				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
+				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
+				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
+			},
+		},
+		{
+			name:             "Don't skip comments",
+			preserveComments: true,
+			expected: [][]string{
+				{"# Generated by ollama list"},
+				{"# Sample Quoted Comment, with \"quotes\" itself"},
+				{"NAME", "ID", "SIZE", "MODIFIED"},
+				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
+				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
+				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := NewReader(strings.NewReader(sampleCSV))
+			if tt.preserveComments {
+				reader.Comment = 0
+			}
+
+			for i, want := range tt.expected {
+				got, err := reader.Read()
+				if err != nil {
+					t.Errorf("Read() error at record %d: %v", i, err)
+				}
+				if !slices.Equal(got, want) {
+					t.Errorf("Read() record %d = %v, want %v", i, got, want)
+				}
+			}
+
+			// Verify EOF
+			_, err := reader.Read()
+			if !errors.Is(err, io.EOF) {
+				t.Errorf("Read() expected EOF, got %v", err)
+			}
+		})
+	}
+}
+
+// TestReadAll checks that ReadAll on sampleCSV returns only the data records
+// when comments are skipped (the default).
+func TestReadAll(t *testing.T) {
+	tests := []struct {
+		name     string
+		expected [][]string
+	}{
+		{
+			name: "Skip comments",
+			expected: [][]string{
+				{"NAME", "ID", "SIZE", "MODIFIED"},
+				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
+				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
+				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := NewReader(strings.NewReader(sampleCSV))
+
+			got, err := reader.ReadAll()
+			if err != nil {
+				t.Fatalf("ReadAll() error: %v", err)
+			}
+			// Stop here on a count mismatch: indexing got below would panic
+			// with an index out of range instead of reporting a failure.
+			if len(got) != len(tt.expected) {
+				t.Fatalf("ReadAll() returned %d records, want %d", len(got), len(tt.expected))
+			}
+			for i, want := range tt.expected {
+				if !slices.Equal(got[i], want) {
+					t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want)
+				}
+			}
+		})
+	}
+}
+
+// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV.
+func TestNewReaderFromURLWithMalformedCSV(t *testing.T) {
+	mockResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader(malformedCSV)),
+	}
+	client := &mockHTTPClient{resp: mockResp}
+	originalGet := httpGet
+	httpGet = client.Get
+	defer func() { httpGet = originalGet }()
+
+	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
+	reader := NewReaderFromURL(url)
+	if reader.err != nil {
+		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
+	}
+
+	// Reading should fail due to malformed CSV
+	_, err := reader.Read()
+	if err == nil {
+		t.Error("Read() expected error for malformed CSV, got nil")
+	}
+}