mirror of
				https://github.com/therootcompany/golib.git
				synced 2025-10-31 05:02:52 +00:00 
			
		
		
		
	feat(gsheet2csv): parse URLs and CSVs with comments
This commit is contained in:
		
							parent
							
								
									dc951ce388
								
							
						
					
					
						commit
						24ec3f021d
					
				
							
								
								
									
										7
									
								
								io/transform/gsheet2csv/LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								io/transform/gsheet2csv/LICENSE
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | ||||
| Authored in 2025 by AJ ONeal <aj@therootcompany.com> | ||||
| To the extent possible under law, the author(s) have dedicated all copyright | ||||
| and related and neighboring rights to this software to the public domain | ||||
| worldwide. This software is distributed without any warranty. | ||||
| 
 | ||||
| You should have received a copy of the CC0 Public Domain Dedication along with | ||||
| this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. | ||||
							
								
								
									
										125
									
								
								io/transform/gsheet2csv/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								io/transform/gsheet2csv/README.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | ||||
| # gsheet2csv | ||||
| 
 | ||||
| [](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv) | ||||
| 
 | ||||
| A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader. | ||||
| 
 | ||||
| This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us: | ||||
| 
 | ||||
| - works with Google Sheet URLs, regardless of URL format | ||||
|    - Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000> | ||||
|    - Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing> | ||||
|    - CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000> | ||||
|    - anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}` | ||||
| - can write out for import to gsheet (comments containing quotes or commas are quoted), \ | ||||
|   or in RFC form (comments are never quoted, but values beginning with a comment character are) | ||||
| - swaps `\r\n` (Windows) for `\n` (Unix) and ensures a trailing newline (a la `encoding/csv`) | ||||
| 
 | ||||
| Note: | ||||
| 
 | ||||
| - The Google Sheet must be shared to **Anyone with the link**. | ||||
| - Read and write in 'gsheet' style for reciprocity of comment handling | ||||
| - Be careful about single-column CSVs \ | ||||
|   (all comment-like lines are comments, same as with `encoding/csv` and empty lines) | ||||
| 
 | ||||
| # Usage | ||||
| 
 | ||||
| Same as `encoding/csv` (embedded), but with two extra options: | ||||
| 
 | ||||
| ```go | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||
| ) | ||||
| 
 | ||||
| func main() { | ||||
| 	switch len(os.Args) { | ||||
| 	case 2: | ||||
| 		break | ||||
| 	case 1: | ||||
| 		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0]) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 	urlOrPath := os.Args[1] | ||||
| 
 | ||||
| 	gsr := gsheet2csv.NewReaderFrom(urlOrPath) | ||||
| 	records, err := gsr.ReadAll() | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	csvw := gsheet2csv.NewWriter(os.Stdout) | ||||
| 	csvw.Comment = gsr.Comment | ||||
| 	if err := csvw.WriteAll(records); err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| } | ||||
| ``` | ||||
| 
 | ||||
| # CLI | ||||
| 
 | ||||
| There are a few convenience utilities: | ||||
| 
 | ||||
| - `gsheet2csv` (also `gsheet2tsv`) | ||||
| - `gsheet2env` | ||||
| 
 | ||||
| ## gsheet2csv | ||||
| 
 | ||||
| The output is only slightly different from a direct Google Sheets CSV export: comments and newlines are reformatted. | ||||
| 
 | ||||
| The alterable behavior is almost exclusively for testing. | ||||
| 
 | ||||
| ### Installation | ||||
| 
 | ||||
| ```sh | ||||
| go install github.com/therootcompany/golib/io/transform/gsheet2csv/cmd/gsheet2csv@latest | ||||
| ``` | ||||
| 
 | ||||
| ### Usage | ||||
| 
 | ||||
| ```sh | ||||
| gsheet2csv -raw -o ./gsheet.csv 'https://docs.google.com/spreadsheets/...' | ||||
| 
 | ||||
| gsheet2csv -d '\t' --write-style 'gsheet' ./gsheet.csv > ./gsheet.tsv | ||||
| 
 | ||||
| gsheet2csv --strip-comments ./gsheet.csv > ./sheet.csv | ||||
| ``` | ||||
| 
 | ||||
| ```text | ||||
| --raw               download without processing | ||||
| --print-ids         print ids to stdout without downloading | ||||
| --print-url         print url to stdout without downloading | ||||
| -o <filepath>       write records to file (default: stdout) | ||||
| -d                  field delimiter (for output) | ||||
| --read-delimiter    input field delimiter (for testing reciprocity) | ||||
| --crlf              write using CRLF (\r\n) as the record separator | ||||
| --comment '#'       treat lines starting with # as comments | ||||
| --strip-comments    ignore single-field data beginning with a comment character | ||||
| --read-style        'gsheet' (preserves comments as single-field records) | ||||
|                     or 'rfc' (ignore lines starting with comment character) | ||||
| --write-style       'gsheet' (quote single-field comments containing quotes or commas) | ||||
|                     or 'rfc' (only quote values starting with a comment character) | ||||
| ``` | ||||
| 
 | ||||
| ### ASCII Delimiters | ||||
| 
 | ||||
| ``` | ||||
| ,   comma | ||||
| \t  tab (or a normal tab) | ||||
|     space (just a normal space) | ||||
| :   colon | ||||
| ;   semicolon | ||||
| |   pipe | ||||
| ^_  unit separator | ||||
| ^^  record separator | ||||
| ^]  group separator | ||||
| ^\  file separator | ||||
| \f  form feed (also ^L) | ||||
| \v  vertical tab (also ^K) | ||||
| ``` | ||||
							
								
								
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2csv/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2csv/main.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,220 @@ | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/csv" | ||||
| 	"errors" | ||||
| 	"flag" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"unicode/utf8" | ||||
| 
 | ||||
| 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||
| ) | ||||
| 
 | ||||
// CSVReader is the set of read methods this command relies on. In main it is
// filled with either a *csv.Reader or a *gsheet2csv.Reader.
type CSVReader interface {
	// Read returns the next record.
	Read() (record []string, err error)
	// ReadAll returns every remaining record.
	ReadAll() (records [][]string, err error)
}
| 
 | ||||
// CSVWriter is the set of write methods this command relies on. In main it is
// filled with a *gsheet2csv.Writer.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record in records.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer.
	Flush()
	// Error reports an error from an earlier Write or Flush.
	Error() error
}
| 
 | ||||
| func main() { | ||||
| 	var commentArg string | ||||
| 	format := "CSV" | ||||
| 	delim := ',' | ||||
| 	if strings.Contains(os.Args[0], "tsv") { | ||||
| 		delim = '\t' | ||||
| 		format = "TSV" | ||||
| 	} | ||||
| 
 | ||||
| 	// Parse command-line flags | ||||
| 	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)") | ||||
| 	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") | ||||
| 	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||
| 	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||
| 	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") | ||||
| 	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") | ||||
| 	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") | ||||
| 	rawOnly := flag.Bool("raw", false, "don't parse, just download") | ||||
| 	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)") | ||||
| 	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV") | ||||
| 	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read") | ||||
| 	flag.Usage = func() { | ||||
| 		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) | ||||
| 		fmt.Fprintf(os.Stderr, "Flags:\n") | ||||
| 		flag.PrintDefaults() | ||||
| 		fmt.Fprintf(os.Stderr, "\nExample:\n") | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0]) | ||||
| 	} | ||||
| 	flag.Parse() | ||||
| 
 | ||||
| 	// Check for URL argument | ||||
| 	if len(flag.Args()) != 1 { | ||||
| 		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") | ||||
| 		flag.Usage() | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 	url := flag.Args()[0] | ||||
| 
 | ||||
| 	// Prepare output writer | ||||
| 	var out *os.File | ||||
| 	if *outputFile != "" { | ||||
| 		var err error | ||||
| 		out, err = os.Create(*outputFile) | ||||
| 		if err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		defer func() { _ = out.Close() }() | ||||
| 	} else { | ||||
| 		out = os.Stdout | ||||
| 	} | ||||
| 
 | ||||
| 	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString) | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	delim, err = gsheet2csv.DecodeDelimiter(*delimString) | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	var rc io.ReadCloser | ||||
| 	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { | ||||
| 		docid, gid := gsheet2csv.ParseIDs(url) | ||||
| 		if *parseOnly { | ||||
| 			fmt.Printf("docid=%s\ngid=%s\n", docid, gid) | ||||
| 		} else { | ||||
| 			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) | ||||
| 		} | ||||
| 
 | ||||
| 		sheetURL := gsheet2csv.ToCSVURL(docid, gid) | ||||
| 		if *urlOnly { | ||||
| 			fmt.Printf("%s\n", sheetURL) | ||||
| 		} else { | ||||
| 			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL) | ||||
| 		} | ||||
| 
 | ||||
| 		if !*urlOnly { | ||||
| 			resp, err := gsheet2csv.GetSheet(docid, gid) | ||||
| 			if err != nil { | ||||
| 				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) | ||||
| 				os.Exit(1) | ||||
| 			} | ||||
| 			defer func() { _ = resp.Body.Close() }() | ||||
| 			rc = resp.Body | ||||
| 		} | ||||
| 	} else { | ||||
| 		url = strings.TrimPrefix(url, "file://") | ||||
| 		fmt.Fprintf(os.Stderr, "opening %s\n", url) | ||||
| 		f, err := os.Open(url) | ||||
| 		if err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		rc = f | ||||
| 	} | ||||
| 
 | ||||
| 	if out == os.Stdout { | ||||
| 		fmt.Fprintf(os.Stderr, "\n") | ||||
| 	} | ||||
| 
 | ||||
| 	if *urlOnly || *parseOnly { | ||||
| 		os.Exit(0) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	if *rawOnly { | ||||
| 		if _, err := io.Copy(out, rc); err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	var comment rune | ||||
| 	if commentArg == "0" { | ||||
| 		comment = 0 | ||||
| 	} else { | ||||
| 		comment, _ = utf8.DecodeRuneInString(commentArg) | ||||
| 	} | ||||
| 
 | ||||
| 	// Create a reader for the Google Sheet | ||||
| 	var csvr CSVReader | ||||
| 	if *readStyle == "rfc" { | ||||
| 		rfcr := csv.NewReader(rc) | ||||
| 		rfcr.Comma = inputDelim | ||||
| 		rfcr.Comment = comment | ||||
| 		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||
| 		csvr = rfcr | ||||
| 	} else { | ||||
| 		gsr := gsheet2csv.NewReader(rc) | ||||
| 		gsr.Comma = inputDelim | ||||
| 		if *noReadComments { | ||||
| 			gsr.Comment = comment | ||||
| 		} else { | ||||
| 			gsr.Comment = 0 | ||||
| 		} | ||||
| 		gsr.ReuseRecord = true | ||||
| 		csvr = gsr | ||||
| 	} | ||||
| 
 | ||||
| 	// Create CSV writer | ||||
| 	var csvw CSVWriter | ||||
| 	// if *writeStyle == "gsheet" | ||||
| 	{ | ||||
| 		gsw := gsheet2csv.NewWriter(out) | ||||
| 		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet" | ||||
| 		gsw.Comment = comment | ||||
| 		gsw.Comma = delim // Set delimiter to tab for TSV | ||||
| 		gsw.UseCRLF = *useCRLF | ||||
| 		csvw = gsw | ||||
| 	} | ||||
| 	// else { | ||||
| 	// 	rfcw := csv.NewWriter(out) | ||||
| 	// 	rfcw.Comma = delim | ||||
| 	// 	rfcw.UseCRLF = *useCRLF | ||||
| 	// 	csvw = rfcw | ||||
| 	// } | ||||
| 
 | ||||
| 	for { | ||||
| 		// Convert each record | ||||
| 		record, err := csvr.Read() | ||||
| 		if err != nil { | ||||
| 			if errors.Is(err, io.EOF) { | ||||
| 				break | ||||
| 			} | ||||
| 			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 			return | ||||
| 		} | ||||
| 
 | ||||
| 		if err := csvw.Write(record); err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	csvw.Flush() | ||||
| 	if err := csvw.Error(); err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	if out != os.Stdout { | ||||
| 		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,220 @@ | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/csv" | ||||
| 	"errors" | ||||
| 	"flag" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"unicode/utf8" | ||||
| 
 | ||||
| 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||
| ) | ||||
| 
 | ||||
// CSVReader is the set of read methods this command relies on. In main it is
// filled with either a *csv.Reader or a *gsheet2csv.Reader.
type CSVReader interface {
	// Read returns the next record.
	Read() (record []string, err error)
	// ReadAll returns every remaining record.
	ReadAll() (records [][]string, err error)
}
| 
 | ||||
// CSVWriter is the set of write methods this command relies on. In main it is
// filled with a *gsheet2csv.Writer.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record in records.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer.
	Flush()
	// Error reports an error from an earlier Write or Flush.
	Error() error
}
| 
 | ||||
| func main() { | ||||
| 	var commentArg string | ||||
| 	format := "CSV" | ||||
| 	delim := ',' | ||||
| 	if strings.Contains(os.Args[0], "tsv") { | ||||
| 		delim = '\t' | ||||
| 		format = "TSV" | ||||
| 	} | ||||
| 
 | ||||
| 	// Parse command-line flags | ||||
| 	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)") | ||||
| 	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") | ||||
| 	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||
| 	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||
| 	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") | ||||
| 	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") | ||||
| 	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") | ||||
| 	rawOnly := flag.Bool("raw", false, "don't parse, just download") | ||||
| 	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)") | ||||
| 	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV") | ||||
| 	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read") | ||||
| 	flag.Usage = func() { | ||||
| 		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) | ||||
| 		fmt.Fprintf(os.Stderr, "Flags:\n") | ||||
| 		flag.PrintDefaults() | ||||
| 		fmt.Fprintf(os.Stderr, "\nExample:\n") | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) | ||||
| 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0]) | ||||
| 	} | ||||
| 	flag.Parse() | ||||
| 
 | ||||
| 	// Check for URL argument | ||||
| 	if len(flag.Args()) != 1 { | ||||
| 		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") | ||||
| 		flag.Usage() | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 	url := flag.Args()[0] | ||||
| 
 | ||||
| 	// Prepare output writer | ||||
| 	var out *os.File | ||||
| 	if *outputFile != "" { | ||||
| 		var err error | ||||
| 		out, err = os.Create(*outputFile) | ||||
| 		if err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		defer func() { _ = out.Close() }() | ||||
| 	} else { | ||||
| 		out = os.Stdout | ||||
| 	} | ||||
| 
 | ||||
| 	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString) | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	delim, err = gsheet2csv.DecodeDelimiter(*delimString) | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	var rc io.ReadCloser | ||||
| 	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { | ||||
| 		docid, gid := gsheet2csv.ParseIDs(url) | ||||
| 		if *parseOnly { | ||||
| 			fmt.Printf("docid=%s\ngid=%s\n", docid, gid) | ||||
| 		} else { | ||||
| 			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) | ||||
| 		} | ||||
| 
 | ||||
| 		sheetURL := gsheet2csv.ToCSVURL(docid, gid) | ||||
| 		if *urlOnly { | ||||
| 			fmt.Printf("%s\n", sheetURL) | ||||
| 		} else { | ||||
| 			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL) | ||||
| 		} | ||||
| 
 | ||||
| 		if !*urlOnly { | ||||
| 			resp, err := gsheet2csv.GetSheet(docid, gid) | ||||
| 			if err != nil { | ||||
| 				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) | ||||
| 				os.Exit(1) | ||||
| 			} | ||||
| 			defer func() { _ = resp.Body.Close() }() | ||||
| 			rc = resp.Body | ||||
| 		} | ||||
| 	} else { | ||||
| 		url = strings.TrimPrefix(url, "file://") | ||||
| 		fmt.Fprintf(os.Stderr, "opening %s\n", url) | ||||
| 		f, err := os.Open(url) | ||||
| 		if err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		rc = f | ||||
| 	} | ||||
| 
 | ||||
| 	if out == os.Stdout { | ||||
| 		fmt.Fprintf(os.Stderr, "\n") | ||||
| 	} | ||||
| 
 | ||||
| 	if *urlOnly || *parseOnly { | ||||
| 		os.Exit(0) | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	if *rawOnly { | ||||
| 		if _, err := io.Copy(out, rc); err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	var comment rune | ||||
| 	if commentArg == "0" { | ||||
| 		comment = 0 | ||||
| 	} else { | ||||
| 		comment, _ = utf8.DecodeRuneInString(commentArg) | ||||
| 	} | ||||
| 
 | ||||
| 	// Create a reader for the Google Sheet | ||||
| 	var csvr CSVReader | ||||
| 	if *readStyle == "rfc" { | ||||
| 		rfcr := csv.NewReader(rc) | ||||
| 		rfcr.Comma = inputDelim | ||||
| 		rfcr.Comment = comment | ||||
| 		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||
| 		csvr = rfcr | ||||
| 	} else { | ||||
| 		gsr := gsheet2csv.NewReader(rc) | ||||
| 		gsr.Comma = inputDelim | ||||
| 		if *noReadComments { | ||||
| 			gsr.Comment = comment | ||||
| 		} else { | ||||
| 			gsr.Comment = 0 | ||||
| 		} | ||||
| 		gsr.ReuseRecord = true | ||||
| 		csvr = gsr | ||||
| 	} | ||||
| 
 | ||||
| 	// Create CSV writer | ||||
| 	var csvw CSVWriter | ||||
| 	// if *writeStyle == "gsheet" | ||||
| 	{ | ||||
| 		gsw := gsheet2csv.NewWriter(out) | ||||
| 		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet" | ||||
| 		gsw.Comment = comment | ||||
| 		gsw.Comma = delim // Set delimiter to tab for TSV | ||||
| 		gsw.UseCRLF = *useCRLF | ||||
| 		csvw = gsw | ||||
| 	} | ||||
| 	// else { | ||||
| 	// 	rfcw := csv.NewWriter(out) | ||||
| 	// 	rfcw.Comma = delim | ||||
| 	// 	rfcw.UseCRLF = *useCRLF | ||||
| 	// 	csvw = rfcw | ||||
| 	// } | ||||
| 
 | ||||
| 	for { | ||||
| 		// Convert each record | ||||
| 		record, err := csvr.Read() | ||||
| 		if err != nil { | ||||
| 			if errors.Is(err, io.EOF) { | ||||
| 				break | ||||
| 			} | ||||
| 			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 			return | ||||
| 		} | ||||
| 
 | ||||
| 		if err := csvw.Write(record); err != nil { | ||||
| 			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) | ||||
| 			os.Exit(1) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	csvw.Flush() | ||||
| 	if err := csvw.Error(); err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	if out != os.Stdout { | ||||
| 		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										33
									
								
								io/transform/gsheet2csv/fixtures/example.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								io/transform/gsheet2csv/fixtures/example.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,33 @@ | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||
| ) | ||||
| 
 | ||||
| func main() { | ||||
| 	switch len(os.Args) { | ||||
| 	case 2: | ||||
| 		break | ||||
| 	case 1: | ||||
| 		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0]) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 	urlOrPath := os.Args[1] | ||||
| 
 | ||||
| 	gsr := gsheet2csv.NewReaderFrom(urlOrPath) | ||||
| 	records, err := gsr.ReadAll() | ||||
| 	if err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| 
 | ||||
| 	csvw := gsheet2csv.NewWriter(os.Stdout) | ||||
| 	csvw.Comment = gsr.Comment | ||||
| 	if err := csvw.WriteAll(records); err != nil { | ||||
| 		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-raw.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-raw.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| # this is a comment,, | ||||
| "# this is, well, a quoted comment",, | ||||
| "# this is a ""super""-quoted comment",, | ||||
| Key,Value, | ||||
| Name,55, | ||||
| Girlfriend's Age,55, | ||||
| ,, | ||||
| My IQ,55, | ||||
| ,55, | ||||
| "Key,with,Comma",, | ||||
| ,"Value,with,Comma", | ||||
| "Quoted ""Key""",Normal Value, | ||||
| Normal Key,"Quoted ""Value""", | ||||
| "Quoted ""Key""",, | ||||
| ,"Quoted ""Value""", | ||||
| x,y,z | ||||
| "# comment with trailing comma,",, | ||||
| #1,2,#3 | ||||
| 
 | 
							
								
								
									
										14
									
								
								io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| Key,Value, | ||||
| Name,55, | ||||
| Girlfriend's Age,55, | ||||
| ,, | ||||
| My IQ,55, | ||||
| ,55, | ||||
| "Key,with,Comma",, | ||||
| ,"Value,with,Comma", | ||||
| "Quoted ""Key""",Normal Value, | ||||
| Normal Key,"Quoted ""Value""", | ||||
| "Quoted ""Key""",, | ||||
| ,"Quoted ""Value""", | ||||
| x,y,z | ||||
| "#1",2,#3 | ||||
| 
 | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| # this is a comment | ||||
| "# this is, well, a quoted comment" | ||||
| "# this is a ""super""-quoted comment" | ||||
| Key,Value, | ||||
| Name,55, | ||||
| Girlfriend's Age,55, | ||||
| ,, | ||||
| My IQ,55, | ||||
| ,55, | ||||
| "Key,with,Comma",, | ||||
| ,"Value,with,Comma", | ||||
| "Quoted ""Key""",Normal Value, | ||||
| Normal Key,"Quoted ""Value""", | ||||
| "Quoted ""Key""",, | ||||
| ,"Quoted ""Value""", | ||||
| x,y,z | ||||
| "# comment with trailing comma," | ||||
| "#1",2,#3 | ||||
| Can't render this file because it contains an unexpected character in line 10 and column 16. | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| # this is a comment | ||||
| # this is, well, a quoted comment | ||||
| # this is a "super"-quoted comment | ||||
| Key,Value, | ||||
| Name,55, | ||||
| Girlfriend's Age,55, | ||||
| ,, | ||||
| My IQ,55, | ||||
| ,55, | ||||
| "Key,with,Comma",, | ||||
| ,"Value,with,Comma", | ||||
| "Quoted ""Key""",Normal Value, | ||||
| Normal Key,"Quoted ""Value""", | ||||
| "Quoted ""Key""",, | ||||
| ,"Quoted ""Value""", | ||||
| x,y,z | ||||
| # comment with trailing comma, | ||||
| "#1",2,#3 | ||||
| Can't render this file because it contains an unexpected character in line 3 and column 13. | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| # this is a comment | ||||
| # this is, well, a quoted comment | ||||
| # this is a "super"-quoted comment | ||||
| Key	Value	 | ||||
| Name	55	 | ||||
| Girlfriend's Age	55	 | ||||
| 		 | ||||
| My IQ	55	 | ||||
| 	55	 | ||||
| Key,with,Comma		 | ||||
| 	Value,with,Comma	 | ||||
| "Quoted ""Key"""	Normal Value	 | ||||
| Normal Key	"Quoted ""Value"""	 | ||||
| "Quoted ""Key"""		 | ||||
| 	"Quoted ""Value"""	 | ||||
| x	y	z | ||||
| # comment with trailing comma, | ||||
| "#1"	2	#3 | ||||
| Can't render this file because it contains an unexpected character in line 3 and column 13. | 
							
								
								
									
										3
									
								
								io/transform/gsheet2csv/go.mod
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								io/transform/gsheet2csv/go.mod
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | ||||
| module github.com/therootcompany/golib/io/transform/gsheet2csv | ||||
| 
 | ||||
| go 1.24.6 | ||||
							
								
								
									
										309
									
								
								io/transform/gsheet2csv/gsheet2csv.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										309
									
								
								io/transform/gsheet2csv/gsheet2csv.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,309 @@ | ||||
| // Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com) | ||||
| // | ||||
| // To the extent possible under law, the author(s) have dedicated all copyright | ||||
| // and related and neighboring rights to this software to the public domain | ||||
| // worldwide. This software is distributed without any warranty. | ||||
| // | ||||
| // You should have received a copy of the CC0 Public Domain Dedication along with | ||||
| // this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. | ||||
| // | ||||
| // SPDX-License-Identifier: CC0-1.0 | ||||
| 
 | ||||
| package gsheet2csv | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/csv" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"strings" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	fileSeparator   = '\x1c' | ||||
| 	groupSeparator  = '\x1d' | ||||
| 	recordSeparator = '\x1e' | ||||
| 	unitSeparator   = '\x1f' | ||||
| ) | ||||
| 
 | ||||
var (
	// ErrHTTPGet is returned by GetSheet when the download answers with any
	// status other than 200 OK.
	ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

	// httpGet performs the download. It is a variable so that tests can
	// replace it with a stub.
	httpGet = http.Get
)
| 
 | ||||
// Reader wraps an encoding/csv Reader and can skip comment rows, i.e. rows
// whose first field starts with Comment and whose other fields are all empty.
//
// DocID, GID and URL record where the data came from when the Reader was
// created by one of the NewReaderFrom* constructors.
type Reader struct {
	*csv.Reader

	DocID   string // document ID, set by NewReaderFromIDs
	GID     string // value of the gid parameter, set by NewReaderFromIDs
	URL     string // CSV export URL, or the file path for NewReaderFrom
	Comment rune   // first rune of a comment row; 0 disables skipping

	r     io.Reader      // input handed to the csv.Reader
	resp  *http.Response // response whose Body backs r, when downloaded
	close bool           // whether Read closes resp.Body once reading fails or ends
	err   error          // constructor failure, returned by Read
}
| 
 | ||||
| func NewReaderFrom(urlOrPath string) *Reader { | ||||
| 	if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") { | ||||
| 		return NewReaderFromURL(urlOrPath) | ||||
| 	} | ||||
| 
 | ||||
| 	urlOrPath = strings.TrimPrefix(urlOrPath, "file://") | ||||
| 	f, err := os.Open(urlOrPath) | ||||
| 	r := NewReader(f) | ||||
| 	r.URL = urlOrPath | ||||
| 	if err != nil { | ||||
| 		r.err = err | ||||
| 	} | ||||
| 
 | ||||
| 	return r | ||||
| } | ||||
| 
 | ||||
| func NewReaderFromURL(url string) *Reader { | ||||
| 	docid, gid := ParseIDs(url) | ||||
| 
 | ||||
| 	return NewReaderFromIDs(docid, gid) | ||||
| } | ||||
| 
 | ||||
| func NewReaderFromIDs(docid, gid string) *Reader { | ||||
| 	resp, err := GetSheet(docid, gid) | ||||
| 	if err != nil { | ||||
| 		r := NewReader(nil) | ||||
| 		r.err = err | ||||
| 		return r | ||||
| 	} | ||||
| 
 | ||||
| 	r := NewReader(resp.Body) | ||||
| 	r.URL = ToCSVURL(docid, gid) | ||||
| 	r.DocID = docid | ||||
| 	r.GID = gid | ||||
| 	r.resp = resp | ||||
| 	r.close = true | ||||
| 	return r | ||||
| } | ||||
| 
 | ||||
// ToCSVURL returns the Google Sheets CSV export URL for the given document ID
// and gid. Both values are inserted verbatim, without URL escaping.
func ToCSVURL(docid, gid string) string {
	const (
		base  = "https://docs.google.com/spreadsheets/d/"
		query = "/export?format=csv&usp=sharing&gid="
	)
	return base + docid + query + gid
}
| 
 | ||||
| func GetSheet(docid, gid string) (*http.Response, error) { | ||||
| 	downloadURL := ToCSVURL(docid, gid) | ||||
| 
 | ||||
| 	resp, err := httpGet(downloadURL) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if resp.StatusCode != http.StatusOK { | ||||
| 		_ = resp.Body.Close() | ||||
| 		return nil, ErrHTTPGet | ||||
| 	} | ||||
| 
 | ||||
| 	return resp, nil | ||||
| } | ||||
| 
 | ||||
| func NewReader(r io.Reader) *Reader { | ||||
| 	csvr := csv.NewReader(r) | ||||
| 	csvr.Comma = ',' | ||||
| 	csvr.Comment = 0          // to allow distinguishing between quoted comments and fields | ||||
| 	csvr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||
| 	csvr.LazyQuotes = false   // fields that need quotes use them correctly | ||||
| 	csvr.TrimLeadingSpace = false | ||||
| 	csvr.ReuseRecord = false | ||||
| 	return &Reader{ | ||||
| 		Reader:  csvr, | ||||
| 		Comment: '#', | ||||
| 		r:       r, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func DecodeDelimiter(delimString string) (rune, error) { | ||||
| 	switch delimString { | ||||
| 	case "^_", "\\x1f": | ||||
| 		delimString = string(unitSeparator) | ||||
| 	case "^^", "\\x1e": | ||||
| 		delimString = string(recordSeparator) | ||||
| 	case "^]", "\\x1d": | ||||
| 		delimString = string(groupSeparator) | ||||
| 	case "^\\", "\\x1c": | ||||
| 		delimString = string(fileSeparator) | ||||
| 	case "^L", "\\f": | ||||
| 		delimString = "\f" | ||||
| 	case "^K", "\\v": | ||||
| 		delimString = "\v" | ||||
| 	case "^I", "\\t": | ||||
| 		delimString = "	" | ||||
| 	} | ||||
| 	delim, _ := utf8.DecodeRuneInString(delimString) | ||||
| 	return delim, nil | ||||
| } | ||||
| 
 | ||||
| func (r *Reader) Read() ([]string, error) { | ||||
| 	if r.err != nil { | ||||
| 		return nil, r.err | ||||
| 	} | ||||
| 
 | ||||
| 	for { | ||||
| 		record, err := r.Reader.Read() | ||||
| 		if err != nil { | ||||
| 			if r.close { | ||||
| 				_ = r.resp.Body.Close() | ||||
| 			} | ||||
| 			return nil, err | ||||
| 		} | ||||
| 
 | ||||
| 		if r.Comment > 0 { | ||||
| 			if rv, _ := utf8.DecodeRuneInString(record[0]); rv == r.Comment { | ||||
| 				last := len(record) - 1 | ||||
| 				for len(record[last]) == 0 { | ||||
| 					last -= 1 | ||||
| 				} | ||||
| 				if last == 0 { | ||||
| 					continue | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		return record, nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (r *Reader) ReadAll() ([][]string, error) { | ||||
| 	var records [][]string | ||||
| 
 | ||||
| 	for { | ||||
| 		record, err := r.Read() | ||||
| 		if nil != err { | ||||
| 			if errors.Is(err, io.EOF) { | ||||
| 				return records, nil | ||||
| 			} | ||||
| 			return records, err | ||||
| 		} | ||||
| 		records = append(records, record) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// ParseIDs extracts the document ID and gid from a Google Sheets URL.
//
// The document ID is the text after "/spreadsheets/d/" up to the next '/',
// '?' or '#', or the end of the string. If the prefix is missing or the ID is
// empty, both results are empty.
//
// The gid is the value of the first "gid=" parameter after the document ID.
// The key must directly follow '?', '&' or '#', so that keys merely ending in
// "gid" are not mistaken for it. The value ends at '#', '&', '?', '/' or the
// end of the string. When no gid is found, or its value is empty, gid is "0".
func ParseIDs(urlStr string) (docid string, gid string) {
	const (
		docPrefix = "/spreadsheets/d/"
		gidKey    = "gid="
	)

	_, rest, found := strings.Cut(urlStr, docPrefix)
	if !found {
		return "", ""
	}

	// Split off the document ID; rest keeps the delimiter that ended it.
	docid = rest
	rest = ""
	if end := strings.IndexAny(docid, "/?#"); end != -1 {
		docid, rest = docid[:end], docid[end:]
	}
	if docid == "" {
		return "", ""
	}

	gid = "0"
	for {
		i := strings.Index(rest, gidKey)
		if i == -1 {
			break
		}
		value := rest[i+len(gidKey):]
		if i > 0 && strings.IndexByte("?&#", rest[i-1]) != -1 {
			if end := strings.IndexAny(value, "#&?/"); end != -1 {
				value = value[:end]
			}
			if value != "" {
				gid = value
			}
			break
		}
		// "gid=" was the tail of some other key; keep looking after it.
		rest = value
	}
	return docid, gid
}
| 
 | ||||
// Writer wraps an encoding/csv Writer and adds special handling for records
// whose first field begins with the Comment rune.
//
// Such a record is a comment line when only its first field is non-empty.
// Otherwise it is data, and its first field is always quoted so that it is
// not mistaken for a comment.
type Writer struct {
	*csv.Writer

	// Comment is the rune that introduces a comment line. When it is 0,
	// every record goes through the embedded csv.Writer unchanged.
	Comment rune

	// QuoteAmbiguousComments selects how comment lines are written. When
	// false they are written verbatim. When true, a comment containing a
	// quote, the delimiter or a line break is written as a quoted field.
	QuoteAmbiguousComments bool

	w io.Writer // destination; records starting with Comment are written to it directly
}

// NewWriter returns a Writer that writes to w and uses '#' as the comment rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes one record.
//
// Records that do not start with the Comment rune (and all records when
// Comment is 0) are buffered by the embedded csv.Writer. Records that do start
// with it are formatted here and written straight to the destination, after
// flushing whatever the csv.Writer has buffered so that output order is kept.
//
// The record slice is never modified. An error from the flush or from the
// destination is returned.
func (w *Writer) Write(record []string) error {
	// Not handling comments? Move along.
	if w.Comment == 0 || len(record) == 0 {
		return w.Writer.Write(record)
	}
	if lead, _ := utf8.DecodeRuneInString(record[0]); lead != w.Comment {
		return w.Writer.Write(record)
	}

	// record[0] starts with the comment rune and is therefore non-empty, so
	// this scan stops at index 0 at the latest.
	lastNonEmpty := len(record) - 1
	for record[lastNonEmpty] == "" {
		lastNonEmpty--
	}

	// Push out buffered rows before writing to the destination directly.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	newline := "\n"
	if w.UseCRLF {
		newline = "\r\n"
	}

	// A data row always gets its first field quoted. A true comment is either
	// written verbatim or quoted only when its content requires it.
	quoteFirst := true
	if lastNonEmpty == 0 {
		if !w.QuoteAmbiguousComments {
			_, err := io.WriteString(w.w, record[0]+newline)
			return err
		}
		record = record[:1]
		quoteFirst = false
	}

	comma := string(w.Comma)
	needsQuoting := "\"\r\n" + comma

	var line strings.Builder
	for i, field := range record {
		if i > 0 {
			line.WriteString(comma)
		}
		if (i == 0 && quoteFirst) || strings.ContainsAny(field, needsQuoting) {
			line.WriteByte('"')
			line.WriteString(strings.ReplaceAll(field, `"`, `""`))
			line.WriteByte('"')
			continue
		}
		line.WriteString(field)
	}
	line.WriteString(newline)

	_, err := io.WriteString(w.w, line.String())
	return err
}

// WriteAll writes every record with Write, then flushes the embedded
// csv.Writer and returns its error, if any.
func (w *Writer) WriteAll(records [][]string) error {
	for _, record := range records {
		if err := w.Write(record); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
							
								
								
									
										249
									
								
								io/transform/gsheet2csv/gsheet2csv_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								io/transform/gsheet2csv/gsheet2csv_test.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,249 @@ | ||||
| package gsheet2csv | ||||
| 
 | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"slices" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
// mockHTTPClient is a canned stand-in for an HTTP GET function: it holds the
// response and error that its Get method hands back.
type mockHTTPClient struct {
	resp *http.Response
	err  error
}

// Get ignores the requested URL and returns the stored response and error.
func (m *mockHTTPClient) Get(_ string) (*http.Response, error) {
	return m.resp, m.err
}
| 
 | ||||
// CSV fixtures shared by the tests in this file.
const (
	// sampleCSV mimics the structure of ai-models.csv from the project
	// README: a plain comment, a quoted comment, a header row, data rows and
	// one blank line.
	sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"

"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`

	// malformedCSV has an unterminated quote in its header row and is used
	// to exercise error handling.
	malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
)
| 
 | ||||
| // TestParseIDs verifies the ParseIDs function for various URL formats. | ||||
| func TestParseIDs(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		name    string | ||||
| 		url     string | ||||
| 		wantDoc string | ||||
| 		wantGid string | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name:    "Google Sheets Edit / Share URL with gid", | ||||
| 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238", | ||||
| 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||
| 			wantGid: "559037238", | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:    "Google Sheets CSV URL with gid", | ||||
| 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238", | ||||
| 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||
| 			wantGid: "559037238", | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:    "URL without gid", | ||||
| 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit", | ||||
| 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||
| 			wantGid: "0", | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:    "Invalid URL", | ||||
| 			url:     "https://example.com/invalid", | ||||
| 			wantDoc: "", | ||||
| 			wantGid: "", | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tt := range tests { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			gotDoc, gotGid := ParseIDs(tt.url) | ||||
| 			if gotDoc != tt.wantDoc { | ||||
| 				t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc) | ||||
| 			} | ||||
| 			if gotGid != tt.wantGid { | ||||
| 				t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL. | ||||
| func TestNewReaderFromURL(t *testing.T) { | ||||
| 	originalGet := httpGet | ||||
| 	defer func() { httpGet = originalGet }() | ||||
| 
 | ||||
| 	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" | ||||
| 
 | ||||
| 	// Test successful HTTP response | ||||
| 	mockResp := &http.Response{ | ||||
| 		StatusCode: http.StatusOK, | ||||
| 		Body:       io.NopCloser(strings.NewReader(sampleCSV)), | ||||
| 	} | ||||
| 	client := &mockHTTPClient{resp: mockResp} | ||||
| 	httpGet = client.Get | ||||
| 
 | ||||
| 	reader := NewReaderFromURL(url) | ||||
| 	if reader.err != nil { | ||||
| 		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) | ||||
| 	} | ||||
| 	if reader.resp != mockResp { | ||||
| 		t.Error("NewReaderFromURL() did not set response correctly") | ||||
| 	} | ||||
| 	if !reader.close { | ||||
| 		t.Error("NewReaderFromURL() did not set close flag") | ||||
| 	} | ||||
| 
 | ||||
| 	// Test HTTP failure | ||||
| 	client = &mockHTTPClient{resp: mockResp} | ||||
| 	client.err = errors.New("network error") | ||||
| 	httpGet = client.Get | ||||
| 
 | ||||
| 	reader = NewReaderFromURL(url) | ||||
| 	if reader.err == nil { | ||||
| 		t.Error("NewReaderFromURL() expected error, got nil") | ||||
| 	} | ||||
| 
 | ||||
| 	// Test non-200 status | ||||
| 	client = &mockHTTPClient{resp: &http.Response{ | ||||
| 		StatusCode: http.StatusNotFound, | ||||
| 		Body:       io.NopCloser(strings.NewReader("these aren't the droids you're looking for")), | ||||
| 	}} | ||||
| 	httpGet = client.Get | ||||
| 
 | ||||
| 	reader = NewReaderFromURL(url) | ||||
| 	if reader.err == nil { | ||||
| 		t.Error("NewReaderFromURL() expected error for non-200 status, got nil") | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // TestRead tests the Read method for comment handling. | ||||
| func TestRead(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		name             string | ||||
| 		preserveComments bool | ||||
| 		expected         [][]string | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name: "Skip comments", | ||||
| 			expected: [][]string{ | ||||
| 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||
| 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name:             "Don't skip comments", | ||||
| 			preserveComments: true, | ||||
| 			expected: [][]string{ | ||||
| 				{"# Generated by ollama list"}, | ||||
| 				{"# Sample Quoted Comment, with \"quotes\" itself"}, | ||||
| 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||
| 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tt := range tests { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			reader := NewReader(strings.NewReader(sampleCSV)) | ||||
| 			if tt.preserveComments { | ||||
| 				reader.Comment = 0 | ||||
| 			} | ||||
| 
 | ||||
| 			for i, want := range tt.expected { | ||||
| 				got, err := reader.Read() | ||||
| 				if err != nil { | ||||
| 					t.Errorf("Read() error at record %d: %v", i, err) | ||||
| 				} | ||||
| 				if !slices.Equal(got, want) { | ||||
| 					t.Errorf("Read() record %d = %v, want %v", i, got, want) | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
| 			// Verify EOF | ||||
| 			_, err := reader.Read() | ||||
| 			if !errors.Is(err, io.EOF) { | ||||
| 				t.Errorf("Read() expected EOF, got %v", err) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // TestReadAll tests the ReadAll method for different configurations. | ||||
| func TestReadAll(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		name     string | ||||
| 		expected [][]string | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name: "Skip comments", | ||||
| 			expected: [][]string{ | ||||
| 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||
| 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||
| 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tt := range tests { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			reader := NewReader(strings.NewReader(sampleCSV)) | ||||
| 
 | ||||
| 			got, err := reader.ReadAll() | ||||
| 			if err != nil { | ||||
| 				t.Errorf("ReadAll() error: %v", err) | ||||
| 			} | ||||
| 			if len(got) != len(tt.expected) { | ||||
| 				t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected)) | ||||
| 			} | ||||
| 			for i, want := range tt.expected { | ||||
| 				if !slices.Equal(got[i], want) { | ||||
| 					t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want) | ||||
| 				} | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV. | ||||
| func TestNewReaderFromURLWithMalformedCSV(t *testing.T) { | ||||
| 	mockResp := &http.Response{ | ||||
| 		StatusCode: http.StatusOK, | ||||
| 		Body:       io.NopCloser(strings.NewReader(malformedCSV)), | ||||
| 	} | ||||
| 	client := &mockHTTPClient{resp: mockResp} | ||||
| 	originalGet := httpGet | ||||
| 	httpGet = client.Get | ||||
| 	defer func() { httpGet = originalGet }() | ||||
| 
 | ||||
| 	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" | ||||
| 	reader := NewReaderFromURL(url) | ||||
| 	if reader.err != nil { | ||||
| 		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) | ||||
| 	} | ||||
| 
 | ||||
| 	// Reading should fail due to malformed CSV | ||||
| 	_, err := reader.Read() | ||||
| 	if err == nil { | ||||
| 		t.Error("Read() expected error for malformed CSV, got nil") | ||||
| 	} | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user