mirror of
				https://github.com/therootcompany/golib.git
				synced 2025-10-31 13:12:51 +00:00 
			
		
		
		
	feat(gsheet2csv): parse URLs and CSVs with comments
This commit is contained in:
		
							parent
							
								
									dc951ce388
								
							
						
					
					
						commit
						24ec3f021d
					
				
							
								
								
									
										7
									
								
								io/transform/gsheet2csv/LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								io/transform/gsheet2csv/LICENSE
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | |||||||
|  | Authored in 2025 by AJ ONeal <aj@therootcompany.com> | ||||||
|  | To the extent possible under law, the author(s) have dedicated all copyright | ||||||
|  | and related and neighboring rights to this software to the public domain | ||||||
|  | worldwide. This software is distributed without any warranty. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the CC0 Public Domain Dedication along with | ||||||
|  | this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. | ||||||
							
								
								
									
										125
									
								
								io/transform/gsheet2csv/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								io/transform/gsheet2csv/README.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | |||||||
|  | # gsheet2csv | ||||||
|  | 
 | ||||||
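|  | [![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv) | ||||||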
|  | 
 | ||||||
|  | A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader. | ||||||
|  | 
 | ||||||
|  | This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us: | ||||||
|  | 
 | ||||||
|  | - works with Google Sheet URLs, regardless of URL format | ||||||
|  |    - Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000> | ||||||
|  |    - Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing> | ||||||
|  |    - CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000> | ||||||
|  |    - anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}` | ||||||
|  | - can write out for import to gsheet (comments containing quotes or commas are quoted), \ | ||||||
|  |   or in RFC form (comments are never quoted, but values beginning with a comment character are) | ||||||
|  | - swaps `\r\n` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`) | ||||||
|  | 
 | ||||||
|  | Note: | ||||||
|  | 
 | ||||||
|  | - The Google Sheet must be shared to **Anyone with the link**. | ||||||
|  | - Read and write in 'gsheet' style for reciprocity of comment handling | ||||||
|  | - Be careful about single-column CSVs \ | ||||||
|  |   (all comment-like lines are comments, same as with `encoding/csv` and empty lines) | ||||||
|  | 
 | ||||||
|  | # Usage | ||||||
|  | 
 | ||||||
|  | Same as `encoding/csv` (embedded), but with two extra options: | ||||||
|  | 
 | ||||||
|  | ```go | ||||||
|  | package main | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"os" | ||||||
|  | 
 | ||||||
|  | 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func main() { | ||||||
|  | 	switch len(os.Args) { | ||||||
|  | 	case 2: | ||||||
|  | 		break | ||||||
|  | 	case 1: | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0]) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 	urlOrPath := os.Args[1] | ||||||
|  | 
 | ||||||
|  | 	gsr := gsheet2csv.NewReaderFrom(urlOrPath) | ||||||
|  | 	records, err := gsr.ReadAll() | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	csvw := gsheet2csv.NewWriter(os.Stdout) | ||||||
|  | 	csvw.Comment = gsr.Comment | ||||||
|  | 	if err := csvw.WriteAll(records); err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | # CLI | ||||||
|  | 
 | ||||||
|  | There are a few convenience utilities: | ||||||
|  | 
 | ||||||
|  | - `gsheet2csv` (also `gsheet2tsv`) | ||||||
|  | - `gsheet2env` | ||||||
|  | 
 | ||||||
|  | ## gsheet2csv | ||||||
|  | 
 | ||||||
|  | They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines. | ||||||
|  | 
 | ||||||
|  | The alterable behavior is almost exclusively for testing. | ||||||
|  | 
 | ||||||
|  | ### Installation | ||||||
|  | 
 | ||||||
|  | ```sh | ||||||
|  | go install github.com/therootcompany/golib/io/transform/gsheet2csv/cmd/gsheet2csv@latest | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### Usage | ||||||
|  | 
 | ||||||
|  | ```sh | ||||||
|  | gsheet2csv -raw -o ./gsheet.csv 'https://docs.google.com/spreadsheets/...' | ||||||
|  | 
 | ||||||
|  | gsheet2csv -d '\t' --write-style 'gsheet' ./gsheet.csv > ./gsheet.tsv | ||||||
|  | 
 | ||||||
|  | gsheet2csv --strip-comments ./gsheet.csv > ./sheet.csv | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ```text | ||||||
|  | --raw               download without processing | ||||||
|  | --print-ids         print ids to stdout without downloading | ||||||
|  | --print-url         print url to stdout without downloading | ||||||
|  | -o <filepath>       write records to file (default: stdout) | ||||||
|  | -d                  field delimiter (for output) | ||||||
|  | --read-delimiter    input field delimiter (for testing reciprocity) | ||||||
|  | --crlf              write using CRLF (\r\n) as the record separator | ||||||
|  | --comment '#'       treat lines starting with # as comments | ||||||
|  | --strip-comments    ignore single-field data beginning with a comment character | ||||||
|  | --read-style        'gsheet' (preserves comments as single-field records) | ||||||
|  |                     or 'rfc' (ignore lines starting with comment character) | ||||||
|  | --write-style       'gsheet' (quote single-field comments containing quotes or commas) | ||||||
|  |                     or 'rfc' (only quote values starting with a comment character) | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### ASCII Delimiters | ||||||
|  | 
 | ||||||
|  | ``` | ||||||
|  | ,   comma | ||||||
|  | \t  tab (or a normal tab) | ||||||
|  |     space (just a normal space) | ||||||
|  | :   colon | ||||||
|  | ;   semicolon | ||||||
|  | |   pipe | ||||||
|  | ^_  unit separator | ||||||
|  | ^^  record separator | ||||||
|  | ^]  group separator | ||||||
|  | ^\  file separator | ||||||
|  | \f  form feed (also ^L) | ||||||
|  | \v  vertical tab (also ^K) | ||||||
|  | ``` | ||||||
							
								
								
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2csv/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2csv/main.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,220 @@ | |||||||
|  | package main | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"encoding/csv" | ||||||
|  | 	"errors" | ||||||
|  | 	"flag" | ||||||
|  | 	"fmt" | ||||||
|  | 	"io" | ||||||
|  | 	"os" | ||||||
|  | 	"strings" | ||||||
|  | 	"unicode/utf8" | ||||||
|  | 
 | ||||||
|  | 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// CSVReader is the record-reading API that main needs. Both the standard
// encoding/csv reader and the gsheet2csv reader are assigned to it, so the
// conversion loop does not care which read style was selected.
type CSVReader interface {
	// Read returns the next record; main treats io.EOF as end of input.
	Read() (record []string, err error)
	// ReadAll returns every remaining record at once.
	ReadAll() (records [][]string, err error)
}
|  | 
 | ||||||
// CSVWriter is the record-writing API that main needs from its output
// writer: per-record and bulk writes, plus an explicit flush whose failure is
// reported through Error.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record in records.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer; main calls
	// Error afterwards to find out whether that succeeded.
	Flush()
	// Error reports a failure from an earlier Write or Flush.
	Error() error
}
|  | 
 | ||||||
|  | func main() { | ||||||
|  | 	var commentArg string | ||||||
|  | 	format := "CSV" | ||||||
|  | 	delim := ',' | ||||||
|  | 	if strings.Contains(os.Args[0], "tsv") { | ||||||
|  | 		delim = '\t' | ||||||
|  | 		format = "TSV" | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Parse command-line flags | ||||||
|  | 	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)") | ||||||
|  | 	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") | ||||||
|  | 	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||||
|  | 	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||||
|  | 	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") | ||||||
|  | 	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") | ||||||
|  | 	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") | ||||||
|  | 	rawOnly := flag.Bool("raw", false, "don't parse, just download") | ||||||
|  | 	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)") | ||||||
|  | 	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV") | ||||||
|  | 	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read") | ||||||
|  | 	flag.Usage = func() { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Flags:\n") | ||||||
|  | 		flag.PrintDefaults() | ||||||
|  | 		fmt.Fprintf(os.Stderr, "\nExample:\n") | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0]) | ||||||
|  | 	} | ||||||
|  | 	flag.Parse() | ||||||
|  | 
 | ||||||
|  | 	// Check for URL argument | ||||||
|  | 	if len(flag.Args()) != 1 { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") | ||||||
|  | 		flag.Usage() | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 	url := flag.Args()[0] | ||||||
|  | 
 | ||||||
|  | 	// Prepare output writer | ||||||
|  | 	var out *os.File | ||||||
|  | 	if *outputFile != "" { | ||||||
|  | 		var err error | ||||||
|  | 		out, err = os.Create(*outputFile) | ||||||
|  | 		if err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		defer func() { _ = out.Close() }() | ||||||
|  | 	} else { | ||||||
|  | 		out = os.Stdout | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString) | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	delim, err = gsheet2csv.DecodeDelimiter(*delimString) | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var rc io.ReadCloser | ||||||
|  | 	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { | ||||||
|  | 		docid, gid := gsheet2csv.ParseIDs(url) | ||||||
|  | 		if *parseOnly { | ||||||
|  | 			fmt.Printf("docid=%s\ngid=%s\n", docid, gid) | ||||||
|  | 		} else { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		sheetURL := gsheet2csv.ToCSVURL(docid, gid) | ||||||
|  | 		if *urlOnly { | ||||||
|  | 			fmt.Printf("%s\n", sheetURL) | ||||||
|  | 		} else { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if !*urlOnly { | ||||||
|  | 			resp, err := gsheet2csv.GetSheet(docid, gid) | ||||||
|  | 			if err != nil { | ||||||
|  | 				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) | ||||||
|  | 				os.Exit(1) | ||||||
|  | 			} | ||||||
|  | 			defer func() { _ = resp.Body.Close() }() | ||||||
|  | 			rc = resp.Body | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		url = strings.TrimPrefix(url, "file://") | ||||||
|  | 		fmt.Fprintf(os.Stderr, "opening %s\n", url) | ||||||
|  | 		f, err := os.Open(url) | ||||||
|  | 		if err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		rc = f | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if out == os.Stdout { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "\n") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if *urlOnly || *parseOnly { | ||||||
|  | 		os.Exit(0) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if *rawOnly { | ||||||
|  | 		if _, err := io.Copy(out, rc); err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var comment rune | ||||||
|  | 	if commentArg == "0" { | ||||||
|  | 		comment = 0 | ||||||
|  | 	} else { | ||||||
|  | 		comment, _ = utf8.DecodeRuneInString(commentArg) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Create a reader for the Google Sheet | ||||||
|  | 	var csvr CSVReader | ||||||
|  | 	if *readStyle == "rfc" { | ||||||
|  | 		rfcr := csv.NewReader(rc) | ||||||
|  | 		rfcr.Comma = inputDelim | ||||||
|  | 		rfcr.Comment = comment | ||||||
|  | 		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||||
|  | 		csvr = rfcr | ||||||
|  | 	} else { | ||||||
|  | 		gsr := gsheet2csv.NewReader(rc) | ||||||
|  | 		gsr.Comma = inputDelim | ||||||
|  | 		if *noReadComments { | ||||||
|  | 			gsr.Comment = comment | ||||||
|  | 		} else { | ||||||
|  | 			gsr.Comment = 0 | ||||||
|  | 		} | ||||||
|  | 		gsr.ReuseRecord = true | ||||||
|  | 		csvr = gsr | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Create CSV writer | ||||||
|  | 	var csvw CSVWriter | ||||||
|  | 	// if *writeStyle == "gsheet" | ||||||
|  | 	{ | ||||||
|  | 		gsw := gsheet2csv.NewWriter(out) | ||||||
|  | 		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet" | ||||||
|  | 		gsw.Comment = comment | ||||||
|  | 		gsw.Comma = delim // Set delimiter to tab for TSV | ||||||
|  | 		gsw.UseCRLF = *useCRLF | ||||||
|  | 		csvw = gsw | ||||||
|  | 	} | ||||||
|  | 	// else { | ||||||
|  | 	// 	rfcw := csv.NewWriter(out) | ||||||
|  | 	// 	rfcw.Comma = delim | ||||||
|  | 	// 	rfcw.UseCRLF = *useCRLF | ||||||
|  | 	// 	csvw = rfcw | ||||||
|  | 	// } | ||||||
|  | 
 | ||||||
|  | 	for { | ||||||
|  | 		// Convert each record | ||||||
|  | 		record, err := csvr.Read() | ||||||
|  | 		if err != nil { | ||||||
|  | 			if errors.Is(err, io.EOF) { | ||||||
|  | 				break | ||||||
|  | 			} | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if err := csvw.Write(record); err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	csvw.Flush() | ||||||
|  | 	if err := csvw.Error(); err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if out != os.Stdout { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile) | ||||||
|  | 	} | ||||||
|  | } | ||||||
							
								
								
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,220 @@ | |||||||
|  | package main | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"encoding/csv" | ||||||
|  | 	"errors" | ||||||
|  | 	"flag" | ||||||
|  | 	"fmt" | ||||||
|  | 	"io" | ||||||
|  | 	"os" | ||||||
|  | 	"strings" | ||||||
|  | 	"unicode/utf8" | ||||||
|  | 
 | ||||||
|  | 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// CSVReader is the record-reading API that main needs. Both the standard
// encoding/csv reader and the gsheet2csv reader are assigned to it, so the
// conversion loop does not care which read style was selected.
type CSVReader interface {
	// Read returns the next record; main treats io.EOF as end of input.
	Read() (record []string, err error)
	// ReadAll returns every remaining record at once.
	ReadAll() (records [][]string, err error)
}
|  | 
 | ||||||
// CSVWriter is the record-writing API that main needs from its output
// writer: per-record and bulk writes, plus an explicit flush whose failure is
// reported through Error.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record in records.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer; main calls
	// Error afterwards to find out whether that succeeded.
	Flush()
	// Error reports a failure from an earlier Write or Flush.
	Error() error
}
|  | 
 | ||||||
|  | func main() { | ||||||
|  | 	var commentArg string | ||||||
|  | 	format := "CSV" | ||||||
|  | 	delim := ',' | ||||||
|  | 	if strings.Contains(os.Args[0], "tsv") { | ||||||
|  | 		delim = '\t' | ||||||
|  | 		format = "TSV" | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Parse command-line flags | ||||||
|  | 	flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)") | ||||||
|  | 	outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)") | ||||||
|  | 	readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||||
|  | 	delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)") | ||||||
|  | 	useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator") | ||||||
|  | 	urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL") | ||||||
|  | 	parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)") | ||||||
|  | 	rawOnly := flag.Bool("raw", false, "don't parse, just download") | ||||||
|  | 	noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)") | ||||||
|  | 	readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV") | ||||||
|  | 	writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read") | ||||||
|  | 	flag.Usage = func() { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Flags:\n") | ||||||
|  | 		flag.PrintDefaults() | ||||||
|  | 		fmt.Fprintf(os.Stderr, "\nExample:\n") | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0]) | ||||||
|  | 		fmt.Fprintf(os.Stderr, "  %s -o output.tsv './gsheet.csv'\n", os.Args[0]) | ||||||
|  | 	} | ||||||
|  | 	flag.Parse() | ||||||
|  | 
 | ||||||
|  | 	// Check for URL argument | ||||||
|  | 	if len(flag.Args()) != 1 { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n") | ||||||
|  | 		flag.Usage() | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 	url := flag.Args()[0] | ||||||
|  | 
 | ||||||
|  | 	// Prepare output writer | ||||||
|  | 	var out *os.File | ||||||
|  | 	if *outputFile != "" { | ||||||
|  | 		var err error | ||||||
|  | 		out, err = os.Create(*outputFile) | ||||||
|  | 		if err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		defer func() { _ = out.Close() }() | ||||||
|  | 	} else { | ||||||
|  | 		out = os.Stdout | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString) | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	delim, err = gsheet2csv.DecodeDelimiter(*delimString) | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var rc io.ReadCloser | ||||||
|  | 	if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") { | ||||||
|  | 		docid, gid := gsheet2csv.ParseIDs(url) | ||||||
|  | 		if *parseOnly { | ||||||
|  | 			fmt.Printf("docid=%s\ngid=%s\n", docid, gid) | ||||||
|  | 		} else { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		sheetURL := gsheet2csv.ToCSVURL(docid, gid) | ||||||
|  | 		if *urlOnly { | ||||||
|  | 			fmt.Printf("%s\n", sheetURL) | ||||||
|  | 		} else { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if !*urlOnly { | ||||||
|  | 			resp, err := gsheet2csv.GetSheet(docid, gid) | ||||||
|  | 			if err != nil { | ||||||
|  | 				fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err) | ||||||
|  | 				os.Exit(1) | ||||||
|  | 			} | ||||||
|  | 			defer func() { _ = resp.Body.Close() }() | ||||||
|  | 			rc = resp.Body | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		url = strings.TrimPrefix(url, "file://") | ||||||
|  | 		fmt.Fprintf(os.Stderr, "opening %s\n", url) | ||||||
|  | 		f, err := os.Open(url) | ||||||
|  | 		if err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		rc = f | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if out == os.Stdout { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "\n") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if *urlOnly || *parseOnly { | ||||||
|  | 		os.Exit(0) | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if *rawOnly { | ||||||
|  | 		if _, err := io.Copy(out, rc); err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 		} | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var comment rune | ||||||
|  | 	if commentArg == "0" { | ||||||
|  | 		comment = 0 | ||||||
|  | 	} else { | ||||||
|  | 		comment, _ = utf8.DecodeRuneInString(commentArg) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Create a reader for the Google Sheet | ||||||
|  | 	var csvr CSVReader | ||||||
|  | 	if *readStyle == "rfc" { | ||||||
|  | 		rfcr := csv.NewReader(rc) | ||||||
|  | 		rfcr.Comma = inputDelim | ||||||
|  | 		rfcr.Comment = comment | ||||||
|  | 		rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||||
|  | 		csvr = rfcr | ||||||
|  | 	} else { | ||||||
|  | 		gsr := gsheet2csv.NewReader(rc) | ||||||
|  | 		gsr.Comma = inputDelim | ||||||
|  | 		if *noReadComments { | ||||||
|  | 			gsr.Comment = comment | ||||||
|  | 		} else { | ||||||
|  | 			gsr.Comment = 0 | ||||||
|  | 		} | ||||||
|  | 		gsr.ReuseRecord = true | ||||||
|  | 		csvr = gsr | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Create CSV writer | ||||||
|  | 	var csvw CSVWriter | ||||||
|  | 	// if *writeStyle == "gsheet" | ||||||
|  | 	{ | ||||||
|  | 		gsw := gsheet2csv.NewWriter(out) | ||||||
|  | 		gsw.QuoteAmbiguousComments = *writeStyle == "gsheet" | ||||||
|  | 		gsw.Comment = comment | ||||||
|  | 		gsw.Comma = delim // Set delimiter to tab for TSV | ||||||
|  | 		gsw.UseCRLF = *useCRLF | ||||||
|  | 		csvw = gsw | ||||||
|  | 	} | ||||||
|  | 	// else { | ||||||
|  | 	// 	rfcw := csv.NewWriter(out) | ||||||
|  | 	// 	rfcw.Comma = delim | ||||||
|  | 	// 	rfcw.UseCRLF = *useCRLF | ||||||
|  | 	// 	csvw = rfcw | ||||||
|  | 	// } | ||||||
|  | 
 | ||||||
|  | 	for { | ||||||
|  | 		// Convert each record | ||||||
|  | 		record, err := csvr.Read() | ||||||
|  | 		if err != nil { | ||||||
|  | 			if errors.Is(err, io.EOF) { | ||||||
|  | 				break | ||||||
|  | 			} | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if err := csvw.Write(record); err != nil { | ||||||
|  | 			fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err) | ||||||
|  | 			os.Exit(1) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	csvw.Flush() | ||||||
|  | 	if err := csvw.Error(); err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if out != os.Stdout { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile) | ||||||
|  | 	} | ||||||
|  | } | ||||||
							
								
								
									
										33
									
								
								io/transform/gsheet2csv/fixtures/example.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								io/transform/gsheet2csv/fixtures/example.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,33 @@ | |||||||
|  | package main | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"os" | ||||||
|  | 
 | ||||||
|  | 	"github.com/therootcompany/golib/io/transform/gsheet2csv" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func main() { | ||||||
|  | 	switch len(os.Args) { | ||||||
|  | 	case 2: | ||||||
|  | 		break | ||||||
|  | 	case 1: | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0]) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 	urlOrPath := os.Args[1] | ||||||
|  | 
 | ||||||
|  | 	gsr := gsheet2csv.NewReaderFrom(urlOrPath) | ||||||
|  | 	records, err := gsr.ReadAll() | ||||||
|  | 	if err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	csvw := gsheet2csv.NewWriter(os.Stdout) | ||||||
|  | 	csvw.Comment = gsr.Comment | ||||||
|  | 	if err := csvw.WriteAll(records); err != nil { | ||||||
|  | 		fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err) | ||||||
|  | 		os.Exit(1) | ||||||
|  | 	} | ||||||
|  | } | ||||||
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-raw.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-raw.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | # this is a comment,, | ||||||
|  | "# this is, well, a quoted comment",, | ||||||
|  | "# this is a ""super""-quoted comment",, | ||||||
|  | Key,Value, | ||||||
|  | Name,55, | ||||||
|  | Girlfriend's Age,55, | ||||||
|  | ,, | ||||||
|  | My IQ,55, | ||||||
|  | ,55, | ||||||
|  | "Key,with,Comma",, | ||||||
|  | ,"Value,with,Comma", | ||||||
|  | "Quoted ""Key""",Normal Value, | ||||||
|  | Normal Key,"Quoted ""Value""", | ||||||
|  | "Quoted ""Key""",, | ||||||
|  | ,"Quoted ""Value""", | ||||||
|  | x,y,z | ||||||
|  | "# comment with trailing comma,",, | ||||||
|  | #1,2,#3 | ||||||
| 
 | 
							
								
								
									
										14
									
								
								io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | |||||||
|  | Key,Value, | ||||||
|  | Name,55, | ||||||
|  | Girlfriend's Age,55, | ||||||
|  | ,, | ||||||
|  | My IQ,55, | ||||||
|  | ,55, | ||||||
|  | "Key,with,Comma",, | ||||||
|  | ,"Value,with,Comma", | ||||||
|  | "Quoted ""Key""",Normal Value, | ||||||
|  | Normal Key,"Quoted ""Value""", | ||||||
|  | "Quoted ""Key""",, | ||||||
|  | ,"Quoted ""Value""", | ||||||
|  | x,y,z | ||||||
|  | "#1",2,#3 | ||||||
| 
 | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | # this is a comment | ||||||
|  | "# this is, well, a quoted comment" | ||||||
|  | "# this is a ""super""-quoted comment" | ||||||
|  | Key,Value, | ||||||
|  | Name,55, | ||||||
|  | Girlfriend's Age,55, | ||||||
|  | ,, | ||||||
|  | My IQ,55, | ||||||
|  | ,55, | ||||||
|  | "Key,with,Comma",, | ||||||
|  | ,"Value,with,Comma", | ||||||
|  | "Quoted ""Key""",Normal Value, | ||||||
|  | Normal Key,"Quoted ""Value""", | ||||||
|  | "Quoted ""Key""",, | ||||||
|  | ,"Quoted ""Value""", | ||||||
|  | x,y,z | ||||||
|  | "# comment with trailing comma," | ||||||
|  | "#1",2,#3 | ||||||
| Can't render this file because it contains an unexpected character in line 10 and column 16. | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | # this is a comment | ||||||
|  | # this is, well, a quoted comment | ||||||
|  | # this is a "super"-quoted comment | ||||||
|  | Key,Value, | ||||||
|  | Name,55, | ||||||
|  | Girlfriend's Age,55, | ||||||
|  | ,, | ||||||
|  | My IQ,55, | ||||||
|  | ,55, | ||||||
|  | "Key,with,Comma",, | ||||||
|  | ,"Value,with,Comma", | ||||||
|  | "Quoted ""Key""",Normal Value, | ||||||
|  | Normal Key,"Quoted ""Value""", | ||||||
|  | "Quoted ""Key""",, | ||||||
|  | ,"Quoted ""Value""", | ||||||
|  | x,y,z | ||||||
|  | # comment with trailing comma, | ||||||
|  | "#1",2,#3 | ||||||
| Can't render this file because it contains an unexpected character in line 3 and column 13. | 
							
								
								
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | |||||||
|  | # this is a comment | ||||||
|  | # this is, well, a quoted comment | ||||||
|  | # this is a "super"-quoted comment | ||||||
|  | Key	Value	 | ||||||
|  | Name	55	 | ||||||
|  | Girlfriend's Age	55	 | ||||||
|  | 		 | ||||||
|  | My IQ	55	 | ||||||
|  | 	55	 | ||||||
|  | Key,with,Comma		 | ||||||
|  | 	Value,with,Comma	 | ||||||
|  | "Quoted ""Key"""	Normal Value	 | ||||||
|  | Normal Key	"Quoted ""Value"""	 | ||||||
|  | "Quoted ""Key"""		 | ||||||
|  | 	"Quoted ""Value"""	 | ||||||
|  | x	y	z | ||||||
|  | # comment with trailing comma, | ||||||
|  | "#1"	2	#3 | ||||||
| Can't render this file because it contains an unexpected character in line 3 and column 13. | 
							
								
								
									
										3
									
								
								io/transform/gsheet2csv/go.mod
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								io/transform/gsheet2csv/go.mod
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,3 @@ | |||||||
|  | module github.com/therootcompany/golib/io/transform/gsheet2csv | ||||||
|  | 
 | ||||||
|  | go 1.24.6 | ||||||
							
								
								
									
										309
									
								
								io/transform/gsheet2csv/gsheet2csv.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										309
									
								
								io/transform/gsheet2csv/gsheet2csv.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,309 @@ | |||||||
|  | // Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com) | ||||||
|  | // | ||||||
|  | // To the extent possible under law, the author(s) have dedicated all copyright | ||||||
|  | // and related and neighboring rights to this software to the public domain | ||||||
|  | // worldwide. This software is distributed without any warranty. | ||||||
|  | // | ||||||
|  | // You should have received a copy of the CC0 Public Domain Dedication along with | ||||||
|  | // this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. | ||||||
|  | // | ||||||
|  | // SPDX-License-Identifier: CC0-1.0 | ||||||
|  | 
 | ||||||
|  | package gsheet2csv | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"encoding/csv" | ||||||
|  | 	"errors" | ||||||
|  | 	"fmt" | ||||||
|  | 	"io" | ||||||
|  | 	"net/http" | ||||||
|  | 	"os" | ||||||
|  | 	"strings" | ||||||
|  | 	"unicode/utf8" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// ASCII information-separator control characters. DecodeDelimiter maps its
// caret ("^_") and backslash ("\x1f") spellings onto these values.
const (
	fileSeparator   = '\x1c' + iota // 0x1c (FS)
	groupSeparator                  // 0x1d (GS)
	recordSeparator                 // 0x1e (RS)
	unitSeparator                   // 0x1f (US)
)

var (
	// ErrHTTPGet is returned by GetSheet when the download responds with
	// anything other than 200 OK.
	ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

	// httpGet is the function used to fetch sheets; it is a variable so
	// that tests can swap in a mock.
	httpGet = http.Get
)
|  | 
 | ||||||
// Reader wraps an encoding/csv Reader and adds comment-line handling plus
// bookkeeping about where the data came from.
type Reader struct {
	// Reader is the underlying CSV parser; its exported settings (Comma,
	// LazyQuotes, ...) remain directly accessible through embedding.
	*csv.Reader

	// DocID and GID are the sheet identifiers when the Reader was created by
	// NewReaderFromIDs (or NewReaderFromURL); otherwise they are empty.
	DocID string
	GID   string

	// URL is the CSV export URL for sheets, or the file path for local files.
	URL string

	// Comment is the rune that marks a comment record. Read skips a record
	// when its first field starts with this rune and every other field is
	// empty. Set it to 0 to have comments returned like any other record.
	Comment rune

	r     io.Reader      // the source handed to NewReader
	resp  *http.Response // HTTP response whose Body is being read, if any
	close bool           // whether Read should close resp.Body once reading fails or ends
	err   error          // construction error, reported by every Read call
}
|  | 
 | ||||||
|  | func NewReaderFrom(urlOrPath string) *Reader { | ||||||
|  | 	if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") { | ||||||
|  | 		return NewReaderFromURL(urlOrPath) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	urlOrPath = strings.TrimPrefix(urlOrPath, "file://") | ||||||
|  | 	f, err := os.Open(urlOrPath) | ||||||
|  | 	r := NewReader(f) | ||||||
|  | 	r.URL = urlOrPath | ||||||
|  | 	if err != nil { | ||||||
|  | 		r.err = err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return r | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func NewReaderFromURL(url string) *Reader { | ||||||
|  | 	docid, gid := ParseIDs(url) | ||||||
|  | 
 | ||||||
|  | 	return NewReaderFromIDs(docid, gid) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func NewReaderFromIDs(docid, gid string) *Reader { | ||||||
|  | 	resp, err := GetSheet(docid, gid) | ||||||
|  | 	if err != nil { | ||||||
|  | 		r := NewReader(nil) | ||||||
|  | 		r.err = err | ||||||
|  | 		return r | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	r := NewReader(resp.Body) | ||||||
|  | 	r.URL = ToCSVURL(docid, gid) | ||||||
|  | 	r.DocID = docid | ||||||
|  | 	r.GID = gid | ||||||
|  | 	r.resp = resp | ||||||
|  | 	r.close = true | ||||||
|  | 	return r | ||||||
|  | } | ||||||
|  | 
 | ||||||
// ToCSVURL returns the Google Sheets CSV export URL for the given document
// ID and gid. Both values are inserted as given, without URL escaping.
func ToCSVURL(docid, gid string) string {
	const exportFormat = "https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s"
	return fmt.Sprintf(exportFormat, docid, gid)
}
|  | 
 | ||||||
|  | func GetSheet(docid, gid string) (*http.Response, error) { | ||||||
|  | 	downloadURL := ToCSVURL(docid, gid) | ||||||
|  | 
 | ||||||
|  | 	resp, err := httpGet(downloadURL) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if resp.StatusCode != http.StatusOK { | ||||||
|  | 		_ = resp.Body.Close() | ||||||
|  | 		return nil, ErrHTTPGet | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return resp, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func NewReader(r io.Reader) *Reader { | ||||||
|  | 	csvr := csv.NewReader(r) | ||||||
|  | 	csvr.Comma = ',' | ||||||
|  | 	csvr.Comment = 0          // to allow distinguishing between quoted comments and fields | ||||||
|  | 	csvr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not | ||||||
|  | 	csvr.LazyQuotes = false   // fields that need quotes use them correctly | ||||||
|  | 	csvr.TrimLeadingSpace = false | ||||||
|  | 	csvr.ReuseRecord = false | ||||||
|  | 	return &Reader{ | ||||||
|  | 		Reader:  csvr, | ||||||
|  | 		Comment: '#', | ||||||
|  | 		r:       r, | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func DecodeDelimiter(delimString string) (rune, error) { | ||||||
|  | 	switch delimString { | ||||||
|  | 	case "^_", "\\x1f": | ||||||
|  | 		delimString = string(unitSeparator) | ||||||
|  | 	case "^^", "\\x1e": | ||||||
|  | 		delimString = string(recordSeparator) | ||||||
|  | 	case "^]", "\\x1d": | ||||||
|  | 		delimString = string(groupSeparator) | ||||||
|  | 	case "^\\", "\\x1c": | ||||||
|  | 		delimString = string(fileSeparator) | ||||||
|  | 	case "^L", "\\f": | ||||||
|  | 		delimString = "\f" | ||||||
|  | 	case "^K", "\\v": | ||||||
|  | 		delimString = "\v" | ||||||
|  | 	case "^I", "\\t": | ||||||
|  | 		delimString = "	" | ||||||
|  | 	} | ||||||
|  | 	delim, _ := utf8.DecodeRuneInString(delimString) | ||||||
|  | 	return delim, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (r *Reader) Read() ([]string, error) { | ||||||
|  | 	if r.err != nil { | ||||||
|  | 		return nil, r.err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for { | ||||||
|  | 		record, err := r.Reader.Read() | ||||||
|  | 		if err != nil { | ||||||
|  | 			if r.close { | ||||||
|  | 				_ = r.resp.Body.Close() | ||||||
|  | 			} | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if r.Comment > 0 { | ||||||
|  | 			if rv, _ := utf8.DecodeRuneInString(record[0]); rv == r.Comment { | ||||||
|  | 				last := len(record) - 1 | ||||||
|  | 				for len(record[last]) == 0 { | ||||||
|  | 					last -= 1 | ||||||
|  | 				} | ||||||
|  | 				if last == 0 { | ||||||
|  | 					continue | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		return record, nil | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (r *Reader) ReadAll() ([][]string, error) { | ||||||
|  | 	var records [][]string | ||||||
|  | 
 | ||||||
|  | 	for { | ||||||
|  | 		record, err := r.Read() | ||||||
|  | 		if nil != err { | ||||||
|  | 			if errors.Is(err, io.EOF) { | ||||||
|  | 				return records, nil | ||||||
|  | 			} | ||||||
|  | 			return records, err | ||||||
|  | 		} | ||||||
|  | 		records = append(records, record) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
// ParseIDs extracts the document ID and sheet gid from a Google Sheets URL.
//
// The document ID is the path segment following "/spreadsheets/d/"; it ends
// at the next '/', '?' or '#', or at the end of the string. The gid is the
// value following the first "gid=" in the URL, up to the next '#', '&', '?'
// or '/'. If the URL has no gid (or an empty one), gid defaults to "0".
//
// If no document ID can be found, both results are empty.
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"
	_, rest, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	// The ID may be followed directly by a query or fragment when the URL
	// has no trailing path such as "/edit".
	docid = rest
	if end := strings.IndexAny(rest, "/?#"); end != -1 {
		docid = rest[:end]
	}
	if docid == "" {
		return "", ""
	}

	gid = "0"
	if _, value, ok := strings.Cut(urlStr, "gid="); ok {
		if end := strings.IndexAny(value, "#&?/"); end != -1 {
			value = value[:end]
		}
		if value != "" {
			gid = value
		}
	}
	return docid, gid
}
|  | 
 | ||||||
// Writer wraps an encoding/csv Writer and adds special handling for records
// whose first field starts with the Comment rune.
type Writer struct {
	// Writer is the underlying CSV encoder; Comma and UseCRLF are honored
	// for comment-prefixed records as well.
	*csv.Writer

	// Comment is the rune that marks a comment. Set it to 0 to write every
	// record through the plain csv.Writer.
	Comment rune

	// QuoteAmbiguousComments controls how true comments (only the first
	// field is non-empty) are written. When false they are written verbatim;
	// when true they are quoted if they contain a quote, the delimiter or a
	// line break.
	QuoteAmbiguousComments bool

	w io.Writer // destination for lines written without csv.Writer
}

// NewWriter returns a Writer that writes to w and treats '#' as the comment
// rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// needsQuotes reports whether a field must be quoted to survive a CSV round
// trip: it contains a double quote, the delimiter, or a line break.
func (w *Writer) needsQuotes(field string) bool {
	return strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n")
}

// Write writes a single record.
//
// Records that do not start with the Comment rune (or all records when
// Comment is 0) are passed to the embedded csv.Writer and are buffered until
// Flush is called.
//
// Records that do start with the Comment rune are written directly to the
// destination, after flushing anything buffered so far:
//   - a true comment (all fields after the first are empty) is written as a
//     single field, verbatim unless QuoteAmbiguousComments is set or the
//     comment spans several lines, in which case it is quoted when needed;
//   - data that merely begins with the comment rune is written with its
//     first field always quoted, so that it is not mistaken for a comment.
//
// The record slice passed in is never modified.
func (w *Writer) Write(record []string) error {
	// Not handling comments? Move along.
	if w.Comment == 0 || len(record) == 0 {
		return w.Writer.Write(record)
	}

	// First char not a comment char? Move along.
	if first, _ := utf8.DecodeRuneInString(record[0]); first != w.Comment {
		return w.Writer.Write(record)
	}

	// Is this a true comment, or data whose first field begins with the
	// comment char? record[0] is non-empty here, so the scan stops at 0.
	lastNonEmpty := len(record) - 1
	for lastNonEmpty > 0 && len(record[lastNonEmpty]) == 0 {
		lastNonEmpty--
	}

	// Custom writes bypass csv.Writer's buffer, so drain it first to keep
	// the output in order, and surface any error that draining hit.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	newline := "\n"
	if w.UseCRLF {
		newline = "\r\n"
	}

	quoteFirst := true
	if lastNonEmpty == 0 {
		// Write true comments out plain, unless a line break inside the
		// comment would spill its remainder into the following rows.
		if !w.QuoteAmbiguousComments && !strings.ContainsAny(record[0], "\r\n") {
			_, err := io.WriteString(w.w, record[0]+newline)
			return err
		}
		// Drop the trailing empty fields and quote only if required.
		record = record[:1]
		quoteFirst = false
	}

	// Build the output in a fresh slice so the caller's record is left
	// untouched.
	fields := make([]string, len(record))
	for i, f := range record {
		if (i == 0 && quoteFirst) || w.needsQuotes(f) {
			f = `"` + strings.ReplaceAll(f, `"`, `""`) + `"`
		}
		fields[i] = f
	}

	_, err := io.WriteString(w.w, strings.Join(fields, string(w.Comma))+newline)
	return err
}

// WriteAll writes every record with Write, then flushes the underlying
// csv.Writer and returns any error recorded by it.
func (w *Writer) WriteAll(records [][]string) error {
	for _, record := range records {
		if err := w.Write(record); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
							
								
								
									
										249
									
								
								io/transform/gsheet2csv/gsheet2csv_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								io/transform/gsheet2csv/gsheet2csv_test.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,249 @@ | |||||||
|  | package gsheet2csv | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"errors" | ||||||
|  | 	"io" | ||||||
|  | 	"net/http" | ||||||
|  | 	"slices" | ||||||
|  | 	"strings" | ||||||
|  | 	"testing" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// mockHTTPClient stands in for http.Get in tests: it hands back a canned
// response and error no matter which URL is requested.
type mockHTTPClient struct {
	resp *http.Response
	err  error
}

// Get ignores its argument and returns the configured response and error.
func (m *mockHTTPClient) Get(_ string) (*http.Response, error) {
	return m.resp, m.err
}
|  | 
 | ||||||
const (
	// sampleCSV mimics the structure of ai-models.csv from the project
	// README: a plain comment, a quoted comment, a header row, data rows and
	// a blank line in between.
	sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"

"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`

	// malformedCSV has an unterminated quoted field in its header row and is
	// used to exercise error handling.
	malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
)
|  | 
 | ||||||
|  | // TestParseIDs verifies the ParseIDs function for various URL formats. | ||||||
|  | func TestParseIDs(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name    string | ||||||
|  | 		url     string | ||||||
|  | 		wantDoc string | ||||||
|  | 		wantGid string | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name:    "Google Sheets Edit / Share URL with gid", | ||||||
|  | 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238", | ||||||
|  | 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||||
|  | 			wantGid: "559037238", | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:    "Google Sheets CSV URL with gid", | ||||||
|  | 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238", | ||||||
|  | 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||||
|  | 			wantGid: "559037238", | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:    "URL without gid", | ||||||
|  | 			url:     "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit", | ||||||
|  | 			wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34", | ||||||
|  | 			wantGid: "0", | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:    "Invalid URL", | ||||||
|  | 			url:     "https://example.com/invalid", | ||||||
|  | 			wantDoc: "", | ||||||
|  | 			wantGid: "", | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.name, func(t *testing.T) { | ||||||
|  | 			gotDoc, gotGid := ParseIDs(tt.url) | ||||||
|  | 			if gotDoc != tt.wantDoc { | ||||||
|  | 				t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc) | ||||||
|  | 			} | ||||||
|  | 			if gotGid != tt.wantGid { | ||||||
|  | 				t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid) | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL. | ||||||
|  | func TestNewReaderFromURL(t *testing.T) { | ||||||
|  | 	originalGet := httpGet | ||||||
|  | 	defer func() { httpGet = originalGet }() | ||||||
|  | 
 | ||||||
|  | 	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" | ||||||
|  | 
 | ||||||
|  | 	// Test successful HTTP response | ||||||
|  | 	mockResp := &http.Response{ | ||||||
|  | 		StatusCode: http.StatusOK, | ||||||
|  | 		Body:       io.NopCloser(strings.NewReader(sampleCSV)), | ||||||
|  | 	} | ||||||
|  | 	client := &mockHTTPClient{resp: mockResp} | ||||||
|  | 	httpGet = client.Get | ||||||
|  | 
 | ||||||
|  | 	reader := NewReaderFromURL(url) | ||||||
|  | 	if reader.err != nil { | ||||||
|  | 		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) | ||||||
|  | 	} | ||||||
|  | 	if reader.resp != mockResp { | ||||||
|  | 		t.Error("NewReaderFromURL() did not set response correctly") | ||||||
|  | 	} | ||||||
|  | 	if !reader.close { | ||||||
|  | 		t.Error("NewReaderFromURL() did not set close flag") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Test HTTP failure | ||||||
|  | 	client = &mockHTTPClient{resp: mockResp} | ||||||
|  | 	client.err = errors.New("network error") | ||||||
|  | 	httpGet = client.Get | ||||||
|  | 
 | ||||||
|  | 	reader = NewReaderFromURL(url) | ||||||
|  | 	if reader.err == nil { | ||||||
|  | 		t.Error("NewReaderFromURL() expected error, got nil") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Test non-200 status | ||||||
|  | 	client = &mockHTTPClient{resp: &http.Response{ | ||||||
|  | 		StatusCode: http.StatusNotFound, | ||||||
|  | 		Body:       io.NopCloser(strings.NewReader("these aren't the droids you're looking for")), | ||||||
|  | 	}} | ||||||
|  | 	httpGet = client.Get | ||||||
|  | 
 | ||||||
|  | 	reader = NewReaderFromURL(url) | ||||||
|  | 	if reader.err == nil { | ||||||
|  | 		t.Error("NewReaderFromURL() expected error for non-200 status, got nil") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // TestRead tests the Read method for comment handling. | ||||||
|  | func TestRead(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name             string | ||||||
|  | 		preserveComments bool | ||||||
|  | 		expected         [][]string | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name: "Skip comments", | ||||||
|  | 			expected: [][]string{ | ||||||
|  | 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||||
|  | 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:             "Don't skip comments", | ||||||
|  | 			preserveComments: true, | ||||||
|  | 			expected: [][]string{ | ||||||
|  | 				{"# Generated by ollama list"}, | ||||||
|  | 				{"# Sample Quoted Comment, with \"quotes\" itself"}, | ||||||
|  | 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||||
|  | 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.name, func(t *testing.T) { | ||||||
|  | 			reader := NewReader(strings.NewReader(sampleCSV)) | ||||||
|  | 			if tt.preserveComments { | ||||||
|  | 				reader.Comment = 0 | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			for i, want := range tt.expected { | ||||||
|  | 				got, err := reader.Read() | ||||||
|  | 				if err != nil { | ||||||
|  | 					t.Errorf("Read() error at record %d: %v", i, err) | ||||||
|  | 				} | ||||||
|  | 				if !slices.Equal(got, want) { | ||||||
|  | 					t.Errorf("Read() record %d = %v, want %v", i, got, want) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			// Verify EOF | ||||||
|  | 			_, err := reader.Read() | ||||||
|  | 			if !errors.Is(err, io.EOF) { | ||||||
|  | 				t.Errorf("Read() expected EOF, got %v", err) | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // TestReadAll tests the ReadAll method for different configurations. | ||||||
|  | func TestReadAll(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name     string | ||||||
|  | 		expected [][]string | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name: "Skip comments", | ||||||
|  | 			expected: [][]string{ | ||||||
|  | 				{"NAME", "ID", "SIZE", "MODIFIED"}, | ||||||
|  | 				{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"}, | ||||||
|  | 				{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.name, func(t *testing.T) { | ||||||
|  | 			reader := NewReader(strings.NewReader(sampleCSV)) | ||||||
|  | 
 | ||||||
|  | 			got, err := reader.ReadAll() | ||||||
|  | 			if err != nil { | ||||||
|  | 				t.Errorf("ReadAll() error: %v", err) | ||||||
|  | 			} | ||||||
|  | 			if len(got) != len(tt.expected) { | ||||||
|  | 				t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected)) | ||||||
|  | 			} | ||||||
|  | 			for i, want := range tt.expected { | ||||||
|  | 				if !slices.Equal(got[i], want) { | ||||||
|  | 					t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV. | ||||||
|  | func TestNewReaderFromURLWithMalformedCSV(t *testing.T) { | ||||||
|  | 	mockResp := &http.Response{ | ||||||
|  | 		StatusCode: http.StatusOK, | ||||||
|  | 		Body:       io.NopCloser(strings.NewReader(malformedCSV)), | ||||||
|  | 	} | ||||||
|  | 	client := &mockHTTPClient{resp: mockResp} | ||||||
|  | 	originalGet := httpGet | ||||||
|  | 	httpGet = client.Get | ||||||
|  | 	defer func() { httpGet = originalGet }() | ||||||
|  | 
 | ||||||
|  | 	url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238" | ||||||
|  | 	reader := NewReaderFromURL(url) | ||||||
|  | 	if reader.err != nil { | ||||||
|  | 		t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Reading should fail due to malformed CSV | ||||||
|  | 	_, err := reader.Read() | ||||||
|  | 	if err == nil { | ||||||
|  | 		t.Error("Read() expected error for malformed CSV, got nil") | ||||||
|  | 	} | ||||||
|  | } | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user