mirror of
https://github.com/therootcompany/golib.git
synced 2025-10-12 20:18:16 +00:00
wip: fighting comments
This commit is contained in:
parent
cd00d85968
commit
fba6cbf472
@ -15,6 +15,8 @@ This does surprisingly little - you should probably just handle the boilerplate
|
|||||||
- can preserve comments
|
- can preserve comments
|
||||||
- swaps `\r` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`)
|
- swaps `\r` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`)
|
||||||
|
|
||||||
|
Note: The Google Sheet must be shared to **Anyone with the link**.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Same as `encoding/csv` (embedded), but with two extra options:
|
Same as `encoding/csv` (embedded), but with two extra options:
|
||||||
@ -37,16 +39,15 @@ func main() {
|
|||||||
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
url := os.Args[1]
|
urlOrPath := os.Args[1]
|
||||||
|
|
||||||
gsr := gsheet2csv.NewReaderFromURL(url)
|
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||||
records, err := gsr.ReadAll()
|
records, err := gsr.ReadAll()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL)
|
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// distinguishes between comments and quoted fields
|
|
||||||
csvw := gsheet2csv.NewWriter(os.Stdout)
|
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||||
csvw.Comment = gsr.Comment
|
csvw.Comment = gsr.Comment
|
||||||
if err := csvw.WriteAll(records); err != nil {
|
if err := csvw.WriteAll(records); err != nil {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/csv"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -19,8 +20,21 @@ const (
|
|||||||
unitSeparator = "\x1f"
|
unitSeparator = "\x1f"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CSVReader is the record-reading behaviour this command needs from a CSV
// source. Any type that provides both methods with these signatures
// satisfies it.
type CSVReader interface {
	// Read returns a single record, or an error.
	Read() (record []string, err error)
	// ReadAll returns a slice of records, or an error.
	ReadAll() (records [][]string, err error)
}
|
||||||
|
|
||||||
|
// CSVWriter is the record-writing behaviour this command needs from a CSV
// sink. Any type that provides all four methods with these signatures
// satisfies it.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record in records.
	WriteAll(records [][]string) error
	// Flush takes no arguments and returns nothing; callers follow it with
	// Error to check for a failure.
	Flush()
	// Error reports an error from the writer, if any.
	Error() error
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var commentArg string
|
var commentArg string
|
||||||
|
newline := "\n"
|
||||||
format := "CSV"
|
format := "CSV"
|
||||||
delim := ','
|
delim := ','
|
||||||
if strings.Contains(os.Args[0], "tsv") {
|
if strings.Contains(os.Args[0], "tsv") {
|
||||||
@ -29,13 +43,17 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse command-line flags
|
// Parse command-line flags
|
||||||
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments")
|
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable")
|
||||||
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||||
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||||
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||||
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||||
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||||
|
noReadComments := flag.Bool("no-read-comments", false, "strip comments when reading (gsheet-only)")
|
||||||
|
noWriteComments := flag.Bool("no-write-comments", false, "strip comments when writing")
|
||||||
|
readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
|
||||||
|
writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either as a gsheet or rfc CSV")
|
||||||
flag.Usage = func() {
|
flag.Usage = func() {
|
||||||
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||||
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||||
@ -88,6 +106,10 @@ func main() {
|
|||||||
}
|
}
|
||||||
delim, _ = utf8.DecodeRuneInString(*delimString)
|
delim, _ = utf8.DecodeRuneInString(*delimString)
|
||||||
|
|
||||||
|
if *useCRLF {
|
||||||
|
newline = "\r\n"
|
||||||
|
}
|
||||||
|
|
||||||
var rc io.ReadCloser
|
var rc io.ReadCloser
|
||||||
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||||
docid, gid := gsheet2csv.ParseIDs(url)
|
docid, gid := gsheet2csv.ParseIDs(url)
|
||||||
@ -123,7 +145,10 @@ func main() {
|
|||||||
}
|
}
|
||||||
rc = f
|
rc = f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if out == os.Stdout {
|
||||||
fmt.Fprintf(os.Stderr, "\n")
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
if *urlOnly || *parseOnly {
|
if *urlOnly || *parseOnly {
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
@ -138,22 +163,53 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
comment, _ := utf8.DecodeRuneInString(commentArg)
|
var comment rune
|
||||||
|
if commentArg == "0" {
|
||||||
|
comment = 0
|
||||||
|
} else {
|
||||||
|
comment, _ = utf8.DecodeRuneInString(commentArg)
|
||||||
|
}
|
||||||
|
|
||||||
// Create a reader for the Google Sheet
|
// Create a reader for the Google Sheet
|
||||||
|
var csvr CSVReader
|
||||||
|
if *readStyle == "rfc" {
|
||||||
|
rfcr := csv.NewReader(rc)
|
||||||
|
rfcr.Comment = comment
|
||||||
|
csvr = rfcr
|
||||||
|
} else {
|
||||||
gsr := gsheet2csv.NewReader(rc)
|
gsr := gsheet2csv.NewReader(rc)
|
||||||
gsr.QuotedComments = false
|
gsr.QuotedComments = false
|
||||||
|
if *noReadComments {
|
||||||
|
gsr.Comment = '#'
|
||||||
|
} else {
|
||||||
gsr.Comment = 0
|
gsr.Comment = 0
|
||||||
|
}
|
||||||
gsr.ReuseRecord = true
|
gsr.ReuseRecord = true
|
||||||
|
csvr = gsr
|
||||||
|
}
|
||||||
|
|
||||||
// Create CSV writer
|
// Create CSV writer
|
||||||
csvw := gsheet2csv.NewWriter(out)
|
var csvw CSVWriter
|
||||||
csvw.Comma = delim // Set delimiter to tab for TSV
|
if *writeStyle == "gsheet" {
|
||||||
csvw.Comment = comment
|
gsw := gsheet2csv.NewWriter(out)
|
||||||
csvw.UseCRLF = *useCRLF
|
gsw.Comment = comment
|
||||||
|
gsw.Comma = delim // Set delimiter to tab for TSV
|
||||||
|
gsw.UseCRLF = *useCRLF
|
||||||
|
csvw = gsw
|
||||||
|
} else {
|
||||||
|
rfcw := csv.NewWriter(out)
|
||||||
|
rfcw.Comma = delim
|
||||||
|
rfcw.UseCRLF = *useCRLF
|
||||||
|
csvw = rfcw
|
||||||
|
}
|
||||||
|
|
||||||
|
const quote = `"`
|
||||||
|
const dquote = quote + quote
|
||||||
|
var commaStr = string(delim)
|
||||||
|
var commentStr = string(comment)
|
||||||
for {
|
for {
|
||||||
// Convert each record
|
// Convert each record
|
||||||
record, err := gsr.Read()
|
record, err := csvr.Read()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, io.EOF) {
|
if errors.Is(err, io.EOF) {
|
||||||
break
|
break
|
||||||
@ -162,10 +218,42 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lastNonEmpty := len(record) - 1
|
||||||
|
for lastNonEmpty > -1 && len(record[lastNonEmpty]) == 0 {
|
||||||
|
lastNonEmpty -= 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if *writeStyle != "rfc" || comment == 0 || len(record) == 0 || !strings.HasPrefix(record[0], commentStr) {
|
||||||
|
if *noWriteComments && len(record) > 0 && strings.HasPrefix(record[0], commentStr) && lastNonEmpty == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if err := csvw.Write(record); err != nil {
|
if err := csvw.Write(record); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if lastNonEmpty == 0 {
|
||||||
|
record = record[:1]
|
||||||
|
if *noWriteComments {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for i, f := range record {
|
||||||
|
if i == 0 || strings.Contains(f, quote) || strings.Contains(f, string(commaStr)) {
|
||||||
|
f = strings.ReplaceAll(f, quote, dquote)
|
||||||
|
record[i] = quote + f + quote
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
csvw.Flush()
|
||||||
|
if _, err := out.Write([]byte(strings.Join(record, commaStr) + newline)); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Flush the writer to ensure all data is written
|
// Flush the writer to ensure all data is written
|
||||||
csvw.Flush()
|
csvw.Flush()
|
||||||
@ -173,4 +261,8 @@ func main() {
|
|||||||
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if out != os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -123,7 +123,10 @@ func main() {
|
|||||||
}
|
}
|
||||||
rc = f
|
rc = f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if out == os.Stdout {
|
||||||
fmt.Fprintf(os.Stderr, "\n")
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
if *urlOnly || *parseOnly {
|
if *urlOnly || *parseOnly {
|
||||||
os.Exit(0)
|
os.Exit(0)
|
||||||
@ -173,4 +176,8 @@ func main() {
|
|||||||
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if out != os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,12 +15,12 @@ func main() {
|
|||||||
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
url := os.Args[1]
|
urlOrPath := os.Args[1]
|
||||||
|
|
||||||
gsr := gsheet2csv.NewReaderFromURL(url)
|
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||||
records, err := gsr.ReadAll()
|
records, err := gsr.ReadAll()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "Error reading from %s\n", gsr.URL)
|
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
18
io/transforms/gsheet2csv/fixtures/sheet.csv
Normal file
18
io/transforms/gsheet2csv/fixtures/sheet.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment
|
||||||
|
"# this is, well, a quoted comment"
|
||||||
|
"# this is a ""super""-quoted comment"
|
||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
"# comment with trailing comma,"
|
||||||
|
"#1",2,#3
|
Can't render this file because it contains an unexpected character in line 10 and column 16.
|
@ -17,6 +17,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
@ -39,6 +40,22 @@ type Reader struct {
|
|||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewReaderFrom(urlOrPath string) *Reader {
|
||||||
|
if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") {
|
||||||
|
return NewReaderFromURL(urlOrPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
urlOrPath = strings.TrimPrefix(urlOrPath, "file://")
|
||||||
|
f, err := os.Open(urlOrPath)
|
||||||
|
r := NewReader(f)
|
||||||
|
r.URL = urlOrPath
|
||||||
|
if err != nil {
|
||||||
|
r.err = err
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
func NewReaderFromURL(url string) *Reader {
|
func NewReaderFromURL(url string) *Reader {
|
||||||
docid, gid := ParseIDs(url)
|
docid, gid := ParseIDs(url)
|
||||||
|
|
||||||
@ -86,7 +103,7 @@ func NewReader(r io.Reader) *Reader {
|
|||||||
csvr := csv.NewReader(r)
|
csvr := csv.NewReader(r)
|
||||||
csvr.Comma = ','
|
csvr.Comma = ','
|
||||||
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
||||||
csvr.FieldsPerRecord = 0 // Google Sheets is consistent
|
csvr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||||
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
||||||
csvr.TrimLeadingSpace = false
|
csvr.TrimLeadingSpace = false
|
||||||
csvr.ReuseRecord = false
|
csvr.ReuseRecord = false
|
||||||
@ -187,6 +204,7 @@ func ParseIDs(urlStr string) (docid string, gid string) {
|
|||||||
type Writer struct {
|
type Writer struct {
|
||||||
*csv.Writer
|
*csv.Writer
|
||||||
Comment rune
|
Comment rune
|
||||||
|
QuoteComments bool
|
||||||
w io.Writer
|
w io.Writer
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,25 +219,31 @@ func NewWriter(w io.Writer) *Writer {
|
|||||||
func (w *Writer) Write(record []string) error {
|
func (w *Writer) Write(record []string) error {
|
||||||
if len(record) > 1 {
|
if len(record) > 1 {
|
||||||
if rv1, _ := utf8.DecodeRuneInString(record[0]); rv1 == w.Comment {
|
if rv1, _ := utf8.DecodeRuneInString(record[0]); rv1 == w.Comment {
|
||||||
w.Flush()
|
|
||||||
|
|
||||||
lastNonEmpty := len(record) - 1
|
lastNonEmpty := len(record) - 1
|
||||||
|
if lastNonEmpty > -1 {
|
||||||
for len(record[lastNonEmpty]) == 0 {
|
for len(record[lastNonEmpty]) == 0 {
|
||||||
lastNonEmpty -= 1
|
lastNonEmpty -= 1
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
first := 0
|
||||||
if lastNonEmpty == 0 {
|
if lastNonEmpty == 0 {
|
||||||
record = record[:1]
|
record = record[:1]
|
||||||
} else {
|
// if !w.QuoteComments {
|
||||||
|
// return nil
|
||||||
|
// }
|
||||||
|
first = -1
|
||||||
|
}
|
||||||
|
|
||||||
for i, f := range record {
|
for i, f := range record {
|
||||||
if i == 0 || strings.Contains(f, `"`) {
|
if i == first || strings.Contains(f, `"`) || strings.Contains(f, string(w.Comma)) {
|
||||||
f = strings.ReplaceAll(f, `"`, `""`)
|
f = strings.ReplaceAll(f, `"`, `""`)
|
||||||
record[i] = `"` + f + `"`
|
record[i] = `"` + f + `"`
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
line := strings.Join(record, string(w.Comma))
|
line := strings.Join(record, string(w.Comma))
|
||||||
|
w.Flush()
|
||||||
if _, err := w.w.Write([]byte(line + "\n")); err != nil {
|
if _, err := w.w.Write([]byte(line + "\n")); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -230,6 +254,19 @@ func (w *Writer) Write(record []string) error {
|
|||||||
return w.Writer.Write(record)
|
return w.Writer.Write(record)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// QuoteRecord rewrites record in place, wrapping in double quotes each field
// that needs it and doubling any double quotes inside such a field.
//
// A field is quoted when any of the following holds:
//   - it contains a double quote;
//   - it contains the comma delimiter;
//   - it is the first field and starts with the comment prefix.
//
// An empty comma or comment disables the corresponding check. Without that
// guard the empty string would match every field, because strings.Contains
// and strings.HasPrefix both report true for an empty argument.
func QuoteRecord(record []string, comma, comment string) {
	const quote = `"`
	const escapedQuote = quote + quote

	for i, field := range record {
		commentLike := i == 0 && comment != "" && strings.HasPrefix(field, comment)
		hasComma := comma != "" && strings.Contains(field, comma)
		if !commentLike && !hasComma && !strings.Contains(field, quote) {
			continue
		}

		record[i] = quote + strings.ReplaceAll(field, quote, escapedQuote) + quote
	}
}
|
||||||
|
|
||||||
func (w *Writer) WriteAll(records [][]string) error {
|
func (w *Writer) WriteAll(records [][]string) error {
|
||||||
for _, r := range records {
|
for _, r := range records {
|
||||||
if err := w.Write(r); err != nil {
|
if err := w.Write(r); err != nil {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user