mirror of
https://github.com/therootcompany/golib.git
synced 2025-10-12 20:18:16 +00:00
242 lines
4.9 KiB
Go
242 lines
4.9 KiB
Go
// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
//
// SPDX-License-Identifier: CC0-1.0
|
|
|
|
package gsheet2csv
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
var (
	// ErrHTTPGet is returned by GetSheet when the download responds with
	// any status other than 200 OK.
	ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")

	// httpGet performs the HTTP GET used by GetSheet. It is a variable so
	// that tests can substitute a mock.
	httpGet = http.Get
)
|
|
|
|
// Reader reads CSV records, optionally skipping comment rows, and can be
// backed by an HTTP download of a Google Sheet.
//
// It embeds *csv.Reader, so the csv settings remain adjustable by the caller.
type Reader struct {
	*csv.Reader

	// DocID, GID and URL identify the sheet; NewReaderFromIDs fills them in
	// after a successful download.
	DocID string
	GID   string
	URL   string

	// QuotedComments makes Read skip rows whose first field starts with
	// Comment and whose remaining fields are all empty.
	QuotedComments bool
	// Comment is the rune that marks a comment row.
	Comment rune

	// r is the source handed to NewReader.
	r io.Reader
	// resp is the HTTP response backing the reader, when there is one.
	resp *http.Response
	// close reports whether Read should close resp.Body once reading fails
	// or reaches the end.
	close bool
	// err is a deferred construction error, returned by every Read call.
	err error
}
|
|
|
|
func NewReaderFromURL(url string) *Reader {
|
|
docid, gid := ParseIDs(url)
|
|
|
|
return NewReaderFromIDs(docid, gid)
|
|
}
|
|
|
|
func NewReaderFromIDs(docid, gid string) *Reader {
|
|
resp, err := GetSheet(docid, gid)
|
|
if err != nil {
|
|
r := NewReader(nil)
|
|
r.err = err
|
|
return r
|
|
}
|
|
|
|
r := NewReader(resp.Body)
|
|
r.URL = ToCSVURL(docid, gid)
|
|
r.DocID = docid
|
|
r.GID = gid
|
|
r.resp = resp
|
|
r.close = true
|
|
return r
|
|
}
|
|
|
|
// ToCSVURL returns the Google Sheets CSV export URL for the given document
// ID and sheet gid. The values are inserted as-is, without escaping.
func ToCSVURL(docid, gid string) string {
	const exportFormat = "https://docs.google.com/spreadsheets/d/%s/export?format=csv&usp=sharing&gid=%s"

	return fmt.Sprintf(exportFormat, docid, gid)
}
|
|
|
|
func GetSheet(docid, gid string) (*http.Response, error) {
|
|
downloadURL := ToCSVURL(docid, gid)
|
|
|
|
resp, err := httpGet(downloadURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
_ = resp.Body.Close()
|
|
return nil, ErrHTTPGet
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func NewReader(r io.Reader) *Reader {
|
|
csvr := csv.NewReader(r)
|
|
csvr.Comma = ','
|
|
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
|
csvr.FieldsPerRecord = 0 // Google Sheets is consistent
|
|
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
|
csvr.TrimLeadingSpace = false
|
|
csvr.ReuseRecord = false
|
|
return &Reader{
|
|
Reader: csvr,
|
|
QuotedComments: true,
|
|
Comment: '#',
|
|
r: r,
|
|
}
|
|
}
|
|
|
|
func (r *Reader) Read() ([]string, error) {
|
|
if r.err != nil {
|
|
return nil, r.err
|
|
}
|
|
|
|
for {
|
|
record, err := r.Reader.Read()
|
|
if err != nil {
|
|
if r.close {
|
|
_ = r.resp.Body.Close()
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
if r.QuotedComments && len(record[0]) > 0 {
|
|
runeValue, _ := utf8.DecodeRuneInString(record[0])
|
|
if runeValue == r.Comment {
|
|
last := len(record) - 1
|
|
for len(record[last]) == 0 {
|
|
last -= 1
|
|
}
|
|
if last == 0 {
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
return record, nil
|
|
}
|
|
}
|
|
|
|
func (r *Reader) ReadAll() ([][]string, error) {
|
|
var records [][]string
|
|
|
|
for {
|
|
record, err := r.Read()
|
|
if nil != err {
|
|
if errors.Is(err, io.EOF) {
|
|
return records, nil
|
|
}
|
|
return records, err
|
|
}
|
|
records = append(records, record)
|
|
}
|
|
}
|
|
|
|
// ParseIDs extracts the document ID and sheet gid from a Google Sheets URL.
//
// The document ID is the text following "/spreadsheets/d/", ending at the
// next '/', '?' or '#', or at the end of the string. The gid is the value
// following the first "gid=" in urlStr, ending at '#', '&', '?', '/' or the
// end of the string; it defaults to "0" when missing or empty.
//
// Both results are empty when urlStr has no "/spreadsheets/d/" segment or
// the document ID is empty.
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"

	startIdx := strings.Index(urlStr, prefix)
	if startIdx == -1 {
		return "", ""
	}

	// A shortened URL can carry the query or fragment directly after the ID
	// (".../d/ID?gid=1"), so '?' and '#' end the ID just like '/'.
	docid = urlStr[startIdx+len(prefix):]
	if end := strings.IndexAny(docid, "/?#"); end != -1 {
		docid = docid[:end]
	}
	if docid == "" {
		return "", ""
	}

	gid = "0"
	if gidIdx := strings.Index(urlStr, "gid="); gidIdx != -1 {
		value := urlStr[gidIdx+len("gid="):]
		if end := strings.IndexAny(value, "#&?/"); end != -1 {
			value = value[:end]
		}
		if value != "" {
			gid = value
		}
	}

	return docid, gid
}
|
|
|
|
// Writer writes CSV records, giving special treatment to rows whose first
// field starts with the Comment rune.
//
// It embeds *csv.Writer, so Comma, UseCRLF, Flush and Error are available
// directly on the Writer.
type Writer struct {
	*csv.Writer

	// Comment is the rune that marks a comment-led row when it is the first
	// rune of the first field.
	Comment rune

	// w is the destination. Comment-led rows are written to it directly
	// rather than through the embedded csv.Writer.
	w io.Writer
}

// NewWriter returns a Writer that writes to w and uses '#' as the comment
// rune.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes a single record.
//
// Records with more than one field whose first field starts with the
// Comment rune are handled here instead of by the embedded csv.Writer:
//
//   - If every field after the first is empty, only the first field is
//     written, verbatim and unquoted, as a bare comment line.
//   - Otherwise the first field is always quoted, so the row cannot be
//     mistaken for a comment line. Each other field is quoted only when it
//     contains the delimiter, a double quote, CR or LF.
//
// All other records are passed to the embedded csv.Writer unchanged. The
// record slice is never modified. An error is returned if flushing buffered
// output or writing the row fails.
func (w *Writer) Write(record []string) error {
	if len(record) <= 1 {
		return w.Writer.Write(record)
	}
	if first, _ := utf8.DecodeRuneInString(record[0]); first != w.Comment {
		return w.Writer.Write(record)
	}

	// This row bypasses the csv.Writer's buffer, so anything already
	// buffered has to reach the destination first to keep rows in order.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	// Index of the last non-empty field; stops at 0 so it cannot underflow.
	last := len(record) - 1
	for last > 0 && record[last] == "" {
		last--
	}

	var fields []string
	if last == 0 {
		fields = record[:1]
	} else {
		// Build a fresh slice so the caller's record is left untouched.
		fields = make([]string, len(record))
		for i, f := range record {
			if i == 0 || strings.ContainsRune(f, w.Comma) || strings.ContainsAny(f, "\"\r\n") {
				f = `"` + strings.ReplaceAll(f, `"`, `""`) + `"`
			}
			fields[i] = f
		}
	}

	// Match the line ending the embedded csv.Writer uses for other rows.
	eol := "\n"
	if w.UseCRLF {
		eol = "\r\n"
	}

	line := strings.Join(fields, string(w.Comma)) + eol
	if _, err := w.w.Write([]byte(line)); err != nil {
		return err
	}
	return nil
}

// WriteAll writes every record with Write, stopping at the first error,
// then flushes the embedded csv.Writer and returns any error it recorded.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
|