diff --git a/.gitignore b/.gitignore index ea4a28d..ef52c14 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ cmd/sql-migrate/sql-migrate io/transform/gsheet2csv/cmd/gsheet2csv/gsheet2csv io/transform/gsheet2csv/cmd/gsheet2env/gsheet2env io/transform/gsheet2csv/cmd/gsheet2tsv/gsheet2tsv +tools/jsontypes/cmd/jsonpaths/jsonpaths +tools/jsontypes/jsonpaths # Binaries for programs and plugins *.exe diff --git a/tools/jsontypes/LICENSE b/tools/jsontypes/LICENSE new file mode 100644 index 0000000..abf419b --- /dev/null +++ b/tools/jsontypes/LICENSE @@ -0,0 +1,10 @@ +Authored in 2026 by AJ ONeal, generated by Claude Opus 4.6. + +To the extent possible under law, the author(s) have dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +You should have received a copy of the CC0 Public Domain Dedication along with +this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>. + +SPDX-License-Identifier: CC0-1.0 diff --git a/tools/jsontypes/analyzer.go b/tools/jsontypes/analyzer.go new file mode 100644 index 0000000..73a7cff --- /dev/null +++ b/tools/jsontypes/analyzer.go @@ -0,0 +1,564 @@ +package jsontypes + +import ( + "encoding/json" + "fmt" + "sort" + "strings" +) + +type Analyzer struct { + Prompter *Prompter + anonymous bool + askTypes bool + typeCounter int + // knownTypes maps shape signature → type name + knownTypes map[string]*structType + // typesByName maps type name → structType for collision detection + typesByName map[string]*structType + // pendingTypeName is set by the combined map/struct+name prompt + // and consumed by decideTypeName to avoid double-prompting + pendingTypeName string +} + +type structType struct { + name string + fields map[string]string // field name → value kind ("string", "number", "bool", "null", "object", "array", "mixed") +} + +type shapeGroup struct { + sig string + fields []string + members []map[string]any +} + +func NewAnalyzer(inputIsStdin, 
anonymous, askTypes bool) (*Analyzer, error) { + p, err := NewPrompter(inputIsStdin, anonymous) + if err != nil { + return nil, err + } + return &Analyzer{ + Prompter: p, + anonymous: anonymous, + askTypes: askTypes, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + }, nil +} + +func (a *Analyzer) Close() { + a.Prompter.Close() +} + +// analyze traverses a JSON value depth-first and returns annotated flat paths. +func (a *Analyzer) Analyze(path string, val any) []string { + switch v := val.(type) { + case nil: + return []string{path + "{null}"} + case bool: + return []string{path + "{bool}"} + case json.Number: + if _, err := v.Int64(); err == nil { + return []string{path + "{int}"} + } + return []string{path + "{float}"} + case string: + return []string{path + "{string}"} + case []any: + return a.analyzeArray(path, v) + case map[string]any: + return a.analyzeObject(path, v) + default: + return []string{path + "{unknown}"} + } +} + +func (a *Analyzer) analyzeObject(path string, obj map[string]any) []string { + if len(obj) == 0 { + return []string{path + "{any}"} + } + + isMap := a.decideMapOrStruct(path, obj) + if isMap { + return a.analyzeAsMap(path, obj) + } + return a.analyzeAsStruct(path, obj) +} + +func (a *Analyzer) analyzeAsMap(path string, obj map[string]any) []string { + keyName := a.decideKeyName(path, obj) + + // Collect all values and group by shape for type unification + values := make([]any, 0, len(obj)) + for _, v := range obj { + values = append(values, v) + } + + return a.analyzeCollectionValues(path+"["+keyName+"]", values) +} + +func (a *Analyzer) analyzeAsStruct(path string, obj map[string]any) []string { + return a.analyzeAsStructMulti(path, []map[string]any{obj}) +} + +// analyzeAsStructMulti handles one or more instances of the same struct type, +// collecting all values for each field across instances for proper unification. 
+func (a *Analyzer) analyzeAsStructMulti(path string, instances []map[string]any) []string { + // Collect all field names across all instances + merged := mergeObjects(instances) + typeName := a.decideTypeName(path, merged) + + prefix := path + "{" + typeName + "}" + var paths []string + keys := sortedKeys(merged) + for _, k := range keys { + // Collect all values for this field across instances + var fieldValues []any + fieldPresent := 0 + for _, inst := range instances { + if v, ok := inst[k]; ok { + fieldValues = append(fieldValues, v) + fieldPresent++ + } + } + // If the field is missing in some instances, it's optional + if fieldPresent < len(instances) { + paths = append(paths, prefix+"."+k+"{null}") + } + + if len(fieldValues) == 1 { + childPaths := a.Analyze(prefix+"."+k, fieldValues[0]) + paths = append(paths, childPaths...) + } else if len(fieldValues) > 1 { + childPaths := a.analyzeCollectionValues(prefix+"."+k, fieldValues) + paths = append(paths, childPaths...) + } + } + if len(paths) == 0 { + paths = append(paths, prefix) + } + return paths +} + +func (a *Analyzer) analyzeArray(path string, arr []any) []string { + if len(arr) == 0 { + return []string{path + "[]{any}"} + } + + // Check for tuple (short array of mixed types) + if a.isTupleCandidate(arr) { + isTuple := a.decideTupleOrList(path, arr) + if isTuple { + return a.analyzeAsTuple(path, arr) + } + } + + return a.analyzeCollectionValues(path+"[]", arr) +} + +func (a *Analyzer) analyzeAsTuple(path string, arr []any) []string { + var paths []string + for i, v := range arr { + childPaths := a.Analyze(fmt.Sprintf("%s[%d]", path, i), v) + paths = append(paths, childPaths...) + } + return paths +} + +// analyzeCollectionValues handles type unification for a set of values at the +// same path position (map values or array elements). 
+func (a *Analyzer) analyzeCollectionValues(path string, values []any) []string { + // Group values by kind + var ( + nullCount int + objects []map[string]any + arrays [][]any + primitives []any + primTypeSet = make(map[string]bool) + ) + + for _, v := range values { + switch tv := v.(type) { + case nil: + nullCount++ + case map[string]any: + objects = append(objects, tv) + case []any: + arrays = append(arrays, tv) + default: + primitives = append(primitives, v) + primTypeSet[primitiveType(v)] = true + } + } + + var paths []string + + // Handle nulls: indicates the value is optional + if nullCount > 0 && (len(objects) > 0 || len(arrays) > 0 || len(primitives) > 0) { + paths = append(paths, path+"{null}") + } else if nullCount > 0 && len(objects) == 0 && len(arrays) == 0 && len(primitives) == 0 { + return []string{path + "{null}"} + } + + // Handle primitives + for pt := range primTypeSet { + paths = append(paths, path+"{"+pt+"}") + } + + // Handle objects by grouping by shape and unifying + if len(objects) > 0 { + paths = append(paths, a.unifyObjects(path, objects)...) + } + + // Handle arrays: collect all elements across all array instances + if len(arrays) > 0 { + var allElements []any + for _, arr := range arrays { + allElements = append(allElements, arr...) + } + if len(allElements) > 0 { + paths = append(paths, a.analyzeCollectionValues(path+"[]", allElements)...) + } else { + paths = append(paths, path+"[]{any}") + } + } + + return paths +} + +// unifyObjects groups objects by shape, prompts about type relationships, +// and returns the unified paths. +func (a *Analyzer) unifyObjects(path string, objects []map[string]any) []string { + // Before grouping by shape, check if these objects are really maps by + // pooling all keys across all instances. Individual objects may have too + // few keys for heuristics, but collectively the pattern is clear. 
+ if combined := a.tryAnalyzeAsMaps(path, objects); combined != nil { + return combined + } + + groups := make(map[string]*shapeGroup) + var groupOrder []string + + for _, obj := range objects { + sig := shapeSignature(obj) + if g, ok := groups[sig]; ok { + g.members = append(g.members, obj) + } else { + g := &shapeGroup{ + sig: sig, + fields: sortedKeys(obj), + members: []map[string]any{obj}, + } + groups[sig] = g + groupOrder = append(groupOrder, sig) + } + } + + if len(groups) == 1 { + // All same shape, analyze with all instances for field unification + return a.analyzeAsStructMulti(path, objects) + } + + // Multiple shapes — in anonymous mode default to same type + if a.anonymous { + return a.analyzeAsStructMulti(path, objects) + } + return a.promptTypeUnification(path, groups, groupOrder) +} + +// tryAnalyzeAsMaps pools all keys from multiple objects and checks if they +// collectively look like map keys (e.g., many objects each with 1-2 numeric +// keys). Returns nil if they don't look like maps. +func (a *Analyzer) tryAnalyzeAsMaps(path string, objects []map[string]any) []string { + // Collect all keys across all objects + allKeys := make(map[string]bool) + for _, obj := range objects { + for k := range obj { + allKeys[k] = true + } + } + + // Need enough keys to be meaningful + if len(allKeys) < 3 { + return nil + } + + // Build a synthetic object with all keys for heuristic checking + combined := make(map[string]any, len(allKeys)) + for _, obj := range objects { + for k, v := range obj { + if _, exists := combined[k]; !exists { + combined[k] = v + } + } + } + + isMap, confident := looksLikeMap(combined) + if !isMap || !confident { + return nil + } + + // These are maps — merge all entries and analyze as one map + return a.analyzeAsMap(path, combined) +} + +// promptTypeUnification presents shape groups to the user and asks if they +// are the same type (with optional fields) or different types. 
+func (a *Analyzer) promptTypeUnification(path string, groups map[string]*shapeGroup, groupOrder []string) []string { + const maxFields = 8 + + // Compute shared and unique fields across all shapes + shared, uniquePerShape := shapeFieldBreakdown(groups, groupOrder) + totalInstances := 0 + for _, sig := range groupOrder { + totalInstances += len(groups[sig].members) + } + + fmt.Fprintf(a.Prompter.output, "\nAt %s — %d shapes (%d instances):\n", + shortPath(path), len(groupOrder), totalInstances) + + // Show shared fields + if len(shared) > 0 { + preview := shared + if len(preview) > maxFields { + preview = preview[:maxFields] + } + fmt.Fprintf(a.Prompter.output, " shared fields (%d): %s", len(shared), strings.Join(preview, ", ")) + if len(shared) > maxFields { + fmt.Fprintf(a.Prompter.output, ", ...") + } + fmt.Fprintln(a.Prompter.output) + } else { + fmt.Fprintf(a.Prompter.output, " no shared fields\n") + } + + // Show unique fields per shape (truncated) + shownShapes := groupOrder + if len(shownShapes) > 5 { + shownShapes = shownShapes[:5] + } + for i, sig := range shownShapes { + g := groups[sig] + unique := uniquePerShape[sig] + if len(unique) == 0 { + fmt.Fprintf(a.Prompter.output, " shape %d (%d instances): no unique fields\n", i+1, len(g.members)) + continue + } + preview := unique + if len(preview) > maxFields { + preview = preview[:maxFields] + } + fmt.Fprintf(a.Prompter.output, " shape %d (%d instances): +%d unique: %s", + i+1, len(g.members), len(unique), strings.Join(preview, ", ")) + if len(unique) > maxFields { + fmt.Fprintf(a.Prompter.output, ", ...") + } + fmt.Fprintln(a.Prompter.output) + } + if len(groupOrder) > 5 { + fmt.Fprintf(a.Prompter.output, " ... and %d more shapes\n", len(groupOrder)-5) + } + + // Decide default: if unique fields heavily outnumber meaningful shared + // fields, default to "different". Ubiquitous fields (id, name, *_at, etc.) + // don't count as meaningful shared fields. 
+ meaningfulShared := 0 + for _, f := range shared { + if !isUbiquitousField(f) { + meaningfulShared++ + } + } + totalUnique := 0 + for _, sig := range groupOrder { + totalUnique += len(uniquePerShape[sig]) + } + defaultChoice := "s" + if totalUnique >= 2*meaningfulShared { + defaultChoice = "d" + } + + // Combined prompt: same/different/show full list + var choice string + for { + choice = a.Prompter.ask( + "[s]ame type? [d]ifferent? show [f]ull list?", + defaultChoice, []string{"s", "d", "f"}, + ) + if choice != "f" { + break + } + for i, sig := range groupOrder { + g := groups[sig] + fmt.Fprintf(a.Prompter.output, " Shape %d (%d instances): %s\n", + i+1, len(g.members), strings.Join(g.fields, ", ")) + } + } + + if choice == "s" { + // Same type — analyze with all instances for field unification + var all []map[string]any + for _, sig := range groupOrder { + all = append(all, groups[sig].members...) + } + return a.analyzeAsStructMulti(path, all) + } + + // Different types — collect all names first, then analyze + names := make([]string, len(groupOrder)) + for i, sig := range groupOrder { + g := groups[sig] + inferred := inferTypeName(path) + if inferred == "" { + a.typeCounter++ + inferred = fmt.Sprintf("Struct%d", a.typeCounter) + } + // Pre-resolve collision so the suggested name is valid + merged := mergeObjects(g.members) + newFields := fieldSet(merged) + shapeSig := shapeSignature(merged) + inferred = a.preResolveCollision(path, inferred, newFields, shapeSig) + + fmt.Fprintf(a.Prompter.output, " Shape %d (%d instances): %s\n", + i+1, len(g.members), strings.Join(g.fields, ", ")) + name := a.Prompter.askTypeName( + fmt.Sprintf(" Name for shape %d?", i+1), inferred) + names[i] = name + + // Register early so subsequent shapes see this name as taken + a.registerType(shapeSig, name, newFields) + } + + // Now analyze each group with its pre-assigned name + var paths []string + for i, sig := range groupOrder { + g := groups[sig] + a.pendingTypeName = names[i] + 
paths = append(paths, a.analyzeAsStructMulti(path, g.members)...) + } + return paths +} + +// shapeFieldBreakdown computes the shared fields (present in ALL shapes) and +// unique fields (present in only that shape) for display. +func shapeFieldBreakdown(groups map[string]*shapeGroup, groupOrder []string) (shared []string, uniquePerShape map[string][]string) { + if len(groupOrder) == 0 { + return nil, nil + } + + // Count how many shapes each field appears in + fieldCount := make(map[string]int) + for _, sig := range groupOrder { + for _, f := range groups[sig].fields { + fieldCount[f]++ + } + } + + total := len(groupOrder) + for _, f := range sortedFieldCount(fieldCount) { + if fieldCount[f] == total { + shared = append(shared, f) + } + } + + sharedSet := make(map[string]bool, len(shared)) + for _, f := range shared { + sharedSet[f] = true + } + + uniquePerShape = make(map[string][]string) + for _, sig := range groupOrder { + var unique []string + for _, f := range groups[sig].fields { + if !sharedSet[f] { + unique = append(unique, f) + } + } + uniquePerShape[sig] = unique + } + return shared, uniquePerShape +} + +func sortedFieldCount(m map[string]int) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +// isTupleCandidate returns true if the array might be a tuple: +// short (2-5 elements) with mixed types. 
+func (a *Analyzer) isTupleCandidate(arr []any) bool { + if len(arr) < 2 || len(arr) > 5 { + return false + } + types := make(map[string]bool) + for _, v := range arr { + types[kindOf(v)] = true + } + return len(types) > 1 +} + +func primitiveType(v any) string { + switch tv := v.(type) { + case bool: + return "bool" + case json.Number: + if _, err := tv.Int64(); err == nil { + return "int" + } + return "float" + case string: + return "string" + default: + return "unknown" + } +} + +func kindOf(v any) string { + switch v.(type) { + case nil: + return "null" + case bool: + return "bool" + case json.Number: + return "number" + case string: + return "string" + case []any: + return "array" + case map[string]any: + return "object" + default: + return "unknown" + } +} + +func shapeSignature(obj map[string]any) string { + keys := sortedKeys(obj) + return strings.Join(keys, ",") +} + +func sortedKeys(obj map[string]any) []string { + keys := make([]string, 0, len(obj)) + for k := range obj { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +// mergeObjects merges multiple objects into one representative that has all +// fields from all instances. For each field, picks the first non-null value. +func mergeObjects(objects []map[string]any) map[string]any { + merged := make(map[string]any) + for _, obj := range objects { + for k, v := range obj { + if existing, ok := merged[k]; !ok || existing == nil { + merged[k] = v + } + } + } + return merged +} diff --git a/tools/jsontypes/analyzer_test.go b/tools/jsontypes/analyzer_test.go new file mode 100644 index 0000000..e22a4df --- /dev/null +++ b/tools/jsontypes/analyzer_test.go @@ -0,0 +1,736 @@ +package jsontypes + +import ( + "bufio" + "encoding/json" + "io" + "os" + "sort" + "strings" + "testing" +) + +// testAnalyzer creates an analyzer in anonymous mode (no prompts). 
+func testAnalyzer(t *testing.T) *Analyzer { + t.Helper() + a := &Analyzer{ + Prompter: &Prompter{ + reader: nil, + output: os.Stderr, + }, + anonymous: true, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + } + return a +} + +func sortPaths(paths []string) []string { + sorted := make([]string, len(paths)) + copy(sorted, paths) + sort.Strings(sorted) + return sorted +} + +func TestAnalyzePrimitive(t *testing.T) { + a := testAnalyzer(t) + tests := []struct { + name string + json string + want string + }{ + {"null", "", ".{null}"}, + {"bool", "", ".{bool}"}, + {"int", "", ".{int}"}, + {"float", "", ".{float}"}, + {"string", "", ".{string}"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var val any + switch tt.name { + case "null": + val = nil + case "bool": + val = true + case "int": + val = jsonNum("42") + case "float": + val = jsonNum("3.14") + case "string": + val = "hello" + } + paths := a.Analyze(".", val) + if len(paths) != 1 || paths[0] != tt.want { + t.Errorf("got %v, want [%s]", paths, tt.want) + } + }) + } +} + +func TestAnalyzeSimpleStruct(t *testing.T) { + a := testAnalyzer(t) + obj := map[string]any{ + "name": "Alice", + "age": jsonNum("30"), + } + paths := sortPaths(a.Analyze(".", obj)) + want := sortPaths([]string{ + ".{Root}.age{int}", + ".{Root}.name{string}", + }) + assertPaths(t, paths, want) +} + +func TestAnalyzeMapDetection(t *testing.T) { + a := testAnalyzer(t) + // Keys with digits + same length → detected as map + obj := map[string]any{ + "abc123": map[string]any{"name": "a"}, + "def456": map[string]any{"name": "b"}, + "ghi789": map[string]any{"name": "c"}, + } + paths := sortPaths(a.Analyze(".", obj)) + want := sortPaths([]string{ + ".[string]{RootItem}.name{string}", + }) + assertPaths(t, paths, want) +} + +func TestAnalyzeArrayOfObjects(t *testing.T) { + a := testAnalyzer(t) + arr := []any{ + map[string]any{"x": jsonNum("1")}, + map[string]any{"x": jsonNum("2")}, + } + 
paths := sortPaths(a.Analyze(".", arr)) + want := sortPaths([]string{ + ".[]{RootItem}.x{int}", + }) + assertPaths(t, paths, want) +} + +func TestAnalyzeOptionalFields(t *testing.T) { + a := testAnalyzer(t) + // Two objects with different fields → same type with optional fields + values := []any{ + map[string]any{"name": "Alice", "age": jsonNum("30")}, + map[string]any{"name": "Bob"}, + } + paths := sortPaths(a.analyzeCollectionValues(".[]", values)) + want := sortPaths([]string{ + ".[]{RootItem}.age{null}", + ".[]{RootItem}.age{int}", + ".[]{RootItem}.name{string}", + }) + assertPaths(t, paths, want) +} + +func TestAnalyzeNullableField(t *testing.T) { + a := testAnalyzer(t) + values := []any{ + map[string]any{"data": nil}, + map[string]any{"data": "hello"}, + } + paths := sortPaths(a.analyzeCollectionValues(".[]", values)) + want := sortPaths([]string{ + ".[]{RootItem}.data{null}", + ".[]{RootItem}.data{string}", + }) + assertPaths(t, paths, want) +} + +func TestAnalyzeEmptyArray(t *testing.T) { + a := testAnalyzer(t) + paths := a.Analyze(".", []any{}) + want := []string{".[]{any}"} + assertPaths(t, paths, want) +} + +func TestAnalyzeEmptyObject(t *testing.T) { + a := testAnalyzer(t) + paths := a.Analyze(".", map[string]any{}) + want := []string{".{any}"} + assertPaths(t, paths, want) +} + +func TestHeuristicsMapDetection(t *testing.T) { + tests := []struct { + name string + keys []string + wantMap bool + wantConf bool + }{ + {"numeric keys", []string{"1", "2", "3"}, true, true}, + {"alphanum IDs", []string{"abc123", "def456", "ghi789"}, true, true}, + {"field names", []string{"name", "age", "email"}, false, true}, + {"two keys", []string{"ab", "cd"}, false, false}, + {"hex IDs", []string{"a1b2c3d4", "e5f6a7b8", "c9d0e1f2"}, true, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + obj := make(map[string]any) + for _, k := range tt.keys { + obj[k] = "value" + } + isMap, confident := looksLikeMap(obj) + if isMap != tt.wantMap || 
confident != tt.wantConf { + t.Errorf("looksLikeMap(%v) = (%v, %v), want (%v, %v)", + tt.keys, isMap, confident, tt.wantMap, tt.wantConf) + } + }) + } +} + +func TestInferTypeName(t *testing.T) { + tests := []struct { + path string + want string + }{ + {".[person_id]", "Person"}, + {".{Root}.friends[]", "Friend"}, + {".{Root}.address", "Address"}, + {".", "Root"}, + {".[]", "RootItem"}, + {".[string]", "RootItem"}, + {".[int]", "RootItem"}, + {".{Root}.json", "RootJSON"}, + {".{Root}.data", "RootData"}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + got := inferTypeName(tt.path) + if got != tt.want { + t.Errorf("inferTypeName(%q) = %q, want %q", tt.path, got, tt.want) + } + }) + } +} + +func TestSingularize(t *testing.T) { + tests := []struct { + in, want string + }{ + {"Friends", "Friend"}, + {"Categories", "Category"}, + {"Boxes", "Box"}, + {"Address", "Address"}, + {"Bus", "Bus"}, + } + for _, tt := range tests { + t.Run(tt.in, func(t *testing.T) { + got := singularize(tt.in) + if got != tt.want { + t.Errorf("singularize(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} + +func TestTypeNameSubsetExtends(t *testing.T) { + // When two objects at the SAME path have overlapping fields (one a subset), + // they should get the same type name (via name collision + subset merge). + // Objects at DIFFERENT paths get separate types even if fields overlap, + // because they represent different domain concepts. 
+ a := testAnalyzer(t) + arr := []any{ + map[string]any{"name": "Alice", "age": jsonNum("30")}, + map[string]any{"name": "Bob", "age": jsonNum("25"), "email": "bob@example.com"}, + } + obj := map[string]any{"people": arr} + paths := sortPaths(a.Analyze(".", obj)) + + // Both array elements should be unified under the same type + typeName := "" + for _, p := range paths { + if strings.Contains(p, ".people[]") { + if idx := strings.Index(p, "{"); idx >= 0 { + end := strings.Index(p[idx:], "}") + if typeName == "" { + typeName = p[idx+1 : idx+end] + } else if p[idx+1:idx+end] != typeName && p[idx+1:idx+end] != "Root" { + t.Errorf("expected all people paths to use type %q, got %s", typeName, p) + } + } + } + } + if typeName == "" { + t.Fatal("expected a type name for people array elements") + } +} + +func TestParentTypeName(t *testing.T) { + tests := []struct { + path string + want string + }{ + {".[id]{Document}.rooms[]{Room}.details", "Room"}, + {".[id]{Document}.name", "Document"}, + {".items[]", ""}, + {".{Root}.data{null}", "Root"}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + got := parentTypeName(tt.path) + if got != tt.want { + t.Errorf("parentTypeName(%q) = %q, want %q", tt.path, got, tt.want) + } + }) + } +} + +func TestShortPath(t *testing.T) { + tests := []struct { + path string + want string + }{ + { + ".{RoomsResult}.rooms[]{Room}.room[string][]{RoomRoom}.json{RoomRoomJSON}.feature_types[]", + ".rooms[].room[string][].json{RoomRoomJSON}.feature_types[]", + }, + { + ".{Root}.name{string}", + ".name{string}", + }, + { + ".", + ".", + }, + { + ".{Root}", + ".{Root}", + }, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + got := shortPath(tt.path) + if got != tt.want { + t.Errorf("shortPath(%q)\n got: %q\n want: %q", tt.path, got, tt.want) + } + }) + } +} + +func TestSuggestAlternativeNameUsesParent(t *testing.T) { + a := testAnalyzer(t) + // Register a type named "Room" + a.registerType("a,b", "Room", 
map[string]string{"a": "string", "b": "string"}) + + // Suggest alternative at a path under {Document} + got := a.suggestAlternativeName(".[id]{Document}.rooms[]", "Room") + if got != "DocumentRoom" { + t.Errorf("got %q, want %q", got, "DocumentRoom") + } + + // Register DocumentRoom too, then it should fall back to numbered + a.registerType("c,d", "DocumentRoom", map[string]string{"c": "string", "d": "string"}) + got = a.suggestAlternativeName(".[id]{Document}.rooms[]", "Room") + if !strings.HasPrefix(got, "Room") || got == "Room" || got == "DocumentRoom" { + t.Errorf("expected numbered fallback, got %q", got) + } +} + +func TestAutoResolveCollision(t *testing.T) { + a := testAnalyzer(t) + // Register a type named "Room" with fields {a, b} + a.registerType("a,b", "Room", map[string]string{"a": "string", "b": "string"}) + + // Analyze an object at a path under {Document} that would infer "Room" + // but has completely different fields — should auto-resolve to "DocumentRoom" + obj := map[string]any{"x": "1", "y": "2"} + paths := a.Analyze(".{Document}.room", obj) + + hasDocumentRoom := false + for _, p := range paths { + if strings.Contains(p, "{DocumentRoom}") { + hasDocumentRoom = true + break + } + } + if !hasDocumentRoom { + t.Errorf("expected DocumentRoom type, got:\n %s", strings.Join(paths, "\n ")) + } +} + +func TestPooledMapDetection(t *testing.T) { + // Multiple objects each with 1-2 numeric keys should be detected as maps + // even though individually they have too few keys for heuristics. 
+ a := testAnalyzer(t) + values := []any{ + map[string]any{"230108": "a"}, + map[string]any{"138666": "b"}, + map[string]any{"162359": "c"}, + map[string]any{}, + } + paths := sortPaths(a.analyzeCollectionValues(".data", values)) + // Should detect as maps with numeric keys → [int] (map index, not array) + hasMapPath := false + for _, p := range paths { + if strings.Contains(p, "[int]") || strings.Contains(p, "[string]") { + hasMapPath = true + break + } + } + if !hasMapPath { + t.Errorf("expected map detection (paths with [int] or [string]), got:\n %s", + strings.Join(paths, "\n ")) + } +} + +func TestAnalyzeFullSample(t *testing.T) { + a := testAnalyzer(t) + + data := map[string]any{ + "abc123": map[string]any{ + "name": "Alice", + "age": jsonNum("30"), + "active": true, + "friends": []any{ + map[string]any{"name": "Bob", "identification": nil}, + map[string]any{"name": "Charlie", "identification": map[string]any{ + "type": "StateID", "number": "12345", "name": "Charlie C", + }}, + }, + }, + "def456": map[string]any{ + "name": "Dave", "age": jsonNum("25"), "active": false, "friends": []any{}, + }, + "ghi789": map[string]any{ + "name": "Eve", "age": jsonNum("28"), "active": true, "score": jsonNum("95.5"), + "friends": []any{ + map[string]any{"name": "Frank", "identification": map[string]any{ + "type": "DriverLicense", "id": "DL-999", "name": "Frank F", + "restrictions": []any{"corrective lenses"}, + }}, + }, + }, + } + + paths := sortPaths(a.Analyze(".", data)) + want := sortPaths([]string{ + ".[string]{RootItem}.active{bool}", + ".[string]{RootItem}.age{int}", + ".[string]{RootItem}.friends[]{Friend}.identification{null}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.id{null}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.id{string}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.name{string}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.number{null}", + 
".[string]{RootItem}.friends[]{Friend}.identification{Identification}.number{string}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.restrictions{null}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.restrictions[]{string}", + ".[string]{RootItem}.friends[]{Friend}.identification{Identification}.type{string}", + ".[string]{RootItem}.friends[]{Friend}.name{string}", + ".[string]{RootItem}.name{string}", + ".[string]{RootItem}.score{null}", + ".[string]{RootItem}.score{float}", + }) + assertPaths(t, paths, want) +} + +// TestDifferentTypesPromptsForNames verifies that when the user chooses +// "different" for multiple shapes at the same path: +// 1. They are prompted to name each shape group +// 2. All names are collected BEFORE recursing into children +// 3. The named types appear in the final output +func TestDifferentTypesPromptsForNames(t *testing.T) { + // Simulate: a Room has items[] containing two distinct shapes, each with + // a nested "meta" object. Names should be asked for both shapes before + // the meta objects are analyzed. 
+ arr := []any{ + // Shape 1: has "filename" and "is_required" + map[string]any{"slug": "a", "filename": "x.pdf", "is_required": true, + "meta": map[string]any{"size": jsonNum("100")}}, + map[string]any{"slug": "b", "filename": "y.pdf", "is_required": false, + "meta": map[string]any{"size": jsonNum("200")}}, + // Shape 2: has "feature" and "archived" + map[string]any{"slug": "c", "feature": "upload", "archived": false, + "meta": map[string]any{"version": jsonNum("1")}}, + map[string]any{"slug": "d", "feature": "export", "archived": true, + "meta": map[string]any{"version": jsonNum("2")}}, + } + + var output strings.Builder + a := &Analyzer{ + Prompter: &Prompter{ + reader: bufio.NewReader(strings.NewReader("")), + output: &output, + priorAnswers: []string{"d", "FileField", "FeatureField"}, + }, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + } + paths := sortPaths(a.Analyze(".{Room}.items[]", arr)) + + // Verify both named types appear in the paths + hasFileField := false + hasFeatureField := false + for _, p := range paths { + if strings.Contains(p, "{FileField}") { + hasFileField = true + } + if strings.Contains(p, "{FeatureField}") { + hasFeatureField = true + } + } + if !hasFileField { + t.Errorf("expected {FileField} type in paths:\n %s", strings.Join(paths, "\n ")) + } + if !hasFeatureField { + t.Errorf("expected {FeatureField} type in paths:\n %s", strings.Join(paths, "\n ")) + } + + // Verify that both "Name for shape" prompts appear before any deeper prompts + out := output.String() + name1Idx := strings.Index(out, "Name for shape 1?") + name2Idx := strings.Index(out, "Name for shape 2?") + if name1Idx < 0 || name2Idx < 0 { + t.Fatalf("expected both shape name prompts in output:\n%s", out) + } + if name1Idx > name2Idx { + t.Errorf("shape 1 name prompt should appear before shape 2") + } + + // Verify the formatted output includes these types + formatted := FormatPaths(paths) + foundFileField := false + 
foundFeatureField := false + for _, line := range formatted { + if strings.Contains(line, "{FileField}") { + foundFileField = true + } + if strings.Contains(line, "{FeatureField}") { + foundFeatureField = true + } + } + if !foundFileField { + t.Errorf("formatted output missing {FileField}:\n %s", strings.Join(formatted, "\n ")) + } + if !foundFeatureField { + t.Errorf("formatted output missing {FeatureField}:\n %s", strings.Join(formatted, "\n ")) + } +} + +// TestCombinedPromptShowsTypeName verifies the default-mode prompt shows +// [Root/m] (inferred name + map option), not [s/m] or [S/m]. +func TestCombinedPromptShowsTypeName(t *testing.T) { + var output strings.Builder + a := &Analyzer{ + Prompter: &Prompter{ + reader: bufio.NewReader(strings.NewReader("")), + output: &output, + priorAnswers: []string{"Root"}, // accept default + }, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + } + + obj := map[string]any{ + "errors": []any{}, + "rooms": []any{map[string]any{"name": "foo"}}, + } + a.Analyze(".", obj) + + out := output.String() + if !strings.Contains(out, "[Root/m]") { + t.Errorf("expected prompt to contain [Root/m], got output:\n%s", out) + } +} + +// TestCombinedPromptIgnoresOldPriorAnswer verifies that prior answers like +// "s" from old answer files don't corrupt the prompt default. 
func TestCombinedPromptIgnoresOldPriorAnswer(t *testing.T) {
	var output strings.Builder
	a := &Analyzer{
		Prompter: &Prompter{
			reader:       bufio.NewReader(strings.NewReader("")),
			output:       &output,
			priorAnswers: []string{"s"}, // old-style answer
		},
		knownTypes:  make(map[string]*structType),
		typesByName: make(map[string]*structType),
	}

	obj := map[string]any{
		"errors": []any{},
		"rooms":  []any{map[string]any{"name": "foo"}},
	}
	a.Analyze(".", obj)

	// The literal "s" must not leak into the rendered default; the prompt
	// should still show the inferred name.
	out := output.String()
	if strings.Contains(out, "[s/m]") {
		t.Errorf("old prior answer 's' should not appear in prompt, got output:\n%s", out)
	}
	if !strings.Contains(out, "[Root/m]") {
		t.Errorf("expected prompt to contain [Root/m], got output:\n%s", out)
	}
}

// TestOldAnswerFileDoesNotDesync verifies that an old-format answer "s" for
// map/struct is consumed (not skipped), so subsequent answers stay in sync.
func TestOldAnswerFileDoesNotDesync(t *testing.T) {
	// Prior answers: "s" (old struct answer for root), then "s" (same type
	// for a shape unification prompt). The "s" at position 0 should be consumed
	// by askMapOrName (treated as "accept default"), and "s" at position 1
	// should be consumed by the ask() for same/different.
	a := testInteractiveAnalyzer(t, []string{
		"s", // old-format: accept struct default → Root
		"s", // same type for shapes
	})

	// An array with two shapes that will trigger unification prompt
	arr := []any{
		map[string]any{"name": "Alice", "x": jsonNum("1")},
		map[string]any{"name": "Bob", "y": jsonNum("2")},
	}
	obj := map[string]any{"items": arr}
	paths := sortPaths(a.Analyze(".", obj))

	// Should have Root type (from "s" → accept default) and Item type
	// unified as same type (from "s" → same)
	hasRoot := false
	for _, p := range paths {
		if strings.Contains(p, "{Root}") {
			hasRoot = true
			break
		}
	}
	if !hasRoot {
		t.Errorf("expected {Root} type (old 's' should accept default), got:\n %s",
			strings.Join(paths, "\n "))
	}
}

// TestDefaultDifferentWhenUniqueFieldsDominate verifies that when shapes share
// only ubiquitous fields (slug, name, etc.) and have many unique fields, the
// prompt defaults to "d" (different) instead of "s" (same).
func TestDefaultDifferentWhenUniqueFieldsDominate(t *testing.T) {
	// Two shapes sharing only "slug" (ubiquitous) with 2+ unique fields each.
	// With no prior answer for same/different, the default should be "d".
	// Then we need type names for each shape.
	// Shape ordering is insertion order: shape 1 = filename,is_required,slug; shape 2 = archived,feature,slug
	a := testInteractiveAnalyzer(t, []string{
		"Root",         // root object has 1 key → not confident, prompts for struct/map
		"d",            // accept default (should be "d" because unique >> meaningful shared)
		"FileField",    // name for shape 1 (filename, is_required, slug)
		"FeatureField", // name for shape 2 (archived, feature, slug)
	})

	arr := []any{
		map[string]any{"slug": "a", "filename": "x.pdf", "is_required": true},
		map[string]any{"slug": "b", "feature": "upload", "archived": false},
	}
	obj := map[string]any{"items": arr}
	paths := sortPaths(a.Analyze(".", obj))

	// Should have both FileField and FeatureField as separate types
	hasFile := false
	hasFeature := false
	for _, p := range paths {
		if strings.Contains(p, "{FileField}") {
			hasFile = true
		}
		if strings.Contains(p, "{FeatureField}") {
			hasFeature = true
		}
	}
	if !hasFile || !hasFeature {
		t.Errorf("expected both FileField and FeatureField types, got:\n %s",
			strings.Join(paths, "\n "))
	}
}

// TestDefaultSameWhenMeaningfulFieldsShared verifies that when shapes share
// many non-ubiquitous fields, the prompt defaults to "s" (same).
func TestDefaultSameWhenMeaningfulFieldsShared(t *testing.T) {
	// Two shapes sharing "email", "phone", "address" (non-ubiquitous) with
	// only 1 unique field each. unique (2) < 2 * meaningful shared (3), so
	// default should be "s".
	a := testInteractiveAnalyzer(t, []string{
		"Root", // root object has 1 key → not confident, prompts for struct/map
		"s",    // accept default (should be "s")
	})

	arr := []any{
		map[string]any{"email": "a@b.com", "phone": "555", "address": "123 Main", "vip": true},
		map[string]any{"email": "c@d.com", "phone": "666", "address": "456 Oak", "score": jsonNum("42")},
	}
	obj := map[string]any{"people": arr}
	paths := sortPaths(a.Analyze(".", obj))

	// Should be unified as one type ({People}, inferred from the "people"
	// key) with optional fields
	typeCount := 0
	for _, p := range paths {
		if strings.Contains(p, "{People}") {
			typeCount++
		}
	}
	if typeCount == 0 {
		t.Errorf("expected People type (same default), got:\n %s",
			strings.Join(paths, "\n "))
	}
}

// TestIsUbiquitousField checks the ubiquitous field classifier.
func TestIsUbiquitousField(t *testing.T) {
	ubiquitous := []string{
		"id", "ID", "Id", "_id",
		"name", "Name",
		"type", "Type", "_type",
		"slug", "Slug",
		"label", "Label",
		"title", "Title",
		"created_at", "updated_at", "deleted_on",
		"startedAt", "endedOn",
	}
	for _, f := range ubiquitous {
		if !isUbiquitousField(f) {
			t.Errorf("expected %q to be ubiquitous", f)
		}
	}

	notUbiquitous := []string{
		"email", "phone", "address", "filename", "feature",
		"is_required", "archived", "score", "vip",
		"cat", "latitude", "url",
	}
	for _, f := range notUbiquitous {
		if isUbiquitousField(f) {
			t.Errorf("expected %q to NOT be ubiquitous", f)
		}
	}
}

// testInteractiveAnalyzer creates an analyzer with scripted answers (not anonymous).
+func testInteractiveAnalyzer(t *testing.T, answers []string) *Analyzer { + t.Helper() + a := &Analyzer{ + Prompter: &Prompter{ + reader: bufio.NewReader(strings.NewReader("")), + output: io.Discard, + priorAnswers: answers, + }, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + } + return a +} + +// helpers + +func jsonNum(s string) json.Number { + return json.Number(s) +} + +func assertPaths(t *testing.T, got, want []string) { + t.Helper() + if len(got) != len(want) { + t.Errorf("got %d paths, want %d:\n got: %s\n want: %s", + len(got), len(want), strings.Join(got, "\n "), strings.Join(want, "\n ")) + return + } + for i := range got { + if got[i] != want[i] { + t.Errorf("path[%d]: got %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/tools/jsontypes/cmd/jsonpaths/README.md b/tools/jsontypes/cmd/jsonpaths/README.md new file mode 100644 index 0000000..b0ab3a2 --- /dev/null +++ b/tools/jsontypes/cmd/jsonpaths/README.md @@ -0,0 +1,199 @@ +# jsonpaths + + + +Infer types from JSON. Generate code. + +`jsonpaths` reads a JSON sample (file, URL, or stdin), walks the structure, +and outputs typed definitions in your choice of 9 formats. No schema file +needed — just point it at real data. 
+ +```sh +go install github.com/therootcompany/golib/tools/jsontypes/cmd/jsonpaths@latest +``` + +## Usage + +```sh +# From a file +jsonpaths data.json + +# From a URL (cached locally by default) +jsonpaths https://api.example.com/users + +# From stdin +curl -s https://api.example.com/users | jsonpaths --anonymous +``` + +### Output formats + +Use `--format` to choose the output: + +| Format | Flag | Description | +| -------------- | ------------------------ | ------------------------------------------ | +| json-paths | `--format json-paths` | Flat type paths (default) | +| Go | `--format go` | Struct definitions with json tags | +| TypeScript | `--format typescript` | Interfaces with optional/nullable fields | +| JSDoc | `--format jsdoc` | @typedef annotations | +| Zod | `--format zod` | Validation schemas with type inference | +| Python | `--format python` | TypedDict classes | +| SQL | `--format sql` | CREATE TABLE with foreign keys | +| JSON Schema | `--format json-schema` | Draft 2020-12 | +| JSON Typedef | `--format json-typedef` | RFC 8927 | + +Short aliases: `ts` for typescript, `py` for python. + +### Example + +Given this JSON: + +```json +{ + "users": [ + {"id": 1, "name": "Alice", "email": "a@b.com", "active": true}, + {"id": 2, "name": "Bob", "active": false} + ] +} +``` + +Default output (json-paths): + +``` +{Root} +.users[]{User} +.users[].active{bool} +.users[].email{string?} +.users[].id{int} +.users[].name{string} +``` + +The `?` suffix marks fields that appear in some instances but not all. 
With `--format go`:

```go
type Root struct {
	Users []User `json:"users"`
}

type User struct {
	Id     int64   `json:"id"`
	Name   string  `json:"name"`
	Active bool    `json:"active"`
	Email  *string `json:"email,omitempty"`
}
```

With `--format typescript`:

```typescript
export interface Root {
  users: User[];
}

export interface User {
  id: number;
  name: string;
  active: boolean;
  email?: string | null;
}
```

### Authentication

For APIs that require auth, use curl-like flags:

```sh
# Bearer token
jsonpaths --bearer $TOKEN https://api.example.com/me

# Basic auth
jsonpaths --user admin:secret https://internal.example.com/data

# Arbitrary headers
jsonpaths -H 'X-API-Key: abc123' https://api.example.com/data

# Cookie (accepts both Cookie and Set-Cookie format)
jsonpaths --cookie 'session=abc123' https://app.example.com/api/me

# Netscape cookie jar
jsonpaths --cookie-jar cookies.txt https://app.example.com/api/me
```

### Interactive vs anonymous mode

By default, `jsonpaths` prompts when it encounters ambiguous structure
(e.g., "is this a map or a struct?"). Use `--anonymous` to skip all
prompts and rely on heuristics:

```sh
# Non-interactive (for scripts, CI, AI agents)
cat data.json | jsonpaths --anonymous --format go

# Interactive (from a file or URL)
jsonpaths data.json
```

When run interactively against a file or URL, answers are saved to
`<input-basename>.answers` and replayed on subsequent runs. This makes
iterative refinement fast — change one answer and re-run.

### Caching

URL responses are cached locally as `<slugified-url>.json`. Sensitive
query parameters (tokens, keys, passwords) are stripped from the filename.
Use `--no-cache` to re-fetch.
+ +## json-paths format + +The intermediate representation is a flat list of typed paths: + +``` +{Root} # root type +.users[]{User} # array of User +.users[].id{int} # field with type +.users[].email{string?} # optional (nullable) field +.metadata[string]{Meta} # map with string keys +.tags[]{string} # array of primitives +.data[]{any} # empty array (unknown element type) +``` + +**Type annotations:** `{string}`, `{int}`, `{float}`, `{bool}`, `{null}`, +`{any}`, or a named struct like `{User}`. The `?` suffix means nullable/optional. + +**Index annotations:** `[]` for arrays, `[string]` for maps, `[0]`, `[1]` for tuples. + +This format is designed to be both human-scannable and machine-parseable, +making it useful as context for AI agents that need to understand an API's +shape before generating code. + +## Library usage + +The core logic is available as a Go package: + +```go +import "github.com/therootcompany/golib/tools/jsontypes" + +a, _ := jsontypes.NewAnalyzer(false, true, false) // anonymous mode +defer a.Close() + +var data any +// ... json.Decode with UseNumber() ... + +paths := jsontypes.FormatPaths(a.Analyze(".", data)) +fmt.Print(jsontypes.GenerateTypeScript(paths)) +``` + +See the [package documentation](https://pkg.go.dev/github.com/therootcompany/golib/tools/jsontypes) +for the full API. + +## License + +Authored by [AJ ONeal](mailto:aj@therootcompany.com), generated by Claude +Opus 4.6, with light human guidance. + +[CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/) — Public Domain. 
diff --git a/tools/jsontypes/cmd/jsonpaths/main.go b/tools/jsontypes/cmd/jsonpaths/main.go new file mode 100644 index 0000000..30c23b4 --- /dev/null +++ b/tools/jsontypes/cmd/jsonpaths/main.go @@ -0,0 +1,447 @@ +package main + +import ( + "bufio" + "crypto/tls" + "encoding/base64" + "encoding/json" + "flag" + "fmt" + "io" + "net" + "net/http" + "os" + "os/signal" + "strings" + "time" + + "github.com/therootcompany/golib/tools/jsontypes" +) + +const ( + name = "jsonpaths" + description = "Infer types from JSON. Generate code." +) + +var ( + version = "0.0.0-dev" + commit = "0000000" + date = "0001-01-01" +) + +func printVersion(w io.Writer) { + fmt.Fprintf(w, "%s v%s %s (%s)\n", name, version, commit[:7], date) + fmt.Fprintf(w, "%s\n", description) +} + +// headerList implements flag.Value for repeatable -H flags. +type headerList []string + +func (h *headerList) String() string { return strings.Join(*h, ", ") } +func (h *headerList) Set(val string) error { + if !strings.Contains(val, ":") { + return fmt.Errorf("header must be in 'Name: Value' format") + } + *h = append(*h, val) + return nil +} + +func main() { + // Exit cleanly on Ctrl+C + sig := make(chan os.Signal, 1) + signal.Notify(sig, os.Interrupt) + go func() { + <-sig + fmt.Fprintln(os.Stderr) + os.Exit(130) + }() + + var headers headerList + flag.Var(&headers, "H", "add HTTP header (repeatable, e.g. 
-H 'X-API-Key: abc')") + anonymous := flag.Bool("anonymous", false, "skip all prompts; use heuristics and auto-inferred names") + askTypes := flag.Bool("ask-types", false, "prompt for each type name instead of auto-inferring") + bearer := flag.String("bearer", "", "set Authorization: Bearer token") + cookie := flag.String("cookie", "", "send cookie (name=value or Set-Cookie format)") + cookieJar := flag.String("cookie-jar", "", "read cookies from Netscape cookie jar file") + format := flag.String("format", "json-paths", "output format: json-paths, go, json-schema, json-typedef, typescript, jsdoc, zod, python, sql") + timeout := flag.Duration("timeout", 20*time.Second, "HTTP request timeout for URL inputs") + noCache := flag.Bool("no-cache", false, "skip local cache for URL inputs") + user := flag.String("user", "", "HTTP basic auth (user:password, like curl)") + + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "USAGE\n %s [flags] [file | url]\n\n", name) + fmt.Fprintf(os.Stderr, "FLAGS\n") + flag.PrintDefaults() + } + + // Handle version/help before flag parse + if len(os.Args) > 1 { + arg := os.Args[1] + if arg == "-V" || arg == "--version" || arg == "version" { + printVersion(os.Stdout) + os.Exit(0) + } + if arg == "help" || arg == "-help" || arg == "--help" { + printVersion(os.Stdout) + fmt.Fprintln(os.Stdout) + flag.CommandLine.SetOutput(os.Stdout) + flag.Usage() + os.Exit(0) + } + } + + flag.Parse() + + var input io.Reader + var baseName string // base filename for .paths and .answers files + inputIsStdin := true + // Build extra HTTP headers from flags + var extraHeaders http.Header + if *bearer != "" || *user != "" || *cookie != "" || *cookieJar != "" || len(headers) > 0 { + extraHeaders = make(http.Header) + } + for _, h := range headers { + name, value, _ := strings.Cut(h, ":") + extraHeaders.Add(strings.TrimSpace(name), strings.TrimSpace(value)) + } + if *bearer != "" { + extraHeaders.Set("Authorization", "Bearer "+*bearer) + } + if *user != "" { + 
extraHeaders.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(*user))) + } + if *cookie != "" { + extraHeaders.Add("Cookie", parseCookieFlag(*cookie)) + } + if *cookieJar != "" { + cookies, err := readCookieJar(*cookieJar) + if err != nil { + fmt.Fprintf(os.Stderr, "error reading cookie jar: %v\n", err) + os.Exit(1) + } + for _, c := range cookies { + extraHeaders.Add("Cookie", c) + } + } + + if args := flag.Args(); len(args) > 0 && args[0] != "-" { + arg := args[0] + if strings.HasPrefix(arg, "https://") || strings.HasPrefix(arg, "http://") { + r, err := fetchOrCache(arg, *timeout, *noCache, extraHeaders) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + defer r.Close() + input = r + baseName = stripExt(slugify(arg)) + } else { + f, err := os.Open(arg) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + defer f.Close() + input = f + baseName = stripExt(arg) + } + inputIsStdin = false + } else { + input = os.Stdin + } + + var data any + dec := json.NewDecoder(input) + dec.UseNumber() + if err := dec.Decode(&data); err != nil { + fmt.Fprintf(os.Stderr, "error parsing JSON: %v\n", err) + os.Exit(1) + } + + a, err := jsontypes.NewAnalyzer(inputIsStdin, *anonymous, *askTypes) + if err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } + defer a.Close() + + // Load prior answers if available + if baseName != "" && !*anonymous { + a.Prompter.LoadAnswers(baseName + ".answers") + } + + rawPaths := a.Analyze(".", data) + formatted := jsontypes.FormatPaths(rawPaths) + + switch *format { + case "go": + fmt.Print(jsontypes.GenerateGoStructs(formatted)) + case "json-typedef": + fmt.Print(jsontypes.GenerateTypedef(formatted)) + case "json-schema": + fmt.Print(jsontypes.GenerateJSONSchema(formatted)) + case "typescript", "ts": + fmt.Print(jsontypes.GenerateTypeScript(formatted)) + case "jsdoc": + fmt.Print(jsontypes.GenerateJSDoc(formatted)) + case "zod": + 
fmt.Print(jsontypes.GenerateZod(formatted)) + case "python", "py": + fmt.Print(jsontypes.GeneratePython(formatted)) + case "sql": + fmt.Print(jsontypes.GenerateSQL(formatted)) + case "json-paths", "paths", "": + for _, p := range formatted { + fmt.Println(p) + } + default: + fmt.Fprintf(os.Stderr, "error: unknown format %q (use: json-paths, go, json-schema, json-typedef, typescript, jsdoc, zod, python, sql)\n", *format) + os.Exit(1) + } + + // Save outputs + if baseName != "" { + pathsFile := baseName + ".paths" + if err := os.WriteFile(pathsFile, []byte(strings.Join(formatted, "\n")+"\n"), 0o644); err != nil { + fmt.Fprintf(os.Stderr, "warning: could not write %s: %v\n", pathsFile, err) + } + + if !*anonymous { + answersFile := baseName + ".answers" + if err := a.Prompter.SaveAnswers(answersFile); err != nil { + fmt.Fprintf(os.Stderr, "warning: could not write %s: %v\n", answersFile, err) + } + } + } +} + +func stripExt(name string) string { + if idx := strings.LastIndexByte(name, '.'); idx > 0 { + return name[:idx] + } + return name +} + +// slugify converts a URL to a filesystem-safe filename in the current directory. 
+func slugify(rawURL string) string { + s := rawURL + for _, prefix := range []string{"https://", "http://"} { + s = strings.TrimPrefix(s, prefix) + } + + path := s + query := "" + if idx := strings.IndexByte(s, '?'); idx >= 0 { + path = s[:idx] + query = s[idx+1:] + } + + if query != "" { + var kept []string + for _, param := range strings.Split(query, "&") { + name := param + if idx := strings.IndexByte(param, '='); idx >= 0 { + name = param[:idx] + } + nameLower := strings.ToLower(name) + if isSensitiveParam(nameLower) { + continue + } + if len(param) > len(name)+21 { + continue + } + kept = append(kept, param) + } + if len(kept) > 0 { + path = path + "-" + strings.Join(kept, "-") + } + } + + var buf strings.Builder + lastHyphen := false + for _, r := range path { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '.' { + buf.WriteRune(r) + lastHyphen = false + } else if !lastHyphen { + buf.WriteByte('-') + lastHyphen = true + } + } + name := strings.Trim(buf.String(), "-") + if len(name) > 200 { + name = name[:200] + } + return name + ".json" +} + +var sensitiveParams = []string{ + "secret", "token", "code", "key", "apikey", "api_key", + "password", "passwd", "auth", "credential", "session", + "access_token", "refresh_token", "client_secret", +} + +func isSensitiveParam(name string) bool { + for _, s := range sensitiveParams { + if name == s || strings.Contains(name, s) { + return true + } + } + return false +} + +func fetchOrCache(rawURL string, timeout time.Duration, noCache bool, extraHeaders http.Header) (io.ReadCloser, error) { + if !noCache { + path := slugify(rawURL) + if info, err := os.Stat(path); err == nil && info.Size() > 0 { + f, err := os.Open(path) + if err == nil { + fmt.Fprintf(os.Stderr, "using cached ./%s\n (use --no-cache to re-fetch)\n", path) + return f, nil + } + } + } + + body, err := fetchURL(rawURL, timeout, extraHeaders) + if err != nil { + return nil, err + } + + if noCache { + return body, nil + 
} + + path := slugify(rawURL) + data, err := io.ReadAll(body) + body.Close() + if err != nil { + return nil, fmt.Errorf("reading response: %w", err) + } + + if err := os.WriteFile(path, data, 0o600); err != nil { + fmt.Fprintf(os.Stderr, "warning: could not cache response: %v\n", err) + } else { + fmt.Fprintf(os.Stderr, "cached to ./%s\n", path) + } + + return io.NopCloser(strings.NewReader(string(data))), nil +} + +func fetchURL(url string, timeout time.Duration, extraHeaders http.Header) (io.ReadCloser, error) { + client := &http.Client{ + Timeout: timeout, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + MinVersion: tls.VersionTLS12, + }, + DialContext: (&net.Dialer{ + Timeout: 10 * time.Second, + KeepAlive: 0, + }).DialContext, + TLSHandshakeTimeout: 10 * time.Second, + ResponseHeaderTimeout: timeout, + MaxIdleConns: 1, + DisableKeepAlives: true, + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 5 { + return fmt.Errorf("too many redirects") + } + return nil + }, + } + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("invalid URL: %w", err) + } + req.Header.Set("Accept", "application/json") + for name, vals := range extraHeaders { + for _, v := range vals { + req.Header.Add(name, v) + } + } + + resp, err := client.Do(req) + if err != nil { + if isTimeout(err) { + return nil, fmt.Errorf("request timed out after %s (use --timeout 60s to increase timeout for slow APIs)", timeout) + } + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + resp.Body.Close() + return nil, fmt.Errorf("HTTP %d %s", resp.StatusCode, resp.Status) + } + + ct := resp.Header.Get("Content-Type") + if ct != "" && !strings.Contains(ct, "json") && !strings.Contains(ct, "javascript") { + resp.Body.Close() + return nil, fmt.Errorf("unexpected Content-Type %q (expected JSON)", ct) + } + + return struct { + io.Reader + io.Closer + }{ + 
Reader: io.LimitReader(resp.Body, 256<<20), + Closer: resp.Body, + }, nil +} + +func isTimeout(err error) bool { + if netErr, ok := err.(net.Error); ok { + return netErr.Timeout() + } + return strings.Contains(err.Error(), "deadline exceeded") || + strings.Contains(err.Error(), "timed out") +} + +func parseCookieFlag(raw string) string { + s := raw + for _, prefix := range []string{"Set-Cookie:", "Cookie:"} { + if strings.HasPrefix(s, prefix) { + s = strings.TrimSpace(s[len(prefix):]) + break + } + lower := strings.ToLower(s) + lowerPrefix := strings.ToLower(prefix) + if strings.HasPrefix(lower, lowerPrefix) { + s = strings.TrimSpace(s[len(prefix):]) + break + } + } + if idx := strings.IndexByte(s, ';'); idx >= 0 { + s = strings.TrimSpace(s[:idx]) + } + return s +} + +func readCookieJar(path string) ([]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + var cookies []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + fields := strings.Split(line, "\t") + if len(fields) < 7 { + continue + } + name := fields[5] + value := fields[6] + cookies = append(cookies, name+"="+value) + } + if err := scanner.Err(); err != nil { + return nil, err + } + return cookies, nil +} diff --git a/tools/jsontypes/cmd/jsonpaths/slugify_test.go b/tools/jsontypes/cmd/jsonpaths/slugify_test.go new file mode 100644 index 0000000..be181b3 --- /dev/null +++ b/tools/jsontypes/cmd/jsonpaths/slugify_test.go @@ -0,0 +1,57 @@ +package main + +import "testing" + +func TestSlugify(t *testing.T) { + tests := []struct { + url string + want string + }{ + { + "https://api.example.com/v2/rooms", + "api.example.com-v2-rooms.json", + }, + { + "https://api.example.com/v2/rooms?limit=10&offset=20", + "api.example.com-v2-rooms-limit-10-offset-20.json", + }, + { + // token param stripped + 
"https://api.example.com/data?token=abc123secret&limit=5", + "api.example.com-data-limit-5.json", + }, + { + // api_key stripped + "https://api.example.com/data?api_key=xyz&format=json", + "api.example.com-data-format-json.json", + }, + { + // long value stripped (>20 chars) + "https://api.example.com/data?hash=abcdefghijklmnopqrstuvwxyz&page=1", + "api.example.com-data-page-1.json", + }, + { + // access_token stripped + "https://api.example.com/me?access_token=foo", + "api.example.com-me.json", + }, + { + // auth_code contains "code" — stripped + "https://example.com/callback?auth_code=xyz&state=ok", + "example.com-callback-state-ok.json", + }, + { + // no query string + "http://localhost:8080/api/v1/users", + "localhost-8080-api-v1-users.json", + }, + } + for _, tt := range tests { + t.Run(tt.url, func(t *testing.T) { + got := slugify(tt.url) + if got != tt.want { + t.Errorf("slugify(%q)\n got: %s\n want: %s", tt.url, got, tt.want) + } + }) + } +} diff --git a/tools/jsontypes/decisions.go b/tools/jsontypes/decisions.go new file mode 100644 index 0000000..9572d1f --- /dev/null +++ b/tools/jsontypes/decisions.go @@ -0,0 +1,463 @@ +package jsontypes + +import ( + "fmt" + "sort" + "strings" +) + +// decideMapOrStruct determines whether an object is a map or struct. +// In anonymous mode, uses heuristics silently. +// Otherwise, shows a combined prompt: enter a TypeName or 'm' for map. +// In default mode, confident heuristic maps skip the prompt. +// In askTypes mode, the prompt is always shown. +func (a *Analyzer) decideMapOrStruct(path string, obj map[string]any) bool { + isMap, confident := looksLikeMap(obj) + if a.anonymous { + return isMap + } + + // Default mode: skip prompt when heuristics are confident + if !a.askTypes && confident { + return isMap + } + + return a.promptMapOrStructWithName(path, obj, isMap, confident) +} + +// promptMapOrStructWithName shows the object's fields and asks a combined question. 
// The user can type 'm' or 'map' for a map, a name starting with a capital letter
// for a struct type, or press Enter to accept the default.
// NOTE(review): only the literal answer "m" selects map below — presumably
// Prompter.askMapOrName normalizes "map" to "m"; confirm against its impl.
func (a *Analyzer) promptMapOrStructWithName(path string, obj map[string]any, heuristicMap, confident bool) bool {
	keys := sortedKeys(obj)

	// Fall back to a numbered placeholder when the path yields no usable name.
	inferred := inferTypeName(path)
	if inferred == "" {
		a.typeCounter++
		inferred = fmt.Sprintf("Struct%d", a.typeCounter)
	}

	// Default answer: the inferred struct name, unless the heuristics
	// confidently said "map".
	defaultVal := inferred
	if confident && heuristicMap {
		defaultVal = "m"
	}

	// Show the object's keys so the user can judge map vs struct.
	fmt.Fprintf(a.Prompter.output, "\nAt %s\n", shortPath(path))
	fmt.Fprintf(a.Prompter.output, " Object with %d keys:\n", len(keys))
	for _, k := range keys {
		fmt.Fprintf(a.Prompter.output, " %s: %s\n", k, valueSummary(obj[k]))
	}

	answer := a.Prompter.askMapOrName("Struct name (or 'm' for map)?", defaultVal)
	if answer == "m" {
		a.pendingTypeName = ""
		return true
	}
	// Stash the chosen name for decideTypeName to consume (avoids a second prompt).
	a.pendingTypeName = answer
	return false
}

// decideKeyName infers the map key type from the keys.
// (The path argument is currently unused; key naming depends only on obj.)
func (a *Analyzer) decideKeyName(_ string, obj map[string]any) string {
	return inferKeyName(obj)
}

// decideTypeName determines the struct type name, using inference and optionally
// prompting the user.
+func (a *Analyzer) decideTypeName(path string, obj map[string]any) string { + // Check if we've already named a type with this exact shape + sig := shapeSignature(obj) + if existing, ok := a.knownTypes[sig]; ok { + a.pendingTypeName = "" + return existing.name + } + + newFields := fieldSet(obj) + + // Consume pending name from askTypes combined prompt + if a.pendingTypeName != "" { + name := a.pendingTypeName + a.pendingTypeName = "" + return a.resolveTypeName(path, name, newFields, sig) + } + + inferred := inferTypeName(path) + if inferred == "" { + a.typeCounter++ + inferred = fmt.Sprintf("Struct%d", a.typeCounter) + } + + // Default and anonymous modes: auto-resolve without prompting + if !a.askTypes { + return a.autoResolveTypeName(path, inferred, newFields, sig) + } + + // askTypes mode: show fields and prompt for name + keys := sortedKeys(obj) + fmt.Fprintf(a.Prompter.output, "\nAt %s\n", shortPath(path)) + fmt.Fprintf(a.Prompter.output, " Struct with %d fields:\n", len(keys)) + for _, k := range keys { + fmt.Fprintf(a.Prompter.output, " %s: %s\n", k, valueSummary(obj[k])) + } + + name := a.promptName(path, inferred, newFields, sig) + return name +} + +// autoResolveTypeName registers or resolves a type name without prompting. +// On collision, tries the parent-prefix strategy; if that also collides, prompts +// (unless anonymous, in which case it uses a numbered fallback). 
+func (a *Analyzer) autoResolveTypeName(path, name string, newFields map[string]string, sig string) string { + existing, taken := a.typesByName[name] + if !taken { + return a.registerType(sig, name, newFields) + } + + rel := fieldRelation(existing.fields, newFields) + switch rel { + case relEqual: + a.knownTypes[sig] = existing + return name + case relSubset, relSuperset: + merged := mergeFieldSets(existing.fields, newFields) + existing.fields = merged + a.knownTypes[sig] = existing + return name + default: + // Collision — try parent-prefix strategy + alt := a.suggestAlternativeName(path, name) + if _, altTaken := a.typesByName[alt]; !altTaken { + return a.registerType(sig, alt, newFields) + } + // Parent strategy also taken + if a.anonymous { + a.typeCounter++ + return a.registerType(sig, fmt.Sprintf("%s%d", name, a.typeCounter), newFields) + } + // Last resort: prompt + return a.promptName(path, alt, newFields, sig) + } +} + +// resolveTypeName handles a name that came from the combined prompt, +// checking for collisions with existing types. +func (a *Analyzer) resolveTypeName(path, name string, newFields map[string]string, sig string) string { + existing, taken := a.typesByName[name] + if !taken { + return a.registerType(sig, name, newFields) + } + + rel := fieldRelation(existing.fields, newFields) + switch rel { + case relEqual: + a.knownTypes[sig] = existing + return name + case relSubset, relSuperset: + merged := mergeFieldSets(existing.fields, newFields) + existing.fields = merged + a.knownTypes[sig] = existing + return name + default: + return a.promptName(path, name, newFields, sig) + } +} + +// promptName asks for a type name and handles collisions with existing types. +// Pre-resolves the suggested name so the user sees a valid default. 
func (a *Analyzer) promptName(path, suggested string, newFields map[string]string, sig string) string {
	suggested = a.preResolveCollision(path, suggested, newFields, sig)

	// Loop until the answer either registers cleanly or merges into an
	// existing compatible type; collisions re-prompt with a new suggestion.
	for {
		name := a.Prompter.askFreeform("Name for this type?", suggested)

		existing, taken := a.typesByName[name]
		if !taken {
			return a.registerType(sig, name, newFields)
		}

		rel := fieldRelation(existing.fields, newFields)
		switch rel {
		case relEqual:
			// Identical shape: reuse silently.
			a.knownTypes[sig] = existing
			return name
		case relSubset, relSuperset:
			// Compatible shape: widen the existing type with the field union.
			fmt.Fprintf(a.Prompter.output, " Extending existing type %q (merging fields)\n", name)
			merged := mergeFieldSets(existing.fields, newFields)
			existing.fields = merged
			a.knownTypes[sig] = existing
			return name
		case relOverlap:
			// Partially matching shape: let the user choose merge vs rename.
			fmt.Fprintf(a.Prompter.output, " Type %q already exists with overlapping fields: %s\n",
				name, fieldList(existing.fields))
			choice := a.Prompter.ask(
				fmt.Sprintf(" [e]xtend %q with merged fields, or use a [d]ifferent name?", name),
				"e", []string{"e", "d"},
			)
			if choice == "e" {
				merged := mergeFieldSets(existing.fields, newFields)
				existing.fields = merged
				a.knownTypes[sig] = existing
				return name
			}
			suggested = a.suggestAlternativeName(path, name)
			continue
		case relDisjoint:
			// Unrelated shape with the same name: force a different name.
			fmt.Fprintf(a.Prompter.output, " Type %q already exists with different fields: %s\n",
				name, fieldList(existing.fields))
			suggested = a.suggestAlternativeName(path, name)
			continue
		}
	}
}

// preResolveCollision checks if the suggested name collides with an existing
// type that can't be auto-merged. If so, prints a warning and returns a new
// suggested name.
+func (a *Analyzer) preResolveCollision(path, suggested string, newFields map[string]string, sig string) string { + existing, taken := a.typesByName[suggested] + if !taken { + return suggested + } + + rel := fieldRelation(existing.fields, newFields) + switch rel { + case relEqual, relSubset, relSuperset: + return suggested + default: + alt := a.suggestAlternativeName(path, suggested) + fmt.Fprintf(a.Prompter.output, " (type %q already exists with different fields, suggesting %q)\n", + suggested, alt) + return alt + } +} + +// suggestAlternativeName generates a better name when a collision occurs, +// using the parent type as a prefix (e.g., "DocumentRoom" instead of "Room2"). +func (a *Analyzer) suggestAlternativeName(path, collided string) string { + parent := parentTypeName(path) + if parent != "" { + candidate := parent + collided + if _, taken := a.typesByName[candidate]; !taken { + return candidate + } + } + // Fall back to numbered suffix + a.typeCounter++ + return fmt.Sprintf("%s%d", collided, a.typeCounter) +} + +// shortPath returns the full path but with only the most recent {Type} +// annotation kept; all earlier type annotations are stripped. 
// e.g.:
// ".{RoomsResult}.rooms[]{Room}.room[string][]{RoomRoom}.json{RoomRoomJSON}.feature_types[]"
// → ".rooms[].room[string][].json{RoomRoomJSON}.feature_types[]"
func shortPath(path string) string {
	// Locate the last {Type} annotation: the final '}' and the last '{'
	// that precedes it. If there is none, the path is already short.
	lastClose := strings.LastIndexByte(path, '}')
	if lastClose < 0 {
		return path
	}
	lastOpen := strings.LastIndexByte(path[:lastClose], '{')
	if lastOpen < 0 {
		return path
	}

	// Rebuild the path, dropping every {Type} annotation except the last.
	var sb strings.Builder
	i := 0
	for i < len(path) {
		if path[i] != '{' {
			sb.WriteByte(path[i])
			i++
			continue
		}
		end := strings.IndexByte(path[i:], '}')
		if end < 0 {
			break
		}
		if i == lastOpen {
			// This is the annotation we keep.
			sb.WriteString(path[i : i+end+1])
		}
		i += end + 1
	}

	// Collapse any double dots left by stripping (e.g., ".." → ".")
	return strings.ReplaceAll(sb.String(), "..", ".")
}

// parentTypeName extracts the most recent {TypeName} from a path.
+// e.g., ".[id]{Document}.rooms[int]{Room}.details" → "Room" +func parentTypeName(path string) string { + last := "" + for { + idx := strings.Index(path, "{") + if idx < 0 { + break + } + end := strings.Index(path[idx:], "}") + if end < 0 { + break + } + candidate := path[idx+1 : idx+end] + if candidate != "null" { + last = candidate + } + path = path[idx+end+1:] + } + return last +} + +func (a *Analyzer) registerType(sig, name string, fields map[string]string) string { + st := &structType{name: name, fields: fields} + a.knownTypes[sig] = st + a.typesByName[name] = st + return name +} + +type fieldRelationType int + +const ( + relEqual fieldRelationType = iota + relSubset // existing ⊂ new + relSuperset // existing ⊃ new + relOverlap // some shared, some unique to each + relDisjoint // no fields in common +) + +func fieldRelation(a, b map[string]string) fieldRelationType { + aInB, bInA := 0, 0 + for k, ak := range a { + if bk, ok := b[k]; ok && kindsCompatible(ak, bk) { + aInB++ + } + } + for k, bk := range b { + if ak, ok := a[k]; ok && kindsCompatible(ak, bk) { + bInA++ + } + } + shared := aInB // same as bInA + if shared == 0 { + return relDisjoint + } + if shared == len(a) && shared == len(b) { + return relEqual + } + if shared == len(a) { + return relSubset // all of a is in b, b has more + } + if shared == len(b) { + return relSuperset // all of b is in a, a has more + } + return relOverlap +} + +// kindsCompatible returns true if two field value kinds can be considered the +// same type. "null" is compatible with anything (it's just an absent value), +// and "mixed" is compatible with anything. +func kindsCompatible(a, b string) bool { + if a == b { + return true + } + if a == "null" || b == "null" || a == "mixed" || b == "mixed" { + return true + } + return false +} + +// fieldsOverlap returns true if one field set is a subset or superset of the other. 
func fieldsOverlap(a, b map[string]string) bool {
	rel := fieldRelation(a, b)
	return rel == relEqual || rel == relSubset || rel == relSuperset
}

// mergeFieldSets returns the union of two field sets; a field present in both
// with differing kinds is downgraded to "mixed".
func mergeFieldSets(a, b map[string]string) map[string]string {
	merged := make(map[string]string, len(a)+len(b))
	for k, v := range a {
		merged[k] = v
	}
	for k, v := range b {
		if existing, ok := merged[k]; ok && existing != v {
			merged[k] = "mixed"
		} else {
			merged[k] = v
		}
	}
	return merged
}

// fieldList returns the field names sorted and comma-joined, for display.
func fieldList(fields map[string]string) string {
	keys := make([]string, 0, len(fields))
	for k := range fields {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return strings.Join(keys, ", ")
}

// decideTupleOrList asks the user if a short mixed-type array is a tuple or list.
// Returns true for tuple; anonymous (non-interactive) mode always answers list.
func (a *Analyzer) decideTupleOrList(path string, arr []any) bool {
	if a.anonymous {
		return false // default to list
	}
	fmt.Fprintf(a.Prompter.output, "\nAt %s\n", shortPath(path))
	fmt.Fprintf(a.Prompter.output, " Short array with %d elements of mixed types:\n", len(arr))
	for i, v := range arr {
		fmt.Fprintf(a.Prompter.output, " [%d]: %s\n", i, valueSummary(v))
	}
	choice := a.Prompter.ask(
		"Is this a [l]ist or a [t]uple?",
		"l", []string{"l", "t"},
	)
	return choice == "t"
}

// valueSummary returns a short human-readable summary of a JSON value.
func valueSummary(v any) string {
	switch tv := v.(type) {
	case nil:
		return "null"
	case bool:
		return fmt.Sprintf("%v", tv)
	case string:
		// NOTE(review): byte-based truncation may split a multi-byte rune;
		// acceptable for a console preview — confirm if output must be valid UTF-8.
		if len(tv) > 40 {
			return fmt.Sprintf("%q...", tv[:37])
		}
		return fmt.Sprintf("%q", tv)
	case []any:
		if len(tv) == 0 {
			return "[]"
		}
		return fmt.Sprintf("[...] (%d elements)", len(tv))
	case map[string]any:
		if len(tv) == 0 {
			return "{}"
		}
		// Preview at most three keys, with an ellipsis for the rest.
		keys := sortedKeys(tv)
		preview := keys
		if len(preview) > 3 {
			preview = preview[:3]
		}
		s := "{" + strings.Join(preview, ", ")
		if len(keys) > 3 {
			s += ", ..."
		}
		return s + "}"
	default:
		return fmt.Sprintf("%v", v)
	}
}

// fieldSet maps each key of obj to the kind of its value (via kindOf).
func fieldSet(obj map[string]any) map[string]string {
	fs := make(map[string]string, len(obj))
	for k, v := range obj {
		fs[k] = kindOf(v)
	}
	return fs
}
diff --git a/tools/jsontypes/doc.go b/tools/jsontypes/doc.go
new file mode 100644
index 0000000..1725dc3
--- /dev/null
+++ b/tools/jsontypes/doc.go
@@ -0,0 +1,52 @@
// Package jsontypes infers type structure from JSON samples and generates
// type definitions in multiple output formats.
//
// Given a JSON value (object, array, or primitive), jsontypes walks the
// structure depth-first, detects maps vs structs, infers optional fields
// from multiple instances, and produces a flat path notation called
// "json-paths" that captures the full type tree:
//
//	{Root}
//	.users[]{User}
//	.users[].id{int}
//	.users[].name{string}
//	.users[].email{string?}
//
// These paths can then be rendered into typed definitions for any target:
//
//   - [GenerateGoStructs]: Go struct definitions with json tags
//   - [GenerateTypeScript]: TypeScript interfaces
//   - [GenerateJSDoc]: JSDoc @typedef annotations
//   - [GenerateZod]: Zod validation schemas
//   - [GeneratePython]: Python TypedDict classes
//   - [GenerateSQL]: SQL CREATE TABLE with foreign key relationships
//   - [GenerateJSONSchema]: JSON Schema (draft 2020-12)
//   - [GenerateTypedef]: JSON Typedef (RFC 8927)
//
// # Quick start
//
// For non-interactive use (e.g., from an AI agent or script):
//
//	import "encoding/json"
//	import "github.com/therootcompany/golib/tools/jsontypes"
//
//	var data any
//	dec := json.NewDecoder(input)
//	dec.UseNumber()
//	dec.Decode(&data)
//
//	a, _ := jsontypes.NewAnalyzer(false, true, false) // anonymous mode
//	defer a.Close()
//
//	paths := jsontypes.FormatPaths(a.Analyze(".", data))
//	fmt.Print(jsontypes.GenerateTypeScript(paths))
//
// # AI tool use
//
// This package is designed to be callable as an AI skill.
// Given a JSON
// API response, an agent can infer the complete type structure and emit
// ready-to-use type definitions — no schema file required. The json-paths
// intermediate format is both human-readable and machine-parseable,
// making it suitable for tool-use chains where an agent needs to
// understand an API's shape before generating code.
package jsontypes
diff --git a/tools/jsontypes/format.go b/tools/jsontypes/format.go
new file mode 100644
index 0000000..b4f7673
--- /dev/null
+++ b/tools/jsontypes/format.go
@@ -0,0 +1,304 @@
package jsontypes

import (
	"sort"
	"strings"
)

// segment represents one part of a parsed path.
type segment struct {
	name  string // field name (empty for root)
	index string // "[]", "[int]", "[string]", etc. (can be multiple like "[int][]")
	typ   string // type name without braces, e.g. "Room", "string", "null"
}

// parsePath splits a full annotated path into segments.
// e.g., ".{RoomsResult}.rooms[]{Room}.name{string}" →
//
//	[{name:"", typ:"RoomsResult"}, {name:"rooms", index:"[]", typ:"Room"}, {name:"name", typ:"string"}]
func parsePath(path string) []segment {
	var segments []segment
	i := 0
	for i < len(path) {
		var seg segment
		// Skip dot prefix
		if i < len(path) && path[i] == '.' {
			i++
		}
		// Name: read until [, {, ., or end
		nameStart := i
		for i < len(path) && path[i] != '[' && path[i] != '{' && path[i] != '.' {
			i++
		}
		seg.name = path[nameStart:i]

		// Indices: read all [...] sequences
		for i < len(path) && path[i] == '[' {
			end := strings.IndexByte(path[i:], ']')
			if end < 0 {
				break
			}
			seg.index += path[i : i+end+1]
			i = i + end + 1
		}

		// Type: read {Type}
		if i < len(path) && path[i] == '{' {
			end := strings.IndexByte(path[i:], '}')
			if end < 0 {
				break
			}
			seg.typ = path[i+1 : i+end]
			i = i + end + 1
		}

		segments = append(segments, seg)
	}
	return segments
}

// FormatPaths converts fully-annotated flat paths into the display format where:
//   - The root type appears alone on the first line (no leading dot)
//   - Each type introduction gets its own line
//   - Type annotations only appear on the rightmost (new) segment of each line
//   - When multiple types share a path position, child fields include the
//     parent type to disambiguate (e.g., .items[]{FileField}.slug{string})
func FormatPaths(paths []string) []string {
	// First pass: find bare positions where multiple types are introduced.
	// These need parent type disambiguation in their child lines.
	typeIntros := make(map[string]map[string]bool) // bare → set of type names
	for _, path := range paths {
		segs := parsePath(path)
		for depth := range segs {
			if segs[depth].typ == "" {
				continue
			}
			bare := buildBare(segs[:depth+1])
			if typeIntros[bare] == nil {
				typeIntros[bare] = make(map[string]bool)
			}
			typeIntros[bare][segs[depth].typ] = true
		}
	}
	// Collect bare paths with multiple types (excluding primitives/null)
	multiType := make(map[string]bool)
	for bare, types := range typeIntros {
		named := 0
		for typ := range types {
			if typ != "null" && typ != "string" && typ != "int" &&
				typ != "float" && typ != "bool" && typ != "unknown" {
				named++
			}
		}
		if named > 1 {
			multiType[bare] = true
		}
	}

	seen := make(map[string]bool)
	var lines []outputLine

	// Second pass: emit one line per (position, type) introduction,
	// deduplicated by the rendered display string.
	for _, path := range paths {
		segs := parsePath(path)

		for depth := range segs {
			if segs[depth].typ == "" {
				continue
			}

			// Check if the parent position has multiple types
			parentIdx := -1
			if depth > 0 {
				parentBare := buildBare(segs[:depth])
				if multiType[parentBare] {
					// Find the parent segment that has a type
					for j := depth - 1; j >= 0; j-- {
						if segs[j].typ != "" {
							parentIdx = j
							break
						}
					}
				}
			}

			// Check if this position itself has multiple types (type intro line)
			selfBare := buildBare(segs[:depth+1])
			selfMulti := multiType[selfBare]

			var display string
			if parentIdx >= 0 {
				display = buildDisplayWithParent(segs[:depth+1], depth, parentIdx)
			} else {
				display = buildDisplay(segs[:depth+1], depth)
			}
			if !seen[display] {
				seen[display] = true
				var bare string
				if parentIdx >= 0 {
					bare = buildBareWithParent(segs[:depth+1], parentIdx)
				} else if selfMulti {
					// This is a type intro at a multi-type position;
					// include own type in bare so children sort under it.
					bare = buildBareWithParent(segs[:depth+1], depth)
				} else {
					bare = buildBare(segs[:depth+1])
				}
				lines = append(lines, outputLine{bare, display})
			}
		}
	}

	// Merge {null} with sibling types: if a bare path has both {null} and
	// {SomeType}, replace with {SomeType?} and drop the {null} line.
	lines = mergeNullables(lines)

	// Stable sort keeps equal-keyed lines in first-seen order.
	sort.SliceStable(lines, func(i, j int) bool {
		if lines[i].bare != lines[j].bare {
			return lines[i].bare < lines[j].bare
		}
		return lines[i].display < lines[j].display
	})

	result := make([]string, len(lines))
	for i, l := range lines {
		result[i] = l.display
	}
	return result
}

// buildDisplay builds a display line where only the segment at typeIdx shows
// its type. Parent segments are bare. The root segment has no leading dot.
func buildDisplay(segs []segment, typeIdx int) string {
	var buf strings.Builder
	for i, seg := range segs {
		if seg.name != "" {
			buf.WriteByte('.')
			buf.WriteString(seg.name)
		}
		buf.WriteString(seg.index)
		if i == typeIdx {
			buf.WriteByte('{')
			buf.WriteString(seg.typ)
			buf.WriteByte('}')
		}
	}
	s := buf.String()
	// Root-only paths render as a bare "{Type}" line.
	if s == "" && len(segs) > 0 && segs[0].typ != "" {
		return "{" + segs[0].typ + "}"
	}
	return s
}

type outputLine struct {
	bare    string // path without types, for sorting
	display string
}

// mergeNullables finds bare paths that have both a {null} line and typed
// lines. It adds ? to the typed lines and drops the {null} line.
+// e.g., ".score{null}" + ".score{float}" → ".score{float?}" +func mergeNullables(lines []outputLine) []outputLine { + // Group lines by bare path + byBare := make(map[string][]int) // bare → indices into lines + for i, l := range lines { + byBare[l.bare] = append(byBare[l.bare], i) + } + + drop := make(map[int]bool) + for _, indices := range byBare { + if len(indices) < 2 { + continue + } + // Check if any line in this group is {null} + nullIdx := -1 + hasNonNull := false + for _, idx := range indices { + if strings.HasSuffix(lines[idx].display, "{null}") { + nullIdx = idx + } else { + hasNonNull = true + } + } + if nullIdx < 0 || !hasNonNull { + continue + } + // Drop the {null} line and add ? to the others + drop[nullIdx] = true + for _, idx := range indices { + if idx == nullIdx { + continue + } + d := lines[idx].display + // Replace trailing } with ?} + if strings.HasSuffix(d, "}") { + lines[idx].display = d[:len(d)-1] + "?}" + } + } + } + + if len(drop) == 0 { + return lines + } + result := make([]outputLine, 0, len(lines)-len(drop)) + for i, l := range lines { + if !drop[i] { + result = append(result, l) + } + } + return result +} + +// buildDisplayWithParent builds a display line showing type annotations at both +// parentIdx (for disambiguation) and typeIdx (for the new type introduction). +func buildDisplayWithParent(segs []segment, typeIdx, parentIdx int) string { + var buf strings.Builder + for i, seg := range segs { + if seg.name != "" { + buf.WriteByte('.') + buf.WriteString(seg.name) + } + buf.WriteString(seg.index) + if i == parentIdx || i == typeIdx { + buf.WriteByte('{') + buf.WriteString(seg.typ) + buf.WriteByte('}') + } + } + s := buf.String() + if s == "" && len(segs) > 0 && segs[0].typ != "" { + return "{" + segs[0].typ + "}" + } + return s +} + +// buildBareWithParent builds a bare path that includes the parent type for +// sorting/grouping, so children of different parent types sort separately. 
func buildBareWithParent(segs []segment, parentIdx int) string {
	var buf strings.Builder
	for i, seg := range segs {
		if seg.name != "" {
			buf.WriteByte('.')
			buf.WriteString(seg.name)
		}
		buf.WriteString(seg.index)
		if i == parentIdx {
			buf.WriteByte('{')
			buf.WriteString(seg.typ)
			buf.WriteByte('}')
		}
	}
	return buf.String()
}

// buildBare builds a path string without any type annotations, for sorting.
func buildBare(segs []segment) string {
	var buf strings.Builder
	for _, seg := range segs {
		if seg.name != "" {
			buf.WriteByte('.')
			buf.WriteString(seg.name)
		}
		buf.WriteString(seg.index)
	}
	return buf.String()
}
diff --git a/tools/jsontypes/format_test.go b/tools/jsontypes/format_test.go
new file mode 100644
index 0000000..9d7dc5d
--- /dev/null
+++ b/tools/jsontypes/format_test.go
@@ -0,0 +1,197 @@
package jsontypes

import (
	"bufio"
	"io"
	"strings"
	"testing"
)

// TestParsePath checks segment splitting for root, keyed, and nested-index paths.
func TestParsePath(t *testing.T) {
	tests := []struct {
		path string
		want []segment
	}{
		{
			".{RoomsResult}.rooms[]{Room}.name{string}",
			[]segment{
				{name: "", typ: "RoomsResult"},
				{name: "rooms", index: "[]", typ: "Room"},
				{name: "name", typ: "string"},
			},
		},
		{
			".[string]{Person}.friends[]{Friend}.name{string}",
			[]segment{
				{name: "", index: "[string]", typ: "Person"},
				{name: "friends", index: "[]", typ: "Friend"},
				{name: "name", typ: "string"},
			},
		},
		{
			".{Root}.data[int][]{ResourceData}.x{string}",
			[]segment{
				{name: "", typ: "Root"},
				{name: "data", index: "[int][]", typ: "ResourceData"},
				{name: "x", typ: "string"},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.path, func(t *testing.T) {
			got := parsePath(tt.path)
			if len(got) != len(tt.want) {
				t.Fatalf("got %d segments, want %d: %+v", len(got), len(tt.want), got)
			}
			for i := range got {
				if got[i] != tt.want[i] {
					t.Errorf("segment[%d]: got %+v, want %+v", i, got[i], tt.want[i])
				}
			}
		})
	}
}

// TestFormatPaths checks root-map formatting, including the {StateID?}
// nullable merge produced by a sibling {null} line.
func TestFormatPaths(t *testing.T) {
	input := []string{
		".[person_id]{Person}.name{string}",
		".[person_id]{Person}.age{int}",
		".[person_id]{Person}.friends[]{Friend}.name{string}",
		".[person_id]{Person}.friends[]{Friend}.identification{null}",
		".[person_id]{Person}.friends[]{Friend}.identification{StateID}.number{string}",
	}
	got := FormatPaths(input)
	want := []string{
		"[person_id]{Person}",
		"[person_id].age{int}",
		"[person_id].friends[]{Friend}",
		"[person_id].friends[].identification{StateID?}",
		"[person_id].friends[].identification.number{string}",
		"[person_id].friends[].name{string}",
		"[person_id].name{string}",
	}
	if len(got) != len(want) {
		t.Fatalf("got %d lines, want %d:\n got: %s\n want: %s",
			len(got), len(want),
			strings.Join(got, "\n "),
			strings.Join(want, "\n "))
	}
	for i := range got {
		if got[i] != want[i] {
			t.Errorf("line[%d]:\n got: %s\n want: %s", i, got[i], want[i])
		}
	}
}

// TestFormatPathsDifferentTypes verifies that when two different types exist
// at the same path position, their fields are grouped under the parent type
// and don't get deduplicated together.
func TestFormatPathsDifferentTypes(t *testing.T) {
	// Raw paths as produced by the analyzer when choosing "different" types
	input := []string{
		".{Root}.items[]{FileField}.slug{string}",
		".{Root}.items[]{FileField}.filename{string}",
		".{Root}.items[]{FileField}.is_required{bool}",
		".{Root}.items[]{FeatureField}.slug{string}",
		".{Root}.items[]{FeatureField}.feature{string}",
		".{Root}.items[]{FeatureField}.archived{bool}",
	}
	got := FormatPaths(input)
	want := []string{
		"{Root}",
		".items[]{FeatureField}",
		".items[]{FeatureField}.archived{bool}",
		".items[]{FeatureField}.feature{string}",
		".items[]{FeatureField}.slug{string}",
		".items[]{FileField}",
		".items[]{FileField}.filename{string}",
		".items[]{FileField}.is_required{bool}",
		".items[]{FileField}.slug{string}",
	}
	if len(got) != len(want) {
		t.Fatalf("got %d lines, want %d:\n got: %s\n want: %s",
			len(got), len(want),
			strings.Join(got, "\n "),
			strings.Join(want, "\n "))
	}
	for i := range got {
		if got[i] != want[i] {
			t.Errorf("line[%d]:\n got: %s\n want: %s", i, got[i], want[i])
		}
	}
}

// TestDifferentTypesEndToEnd tests the full pipeline from JSON data through
// analysis with "different" type selection to formatted output.
func TestDifferentTypesEndToEnd(t *testing.T) {
	arr := []any{
		map[string]any{"slug": "a", "filename": "x.pdf", "is_required": true},
		map[string]any{"slug": "b", "filename": "y.pdf", "is_required": false},
		map[string]any{"slug": "c", "feature": "upload", "archived": false},
		map[string]any{"slug": "d", "feature": "export", "archived": true},
	}
	obj := map[string]any{"items": arr, "count": jsonNum("4"), "status": "ok"}

	a := &Analyzer{
		Prompter: &Prompter{
			reader: bufio.NewReader(strings.NewReader("")),
			output: io.Discard,
			// Root has 3 field-like keys → confident struct, no prompt needed.
			// Then items[] has 2 shapes → unification prompt: "d" for different,
			// then names for each shape.
+ priorAnswers: []string{"d", "FileField", "FeatureField"}, + }, + knownTypes: make(map[string]*structType), + typesByName: make(map[string]*structType), + } + rawPaths := a.Analyze(".", obj) + formatted := FormatPaths(rawPaths) + + // FileField and FeatureField should each have their own fields listed + // under their type, not merged together + fileFieldLines := 0 + featureFieldLines := 0 + for _, line := range formatted { + if strings.Contains(line, "{FileField}") { + fileFieldLines++ + } + if strings.Contains(line, "{FeatureField}") { + featureFieldLines++ + } + } + // FileField: intro + slug + filename + is_required = 4 + if fileFieldLines < 4 { + t.Errorf("expected at least 4 FileField lines (intro + 3 fields), got %d:\n %s", + fileFieldLines, strings.Join(formatted, "\n ")) + } + // FeatureField: intro + slug + feature + archived = 4 + if featureFieldLines < 4 { + t.Errorf("expected at least 4 FeatureField lines (intro + 3 fields), got %d:\n %s", + featureFieldLines, strings.Join(formatted, "\n ")) + } +} + +func TestFormatPathsRootStruct(t *testing.T) { + input := []string{ + ".{RoomsResult}.rooms[]{Room}.name{string}", + ".{RoomsResult}.errors[]{string}", + } + got := FormatPaths(input) + want := []string{ + "{RoomsResult}", + ".errors[]{string}", + ".rooms[]{Room}", + ".rooms[].name{string}", + } + if len(got) != len(want) { + t.Fatalf("got %d lines, want %d:\n got: %s\n want: %s", + len(got), len(want), + strings.Join(got, "\n "), + strings.Join(want, "\n ")) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("line[%d]:\n got: %s\n want: %s", i, got[i], want[i]) + } + } +} diff --git a/tools/jsontypes/go.mod b/tools/jsontypes/go.mod new file mode 100644 index 0000000..f12f059 --- /dev/null +++ b/tools/jsontypes/go.mod @@ -0,0 +1,3 @@ +module github.com/therootcompany/golib/tool/jsontypes + +go 1.25.0 diff --git a/tools/jsontypes/gostruct.go b/tools/jsontypes/gostruct.go new file mode 100644 index 0000000..b49e13a --- /dev/null +++ 
b/tools/jsontypes/gostruct.go
@@ -0,0 +1,558 @@
package jsontypes

import (
	"fmt"
	"sort"
	"strings"
)

// goType represents a Go struct being built from flat paths.
type goType struct {
	name   string
	fields []goField
}

// goField is one field of a generated struct.
type goField struct {
	goName   string // PascalCase Go field name
	jsonName string // original JSON key
	goType   string // Go type string
	optional bool   // nullable/optional field
}

// goUnion represents a discriminated union — multiple concrete struct types
// at the same JSON position (e.g., an array with different shaped objects).
type goUnion struct {
	name          string              // interface name, e.g., "Item"
	concreteTypes []string            // ordered concrete type names
	sharedFields  []goField           // fields common to ALL concrete types
	uniqueFields  map[string][]string // typeName → json field names unique to it
	typeFieldJSON string              // "type"/"kind" if present in shared, else ""
	index         string              // "[]", "[string]", etc.
	fieldName     string              // json field name in parent struct
}

// markerMethod returns the unexported method name that seals the union
// interface (e.g., "isItem").
func (u *goUnion) markerMethod() string {
	return "is" + u.name
}

// unmarshalFuncName returns the generated discriminator function's name.
func (u *goUnion) unmarshalFuncName() string {
	return "unmarshal" + u.name
}

// wrapperTypeName returns the generated wrapper type's name: a "...Slice"
// for array positions, a "...Map" for keyed positions.
func (u *goUnion) wrapperTypeName() string {
	if u.index == "[]" {
		return u.name + "Slice"
	}
	if strings.HasPrefix(u.index, "[") {
		return u.name + "Map"
	}
	return u.name
}

// GenerateGoStructs converts formatted flat paths into Go struct definitions
// with json tags. When multiple types share an array/map position, it generates
// a sealed interface, discriminator function, and wrapper type.
func GenerateGoStructs(paths []string) string {
	types, unions := buildGoTypes(paths)

	var buf strings.Builder

	// Unions require encoding/json and fmt in the generated file.
	if len(unions) > 0 {
		buf.WriteString("import (\n\t\"encoding/json\"\n\t\"fmt\"\n)\n\n")
	}

	for i, t := range types {
		if i > 0 {
			buf.WriteByte('\n')
		}
		buf.WriteString(fmt.Sprintf("type %s struct {\n", t.name))
		// Measure name/type widths so fields column-align like gofmt output.
		maxNameLen := 0
		maxTypeLen := 0
		for _, f := range t.fields {
			if len(f.goName) > maxNameLen {
				maxNameLen = len(f.goName)
			}
			if len(f.goType) > maxTypeLen {
				maxTypeLen = len(f.goType)
			}
		}
		for _, f := range t.fields {
			tag := fmt.Sprintf("`json:\"%s\"`", f.jsonName)
			if f.optional {
				tag = fmt.Sprintf("`json:\"%s,omitempty\"`", f.jsonName)
			}
			buf.WriteString(fmt.Sprintf("\t%-*s %-*s %s\n",
				maxNameLen, f.goName,
				maxTypeLen, f.goType,
				tag))
		}
		buf.WriteString("}\n")
	}

	for _, u := range unions {
		buf.WriteByte('\n')
		writeUnionCode(&buf, u)
	}

	return buf.String()
}

// buildGoTypes parses the formatted paths and groups fields by type.
// It also detects union positions (bare prefixes with multiple named types)
// and returns goUnion descriptors for them.
func buildGoTypes(paths []string) ([]goType, []*goUnion) {
	// First pass: collect type intros per bare prefix.
	type prefixInfo struct {
		types []string // type names at this position
		name  string   // field name (e.g., "items")
		index string   // index part (e.g., "[]")
	}
	prefixes := make(map[string]*prefixInfo)
	typeOrder := []string{}
	typeSeen := make(map[string]bool)
	typeFields := make(map[string][]goField)

	for _, path := range paths {
		segs := parsePath(path)
		if len(segs) == 0 {
			continue
		}
		last := segs[len(segs)-1]
		if last.typ == "" {
			continue
		}
		typeName := cleanTypeName(last.typ)
		if isPrimitiveType(typeName) {
			continue
		}
		bare := buildBare(segs)
		pi := prefixes[bare]
		if pi == nil {
			pi = &prefixInfo{name: last.name, index: last.index}
			prefixes[bare] = pi
		}
		// Add type if not already present at this prefix
		found := false
		for _, t := range pi.types {
			if t == typeName {
				found = true
				break
			}
		}
		if !found {
			pi.types = append(pi.types, typeName)
		}
		// typeOrder preserves first-seen order for deterministic output.
		if !typeSeen[typeName] {
			typeSeen[typeName] = true
			typeOrder = append(typeOrder, typeName)
		}
	}

	// Build prefixToType for parent lookups (first type at each position).
	prefixToType := make(map[string]string)
	for bare, pi := range prefixes {
		prefixToType[bare] = pi.types[0]
	}

	// Identify union positions (>1 named type at the same bare prefix).
	unionsByBare := make(map[string]*goUnion)
	var unions []*goUnion
	for bare, pi := range prefixes {
		if len(pi.types) <= 1 {
			continue
		}
		ifaceName := singularize(snakeToPascal(pi.name))
		if ifaceName == "" {
			ifaceName = "RootItem"
		}
		// Avoid collision with concrete type names
		for _, t := range pi.types {
			if t == ifaceName {
				ifaceName += "Variant"
				break
			}
		}
		u := &goUnion{
			name:          ifaceName,
			concreteTypes: pi.types,
			index:         pi.index,
			fieldName:     pi.name,
			uniqueFields:  make(map[string][]string),
		}
		unionsByBare[bare] = u
		unions = append(unions, u)
	}

	// Second pass: assign fields to their owning types.
	for _, path := range paths {
		segs := parsePath(path)
		if len(segs) == 0 {
			continue
		}
		last := segs[len(segs)-1]
		if last.typ == "" || last.name == "" {
			continue
		}
		typeName := cleanTypeName(last.typ)

		// Find the parent type.
		parentType := ""
		if len(segs) == 1 {
			if pt, ok := prefixToType[""]; ok {
				parentType = pt
			}
		} else {
			for depth := len(segs) - 2; depth >= 0; depth-- {
				// Prefer explicit type annotation on segment (handles multi-type).
				if segs[depth].typ != "" && !isPrimitiveType(cleanTypeName(segs[depth].typ)) {
					parentType = cleanTypeName(segs[depth].typ)
					break
				}
				// Fall back to bare prefix lookup.
				prefix := buildBare(segs[:depth+1])
				if pt, ok := prefixToType[prefix]; ok {
					parentType = pt
					break
				}
			}
		}
		if parentType == "" {
			continue
		}

		// Determine the Go type for this field.
		lastBare := buildBare(segs)
		var goTyp string
		if u, isUnion := unionsByBare[lastBare]; isUnion && !isPrimitiveType(typeName) {
			goTyp = u.wrapperTypeName()
		} else {
			goTyp = flatTypeToGo(typeName, last.index)
		}

		// A trailing "?" in the annotation marks a nullable/optional field.
		optional := strings.HasSuffix(last.typ, "?")
		if optional {
			goTyp = makePointer(goTyp)
		}

		field := goField{
			goName:   snakeToPascal(last.name),
			jsonName: last.name,
			goType:   goTyp,
			optional: optional,
		}

		// Deduplicate (union fields appear once per concrete type but the
		// parent field should only be added once with the wrapper type).
		existing := typeFields[parentType]
		dup := false
		for _, ef := range existing {
			if ef.jsonName == field.jsonName {
				dup = true
				break
			}
		}
		if !dup {
			typeFields[parentType] = append(existing, field)
		}
	}

	// Compute shared and unique fields for each union.
	for _, u := range unions {
		fieldCounts := make(map[string]int)
		fieldByJSON := make(map[string]goField)

		for _, typeName := range u.concreteTypes {
			for _, f := range typeFields[typeName] {
				fieldCounts[f.jsonName]++
				if _, exists := fieldByJSON[f.jsonName]; !exists {
					fieldByJSON[f.jsonName] = f
				}
			}
		}

		// Fields present in every concrete type become interface getters;
		// a shared "type"/"kind"/"_type" field is a likely discriminator.
		nTypes := len(u.concreteTypes)
		for jsonName, count := range fieldCounts {
			if count == nTypes {
				u.sharedFields = append(u.sharedFields, fieldByJSON[jsonName])
				if jsonName == "type" || jsonName == "kind" || jsonName == "_type" {
					u.typeFieldJSON = jsonName
				}
			}
		}
		sortGoFields(u.sharedFields)

		for _, typeName := range u.concreteTypes {
			typeFieldSet := make(map[string]bool)
			for _, f := range typeFields[typeName] {
				typeFieldSet[f.jsonName] = true
			}
			var unique []string
			for name := range typeFieldSet {
				if fieldCounts[name] == 1 {
					unique = append(unique, name)
				}
			}
			sort.Strings(unique)
			u.uniqueFields[typeName] = unique
		}
	}

	var types []goType
	for _, name := range typeOrder {
		fields := typeFields[name]
		sortGoFields(fields)
		types = append(types, goType{name: name, fields: fields})
	}
	return types, unions
}

// writeUnionCode generates the interface, discriminator, marker methods,
// getters, and wrapper type for a union.
func writeUnionCode(buf *strings.Builder, u *goUnion) {
	marker := u.markerMethod()

	// Interface: one marker method plus a getter per field shared by all variants.
	buf.WriteString(fmt.Sprintf("// %s can be one of: %s.\n",
		u.name, strings.Join(u.concreteTypes, ", ")))
	if u.typeFieldJSON != "" {
		buf.WriteString(fmt.Sprintf(
			"// CHANGE ME: the shared %q field is likely a discriminator — see %s below.\n",
			u.typeFieldJSON, u.unmarshalFuncName()))
	}
	buf.WriteString(fmt.Sprintf("type %s interface {\n", u.name))
	buf.WriteString(fmt.Sprintf("\t%s()\n", marker))
	for _, f := range u.sharedFields {
		buf.WriteString(fmt.Sprintf("\tGet%s() %s\n", f.goName, f.goType))
	}
	buf.WriteString("}\n\n")

	// Marker methods: pointer receivers, so only *T satisfies the interface.
	for _, t := range u.concreteTypes {
		buf.WriteString(fmt.Sprintf("func (*%s) %s() {}\n", t, marker))
	}
	buf.WriteByte('\n')

	// Getter implementations for every shared field on every variant.
	if len(u.sharedFields) > 0 {
		for _, t := range u.concreteTypes {
			for _, f := range u.sharedFields {
				buf.WriteString(fmt.Sprintf("func (v *%s) Get%s() %s { return v.%s }\n",
					t, f.goName, f.goType, f.goName))
			}
			buf.WriteByte('\n')
		}
	}

	// Unmarshal function
	writeUnmarshalFunc(buf, u)

	// Wrapper type
	writeWrapperType(buf, u)
}

// writeUnmarshalFunc emits a function that decodes a json.RawMessage into one
// of the union's concrete variants by probing for keys unique to each type,
// falling back to the variant with the fewest unique fields. When a likely
// discriminator field was detected, a commented-out switch is emitted for the
// user to enable ("CHANGE ME").
func writeUnmarshalFunc(buf *strings.Builder, u *goUnion) {
	buf.WriteString(fmt.Sprintf("// %s decodes a JSON value into the matching %s variant.\n",
		u.unmarshalFuncName(), u.name))
	buf.WriteString(fmt.Sprintf("func %s(data json.RawMessage) (%s, error) {\n",
		u.unmarshalFuncName(), u.name))

	// CHANGE ME comment: suggest the discriminator-based switch when one was detected.
	if u.typeFieldJSON != "" {
		goFieldName := snakeToPascal(u.typeFieldJSON)
		buf.WriteString(fmt.Sprintf(
			"\t// CHANGE ME: switch on the %q discriminator instead of probing unique keys:\n",
			u.typeFieldJSON))
		buf.WriteString(fmt.Sprintf(
			"\t// var probe struct{ %s string `json:\"%s\"` }\n", goFieldName, u.typeFieldJSON))
		buf.WriteString("\t// if err := json.Unmarshal(data, &probe); err == nil {\n")
		buf.WriteString(fmt.Sprintf("\t// switch probe.%s {\n", goFieldName))
		for _, t := range u.concreteTypes {
			buf.WriteString(fmt.Sprintf(
				"\t// case \"???\":\n\t// var v %s\n\t// return &v, json.Unmarshal(data, &v)\n", t))
		}
		buf.WriteString("\t// }\n\t// }\n\n")
	} else {
		buf.WriteString(
			"\t// CHANGE ME: if the variants share a \"type\" or \"kind\" field,\n" +
				"\t// switch on its value instead of probing for unique keys.\n\n")
	}

	buf.WriteString("\tvar keys map[string]json.RawMessage\n")
	buf.WriteString("\tif err := json.Unmarshal(data, &keys); err != nil {\n")
	buf.WriteString("\t\treturn nil, err\n")
	buf.WriteString("\t}\n")

	// Pick fallback type (the one with fewest unique fields).
	fallbackType := u.concreteTypes[0]
	fallbackCount := len(u.uniqueFields[fallbackType])
	for _, t := range u.concreteTypes[1:] {
		if len(u.uniqueFields[t]) < fallbackCount {
			fallbackType = t
			fallbackCount = len(u.uniqueFields[t])
		}
	}

	// Probe unique fields for each non-fallback type. Only the first unique
	// key of each variant is probed; a variant with none gets a CHANGE ME note.
	for _, t := range u.concreteTypes {
		if t == fallbackType {
			continue
		}
		unique := u.uniqueFields[t]
		if len(unique) == 0 {
			buf.WriteString(fmt.Sprintf(
				"\t// CHANGE ME: %s has no unique fields — add a discriminator.\n", t))
			continue
		}
		buf.WriteString(fmt.Sprintf("\tif _, ok := keys[%q]; ok {\n", unique[0]))
		buf.WriteString(fmt.Sprintf("\t\tvar v %s\n", t))
		buf.WriteString("\t\treturn &v, json.Unmarshal(data, &v)\n")
		buf.WriteString("\t}\n")
	}

	buf.WriteString(fmt.Sprintf("\tvar v %s\n", fallbackType))
	buf.WriteString("\treturn &v, json.Unmarshal(data, &v)\n")
	buf.WriteString("}\n\n")
}

// writeWrapperType emits a slice ("[]") or map ("[K]") wrapper type with an
// UnmarshalJSON method that routes each element through the union's
// unmarshal function. Other index kinds emit nothing.
func writeWrapperType(buf *strings.Builder, u *goUnion) {
	wrapper := u.wrapperTypeName()
	unmarshalFunc := u.unmarshalFuncName()

	if u.index == "[]" {
		buf.WriteString(fmt.Sprintf("// %s handles JSON unmarshaling of %s union values.\n",
			wrapper, u.name))
		buf.WriteString(fmt.Sprintf("type %s []%s\n\n", wrapper, u.name))
		buf.WriteString(fmt.Sprintf("func (s *%s) UnmarshalJSON(data []byte) error {\n", wrapper))
		buf.WriteString("\tvar raw []json.RawMessage\n")
		buf.WriteString("\tif err := json.Unmarshal(data, &raw); err != nil {\n")
		buf.WriteString("\t\treturn err\n")
		buf.WriteString("\t}\n")
		buf.WriteString(fmt.Sprintf("\t*s = make(%s, len(raw))\n", wrapper))
		buf.WriteString("\tfor i, msg := range raw {\n")
		buf.WriteString(fmt.Sprintf("\t\tv, err := %s(msg)\n", unmarshalFunc))
		buf.WriteString("\t\tif err != nil {\n")
		// NOTE(review): u.fieldName is interpolated into the emitted format
		// string; a '%' in the JSON field name would corrupt it — presumed rare.
		buf.WriteString(fmt.Sprintf("\t\t\treturn fmt.Errorf(\"%s[%%d]: %%w\", i, err)\n", u.fieldName))
		buf.WriteString("\t\t}\n")
		buf.WriteString("\t\t(*s)[i] = v\n")
		buf.WriteString("\t}\n")
		buf.WriteString("\treturn nil\n")
		buf.WriteString("}\n")
	} else if strings.HasPrefix(u.index, "[") {
		// Map form: key type is the text between the brackets, e.g. "[string]".
		keyType := u.index[1 : len(u.index)-1]
		buf.WriteString(fmt.Sprintf("// %s handles JSON unmarshaling of %s union values.\n",
			wrapper, u.name))
		buf.WriteString(fmt.Sprintf("type %s map[%s]%s\n\n", wrapper, keyType, u.name))
		buf.WriteString(fmt.Sprintf("func (m *%s) UnmarshalJSON(data []byte) error {\n", wrapper))
		buf.WriteString(fmt.Sprintf("\tvar raw map[%s]json.RawMessage\n", keyType))
		buf.WriteString("\tif err := json.Unmarshal(data, &raw); err != nil {\n")
		buf.WriteString("\t\treturn err\n")
		buf.WriteString("\t}\n")
		buf.WriteString(fmt.Sprintf("\t*m = make(%s, len(raw))\n", wrapper))
		buf.WriteString("\tfor k, msg := range raw {\n")
		buf.WriteString(fmt.Sprintf("\t\tv, err := %s(msg)\n", unmarshalFunc))
		buf.WriteString("\t\tif err != nil {\n")
		buf.WriteString(fmt.Sprintf("\t\t\treturn fmt.Errorf(\"%s[%%v]: %%w\", k, err)\n", u.fieldName))
		buf.WriteString("\t\t}\n")
		buf.WriteString("\t\t(*m)[k] = v\n")
		buf.WriteString("\t}\n")
		buf.WriteString("\treturn nil\n")
		buf.WriteString("}\n")
	}
}

// flatTypeToGo converts a flat path type annotation to a Go type string.
+func flatTypeToGo(typ, index string) string { + base := primitiveToGo(typ) + + if index == "" { + return base + } + + // Parse index segments right-to-left to build the type inside-out + var indices []string + i := 0 + for i < len(index) { + if index[i] != '[' { + break + } + end := strings.IndexByte(index[i:], ']') + if end < 0 { + break + } + indices = append(indices, index[i:i+end+1]) + i = i + end + 1 + } + + result := base + for j := len(indices) - 1; j >= 0; j-- { + idx := indices[j] + switch idx { + case "[]": + result = "[]" + result + case "[int]": + result = "map[int]" + result + case "[string]": + result = "map[string]" + result + default: + key := idx[1 : len(idx)-1] + result = "map[" + key + "]" + result + } + } + return result +} + +func primitiveToGo(typ string) string { + switch typ { + case "string": + return "string" + case "int": + return "int64" + case "float": + return "float64" + case "bool": + return "bool" + case "null", "unknown": + return "any" + default: + return typ + } +} + +func isPrimitiveType(typ string) bool { + switch typ { + case "string", "int", "float", "bool", "null", "unknown", "any": + return true + } + return false +} + +func makePointer(typ string) string { + if strings.HasPrefix(typ, "[]") || strings.HasPrefix(typ, "map[") { + return typ + } + return "*" + typ +} + +func cleanTypeName(typ string) string { + return strings.TrimSuffix(typ, "?") +} + +func sortGoFields(fields []goField) { + priority := map[string]int{ + "id": 0, "name": 1, "type": 2, "slug": 3, "label": 4, + } + sort.SliceStable(fields, func(i, j int) bool { + pi, oki := priority[fields[i].jsonName] + pj, okj := priority[fields[j].jsonName] + if oki && okj { + return pi < pj + } + if oki { + return true + } + if okj { + return false + } + return fields[i].jsonName < fields[j].jsonName + }) +} diff --git a/tools/jsontypes/gostruct_test.go b/tools/jsontypes/gostruct_test.go new file mode 100644 index 0000000..fe106a8 --- /dev/null +++ 
b/tools/jsontypes/gostruct_test.go
@@ -0,0 +1,1076 @@
package jsontypes

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
)

// TestGenerateGoStructsSimple checks the basic two-struct case: a root object
// containing an array of named structs.
func TestGenerateGoStructsSimple(t *testing.T) {
	paths := []string{
		"{Root}",
		".rooms[]{Room}",
		".rooms[].id{int}",
		".rooms[].name{string}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "Rooms []Room") {
		t.Error("expected Root to have 'Rooms []Room' field")
	}
	if !strings.Contains(got, "type Root struct") {
		t.Error("expected Root struct")
	}
	if !strings.Contains(got, "type Room struct") {
		t.Error("expected Room struct")
	}
}

// TestGenerateGoStructsEmptyContainers checks that empty objects and arrays
// degrade to `any` / `[]any` rather than generating empty structs.
func TestGenerateGoStructsEmptyContainers(t *testing.T) {
	paths := []string{
		"{Root}",
		".id{int}",
		".metadata{any}",
		".tags[]{any}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "Metadata any") {
		t.Error("expected 'Metadata any' for empty object")
	}
	if !strings.Contains(got, "Tags") || !strings.Contains(got, "[]any") {
		t.Error("expected Tags field with []any type for empty array")
	}
}

// TestGenerateGoStructsOptional checks that the "?" annotation produces
// pointer types with omitempty, for both primitives and nested structs.
func TestGenerateGoStructsOptional(t *testing.T) {
	paths := []string{
		"{Root}",
		".items[]{Item}",
		".items[].id{int}",
		".items[].email{string?}",
		".items[].meta{Meta?}",
		".items[].meta.score{int}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "Email *string") {
		t.Error("expected '*string' for optional string")
	}
	if !strings.Contains(got, `"email,omitempty"`) {
		t.Error("expected omitempty for optional field")
	}
	if !strings.Contains(got, "*Meta") || !strings.Contains(got, `"meta,omitempty"`) {
		t.Error("expected '*Meta' with omitempty for optional struct")
	}
}

// TestGenerateGoStructsMap checks that a "[string]" index generates a map field.
func TestGenerateGoStructsMap(t *testing.T) {
	paths := []string{
		"{Root}",
		".data[string]{Item}",
		".data[string].name{string}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "Data map[string]Item") {
		t.Error("expected 'Data map[string]Item'")
	}
}

// TestGoStructRoundTrip verifies that generated Go structs can unmarshal the
// source JSON and re-marshal it without losing fields. It writes a temporary
// Go program, compiles it, and runs it.
func TestGoStructRoundTrip(t *testing.T) {
	tests := []struct {
		name string
		json string
	}{
		{
			"flat_struct",
			`{"id": 1, "name": "Alice", "active": true, "score": 3.14}`,
		},
		{
			"nested_struct",
			`{"user": {"id": 1, "name": "Bob", "address": {"city": "NYC", "zip": "10001"}}}`,
		},
		{
			"array_of_structs",
			`{"items": [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]}`,
		},
		{
			"optional_fields",
			`{"items": [{"id": 1, "name": "a", "email": "x@y"}, {"id": 2, "name": "b", "email": null}]}`,
		},
		{
			"empty_containers",
			`{"id": 1, "tags": [], "meta": {}}`,
		},
		{
			"nested_arrays",
			`{"groups": [{"id": 1, "members": [{"id": 10, "role": "admin"}]}]}`,
		},
		{
			"all_primitives",
			`{"s": "hello", "i": 42, "f": 1.5, "b": true, "n": null}`,
		},
		{
			"string_array",
			`{"tags": ["go", "rust", "zig"]}`,
		},
		{
			"root_array",
			`[{"id": 1, "name": "x"}, {"id": 2, "name": "y"}]`,
		},
		{
			"deeply_nested",
			`{"a": {"b": {"c": {"d": "leaf"}}}}`,
		},
		{
			"mixed_optional_struct",
			`{"items": [{"id": 1, "detail": {"score": 5}}, {"id": 2, "detail": null}]}`,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			goCode := GenerateGoStructsFromJSON(t, tt.json)
			t.Logf("generated structs:\n%s", goCode)

			// Determine root type — first type declared
			rootType := extractRootType(goCode)
			if rootType == "" {
				t.Fatal("no root type found in generated code")
			}

			// A root-level JSON array needs a slice unmarshal target.
			isArray := strings.HasPrefix(strings.TrimSpace(tt.json), "[")
			roundTripGoCode := buildRoundTripProgram(goCode, rootType, tt.json, isArray)

			runGoProgram(t, tt.name, roundTripGoCode)
		})
	}
}
// TestGoStructRoundTripProdJSON tests against the production JSON file if available.
// The test is skipped when the fixture is not present on this machine.
func TestGoStructRoundTripProdJSON(t *testing.T) {
	const path = "/tmp/rooms-prod-slow-correct.pretty.json"
	data, err := os.ReadFile(path)
	if err != nil {
		t.Skipf("skipping: %v", err)
	}

	goCode := GenerateGoStructsFromJSON(t, string(data))
	rootType := extractRootType(goCode)
	if rootType == "" {
		t.Fatal("no root type found in generated code")
	}

	roundTripGoCode := buildRoundTripProgram(goCode, rootType, string(data), false)
	runGoProgram(t, "prod_json", roundTripGoCode)
}

// GenerateGoStructsFromJSON runs the full pipeline: parse → analyze → format → generate.
// json.Number is used so the analyzer can distinguish ints from floats.
func GenerateGoStructsFromJSON(t *testing.T, jsonStr string) string {
	t.Helper()

	var data any
	dec := json.NewDecoder(strings.NewReader(jsonStr))
	dec.UseNumber()
	if err := dec.Decode(&data); err != nil {
		t.Fatalf("parse JSON: %v", err)
	}

	a := testAnalyzer(t)
	rawPaths := a.Analyze(".", data)
	formatted := FormatPaths(rawPaths)
	return GenerateGoStructs(formatted)
}

// extractRootType returns the name of the first struct type declared in the
// generated code, or "" if none is found.
func extractRootType(goCode string) string {
	for _, line := range strings.Split(goCode, "\n") {
		if strings.HasPrefix(line, "type ") && strings.HasSuffix(line, "struct {") {
			parts := strings.Fields(line)
			if len(parts) >= 2 {
				return parts[1]
			}
		}
	}
	return ""
}

// buildRoundTripProgram creates a Go main program that:
// 1. Unmarshals the JSON into the generated root type
// 2. Re-marshals it back to JSON
// 3. Unmarshals both original and re-marshaled into map[string]any
// 4. Compares that all original keys are present in the round-tripped version
func buildRoundTripProgram(structs, rootType, jsonData string, isArray bool) string {
	// Escape backticks in JSON by splitting into raw string segments
	jsonLiteral := escapeForGoRawString(jsonData)

	unmarshalTarget := fmt.Sprintf("new(%s)", rootType)
	if isArray {
		unmarshalTarget = fmt.Sprintf("new([]%s)", rootType)
	}

	return fmt.Sprintf(`package main

import (
	"encoding/json"
	"fmt"
	"os"
	"reflect"
)

%s

func main() {
	input := %s

	// Unmarshal into generated struct
	target := %s
	if err := json.Unmarshal([]byte(input), target); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal into struct failed: %%v\n", err)
		os.Exit(1)
	}

	// Re-marshal back to JSON
	out, err := json.Marshal(target)
	if err != nil {
		fmt.Fprintf(os.Stderr, "re-marshal failed: %%v\n", err)
		os.Exit(1)
	}

	// Compare: unmarshal both into generic types and check key coverage
	var original, roundTripped any
	if err := json.Unmarshal([]byte(input), &original); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal original: %%v\n", err)
		os.Exit(1)
	}
	if err := json.Unmarshal(out, &roundTripped); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal round-tripped: %%v\n", err)
		os.Exit(1)
	}

	missing := checkKeys("", original, roundTripped)
	if len(missing) > 0 {
		for _, m := range missing {
			fmt.Fprintf(os.Stderr, "MISSING: %%s\n", m)
		}
		os.Exit(1)
	}
	fmt.Println("OK")
}

// checkKeys recursively compares two generic JSON values and returns paths
// where keys from 'a' are missing in 'b'. It ignores value differences
// (types may differ due to int64 vs float64, etc.) — it only checks structure.
func checkKeys(path string, a, b any) []string {
	var missing []string

	switch av := a.(type) {
	case map[string]any:
		bv, ok := b.(map[string]any)
		if !ok {
			return []string{path + " (expected object, got " + reflect.TypeOf(b).String() + ")"}
		}
		for k, aVal := range av {
			childPath := path + "." + k
			bVal, exists := bv[k]
			if !exists {
				// omitempty can drop null/zero fields — only flag if original was non-null
				if aVal != nil {
					missing = append(missing, childPath)
				}
				continue
			}
			missing = append(missing, checkKeys(childPath, aVal, bVal)...)
		}
	case []any:
		bv, ok := b.([]any)
		if !ok {
			return []string{path + " (expected array, got " + reflect.TypeOf(b).String() + ")"}
		}
		// Check up to min length
		n := len(av)
		if len(bv) < n {
			n = len(bv)
			missing = append(missing, fmt.Sprintf("%%s (array length %%d vs %%d)", path, len(av), len(bv)))
		}
		for i := 0; i < n; i++ {
			missing = append(missing, checkKeys(fmt.Sprintf("%%s[%%d]", path, i), av[i], bv[i])...)
		}
	}
	return missing
}
`, structs, jsonLiteral, unmarshalTarget)
}

// escapeForGoRawString embeds JSON in a generated Go program as a raw string
// literal when possible; if the data contains backticks it falls back to a
// single interpreted string literal produced by JSON-escaping the whole value.
func escapeForGoRawString(s string) string {
	if !strings.Contains(s, "`") {
		return "`" + s + "`"
	}
	// Fall back to interpreted string literal with escaping
	b, _ := json.Marshal(s)
	return string(b)
}

// runGoProgram writes code to a temp file, runs it with `go run`, and fails
// the test unless the program exits zero and prints "OK".
func runGoProgram(t *testing.T, name, code string) {
	t.Helper()

	dir := t.TempDir()
	mainFile := filepath.Join(dir, "main.go")
	if err := os.WriteFile(mainFile, []byte(code), 0o644); err != nil {
		t.Fatalf("write temp file: %v", err)
	}

	cmd := exec.Command("go", "run", mainFile)
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("round-trip %s failed:\n%s\n\ngenerated code:\n%s", name, output, code)
	}
	if !strings.Contains(string(output), "OK") {
		t.Fatalf("unexpected output: %s", output)
	}
}

// TestGenerateGoStructsUnion checks the full set of union artifacts generated
// for a two-variant array: interface, markers, getters, unmarshal, wrapper.
func TestGenerateGoStructsUnion(t *testing.T) {
	// Formatted paths as produced when user chooses "different" types
	paths := []string{
		"{Root}",
		".items[]{FileField}",
		".items[]{FileField}.slug{string}",
		".items[]{FileField}.filename{string}",
		".items[]{FileField}.is_required{bool}",
		".items[]{FeatureField}",
		".items[]{FeatureField}.slug{string}",
		".items[]{FeatureField}.feature{string}",
		".items[]{FeatureField}.archived{bool}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	// Should have the interface
	if !strings.Contains(got, "type Item interface") {
		t.Error("expected Item interface")
	}
	// Marker method
	if !strings.Contains(got, "isItem()") {
		t.Error("expected isItem marker method")
	}
	// Shared field getter (slug is common)
	if !strings.Contains(got, "GetSlug() string") {
		t.Error("expected GetSlug getter in interface")
	}
	// Concrete marker implementations
	if !strings.Contains(got, "func (*FileField) isItem()") {
		t.Error("expected FileField marker implementation")
	}
	if !strings.Contains(got, "func (*FeatureField) isItem()") {
		t.Error("expected FeatureField marker implementation")
	}
	// Unmarshal function
	if !strings.Contains(got, "func unmarshalItem(") {
		t.Error("expected unmarshalItem function")
	}
	// Unique field probes (one type is probed, the other is fallback)
	hasProbe := strings.Contains(got, `keys["filename"]`) ||
		strings.Contains(got, `keys["is_required"]`) ||
		strings.Contains(got, `keys["feature"]`) ||
		strings.Contains(got, `keys["archived"]`)
	if !hasProbe {
		t.Error("expected at least one unique field probe")
	}
	// Wrapper type
	if !strings.Contains(got, "type ItemSlice []Item") {
		t.Error("expected ItemSlice wrapper type")
	}
	if !strings.Contains(got, "func (s *ItemSlice) UnmarshalJSON") {
		t.Error("expected UnmarshalJSON on ItemSlice")
	}
	// Parent field uses wrapper type
	if !strings.Contains(got, "ItemSlice") {
		t.Error("expected Root.Items to use ItemSlice type")
	}
	// Import block
	if !strings.Contains(got, `"encoding/json"`) {
		t.Error("expected encoding/json import")
	}
}

// TestGenerateGoStructsUnionWithTypeField checks that a shared "type" field
// is surfaced as a suggested discriminator via a CHANGE ME comment.
func TestGenerateGoStructsUnionWithTypeField(t *testing.T) {
	// Union where concrete types share a "type" field
	paths := []string{
		"{Root}",
		".events[]{ClickEvent}",
		".events[]{ClickEvent}.type{string}",
		".events[]{ClickEvent}.x{int}",
		".events[]{ClickEvent}.y{int}",
		".events[]{PageView}",
		".events[]{PageView}.type{string}",
		".events[]{PageView}.url{string}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	// Should suggest using "type" as discriminator
	if !strings.Contains(got, "CHANGE ME") {
		t.Error("expected CHANGE ME comment")
	}
	if !strings.Contains(got, `"type"`) && !strings.Contains(got, `type`) {
		t.Error("expected reference to 'type' discriminator field")
	}
}

// TestGoStructUnionRoundTrip compiles and runs generated union code against
// real JSON, verifying dispatch and re-marshal via the external Go toolchain.
func TestGoStructUnionRoundTrip(t *testing.T) {
	// Pre-formatted paths representing a union
	paths := []string{
		"{Root}",
		".count{int}",
		".items[]{FileField}",
		".items[]{FileField}.slug{string}",
		".items[]{FileField}.filename{string}",
		".items[]{FileField}.is_required{bool}",
		".items[]{FeatureField}",
		".items[]{FeatureField}.slug{string}",
		".items[]{FeatureField}.feature{string}",
		".items[]{FeatureField}.archived{bool}",
	}
	goCode := GenerateGoStructs(paths)
	t.Logf("generated:\n%s", goCode)

	jsonData := `{
		"count": 4,
		"items": [
			{"slug": "a", "filename": "x.pdf", "is_required": true},
			{"slug": "b", "filename": "y.pdf", "is_required": false},
			{"slug": "c", "feature": "upload", "archived": false},
			{"slug": "d", "feature": "export", "archived": true}
		]
	}`

	program := buildUnionRoundTripProgram(goCode, jsonData)
	runGoProgram(t, "union_round_trip", program)
}

// buildUnionRoundTripProgram creates a Go program that unmarshals JSON through
// the generated union types, checks concrete type dispatch, and re-marshals.
func buildUnionRoundTripProgram(structs, jsonData string) string {
	jsonLiteral := escapeForGoRawString(jsonData)

	// The generated structs already include import block with encoding/json and fmt.
	// Only add os.
	return fmt.Sprintf(`package main

import "os"

%s

func main() {
	input := %s

	var root Root
	if err := json.Unmarshal([]byte(input), &root); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal failed: %%v\n", err)
		os.Exit(1)
	}

	if len(root.Items) != 4 {
		fmt.Fprintf(os.Stderr, "expected 4 items, got %%d\n", len(root.Items))
		os.Exit(1)
	}

	// Check that concrete types were dispatched correctly
	for i, item := range root.Items {
		switch v := item.(type) {
		case *FileField:
			if i >= 2 {
				fmt.Fprintf(os.Stderr, "item[%%d]: expected FeatureField, got FileField\n", i)
				os.Exit(1)
			}
			if v.Filename == "" {
				fmt.Fprintf(os.Stderr, "item[%%d]: FileField.Filename is empty\n", i)
				os.Exit(1)
			}
		case *FeatureField:
			if i < 2 {
				fmt.Fprintf(os.Stderr, "item[%%d]: expected FileField, got FeatureField\n", i)
				os.Exit(1)
			}
			if v.Feature == "" {
				fmt.Fprintf(os.Stderr, "item[%%d]: FeatureField.Feature is empty\n", i)
				os.Exit(1)
			}
		default:
			fmt.Fprintf(os.Stderr, "item[%%d]: unexpected type %%T\n", i, item)
			os.Exit(1)
		}

		// Test shared field getter
		if item.GetSlug() == "" {
			fmt.Fprintf(os.Stderr, "item[%%d]: GetSlug() returned empty\n", i)
			os.Exit(1)
		}
	}

	// Re-marshal and verify
	out, err := json.Marshal(root)
	if err != nil {
		fmt.Fprintf(os.Stderr, "re-marshal failed: %%v\n", err)
		os.Exit(1)
	}

	// Verify round-trip preserves structure
	var check Root
	if err := json.Unmarshal(out, &check); err != nil {
		fmt.Fprintf(os.Stderr, "re-unmarshal failed: %%v\n", err)
		os.Exit(1)
	}
	if len(check.Items) != 4 {
		fmt.Fprintf(os.Stderr, "round-trip: expected 4 items, got %%d\n", len(check.Items))
		os.Exit(1)
	}

	fmt.Println("OK")
}
`, structs, jsonLiteral)
}

// --- Union: 3+ types ---

func TestGenerateGoStructsUnionThreeTypes(t *testing.T) {
	paths := []string{
		"{Root}",
		".events[]{ClickEvent}",
		".events[]{ClickEvent}.action{string}",
		".events[]{ClickEvent}.x{int}",
		".events[]{PageView}",
		".events[]{PageView}.action{string}",
		".events[]{PageView}.url{string}",
		".events[]{ErrorEvent}",
		".events[]{ErrorEvent}.action{string}",
		".events[]{ErrorEvent}.code{int}",
		".events[]{ErrorEvent}.message{string}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "type Event interface") {
		t.Error("expected Event interface")
	}
	for _, typ := range []string{"ClickEvent", "PageView", "ErrorEvent"} {
		if !strings.Contains(got, "func (*"+typ+") isEvent()") {
			t.Errorf("expected %s marker implementation", typ)
		}
	}
	// Shared field getter
	if !strings.Contains(got, "GetAction() string") {
		t.Error("expected GetAction getter for shared 'action' field")
	}
	// Should have probes for at least 2 of the 3 types (one is fallback)
	probes := 0
	for _, key := range []string{`keys["x"]`, `keys["url"]`, `keys["code"]`, `keys["message"]`} {
		if strings.Contains(got, key) {
			probes++
		}
	}
	if probes < 2 {
		t.Errorf("expected at least 2 unique field probes, found %d", probes)
	}
}

// --- Union: name collision with concrete type ---

func TestGenerateGoStructsUnionNameCollision(t *testing.T) {
	// Field "items" singularizes to "Item", which collides with concrete type "Item".
	paths := []string{
		"{Root}",
		".items[]{Item}",
		".items[]{Item}.id{int}",
		".items[]{Item}.path{string}",
		".items[]{Other}",
		".items[]{Other}.id{int}",
		".items[]{Other}.score{float}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	// Interface should NOT be named "Item" since that's a concrete type.
	// Should use "ItemVariant" or similar.
	if strings.Contains(got, "type Item interface") {
		t.Error("interface should not be named 'Item' — collides with concrete type")
	}
	if !strings.Contains(got, "Variant") {
		t.Error("expected 'Variant' suffix to avoid name collision")
	}
}

// --- Union: map-based ---

func TestGenerateGoStructsUnionMap(t *testing.T) {
	paths := []string{
		"{Root}",
		".data[string]{TypeA}",
		".data[string]{TypeA}.name{string}",
		".data[string]{TypeA}.path{string}",
		".data[string]{TypeB}",
		".data[string]{TypeB}.name{string}",
		".data[string]{TypeB}.score{float}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "Map") {
		t.Error("expected map wrapper type")
	}
	if !strings.Contains(got, "map[string]json.RawMessage") {
		t.Error("expected map[string]json.RawMessage in UnmarshalJSON")
	}
}

// --- Union: end-to-end through analyzer ---

func TestGoStructUnionEndToEnd(t *testing.T) {
	arr := []any{
		map[string]any{"slug": "a", "filename": "x.pdf", "is_required": true},
		map[string]any{"slug": "b", "filename": "y.pdf", "is_required": false},
		map[string]any{"slug": "c", "feature": "upload", "archived": false},
		map[string]any{"slug": "d", "feature": "export", "archived": true},
	}
	obj := map[string]any{"items": arr, "count": jsonNum("4"), "status": "ok"}

	// Canned prompter answers: "d" (different types) plus the two type names.
	a := &Analyzer{
		Prompter: &Prompter{
			reader:       bufio.NewReader(strings.NewReader("")),
			output:       io.Discard,
			priorAnswers: []string{"d", "FileField", "FeatureField"},
		},
		knownTypes:  make(map[string]*structType),
		typesByName: make(map[string]*structType),
	}
	rawPaths := a.Analyze(".", obj)
	formatted := FormatPaths(rawPaths)
	goCode := GenerateGoStructs(formatted)
	t.Logf("formatted paths:\n  %s", strings.Join(formatted, "\n  "))
	t.Logf("generated Go:\n%s", goCode)

	if !strings.Contains(goCode, "interface") {
		t.Error("expected union interface from end-to-end")
	}
	if !strings.Contains(goCode, "Slice") {
		t.Error("expected wrapper slice type")
	}
	// The analyzer assigns type names to shapes in encounter order.
	// Verify both user-provided names appear (may be swapped vs our expectation).
	if !strings.Contains(goCode, "FileField") || !strings.Contains(goCode, "FeatureField") {
		t.Errorf("expected both FileField and FeatureField in output")
	}
	if !strings.Contains(goCode, "unmarshal") {
		t.Error("expected unmarshal function")
	}
}

// --- Union: round-trip with type discriminator wired up ---

func TestGoStructUnionRoundTripWithDiscriminator(t *testing.T) {
	// Manually craft Go code that uses a "type" discriminator switch
	// to prove the pattern works when the user uncomments the CHANGE ME code.
	goCode := `
import (
	"encoding/json"
	"fmt"
)

type Root struct {
	Events EventSlice ` + "`json:\"events\"`" + `
}

type ClickEvent struct {
	Type string ` + "`json:\"type\"`" + `
	X    int64  ` + "`json:\"x\"`" + `
	Y    int64  ` + "`json:\"y\"`" + `
}

type PageView struct {
	Type string ` + "`json:\"type\"`" + `
	Url  string ` + "`json:\"url\"`" + `
}

type Event interface {
	isEvent()
	GetType() string
}

func (*ClickEvent) isEvent() {}
func (*PageView) isEvent()   {}

func (v *ClickEvent) GetType() string { return v.Type }
func (v *PageView) GetType() string   { return v.Type }

func unmarshalEvent(data json.RawMessage) (Event, error) {
	var probe struct{ Type string ` + "`json:\"type\"`" + ` }
	if err := json.Unmarshal(data, &probe); err != nil {
		return nil, err
	}
	switch probe.Type {
	case "click":
		var v ClickEvent
		return &v, json.Unmarshal(data, &v)
	case "pageview":
		var v PageView
		return &v, json.Unmarshal(data, &v)
	default:
		return nil, fmt.Errorf("unknown event type: %s", probe.Type)
	}
}

type EventSlice []Event

func (s *EventSlice) UnmarshalJSON(data []byte) error {
	var raw []json.RawMessage
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	*s = make(EventSlice, len(raw))
	for i, msg := range raw {
		v, err := unmarshalEvent(msg)
		if err != nil {
			return fmt.Errorf("events[%d]: %w", i, err)
		}
		(*s)[i] = v
	}
	return nil
}
`
	jsonData := `{
		"events": [
			{"type": "click", "x": 10, "y": 20},
			{"type": "pageview", "url": "https://example.com"},
			{"type": "click", "x": 30, "y": 40}
		]
	}`
	jsonLiteral := escapeForGoRawString(jsonData)

	program := fmt.Sprintf(`package main

import "os"

%s

func main() {
	input := %s

	var root Root
	if err := json.Unmarshal([]byte(input), &root); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal failed: %%v\n", err)
		os.Exit(1)
	}
	if len(root.Events) != 3 {
		fmt.Fprintf(os.Stderr, "expected 3 events, got %%d\n", len(root.Events))
		os.Exit(1)
	}
	// Verify dispatch
	if _, ok := root.Events[0].(*ClickEvent); !ok {
		fmt.Fprintf(os.Stderr, "events[0]: expected *ClickEvent, got %%T\n", root.Events[0])
		os.Exit(1)
	}
	if _, ok := root.Events[1].(*PageView); !ok {
		fmt.Fprintf(os.Stderr, "events[1]: expected *PageView, got %%T\n", root.Events[1])
		os.Exit(1)
	}
	// Verify getter
	if root.Events[0].GetType() != "click" {
		fmt.Fprintf(os.Stderr, "events[0].GetType() = %%q, want %%q\n", root.Events[0].GetType(), "click")
		os.Exit(1)
	}
	// Re-marshal round trip
	out, err := json.Marshal(root)
	if err != nil {
		fmt.Fprintf(os.Stderr, "marshal: %%v\n", err)
		os.Exit(1)
	}
	var check Root
	if err := json.Unmarshal(out, &check); err != nil {
		fmt.Fprintf(os.Stderr, "re-unmarshal: %%v\n", err)
		os.Exit(1)
	}
	if len(check.Events) != 3 {
		fmt.Fprintf(os.Stderr, "round-trip: expected 3 events, got %%d\n", len(check.Events))
		os.Exit(1)
	}
	fmt.Println("OK")
}
`, goCode, jsonLiteral)

	runGoProgram(t, "discriminator_round_trip", program)
}

// --- Mixed int/float same field ---

func TestGoStructRoundTripMixedIntFloat(t *testing.T) {
	// When a field is int in one element and float in another, the analyzer
	// should pick a type that handles both.
	jsonStr := `{"items": [{"id": 1, "score": 10}, {"id": 2, "score": 1.5}]}`
	goCode := GenerateGoStructsFromJSON(t, jsonStr)
	t.Logf("generated:\n%s", goCode)

	// The field should be float64 (not int64) since mixed int/float → float
	if !strings.Contains(goCode, "float64") {
		t.Error("expected float64 for mixed int/float field")
	}

	rootType := extractRootType(goCode)
	program := buildRoundTripProgram(goCode, rootType, jsonStr, false)
	runGoProgram(t, "mixed_int_float", program)
}

// --- Round-trip with maps ---

func TestGoStructRoundTripMap(t *testing.T) {
	jsonStr := `{"data": {"abc123": {"name": "foo", "active": true}, "def456": {"name": "bar", "active": false}, "ghi789": {"name": "baz", "active": true}, "jkl012": {"name": "qux", "active": false}}}`
	goCode := GenerateGoStructsFromJSON(t, jsonStr)
	t.Logf("generated:\n%s", goCode)

	if !strings.Contains(goCode, "map[string]") {
		t.Error("expected map[string] type for data field")
	}

	rootType := extractRootType(goCode)
	program := buildRoundTripProgram(goCode, rootType, jsonStr, false)
	runGoProgram(t, "map_struct", program)
}

// --- Nullable struct inside union ---

func TestGoStructUnionWithNullableNestedStruct(t *testing.T) {
	paths := []string{
		"{Root}",
		".items[]{TypeA}",
		".items[]{TypeA}.id{int}",
		".items[]{TypeA}.detail{Detail?}",
		".items[]{TypeA}.detail.score{int}",
		".items[]{TypeA}.path{string}",
		".items[]{TypeB}",
		".items[]{TypeB}.id{int}",
		".items[]{TypeB}.label{string}",
	}
	got := GenerateGoStructs(paths)
	t.Logf("output:\n%s", got)

	if !strings.Contains(got, "*Detail") {
		t.Error("expected *Detail for nullable nested struct in union variant")
	}
	if !strings.Contains(got, "type Item interface") {
		t.Error("expected Item interface")
	}
	if !strings.Contains(got, "type Detail struct") {
		t.Error("expected Detail struct definition")
	}
}

// --- Single shape should NOT produce union ---

func TestGoStructSingleShapeNoUnion(t *testing.T) {
	jsonStr := `{"items": [{"slug": "a", "path": "x"}, {"slug": "b", "path": "y"}]}`
	goCode := GenerateGoStructsFromJSON(t, jsonStr)
	t.Logf("generated:\n%s", goCode)

	if strings.Contains(goCode, "interface") {
		t.Error("single shape should not produce a union interface")
	}
	if strings.Contains(goCode, "Slice") {
		t.Error("single shape should not produce a wrapper slice type")
	}
	if strings.Contains(goCode, "encoding/json") {
		t.Error("single shape should not need encoding/json import")
	}
}

// --- Re-marshal value fidelity (not just key presence) ---

func TestGoStructRoundTripValueFidelity(t *testing.T) {
	jsonStr := `{"name": "test", "count": 42, "ratio": 3.14, "active": true, "tags": ["a", "b"], "nested": {"x": 1}}`
	goCode := GenerateGoStructsFromJSON(t, jsonStr)

	rootType := extractRootType(goCode)
	jsonLiteral := escapeForGoRawString(jsonStr)

	// This program checks EXACT value equality, not just key presence.
	program := fmt.Sprintf(`package main

import (
	"encoding/json"
	"fmt"
	"os"
	"math"
)

%s

func main() {
	input := %s

	target := new(%s)
	if err := json.Unmarshal([]byte(input), target); err != nil {
		fmt.Fprintf(os.Stderr, "unmarshal: %%v\n", err)
		os.Exit(1)
	}

	out, err := json.Marshal(target)
	if err != nil {
		fmt.Fprintf(os.Stderr, "marshal: %%v\n", err)
		os.Exit(1)
	}

	// Unmarshal both into generic maps and compare values
	var orig, rt map[string]any
	json.Unmarshal([]byte(input), &orig)
	json.Unmarshal(out, &rt)

	errs := compareValues("", orig, rt)
	if len(errs) > 0 {
		for _, e := range errs {
			fmt.Fprintln(os.Stderr, e)
		}
		os.Exit(1)
	}
	fmt.Println("OK")
}

func compareValues(path string, a, b any) []string {
	var errs []string
	switch av := a.(type) {
	case map[string]any:
		bv, ok := b.(map[string]any)
		if !ok {
			return []string{fmt.Sprintf("%%s: type mismatch: %%T vs %%T", path, a, b)}
		}
		for k := range av {
			errs = append(errs, compareValues(path+"."+k, av[k], bv[k])...)
		}
	case []any:
		bv, ok := b.([]any)
		if !ok {
			return []string{fmt.Sprintf("%%s: type mismatch: %%T vs %%T", path, a, b)}
		}
		if len(av) != len(bv) {
			return []string{fmt.Sprintf("%%s: length %%d vs %%d", path, len(av), len(bv))}
		}
		for i := range av {
			errs = append(errs, compareValues(fmt.Sprintf("%%s[%%d]", path, i), av[i], bv[i])...)
		}
	case float64:
		bv, ok := b.(float64)
		if !ok {
			return []string{fmt.Sprintf("%%s: type mismatch: %%T vs %%T", path, a, b)}
		}
		if math.Abs(av-bv) > 1e-9 {
			return []string{fmt.Sprintf("%%s: value %%v vs %%v", path, av, bv)}
		}
	case string:
		bv, ok := b.(string)
		if !ok || av != bv {
			return []string{fmt.Sprintf("%%s: value %%v vs %%v", path, a, b)}
		}
	case bool:
		bv, ok := b.(bool)
		if !ok || av != bv {
			return []string{fmt.Sprintf("%%s: value %%v vs %%v", path, a, b)}
		}
	case nil:
		if b != nil {
			return []string{fmt.Sprintf("%%s: value nil vs %%v", path, b)}
		}
	}
	return errs
}
`, goCode, jsonLiteral, rootType)

	runGoProgram(t, "value_fidelity", program)
}

// --- Full pipeline anonymous mode with diverse JSON ---

func TestGoStructRoundTripDiverseAnonymous(t *testing.T) {
	jsonStr := `{
		"id": 1,
		"name": "test",
		"settings": {"theme": "dark", "lang": "en", "notify": true},
		"tags": ["go", "rust"],
		"scores": [10, 20, 30],
		"metadata": {},
		"empty_list": [],
		"users": [
			{
				"id": 100,
				"name": "Alice",
				"email": "a@b.com",
				"address": {"city": "NYC", "zip": "10001"},
				"roles": [{"id": 1, "perm": "admin"}]
			},
			{
				"id": 200,
				"name": "Bob",
				"email": null,
				"address": {"city": "LA", "zip": "90001"},
				"roles": []
			}
		],
		"active": true,
		"ratio": 0.75
	}`

	goCode := GenerateGoStructsFromJSON(t, jsonStr)
	t.Logf("generated:\n%s", goCode)

	rootType := extractRootType(goCode)
	program := buildRoundTripProgram(goCode, rootType, jsonStr, false)
	runGoProgram(t, "diverse_anonymous", program)
}

func TestGenerateGoStructsParsePath(t *testing.T) {
	// Verify parsePath handles the formatted output format.
	// Note: formatted paths like ".rooms[]{Room}" parse differently from
	// raw paths like ".{Root}.rooms[]{Room}" — no empty root segment.
	tests := []struct {
		path string
		want []segment
	}{
		{
			"{Root}",
			[]segment{{name: "", typ: "Root"}},
		},
		{
			".rooms[]{Room}",
			[]segment{{name: "rooms", index: "[]", typ: "Room"}},
		},
		{
			".rooms[].id{int}",
			[]segment{{name: "rooms", index: "[]"}, {name: "id", typ: "int"}},
		},
	}
	for _, tt := range tests {
		t.Run(tt.path, func(t *testing.T) {
			got := parsePath(tt.path)
			if len(got) != len(tt.want) {
				t.Fatalf("got %d segments, want %d: %+v", len(got), len(tt.want), got)
			}
			for i := range got {
				if got[i] != tt.want[i] {
					t.Errorf("segment[%d]: got %+v, want %+v", i, got[i], tt.want[i])
				}
			}
		})
	}
}
diff --git a/tools/jsontypes/heuristics.go b/tools/jsontypes/heuristics.go
new file mode 100644
index 0000000..89523eb
--- /dev/null
+++ b/tools/jsontypes/heuristics.go
@@ -0,0 +1,370 @@
package jsontypes

import (
	"encoding/base64"
	"fmt"
	"strconv"
	"strings"
	"unicode"
)

// looksLikeMap uses heuristics to guess whether an object is a map (keyed
// collection) rather than a struct. Returns true/false and a confidence hint.
// If confidence is low, the caller should prompt the user.
func looksLikeMap(obj map[string]any) (isMap bool, confident bool) {
	keys := sortedKeys(obj)
	n := len(keys)
	if n < 3 {
		// Too few keys to be confident about anything
		return false, false
	}

	// All keys are integers? 
+ allInts := true + for _, k := range keys { + if _, err := strconv.ParseInt(k, 10, 64); err != nil { + allInts = false + break + } + } + if allInts { + return true, true + } + + // All keys same length and contain mixed letters+digits → likely IDs + if allSameLength(keys) && allAlphanumericWithDigits(keys) { + return true, true + } + + // All keys same length and look like base64/hex IDs + if allSameLength(keys) && allLookLikeIDs(keys) { + return true, true + } + + // Keys look like typical struct field names (camelCase, snake_case, short words) + // This must be checked before value-shape heuristics: a struct with many + // fields whose values happen to share a shape is still a struct. + if allLookLikeFieldNames(keys) { + return false, true + } + + // Large number of keys where most values have the same shape — likely a map + if n > 20 && valuesHaveSimilarShape(obj) { + return true, true + } + + return false, false +} + +func allSameLength(keys []string) bool { + if len(keys) == 0 { + return true + } + l := len(keys[0]) + for _, k := range keys[1:] { + if len(k) != l { + return false + } + } + return true +} + +// allLookLikeIDs checks if keys look like identifiers/tokens rather than field +// names: no spaces, alphanumeric/base64/hex, and not common English field names. +func allLookLikeIDs(keys []string) bool { + for _, k := range keys { + if strings.ContainsAny(k, " \t\n") { + return false + } + // Hex or base64 strings of any length ≥ 4 + if len(k) >= 4 && (isHex(k) || isAlphanumeric(k) || isBase64(k)) { + continue + } + return false + } + // Additional check: IDs typically don't look like field names. + // If ALL of them look like field names (e.g., camelCase), not IDs. 
+ if allLookLikeFieldNames(keys) { + return false + } + return true +} + +func isAlphanumeric(s string) bool { + for _, r := range s { + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return false + } + } + return true +} + +// allAlphanumericWithDigits checks if all keys are alphanumeric and each +// contains at least one digit (distinguishing IDs like "abc123" from field +// names like "name"). +func allAlphanumericWithDigits(keys []string) bool { + for _, k := range keys { + hasDigit := false + for _, r := range k { + if unicode.IsDigit(r) { + hasDigit = true + } else if !unicode.IsLetter(r) { + return false + } + } + if !hasDigit { + return false + } + } + return true +} + +func isBase64(s string) bool { + // Try standard and URL-safe base64 + if _, err := base64.StdEncoding.DecodeString(s); err == nil { + return true + } + if _, err := base64.URLEncoding.DecodeString(s); err == nil { + return true + } + if _, err := base64.RawURLEncoding.DecodeString(s); err == nil { + return true + } + return false +} + +func isHex(s string) bool { + for _, r := range s { + if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) { + return false + } + } + return true +} + +// allLookLikeFieldNames checks if keys look like typical struct field names: +// camelCase, snake_case, PascalCase, or short lowercase words. 
+func allLookLikeFieldNames(keys []string) bool { + fieldLike := 0 + for _, k := range keys { + if looksLikeFieldName(k) { + fieldLike++ + } + } + // If >80% look like field names, probably a struct + return fieldLike > len(keys)*4/5 +} + +func looksLikeFieldName(k string) bool { + if len(k) == 0 || len(k) > 40 { + return false + } + // Must start with a letter + runes := []rune(k) + if !unicode.IsLetter(runes[0]) { + return false + } + // Only letters, digits, underscores + for _, r := range runes { + if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' { + return false + } + } + return true +} + +// valuesHaveSimilarShape checks if most values in the object are objects with +// similar key sets. +func valuesHaveSimilarShape(obj map[string]any) bool { + shapes := make(map[string]int) + total := 0 + for _, v := range obj { + if m, ok := v.(map[string]any); ok { + shapes[shapeSignature(m)]++ + total++ + } + } + if total == 0 { + return false + } + // Find most common shape + maxCount := 0 + for _, count := range shapes { + if count > maxCount { + maxCount = count + } + } + return maxCount > total/2 +} + +// inferKeyName tries to infer a meaningful key name from the map's keys. +func inferKeyName(obj map[string]any) string { + keys := sortedKeys(obj) + if len(keys) == 0 { + return "string" + } + + // All numeric? + allNum := true + for _, k := range keys { + if _, err := strconv.ParseInt(k, 10, 64); err != nil { + allNum = false + break + } + } + if allNum { + return "int" + } + + // Check if all values are objects with a common field that matches the + // key (e.g., keys are "abc123" and objects have an "id" field with "abc123"). + // This suggests the key name is "id". 
+ for _, fieldName := range []string{"id", "ID", "Id", "_id"} { + match := true + for k, v := range obj { + if m, ok := v.(map[string]any); ok { + if val, exists := m[fieldName]; exists { + if fmt.Sprintf("%v", val) == k { + continue + } + } + } + match = false + break + } + if match && len(obj) > 0 { + return fieldName + } + } + + return "string" +} + +// ambiguousTypeNames maps lowercase inferred names to their canonical form. +// When one of these is inferred, the parent type name is prepended and the +// canonical form is used (e.g., "json" in any casing → ParentJSON). +var ambiguousTypeNames = map[string]string{ + "json": "JSON", + "data": "Data", + "item": "Item", + "value": "Value", + "result": "Result", +} + +// inferTypeName tries to guess a struct name from the path context. +func inferTypeName(path string) string { + // Root path → "Root" + if path == "." { + return "Root" + } + + // Root-level collection items (no parent type yet) + // e.g., ".[]", ".[string]", ".[int]" + if !strings.Contains(path, "{") { + name := inferTypeNameFromSegments(path) + if name == "" { + return "RootItem" + } + return name + } + + return inferTypeNameFromSegments(path) +} + +func inferTypeNameFromSegments(path string) string { + // Extract the last meaningful segment from the path + // e.g., ".friends[int]" → "Friend", ".{Person}.address" → "Address" + parts := strings.FieldsFunc(path, func(r rune) bool { + return r == '.' 
|| r == '[' || r == ']' || r == '{' || r == '}' + }) + if len(parts) == 0 { + return "" + } + last := parts[len(parts)-1] + // Skip index-like segments + if last == "int" || last == "string" || last == "id" { + if len(parts) >= 2 { + last = parts[len(parts)-2] + } else { + return "" + } + } + // Strip common suffixes like _id, _key, Id + last = strings.TrimSuffix(last, "_id") + last = strings.TrimSuffix(last, "_key") + last = strings.TrimSuffix(last, "Id") + last = strings.TrimSuffix(last, "Key") + if last == "" { + return "" + } + name := singularize(snakeToPascal(last)) + + // If the inferred name is too generic, use canonical form and prepend parent + if canonical, ok := ambiguousTypeNames[strings.ToLower(name)]; ok { + parent := parentTypeName(path) + if parent != "" { + return parent + canonical + } + return canonical + } + + return name +} + +// isUbiquitousField returns true if a field name is so common across all +// domains (databases, APIs, languages) that sharing it doesn't imply the +// objects are the same type. These are excluded when deciding whether to +// default to "same" or "different" types. +func isUbiquitousField(name string) bool { + // Exact matches + switch name { + case "id", "ID", "Id", "_id", + "name", "Name", + "type", "Type", "_type", + "kind", "Kind", + "slug", "Slug", + "label", "Label", + "title", "Title", + "description", "Description": + return true + } + // Suffix patterns: *_at, *_on, *At, *On (timestamps/dates) + if strings.HasSuffix(name, "_at") || strings.HasSuffix(name, "_on") || + strings.HasSuffix(name, "At") || strings.HasSuffix(name, "On") { + return true + } + return false +} + +// snakeToPascal converts snake_case or camelCase to PascalCase. 
+func snakeToPascal(s string) string { + parts := strings.Split(s, "_") + for i, p := range parts { + parts[i] = capitalize(p) + } + return strings.Join(parts, "") +} + +func capitalize(s string) string { + if len(s) == 0 { + return s + } + return strings.ToUpper(s[:1]) + s[1:] +} + +// singularize does a naive singularization for common English plurals. +func singularize(s string) string { + if strings.HasSuffix(s, "ies") && len(s) > 4 { + return s[:len(s)-3] + "y" + } + if strings.HasSuffix(s, "ses") || strings.HasSuffix(s, "xes") || strings.HasSuffix(s, "zes") { + return s[:len(s)-2] + } + if strings.HasSuffix(s, "ss") || strings.HasSuffix(s, "us") || strings.HasSuffix(s, "is") { + return s // not plural + } + if strings.HasSuffix(s, "s") && len(s) > 3 { + return s[:len(s)-1] + } + return s +} diff --git a/tools/jsontypes/jsdoc.go b/tools/jsontypes/jsdoc.go new file mode 100644 index 0000000..3a5b687 --- /dev/null +++ b/tools/jsontypes/jsdoc.go @@ -0,0 +1,56 @@ +package jsontypes + +import ( + "fmt" + "strings" +) + +// generateJSDoc converts formatted flat paths into JSDoc @typedef annotations. 
// GenerateJSDoc converts formatted flat paths into JSDoc @typedef blocks,
// one per struct type produced by buildGoTypes, separated by blank lines.
// Optional fields use JSDoc's bracketed [name] property syntax.
func GenerateJSDoc(paths []string) string {
	types, _ := buildGoTypes(paths)
	if len(types) == 0 {
		return ""
	}

	var buf strings.Builder
	for i, t := range types {
		if i > 0 {
			buf.WriteByte('\n')
		}
		buf.WriteString(fmt.Sprintf("/**\n * @typedef {Object} %s\n", t.name))
		for _, f := range t.fields {
			jsType := goTypeToJSDoc(f.goType)
			if f.optional {
				buf.WriteString(fmt.Sprintf(" * @property {%s} [%s]\n", jsType, f.jsonName))
			} else {
				buf.WriteString(fmt.Sprintf(" * @property {%s} %s\n", jsType, f.jsonName))
			}
		}
		buf.WriteString(" */\n")
	}
	return buf.String()
}

// goTypeToJSDoc maps a Go type string onto its JSDoc spelling. Pointers are
// unwrapped (nullability is expressed via [optional] at the property level),
// slices become "T[]", and maps collapse to plain "Object".
// NOTE(review): map value types are discarded here; JSDoc supports
// "Object.<string, T>" — but the in-file tests pin "{Object}", so changing
// this would be a coordinated test change.
func goTypeToJSDoc(goTyp string) string {
	goTyp = strings.TrimPrefix(goTyp, "*")

	if strings.HasPrefix(goTyp, "[]") {
		return goTypeToJSDoc(goTyp[2:]) + "[]"
	}
	if strings.HasPrefix(goTyp, "map[string]") {
		return "Object"
	}

	switch goTyp {
	case "string":
		return "string"
	case "int64", "float64":
		return "number"
	case "bool":
		return "boolean"
	case "any":
		return "*"
	default:
		// Named struct types pass through unchanged.
		return goTyp
	}
}

// TestGenerateJSDocFlat checks primitive property rendering on a flat object.
func TestGenerateJSDocFlat(t *testing.T) {
	paths := []string{
		"{Root}",
		".name{string}",
		".age{int}",
		".active{bool}",
	}
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@typedef {Object} Root",
		"@property {string} name",
		"@property {number} age",
		"@property {boolean} active",
	)
}

// TestGenerateJSDocOptional checks that optional fields use [name] syntax.
func TestGenerateJSDocOptional(t *testing.T) {
	paths := []string{
		"{Root}",
		".name{string}",
		".bio{string?}",
	}
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@property {string} name",
		"@property {string} [bio]",
	)
}

// TestGenerateJSDocNested checks that a nested struct yields its own typedef
// and is referenced by name from the parent.
func TestGenerateJSDocNested(t *testing.T) {
	paths := []string{
		"{Root}",
		".addr{Address}",
		".addr.city{string}",
	}
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@typedef {Object} Root",
		"@property {Address} addr",
		"@typedef {Object} Address",
		"@property {string} city",
	)
}

// TestGenerateJSDocArray checks the "T[]" spelling for slices.
func TestGenerateJSDocArray(t *testing.T) {
	paths := []string{
		"{Root}",
		".items[]{Item}",
		".items[].id{string}",
	}
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@property {Item[]} items",
	)
}

// TestGenerateJSDocMap checks that map fields collapse to "Object".
func TestGenerateJSDocMap(t *testing.T) {
	paths := []string{
		"{Root}",
		".scores[string]{Score}",
		".scores[string].value{int}",
	}
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@property {Object} scores",
	)
}

// TestGenerateJSDocEmpty checks the no-input degenerate case.
func TestGenerateJSDocEmpty(t *testing.T) {
	out := GenerateJSDoc(nil)
	if out != "" {
		t.Errorf("expected empty output, got %q", out)
	}
}

// TestGenerateJSDocEndToEnd runs the full analyze→format→JSDoc pipeline on
// raw JSON rather than pre-built paths.
func TestGenerateJSDocEndToEnd(t *testing.T) {
	jsonStr := `{"name":"Alice","age":30,"tags":["a"],"meta":{"key":"val"}}`
	paths := analyzeAndFormat(t, jsonStr)
	out := GenerateJSDoc(paths)
	assertContainsAll(t, out,
		"@typedef {Object}",
		"@property {string} name",
		"@property {number} age",
	)
}

// assertContainsAll fails (non-fatally, per substring) when any wanted
// substring is missing from got.
func assertContainsAll(t *testing.T, got string, wants ...string) {
	t.Helper()
	for _, want := range wants {
		if !strings.Contains(got, want) {
			t.Errorf("output missing %q\ngot:\n%s", want, got)
		}
	}
}
+func GenerateJSONSchema(paths []string) string { + types, _ := buildGoTypes(paths) + + typeMap := make(map[string]goType) + for _, t := range types { + typeMap[t.name] = t + } + + if len(types) == 0 { + return "{}\n" + } + + root := types[0] + defs := make(map[string]any) + result := structToJSONSchema(root, typeMap, defs) + result["$schema"] = "https://json-schema.org/draft/2020-12/schema" + + if len(defs) > 0 { + result["$defs"] = defs + } + + data, _ := json.MarshalIndent(result, "", " ") + return string(data) + "\n" +} + +func structToJSONSchema(t goType, typeMap map[string]goType, defs map[string]any) map[string]any { + props := make(map[string]any) + var required []string + + for _, f := range t.fields { + schema := goTypeToJSONSchema(f.goType, f.optional, typeMap, defs) + props[f.jsonName] = schema + if !f.optional { + required = append(required, f.jsonName) + } + } + + result := map[string]any{ + "type": "object", + "properties": props, + } + if len(required) > 0 { + result["required"] = required + } + return result +} + +func goTypeToJSONSchema(goTyp string, nullable bool, typeMap map[string]goType, defs map[string]any) map[string]any { + result := goTypeToJSONSchemaInner(goTyp, typeMap, defs) + if nullable { + // JSON Schema nullable: anyOf with null + return map[string]any{ + "anyOf": []any{ + result, + map[string]any{"type": "null"}, + }, + } + } + return result +} + +func goTypeToJSONSchemaInner(goTyp string, typeMap map[string]goType, defs map[string]any) map[string]any { + goTyp = strings.TrimPrefix(goTyp, "*") + + // Slice + if strings.HasPrefix(goTyp, "[]") { + elemType := goTyp[2:] + return map[string]any{ + "type": "array", + "items": goTypeToJSONSchemaInner(elemType, typeMap, defs), + } + } + + // Map + if strings.HasPrefix(goTyp, "map[string]") { + valType := goTyp[11:] + return map[string]any{ + "type": "object", + "additionalProperties": goTypeToJSONSchemaInner(valType, typeMap, defs), + } + } + + // Primitives + switch goTyp { + case 
"string": + return map[string]any{"type": "string"} + case "int64": + return map[string]any{"type": "integer"} + case "float64": + return map[string]any{"type": "number"} + case "bool": + return map[string]any{"type": "boolean"} + case "any": + return map[string]any{} + } + + // Named struct — emit as $ref, add to $defs + if t, ok := typeMap[goTyp]; ok { + if _, exists := defs[goTyp]; !exists { + defs[goTyp] = nil // placeholder + defs[goTyp] = structToJSONSchema(t, typeMap, defs) + } + return map[string]any{"$ref": "#/$defs/" + goTyp} + } + + return map[string]any{} +} diff --git a/tools/jsontypes/jsonschema_test.go b/tools/jsontypes/jsonschema_test.go new file mode 100644 index 0000000..8b7b91e --- /dev/null +++ b/tools/jsontypes/jsonschema_test.go @@ -0,0 +1,167 @@ +package jsontypes + +import ( + "encoding/json" + "testing" +) + +func TestGenerateJSONSchemaFlat(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".age{int}", + ".active{bool}", + } + out := GenerateJSONSchema(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + if doc["$schema"] != "https://json-schema.org/draft/2020-12/schema" { + t.Errorf("missing or wrong $schema") + } + if doc["type"] != "object" { + t.Errorf("expected type=object, got %v", doc["type"]) + } + props := doc["properties"].(map[string]any) + assertJSType(t, props, "name", "string") + assertJSType(t, props, "age", "integer") + assertJSType(t, props, "active", "boolean") + + // Check required + req := doc["required"].([]any) + reqSet := make(map[string]bool) + for _, r := range req { + reqSet[r.(string)] = true + } + for _, f := range []string{"name", "age", "active"} { + if !reqSet[f] { + t.Errorf("expected %q in required", f) + } + } +} + +func TestGenerateJSONSchemaNested(t *testing.T) { + paths := []string{ + "{Root}", + ".addr{Address}", + ".addr.city{string}", + ".addr.zip{string}", + } + out := 
// TestGenerateJSONSchemaArray checks the array rendering: a slice of a named
// struct becomes {"type":"array","items":{"$ref": ...}}.
func TestGenerateJSONSchemaArray(t *testing.T) {
	paths := []string{
		"{Root}",
		".items[]{Item}",
		".items[].id{string}",
	}
	out := GenerateJSONSchema(paths)
	var doc map[string]any
	if err := json.Unmarshal([]byte(out), &doc); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, out)
	}
	props := doc["properties"].(map[string]any)
	items := props["items"].(map[string]any)
	if items["type"] != "array" {
		t.Errorf("expected type=array, got %v", items["type"])
	}
	itemsItems := items["items"].(map[string]any)
	if itemsItems["$ref"] != "#/$defs/Item" {
		t.Errorf("expected items.$ref=#/$defs/Item, got %v", itemsItems)
	}
}

// TestGenerateJSONSchemaOptional checks that an optional field renders as a
// two-entry anyOf (base schema + null) and is excluded from required.
func TestGenerateJSONSchemaOptional(t *testing.T) {
	paths := []string{
		"{Root}",
		".name{string}",
		".bio{string?}",
	}
	out := GenerateJSONSchema(paths)
	var doc map[string]any
	if err := json.Unmarshal([]byte(out), &doc); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, out)
	}
	props := doc["properties"].(map[string]any)
	bio := props["bio"].(map[string]any)
	anyOf := bio["anyOf"].([]any)
	if len(anyOf) != 2 {
		t.Fatalf("expected 2 anyOf entries, got %d", len(anyOf))
	}
	// bio should not be in required
	req := doc["required"].([]any)
	for _, r := range req {
		if r.(string) == "bio" {
			t.Errorf("bio should not be in required")
		}
	}
}

// TestGenerateJSONSchemaMap checks that a map field becomes an object schema
// whose additionalProperties $ref-erences the value struct.
func TestGenerateJSONSchemaMap(t *testing.T) {
	paths := []string{
		"{Root}",
		".scores[string]{Score}",
		".scores[string].value{int}",
	}
	out := GenerateJSONSchema(paths)
	var doc map[string]any
	if err := json.Unmarshal([]byte(out), &doc); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, out)
	}
	props := doc["properties"].(map[string]any)
	scores := props["scores"].(map[string]any)
	if scores["type"] != "object" {
		t.Errorf("expected type=object for map, got %v", scores["type"])
	}
	addl := scores["additionalProperties"].(map[string]any)
	if addl["$ref"] != "#/$defs/Score" {
		t.Errorf("expected additionalProperties.$ref=#/$defs/Score, got %v", addl)
	}
}

// TestGenerateJSONSchemaEmpty checks the no-input degenerate case.
func TestGenerateJSONSchemaEmpty(t *testing.T) {
	out := GenerateJSONSchema(nil)
	if out != "{}\n" {
		t.Errorf("expected empty schema, got %q", out)
	}
}

// TestGenerateJSONSchemaEndToEnd runs the full analyze→format→schema
// pipeline on raw JSON and sanity-checks the root.
func TestGenerateJSONSchemaEndToEnd(t *testing.T) {
	jsonStr := `{"name":"Alice","age":30,"tags":["a","b"],"meta":{"key":"val"}}`
	paths := analyzeAndFormat(t, jsonStr)
	out := GenerateJSONSchema(paths)
	var doc map[string]any
	if err := json.Unmarshal([]byte(out), &doc); err != nil {
		t.Fatalf("invalid JSON: %v\n%s", err, out)
	}
	if doc["type"] != "object" {
		t.Errorf("expected type=object at root: %s", out)
	}
}

// assertJSType asserts that props[field] exists and has the given JSON
// Schema "type". Non-fatal so several fields can be checked per test.
func assertJSType(t *testing.T, props map[string]any, field, expected string) {
	t.Helper()
	f, ok := props[field].(map[string]any)
	if !ok {
		t.Errorf("field %q not found in properties", field)
		return
	}
	if f["type"] != expected {
		t.Errorf("field %q: expected type=%q, got %v", field, expected, f["type"])
	}
}
// NewPrompter creates a prompter. If the JSON input comes from stdin, we
// open /dev/tty for interactive prompts so they don't conflict with the data
// stream; in anonymous mode an empty reader is used so every prompt
// immediately takes its default on EOF.
// NOTE(review): /dev/tty does not exist on Windows — confirm target
// platforms, or gate this path behind a build tag.
func NewPrompter(inputIsStdin, anonymous bool) (*Prompter, error) {
	p := &Prompter{output: os.Stderr}
	if inputIsStdin {
		if anonymous {
			// No prompts needed — use a closed reader that returns EOF
			p.reader = bufio.NewReader(strings.NewReader(""))
		} else {
			tty, err := os.Open("/dev/tty")
			if err != nil {
				return nil, fmt.Errorf("cannot open /dev/tty for prompts (input is stdin): %w", err)
			}
			p.tty = tty
			p.reader = bufio.NewReader(tty)
		}
	} else {
		p.reader = bufio.NewReader(os.Stdin)
	}
	return p, nil
}

// LoadAnswers reads prior answers from a file to use as defaults.
// Best-effort: a missing or unreadable file is silently ignored.
func (p *Prompter) LoadAnswers(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		return
	}
	lines := strings.Split(strings.TrimRight(string(data), "\n"), "\n")
	// Filter out empty trailing lines
	for len(lines) > 0 && lines[len(lines)-1] == "" {
		lines = lines[:len(lines)-1]
	}
	if len(lines) > 0 {
		fmt.Fprintf(p.output, "using prior answers from %s\n", path)
		p.priorAnswers = lines
	}
}

// SaveAnswers writes this session's answers to a file (0600 — answers may
// echo user input). A session with no answers writes nothing.
func (p *Prompter) SaveAnswers(path string) error {
	if len(p.answers) == 0 {
		return nil
	}
	return os.WriteFile(path, []byte(strings.Join(p.answers, "\n")+"\n"), 0o600)
}

// nextPrior returns the next prior answer if available, or empty string.
// NOTE(review): an empty string is also a possible recorded answer in
// principle; callers treat "" as "no prior" — confirm answers are never
// legitimately empty.
func (p *Prompter) nextPrior() string {
	if p.priorIdx < len(p.priorAnswers) {
		answer := p.priorAnswers[p.priorIdx]
		p.priorIdx++
		return answer
	}
	return ""
}

// record saves an answer for later writing via SaveAnswers.
func (p *Prompter) record(answer string) {
	p.answers = append(p.answers, answer)
}

// Close releases the /dev/tty handle if one was opened.
func (p *Prompter) Close() {
	if p.tty != nil {
		p.tty.Close()
	}
}

// ask presents a prompt with a default and valid options. Returns the chosen
// option (lowercase). Options should be lowercase; the default is shown in
// uppercase in the hint. A prior answer, when valid, replaces the default;
// EOF or an empty line accepts the default; invalid input re-prompts.
// Every outcome is recorded for replay.
func (p *Prompter) ask(prompt, defaultOpt string, options []string) string {
	// Override default with prior answer if available
	if prior := p.nextPrior(); prior != "" {
		for _, o := range options {
			if prior == o {
				defaultOpt = prior
				break
			}
		}
	}

	hint := make([]string, len(options))
	for i, o := range options {
		if o == defaultOpt {
			hint[i] = strings.ToUpper(o)
		} else {
			hint[i] = o
		}
	}
	for {
		fmt.Fprintf(p.output, "%s [%s] ", prompt, strings.Join(hint, "/"))
		line, err := p.reader.ReadString('\n')
		if err != nil {
			// EOF (or read failure) accepts the default.
			p.record(defaultOpt)
			return defaultOpt
		}
		line = strings.TrimSpace(strings.ToLower(line))
		if line == "" {
			p.record(defaultOpt)
			return defaultOpt
		}
		for _, o := range options {
			if line == o {
				p.record(o)
				return o
			}
		}
		fmt.Fprintf(p.output, " Please enter one of: %s\n", strings.Join(options, ", "))
	}
}

// askMapOrName presents a combined map/struct+name prompt. Shows [Default/m].
// Accepts: 'm' or 'map' → returns "m", a name starting with an uppercase
// letter → returns the name, empty → returns the default. Anything else
// re-prompts.
//
// Prior answers are interpreted generously: "s" (old struct answer) is
// treated as "accept the default struct name", "m" as map, and uppercase
// names as-is.
// NOTE(review): the A–Z check is ASCII-only; a type name starting with a
// non-ASCII uppercase letter would be rejected — confirm acceptable.
func (p *Prompter) askMapOrName(prompt, defaultVal string) string {
	if prior := p.nextPrior(); prior != "" {
		if prior == "m" || prior == "map" {
			defaultVal = prior
		} else if len(prior) > 0 && prior[0] >= 'A' && prior[0] <= 'Z' {
			defaultVal = prior
		}
		// Old-format answers like "s" → keep the inferred default (treat as "accept")
	}

	hint := defaultVal + "/m"
	if defaultVal == "m" {
		hint = "m"
	}

	for {
		fmt.Fprintf(p.output, "%s [%s] ", prompt, hint)
		line, err := p.reader.ReadString('\n')
		if err != nil {
			p.record(defaultVal)
			return defaultVal
		}
		line = strings.TrimSpace(line)
		if line == "" {
			p.record(defaultVal)
			return defaultVal
		}
		if line == "m" || line == "map" {
			p.record("m")
			return "m"
		}
		if len(line) > 0 && line[0] >= 'A' && line[0] <= 'Z' {
			p.record(line)
			return line
		}
		fmt.Fprintf(p.output, " Enter a TypeName (starting with uppercase), or 'm' for map\n")
	}
}

// askTypeName presents a prompt for a type name with a suggested default.
// Accepts names starting with an (ASCII) uppercase letter.
//
// Prior answers are interpreted generously: old-format answers that don't
// start with uppercase are treated as "accept the default".
func (p *Prompter) askTypeName(prompt, defaultVal string) string {
	if prior := p.nextPrior(); prior != "" {
		if len(prior) > 0 && prior[0] >= 'A' && prior[0] <= 'Z' {
			defaultVal = prior
		}
		// Old-format answers → keep the inferred default (treat as "accept")
	}

	for {
		fmt.Fprintf(p.output, "%s [%s] ", prompt, defaultVal)
		line, err := p.reader.ReadString('\n')
		if err != nil {
			p.record(defaultVal)
			return defaultVal
		}
		line = strings.TrimSpace(line)
		if line == "" {
			p.record(defaultVal)
			return defaultVal
		}
		if len(line) > 0 && line[0] >= 'A' && line[0] <= 'Z' {
			p.record(line)
			return line
		}
		fmt.Fprintf(p.output, " Enter a TypeName (starting with uppercase)\n")
	}
}
// askFreeform presents a prompt with a suggested default. Returns user input
// or the default if they just press enter (or on EOF). The answer is
// recorded for replay either way.
func (p *Prompter) askFreeform(prompt, defaultVal string) string {
	// Override default with prior answer if available
	if prior := p.nextPrior(); prior != "" {
		defaultVal = prior
	}

	fmt.Fprintf(p.output, "%s [%s] ", prompt, defaultVal)
	line, err := p.reader.ReadString('\n')
	if err != nil {
		p.record(defaultVal)
		return defaultVal
	}
	line = strings.TrimSpace(line)
	if line == "" {
		p.record(defaultVal)
		return defaultVal
	}
	p.record(line)
	return line
}

// GeneratePython converts formatted flat paths into Python TypedDict
// definitions. NotRequired is imported only when some field needs it.
// NOTE(review): field names that are Python keywords or non-identifiers
// would need the functional TypedDict("T", {...}) syntax — confirm inputs.
func GeneratePython(paths []string) string {
	types, _ := buildGoTypes(paths)
	if len(types) == 0 {
		return ""
	}

	// Only import NotRequired when at least one field is optional.
	hasOptional := false
	for _, t := range types {
		for _, f := range t.fields {
			if f.optional {
				hasOptional = true
				break
			}
		}
		if hasOptional {
			break
		}
	}

	var buf strings.Builder
	buf.WriteString("from __future__ import annotations\n\n")
	if hasOptional {
		buf.WriteString("from typing import NotRequired, TypedDict\n")
	} else {
		buf.WriteString("from typing import TypedDict\n")
	}

	// Emit in reverse so referenced types come first.
	for i := len(types) - 1; i >= 0; i-- {
		t := types[i]
		buf.WriteString(fmt.Sprintf("\n\nclass %s(TypedDict):\n", t.name))
		if len(t.fields) == 0 {
			buf.WriteString("    pass\n")
			continue
		}
		for _, f := range t.fields {
			pyType := goTypeToPython(f.goType)
			if f.optional {
				buf.WriteString(fmt.Sprintf("    %s: NotRequired[%s | None]\n", f.jsonName, pyType))
			} else {
				buf.WriteString(fmt.Sprintf("    %s: %s\n", f.jsonName, pyType))
			}
		}
	}
	return buf.String()
}

// goTypeToPython maps a Go type string onto its Python type-hint spelling.
// Pointers are unwrapped (nullability is handled at the field level via
// NotRequired[... | None]); named struct types pass through unchanged.
func goTypeToPython(goTyp string) string {
	goTyp = strings.TrimPrefix(goTyp, "*")

	if strings.HasPrefix(goTyp, "[]") {
		return "list[" + goTypeToPython(goTyp[2:]) + "]"
	}
	if strings.HasPrefix(goTyp, "map[string]") {
		return "dict[str, " + goTypeToPython(goTyp[11:]) + "]"
	}

	switch goTyp {
	case "string":
		return "str"
	case "int64":
		return "int"
	case "float64":
		return "float"
	case "bool":
		return "bool"
	case "any":
		return "object"
	default:
		return goTyp
	}
}

// TestGeneratePythonFlat checks primitive field rendering on a flat object.
func TestGeneratePythonFlat(t *testing.T) {
	paths := []string{
		"{Root}",
		".name{string}",
		".age{int}",
		".active{bool}",
	}
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"from typing import TypedDict",
		"class Root(TypedDict):",
		"name: str",
		"age: int",
		"active: bool",
	)
}

// TestGeneratePythonOptional checks NotRequired import and field rendering.
func TestGeneratePythonOptional(t *testing.T) {
	paths := []string{
		"{Root}",
		".name{string}",
		".bio{string?}",
	}
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"from typing import NotRequired, TypedDict",
		"name: str",
		"bio: NotRequired[str | None]",
	)
}

// TestGeneratePythonNested checks that a referenced class is both emitted
// and defined BEFORE its referrer (required at class-body evaluation time).
func TestGeneratePythonNested(t *testing.T) {
	paths := []string{
		"{Root}",
		".addr{Address}",
		".addr.city{string}",
	}
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"class Root(TypedDict):",
		"addr: Address",
		"class Address(TypedDict):",
		"city: str",
	)
	// Address should be defined before Root
	addrIdx := strings.Index(out, "class Address")
	rootIdx := strings.Index(out, "class Root")
	if addrIdx < 0 || rootIdx < 0 || addrIdx > rootIdx {
		t.Errorf("Address should be defined before Root\n%s", out)
	}
}

// TestGeneratePythonArray checks the list[...] spelling for slices.
func TestGeneratePythonArray(t *testing.T) {
	paths := []string{
		"{Root}",
		".items[]{Item}",
		".items[].id{string}",
	}
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"items: list[Item]",
	)
}

// TestGeneratePythonMap checks the dict[str, ...] spelling for maps.
func TestGeneratePythonMap(t *testing.T) {
	paths := []string{
		"{Root}",
		".scores[string]{Score}",
		".scores[string].value{int}",
	}
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"scores: dict[str, Score]",
	)
}

// TestGeneratePythonEmpty checks the no-input degenerate case.
func TestGeneratePythonEmpty(t *testing.T) {
	out := GeneratePython(nil)
	if out != "" {
		t.Errorf("expected empty output, got %q", out)
	}
}

// TestGeneratePythonEndToEnd runs the full analyze→format→Python pipeline
// on raw JSON rather than pre-built paths.
func TestGeneratePythonEndToEnd(t *testing.T) {
	jsonStr := `{"name":"Alice","age":30,"tags":["a"],"meta":{"key":"val"}}`
	paths := analyzeAndFormat(t, jsonStr)
	out := GeneratePython(paths)
	assertContainsAll(t, out,
		"class",
		"TypedDict",
		"name: str",
		"age: int",
	)
}
+func GenerateSQL(paths []string) string { + types, _ := buildGoTypes(paths) + if len(types) == 0 { + return "" + } + + typeMap := make(map[string]goType) + for _, t := range types { + typeMap[t.name] = t + } + + var buf strings.Builder + + // Emit in reverse order so referenced tables are created first. + for i := len(types) - 1; i >= 0; i-- { + t := types[i] + if i < len(types)-1 { + buf.WriteByte('\n') + } + tableName := toSnakeCase(t.name) + "s" + buf.WriteString(fmt.Sprintf("CREATE TABLE %s (\n", tableName)) + buf.WriteString(" id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY") + + var fks []string + for _, f := range t.fields { + // Skip "id" — we generate a synthetic primary key + if f.jsonName == "id" { + continue + } + colType, fk := goTypeToSQL(f, tableName, typeMap) + if colType == "" { + continue // skip array-of-struct (handled via FK on child) + } + buf.WriteString(",\n") + col := toSnakeCase(f.jsonName) + if fk != "" { + col += "_id" + } + if f.optional { + buf.WriteString(fmt.Sprintf(" %s %s", col, colType)) + } else { + buf.WriteString(fmt.Sprintf(" %s %s NOT NULL", col, colType)) + } + if fk != "" { + fks = append(fks, fk) + } + } + + for _, fk := range fks { + buf.WriteString(",\n") + buf.WriteString(" " + fk) + } + + buf.WriteString("\n);\n") + + // For array-of-struct fields, add a FK column on the child table + // pointing back to this parent. + for _, f := range t.fields { + childType := arrayElementType(f.goType) + if childType == "" { + continue + } + if _, isStruct := typeMap[childType]; !isStruct { + continue + } + childTable := toSnakeCase(childType) + "s" + parentFK := toSnakeCase(t.name) + "_id" + buf.WriteString(fmt.Sprintf( + "\nALTER TABLE %s ADD COLUMN %s BIGINT REFERENCES %s(id);\n", + childTable, parentFK, tableName)) + } + } + + return buf.String() +} + +// goTypeToSQL returns (SQL column type, optional FK constraint string). +// Returns ("", "") for array-of-struct fields (handled separately). 
+func goTypeToSQL(f goField, parentTable string, typeMap map[string]goType) (string, string) { + goTyp := strings.TrimPrefix(f.goType, "*") + + // Array of primitives → use array type or JSON + if strings.HasPrefix(goTyp, "[]") { + elemType := goTyp[2:] + if _, isStruct := typeMap[elemType]; isStruct { + return "", "" // handled via FK on child table + } + return "JSONB", "" + } + + // Map → JSONB + if strings.HasPrefix(goTyp, "map[") { + return "JSONB", "" + } + + // Named struct → FK reference + if _, isStruct := typeMap[goTyp]; isStruct { + refTable := toSnakeCase(goTyp) + "s" + col := toSnakeCase(f.jsonName) + "_id" + fk := fmt.Sprintf("CONSTRAINT fk_%s FOREIGN KEY (%s) REFERENCES %s(id)", + col, col, refTable) + return "BIGINT", fk + } + + switch goTyp { + case "string": + return "TEXT", "" + case "int64": + return "BIGINT", "" + case "float64": + return "DOUBLE PRECISION", "" + case "bool": + return "BOOLEAN", "" + case "any": + return "JSONB", "" + default: + return "TEXT", "" + } +} + +// arrayElementType returns the element type if goTyp is []SomeType, else "". +func arrayElementType(goTyp string) string { + goTyp = strings.TrimPrefix(goTyp, "*") + if strings.HasPrefix(goTyp, "[]") { + return goTyp[2:] + } + return "" +} + +// toSnakeCase converts PascalCase to snake_case. 
+func toSnakeCase(s string) string { + var buf strings.Builder + for i, r := range s { + if r >= 'A' && r <= 'Z' { + if i > 0 { + buf.WriteByte('_') + } + buf.WriteRune(r + ('a' - 'A')) + } else { + buf.WriteRune(r) + } + } + return buf.String() +} diff --git a/tools/jsontypes/sql_test.go b/tools/jsontypes/sql_test.go new file mode 100644 index 0000000..526e226 --- /dev/null +++ b/tools/jsontypes/sql_test.go @@ -0,0 +1,161 @@ +package jsontypes + +import ( + "strings" + "testing" +) + +func TestGenerateSQLFlat(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".age{int}", + ".active{bool}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "CREATE TABLE roots (", + "id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY", + "name TEXT NOT NULL", + "age BIGINT NOT NULL", + "active BOOLEAN NOT NULL", + ) +} + +func TestGenerateSQLOptional(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".bio{string?}", + } + out := GenerateSQL(paths) + // name should be NOT NULL, bio should not + if !strings.Contains(out, "name TEXT NOT NULL") { + t.Errorf("expected name NOT NULL\n%s", out) + } + // bio should NOT have NOT NULL + for _, line := range strings.Split(out, "\n") { + if strings.Contains(line, "bio") && strings.Contains(line, "NOT NULL") { + t.Errorf("bio should be nullable\n%s", out) + } + } +} + +func TestGenerateSQLNested(t *testing.T) { + paths := []string{ + "{Root}", + ".addr{Address}", + ".addr.city{string}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "CREATE TABLE roots (", + "CREATE TABLE addresss (", + "addr_id BIGINT", + "REFERENCES addresss(id)", + ) +} + +func TestGenerateSQLArrayOfStructs(t *testing.T) { + paths := []string{ + "{Root}", + ".items[]{Item}", + ".items[].slug{string}", + ".items[].name{string}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "CREATE TABLE roots (", + "CREATE TABLE items (", + "ALTER TABLE items ADD COLUMN root_id BIGINT REFERENCES roots(id)", + ) 
+ // items should NOT appear as a column in roots + for _, line := range strings.Split(out, "\n") { + trimmed := strings.TrimSpace(line) + if strings.HasPrefix(trimmed, "items ") && strings.Contains(out, "CREATE TABLE roots") { + // This is fine if it's in the items table + } + } +} + +func TestGenerateSQLArrayOfPrimitives(t *testing.T) { + paths := []string{ + "{Root}", + ".tags[]{string}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "tags JSONB NOT NULL", + ) +} + +func TestGenerateSQLMap(t *testing.T) { + paths := []string{ + "{Root}", + ".metadata[string]{string}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "metadata JSONB NOT NULL", + ) +} + +func TestGenerateSQLEmpty(t *testing.T) { + out := GenerateSQL(nil) + if out != "" { + t.Errorf("expected empty output, got %q", out) + } +} + +func TestGenerateSQLEndToEnd(t *testing.T) { + jsonStr := `{"name":"Alice","age":30,"tags":["a"],"meta":{"key":"val"}}` + paths := analyzeAndFormat(t, jsonStr) + out := GenerateSQL(paths) + assertContainsAll(t, out, + "CREATE TABLE", + "TEXT NOT NULL", + "BIGINT", + ) +} + +func TestGenerateSQLRelationships(t *testing.T) { + paths := []string{ + "{User}", + ".name{string}", + ".profile{Profile}", + ".profile.bio{string}", + ".posts[]{Post}", + ".posts[].title{string}", + ".posts[].comments[]{Comment}", + ".posts[].comments[].body{string}", + } + out := GenerateSQL(paths) + assertContainsAll(t, out, + "CREATE TABLE users (", + "CREATE TABLE profiles (", + "CREATE TABLE posts (", + "CREATE TABLE comments (", + // User has FK to profile + "profile_id BIGINT", + "REFERENCES profiles(id)", + // Posts have FK back to users + "ALTER TABLE posts ADD COLUMN user_id BIGINT REFERENCES users(id)", + // Comments have FK back to posts + "ALTER TABLE comments ADD COLUMN post_id BIGINT REFERENCES posts(id)", + ) +} + +func TestToSnakeCase(t *testing.T) { + tests := []struct{ in, want string }{ + {"Root", "root"}, + {"RootItem", "root_item"}, + {"HTTPServer", 
"h_t_t_p_server"}, + {"address", "address"}, + } + for _, tc := range tests { + got := toSnakeCase(tc.in) + if got != tc.want { + t.Errorf("toSnakeCase(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} diff --git a/tools/jsontypes/testdata/sample.answers b/tools/jsontypes/testdata/sample.answers new file mode 100644 index 0000000..37d60fe --- /dev/null +++ b/tools/jsontypes/testdata/sample.answers @@ -0,0 +1,8 @@ +m +n +s +Person +Friend +n +s +Identification diff --git a/tools/jsontypes/testdata/sample.json b/tools/jsontypes/testdata/sample.json new file mode 100644 index 0000000..a1b691d --- /dev/null +++ b/tools/jsontypes/testdata/sample.json @@ -0,0 +1,44 @@ +{ + "abc123": { + "name": "Alice", + "age": 30, + "active": true, + "friends": [ + { + "name": "Bob", + "identification": null + }, + { + "name": "Charlie", + "identification": { + "type": "StateID", + "number": "12345", + "name": "Charlie C" + } + } + ] + }, + "def456": { + "name": "Dave", + "age": 25, + "active": false, + "friends": [] + }, + "ghi789": { + "name": "Eve", + "age": 28, + "active": true, + "score": 95.5, + "friends": [ + { + "name": "Frank", + "identification": { + "type": "DriverLicense", + "id": "DL-999", + "name": "Frank F", + "restrictions": ["corrective lenses"] + } + } + ] + } +} diff --git a/tools/jsontypes/testdata/sample.paths b/tools/jsontypes/testdata/sample.paths new file mode 100644 index 0000000..06fc853 --- /dev/null +++ b/tools/jsontypes/testdata/sample.paths @@ -0,0 +1,14 @@ +[string]{Person} +[string].active{bool} +[string].age{int} +[string].friends[]{Friend} +[string].friends[].identification{Identification?} +[string].friends[].identification.id{string?} +[string].friends[].identification.name{string} +[string].friends[].identification.number{string?} +[string].friends[].identification.restrictions{null} +[string].friends[].identification.restrictions[]{string} +[string].friends[].identification.type{string} +[string].friends[].name{string} +[string].name{string} 
+[string].score{float?} diff --git a/tools/jsontypes/typedef.go b/tools/jsontypes/typedef.go new file mode 100644 index 0000000..7c1c796 --- /dev/null +++ b/tools/jsontypes/typedef.go @@ -0,0 +1,116 @@ +package jsontypes + +import ( + "encoding/json" + "strings" +) + +// generateTypedef converts formatted flat paths into a JSON Typedef (RFC 8927) document. +func GenerateTypedef(paths []string) string { + types, _ := buildGoTypes(paths) + + typeMap := make(map[string]goType) + for _, t := range types { + typeMap[t.name] = t + } + + // The first type is the root + if len(types) == 0 { + return "{}\n" + } + + root := types[0] + defs := make(map[string]any) + result := structToJTD(root, typeMap, defs) + + if len(defs) > 0 { + result["definitions"] = defs + } + + data, _ := json.MarshalIndent(result, "", " ") + return string(data) + "\n" +} + +// structToJTD converts a goType to a JTD schema object. +func structToJTD(t goType, typeMap map[string]goType, defs map[string]any) map[string]any { + props := make(map[string]any) + optProps := make(map[string]any) + + for _, f := range t.fields { + schema := goTypeToJTD(f.goType, f.optional, typeMap, defs) + if f.optional { + optProps[f.jsonName] = schema + } else { + props[f.jsonName] = schema + } + } + + result := make(map[string]any) + if len(props) > 0 { + result["properties"] = props + } else if len(optProps) > 0 { + // JTD requires "properties" if "optionalProperties" is present + result["properties"] = map[string]any{} + } + if len(optProps) > 0 { + result["optionalProperties"] = optProps + } + return result +} + +// goTypeToJTD converts a Go type string to a JTD schema. 
+func goTypeToJTD(goTyp string, nullable bool, typeMap map[string]goType, defs map[string]any) map[string]any { + result := goTypeToJTDInner(goTyp, typeMap, defs) + if nullable { + result["nullable"] = true + } + return result +} + +func goTypeToJTDInner(goTyp string, typeMap map[string]goType, defs map[string]any) map[string]any { + // Strip pointer + goTyp = strings.TrimPrefix(goTyp, "*") + + // Slice + if strings.HasPrefix(goTyp, "[]") { + elemType := goTyp[2:] + return map[string]any{ + "elements": goTypeToJTDInner(elemType, typeMap, defs), + } + } + + // Map + if strings.HasPrefix(goTyp, "map[string]") { + valType := goTyp[11:] + return map[string]any{ + "values": goTypeToJTDInner(valType, typeMap, defs), + } + } + + // Primitives + switch goTyp { + case "string": + return map[string]any{"type": "string"} + case "int64": + return map[string]any{"type": "int32"} + case "float64": + return map[string]any{"type": "float64"} + case "bool": + return map[string]any{"type": "boolean"} + case "any": + return map[string]any{} + } + + // Named struct — emit as ref, add to definitions if not already there + if t, ok := typeMap[goTyp]; ok { + if _, exists := defs[goTyp]; !exists { + // Add placeholder to prevent infinite recursion + defs[goTyp] = nil + defs[goTyp] = structToJTD(t, typeMap, defs) + } + return map[string]any{"ref": goTyp} + } + + // Unknown type + return map[string]any{} +} diff --git a/tools/jsontypes/typedef_test.go b/tools/jsontypes/typedef_test.go new file mode 100644 index 0000000..b8faedc --- /dev/null +++ b/tools/jsontypes/typedef_test.go @@ -0,0 +1,162 @@ +package jsontypes + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestGenerateTypedefFlat(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".age{int}", + ".active{bool}", + } + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + props, ok := 
doc["properties"].(map[string]any) + if !ok { + t.Fatalf("expected properties, got %v", doc) + } + assertJTDType(t, props, "name", "string") + assertJTDType(t, props, "age", "int32") + assertJTDType(t, props, "active", "boolean") +} + +func TestGenerateTypedefNested(t *testing.T) { + paths := []string{ + "{Root}", + ".addr{Address}", + ".addr.city{string}", + ".addr.zip{string}", + } + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + // addr should be a ref + props := doc["properties"].(map[string]any) + addr := props["addr"].(map[string]any) + if addr["ref"] != "Address" { + t.Errorf("expected ref=Address, got %v", addr) + } + // definitions should have Address + defs := doc["definitions"].(map[string]any) + addrDef := defs["Address"].(map[string]any) + addrProps := addrDef["properties"].(map[string]any) + assertJTDType(t, addrProps, "city", "string") + assertJTDType(t, addrProps, "zip", "string") +} + +func TestGenerateTypedefArray(t *testing.T) { + paths := []string{ + "{Root}", + ".items[]{Item}", + ".items[].id{string}", + } + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + props := doc["properties"].(map[string]any) + items := props["items"].(map[string]any) + elem := items["elements"].(map[string]any) + if elem["ref"] != "Item" { + t.Errorf("expected elements ref=Item, got %v", elem) + } +} + +func TestGenerateTypedefOptional(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".bio{string?}", + } + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + optProps := doc["optionalProperties"].(map[string]any) + bio := optProps["bio"].(map[string]any) + if bio["nullable"] != true { + t.Errorf("expected 
nullable=true for bio, got %v", bio) + } + if bio["type"] != "string" { + t.Errorf("expected type=string for bio, got %v", bio["type"]) + } +} + +func TestGenerateTypedefMap(t *testing.T) { + paths := []string{ + "{Root}", + ".scores[string]{Score}", + ".scores[string].value{int}", + } + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + props := doc["properties"].(map[string]any) + scores := props["scores"].(map[string]any) + vals := scores["values"].(map[string]any) + if vals["ref"] != "Score" { + t.Errorf("expected values ref=Score, got %v", vals) + } +} + +func TestGenerateTypedefEmpty(t *testing.T) { + out := GenerateTypedef(nil) + if out != "{}\n" { + t.Errorf("expected empty schema, got %q", out) + } +} + +func TestGenerateTypedefEndToEnd(t *testing.T) { + jsonStr := `{"name":"Alice","age":30,"tags":["a","b"],"meta":{"key":"val"}}` + paths := analyzeAndFormat(t, jsonStr) + out := GenerateTypedef(paths) + var doc map[string]any + if err := json.Unmarshal([]byte(out), &doc); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, out) + } + if _, ok := doc["properties"]; !ok { + t.Errorf("expected properties in output: %s", out) + } +} + +func analyzeAndFormat(t *testing.T, jsonStr string) []string { + t.Helper() + var data any + dec := json.NewDecoder(strings.NewReader(jsonStr)) + dec.UseNumber() + if err := dec.Decode(&data); err != nil { + t.Fatalf("invalid test JSON: %v", err) + } + a, err := NewAnalyzer(false, true, false) + if err != nil { + t.Fatalf("NewAnalyzer: %v", err) + } + defer a.Close() + rawPaths := a.Analyze(".", data) + return FormatPaths(rawPaths) +} + +func assertJTDType(t *testing.T, props map[string]any, field, expected string) { + t.Helper() + f, ok := props[field].(map[string]any) + if !ok { + t.Errorf("field %q not found in properties", field) + return + } + if f["type"] != expected { + t.Errorf("field %q: expected type=%q, 
got %v", field, expected, f["type"]) + } +} diff --git a/tools/jsontypes/typescript.go b/tools/jsontypes/typescript.go new file mode 100644 index 0000000..d32a647 --- /dev/null +++ b/tools/jsontypes/typescript.go @@ -0,0 +1,56 @@ +package jsontypes + +import ( + "fmt" + "strings" +) + +// generateTypeScript converts formatted flat paths into TypeScript interface definitions. +func GenerateTypeScript(paths []string) string { + types, _ := buildGoTypes(paths) + if len(types) == 0 { + return "" + } + + var buf strings.Builder + for i, t := range types { + if i > 0 { + buf.WriteByte('\n') + } + buf.WriteString(fmt.Sprintf("export interface %s {\n", t.name)) + for _, f := range t.fields { + tsType := goTypeToTS(f.goType) + if f.optional { + buf.WriteString(fmt.Sprintf(" %s?: %s | null;\n", f.jsonName, tsType)) + } else { + buf.WriteString(fmt.Sprintf(" %s: %s;\n", f.jsonName, tsType)) + } + } + buf.WriteString("}\n") + } + return buf.String() +} + +func goTypeToTS(goTyp string) string { + goTyp = strings.TrimPrefix(goTyp, "*") + + if strings.HasPrefix(goTyp, "[]") { + return goTypeToTS(goTyp[2:]) + "[]" + } + if strings.HasPrefix(goTyp, "map[string]") { + return "Record" + } + + switch goTyp { + case "string": + return "string" + case "int64", "float64": + return "number" + case "bool": + return "boolean" + case "any": + return "unknown" + default: + return goTyp + } +} diff --git a/tools/jsontypes/typescript_test.go b/tools/jsontypes/typescript_test.go new file mode 100644 index 0000000..99c7ab7 --- /dev/null +++ b/tools/jsontypes/typescript_test.go @@ -0,0 +1,87 @@ +package jsontypes + +import ( + "strings" + "testing" +) + +func TestGenerateTypeScriptFlat(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".age{int}", + ".active{bool}", + } + out := GenerateTypeScript(paths) + assertContains(t, out, "export interface Root {") + assertContains(t, out, "name: string;") + assertContains(t, out, "age: number;") + assertContains(t, out, "active: 
boolean;") +} + +func TestGenerateTypeScriptOptional(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".bio{string?}", + } + out := GenerateTypeScript(paths) + assertContains(t, out, "name: string;") + assertContains(t, out, "bio?: string | null;") +} + +func TestGenerateTypeScriptNested(t *testing.T) { + paths := []string{ + "{Root}", + ".addr{Address}", + ".addr.city{string}", + } + out := GenerateTypeScript(paths) + assertContains(t, out, "addr: Address;") + assertContains(t, out, "export interface Address {") + assertContains(t, out, "city: string;") +} + +func TestGenerateTypeScriptArray(t *testing.T) { + paths := []string{ + "{Root}", + ".items[]{Item}", + ".items[].id{string}", + } + out := GenerateTypeScript(paths) + assertContains(t, out, "items: Item[];") +} + +func TestGenerateTypeScriptMap(t *testing.T) { + paths := []string{ + "{Root}", + ".scores[string]{Score}", + ".scores[string].value{int}", + } + out := GenerateTypeScript(paths) + assertContains(t, out, "scores: Record;") +} + +func TestGenerateTypeScriptEmpty(t *testing.T) { + out := GenerateTypeScript(nil) + if out != "" { + t.Errorf("expected empty output, got %q", out) + } +} + +func TestGenerateTypeScriptEndToEnd(t *testing.T) { + jsonStr := `{"name":"Alice","age":30,"tags":["a"],"meta":{"key":"val"}}` + paths := analyzeAndFormat(t, jsonStr) + out := GenerateTypeScript(paths) + assertContains(t, out, "export interface") + assertContains(t, out, "name: string;") + assertContains(t, out, "age: number;") + assertContains(t, out, "tags: string[];") +} + +func assertContains(t *testing.T, got, want string) { + t.Helper() + if !strings.Contains(got, want) { + t.Errorf("output missing %q\ngot:\n%s", want, got) + } +} diff --git a/tools/jsontypes/zod.go b/tools/jsontypes/zod.go new file mode 100644 index 0000000..7b14845 --- /dev/null +++ b/tools/jsontypes/zod.go @@ -0,0 +1,67 @@ +package jsontypes + +import ( + "fmt" + "strings" +) + +// generateZod converts formatted flat paths 
into Zod schema definitions. +func GenerateZod(paths []string) string { + types, _ := buildGoTypes(paths) + if len(types) == 0 { + return "" + } + + // Emit in reverse order so referenced schemas are defined first. + var buf strings.Builder + buf.WriteString("import { z } from \"zod\";\n\n") + for i := len(types) - 1; i >= 0; i-- { + t := types[i] + if i < len(types)-1 { + buf.WriteByte('\n') + } + buf.WriteString(fmt.Sprintf("export const %sSchema = z.object({\n", t.name)) + for _, f := range t.fields { + zodType := goTypeToZod(f.goType) + if f.optional { + zodType += ".nullable().optional()" + } + buf.WriteString(fmt.Sprintf(" %s: %s,\n", f.jsonName, zodType)) + } + buf.WriteString("});\n") + } + + // Type aliases + buf.WriteByte('\n') + for _, t := range types { + buf.WriteString(fmt.Sprintf("export type %s = z.infer;\n", t.name, t.name)) + } + + return buf.String() +} + +func goTypeToZod(goTyp string) string { + goTyp = strings.TrimPrefix(goTyp, "*") + + if strings.HasPrefix(goTyp, "[]") { + return "z.array(" + goTypeToZod(goTyp[2:]) + ")" + } + if strings.HasPrefix(goTyp, "map[string]") { + return "z.record(z.string(), " + goTypeToZod(goTyp[11:]) + ")" + } + + switch goTyp { + case "string": + return "z.string()" + case "int64": + return "z.number().int()" + case "float64": + return "z.number()" + case "bool": + return "z.boolean()" + case "any": + return "z.unknown()" + default: + return goTyp + "Schema" + } +} diff --git a/tools/jsontypes/zod_test.go b/tools/jsontypes/zod_test.go new file mode 100644 index 0000000..271670f --- /dev/null +++ b/tools/jsontypes/zod_test.go @@ -0,0 +1,98 @@ +package jsontypes + +import ( + "strings" + "testing" +) + +func TestGenerateZodFlat(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".age{int}", + ".active{bool}", + } + out := GenerateZod(paths) + assertContainsAll(t, out, + `import { z } from "zod";`, + "export const RootSchema = z.object({", + "name: z.string(),", + "age: z.number().int(),", + 
"active: z.boolean(),", + "export type Root = z.infer;", + ) +} + +func TestGenerateZodOptional(t *testing.T) { + paths := []string{ + "{Root}", + ".name{string}", + ".bio{string?}", + } + out := GenerateZod(paths) + assertContainsAll(t, out, + "name: z.string(),", + "bio: z.string().nullable().optional(),", + ) +} + +func TestGenerateZodNested(t *testing.T) { + paths := []string{ + "{Root}", + ".addr{Address}", + ".addr.city{string}", + } + out := GenerateZod(paths) + assertContainsAll(t, out, + "export const AddressSchema = z.object({", + "addr: AddressSchema,", + ) + // AddressSchema should appear before RootSchema + addrIdx := strings.Index(out, "AddressSchema = z.object") + rootIdx := strings.Index(out, "RootSchema = z.object") + if addrIdx < 0 || rootIdx < 0 || addrIdx > rootIdx { + t.Errorf("AddressSchema should be defined before RootSchema\n%s", out) + } +} + +func TestGenerateZodArray(t *testing.T) { + paths := []string{ + "{Root}", + ".items[]{Item}", + ".items[].id{string}", + } + out := GenerateZod(paths) + assertContainsAll(t, out, + "items: z.array(ItemSchema),", + ) +} + +func TestGenerateZodMap(t *testing.T) { + paths := []string{ + "{Root}", + ".scores[string]{Score}", + ".scores[string].value{int}", + } + out := GenerateZod(paths) + assertContainsAll(t, out, + "scores: z.record(z.string(), ScoreSchema),", + ) +} + +func TestGenerateZodEmpty(t *testing.T) { + out := GenerateZod(nil) + if out != "" { + t.Errorf("expected empty output, got %q", out) + } +} + +func TestGenerateZodEndToEnd(t *testing.T) { + jsonStr := `{"name":"Alice","age":30,"tags":["a"],"meta":{"key":"val"}}` + paths := analyzeAndFormat(t, jsonStr) + out := GenerateZod(paths) + assertContainsAll(t, out, + "z.object({", + "z.string()", + "z.number()", + ) +}