diff --git a/cmd/lj.go b/cmd/lj.go
index f87b538..aaadaab 100644
--- a/cmd/lj.go
+++ b/cmd/lj.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"log"
 	"os"
@@ -27,6 +28,7 @@ func main() {
 		file = os.Stdin
 	}
 	query := os.Args[len(os.Args)-1]
-	json := Must(libjson.NewReader(file))
-	fmt.Printf("%+#v\n", Must(libjson.Get[any](&json, query)))
+	deserialized := Must(libjson.NewReader(file))
+	queryResult := Must(libjson.Get[any](&deserialized, query))
+	fmt.Println(string(Must(json.MarshalIndent(queryResult, "", "\t"))))
 }
diff --git a/deserializer.go b/deserializer.go
new file mode 100644
index 0000000..ad2ebcc
--- /dev/null
+++ b/deserializer.go
@@ -0,0 +1,246 @@
+package libjson
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"unsafe"
+)
+
+// state encodes all possible states the deserializer can be in. A transition
+// is always defined as
+//
+//	Next(state, character) -> state
+type state uint8
+
+// TODO: only Start lists the characters that begin a value; InObject and InArray list just their closing delimiter
+
+const (
+	Start state = iota
+	InObject
+	EndObject
+	InArray
+	EndArray
+	String
+	Number
+	Atom
+)
+
+var transitions = map[state]map[byte]state{
+	Start: {
+		'{': InObject,
+		'[': InArray,
+		'"': String,
+		'n': Atom,
+		't': Atom,
+		'f': Atom,
+	},
+	// no ':', since we always expect a colon between key and value
+	// all rhs in InObject should be handled by Start transitions
+	InObject: {
+		// results in us popping a container and adding to its parent or
+		// returning it if it is the only value
+		'}': EndObject,
+	},
+	InArray: {
+		// results in us popping a container and adding to its parent or
+		// returning it if it is the only value
+		']': EndArray,
+	},
+}
+
+func init() {
+	for b := byte('0'); b <= '9'; b++ {
+		transitions[Start][b] = Number
+	}
+	transitions[Start]['-'] = Number
+}
+
+type container struct {
+	isObj  bool   // true for an object (InObject), false for an array (InArray)
+	target any    // map[string]any if isObj, otherwise []any
+	key    string // pending key the next value is stored under, only used if isObj
+}
+
+func insertValue(containerStack *[]container, v any) (any, bool) {
+	if len(*containerStack) == 0 {
+		return v, true // no open container: v itself is the final result
+	}
+	// otherwise store v in the innermost open container
+	parent := &(*containerStack)[len(*containerStack)-1]
+	if parent.isObj {
+		parent.target.(map[string]any)[parent.key] = v
+		parent.key = ""
+	} else {
+		parent.target = append(parent.target.([]any), v)
+	}
+	// v was stored in a container, so there is no final result yet
+	return nil, false
+}
+
+// deserialize converts JSON to Go values, the result may be:
+//
+//	T = map[string]T, []T, string, float64, true, false, nil
+//
+// deserialize merges the concepts of lexical analysis with semantic and
+// syntactical analysis, while producing direct go values out of the
+// aforementioned. This results in a large performance improvement over the
+// traditional approach, since the deserialisation process no longer requires
+// multiple passes and intermediate values.
+//
+// At a high level this works by representing JSON as a table of states and
+// possible input characters determining the follow state (table driven DFA).
+// This makes deserialisation of large JSON inputs very fast.
+func deserialize(src []byte) (any, error) {
+	var pos int
+	containerStack := make([]container, 0, 16)
+
+	for pos < len(src) {
+		for pos < len(src) && (src[pos] == ' ' || src[pos] == '\n' || src[pos] == '\t' || src[pos] == '\r') {
+			pos++
+		}
+		if pos >= len(src) {
+			break
+		}
+
+		b := src[pos]
+
+		if b == ',' {
+			pos++
+			continue
+		}
+
+		// The state is fully determined by the innermost open container:
+		// Start at the top level, InObject or InArray otherwise.
+		state := Start
+		if n := len(containerStack); n > 0 {
+			state = InArray
+			if containerStack[n-1].isObj {
+				state = InObject
+			}
+		}
+
+		// InObject and InArray only list their closing delimiter, so the
+		// characters that start a value are looked up under Start. A miss
+		// in both lookups leaves next at the zero state (Start), which has
+		// no case below and therefore ends in the default error branch.
+		next, ok := transitions[state][b]
+		if !ok {
+			next = transitions[Start][b]
+		}
+
+		switch next {
+		case InArray:
+			containerStack = append(containerStack, container{
+				isObj:  false,
+				target: []any{},
+			})
+		case InObject:
+			containerStack = append(containerStack, container{
+				isObj:  true,
+				target: map[string]any{},
+			})
+		case EndArray, EndObject:
+			last := containerStack[len(containerStack)-1]
+			containerStack = containerStack[:len(containerStack)-1]
+
+			if len(containerStack) == 0 {
+				return last.target, nil
+			}
+			parent := &containerStack[len(containerStack)-1]
+			if parent.isObj {
+				parent.target.(map[string]any)[parent.key] = last.target
+				parent.key = ""
+			} else {
+				parent.target = append(parent.target.([]any), last.target)
+			}
+		case String: // TODO: add support for escaping strings
+			pos++ // skip "
+			offset := bytes.IndexByte(src[pos:], '"')
+			if offset < 0 {
+				return nil, errors.New("Unterminated string")
+			}
+			end := pos + offset
+			slice := src[pos:end]
+			// no copy is made: s shares its bytes with src
+			s := unsafe.String(unsafe.SliceData(slice), len(slice))
+			pos = end
+
+			if len(containerStack) == 0 {
+				return s, nil
+			}
+
+			parent := &containerStack[len(containerStack)-1]
+			if parent.isObj {
+				if parent.key == "" {
+					parent.key = s
+
+					// pos is on the closing quote: skip whitespace, then
+					// require the ':' and leave pos on it
+					for pos+1 < len(src) && (src[pos+1] == ' ' || src[pos+1] == '\n' || src[pos+1] == '\t' || src[pos+1] == '\r') {
+						pos++
+					}
+					if pos+1 >= len(src) || src[pos+1] != ':' {
+						return nil, fmt.Errorf("Expected ':' after object key")
+					}
+					pos++
+				} else {
+					parent.target.(map[string]any)[parent.key] = s
+					parent.key = ""
+				}
+			} else {
+				parent.target = append(parent.target.([]any), s)
+			}
+		case Number:
+			start := pos
+			for pos < len(src) && numChar[src[pos]] {
+				pos++
+			}
+
+			f, err := parseFloat(src[start:pos])
+			if err != nil {
+				return nil, err
+			}
+			if out, done := insertValue(&containerStack, f); done {
+				return out, nil
+			}
+			continue // pos is already past the number
+		case Atom:
+			var literal any
+			switch b {
+			case 't':
+				if !bytes.HasPrefix(src[pos:], []byte("true")) {
+					return nil, errors.New("invalid true attempt")
+				}
+				pos += 4
+				literal = true
+			case 'f':
+				if !bytes.HasPrefix(src[pos:], []byte("false")) {
+					return nil, errors.New("invalid false attempt")
+				}
+				pos += 5
+				literal = false
+			case 'n':
+				if !bytes.HasPrefix(src[pos:], []byte("null")) {
+					return nil, errors.New("invalid null attempt")
+				}
+				pos += 4
+				literal = nil
+			}
+
+			if out, done := insertValue(&containerStack, literal); done {
+				return out, nil
+			}
+			continue // pos is already past the literal
+		default:
+			return nil, fmt.Errorf("No state transition found for (%s, %q)", state.String(), b)
+		}
+		pos++
+	}
+
+	if len(containerStack) != 0 {
+		return nil, fmt.Errorf("Unexpected end of input, unclosed container: %+v", containerStack)
+	}
+
+	return nil, nil
+}
diff --git a/deserializer_test.go b/deserializer_test.go
new file mode 100644
index 0000000..033778a
--- /dev/null
+++ b/deserializer_test.go
@@ -0,0 +1,110 @@
+package libjson
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestDeserialize(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected any
+	}{
+		{
+			name:     "empty object",
+			input:    `{}`,
+			expected: map[string]any{},
+		},
+		{
+			name:     "empty array",
+			input:    `[]`,
+			expected: []any{},
+		},
+		{
+			name:     "simple string",
+			input:    `"abc"`,
+			expected: "abc",
+		},
+		{
+			name:     "int",
+			input:    `1234`,
+			expected: 1234.0,
+		},
+		{
+			name:     "int single char",
+			input:    `1`,
+			expected: 1.0,
+		},
+		{
+			
name:     "double",
+			input:    `3.1415`,
+			expected: 3.1415,
+		},
+		{
+			name:     "true",
+			input:    `true`,
+			expected: true,
+		},
+		{
+			name:     "false",
+			input:    `false`,
+			expected: false,
+		},
+		{
+			name:     "null",
+			input:    `null`,
+			expected: nil,
+		},
+		{
+			name:  "array of literals",
+			input: `["str", 3, 1.5, true, false, null]`,
+			expected: []any{
+				"str",
+				3.0,
+				1.5,
+				true,
+				false,
+				nil,
+			},
+		},
+		{
+			name:  "simple object",
+			input: `{"foo": "bar"}`,
+			expected: map[string]any{
+				"foo": "bar",
+			},
+		},
+		{
+			name:  "nested object",
+			input: `{"a": {"b": [1,2,3]}}`,
+			expected: map[string]any{
+				"a": map[string]any{
+					"b": []any{float64(1), float64(2), float64(3)},
+				},
+			},
+		},
+
+		// {
+		// 	name:  "mixed array",
+		// 	input: `[{"x":1}, "str", 3.14]`,
+		// 	expected: []any{
+		// 		map[string]any{"x": float64(1)},
+		// 		"str",
+		// 		3.14,
+		// 	},
+		// },
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := deserialize([]byte(tt.input))
+			if err != nil {
+				t.Fatalf("deserialize() error = %v", err)
+			}
+			if !reflect.DeepEqual(got, tt.expected) {
+				t.Errorf("deserialize() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
diff --git a/go.mod b/go.mod
index 3d85827..4534a13 100644
--- a/go.mod
+++ b/go.mod
@@ -1,11 +1,14 @@
 module github.com/xnacly/libjson
 
-go 1.23.0
+go 1.25.5
 
 require github.com/stretchr/testify v1.9.0
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
+	golang.org/x/mod v0.31.0 // indirect
+	golang.org/x/sync v0.19.0 // indirect
+	golang.org/x/tools v0.40.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
diff --git a/go.sum b/go.sum
index 60ce688..da57699 100644
--- a/go.sum
+++ b/go.sum
@@ -4,6 +4,12 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
+golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
+golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/object.go b/object.go
index 4e59837..830680a 100644
--- a/object.go
+++ b/object.go
@@ -11,6 +11,17 @@ type JSON struct {
 	obj any
 }
 
+func Compile(path string) (func(*JSON) (any, error), error) {
+	closure, err := parsePath(path)
+	if err != nil {
+		return nil, err
+	}
+	// path is parsed once above; the returned func only applies closure to j.obj
+	return func(j *JSON) (any, error) {
+		return closure(j.obj)
+	}, nil
+}
+
 func Get[T any](obj *JSON, path string) (T, error) {
 	val, err := obj.get(path)
 	if err != nil {
@@ -38,7 +49,7 @@ func indexByKey(data any, key any) (any, error) {
 			return nil, nil
 		}
 		if k, ok := key.(int); !ok {
-			return nil, fmt.Errorf("Can not use %T::%v to index into %T::%v", key, key, data, data)
+			return nil, fmt.Errorf("Can not use %T(%v) to index into %T(%v)", key, key, data, data)
 		} else {
 			return v[k], nil
 		}
@@ -47,7 +58,7 @@ func indexByKey(data any, key any) (any, error) {
 			return nil, nil
 		}
 		if k, ok := key.(string); !ok {
-			return nil, fmt.Errorf("Can not use %T::%v to index into %T::%v", key, key, data, data)
+			return nil, fmt.Errorf("Can not use %T(%v) to index into %T(%v)", key, key, data, data)
 		} else {
 			return v[k], nil
 		}
diff --git a/state_string.go b/state_string.go
new file mode 100644
index 0000000..1ef7f2d
--- /dev/null
+++ b/state_string.go
@@ -0,0 +1,31 @@
+// Code generated by "stringer -type state"; DO NOT EDIT.
+
+package libjson
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[Start-0]
+	_ = x[InObject-1]
+	_ = x[EndObject-2]
+	_ = x[InArray-3]
+	_ = x[EndArray-4]
+	_ = x[String-5]
+	_ = x[Number-6]
+	_ = x[Atom-7]
+}
+
+const _state_name = "StartInObjectEndObjectInArrayEndArrayStringNumberAtom"
+
+var _state_index = [...]uint8{0, 5, 13, 22, 29, 37, 43, 49, 53}
+
+func (i state) String() string {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_state_index)-1 {
+		return "state(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _state_name[_state_index[idx]:_state_index[idx+1]]
+}