diff options
author | NeonXP <i@neonxp.dev> | 2022-11-16 05:11:19 +0300 |
---|---|---|
committer | NeonXP <i@neonxp.dev> | 2022-11-16 05:11:19 +0300 |
commit | a321bfe7b2f6db5078de7b2e5ed5ddcccd65f319 (patch) | |
tree | d11c187bceee610a7843463949df128569142680 /parser |
initial commit
Diffstat (limited to 'parser')
-rw-r--r-- | parser/lexer.go | 181 | ||||
-rw-r--r-- | parser/lextype_string.go | 34 | ||||
-rw-r--r-- | parser/parser.go | 126 | ||||
-rw-r--r-- | parser/parser_test.go | 71 | ||||
-rw-r--r-- | parser/scanners.go | 32 | ||||
-rw-r--r-- | parser/statefunc.go | 17 | ||||
-rw-r--r-- | parser/states.go | 110 |
7 files changed, 571 insertions, 0 deletions
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..a772698
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,181 @@
+package parser
+
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+)
+
+// eof is the sentinel rune returned by Next once the input is exhausted.
+const eof rune = -1
+
+// lexem is a single token produced by the lexer.
+type lexem struct {
+	Type  lexType // Kind of the lexem.
+	Value string  // Text of the lexem (or the message for lError).
+	Start int     // Byte offset in the input where the lexem starts.
+	End   int     // Byte offset in the input just past the lexem.
+}
+
+// lexType identifies the kind of a lexem.
+type lexType int
+
+const (
+	lEOF         lexType = iota // Zero value of lexType.
+	lError                      // Produced by Errorf; Value holds the message.
+	lObjectStart                // "{"
+	lObjectEnd                  // "}"
+	lObjectKey                  // Quoted object key.
+	lObjectValue                // Marker emitted after ':' and before the value.
+	lArrayStart                 // "["
+	lArrayEnd                   // "]"
+	lString                     // Quoted string.
+	lNumber                     // Numeric literal.
+	lBoolean                    // true / false.
+	lNull                       // null.
+)
+
+// lexer holds the current scanner state.
+type lexer struct {
+	Input  string     // Input string.
+	Start  int        // Start position of the current lexem.
+	Pos    int        // Current position in the input string.
+	Output chan lexem // Channel the lexems are delivered on.
+	width  int        // Width in bytes of the last rune read by Next.
+	states stateStack // Stack of states used by PushState / PopState.
+}
+
+// newLexer returns a new scanner positioned at the beginning of input.
+func newLexer(input string) *lexer {
+	// Start, Pos and width begin at their zero values.
+	return &lexer{
+		Input:  input,
+		Output: make(chan lexem, 2),
+	}
+}
+
+// Run drives the state machine from init until a state returns nil,
+// then closes Output.
+func (l *lexer) Run(init stateFunc) {
+	state := init
+	for state != nil {
+		state = state(l)
+	}
+	close(l.Output)
+}
+
+// PopState removes and returns the most recently pushed state function.
+func (l *lexer) PopState() stateFunc {
+	return l.states.Pop()
+}
+
+// PushState remembers s so that it can be resumed later via PopState.
+func (l *lexer) PushState(s stateFunc) {
+	l.states.Push(s)
+}
+
+// Emit sends the text between Start and Pos to Output as a lexem of the
+// given type and begins a new lexem at Pos.
+func (l *lexer) Emit(typ lexType) {
+	from, to := l.Start, l.Pos
+	l.Output <- lexem{Type: typ, Value: l.Input[from:to], Start: from, End: to}
+	l.Start = to
+}
+
+// Errorf sends an lError lexem carrying the formatted message and returns
+// nil, which makes Run stop scanning.
+func (l *lexer) Errorf(format string, args ...interface{}) stateFunc {
+	msg := fmt.Sprintf(format, args...)
+	l.Output <- lexem{Type: lError, Value: msg, Start: l.Start, End: l.Pos}
+	return nil
+}
+
+// Next returns the next rune of the input and advances past it.
+// At the end of the input it returns eof.
+func (l *lexer) Next() (r rune) {
+	if l.Pos >= len(l.Input) {
+		l.width = 0 // makes a following Back a no-op
+		return eof
+	}
+	r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
+	l.Pos += l.width
+	return r
+}
+
+// Back moves the position back over the rune returned by the last Next.
+// It can only undo a single Next: width holds just the last rune's size.
+func (l *lexer) Back() {
+	l.Pos -= l.width
+}
+
+// Ignore drops the text buffered so far: the current lexem restarts at Pos.
+func (l *lexer) Ignore() {
+	l.Start = l.Pos
+	l.width = 0
+}
+
+// Peek returns the rune at the current position without consuming it.
+func (l *lexer) Peek() (r rune) {
+	r = l.Next()
+	l.Back()
+	return r
+}
+
+// Accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did so.
+func (l *lexer) Accept(valid string) bool {
+	if strings.ContainsRune(valid, l.Next()) {
+		return true
+	}
+	l.Back()
+	return false
+}
+
+// AcceptString reports whether s occurs at the current position and, if so,
+// advances the position past it.
+//
+// The match is made against the len(s) bytes starting at Pos (not Start),
+// so the method stays correct when part of the current lexem has already
+// been consumed. With caseInsensitive set, the comparison ignores case;
+// s is expected to be ASCII (such as the JSON keywords), because the
+// compared window is exactly len(s) bytes long.
+func (l *lexer) AcceptString(s string, caseInsensitive bool) bool {
+	rest := l.Input[l.Pos:]
+	if len(rest) < len(s) {
+		return false
+	}
+	// Compare only the candidate window instead of lower-casing the whole
+	// remaining input on every call.
+	head := rest[:len(s)]
+	matched := head == s
+	if caseInsensitive && !matched {
+		matched = strings.EqualFold(head, s)
+	}
+	if !matched {
+		return false
+	}
+	l.width = 0 // several runes were consumed; Back must not undo part of them
+	l.Pos += len(s)
+	return true
+}
+
+// AcceptAnyOf tries each of the substrings in order with AcceptString and
+// returns true as soon as one of them is found at the current position.
+func (l *lexer) AcceptAnyOf(s []string, caseInsensitive bool) bool {
+	for _, substring := range s {
+		if l.AcceptString(substring, caseInsensitive) {
+			return true
+		}
+	}
+	return false
+}
+
+// AcceptWhile consumes runes from the input while they are in the `valid`
+// string. It returns true if at least one rune was consumed.
+func (l *lexer) AcceptWhile(valid string) bool {
+	isValid := false
+	for l.Accept(valid) {
+		isValid = true
+	}
+	return isValid
+}
+
+// AcceptWhileNot consumes runes from the input while they are NOT in the
+// `invalid` string. It returns true if at least one rune was consumed.
+func (l *lexer) AcceptWhileNot(invalid string) bool {
+	isValid := false
+	for {
+		r := l.Next()
+		// Stop at the end of the input as well: eof is never contained in
+		// `invalid`, so without this check the loop would spin forever once
+		// the input is exhausted.
+		if r == eof || strings.ContainsRune(invalid, r) {
+			break
+		}
+		isValid = true
+	}
+	l.Back()
+	return isValid
+}
+
+// AtStart reports whether the current lexem is still empty, i.e. nothing
+// has been consumed since Start was last set.
+func (l *lexer) AtStart() bool {
+	return l.Pos == l.Start
+}
diff --git a/parser/lextype_string.go b/parser/lextype_string.go
new file mode 100644
index 0000000..f34eb7c
--- /dev/null
+++ b/parser/lextype_string.go
@@ -0,0 +1,34 @@
+// Code generated by "stringer -type=lexType"; DO NOT EDIT.
+
+package parser
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[lEOF-0]
+	_ = x[lError-1]
+	_ = x[lObjectStart-2]
+	_ = x[lObjectEnd-3]
+	_ = x[lObjectKey-4]
+	_ = x[lObjectValue-5]
+	_ = x[lArrayStart-6]
+	_ = x[lArrayEnd-7]
+	_ = x[lString-8]
+	_ = x[lNumber-9]
+	_ = x[lBoolean-10]
+	_ = x[lNull-11]
+}
+
+const _lexType_name = "lEOFlErrorlObjectStartlObjectEndlObjectKeylObjectValuelArrayStartlArrayEndlStringlNumberlBooleanlNull"
+
+var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}
+
+func (i lexType) String() string {
+	if i < 0 || i >= lexType(len(_lexType_index)-1) {
+		return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
+}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..222e4c0
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,126 @@
+package parser
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"go.neonxp.dev/json/model"
+)
+
+// Parse parses a JSON document and returns its root node.
+//
+// The input is tokenized by a lexer running in its own goroutine; the
+// parser consumes the lexems from the lexer's Output channel.
+func Parse(json string) (*model.Node, error) {
+	l := newLexer(json)
+	go l.Run(initJson)
+	n, err := parse(l.Output)
+	// Drain whatever the lexer still produces, so that its goroutine is
+	// never left blocked on a send after the parser stopped reading
+	// (for example when it returned early with an error).
+	for range l.Output {
+	}
+	if err != nil {
+		return nil, err
+	}
+	return model.NewNode(n), nil
+}
+
+// parse reads one complete JSON value from ch.
+func parse(ch chan lexem) (any, error) {
+	prefix, ok := <-ch
+	if !ok {
+		// The lexer finished without producing anything (empty input).
+		return nil, fmt.Errorf("unexpected end of input")
+	}
+	return parseValue(prefix, ch)
+}
+
+// parseValue converts the lexem first (and, for objects and arrays, the
+// lexems following it on ch) into a Go value: a string, float64, bool,
+// nil, model.NodeObjectValue or model.NodeArrayValue.
+func parseValue(first lexem, ch chan lexem) (any, error) {
+	switch first.Type {
+	case lObjectStart:
+		return parseObject(ch)
+	case lArrayStart:
+		return parseArray(ch)
+	case lString:
+		return trimQuotes(first.Value), nil
+	case lNumber:
+		num, err := strconv.ParseFloat(first.Value, 64)
+		if err != nil {
+			return nil, err
+		}
+		return num, nil
+	case lBoolean:
+		return strings.EqualFold(first.Value, "true"), nil
+	case lNull:
+		return nil, nil
+	case lError:
+		// Surface the lexer's own message instead of dropping it.
+		return nil, fmt.Errorf("syntax error at position %d: %s", first.Start, first.Value)
+	}
+	return nil, fmt.Errorf("invalid token: '%s' type=%s", first.Value, first.Type.String())
+}
+
+// trimQuotes removes exactly one pair of surrounding double quotes.
+// Unlike strings.Trim it leaves quotes that belong to the content alone,
+// e.g. an escaped quote at the very end of the string.
+// NOTE(review): escape sequences are kept as written, they are not decoded.
+func trimQuotes(s string) string {
+	if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' {
+		return s[1 : len(s)-1]
+	}
+	return s
+}
+
+// parseObject reads lexems up to the matching lObjectEnd and returns the
+// collected key/value pairs. The opening lObjectStart must already have
+// been consumed by the caller.
+func parseObject(ch chan lexem) (model.NodeObjectValue, error) {
+	m := model.NodeObjectValue{}
+	nextKey := ""
+	for l := range ch {
+		switch l.Type {
+		case lObjectEnd:
+			return m, nil
+		case lObjectKey:
+			nextKey = trimQuotes(l.Value)
+		case lObjectValue:
+			// Marker emitted by the lexer after ':'; it carries no data.
+		default:
+			v, err := parseValue(l, ch)
+			if err != nil {
+				return nil, err
+			}
+			m.Set(nextKey, v)
+		}
+	}
+	return nil, fmt.Errorf("unexpected end of object")
+}
+
+// parseArray reads lexems up to the matching lArrayEnd and returns the
+// collected elements. The opening lArrayStart must already have been
+// consumed by the caller.
+func parseArray(ch chan lexem) (model.NodeArrayValue, error) {
+	m := model.NodeArrayValue{}
+	for l := range ch {
+		if l.Type == lArrayEnd {
+			return m, nil
+		}
+		v, err := parseValue(l, ch)
+		if err != nil {
+			return nil, err
+		}
+		m = append(m, model.NewNode(v))
+	}
+	return nil, fmt.Errorf("unexpected end of array")
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..ea5aa0e
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,71 @@
+package parser
+
+import (
+	"reflect"
+	"testing"
+
+	"go.neonxp.dev/json/model"
+)
+
+func TestParse(t *testing.T) {
+	type args struct {
+		json string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		want    *model.Node
+		wantErr bool
+	}{
+		{
+			name: "complex",
+			args: args{
+				json: `{
+					"string key": "string value",
+					"number key": 1337,
+					"float key": 123.3,
+					"object key": {
+						"ab": "cd"
+					},
+					"array key": [
+						1,
+						2,
+						"three"
+					],
+					"null key":null,
+					"boolean key":true
+				}`,
+			},
+			want: model.NewNode(
+				model.NodeObjectValue{
+					"string key": model.NewNode("string value"),
+					"number key": model.NewNode(1337),
+					"float key":  model.NewNode(123.3),
+					"object key": model.NewNode(model.NodeObjectValue{
+						"ab": model.NewNode("cd"),
+					}),
+					"array key": model.NewNode(model.NodeArrayValue{
+						model.NewNode(1),
+						model.NewNode(2),
+						model.NewNode("three"),
+					}),
+					"null key":    model.NewNode(nil),
+					"boolean key": model.NewNode(true),
+				},
+			),
+			wantErr: false,
+		},
+		{
+			name: "top-level array",
+			args: args{json: `["a", "b"]`},
+			want: model.NewNode(model.NodeArrayValue{
+				model.NewNode("a"),
+				model.NewNode("b"),
+			}),
+		},
+		{
+			name: "top-level string",
+			args: args{json: `"hello"`},
+			want: model.NewNode("hello"),
+		},
+		{
+			name: "CRLF line endings",
+			args: args{json: "{\r\n\t\"a\": \"b\"\r\n}"},
+			want: model.NewNode(model.NodeObjectValue{
+				"a": model.NewNode("b"),
+			}),
+		},
+		{
+			name:    "unterminated array",
+			args:    args{json: `[1, 2`},
+			wantErr: true,
+		},
+		{
+			name:    "unknown token in array",
+			args:    args{json: `[x]`},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := Parse(tt.args.json)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Parse() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/parser/scanners.go b/parser/scanners.go
new file mode 100644
index 0000000..078f9d3
--- /dev/null
+++ b/parser/scanners.go
@@ -0,0 +1,32 @@
+package parser
+
+// scanNumber consumes a number at the current position: an optional '-',
+// one or more digits, an optional fraction and an optional exponent.
+// It returns false, leaving the position untouched, if no number is there.
+func scanNumber(l *lexer) bool {
+	const digits = "0123456789"
+	start := l.Pos
+	l.Accept("-")
+	if !l.AcceptWhile(digits) {
+		// not found any digit: undo the optional sign
+		l.Pos = start
+		l.width = 0
+		return false
+	}
+	if l.Accept(".") {
+		l.AcceptWhile(digits)
+	}
+	// An exponent only counts when it has at least one digit; otherwise
+	// the 'e' belongs to whatever follows the number.
+	mark := l.Pos
+	if l.Accept("eE") {
+		l.Accept("+-")
+		if !l.AcceptWhile(digits) {
+			l.Pos = mark
+			l.width = 0
+		}
+	}
+	return true
+}
+
+// scanQuotedString consumes a string delimited by quote at the current
+// position, including both delimiters. A backslash makes the following
+// rune part of the string, so an escaped quote does not close it.
+// It returns false, leaving the position untouched, if the input does not
+// start with quote or the closing quote is missing.
+func scanQuotedString(l *lexer, quote rune) bool {
+	start := l.Pos
+	if l.Next() != quote {
+		l.Back()
+		return false
+	}
+	for {
+		ch := l.Next()
+		switch ch {
+		case eof:
+			l.Pos = start // Return position to start
+			return false  // Unclosed quote string?
+		case '\\':
+			l.Next() // Skip next char
+		case quote:
+			return true // Closing quote
+		}
+	}
+}
diff --git a/parser/statefunc.go b/parser/statefunc.go
new file mode 100644
index 0000000..69d7098
--- /dev/null
+++ b/parser/statefunc.go
@@ -0,0 +1,17 @@
+package parser
+
+// stateFunc is one state of the lexer: it scans some input and returns
+// the state to continue with, or nil to stop.
+type stateFunc func(*lexer) stateFunc
+
+// stateStack is a LIFO stack of states to return to.
+type stateStack []stateFunc
+
+// Push puts s on top of the stack.
+func (ss *stateStack) Push(s stateFunc) {
+	*ss = append(*ss, s)
+}
+
+// Pop removes and returns the top of the stack, or nil if it is empty.
+func (ss *stateStack) Pop() (s stateFunc) {
+	if len(*ss) == 0 {
+		return nil
+	}
+	*ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
+	return s
+}
diff --git a/parser/states.go b/parser/states.go
new file mode 100644
index 0000000..92c80dc
--- /dev/null
+++ b/parser/states.go
@@ -0,0 +1,110 @@
+package parser
+
+// initJson is the initial state: it expects a single JSON value.
+func initJson(l *lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	switch {
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		// Nothing is pushed: closing the top-level array pops nil and
+		// thereby ends the scan, same as for a top-level object.
+		return stateInArray
+	case emitScalar(l):
+		// A bare scalar is a complete document.
+		return nil
+	case l.Peek() == eof:
+		return nil
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+// emitScalar tries to scan a string, number, boolean or null at the
+// current position. On success it emits the matching lexem and returns
+// true; otherwise it returns false without consuming input.
+func emitScalar(l *lexer) bool {
+	switch {
+	case scanQuotedString(l, '"'):
+		l.Emit(lString)
+	case scanNumber(l):
+		l.Emit(lNumber)
+	case l.AcceptAnyOf([]string{"true", "false"}, true):
+		l.Emit(lBoolean)
+	case l.AcceptString("null", true):
+		l.Emit(lNull)
+	default:
+		return false
+	}
+	return true
+}
+
+// stateInObject scans one `"key": value` pair of an object, or its
+// closing brace. Commas between pairs are optional for this lexer.
+func stateInObject(l *lexer) stateFunc {
+	// we in object, so we expect field keys and values
+	ignoreWhiteSpace(l)
+	if l.Accept("}") {
+		l.Emit(lObjectEnd)
+		// If meet close object return to previous state (including initial)
+		return l.PopState()
+	}
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	if !scanQuotedString(l, '"') {
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	l.Emit(lObjectKey)
+	ignoreWhiteSpace(l)
+	if !l.Accept(":") {
+		return l.Errorf("Expected ':'")
+	}
+	ignoreWhiteSpace(l)
+	l.Emit(lObjectValue)
+	switch {
+	case emitScalar(l):
+		// Skip the separator that may follow the value.
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInObject)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInObject)
+		return stateInArray
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+// stateInArray scans one element of an array, or its closing bracket.
+// Commas between elements are optional for this lexer.
+func stateInArray(l *lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	switch {
+	case emitScalar(l):
+		return stateInArray
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInArray)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInArray)
+		return stateInArray
+	case l.Accept("]"):
+		l.Emit(lArrayEnd)
+		return l.PopState()
+	case l.Peek() == eof:
+		return l.Errorf("Unexpected end of array")
+	}
+	// Nothing matched and nothing was consumed: returning stateInArray
+	// here would loop forever on the same position, so report the token.
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+// ignoreWhiteSpace skips spaces, tabs, line feeds and carriage returns.
+func ignoreWhiteSpace(l *lexer) {
+	l.AcceptWhile(" \n\r\t") // ignore whitespaces
+	l.Ignore()
+}
|