author    | NeonXP <i@neonxp.dev> | 2022-12-27 02:37:02 +0300
committer | NeonXP <i@neonxp.dev> | 2022-12-27 02:40:03 +0300
commit    | 76a7f461ebbde70ea0e3d4f9b79c08139acaee7c (patch)
tree      | 5e6dcb05f00be5109b3465ef16a6e9169a27497e /parser
parent    | 6f1d1df79f161cfc695f74d271d689ba72c44d09 (diff)
Completely rewritten (v0.1.0)
Diffstat (limited to 'parser')
-rw-r--r-- | parser/lexer.go          | 182
-rw-r--r-- | parser/lextype_string.go |  34
-rw-r--r-- | parser/parser.go         | 126
-rw-r--r-- | parser/parser_test.go    |  71
-rw-r--r-- | parser/scanners.go       |  32
-rw-r--r-- | parser/statefunc.go      |  17
-rw-r--r-- | parser/states.go         | 110
7 files changed, 0 insertions, 572 deletions
diff --git a/parser/lexer.go b/parser/lexer.go
deleted file mode 100644
index 5034f6a..0000000
--- a/parser/lexer.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package parser
-
-import (
-    "fmt"
-    "strings"
-    "unicode/utf8"
-)
-
-const eof rune = -1
-
-type lexem struct {
-    Type  lexType // Type of Lexem.
-    Value string  // Value of Lexem.
-    Start int     // Start position at input string.
-    End   int     // End position at input string.
-}
-
-//go:generate stringer -type=lexType
-type lexType int
-
-const (
-    lEOF lexType = iota
-    lError
-    lObjectStart
-    lObjectEnd
-    lObjectKey
-    lObjectValue
-    lArrayStart
-    lArrayEnd
-    lString
-    lNumber
-    lBoolean
-    lNull
-)
-
-// lexer holds current scanner state.
-type lexer struct {
-    Input  string     // Input string.
-    Start  int        // Start position of current lexem.
-    Pos    int        // Pos at input string.
-    Output chan lexem // Lexems channel.
-    width  int        // Width of last rune.
-    states stateStack // Stack of states to realize PrevState.
-}
-
-// newLexer returns new scanner for input string.
-func newLexer(input string) *lexer {
-    return &lexer{
-        Input:  input,
-        Start:  0,
-        Pos:    0,
-        Output: make(chan lexem, 2),
-        width:  0,
-    }
-}
-
-// Run lexing.
-func (l *lexer) Run(init stateFunc) {
-    for state := init; state != nil; {
-        state = state(l)
-    }
-    close(l.Output)
-}
-
-// PopState returns previous state function.
-func (l *lexer) PopState() stateFunc {
-    return l.states.Pop()
-}
-
-// PushState pushes state before going deeper states.
-func (l *lexer) PushState(s stateFunc) {
-    l.states.Push(s)
-}
-
-// Emit current lexem to output.
-func (l *lexer) Emit(typ lexType) {
-    l.Output <- lexem{
-        Type:  typ,
-        Value: l.Input[l.Start:l.Pos],
-        Start: l.Start,
-        End:   l.Pos,
-    }
-    l.Start = l.Pos
-}
-
-// Errorf produces error lexem and stops scanning.
-func (l *lexer) Errorf(format string, args ...interface{}) stateFunc {
-    l.Output <- lexem{
-        Type:  lError,
-        Value: fmt.Sprintf(format, args...),
-        Start: l.Start,
-        End:   l.Pos,
-    }
-    return nil
-}
-
-// Next rune from input.
-func (l *lexer) Next() (r rune) {
-    if int(l.Pos) >= len(l.Input) {
-        l.width = 0
-        return eof
-    }
-    r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
-    l.Pos += l.width
-    return r
-}
-
-// Back move position to previos rune.
-func (l *lexer) Back() {
-    l.Pos -= l.width
-}
-
-// Ignore previosly buffered text.
-func (l *lexer) Ignore() {
-    l.Start = l.Pos
-    l.width = 0
-}
-
-// Peek rune at current position without moving position.
-func (l *lexer) Peek() (r rune) {
-    r = l.Next()
-    l.Back()
-    return r
-}
-
-// Accept any rune from valid string. Returns true if Next rune was in valid string.
-func (l *lexer) Accept(valid string) bool {
-    if strings.ContainsRune(valid, l.Next()) {
-        return true
-    }
-    l.Back()
-    return false
-}
-
-// AcceptString returns true if given string was at position.
-func (l *lexer) AcceptString(s string, caseInsentive bool) bool {
-    input := l.Input[l.Start:]
-    if caseInsentive {
-        input = strings.ToLower(input)
-        s = strings.ToLower(s)
-    }
-    if strings.HasPrefix(input, s) {
-        l.width = 0
-        l.Pos += len(s)
-        return true
-    }
-    return false
-}
-
-// AcceptAnyOf substrings. Retuns true if any of substrings was found.
-func (l *lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
-    for _, substring := range s {
-        if l.AcceptString(substring, caseInsentive) {
-            return true
-        }
-    }
-    return false
-}
-
-// AcceptWhile passing symbols from input while they at `valid` string.
-func (l *lexer) AcceptWhile(valid string) bool {
-    isValid := false
-    for l.Accept(valid) {
-        isValid = true
-    }
-    return isValid
-}
-
-// AcceptWhileNot passing symbols from input while they NOT in `invalid` string.
-func (l *lexer) AcceptWhileNot(invalid string) bool {
-    isValid := false
-    for !strings.ContainsRune(invalid, l.Next()) {
-        isValid = true
-    }
-    l.Back()
-    return isValid
-}
-
-// AtStart returns true if current lexem not empty
-func (l *lexer) AtStart() bool {
-    return l.Pos == l.Start
-}
diff --git a/parser/lextype_string.go b/parser/lextype_string.go
deleted file mode 100644
index f34eb7c..0000000
--- a/parser/lextype_string.go
+++ /dev/null
@@ -1,34 +0,0 @@
-// Code generated by "stringer -type=lexType"; DO NOT EDIT.
-
-package parser
-
-import "strconv"
-
-func _() {
-    // An "invalid array index" compiler error signifies that the constant values have changed.
-    // Re-run the stringer command to generate them again.
-    var x [1]struct{}
-    _ = x[lEOF-0]
-    _ = x[lError-1]
-    _ = x[lObjectStart-2]
-    _ = x[lObjectEnd-3]
-    _ = x[lObjectKey-4]
-    _ = x[lObjectValue-5]
-    _ = x[lArrayStart-6]
-    _ = x[lArrayEnd-7]
-    _ = x[lString-8]
-    _ = x[lNumber-9]
-    _ = x[lBoolean-10]
-    _ = x[lNull-11]
-}
-
-const _lexType_name = "lEOFlErrorlObjectStartlObjectEndlObjectKeylObjectValuelArrayStartlArrayEndlStringlNumberlBooleanlNull"
-
-var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}
-
-func (i lexType) String() string {
-    if i < 0 || i >= lexType(len(_lexType_index)-1) {
-        return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
-    }
-    return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
-}
diff --git a/parser/parser.go b/parser/parser.go
deleted file mode 100644
index dfcd4b4..0000000
--- a/parser/parser.go
+++ /dev/null
@@ -1,126 +0,0 @@
-package parser
-
-import (
-    "fmt"
-    "strconv"
-    "strings"
-
-    "go.neonxp.dev/json/model"
-)
-
-func Parse(json string) (model.Node, error) {
-    l := newLexer(json)
-    go l.Run(initJson)
-    n, err := parse(l.Output)
-    if err != nil {
-        return nil, err
-    }
-    return model.NewNode(n), nil
-}
-
-func parse(ch chan lexem) (any, error) {
-    prefix := <-ch
-    switch prefix.Type {
-    case lObjectStart:
-        return parseObject(ch)
-    case lArrayStart:
-        return parseArray(ch)
-    case lString:
-        return strings.Trim(prefix.Value, `"`), nil
-    case lNumber:
-        num, err := strconv.ParseFloat(prefix.Value, 64)
-        if err != nil {
-            return nil, err
-        }
-        return num, nil
-    case lBoolean:
-        if strings.ToLower(prefix.Value) == "true" {
-            return true, nil
-        }
-        return false, nil
-    case lNull:
-        return nil, nil
-    }
-    return nil, fmt.Errorf("ivalid token: '%s' type=%s", prefix.Value, prefix.Type.String())
-}
-
-func parseObject(ch chan lexem) (model.NodeObjectValue, error) {
-    m := model.NodeObjectValue{}
-    nextKey := ""
-    for l := range ch {
-        switch l.Type {
-        case lObjectKey:
-            nextKey = strings.Trim(l.Value, `"`)
-        case lString:
-            m.Set(nextKey, strings.Trim(l.Value, `"`))
-        case lNumber:
-            num, err := strconv.ParseFloat(l.Value, 64)
-            if err != nil {
-                return nil, err
-            }
-            m.Set(nextKey, num)
-        case lBoolean:
-            if strings.ToLower(l.Value) == "true" {
-                m.Set(nextKey, true)
-                continue
-            }
-            m.Set(nextKey, false)
-        case lNull:
-            m.Set(nextKey, nil)
-        case lObjectStart:
-            obj, err := parseObject(ch)
-            if err != nil {
-                return nil, err
-            }
-            m.Set(nextKey, obj)
-        case lArrayStart:
-            arr, err := parseArray(ch)
-            if err != nil {
-                return nil, err
-            }
-            m.Set(nextKey, arr)
-        case lObjectEnd:
-            return m, nil
-        }
-    }
-    return nil, fmt.Errorf("unexpected end of object")
-}
-
-func parseArray(ch chan lexem) (model.NodeArrayValue, error) {
-    m := model.NodeArrayValue{}
-    for l := range ch {
-        switch l.Type {
-        case lString:
-            m = append(m, model.NewNode(strings.Trim(l.Value, `"`)))
-        case lNumber:
-            num, err := strconv.ParseFloat(l.Value, 64)
-            if err != nil {
-                return nil, err
-            }
-            m = append(m, model.NewNode(num))
-        case lBoolean:
-            if strings.ToLower(l.Value) == "true" {
-                m = append(m, model.NewNode(true))
-                continue
-            }
-            m = append(m, model.NewNode(false))
-        case lNull:
-            m = append(m, model.NewNode(nil))
-        case lObjectStart:
-            obj, err := parseObject(ch)
-            if err != nil {
-                return nil, err
-            }
-            m = append(m, model.NewNode(obj))
-        case lArrayStart:
-            arr, err := parseArray(ch)
-            if err != nil {
-                return nil, err
-            }
-            m = append(m, model.NewNode(arr))
-        case lArrayEnd:
-            return m, nil
-        }
-    }
-    return nil, fmt.Errorf("unexpected end of object")
-}
diff --git a/parser/parser_test.go b/parser/parser_test.go
deleted file mode 100644
index 88a1f8f..0000000
--- a/parser/parser_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package parser
-
-import (
-    "reflect"
-    "testing"
-
-    "go.neonxp.dev/json/model"
-)
-
-func TestParse(t *testing.T) {
-    type args struct {
-        json string
-    }
-    tests := []struct {
-        name    string
-        args    args
-        want    model.Node
-        wantErr bool
-    }{
-        {
-            name: "complex",
-            args: args{
-                json: `{
-                    "string key": "string value",
-                    "number key": 1337,
-                    "float key": 123.3,
-                    "object key": {
-                        "ab": "cd"
-                    },
-                    "array key": [
-                        1,
-                        2,
-                        "three"
-                    ],
-                    "null key":null,
-                    "boolean key":true
-                }`,
-            },
-            want: model.NewNode(
-                model.NodeObjectValue{
-                    "string key": model.NewNode("string value"),
-                    "number key": model.NewNode(1337),
-                    "float key":  model.NewNode(123.3),
-                    "object key": model.NewNode(model.NodeObjectValue{
-                        "ab": model.NewNode("cd"),
-                    }),
-                    "array key": model.NewNode(model.NodeArrayValue{
-                        model.NewNode(1),
-                        model.NewNode(2),
-                        model.NewNode("three"),
-                    }),
-                    "null key":    model.NewNode(nil),
-                    "boolean key": model.NewNode(true),
-                },
-            ),
-            wantErr: false,
-        },
-    }
-    for _, tt := range tests {
-        t.Run(tt.name, func(t *testing.T) {
-            got, err := Parse(tt.args.json)
-            if (err != nil) != tt.wantErr {
-                t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
-                return
-            }
-            if !reflect.DeepEqual(got, tt.want) {
-                t.Errorf("Parse() = %v, want %v", got, tt.want)
-            }
-        })
-    }
-}
diff --git a/parser/scanners.go b/parser/scanners.go
deleted file mode 100644
index 078f9d3..0000000
--- a/parser/scanners.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package parser
-
-func scanNumber(l *lexer) bool {
-    l.AcceptWhile("0123456789")
-    if l.AtStart() {
-        // not found any digit
-        return false
-    }
-    l.Accept(".")
-    l.AcceptWhile("0123456789")
-    return !l.AtStart()
-}
-
-func scanQuotedString(l *lexer, quote rune) bool {
-    start := l.Pos
-    if l.Next() != quote {
-        l.Back()
-        return false
-    }
-    for {
-        ch := l.Next()
-        switch ch {
-        case eof:
-            l.Pos = start // Return position to start
-            return false  // Unclosed quote string?
-        case '\\':
-            l.Next() // Skip next char
-        case quote:
-            return true // Closing quote
-        }
-    }
-}
diff --git a/parser/statefunc.go b/parser/statefunc.go
deleted file mode 100644
index 69d7098..0000000
--- a/parser/statefunc.go
+++ /dev/null
@@ -1,17 +0,0 @@
-package parser
-
-type stateFunc func(*lexer) stateFunc
-
-type stateStack []stateFunc
-
-func (ss *stateStack) Push(s stateFunc) {
-    *ss = append(*ss, s)
-}
-
-func (ss *stateStack) Pop() (s stateFunc) {
-    if len(*ss) == 0 {
-        return nil
-    }
-    *ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
-    return s
-}
diff --git a/parser/states.go b/parser/states.go
deleted file mode 100644
index 92c80dc..0000000
--- a/parser/states.go
+++ /dev/null
@@ -1,110 +0,0 @@
-package parser
-
-func initJson(l *lexer) stateFunc {
-    ignoreWhiteSpace(l)
-    switch {
-    case l.Accept("{"):
-        l.Emit(lObjectStart)
-        return stateInObject
-    case l.Accept("["):
-        l.Emit(lArrayStart)
-    case l.Peek() == eof:
-        return nil
-    }
-    return l.Errorf("Unknown token: %s", string(l.Peek()))
-}
-
-func stateInObject(l *lexer) stateFunc {
-    // we in object, so we expect field keys and values
-    ignoreWhiteSpace(l)
-    if l.Accept("}") {
-        l.Emit(lObjectEnd)
-        // If meet close object return to previous state (including initial)
-        return l.PopState()
-    }
-    ignoreWhiteSpace(l)
-    l.Accept(",")
-    ignoreWhiteSpace(l)
-    if !scanQuotedString(l, '"') {
-        return l.Errorf("Unknown token: %s", string(l.Peek()))
-    }
-    l.Emit(lObjectKey)
-    ignoreWhiteSpace(l)
-    if !l.Accept(":") {
-        return l.Errorf("Expected ':'")
-    }
-    ignoreWhiteSpace(l)
-    l.Emit(lObjectValue)
-    switch {
-    case scanQuotedString(l, '"'):
-        l.Emit(lString)
-        ignoreWhiteSpace(l)
-        l.Accept(",")
-        l.Ignore()
-        ignoreWhiteSpace(l)
-        return stateInObject
-    case scanNumber(l):
-        l.Emit(lNumber)
-        ignoreWhiteSpace(l)
-        l.Accept(",")
-        l.Ignore()
-        ignoreWhiteSpace(l)
-        return stateInObject
-    case l.AcceptAnyOf([]string{"true", "false"}, true):
-        l.Emit(lBoolean)
-        ignoreWhiteSpace(l)
-        l.Accept(",")
-        l.Ignore()
-        ignoreWhiteSpace(l)
-        return stateInObject
-    case l.AcceptString("null", true):
-        l.Emit(lNull)
-        ignoreWhiteSpace(l)
-        l.Accept(",")
-        l.Ignore()
-        ignoreWhiteSpace(l)
-        return stateInObject
-    case l.Accept("{"):
-        l.Emit(lObjectStart)
-        l.PushState(stateInObject)
-        return stateInObject
-    case l.Accept("["):
-        l.Emit(lArrayStart)
-        l.PushState(stateInObject)
-        return stateInArray
-    }
-    return l.Errorf("Unknown token: %s", string(l.Peek()))
-}
-
-func stateInArray(l *lexer) stateFunc {
-    ignoreWhiteSpace(l)
-    l.Accept(",")
-    ignoreWhiteSpace(l)
-    switch {
-    case scanQuotedString(l, '"'):
-        l.Emit(lString)
-    case scanNumber(l):
-        l.Emit(lNumber)
-    case l.AcceptAnyOf([]string{"true", "false"}, true):
-        l.Emit(lBoolean)
-    case l.AcceptString("null", true):
-        l.Emit(lNull)
-    case l.Accept("{"):
-        l.Emit(lObjectStart)
-        l.PushState(stateInArray)
-        return stateInObject
-    case l.Accept("["):
-        l.Emit(lArrayStart)
-        l.PushState(stateInArray)
-        return stateInArray
-    case l.Accept("]"):
-        l.Emit(lArrayEnd)
-        return l.PopState()
-    }
-    return stateInArray
-}
-
-func ignoreWhiteSpace(l *lexer) {
-    l.AcceptWhile(" \n\t") // ignore whitespaces
-    l.Ignore()
-}
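For context, a minimal sketch of how the Parse entry point removed by this commit was driven, based on the deleted parser.go and parser_test.go above. The go.neonxp.dev/json/parser import path is an assumption inferred from the go.neonxp.dev/json/model import in the diff; this API no longer exists after this commit.

package main

import (
    "fmt"

    "go.neonxp.dev/json/parser" // assumed import path; package removed by this commit
)

func main() {
    // Input fragment taken from the deleted parser_test.go test case.
    node, err := parser.Parse(`{"string key": "string value", "number key": 1337}`)
    if err != nil {
        panic(err)
    }
    fmt.Printf("%v\n", node) // model.Node value produced by the removed parser
}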