aboutsummaryrefslogtreecommitdiff
path: root/parser
diff options
context:
space:
mode:
authorNeonXP <i@neonxp.dev>2022-11-16 05:11:19 +0300
committerNeonXP <i@neonxp.dev>2022-11-16 05:11:19 +0300
commita321bfe7b2f6db5078de7b2e5ed5ddcccd65f319 (patch)
treed11c187bceee610a7843463949df128569142680 /parser
initial commit
Diffstat (limited to 'parser')
-rw-r--r--parser/lexer.go181
-rw-r--r--parser/lextype_string.go34
-rw-r--r--parser/parser.go126
-rw-r--r--parser/parser_test.go71
-rw-r--r--parser/scanners.go32
-rw-r--r--parser/statefunc.go17
-rw-r--r--parser/states.go110
7 files changed, 571 insertions, 0 deletions
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..a772698
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,181 @@
+package parser
+
+import (
+ "fmt"
+ "strings"
+ "unicode/utf8"
+)
+
// eof is the sentinel rune Next returns once the input is exhausted.
const eof rune = -1

// lexem is a single token produced by the lexer and consumed by the parser.
type lexem struct {
	Type  lexType // Type of Lexem.
	Value string  // Value of Lexem (raw slice of the input, quotes included for strings).
	Start int     // Start position at input string.
	End   int     // End position at input string.
}

// lexType discriminates the kinds of lexem the scanner can emit.
type lexType int

const (
	lEOF         lexType = iota // end of input (not observed emitted anywhere in this file)
	lError                      // scan failure; Value carries the error message
	lObjectStart                // '{'
	lObjectEnd                  // '}'
	lObjectKey                  // quoted object key
	lObjectValue                // marker emitted after ':' (empty Value; parser ignores it)
	lArrayStart                 // '['
	lArrayEnd                   // ']'
	lString                     // quoted string value
	lNumber                     // numeric literal
	lBoolean                    // true/false (matched case-insensitively)
	lNull                       // null (matched case-insensitively)
)
+
// lexer holds current scanner state. Lexems are streamed to Output while a
// chain of stateFuncs (see Run) walks the input.
type lexer struct {
	Input  string     // Input string.
	Start  int        // Start position of current lexem.
	Pos    int        // Pos at input string (always >= Start).
	Output chan lexem // Lexems channel; closed by Run when scanning stops.
	width  int        // Width in bytes of the last rune read; used by Back.
	states stateStack // Stack of states to realize PopState/PushState.
}
+
+// newLexer returns new scanner for input string.
+func newLexer(input string) *lexer {
+ return &lexer{
+ Input: input,
+ Start: 0,
+ Pos: 0,
+ Output: make(chan lexem, 2),
+ width: 0,
+ }
+}
+
+// Run lexing.
+func (l *lexer) Run(init stateFunc) {
+ for state := init; state != nil; {
+ state = state(l)
+ }
+ close(l.Output)
+}
+
// PopState returns the previously pushed state function, or nil when the
// stack is empty (which ends Run's loop).
func (l *lexer) PopState() stateFunc {
	return l.states.Pop()
}
+
// PushState records the state to resume after a nested context (object or
// array) finishes; paired with PopState.
func (l *lexer) PushState(s stateFunc) {
	l.states.Push(s)
}
+
+// Emit current lexem to output.
+func (l *lexer) Emit(typ lexType) {
+ l.Output <- lexem{
+ Type: typ,
+ Value: l.Input[l.Start:l.Pos],
+ Start: l.Start,
+ End: l.Pos,
+ }
+ l.Start = l.Pos
+}
+
+// Errorf produces error lexem and stops scanning.
+func (l *lexer) Errorf(format string, args ...interface{}) stateFunc {
+ l.Output <- lexem{
+ Type: lError,
+ Value: fmt.Sprintf(format, args...),
+ Start: l.Start,
+ End: l.Pos,
+ }
+ return nil
+}
+
+// Next rune from input.
+func (l *lexer) Next() (r rune) {
+ if int(l.Pos) >= len(l.Input) {
+ l.width = 0
+ return eof
+ }
+ r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
+ l.Pos += l.width
+ return r
+}
+
// Back moves the position to the previous rune, undoing the most recent Next.
// Only valid once per Next: width tracks a single rune, so calling Back twice
// in a row rewinds by the same (wrong) amount.
func (l *lexer) Back() {
	l.Pos -= l.width
}
+
// Ignore drops the previously buffered text: the next lexem starts at the
// current position. Zeroing width also turns a following Back into a no-op.
func (l *lexer) Ignore() {
	l.Start = l.Pos
	l.width = 0
}
+
// Peek returns the rune at the current position without moving the position.
// NOTE(review): after Peek, l.width still holds the peeked rune's width, so a
// stray Back() afterwards would rewind past the current position — confirm no
// caller relies on width being untouched here.
func (l *lexer) Peek() (r rune) {
	r = l.Next()
	l.Back()
	return r
}
+
+// Accept any rune from valid string. Returns true if Next rune was in valid string.
+func (l *lexer) Accept(valid string) bool {
+ if strings.ContainsRune(valid, l.Next()) {
+ return true
+ }
+ l.Back()
+ return false
+}
+
+// AcceptString returns true if given string was at position.
+func (l *lexer) AcceptString(s string, caseInsentive bool) bool {
+ input := l.Input[l.Start:]
+ if caseInsentive {
+ input = strings.ToLower(input)
+ s = strings.ToLower(s)
+ }
+ if strings.HasPrefix(input, s) {
+ l.width = 0
+ l.Pos += len(s)
+ return true
+ }
+ return false
+}
+
+// AcceptAnyOf substrings. Retuns true if any of substrings was found.
+func (l *lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
+ for _, substring := range s {
+ if l.AcceptString(substring, caseInsentive) {
+ return true
+ }
+ }
+ return false
+}
+
+// AcceptWhile passing symbols from input while they at `valid` string.
+func (l *lexer) AcceptWhile(valid string) bool {
+ isValid := false
+ for l.Accept(valid) {
+ isValid = true
+ }
+ return isValid
+}
+
+// AcceptWhileNot passing symbols from input while they NOT in `invalid` string.
+func (l *lexer) AcceptWhileNot(invalid string) bool {
+ isValid := false
+ for !strings.ContainsRune(invalid, l.Next()) {
+ isValid = true
+ }
+ l.Back()
+ return isValid
+}
+
// AtStart reports whether the current lexem is EMPTY, i.e. no runes have been
// consumed since Start. (The original comment said "not empty", which is the
// opposite of what Pos == Start means.)
func (l *lexer) AtStart() bool {
	return l.Pos == l.Start
}
diff --git a/parser/lextype_string.go b/parser/lextype_string.go
new file mode 100644
index 0000000..f34eb7c
--- /dev/null
+++ b/parser/lextype_string.go
@@ -0,0 +1,34 @@
+// Code generated by "stringer -type=lexType"; DO NOT EDIT.
+
+package parser
+
+import "strconv"
+
// Compile-time guard (generated by stringer): fails to build if the lexType
// constant values drift from the table below.
func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[lEOF-0]
	_ = x[lError-1]
	_ = x[lObjectStart-2]
	_ = x[lObjectEnd-3]
	_ = x[lObjectKey-4]
	_ = x[lObjectValue-5]
	_ = x[lArrayStart-6]
	_ = x[lArrayEnd-7]
	_ = x[lString-8]
	_ = x[lNumber-9]
	_ = x[lBoolean-10]
	_ = x[lNull-11]
}
+
// _lexType_name concatenates every lexType name; _lexType_index holds the
// start offset of each name within it (both generated by stringer).
const _lexType_name = "lEOFlErrorlObjectStartlObjectEndlObjectKeylObjectValuelArrayStartlArrayEndlStringlNumberlBooleanlNull"

var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}

// String returns the constant's name, or "lexType(N)" for out-of-range values.
func (i lexType) String() string {
	if i < 0 || i >= lexType(len(_lexType_index)-1) {
		return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..222e4c0
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,126 @@
+package parser
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+
+ "go.neonxp.dev/json/model"
+)
+
+func Parse(json string) (*model.Node, error) {
+ l := newLexer(json)
+ go l.Run(initJson)
+ n, err := parse(l.Output)
+ if err != nil {
+ return nil, err
+ }
+ return model.NewNode(n), nil
+}
+
+func parse(ch chan lexem) (any, error) {
+ prefix := <-ch
+ switch prefix.Type {
+ case lObjectStart:
+ return parseObject(ch)
+ case lArrayStart:
+ return parseArray(ch)
+ case lString:
+ return strings.Trim(prefix.Value, `"`), nil
+ case lNumber:
+ num, err := strconv.ParseFloat(prefix.Value, 64)
+ if err != nil {
+ return nil, err
+ }
+ return num, nil
+ case lBoolean:
+ if strings.ToLower(prefix.Value) == "true" {
+ return true, nil
+ }
+ return false, nil
+ case lNull:
+ return nil, nil
+ }
+ return nil, fmt.Errorf("ivalid token: '%s' type=%s", prefix.Value, prefix.Type.String())
+}
+
+func parseObject(ch chan lexem) (model.NodeObjectValue, error) {
+ m := model.NodeObjectValue{}
+ nextKey := ""
+ for l := range ch {
+ switch l.Type {
+ case lObjectKey:
+ nextKey = strings.Trim(l.Value, `"`)
+ case lString:
+ m.Set(nextKey, strings.Trim(l.Value, `"`))
+ case lNumber:
+ num, err := strconv.ParseFloat(l.Value, 64)
+ if err != nil {
+ return nil, err
+ }
+ m.Set(nextKey, num)
+ case lBoolean:
+ if strings.ToLower(l.Value) == "true" {
+ m.Set(nextKey, true)
+ continue
+ }
+ m.Set(nextKey, false)
+ case lNull:
+ m.Set(nextKey, nil)
+ case lObjectStart:
+ obj, err := parseObject(ch)
+ if err != nil {
+ return nil, err
+ }
+ m.Set(nextKey, obj)
+ case lArrayStart:
+ arr, err := parseArray(ch)
+ if err != nil {
+ return nil, err
+ }
+ m.Set(nextKey, arr)
+ case lObjectEnd:
+ return m, nil
+ }
+ }
+ return nil, fmt.Errorf("unexpected end of object")
+}
+
+func parseArray(ch chan lexem) (model.NodeArrayValue, error) {
+ m := model.NodeArrayValue{}
+ for l := range ch {
+ switch l.Type {
+ case lString:
+ m = append(m, model.NewNode(strings.Trim(l.Value, `"`)))
+ case lNumber:
+ num, err := strconv.ParseFloat(l.Value, 64)
+ if err != nil {
+ return nil, err
+ }
+ m = append(m, model.NewNode(num))
+ case lBoolean:
+ if strings.ToLower(l.Value) == "true" {
+ m = append(m, model.NewNode(true))
+ continue
+ }
+ m = append(m, model.NewNode(false))
+ case lNull:
+ m = append(m, model.NewNode(nil))
+ case lObjectStart:
+ obj, err := parseObject(ch)
+ if err != nil {
+ return nil, err
+ }
+ m = append(m, model.NewNode(obj))
+ case lArrayStart:
+ arr, err := parseArray(ch)
+ if err != nil {
+ return nil, err
+ }
+ m = append(m, model.NewNode(arr))
+ case lArrayEnd:
+ return m, nil
+ }
+ }
+ return nil, fmt.Errorf("unexpected end of object")
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..ea5aa0e
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,71 @@
+package parser
+
+import (
+ "reflect"
+ "testing"
+
+ "go.neonxp.dev/json/model"
+)
+
// TestParse checks end-to-end parsing of a document exercising every value
// kind: string, integer, float, nested object, array, null and boolean.
//
// NOTE(review): the parser decodes every number as float64, while this
// fixture builds nodes from untyped ints (1337, 1, 2) — presumably
// model.NewNode normalizes numeric types; verify, otherwise
// reflect.DeepEqual would report a mismatch here.
func TestParse(t *testing.T) {
	type args struct {
		json string
	}
	tests := []struct {
		name    string
		args    args
		want    *model.Node
		wantErr bool
	}{
		{
			name: "complex",
			args: args{
				json: `{
				"string key": "string value",
				"number key": 1337,
				"float key": 123.3,
				"object key": {
					"ab": "cd"
				},
				"array key": [
					1,
					2,
					"three"
				],
				"null key":null,
				"boolean key":true
			}`,
			},
			want: model.NewNode(
				model.NodeObjectValue{
					"string key": model.NewNode("string value"),
					"number key": model.NewNode(1337),
					"float key":  model.NewNode(123.3),
					"object key": model.NewNode(model.NodeObjectValue{
						"ab": model.NewNode("cd"),
					}),
					"array key": model.NewNode(model.NodeArrayValue{
						model.NewNode(1),
						model.NewNode(2),
						model.NewNode("three"),
					}),
					"null key":    model.NewNode(nil),
					"boolean key": model.NewNode(true),
				},
			),
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := Parse(tt.args.json)
			if (err != nil) != tt.wantErr {
				t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("Parse() = %v, want %v", got, tt.want)
			}
		})
	}
}
diff --git a/parser/scanners.go b/parser/scanners.go
new file mode 100644
index 0000000..078f9d3
--- /dev/null
+++ b/parser/scanners.go
@@ -0,0 +1,32 @@
+package parser
+
+func scanNumber(l *lexer) bool {
+ l.AcceptWhile("0123456789")
+ if l.AtStart() {
+ // not found any digit
+ return false
+ }
+ l.Accept(".")
+ l.AcceptWhile("0123456789")
+ return !l.AtStart()
+}
+
+func scanQuotedString(l *lexer, quote rune) bool {
+ start := l.Pos
+ if l.Next() != quote {
+ l.Back()
+ return false
+ }
+ for {
+ ch := l.Next()
+ switch ch {
+ case eof:
+ l.Pos = start // Return position to start
+ return false // Unclosed quote string?
+ case '\\':
+ l.Next() // Skip next char
+ case quote:
+ return true // Closing quote
+ }
+ }
+}
diff --git a/parser/statefunc.go b/parser/statefunc.go
new file mode 100644
index 0000000..69d7098
--- /dev/null
+++ b/parser/statefunc.go
@@ -0,0 +1,17 @@
+package parser
+
// stateFunc is one step of the lexer's state machine: it consumes some input
// and returns the next state, or nil to stop scanning.
type stateFunc func(*lexer) stateFunc

// stateStack is a LIFO of states, used to resume an enclosing context after
// a nested object/array finishes.
type stateStack []stateFunc
+
// Push appends s to the top of the stack.
func (ss *stateStack) Push(s stateFunc) {
	*ss = append(*ss, s)
}
+
+func (ss *stateStack) Pop() (s stateFunc) {
+ if len(*ss) == 0 {
+ return nil
+ }
+ *ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
+ return s
+}
diff --git a/parser/states.go b/parser/states.go
new file mode 100644
index 0000000..92c80dc
--- /dev/null
+++ b/parser/states.go
@@ -0,0 +1,110 @@
+package parser
+
+func initJson(l *lexer) stateFunc {
+ ignoreWhiteSpace(l)
+ switch {
+ case l.Accept("{"):
+ l.Emit(lObjectStart)
+ return stateInObject
+ case l.Accept("["):
+ l.Emit(lArrayStart)
+ case l.Peek() == eof:
+ return nil
+ }
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInObject(l *lexer) stateFunc {
+ // we in object, so we expect field keys and values
+ ignoreWhiteSpace(l)
+ if l.Accept("}") {
+ l.Emit(lObjectEnd)
+ // If meet close object return to previous state (including initial)
+ return l.PopState()
+ }
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ ignoreWhiteSpace(l)
+ if !scanQuotedString(l, '"') {
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
+ }
+ l.Emit(lObjectKey)
+ ignoreWhiteSpace(l)
+ if !l.Accept(":") {
+ return l.Errorf("Expected ':'")
+ }
+ ignoreWhiteSpace(l)
+ l.Emit(lObjectValue)
+ switch {
+ case scanQuotedString(l, '"'):
+ l.Emit(lString)
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ l.Ignore()
+ ignoreWhiteSpace(l)
+ return stateInObject
+ case scanNumber(l):
+ l.Emit(lNumber)
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ l.Ignore()
+ ignoreWhiteSpace(l)
+ return stateInObject
+ case l.AcceptAnyOf([]string{"true", "false"}, true):
+ l.Emit(lBoolean)
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ l.Ignore()
+ ignoreWhiteSpace(l)
+ return stateInObject
+ case l.AcceptString("null", true):
+ l.Emit(lNull)
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ l.Ignore()
+ ignoreWhiteSpace(l)
+ return stateInObject
+ case l.Accept("{"):
+ l.Emit(lObjectStart)
+ l.PushState(stateInObject)
+ return stateInObject
+ case l.Accept("["):
+ l.Emit(lArrayStart)
+ l.PushState(stateInObject)
+ return stateInArray
+ }
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInArray(l *lexer) stateFunc {
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ ignoreWhiteSpace(l)
+ switch {
+ case scanQuotedString(l, '"'):
+ l.Emit(lString)
+ case scanNumber(l):
+ l.Emit(lNumber)
+ case l.AcceptAnyOf([]string{"true", "false"}, true):
+ l.Emit(lBoolean)
+ case l.AcceptString("null", true):
+ l.Emit(lNull)
+ case l.Accept("{"):
+ l.Emit(lObjectStart)
+ l.PushState(stateInArray)
+ return stateInObject
+ case l.Accept("["):
+ l.Emit(lArrayStart)
+ l.PushState(stateInArray)
+ return stateInArray
+ case l.Accept("]"):
+ l.Emit(lArrayEnd)
+ return l.PopState()
+ }
+ return stateInArray
+}
+
+func ignoreWhiteSpace(l *lexer) {
+ l.AcceptWhile(" \n\t") // ignore whitespaces
+ l.Ignore()
+}