initial commit

author: NeonXP <i@neonxp.dev> 2022-11-16 05:11:19 +0300
committer: NeonXP <i@neonxp.dev> 2022-11-16 05:11:19 +0300
commit: a321bfe7b2f6db5078de7b2e5ed5ddcccd65f319 (patch)
tree: d11c187bceee610a7843463949df128569142680 /parser
7 files changed, 571 insertions, 0 deletions
diff --git a/parser/lexer.go b/parser/lexer.go
new file mode 100644
index 0000000..a772698
--- /dev/null
+++ b/parser/lexer.go
@@ -0,0 +1,181 @@
+package parser
+
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+)
+
+// eof is the sentinel rune returned by lexer.Next once the whole input has
+// been consumed. It is negative, so it can never collide with a decoded rune.
+const eof rune = -1
+
+// lexem is a single token produced by the lexer and sent over lexer.Output.
+// For error lexems (Type == lError) Value holds the formatted error message
+// instead of a slice of the input.
+type lexem struct {
+	Type  lexType // Type of Lexem.
+	Value string  // Value of Lexem.
+	Start int     // Start position at input string (byte offset).
+	End   int     // End position at input string (byte offset, exclusive).
+}
+
+// lexType identifies the kind of a lexem.
+type lexType int
+
+// Kinds of lexems. The numeric values are mirrored by the generated
+// lexType.String method, so the stringer output must be regenerated whenever
+// this list changes.
+const (
+	lEOF         lexType = iota // Zero value; also what a receive from the closed Output channel yields.
+	lError                      // Lexing error; Value carries the message.
+	lObjectStart                // "{"
+	lObjectEnd                  // "}"
+	lObjectKey                  // Quoted key of an object member.
+	lObjectValue                // Empty marker emitted between a key and its value.
+	lArrayStart                 // "["
+	lArrayEnd                   // "]"
+	lString                     // Quoted string, quotes included.
+	lNumber                     // Numeric literal.
+	lBoolean                    // "true" or "false" (matched case-insensitively).
+	lNull                       // "null" (matched case-insensitively).
+)
+
+// lexer holds current scanner state.
+//
+// Input[Start:Pos] is the text of the lexem currently being scanned; Emit
+// sends it to Output and moves Start up to Pos.
+type lexer struct {
+	Input  string     // Input string.
+	Start  int        // Start position of current lexem.
+	Pos    int        // Pos at input string.
+	Output chan lexem // Lexems channel.
+	width  int        // Width of last rune.
+	states stateStack // Stack of saved states used by PushState and PopState.
+}
+
+// newLexer builds a lexer positioned at the beginning of input. The output
+// channel has room for two lexems; all other fields keep their zero values.
+func newLexer(input string) *lexer {
+	l := new(lexer)
+	l.Input = input
+	l.Output = make(chan lexem, 2)
+	return l
+}
+
+// Run drives the state machine: starting from init, every state function is
+// called with the lexer and returns the state to run next. A nil state ends
+// the loop, after which the Output channel is closed.
+func (l *lexer) Run(init stateFunc) {
+	state := init
+	for state != nil {
+		state = state(l)
+	}
+	close(l.Output)
+}
+
+// PopState removes and returns the most recently pushed state function.
+// It returns nil when no state has been saved.
+func (l *lexer) PopState() stateFunc {
+	prev := l.states.Pop()
+	return prev
+}
+
+// PushState saves s on the state stack so that a later PopState can resume it
+// once a nested object or array has been lexed.
+func (l *lexer) PushState(s stateFunc) {
+	l.states = append(l.states, s)
+}
+
+// Emit sends the pending text Input[Start:Pos] to Output as a lexem of type
+// typ, then starts the next lexem at the current position.
+func (l *lexer) Emit(typ lexType) {
+	from, to := l.Start, l.Pos
+	token := lexem{
+		Type:  typ,
+		Value: l.Input[from:to],
+		Start: from,
+		End:   to,
+	}
+	l.Output <- token
+	l.Start = to
+}
+
+// Errorf sends an lError lexem whose Value is the formatted message and
+// returns nil, so a state function can `return l.Errorf(...)` to stop the
+// lexer.
+func (l *lexer) Errorf(format string, args ...interface{}) stateFunc {
+	message := fmt.Sprintf(format, args...)
+	failure := lexem{Type: lError, Value: message, Start: l.Start, End: l.Pos}
+	l.Output <- failure
+	return nil
+}
+
+// Next decodes the rune at the current position, advances past it and
+// remembers its byte width for Back. At the end of the input it returns eof
+// and records a width of zero.
+func (l *lexer) Next() (r rune) {
+	if l.Pos < len(l.Input) {
+		r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
+		l.Pos += l.width
+		return r
+	}
+	l.width = 0
+	return eof
+}
+
+// Back steps the position back by the width of the rune most recently read by
+// Next. Only one rune width is remembered.
+func (l *lexer) Back() {
+	l.Pos = l.Pos - l.width
+}
+
+// Ignore discards the text scanned so far: the next lexem starts at the
+// current position and the remembered rune width is cleared.
+func (l *lexer) Ignore() {
+	l.width = 0
+	l.Start = l.Pos
+}
+
+// Peek reports the rune at the current position (eof at the end of the input)
+// and leaves the position where it was.
+func (l *lexer) Peek() (r rune) {
+	upcoming := l.Next()
+	l.Back()
+	return upcoming
+}
+
+// Accept consumes the next rune if it occurs in valid and reports whether it
+// did. When the rune does not match, the position is restored.
+func (l *lexer) Accept(valid string) bool {
+	r := l.Next()
+	if !strings.ContainsRune(valid, r) {
+		l.Back()
+		return false
+	}
+	return true
+}
+
+// AcceptString consumes s if the input at the current position starts with
+// it, and reports whether it did. With caseInsentive set the comparison
+// ignores letter case.
+//
+// Only the first len(s) bytes of the remaining input are examined, so the
+// cost does not depend on how much input is left and the position always
+// advances by exactly the number of bytes that were compared.
+func (l *lexer) AcceptString(s string, caseInsentive bool) bool {
+	rest := l.Input[l.Pos:]
+	if len(rest) < len(s) {
+		return false
+	}
+	candidate := rest[:len(s)]
+	matched := candidate == s
+	if !matched && caseInsentive {
+		matched = strings.EqualFold(candidate, s)
+	}
+	if !matched {
+		return false
+	}
+	// No single rune was read, so there is nothing for Back to undo.
+	l.width = 0
+	l.Pos += len(s)
+	return true
+}
+
+// AcceptAnyOf tries the candidates in s in order and consumes the first one
+// found at the current position. It reports whether any candidate matched.
+func (l *lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
+	for i := 0; i < len(s); i++ {
+		if !l.AcceptString(s[i], caseInsentive) {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// AcceptWhile consumes runes for as long as they occur in valid and reports
+// whether at least one rune was consumed.
+func (l *lexer) AcceptWhile(valid string) bool {
+	consumed := 0
+	for l.Accept(valid) {
+		consumed++
+	}
+	return consumed > 0
+}
+
+// AcceptWhileNot consumes runes until it meets one that occurs in invalid or
+// reaches the end of the input, and reports whether at least one rune was
+// consumed. The terminating rune itself is not consumed.
+func (l *lexer) AcceptWhileNot(invalid string) bool {
+	isValid := false
+	for {
+		r := l.Next()
+		// eof is never contained in invalid, so it has to be checked
+		// explicitly; otherwise the loop would never terminate at the end
+		// of the input.
+		if r == eof || strings.ContainsRune(invalid, r) {
+			break
+		}
+		isValid = true
+	}
+	// Un-read the terminator (a no-op at eof, where the width is zero).
+	l.Back()
+	return isValid
+}
+
+// AtStart reports whether the current lexem is still empty, i.e. nothing has
+// been consumed since the last Emit or Ignore.
+func (l *lexer) AtStart() bool {
+	return l.Start == l.Pos
+}
diff --git a/parser/lextype_string.go b/parser/lextype_string.go
new file mode 100644
index 0000000..f34eb7c
--- /dev/null
+++ b/parser/lextype_string.go
@@ -0,0 +1,34 @@
+// Code generated by "stringer -type=lexType"; DO NOT EDIT.
+
+package parser
+
+import "strconv"
+
+// Compile-time guard emitted by stringer: each index expression below is only
+// valid while the named constant still has the value it had at generation time.
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[lEOF-0]
+	_ = x[lError-1]
+	_ = x[lObjectStart-2]
+	_ = x[lObjectEnd-3]
+	_ = x[lObjectKey-4]
+	_ = x[lObjectValue-5]
+	_ = x[lArrayStart-6]
+	_ = x[lArrayEnd-7]
+	_ = x[lString-8]
+	_ = x[lNumber-9]
+	_ = x[lBoolean-10]
+	_ = x[lNull-11]
+}
+
+// _lexType_name is the concatenation of all constant names; _lexType_index
+// holds the byte offset at which each name starts, plus the total length.
+const _lexType_name = "lEOFlErrorlObjectStartlObjectEndlObjectKeylObjectValuelArrayStartlArrayEndlStringlNumberlBooleanlNull"
+
+var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}
+
+// String returns the constant's name, or "lexType(N)" for a value outside the
+// known range.
+func (i lexType) String() string {
+	if i < 0 || i >= lexType(len(_lexType_index)-1) {
+		return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
+}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..222e4c0
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,126 @@
+package parser
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"go.neonxp.dev/json/model"
+)
+
+// Parse lexes json in a background goroutine and builds a node tree from the
+// resulting lexem stream.
+//
+// It returns the root node wrapped by model.NewNode, or a nil node and the
+// first error reported while parsing.
+func Parse(json string) (*model.Node, error) {
+	l := newLexer(json)
+	go l.Run(initJson)
+	n, err := parse(l.Output)
+	if err != nil {
+		// The parser stopped reading early. Keep draining the channel so the
+		// lexer goroutine is not left blocked on a send forever.
+		go func() {
+			for range l.Output {
+			}
+		}()
+		return nil, err
+	}
+	return model.NewNode(n), nil
+}
+
+// parse reads the first lexem from ch and converts the value it introduces.
+//
+// Objects and arrays are delegated to parseObject and parseArray. Strings are
+// returned without their surrounding quotes, numbers as float64, booleans as
+// bool and null as nil. An lError lexem, a closed channel or any other lexem
+// type yields an error.
+//
+// NOTE: escape sequences inside strings are not decoded here.
+func parse(ch chan lexem) (any, error) {
+	prefix, ok := <-ch
+	if !ok {
+		// The lexer closed the channel without emitting anything.
+		return nil, fmt.Errorf("unexpected end of input")
+	}
+	switch prefix.Type {
+	case lError:
+		// Surface the lexer's own message instead of a generic one.
+		return nil, fmt.Errorf("%s at position %d", prefix.Value, prefix.Start)
+	case lObjectStart:
+		return parseObject(ch)
+	case lArrayStart:
+		return parseArray(ch)
+	case lString:
+		// Strip exactly one quote per side; strings.Trim would also eat
+		// quotes that belong to the content (e.g. a trailing \").
+		return strings.TrimSuffix(strings.TrimPrefix(prefix.Value, `"`), `"`), nil
+	case lNumber:
+		num, err := strconv.ParseFloat(prefix.Value, 64)
+		if err != nil {
+			return nil, err
+		}
+		return num, nil
+	case lBoolean:
+		return strings.EqualFold(prefix.Value, "true"), nil
+	case lNull:
+		return nil, nil
+	}
+	return nil, fmt.Errorf("invalid token: '%s' type=%s", prefix.Value, prefix.Type.String())
+}
+
+// parseObject consumes lexems from ch until the lObjectEnd that closes the
+// current object and returns the collected members.
+//
+// Every lObjectKey lexem sets the key under which the next value is stored.
+// Nested objects and arrays are parsed recursively. An lError lexem from the
+// lexer is returned as an error, and so is running out of lexems before the
+// object is closed.
+//
+// NOTE: escape sequences inside keys and string values are not decoded here.
+func parseObject(ch chan lexem) (model.NodeObjectValue, error) {
+	m := model.NodeObjectValue{}
+	nextKey := ""
+	for l := range ch {
+		switch l.Type {
+		case lError:
+			// Report what the lexer complained about rather than letting the
+			// loop run dry and return a generic message.
+			return nil, fmt.Errorf("%s at position %d", l.Value, l.Start)
+		case lObjectKey:
+			// Strip exactly one quote per side; strings.Trim would also eat
+			// quotes that belong to the content.
+			nextKey = strings.TrimSuffix(strings.TrimPrefix(l.Value, `"`), `"`)
+		case lString:
+			m.Set(nextKey, strings.TrimSuffix(strings.TrimPrefix(l.Value, `"`), `"`))
+		case lNumber:
+			num, err := strconv.ParseFloat(l.Value, 64)
+			if err != nil {
+				return nil, err
+			}
+			m.Set(nextKey, num)
+		case lBoolean:
+			m.Set(nextKey, strings.EqualFold(l.Value, "true"))
+		case lNull:
+			m.Set(nextKey, nil)
+		case lObjectStart:
+			obj, err := parseObject(ch)
+			if err != nil {
+				return nil, err
+			}
+			m.Set(nextKey, obj)
+		case lArrayStart:
+			arr, err := parseArray(ch)
+			if err != nil {
+				return nil, err
+			}
+			m.Set(nextKey, arr)
+		case lObjectEnd:
+			return m, nil
+		}
+	}
+	return nil, fmt.Errorf("unexpected end of object")
+}
+
+// parseArray consumes lexems from ch until the lArrayEnd that closes the
+// current array and returns the collected elements, each wrapped by
+// model.NewNode.
+//
+// Nested objects and arrays are parsed recursively. An lError lexem from the
+// lexer is returned as an error, and so is running out of lexems before the
+// array is closed.
+//
+// NOTE: escape sequences inside string elements are not decoded here.
+func parseArray(ch chan lexem) (model.NodeArrayValue, error) {
+	m := model.NodeArrayValue{}
+	for l := range ch {
+		switch l.Type {
+		case lError:
+			// Report what the lexer complained about rather than letting the
+			// loop run dry and return a generic message.
+			return nil, fmt.Errorf("%s at position %d", l.Value, l.Start)
+		case lString:
+			// Strip exactly one quote per side; strings.Trim would also eat
+			// quotes that belong to the content.
+			text := strings.TrimSuffix(strings.TrimPrefix(l.Value, `"`), `"`)
+			m = append(m, model.NewNode(text))
+		case lNumber:
+			num, err := strconv.ParseFloat(l.Value, 64)
+			if err != nil {
+				return nil, err
+			}
+			m = append(m, model.NewNode(num))
+		case lBoolean:
+			m = append(m, model.NewNode(strings.EqualFold(l.Value, "true")))
+		case lNull:
+			m = append(m, model.NewNode(nil))
+		case lObjectStart:
+			obj, err := parseObject(ch)
+			if err != nil {
+				return nil, err
+			}
+			m = append(m, model.NewNode(obj))
+		case lArrayStart:
+			arr, err := parseArray(ch)
+			if err != nil {
+				return nil, err
+			}
+			m = append(m, model.NewNode(arr))
+		case lArrayEnd:
+			return m, nil
+		}
+	}
+	return nil, fmt.Errorf("unexpected end of array")
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..ea5aa0e
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,71 @@
+package parser
+
+import (
+	"reflect"
+	"testing"
+
+	"go.neonxp.dev/json/model"
+)
+
+// TestParse feeds JSON documents to Parse and compares the returned node tree
+// with a hand-built one using reflect.DeepEqual.
+func TestParse(t *testing.T) {
+	cases := []struct {
+		name    string
+		input   string
+		want    *model.Node
+		wantErr bool
+	}{
+		{
+			name: "complex",
+			input: `{
+					"string key": "string value",
+					"number key": 1337,
+					"float key": 123.3,
+					"object key": {
+						"ab": "cd"
+					},
+					"array key": [
+						1, 
+						2, 
+						"three"
+					],
+					"null key":null,
+					"boolean key":true
+					}`,
+			want: model.NewNode(
+				model.NodeObjectValue{
+					"string key": model.NewNode("string value"),
+					"number key": model.NewNode(1337),
+					"float key":  model.NewNode(123.3),
+					"object key": model.NewNode(model.NodeObjectValue{
+						"ab": model.NewNode("cd"),
+					}),
+					"array key": model.NewNode(model.NodeArrayValue{
+						model.NewNode(1),
+						model.NewNode(2),
+						model.NewNode("three"),
+					}),
+					"null key":    model.NewNode(nil),
+					"boolean key": model.NewNode(true),
+				},
+			),
+			wantErr: false,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := Parse(tc.input)
+			gotErr := err != nil
+			if gotErr != tc.wantErr {
+				t.Errorf("Parse() error = %v, wantErr %v", err, tc.wantErr)
+				return
+			}
+			if reflect.DeepEqual(got, tc.want) {
+				return
+			}
+			t.Errorf("Parse() = %v, want %v", got, tc.want)
+		})
+	}
+}
diff --git a/parser/scanners.go b/parser/scanners.go
new file mode 100644
index 0000000..078f9d3
--- /dev/null
+++ b/parser/scanners.go
@@ -0,0 +1,32 @@
+package parser
+
+// scanNumber consumes a numeric literal at the current position and reports
+// whether one was found.
+//
+// Accepted form: an optional leading "-", one or more digits, an optional "."
+// followed by any number of digits, and an optional exponent ("e" or "E", an
+// optional sign, one or more digits). When no number can be scanned the
+// position is restored and false is returned.
+func scanNumber(l *lexer) bool {
+	const digits = "0123456789"
+	start := l.Pos
+	l.Accept("-")
+	if !l.AcceptWhile(digits) {
+		// No digit found; also un-read a sign that may have been consumed.
+		l.Pos = start
+		return false
+	}
+	if l.Accept(".") {
+		l.AcceptWhile(digits)
+	}
+	if l.Accept("eE") {
+		l.Accept("+-")
+		if !l.AcceptWhile(digits) {
+			// An exponent marker must be followed by at least one digit.
+			l.Pos = start
+			return false
+		}
+	}
+	return true
+}
+
+// scanQuotedString consumes a string delimited by quote, both delimiters
+// included, and reports whether a complete string was found. A backslash
+// makes the rune after it part of the string, so an escaped quote does not end
+// it. If the input does not start with quote, or ends before the closing
+// quote, the position is restored and false is returned.
+func scanQuotedString(l *lexer, quote rune) bool {
+	begin := l.Pos
+	if first := l.Next(); first != quote {
+		l.Back()
+		return false
+	}
+	for {
+		r := l.Next()
+		if r == eof {
+			// Input ended inside the string: rewind to where we started.
+			l.Pos = begin
+			return false
+		} else if r == '\\' {
+			// Swallow the escaped rune, whatever it is.
+			l.Next()
+		} else if r == quote {
+			return true
+		}
+	}
+}
diff --git a/parser/statefunc.go b/parser/statefunc.go
new file mode 100644
index 0000000..69d7098
--- /dev/null
+++ b/parser/statefunc.go
@@ -0,0 +1,17 @@
+package parser
+
+// stateFunc is one state of the lexer's state machine: it scans some input
+// from the lexer and returns the state to run next, or nil to stop.
+type stateFunc func(*lexer) stateFunc
+
+// stateStack is a LIFO stack of state functions.
+type stateStack []stateFunc
+
+// Push places s on top of the stack.
+func (ss *stateStack) Push(s stateFunc) {
+	grown := append(*ss, s)
+	*ss = grown
+}
+
+// Pop removes the top state from the stack and returns it. It returns nil
+// when the stack is empty.
+func (ss *stateStack) Pop() (s stateFunc) {
+	stack := *ss
+	top := len(stack) - 1
+	if top < 0 {
+		return nil
+	}
+	s = stack[top]
+	*ss = stack[:top]
+	return s
+}
diff --git a/parser/states.go b/parser/states.go
new file mode 100644
index 0000000..92c80dc
--- /dev/null
+++ b/parser/states.go
@@ -0,0 +1,110 @@
+package parser
+
+// initJson is the entry state. It skips leading whitespace and dispatches on
+// the first significant rune of the document:
+//
+//   - "{" emits lObjectStart and continues in stateInObject;
+//   - "[" emits lArrayStart and continues in stateInArray;
+//   - an empty or all-whitespace input stops the lexer without emitting;
+//   - anything else emits an lError lexem and stops the lexer.
+func initJson(l *lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	switch {
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		// Go on lexing the array elements. Without this return, control
+		// leaves the switch and reports "Unknown token" for every array.
+		return stateInArray
+	case l.Peek() == eof:
+		return nil
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+// stateInObject lexes one step inside an object: either the closing "}" or a
+// single `"key": value` member.
+//
+// On "}" it emits lObjectEnd and resumes the state saved by PushState (nil at
+// the top level, which stops the lexer). Otherwise it emits lObjectKey, an
+// empty lObjectValue marker and then the value. Scalar values return to
+// stateInObject; "{" and "[" save stateInObject and descend into the nested
+// container. A separating comma is optional both before the key and after a
+// scalar value.
+func stateInObject(l *lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	if l.Accept("}") {
+		l.Emit(lObjectEnd)
+		// Resume whatever enclosed this object (including the initial state).
+		return l.PopState()
+	}
+
+	// Member key, optionally preceded by a comma.
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	if !scanQuotedString(l, '"') {
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	l.Emit(lObjectKey)
+
+	// Key/value separator.
+	ignoreWhiteSpace(l)
+	if !l.Accept(":") {
+		return l.Errorf("Expected ':'")
+	}
+	ignoreWhiteSpace(l)
+	l.Emit(lObjectValue)
+
+	// Member value. Containers return immediately; scalars share the tail
+	// below.
+	var scalar lexType
+	switch {
+	case scanQuotedString(l, '"'):
+		scalar = lString
+	case scanNumber(l):
+		scalar = lNumber
+	case l.AcceptAnyOf([]string{"true", "false"}, true):
+		scalar = lBoolean
+	case l.AcceptString("null", true):
+		scalar = lNull
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInObject)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInObject)
+		return stateInArray
+	default:
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+
+	l.Emit(scalar)
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	l.Ignore()
+	ignoreWhiteSpace(l)
+	return stateInObject
+}
+
+// stateInArray lexes one step inside an array: a single element or the
+// closing "]".
+//
+// Scalar elements are emitted and the state repeats. "{" and "[" save
+// stateInArray and descend into the nested container. "]" emits lArrayEnd and
+// resumes the state saved by PushState (nil at the top level, which stops the
+// lexer). A comma before the element is optional. End of input or any other
+// rune emits an lError lexem and stops the lexer.
+func stateInArray(l *lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	switch {
+	case scanQuotedString(l, '"'):
+		l.Emit(lString)
+	case scanNumber(l):
+		l.Emit(lNumber)
+	case l.AcceptAnyOf([]string{"true", "false"}, true):
+		l.Emit(lBoolean)
+	case l.AcceptString("null", true):
+		l.Emit(lNull)
+	case l.Accept("{"):
+		l.Emit(lObjectStart)
+		l.PushState(stateInArray)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(lArrayStart)
+		l.PushState(stateInArray)
+		return stateInArray
+	case l.Accept("]"):
+		l.Emit(lArrayEnd)
+		return l.PopState()
+	case l.Peek() == eof:
+		// Nothing was consumed, so repeating this state would spin forever.
+		return l.Errorf("Unexpected end of input in array")
+	default:
+		// Same reasoning: an unrecognised rune must stop the lexer.
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	return stateInArray
+}
+
+// ignoreWhiteSpace skips any run of JSON whitespace (space, line feed,
+// carriage return, tab) at the current position and drops it from the pending
+// lexem.
+func ignoreWhiteSpace(l *lexer) {
+	l.AcceptWhile(" \n\r\t") // "\r" is included so CRLF line endings are skipped too
+	l.Ignore()
+}
author	NeonXP <i@neonxp.dev>	2022-11-16 05:11:19 +0300
committer	NeonXP <i@neonxp.dev>	2022-11-16 05:11:19 +0300
commit	a321bfe7b2f6db5078de7b2e5ed5ddcccd65f319 (patch)
tree	d11c187bceee610a7843463949df128569142680 /parser