author    NeonXP <i@neonxp.dev>  2022-12-27 02:37:02 +0300
committer NeonXP <i@neonxp.dev>  2022-12-27 02:40:03 +0300
commit    76a7f461ebbde70ea0e3d4f9b79c08139acaee7c (patch)
tree      5e6dcb05f00be5109b3465ef16a6e9169a27497e /parser
parent    6f1d1df79f161cfc695f74d271d689ba72c44d09 (diff)
Completely rewritten (v0.1.0)
Diffstat (limited to 'parser')
-rw-r--r--  parser/lexer.go           182
-rw-r--r--  parser/lextype_string.go   34
-rw-r--r--  parser/parser.go          126
-rw-r--r--  parser/parser_test.go      71
-rw-r--r--  parser/scanners.go         32
-rw-r--r--  parser/statefunc.go        17
-rw-r--r--  parser/states.go          110
7 files changed, 0 insertions, 572 deletions
diff --git a/parser/lexer.go b/parser/lexer.go
deleted file mode 100644
index 5034f6a..0000000
--- a/parser/lexer.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package parser
-
-import (
- "fmt"
- "strings"
- "unicode/utf8"
-)
-
-const eof rune = -1
-
-type lexem struct {
- Type lexType // Type of Lexem.
- Value string // Value of Lexem.
- Start int // Start position at input string.
- End int // End position at input string.
-}
-
-//go:generate stringer -type=lexType
-type lexType int
-
-const (
- lEOF lexType = iota
- lError
- lObjectStart
- lObjectEnd
- lObjectKey
- lObjectValue
- lArrayStart
- lArrayEnd
- lString
- lNumber
- lBoolean
- lNull
-)
-
-// lexer holds current scanner state.
-type lexer struct {
- Input string // Input string.
- Start int // Start position of current lexem.
- Pos int // Pos at input string.
- Output chan lexem // Lexems channel.
- width int // Width of last rune.
- states stateStack // Stack of states, used to return to the previous state via PopState.
-}
-
-// newLexer returns new scanner for input string.
-func newLexer(input string) *lexer {
- return &lexer{
- Input: input,
- Start: 0,
- Pos: 0,
- Output: make(chan lexem, 2),
- width: 0,
- }
-}
-
-// Run lexing.
-func (l *lexer) Run(init stateFunc) {
- for state := init; state != nil; {
- state = state(l)
- }
- close(l.Output)
-}
-
-// PopState returns previous state function.
-func (l *lexer) PopState() stateFunc {
- return l.states.Pop()
-}
-
-// PushState pushes the current state before descending into a nested state.
-func (l *lexer) PushState(s stateFunc) {
- l.states.Push(s)
-}
-
-// Emit current lexem to output.
-func (l *lexer) Emit(typ lexType) {
- l.Output <- lexem{
- Type: typ,
- Value: l.Input[l.Start:l.Pos],
- Start: l.Start,
- End: l.Pos,
- }
- l.Start = l.Pos
-}
-
-// Errorf produces error lexem and stops scanning.
-func (l *lexer) Errorf(format string, args ...interface{}) stateFunc {
- l.Output <- lexem{
- Type: lError,
- Value: fmt.Sprintf(format, args...),
- Start: l.Start,
- End: l.Pos,
- }
- return nil
-}
-
-// Next rune from input.
-func (l *lexer) Next() (r rune) {
- if int(l.Pos) >= len(l.Input) {
- l.width = 0
- return eof
- }
- r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
- l.Pos += l.width
- return r
-}
-
-// Back moves the position back to the previous rune.
-func (l *lexer) Back() {
- l.Pos -= l.width
-}
-
-// Ignore discards the previously buffered text.
-func (l *lexer) Ignore() {
- l.Start = l.Pos
- l.width = 0
-}
-
-// Peek returns the rune at the current position without advancing it.
-func (l *lexer) Peek() (r rune) {
- r = l.Next()
- l.Back()
- return r
-}
-
-// Accept consumes the next rune if it is in the `valid` string and returns true; otherwise it backs up and returns false.
-func (l *lexer) Accept(valid string) bool {
- if strings.ContainsRune(valid, l.Next()) {
- return true
- }
- l.Back()
- return false
-}
-
-// AcceptString returns true if the given string was found at the current position, and consumes it.
-func (l *lexer) AcceptString(s string, caseInsentive bool) bool {
- input := l.Input[l.Start:]
- if caseInsentive {
- input = strings.ToLower(input)
- s = strings.ToLower(s)
- }
- if strings.HasPrefix(input, s) {
- l.width = 0
- l.Pos += len(s)
- return true
- }
- return false
-}
-
-// AcceptAnyOf consumes the first matching substring. Returns true if any of the substrings was found.
-func (l *lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
- for _, substring := range s {
- if l.AcceptString(substring, caseInsentive) {
- return true
- }
- }
- return false
-}
-
-// AcceptWhile consumes runes from the input while they are in the `valid` string.
-func (l *lexer) AcceptWhile(valid string) bool {
- isValid := false
- for l.Accept(valid) {
- isValid = true
- }
- return isValid
-}
-
-// AcceptWhileNot consumes runes from the input while they are NOT in the `invalid` string.
-func (l *lexer) AcceptWhileNot(invalid string) bool {
- isValid := false
- for !strings.ContainsRune(invalid, l.Next()) {
- isValid = true
- }
- l.Back()
- return isValid
-}
-
-// AtStart returns true if the current lexem is still empty (nothing consumed since Start).
-func (l *lexer) AtStart() bool {
- return l.Pos == l.Start
-}
diff --git a/parser/lextype_string.go b/parser/lextype_string.go
deleted file mode 100644
index f34eb7c..0000000
--- a/parser/lextype_string.go
+++ /dev/null
@@ -1,34 +0,0 @@
-// Code generated by "stringer -type=lexType"; DO NOT EDIT.
-
-package parser
-
-import "strconv"
-
-func _() {
- // An "invalid array index" compiler error signifies that the constant values have changed.
- // Re-run the stringer command to generate them again.
- var x [1]struct{}
- _ = x[lEOF-0]
- _ = x[lError-1]
- _ = x[lObjectStart-2]
- _ = x[lObjectEnd-3]
- _ = x[lObjectKey-4]
- _ = x[lObjectValue-5]
- _ = x[lArrayStart-6]
- _ = x[lArrayEnd-7]
- _ = x[lString-8]
- _ = x[lNumber-9]
- _ = x[lBoolean-10]
- _ = x[lNull-11]
-}
-
-const _lexType_name = "lEOFlErrorlObjectStartlObjectEndlObjectKeylObjectValuelArrayStartlArrayEndlStringlNumberlBooleanlNull"
-
-var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}
-
-func (i lexType) String() string {
- if i < 0 || i >= lexType(len(_lexType_index)-1) {
- return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
- }
- return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
-}
diff --git a/parser/parser.go b/parser/parser.go
deleted file mode 100644
index dfcd4b4..0000000
--- a/parser/parser.go
+++ /dev/null
@@ -1,126 +0,0 @@
-package parser
-
-import (
- "fmt"
- "strconv"
- "strings"
-
- "go.neonxp.dev/json/model"
-)
-
-func Parse(json string) (model.Node, error) {
- l := newLexer(json)
- go l.Run(initJson)
- n, err := parse(l.Output)
- if err != nil {
- return nil, err
- }
- return model.NewNode(n), nil
-}
-
-func parse(ch chan lexem) (any, error) {
- prefix := <-ch
- switch prefix.Type {
- case lObjectStart:
- return parseObject(ch)
- case lArrayStart:
- return parseArray(ch)
- case lString:
- return strings.Trim(prefix.Value, `"`), nil
- case lNumber:
- num, err := strconv.ParseFloat(prefix.Value, 64)
- if err != nil {
- return nil, err
- }
- return num, nil
- case lBoolean:
- if strings.ToLower(prefix.Value) == "true" {
- return true, nil
- }
- return false, nil
- case lNull:
- return nil, nil
- }
- return nil, fmt.Errorf("ivalid token: '%s' type=%s", prefix.Value, prefix.Type.String())
-}
-
-func parseObject(ch chan lexem) (model.NodeObjectValue, error) {
- m := model.NodeObjectValue{}
- nextKey := ""
- for l := range ch {
- switch l.Type {
- case lObjectKey:
- nextKey = strings.Trim(l.Value, `"`)
- case lString:
- m.Set(nextKey, strings.Trim(l.Value, `"`))
- case lNumber:
- num, err := strconv.ParseFloat(l.Value, 64)
- if err != nil {
- return nil, err
- }
- m.Set(nextKey, num)
- case lBoolean:
- if strings.ToLower(l.Value) == "true" {
- m.Set(nextKey, true)
- continue
- }
- m.Set(nextKey, false)
- case lNull:
- m.Set(nextKey, nil)
- case lObjectStart:
- obj, err := parseObject(ch)
- if err != nil {
- return nil, err
- }
- m.Set(nextKey, obj)
- case lArrayStart:
- arr, err := parseArray(ch)
- if err != nil {
- return nil, err
- }
- m.Set(nextKey, arr)
- case lObjectEnd:
- return m, nil
- }
- }
- return nil, fmt.Errorf("unexpected end of object")
-}
-
-func parseArray(ch chan lexem) (model.NodeArrayValue, error) {
- m := model.NodeArrayValue{}
- for l := range ch {
- switch l.Type {
- case lString:
- m = append(m, model.NewNode(strings.Trim(l.Value, `"`)))
- case lNumber:
- num, err := strconv.ParseFloat(l.Value, 64)
- if err != nil {
- return nil, err
- }
- m = append(m, model.NewNode(num))
- case lBoolean:
- if strings.ToLower(l.Value) == "true" {
- m = append(m, model.NewNode(true))
- continue
- }
- m = append(m, model.NewNode(false))
- case lNull:
- m = append(m, model.NewNode(nil))
- case lObjectStart:
- obj, err := parseObject(ch)
- if err != nil {
- return nil, err
- }
- m = append(m, model.NewNode(obj))
- case lArrayStart:
- arr, err := parseArray(ch)
- if err != nil {
- return nil, err
- }
- m = append(m, model.NewNode(arr))
- case lArrayEnd:
- return m, nil
- }
- }
- return nil, fmt.Errorf("unexpected end of object")
-}
diff --git a/parser/parser_test.go b/parser/parser_test.go
deleted file mode 100644
index 88a1f8f..0000000
--- a/parser/parser_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package parser
-
-import (
- "reflect"
- "testing"
-
- "go.neonxp.dev/json/model"
-)
-
-func TestParse(t *testing.T) {
- type args struct {
- json string
- }
- tests := []struct {
- name string
- args args
- want model.Node
- wantErr bool
- }{
- {
- name: "complex",
- args: args{
- json: `{
- "string key": "string value",
- "number key": 1337,
- "float key": 123.3,
- "object key": {
- "ab": "cd"
- },
- "array key": [
- 1,
- 2,
- "three"
- ],
- "null key":null,
- "boolean key":true
- }`,
- },
- want: model.NewNode(
- model.NodeObjectValue{
- "string key": model.NewNode("string value"),
- "number key": model.NewNode(1337),
- "float key": model.NewNode(123.3),
- "object key": model.NewNode(model.NodeObjectValue{
- "ab": model.NewNode("cd"),
- }),
- "array key": model.NewNode(model.NodeArrayValue{
- model.NewNode(1),
- model.NewNode(2),
- model.NewNode("three"),
- }),
- "null key": model.NewNode(nil),
- "boolean key": model.NewNode(true),
- },
- ),
- wantErr: false,
- },
- }
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got, err := Parse(tt.args.json)
- if (err != nil) != tt.wantErr {
- t.Errorf("Parse() error = %v, wantErr %v", err, tt.wantErr)
- return
- }
- if !reflect.DeepEqual(got, tt.want) {
- t.Errorf("Parse() = %v, want %v", got, tt.want)
- }
- })
- }
-}
diff --git a/parser/scanners.go b/parser/scanners.go
deleted file mode 100644
index 078f9d3..0000000
--- a/parser/scanners.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package parser
-
-func scanNumber(l *lexer) bool {
- l.AcceptWhile("0123456789")
- if l.AtStart() {
- // no digits found
- return false
- }
- l.Accept(".")
- l.AcceptWhile("0123456789")
- return !l.AtStart()
-}
-
-func scanQuotedString(l *lexer, quote rune) bool {
- start := l.Pos
- if l.Next() != quote {
- l.Back()
- return false
- }
- for {
- ch := l.Next()
- switch ch {
- case eof:
- l.Pos = start // Return position to start
- return false // Unclosed quoted string
- case '\\':
- l.Next() // Skip next char
- case quote:
- return true // Closing quote
- }
- }
-}
diff --git a/parser/statefunc.go b/parser/statefunc.go
deleted file mode 100644
index 69d7098..0000000
--- a/parser/statefunc.go
+++ /dev/null
@@ -1,17 +0,0 @@
-package parser
-
-type stateFunc func(*lexer) stateFunc
-
-type stateStack []stateFunc
-
-func (ss *stateStack) Push(s stateFunc) {
- *ss = append(*ss, s)
-}
-
-func (ss *stateStack) Pop() (s stateFunc) {
- if len(*ss) == 0 {
- return nil
- }
- *ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
- return s
-}
diff --git a/parser/states.go b/parser/states.go
deleted file mode 100644
index 92c80dc..0000000
--- a/parser/states.go
+++ /dev/null
@@ -1,110 +0,0 @@
-package parser
-
-func initJson(l *lexer) stateFunc {
- ignoreWhiteSpace(l)
- switch {
- case l.Accept("{"):
- l.Emit(lObjectStart)
- return stateInObject
- case l.Accept("["):
- l.Emit(lArrayStart)
- case l.Peek() == eof:
- return nil
- }
- return l.Errorf("Unknown token: %s", string(l.Peek()))
-}
-
-func stateInObject(l *lexer) stateFunc {
- // we are inside an object, so we expect field keys and values
- ignoreWhiteSpace(l)
- if l.Accept("}") {
- l.Emit(lObjectEnd)
- // On a closing brace, return to the previous state (including the initial one)
- return l.PopState()
- }
- ignoreWhiteSpace(l)
- l.Accept(",")
- ignoreWhiteSpace(l)
- if !scanQuotedString(l, '"') {
- return l.Errorf("Unknown token: %s", string(l.Peek()))
- }
- l.Emit(lObjectKey)
- ignoreWhiteSpace(l)
- if !l.Accept(":") {
- return l.Errorf("Expected ':'")
- }
- ignoreWhiteSpace(l)
- l.Emit(lObjectValue)
- switch {
- case scanQuotedString(l, '"'):
- l.Emit(lString)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject
- case scanNumber(l):
- l.Emit(lNumber)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject
- case l.AcceptAnyOf([]string{"true", "false"}, true):
- l.Emit(lBoolean)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject
- case l.AcceptString("null", true):
- l.Emit(lNull)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject
- case l.Accept("{"):
- l.Emit(lObjectStart)
- l.PushState(stateInObject)
- return stateInObject
- case l.Accept("["):
- l.Emit(lArrayStart)
- l.PushState(stateInObject)
- return stateInArray
- }
- return l.Errorf("Unknown token: %s", string(l.Peek()))
-}
-
-func stateInArray(l *lexer) stateFunc {
- ignoreWhiteSpace(l)
- l.Accept(",")
- ignoreWhiteSpace(l)
- switch {
- case scanQuotedString(l, '"'):
- l.Emit(lString)
- case scanNumber(l):
- l.Emit(lNumber)
- case l.AcceptAnyOf([]string{"true", "false"}, true):
- l.Emit(lBoolean)
- case l.AcceptString("null", true):
- l.Emit(lNull)
- case l.Accept("{"):
- l.Emit(lObjectStart)
- l.PushState(stateInArray)
- return stateInObject
- case l.Accept("["):
- l.Emit(lArrayStart)
- l.PushState(stateInArray)
- return stateInArray
- case l.Accept("]"):
- l.Emit(lArrayEnd)
- return l.PopState()
- }
- return stateInArray
-}
-
-func ignoreWhiteSpace(l *lexer) {
- l.AcceptWhile(" \r\n\t") // skip whitespace (space, CR, LF, tab)
- l.Ignore()
-}