about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlexander Kiryukhin <a.kiryukhin@mail.ru>2021-03-10 00:47:58 +0300
committerAlexander Kiryukhin <a.kiryukhin@mail.ru>2021-03-10 00:47:58 +0300
commitff198abd8fc9e2019c2f3ef9b7e74206ecdb99b7 (patch)
tree3ea7c05e54d00bf2d70cec7c88442402f9916a55
parent521e6da1f6c9c964abca5bac22ce62823c276c1f (diff)
Push/pop state, full json example (tag: v0.0.2)
-rw-r--r--README.md4
-rw-r--r--example/json/main.go140
-rw-r--r--lexem.go8
-rw-r--r--lexer.go13
-rw-r--r--statefunc.go14
5 files changed, 122 insertions, 57 deletions
diff --git a/README.md b/README.md
index 172ce9b..352cbe0 100644
--- a/README.md
+++ b/README.md
@@ -3,4 +3,6 @@ Universal lexer for Golang
Based on Rob Pike's awesome video [Lexical Scanning in Go](https://www.youtube.com/watch?v=HxaD_trXwRE)
-TODO: Write brief documentation. For now - watch video and [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
+Examples: [/examples](https://github.com/neonxp/unilex/tree/master/example) directory.
+
+
diff --git a/example/json/main.go b/example/json/main.go
index 842ab82..745e5aa 100644
--- a/example/json/main.go
+++ b/example/json/main.go
@@ -15,7 +15,19 @@ func main() {
"key2": {
"key3" : "value 3"
},
- "key4": 123.321
+ "key4": 123.321,
+ "key5": [
+ 1,
+ 2,
+ [
+ 3,
+ 4,
+ 5,
+ {
+ "key6": "value6"
+ }
+ ]
+ ]
}`
l := unilex.New(testJson)
go l.Run(initJson)
@@ -25,11 +37,14 @@ func main() {
}
const (
- lObjectStart unilex.LexType = "lObjectStart"
- lObjectEnd unilex.LexType = "lObjectEnd"
- lObjectKey unilex.LexType = "lObjectKey"
- lObjectValueString unilex.LexType = "lObjectValueString"
- lObjectValueNumber unilex.LexType = "lObjectValueNumber"
+ lObjectStart unilex.LexType = iota
+ lObjectEnd
+ lObjectKey
+ lObjectValue
+ lArrayStart
+ lArrayEnd
+ lString
+ lNumber
)
func initJson(l *unilex.Lexer) unilex.StateFunc {
@@ -37,60 +52,83 @@ func initJson(l *unilex.Lexer) unilex.StateFunc {
switch {
case l.Accept("{"):
l.Emit(lObjectStart)
- return stateInObject(true)
+ return stateInObject
case l.Peek() == unilex.EOF:
return nil
}
- return l.Errorf("Unknown token: %s", l.Peek())
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
}
-func stateInObject(initial bool) unilex.StateFunc {
- return func(l *unilex.Lexer) unilex.StateFunc {
- // we in object, so we expect field keys and values
+func stateInObject(l *unilex.Lexer) unilex.StateFunc {
+ // we in object, so we expect field keys and values
+ ignoreWhiteSpace(l)
+ if l.Accept("}") {
+ l.Emit(lObjectEnd)
+ // If meet close object return to previous state (including initial)
+ return l.PopState()
+ }
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ ignoreWhiteSpace(l)
+ if !unilex.ScanQuotedString(l, '"') {
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
+ }
+ l.Emit(lObjectKey)
+ ignoreWhiteSpace(l)
+ if !l.Accept(":") {
+ return l.Errorf("Expected ':'")
+ }
+ ignoreWhiteSpace(l)
+ l.Emit(lObjectValue)
+ switch {
+ case unilex.ScanQuotedString(l, '"'):
+ l.Emit(lString)
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ l.Ignore()
ignoreWhiteSpace(l)
- if l.Accept("}") {
- l.Emit(lObjectEnd)
- if initial {
- return initJson
- }
- ignoreWhiteSpace(l)
- l.Accept(",")
- ignoreWhiteSpace(l)
- return stateInObject(initial)
- }
- if l.Peek() == unilex.EOF {
- return nil
- }
- if !unilex.ScanQuotedString(l, '"') {
- return l.Errorf("Unknown token: %s", l.Peek())
- }
- l.Emit(lObjectKey)
+ return stateInObject
+ case unilex.ScanNumber(l):
+ l.Emit(lNumber)
ignoreWhiteSpace(l)
- if !l.Accept(":") {
- return l.Errorf("Expected ':'")
- }
+ l.Accept(",")
+ l.Ignore()
ignoreWhiteSpace(l)
- switch {
- case unilex.ScanQuotedString(l, '"'):
- l.Emit(lObjectValueString)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject(initial)
- case unilex.ScanNumber(l):
- l.Emit(lObjectValueNumber)
- ignoreWhiteSpace(l)
- l.Accept(",")
- l.Ignore()
- ignoreWhiteSpace(l)
- return stateInObject(initial)
- case l.Accept("{"):
- l.Emit(lObjectStart)
- return stateInObject(false)
- }
- return l.Errorf("Unknown token")
+ return stateInObject
+ case l.Accept("{"):
+ l.Emit(lObjectStart)
+ l.PushState(stateInObject)
+ return stateInObject
+ case l.Accept("["):
+ l.Emit(lArrayStart)
+ l.PushState(stateInObject)
+ return stateInArray
+ }
+ return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInArray(l *unilex.Lexer) unilex.StateFunc {
+ ignoreWhiteSpace(l)
+ l.Accept(",")
+ ignoreWhiteSpace(l)
+ switch {
+ case unilex.ScanQuotedString(l, '"'):
+ l.Emit(lString)
+ case unilex.ScanNumber(l):
+ l.Emit(lNumber)
+ case l.Accept("{"):
+ l.Emit(lObjectStart)
+ l.PushState(stateInArray)
+ return stateInObject
+ case l.Accept("["):
+ l.Emit(lArrayStart)
+ l.PushState(stateInArray)
+ return stateInArray
+ case l.Accept("]"):
+ l.Emit(lArrayEnd)
+ return l.PopState()
}
+ return stateInArray
}
func ignoreWhiteSpace(l *unilex.Lexer) {
diff --git a/lexem.go b/lexem.go
index bd24ea9..c594ee9 100644
--- a/lexem.go
+++ b/lexem.go
@@ -9,12 +9,12 @@ type Lexem struct {
}
// LexType represents type of current lexem.
-type LexType string
+type LexType int
// Some std lexem types
const (
- // LError represents lexing error.
- LError LexType = "ERROR"
// LEOF represents end of input.
- LEOF LexType = "EOF"
+ LexEOF LexType = -1
+ // LError represents lexing error.
+ LexError LexType = -2
)
diff --git a/lexer.go b/lexer.go
index 10317bd..b556045 100644
--- a/lexer.go
+++ b/lexer.go
@@ -16,6 +16,7 @@ type Lexer struct {
Pos int // Pos at input string.
Output chan Lexem // Lexems channel.
width int // Width of last rune.
+ states stateStack // Stack of states to realize PrevState.
}
// New returns new scanner for input string.
@@ -37,6 +38,16 @@ func (l *Lexer) Run(init StateFunc) {
close(l.Output)
}
+// PopState returns previous state function.
+func (l *Lexer) PopState() StateFunc {
+ return l.states.Pop()
+}
+
+// PushState pushes state before going deeper states.
+func (l *Lexer) PushState(s StateFunc) {
+ l.states.Push(s)
+}
+
// Emit current lexem to output.
func (l *Lexer) Emit(typ LexType) {
l.Output <- Lexem{
@@ -51,7 +62,7 @@ func (l *Lexer) Emit(typ LexType) {
// Errorf produces error lexem and stops scanning.
func (l *Lexer) Errorf(format string, args ...interface{}) StateFunc {
l.Output <- Lexem{
- Type: LError,
+ Type: LexError,
Value: fmt.Sprintf(format, args...),
Start: l.Start,
End: l.Pos,
diff --git a/statefunc.go b/statefunc.go
index 5980ecc..734fe57 100644
--- a/statefunc.go
+++ b/statefunc.go
@@ -2,3 +2,17 @@ package unilex
// StateFunc represents function that scans lexems and returns new state function or nil if lexing completed.
type StateFunc func(*Lexer) StateFunc
+
+type stateStack []StateFunc
+
+func (ss *stateStack) Push(s StateFunc) {
+ *ss = append(*ss, s)
+}
+
+func (ss *stateStack) Pop() (s StateFunc) {
+ if len(*ss) == 0 {
+ return nil
+ }
+ *ss, s = (*ss)[:len(*ss)-1], (*ss)[len(*ss)-1]
+ return s
+}