5 files changed, 375 insertions, 0 deletions
diff --git a/internal/lexer/lexer.go b/internal/lexer/lexer.go
new file mode 100644
index 0000000..342864d
--- /dev/null
+++ b/internal/lexer/lexer.go
@@ -0,0 +1,182 @@
+package lexer
+
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+)
+
+const eof rune = -1 // eof is the sentinel rune Next returns once Pos has reached the end of Input.
+
+type Lexem struct { // Lexem is a single token sent on Lexer.Output.
+	Type  lexType // Type is the kind of the lexem.
+	Value string  // Value is the lexem text; for LError lexems it is the formatted error message.
+	Start int     // Start is the byte offset in the input string where the lexem begins.
+	End   int     // End is the byte offset in the input string just past the lexem.
+}
+
+//go:generate stringer -type=lexType
+type lexType int // lexType identifies the kind of a Lexem; its String method is generated by stringer.
+
+const (
+	LEOF lexType = iota
+	LError
+	LObjectStart
+	LObjectEnd
+	LObjectKey
+	LObjectValue
+	LArrayStart
+	LArrayEnd
+	LString
+	LNumber
+	LBoolean
+	LNull
+)
+
+// Lexer holds the scanner state: the input, the bounds of the lexem being built and the output channel.
+type Lexer struct {
+	Input  string     // Input is the string being scanned.
+	Start  int        // Start is the byte offset in Input where the current lexem begins.
+	Pos    int        // Pos is the byte offset in Input of the next rune to read.
+	Output chan Lexem // Output receives the emitted lexems; Run closes it when scanning stops.
+	width  int        // width is the byte width of the last rune read by Next; Back rewinds Pos by it.
+	states stateStack // states is the stack of saved state functions used by PushState and PopState.
+}
+
+// NewLexer builds a Lexer positioned at the beginning of input.
+// Output is a buffered channel with room for two lexems; both offsets, the
+// rune width and the state stack start at their zero values.
+func NewLexer(input string) *Lexer {
+	l := new(Lexer)
+	l.Input = input
+	l.Output = make(chan Lexem, 2)
+	return l
+}
+
+// Run drives the state machine starting from init: each state function is
+// called with the lexer and returns the state to run next. Scanning stops
+// when a state returns nil, after which Output is closed.
+func (l *Lexer) Run(init stateFunc) {
+	next := init
+	for next != nil {
+		next = next(l)
+	}
+	close(l.Output)
+}
+
+// PopState removes and returns the most recently pushed state function.
+// It returns nil when no state has been saved.
+func (l *Lexer) PopState() stateFunc {
+	prev := l.states.Pop()
+	return prev
+}
+
+// PushState saves s on the state stack so that a later PopState can resume
+// it once a nested state has finished.
+func (l *Lexer) PushState(s stateFunc) {
+	stack := &l.states
+	stack.Push(s)
+}
+
+// Emit sends the text between Start and Pos to Output as a lexem of type typ
+// and then begins a new, empty lexem at Pos. The send blocks while the
+// Output buffer is full.
+func (l *Lexer) Emit(typ lexType) {
+	from, to := l.Start, l.Pos
+	l.Output <- Lexem{Type: typ, Value: l.Input[from:to], Start: from, End: to}
+	l.Start = to
+}
+
+// Errorf sends an LError lexem whose Value is the formatted message and whose
+// bounds are the current Start and Pos. It always returns nil, so a state
+// function can return its result directly to stop the scan.
+func (l *Lexer) Errorf(format string, args ...interface{}) stateFunc {
+	msg := fmt.Sprintf(format, args...)
+	l.Output <- Lexem{Type: LError, Value: msg, Start: l.Start, End: l.Pos}
+	return nil
+}
+
+// Next decodes the rune at Pos, advances Pos past it and records its byte
+// width for Back. At the end of the input it returns eof, leaves Pos
+// unchanged and sets the recorded width to zero.
+func (l *Lexer) Next() (r rune) {
+	if l.Pos < len(l.Input) {
+		r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
+		l.Pos += l.width
+		return r
+	}
+	l.width = 0
+	return eof
+}
+
+// Back moves Pos back by the width of the last rune read by Next.
+// The width is not cleared, so a second consecutive call rewinds by the same
+// amount again.
+func (l *Lexer) Back() {
+	l.Pos = l.Pos - l.width
+}
+
+// Ignore discards the text consumed so far: the current lexem restarts at
+// Pos and the remembered rune width is cleared.
+func (l *Lexer) Ignore() {
+	l.width = 0
+	l.Start = l.Pos
+}
+
+// Peek returns the rune at Pos without consuming it. It reads the rune with
+// Next and immediately undoes the move with Back, so the remembered rune
+// width afterwards is that of the peeked rune.
+func (l *Lexer) Peek() (r rune) {
+	defer l.Back()
+	return l.Next()
+}
+
+// Accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did. When the rune does not match, the position is
+// restored.
+func (l *Lexer) Accept(valid string) bool {
+	r := l.Next()
+	if !strings.ContainsRune(valid, r) {
+		l.Back()
+		return false
+	}
+	return true
+}
+
+// AcceptString consumes s if the input at the current position starts with
+// it and reports whether it did. With caseInsentive set, the comparison
+// ignores letter case. On a match Pos advances by len(s) bytes and the
+// remembered rune width is cleared; otherwise the lexer is left untouched.
+func (l *Lexer) AcceptString(s string, caseInsentive bool) bool {
+	// Match at Pos, not Start: text already consumed into the current lexem
+	// must not take part in the comparison.
+	rest := l.Input[l.Pos:]
+	if len(rest) < len(s) {
+		return false
+	}
+	// Only the candidate prefix is examined, so the cost depends on len(s)
+	// rather than on the length of the remaining input.
+	prefix := rest[:len(s)]
+	matched := prefix == s
+	if !matched && caseInsentive {
+		matched = strings.EqualFold(prefix, s)
+	}
+	if !matched {
+		return false
+	}
+	l.width = 0
+	l.Pos += len(s)
+	return true
+}
+
+// AcceptAnyOf tries the candidates in s in order and consumes the first one
+// that AcceptString matches. It reports whether any candidate matched.
+func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
+	for i := range s {
+		if matched := l.AcceptString(s[i], caseInsentive); matched {
+			return true
+		}
+	}
+	return false
+}
+
+// AcceptWhile consumes runes for as long as they are in valid and reports
+// whether at least one rune was consumed.
+func (l *Lexer) AcceptWhile(valid string) bool {
+	consumed := 0
+	for l.Accept(valid) {
+		consumed++
+	}
+	return consumed > 0
+}
+
+// AcceptWhileNot consumes runes until it meets one that is in invalid or
+// reaches the end of the input, and reports whether at least one rune was
+// consumed. The terminating rune itself is not consumed.
+func (l *Lexer) AcceptWhileNot(invalid string) bool {
+	isValid := false
+	for {
+		r := l.Next()
+		// eof is never contained in invalid, so it has to be tested
+		// explicitly; otherwise the loop spins forever at end of input.
+		if r == eof || strings.ContainsRune(invalid, r) {
+			break
+		}
+		isValid = true
+	}
+	// Un-read the terminator; at eof the recorded width is zero, so this is
+	// a no-op.
+	l.Back()
+	return isValid
+}
+
+// AtStart reports whether the current lexem is still empty, i.e. nothing has
+// been consumed since Start.
+func (l *Lexer) AtStart() bool {
+	return l.Start == l.Pos
+}
diff --git a/internal/lexer/lextype_string.go b/internal/lexer/lextype_string.go
new file mode 100644
index 0000000..fe895d2
--- /dev/null
+++ b/internal/lexer/lextype_string.go
@@ -0,0 +1,34 @@
+// Code generated by "stringer -type=lexType"; DO NOT EDIT.
+
+package lexer
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[LEOF-0]
+	_ = x[LError-1]
+	_ = x[LObjectStart-2]
+	_ = x[LObjectEnd-3]
+	_ = x[LObjectKey-4]
+	_ = x[LObjectValue-5]
+	_ = x[LArrayStart-6]
+	_ = x[LArrayEnd-7]
+	_ = x[LString-8]
+	_ = x[LNumber-9]
+	_ = x[LBoolean-10]
+	_ = x[LNull-11]
+}
+
+const _lexType_name = "LEOFLErrorLObjectStartLObjectEndLObjectKeyLObjectValueLArrayStartLArrayEndLStringLNumberLBooleanLNull"
+
+var _lexType_index = [...]uint8{0, 4, 10, 22, 32, 42, 54, 65, 74, 81, 88, 96, 101}
+
+func (i lexType) String() string {
+	if i < 0 || i >= lexType(len(_lexType_index)-1) {
+		return "lexType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _lexType_name[_lexType_index[i]:_lexType_index[i+1]]
+}
diff --git a/internal/lexer/scanners.go b/internal/lexer/scanners.go
new file mode 100644
index 0000000..6181c2d
--- /dev/null
+++ b/internal/lexer/scanners.go
@@ -0,0 +1,32 @@
+package lexer
+
+func scanNumber(l *Lexer) bool {
+	l.AcceptWhile("0123456789")
+	if l.AtStart() {
+		// not found any digit
+		return false
+	}
+	l.Accept(".")
+	l.AcceptWhile("0123456789")
+	return !l.AtStart()
+}
+
+// scanQuotedString consumes a string delimited by quote, including both
+// delimiters, and reports whether a complete string was found. A backslash
+// causes the rune after it to be skipped, so an escaped quote does not end
+// the string. If the input does not start with quote, or ends before the
+// closing quote, the position is restored and false is returned.
+func scanQuotedString(l *Lexer, quote rune) bool {
+	origin := l.Pos
+	if first := l.Next(); first != quote {
+		l.Back()
+		return false
+	}
+	for {
+		r := l.Next()
+		if r == eof {
+			// Unterminated string: rewind to where the scan began.
+			l.Pos = origin
+			return false
+		}
+		if r == '\\' {
+			// Skip whatever follows the backslash.
+			l.Next()
+			continue
+		}
+		if r == quote {
+			return true
+		}
+	}
+}
diff --git a/internal/lexer/statefunc.go b/internal/lexer/statefunc.go
new file mode 100644
index 0000000..8d0e42a
--- /dev/null
+++ b/internal/lexer/statefunc.go
@@ -0,0 +1,17 @@
+package lexer
+
+type stateFunc func(*Lexer) stateFunc // stateFunc is one lexer state; it returns the next state, or nil to make Run stop.
+
+type stateStack []stateFunc // stateStack is a LIFO stack of state functions; the last element is the top.
+
+// Push places s on top of the stack.
+func (ss *stateStack) Push(s stateFunc) {
+	stack := *ss
+	*ss = append(stack, s)
+}
+
+// Pop removes the top state from the stack and returns it.
+// It returns nil when the stack is empty.
+func (ss *stateStack) Pop() (s stateFunc) {
+	stack := *ss
+	top := len(stack) - 1
+	if top < 0 {
+		return nil
+	}
+	s = stack[top]
+	*ss = stack[:top]
+	return s
+}
diff --git a/internal/lexer/states.go b/internal/lexer/states.go
new file mode 100644
index 0000000..818ccf6
--- /dev/null
+++ b/internal/lexer/states.go
@@ -0,0 +1,110 @@
+package lexer
+
+// InitJson is the entry state for lexing a JSON document. After skipping
+// leading whitespace it expects the start of an object or of an array, emits
+// the matching start lexem and hands control to the corresponding state.
+// Empty or whitespace-only input ends the scan without emitting anything;
+// any other leading rune produces an LError lexem.
+func InitJson(l *Lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	switch {
+	case l.Accept("{"):
+		l.Emit(LObjectStart)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(LArrayStart)
+		// Continue in the array state. Without this return, control reached
+		// the error below and every top-level array was rejected.
+		return stateInArray
+	case l.Peek() == eof:
+		return nil
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+func stateInObject(l *Lexer) stateFunc {
+	// we in object, so we expect field keys and values
+	ignoreWhiteSpace(l)
+	if l.Accept("}") {
+		l.Emit(LObjectEnd)
+		// If meet close object return to previous state (including initial)
+		return l.PopState()
+	}
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	if !scanQuotedString(l, '"') {
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	l.Emit(LObjectKey)
+	ignoreWhiteSpace(l)
+	if !l.Accept(":") {
+		return l.Errorf("Expected ':'")
+	}
+	ignoreWhiteSpace(l)
+	l.Emit(LObjectValue)
+	switch {
+	case scanQuotedString(l, '"'):
+		l.Emit(LString)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case scanNumber(l):
+		l.Emit(LNumber)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.AcceptAnyOf([]string{"true", "false"}, true):
+		l.Emit(LBoolean)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.AcceptString("null", true):
+		l.Emit(LNull)
+		ignoreWhiteSpace(l)
+		l.Accept(",")
+		l.Ignore()
+		ignoreWhiteSpace(l)
+		return stateInObject
+	case l.Accept("{"):
+		l.Emit(LObjectStart)
+		l.PushState(stateInObject)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(LArrayStart)
+		l.PushState(stateInObject)
+		return stateInArray
+	}
+	return l.Errorf("Unknown token: %s", string(l.Peek()))
+}
+
+// stateInArray lexes one step inside an array: a single element or the
+// closing bracket. An optional comma before the element is skipped. Scalar
+// elements are emitted and the state repeats; a nested object or array saves
+// this state and switches to the nested one; a closing bracket emits
+// LArrayEnd and resumes the state saved by the enclosing container (nil at
+// the top level). End of input or an unrecognised token produces an LError
+// lexem and stops the scan.
+func stateInArray(l *Lexer) stateFunc {
+	ignoreWhiteSpace(l)
+	l.Accept(",")
+	ignoreWhiteSpace(l)
+	switch {
+	case scanQuotedString(l, '"'):
+		l.Emit(LString)
+	case scanNumber(l):
+		l.Emit(LNumber)
+	case l.AcceptAnyOf([]string{"true", "false"}, true):
+		l.Emit(LBoolean)
+	case l.AcceptString("null", true):
+		l.Emit(LNull)
+	case l.Accept("{"):
+		l.Emit(LObjectStart)
+		l.PushState(stateInArray)
+		return stateInObject
+	case l.Accept("["):
+		l.Emit(LArrayStart)
+		l.PushState(stateInArray)
+		return stateInArray
+	case l.Accept("]"):
+		l.Emit(LArrayEnd)
+		return l.PopState()
+	case l.Peek() == eof:
+		// Unterminated array: stop instead of re-entering this state
+		// forever with nothing left to consume.
+		return l.Errorf("Expected ']'")
+	default:
+		// Nothing was consumed, so returning to this state would loop
+		// forever on the same rune.
+		return l.Errorf("Unknown token: %s", string(l.Peek()))
+	}
+	return stateInArray
+}
+
+// ignoreWhiteSpace skips any run of JSON whitespace (space, line feed,
+// carriage return, tab) at the current position and discards it, so the
+// next lexem starts at the first non-whitespace rune.
+func ignoreWhiteSpace(l *Lexer) {
+	// "\r" is included so that input with CRLF line endings lexes cleanly.
+	l.AcceptWhile(" \n\r\t")
+	l.Ignore()
+}