diff options
Diffstat (limited to 'internal/lexer/lexer.go')
-rw-r--r-- | internal/lexer/lexer.go | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/internal/lexer/lexer.go b/internal/lexer/lexer.go new file mode 100644 index 0000000..342864d --- /dev/null +++ b/internal/lexer/lexer.go @@ -0,0 +1,182 @@ +package lexer + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +const eof rune = -1 + +type Lexem struct { + Type lexType // Type of Lexem. + Value string // Value of Lexem. + Start int // Start position at input string. + End int // End position at input string. +} + +//go:generate stringer -type=lexType +type lexType int + +const ( + LEOF lexType = iota + LError + LObjectStart + LObjectEnd + LObjectKey + LObjectValue + LArrayStart + LArrayEnd + LString + LNumber + LBoolean + LNull +) + +// Lexer holds current scanner state. +type Lexer struct { + Input string // Input string. + Start int // Start position of current lexem. + Pos int // Pos at input string. + Output chan Lexem // Lexems channel. + width int // Width of last rune. + states stateStack // Stack of states to realize PrevState. +} + +// newLexer returns new scanner for input string. +func NewLexer(input string) *Lexer { + return &Lexer{ + Input: input, + Start: 0, + Pos: 0, + Output: make(chan Lexem, 2), + width: 0, + } +} + +// Run lexing. +func (l *Lexer) Run(init stateFunc) { + for state := init; state != nil; { + state = state(l) + } + close(l.Output) +} + +// PopState returns previous state function. +func (l *Lexer) PopState() stateFunc { + return l.states.Pop() +} + +// PushState pushes state before going deeper states. +func (l *Lexer) PushState(s stateFunc) { + l.states.Push(s) +} + +// Emit current lexem to output. +func (l *Lexer) Emit(typ lexType) { + l.Output <- Lexem{ + Type: typ, + Value: l.Input[l.Start:l.Pos], + Start: l.Start, + End: l.Pos, + } + l.Start = l.Pos +} + +// Errorf produces error lexem and stops scanning. +func (l *Lexer) Errorf(format string, args ...interface{}) stateFunc { + l.Output <- Lexem{ + Type: LError, + Value: fmt.Sprintf(format, args...), + Start: l.Start, + End: l.Pos, + } + return nil +} + +// Next rune from input. +func (l *Lexer) Next() (r rune) { + if int(l.Pos) >= len(l.Input) { + l.width = 0 + return eof + } + r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:]) + l.Pos += l.width + return r +} + +// Back move position to previos rune. +func (l *Lexer) Back() { + l.Pos -= l.width +} + +// Ignore previosly buffered text. +func (l *Lexer) Ignore() { + l.Start = l.Pos + l.width = 0 +} + +// Peek rune at current position without moving position. +func (l *Lexer) Peek() (r rune) { + r = l.Next() + l.Back() + return r +} + +// Accept any rune from valid string. Returns true if Next rune was in valid string. +func (l *Lexer) Accept(valid string) bool { + if strings.ContainsRune(valid, l.Next()) { + return true + } + l.Back() + return false +} + +// AcceptString returns true if given string was at position. +func (l *Lexer) AcceptString(s string, caseInsentive bool) bool { + input := l.Input[l.Start:] + if caseInsentive { + input = strings.ToLower(input) + s = strings.ToLower(s) + } + if strings.HasPrefix(input, s) { + l.width = 0 + l.Pos += len(s) + return true + } + return false +} + +// AcceptAnyOf substrings. Retuns true if any of substrings was found. +func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool { + for _, substring := range s { + if l.AcceptString(substring, caseInsentive) { + return true + } + } + return false +} + +// AcceptWhile passing symbols from input while they at `valid` string. +func (l *Lexer) AcceptWhile(valid string) bool { + isValid := false + for l.Accept(valid) { + isValid = true + } + return isValid +} + +// AcceptWhileNot passing symbols from input while they NOT in `invalid` string. +func (l *Lexer) AcceptWhileNot(invalid string) bool { + isValid := false + for !strings.ContainsRune(invalid, l.Next()) { + isValid = true + } + l.Back() + return isValid +} + +// AtStart returns true if current lexem not empty +func (l *Lexer) AtStart() bool { + return l.Pos == l.Start +} |