diff options
Diffstat (limited to 'lex.go')
-rw-r--r-- | lex.go | 181 |
1 files changed, 181 insertions, 0 deletions
@@ -0,0 +1,181 @@ +package lexpr + +import ( + "context" + "strings" + "unicode/utf8" +) + +// EOF const. +const EOF rune = -1 + +// lex holds current scanner state. +type lex struct { + input string // Input string. + start int // Start position of current lexem. + pos int // Pos at input string. + output chan lexem // Lexems channel. + width int // Width of last rune. +} + +// newLex returns new scanner for input string. +func newLex() *lex { + return &lex{ + input: "", + start: 0, + pos: 0, + output: nil, + width: 0, + } +} + +// parse input to lexems. +func (l *lex) parse(ctx context.Context, input string) <-chan lexem { + l.input = input + l.output = make(chan lexem) + go func() { + defer close(l.output) + for { + if ctx.Err() != nil { + return + } + switch { + case l.acceptWhile(" \n\t", false): + l.ignore() + case l.accept("("): + l.emit(lp) + case l.accept(")"): + l.emit(rp) + case l.accept(","): + l.emit(sep) + case scanNumber(l): + l.emit(number) + case scanOps(l): + l.emit(op) + case scanWord(l): + l.emit(word) + case scanQuotedString(l, `"`): + l.emit(str) + case l.peek() == EOF: + return + default: + l.emit(tokError) + return + } + } + }() + return l.output +} + +// emit current lexem to output. +func (l *lex) emit(typ lexType) { + l.output <- lexem{ + Type: typ, + Value: l.input[l.start:l.pos], + Start: l.start, + End: l.pos, + } + l.start = l.pos +} + +// next rune from input. +func (l *lex) next() (r rune) { + if int(l.pos) >= len(l.input) { + l.width = 0 + return EOF + } + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +// back move position to previos rune. +func (l *lex) back() { + l.pos -= l.width +} + +// ignore previosly buffered text. +func (l *lex) ignore() { + l.start = l.pos + l.width = 0 +} + +// peek rune at current position without moving position. +func (l *lex) peek() (r rune) { + r = l.next() + l.back() + return r +} + +// accept any rune from valid string. Returns true if next rune was in valid string. +func (l *lex) accept(valid string) bool { + if strings.ContainsRune(valid, l.next()) { + return true + } + l.back() + return false +} + +// acceptString returns true if given string was at position. +func (l *lex) acceptString(s string, caseInsentive bool) bool { + input := l.input + if caseInsentive { + input = strings.ToLower(input) + s = strings.ToLower(s) + } + if strings.HasPrefix(input, s) { + l.width = 0 + l.pos += len(s) + return true + } + return false +} + +// acceptAnyOf substrings. Retuns true if any of substrings was found. +func (l *lex) acceptAnyOf(s []string, caseInsentive bool) bool { + for _, substring := range s { + if l.acceptString(substring, caseInsentive) { + return true + } + } + return false +} + +// acceptWhile passing symbols from input while they at `valid` string. +func (l *lex) acceptWhile(valid string, ignoreEscaped bool) bool { + start := l.pos + for { + ch := l.next() + switch { + case ch == EOF: + return false + case ch == '\\' && ignoreEscaped: + l.next() + case !strings.ContainsRune(valid, ch): + l.back() + return l.pos > start + } + } +} + +// acceptWhileNot passing symbols from input while they NOT in `invalid` string. +func (l *lex) acceptWhileNot(invalid string, ignoreEscaped bool) bool { + start := l.pos + for { + ch := l.next() + switch { + case ch == EOF: + return false + case ch == '\\' && ignoreEscaped: + l.next() + case strings.ContainsRune(invalid, ch): + l.back() + return l.pos > start + } + } +} + +// atStart returns true if current lexem not empty +func (l *lex) atStart() bool { + return l.pos == l.start +} |