aboutsummaryrefslogtreecommitdiff
path: root/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'lex.go')
-rw-r--r--lex.go181
1 files changed, 181 insertions, 0 deletions
diff --git a/lex.go b/lex.go
new file mode 100644
index 0000000..f0c70e1
--- /dev/null
+++ b/lex.go
@@ -0,0 +1,181 @@
+package lexpr
+
+import (
+ "context"
+ "strings"
+ "unicode/utf8"
+)
+
+// EOF is the sentinel rune returned by next when the read position has
+// reached the end of the input. It is negative, so it never collides with a
+// decoded rune.
+const EOF rune = -1
+
+// lex holds the current scanner state. It acts as a cursor over input: the
+// pending (not yet emitted) lexem is input[start:pos].
+type lex struct {
+ input string // Text being scanned.
+ start int // Byte offset in input where the pending lexem begins.
+ pos int // Byte offset in input of the next rune to read.
+ output chan lexem // Channel on which emit sends finished lexems.
+ width int // Byte width of the rune last read by next (0 at end of input).
+}
+
+// newLex returns a scanner in its zero state: empty input, all offsets at 0
+// and no output channel. The input and the channel are set later by parse.
+func newLex() *lex {
+	return new(lex)
+}
+
+// parse input to lexems.
+func (l *lex) parse(ctx context.Context, input string) <-chan lexem {
+ l.input = input
+ l.output = make(chan lexem)
+ go func() {
+ defer close(l.output)
+ for {
+ if ctx.Err() != nil {
+ return
+ }
+ switch {
+ case l.acceptWhile(" \n\t", false):
+ l.ignore()
+ case l.accept("("):
+ l.emit(lp)
+ case l.accept(")"):
+ l.emit(rp)
+ case l.accept(","):
+ l.emit(sep)
+ case scanNumber(l):
+ l.emit(number)
+ case scanOps(l):
+ l.emit(op)
+ case scanWord(l):
+ l.emit(word)
+ case scanQuotedString(l, `"`):
+ l.emit(str)
+ case l.peek() == EOF:
+ return
+ default:
+ l.emit(tokError)
+ return
+ }
+ }
+ }()
+ return l.output
+}
+
+// emit sends the pending lexem, i.e. the text between start and pos tagged
+// with typ, on the output channel and then begins a new lexem at pos.
+// It is a plain channel send, so it blocks until the channel accepts the value.
+func (l *lex) emit(typ lexType) {
+	begin, end := l.start, l.pos
+	l.output <- lexem{
+		Type:  typ,
+		Value: l.input[begin:end],
+		Start: begin,
+		End:   end,
+	}
+	l.start = end
+}
+
+// next decodes the rune at the current position, advances past it and
+// records its byte width so that back can undo the step. At the end of the
+// input it returns EOF and sets width to 0, leaving the position unchanged.
+func (l *lex) next() (r rune) {
+	if l.pos < len(l.input) {
+		decoded, size := utf8.DecodeRuneInString(l.input[l.pos:])
+		l.width = size
+		l.pos += size
+		return decoded
+	}
+	l.width = 0
+	return EOF
+}
+
+// back moves the position back by the width of the rune most recently read
+// by next. The width is not cleared, so calling back again without an
+// intervening next rewinds by the same amount once more.
+func (l *lex) back() {
+	l.pos = l.pos - l.width
+}
+
+// ignore discards the text consumed so far by moving the lexem start up to
+// the current position. It also clears width, so a following back does not
+// move the position.
+func (l *lex) ignore() {
+	l.start, l.width = l.pos, 0
+}
+
+// peek returns the rune at the current position without consuming it, or
+// EOF at the end of the input. It is next followed by back, so width is left
+// holding the width of the peeked rune.
+func (l *lex) peek() (r rune) {
+	ahead := l.next()
+	l.back()
+	return ahead
+}
+
+// accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did. When the rune is not in valid (or the input is
+// exhausted) the position is restored and false is returned.
+func (l *lex) accept(valid string) bool {
+	r := l.next()
+	if !strings.ContainsRune(valid, r) {
+		l.back()
+		return false
+	}
+	return true
+}
+
+// acceptString consumes s if the unread input, starting at the current
+// position, begins with s, and reports whether it did. On a mismatch the
+// position is left unchanged.
+//
+// When caseInsentive is true the comparison uses Unicode case folding. The
+// candidate is the next len(s) bytes of input, so a case variant whose UTF-8
+// encoding has a different length than s is not matched.
+//
+// On success width is cleared, so a following back does not move the
+// position.
+func (l *lex) acceptString(s string, caseInsentive bool) bool {
+	// Only the text from the current position onward may match; comparing
+	// against the whole input would match the first lexem over and over.
+	rest := l.input[l.pos:]
+	if len(rest) < len(s) {
+		return false
+	}
+	head := rest[:len(s)]
+	matched := head == s
+	if !matched && caseInsentive {
+		matched = strings.EqualFold(head, s)
+	}
+	if !matched {
+		return false
+	}
+	l.width = 0
+	l.pos += len(s)
+	return true
+}
+
+// acceptAnyOf tries the candidates in s, in order, with acceptString and
+// stops at the first one that is accepted. It returns true if a candidate was
+// accepted and false otherwise, including when s is empty.
+func (l *lex) acceptAnyOf(s []string, caseInsentive bool) bool {
+	for i := 0; i < len(s); i++ {
+		if l.acceptString(s[i], caseInsentive) {
+			return true
+		}
+	}
+	return false
+}
+
+// acceptWhile consumes runes for as long as they belong to valid.
+//
+// When ignoreEscaped is true, a backslash and the rune that follows it are
+// consumed unconditionally, whether or not they are in valid.
+//
+// Scanning stops at the first rune that is not in valid; that rune is left
+// unread and the result reports whether the position advanced. If the end of
+// the input is reached first, the result is false even when runes were
+// consumed, and the position stays at the end of the input.
+func (l *lex) acceptWhile(valid string, ignoreEscaped bool) bool {
+	begin := l.pos
+	for {
+		r := l.next()
+		if r == EOF {
+			return false
+		}
+		if ignoreEscaped && r == '\\' {
+			// Skip the escaped rune without inspecting it.
+			l.next()
+			continue
+		}
+		if strings.ContainsRune(valid, r) {
+			continue
+		}
+		l.back()
+		return begin < l.pos
+	}
+}
+
+// acceptWhileNot consumes runes for as long as they do NOT belong to invalid.
+//
+// When ignoreEscaped is true, a backslash and the rune that follows it are
+// consumed unconditionally, so an escaped rune from invalid does not stop the
+// scan.
+//
+// Scanning stops at the first unescaped rune found in invalid; that rune is
+// left unread and the result reports whether the position advanced. If the
+// end of the input is reached first, the result is false even when runes were
+// consumed, and the position stays at the end of the input.
+func (l *lex) acceptWhileNot(invalid string, ignoreEscaped bool) bool {
+	begin := l.pos
+	for {
+		r := l.next()
+		if r == EOF {
+			return false
+		}
+		if ignoreEscaped && r == '\\' {
+			// Skip the escaped rune without inspecting it.
+			l.next()
+			continue
+		}
+		if !strings.ContainsRune(invalid, r) {
+			continue
+		}
+		l.back()
+		return begin < l.pos
+	}
+}
+
+// atStart reports whether the pending lexem is empty, i.e. the read position
+// has not moved past the start of the lexem.
+func (l *lex) atStart() bool {
+	return l.start == l.pos
+}