1 files changed, 181 insertions, 0 deletions
diff --git a/lex.go b/lex.go
new file mode 100644
index 0000000..f0c70e1
--- /dev/null
+++ b/lex.go
@@ -0,0 +1,181 @@
+package lexpr
+
+import (
+	"context"
+	"strings"
+	"unicode/utf8"
+)
+
+// EOF const.
+const EOF rune = -1
+
+// lex holds current scanner state.
+type lex struct {
+	input  string     // Input string.
+	start  int        // Start position of current lexem.
+	pos    int        // Pos at input string.
+	output chan lexem // Lexems channel.
+	width  int        // Width of last rune.
+}
+
+// newLex returns new scanner for input string.
+func newLex() *lex {
+	return &lex{
+		input:  "",
+		start:  0,
+		pos:    0,
+		output: nil,
+		width:  0,
+	}
+}
+
+// parse input to lexems.
+func (l *lex) parse(ctx context.Context, input string) <-chan lexem {
+	l.input = input
+	l.output = make(chan lexem)
+	go func() {
+		defer close(l.output)
+		for {
+			if ctx.Err() != nil {
+				return
+			}
+			switch {
+			case l.acceptWhile(" \n\t", false):
+				l.ignore()
+			case l.accept("("):
+				l.emit(lp)
+			case l.accept(")"):
+				l.emit(rp)
+			case l.accept(","):
+				l.emit(sep)
+			case scanNumber(l):
+				l.emit(number)
+			case scanOps(l):
+				l.emit(op)
+			case scanWord(l):
+				l.emit(word)
+			case scanQuotedString(l, `"`):
+				l.emit(str)
+			case l.peek() == EOF:
+				return
+			default:
+				l.emit(tokError)
+				return
+			}
+		}
+	}()
+	return l.output
+}
+
+// emit current lexem to output.
+func (l *lex) emit(typ lexType) {
+	l.output <- lexem{
+		Type:  typ,
+		Value: l.input[l.start:l.pos],
+		Start: l.start,
+		End:   l.pos,
+	}
+	l.start = l.pos
+}
+
+// next rune from input.
+func (l *lex) next() (r rune) {
+	if int(l.pos) >= len(l.input) {
+		l.width = 0
+		return EOF
+	}
+	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return r
+}
+
+// back move position to previos rune.
+func (l *lex) back() {
+	l.pos -= l.width
+}
+
+// ignore previosly buffered text.
+func (l *lex) ignore() {
+	l.start = l.pos
+	l.width = 0
+}
+
+// peek rune at current position without moving position.
+func (l *lex) peek() (r rune) {
+	r = l.next()
+	l.back()
+	return r
+}
+
+// accept any rune from valid string. Returns true if next rune was in valid string.
+func (l *lex) accept(valid string) bool {
+	if strings.ContainsRune(valid, l.next()) {
+		return true
+	}
+	l.back()
+	return false
+}
+
+// acceptString returns true if given string was at position.
+func (l *lex) acceptString(s string, caseInsentive bool) bool {
+	input := l.input
+	if caseInsentive {
+		input = strings.ToLower(input)
+		s = strings.ToLower(s)
+	}
+	if strings.HasPrefix(input, s) {
+		l.width = 0
+		l.pos += len(s)
+		return true
+	}
+	return false
+}
+
+// acceptAnyOf substrings. Retuns true if any of substrings was found.
+func (l *lex) acceptAnyOf(s []string, caseInsentive bool) bool {
+	for _, substring := range s {
+		if l.acceptString(substring, caseInsentive) {
+			return true
+		}
+	}
+	return false
+}
+
+// acceptWhile passing symbols from input while they at `valid` string.
+func (l *lex) acceptWhile(valid string, ignoreEscaped bool) bool {
+	start := l.pos
+	for {
+		ch := l.next()
+		switch {
+		case ch == EOF:
+			return false
+		case ch == '\\' && ignoreEscaped:
+			l.next()
+		case !strings.ContainsRune(valid, ch):
+			l.back()
+			return l.pos > start
+		}
+	}
+}
+
+// acceptWhileNot passing symbols from input while they NOT in `invalid` string.
+func (l *lex) acceptWhileNot(invalid string, ignoreEscaped bool) bool {
+	start := l.pos
+	for {
+		ch := l.next()
+		switch {
+		case ch == EOF:
+			return false
+		case ch == '\\' && ignoreEscaped:
+			l.next()
+		case strings.ContainsRune(invalid, ch):
+			l.back()
+			return l.pos > start
+		}
+	}
+}
+
+// atStart returns true if current lexem not empty
+func (l *lex) atStart() bool {
+	return l.pos == l.start
+}