path: root/lex.go



package lexpr

import (
	"context"
	"strings"
	"unicode/utf8"
)

// EOF is the sentinel rune that signals the end of the input.
const EOF = rune(-1)

// lex is the scanner state: the text being scanned, the extent of the lexem
// currently being built, and the channel finished lexems are delivered on.
type lex struct {
	input string // Text being scanned.

	// Byte offsets into input: start is where the current lexem begins,
	// pos is where the next rune will be read from.
	start, pos int

	output chan lexem // Channel the lexems are sent on.
	width  int        // Byte width of the last rune read (0 at end of input).
}

// newLex returns a fresh scanner with every field at its zero value: empty
// input, both offsets at 0 and no output channel.
func newLex() *lex {
	return new(lex)
}

// parse scans input and streams the resulting lexems on the returned channel.
//
// Scanning runs in a separate goroutine. The channel is closed when the end of
// input is reached, right after a tokError lexem has been sent for input that
// nothing recognises, or once ctx is cancelled. Every send also watches ctx, so
// the goroutine exits instead of blocking forever when the receiver stops
// reading after cancelling.
func (l *lex) parse(ctx context.Context, input string) <-chan lexem {
	out := make(chan lexem)
	l.input = input
	// Rewind the scanner so that a reused lex starts at the beginning of the
	// new input rather than at a stale offset.
	l.start, l.pos, l.width = 0, 0, 0
	l.output = out

	// send delivers the text scanned since the previous lexem as a lexem of
	// type typ. It reports false when ctx was cancelled before a receiver
	// took the value.
	send := func(typ lexType) bool {
		tok := lexem{
			Type:  typ,
			Value: l.input[l.start:l.pos],
			Start: l.start,
			End:   l.pos,
		}
		select {
		case out <- tok:
			l.start = l.pos
			return true
		case <-ctx.Done():
			return false
		}
	}

	go func() {
		// Close the channel created by this call so the receiver's range ends.
		defer close(out)
		for {
			if ctx.Err() != nil {
				return
			}
			// Cases are tried in order; the accept helpers advance the
			// position when they match, so the order must not change.
			var typ lexType
			switch {
			case l.acceptWhile(" \n\t", false):
				// Whitespace separates lexems and is never emitted.
				l.ignore()
				continue
			case l.accept("("):
				typ = lp
			case l.accept(")"):
				typ = rp
			case l.accept(","):
				typ = sep
			case scanNumber(l):
				typ = number
			case scanOps(l):
				typ = op
			case scanWord(l):
				typ = word
			case scanQuotedString(l, `"`):
				typ = str
			case l.peek() == EOF:
				return
			default:
				// Nothing matched: report the failure and stop scanning.
				send(tokError)
				return
			}
			if !send(typ) {
				return
			}
		}
	}()
	return out
}

// emit sends the text between start and pos on the output channel as a lexem
// of the given type, then begins the next lexem at pos.
func (l *lex) emit(typ lexType) {
	from, to := l.start, l.pos
	l.output <- lexem{Type: typ, Value: l.input[from:to], Start: from, End: to}
	l.start = to
}

// next reads the rune at the current position and advances past it, recording
// its byte width. At the end of input it returns EOF and sets the width to 0.
func (l *lex) next() (r rune) {
	if l.pos < len(l.input) {
		var size int
		r, size = utf8.DecodeRuneInString(l.input[l.pos:])
		l.width = size
		l.pos += size
		return r
	}
	l.width = 0
	return EOF
}

// back moves the position back by the recorded width of the last rune read.
// The width itself is left unchanged.
func (l *lex) back() {
	l.pos = l.pos - l.width
}

// ignore discards the text scanned so far: the next lexem starts at the
// current position and the recorded rune width is cleared.
func (l *lex) ignore() {
	l.start, l.width = l.pos, 0
}

// peek returns the rune at the current position, or EOF at the end of input,
// without changing any scanner state.
//
// The rune is decoded directly rather than through next and back, so the
// recorded width of the last consumed rune stays intact and a back call made
// after peek still rewinds by the right amount.
func (l *lex) peek() (r rune) {
	if l.pos >= len(l.input) {
		return EOF
	}
	r, _ = utf8.DecodeRuneInString(l.input[l.pos:])
	return r
}

// accept consumes the next rune when it occurs in valid and reports whether it
// did. On a miss the position is stepped back so the rune can be read again.
func (l *lex) accept(valid string) bool {
	r := l.next()
	if !strings.ContainsRune(valid, r) {
		l.back()
		return false
	}
	return true
}

// acceptString consumes s when the unread input begins with it and reports
// whether it did. Nothing is consumed on a miss.
//
// The comparison is made against the text at the current position, not the
// start of the input. With caseInsentive set, letter case is ignored using
// Unicode simple case folding over a span of len(s) bytes, so the position
// always advances by exactly the number of input bytes that were matched.
// The recorded rune width is cleared on success.
func (l *lex) acceptString(s string, caseInsentive bool) bool {
	rest := l.input[l.pos:]
	if len(s) > len(rest) {
		return false
	}
	head := rest[:len(s)]
	if head != s && !(caseInsentive && strings.EqualFold(head, s)) {
		return false
	}
	l.width = 0
	l.pos += len(s)
	return true
}

// acceptAnyOf tries the given substrings in order with acceptString and stops
// at the first one that is accepted. It reports whether any was accepted.
func (l *lex) acceptAnyOf(s []string, caseInsentive bool) bool {
	matched := false
	for i := 0; i < len(s) && !matched; i++ {
		matched = l.acceptString(s[i], caseInsentive)
	}
	return matched
}

// acceptWhile consumes runes for as long as they occur in valid and reports
// whether the position advanced.
//
// With ignoreEscaped set, a backslash and the rune after it are consumed
// regardless of valid. Scanning stops at the first rune outside valid, which is
// left unread, or at the end of input. A run that extends to the end of input
// counts as a match like any other.
func (l *lex) acceptWhile(valid string, ignoreEscaped bool) bool {
	start := l.pos
	for {
		ch := l.next()
		switch {
		case ch == EOF:
			// The input ended while still matching: whatever was consumed
			// before the end still counts.
			return l.pos > start
		case ch == '\\' && ignoreEscaped:
			// Skip the escaped rune without testing it against valid.
			l.next()
		case !strings.ContainsRune(valid, ch):
			l.back()
			return l.pos > start
		}
	}
}

// acceptWhileNot consumes runes until one that occurs in invalid is found.
// That rune is left unread and the result reports whether the position
// advanced before it.
//
// With ignoreEscaped set, a backslash and the rune after it are consumed
// without being tested against invalid. If the end of input is reached before
// any rune from invalid shows up, the result is false even though the position
// has moved to the end of input.
func (l *lex) acceptWhileNot(invalid string, ignoreEscaped bool) bool {
	begin := l.pos
	for r := l.next(); r != EOF; r = l.next() {
		if ignoreEscaped && r == '\\' {
			l.next()
			continue
		}
		if strings.ContainsRune(invalid, r) {
			l.back()
			return l.pos > begin
		}
	}
	return false
}

// atStart reports whether the current lexem is still empty, that is, whether
// the position has not moved past the lexem's start.
func (l *lex) atStart() bool {
	return l.start == l.pos
}