diff options
Diffstat (limited to '')
| -rw-r--r-- | tokenizer.go | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/tokenizer.go b/tokenizer.go new file mode 100644 index 0000000..1ddd8d0 --- /dev/null +++ b/tokenizer.go @@ -0,0 +1,106 @@ +// Package lex представляет собой достаточно простой лексер произвольных выражений. +// Практически полностью аналогичен лексеру от Роба Пайка. +package lex + +import ( + "strings" + "unicode/utf8" +) + +func Do(initState StateFunc, input string) <-chan Token { + t := &Lexer{ + pos: 0, + input: input, + start: 0, + ch: make(chan Token), + } + + go func() { + defer close(t.ch) + for state := initState; state != nil; { + state = state(t) + } + }() + + return t.ch +} + +type Lexer struct { + pos int + start int + input string + width int + ch chan Token +} + +func (l *Lexer) Next() (ch rune) { + if l.pos >= len(l.input) { + l.width = 0 + return 0 + } + ch, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return ch +} + +func (l *Lexer) Peek() rune { + ch := l.Next() + l.Back() + return ch +} + +func (l *Lexer) Back() { + l.pos -= l.width +} + +func (l *Lexer) Skip() { + l.start = l.pos +} + +func (l *Lexer) EmitToken(typ Typ) { + value := "" + if l.pos > l.start { + value = l.input[l.start:l.pos] + } + l.ch <- Token{ + Typ: typ, + Value: value, + Pos: l.pos, + } + l.start = l.pos +} + +func (l *Lexer) EmitError(err error) { + value := "" + if l.pos > l.start { + value = l.input[l.start:l.pos] + } + l.ch <- Token{ + Value: value, + Pos: l.pos, + Error: err, + } +} + +func (l *Lexer) Accept(valid string) bool { + if strings.ContainsRune(valid, l.Next()) { + return true + } + l.Back() + + return false +} + +func (l *Lexer) AcceptRun(valid string) { + for strings.ContainsRune(valid, l.Next()) { + } + l.Back() +} + +func (l *Lexer) AcceptNotRun(invalid string, greedy bool) { + for ch := l.Next(); !strings.ContainsRune(invalid, ch) && ch != 0; ch = l.Next() { + } + if !greedy { + l.Back() + } +} |
