| author | 2026-03-08 18:25:56 +0300 |
|---|---|
| committer | 2026-03-08 18:25:56 +0300 |
| commit | e4b0856634f78f6ddd1f4d925043264bf0d736d0 (patch) |
| tree | 88b91c200bef075dc7908dee640595f6b652a92c |
| parent | Initial release (diff) |
| download | lex-0.0.2.tar.gz lex-0.0.2.tar.bz2 lex-0.0.2.tar.xz lex-0.0.2.zip |
| -rw-r--r-- | lexer.go | 153 |
| -rw-r--r-- | tokenizer.go | 106 |

2 files changed, 153 insertions, 106 deletions
diff --git a/lexer.go b/lexer.go
new file mode 100644
index 0000000..2652ef0
--- /dev/null
+++ b/lexer.go
@@ -0,0 +1,153 @@
+// Package lex is a fairly simple lexer for arbitrary expressions.
+// It is almost entirely analogous to Rob Pike's lexer.
+package lex
+
+import (
+	"bufio"
+	"strings"
+)
+
+func Do(initState StateFunc, input *bufio.Reader) <-chan Token {
+	t := &Lexer{
+		input: input,
+		ch:    make(chan Token),
+		buf:   []rune{},
+	}
+
+	go func() {
+		defer close(t.ch)
+		for state := initState; state != nil; {
+			state = state(t)
+		}
+	}()
+
+	return t.ch
+}
+
+type Lexer struct {
+	input *bufio.Reader
+	width int
+	ch    chan Token
+	buf   []rune
+	pos   int
+}
+
+func (l *Lexer) Next() (ch rune, err error) {
+	ch, l.width, err = l.input.ReadRune()
+	if err != nil {
+		return 0, err
+	}
+	l.buf = append(l.buf, ch)
+	return ch, nil
+}
+
+func (l *Lexer) Peek() (rune, error) {
+	ch, err := l.Next()
+	if err != nil {
+		return 0, err
+	}
+	return ch, l.input.UnreadRune()
+}
+
+func (l *Lexer) Back() error {
+	if len(l.buf) == 0 {
+		return nil
+	}
+	l.pos -= l.width
+	l.buf = l.buf[:len(l.buf)-1]
+	return l.input.UnreadRune()
+}
+
+func (l *Lexer) Skip() {
+	l.buf = l.buf[:0] // drop the runes accumulated so far
+}
+
+func (l *Lexer) EmitToken(typ Typ) {
+	l.ch <- Token{
+		Typ:   typ,
+		Value: string(l.buf),
+		Pos:   l.pos,
+	}
+	l.buf = l.buf[:0]
+}
+
+func (l *Lexer) EmitError(err error) {
+	l.ch <- Token{
+		Error: err,
+		Value: string(l.buf),
+		Pos:   l.pos,
+	}
+	l.buf = l.buf[:0]
+}
+
+func (l *Lexer) Accept(valid string) bool {
+	ch, err := l.Next()
+	if err != nil {
+		l.Back()
+		return false
+	}
+	if strings.ContainsRune(valid, ch) {
+		return true
+	}
+	l.Back()
+	return false
+}
+
+func (l *Lexer) AcceptRun(valid string) bool {
+	ok := false
+	for l.Accept(valid) {
+		ok = true
+	}
+	return ok
+}
+
+func (l *Lexer) AcceptNotRun(invalid string, greedy bool) bool {
+	ok := false
+	for {
+		ch, err := l.Next()
+		if err != nil {
+			return ok
+		}
+		if strings.ContainsRune(invalid, ch) {
+			if !greedy {
+				l.Back()
+			}
+			return ok
+		}
+		ok = true
+	}
+}
+
+func (l *Lexer) AcceptAlph() bool {
+	ch, err := l.Next()
+	if err != nil {
+		l.Back()
+		return false
+	}
+	if 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' {
+		return true
+	}
+	l.Back()
+	return false
+}
+
+func (l *Lexer) AcceptDigit() bool {
+	ch, err := l.Next()
+	if err != nil {
+		l.Back()
+		return false
+	}
+	if '0' <= ch && ch <= '9' {
+		return true
+	}
+	l.Back()
+	return false
+}
+
+func (l *Lexer) AcceptSpace() bool {
+	return l.Accept(" \r\n\t")
+}
+
+func (l *Lexer) Empty() bool {
+	return len(l.buf) == 0
+}
diff --git a/tokenizer.go b/tokenizer.go
deleted file mode 100644
index 1ddd8d0..0000000
--- a/tokenizer.go
+++ /dev/null
@@ -1,106 +0,0 @@
-// Package lex is a fairly simple lexer for arbitrary expressions.
-// It is almost entirely analogous to Rob Pike's lexer.
-package lex
-
-import (
-	"strings"
-	"unicode/utf8"
-)
-
-func Do(initState StateFunc, input string) <-chan Token {
-	t := &Lexer{
-		pos:   0,
-		input: input,
-		start: 0,
-		ch:    make(chan Token),
-	}
-
-	go func() {
-		defer close(t.ch)
-		for state := initState; state != nil; {
-			state = state(t)
-		}
-	}()
-
-	return t.ch
-}
-
-type Lexer struct {
-	pos   int
-	start int
-	input string
-	width int
-	ch    chan Token
-}
-
-func (l *Lexer) Next() (ch rune) {
-	if l.pos >= len(l.input) {
-		l.width = 0
-		return 0
-	}
-	ch, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
-	l.pos += l.width
-	return ch
-}
-
-func (l *Lexer) Peek() rune {
-	ch := l.Next()
-	l.Back()
-	return ch
-}
-
-func (l *Lexer) Back() {
-	l.pos -= l.width
-}
-
-func (l *Lexer) Skip() {
-	l.start = l.pos
-}
-
-func (l *Lexer) EmitToken(typ Typ) {
-	value := ""
-	if l.pos > l.start {
-		value = l.input[l.start:l.pos]
-	}
-	l.ch <- Token{
-		Typ:   typ,
-		Value: value,
-		Pos:   l.pos,
-	}
-	l.start = l.pos
-}
-
-func (l *Lexer) EmitError(err error) {
-	value := ""
-	if l.pos > l.start {
-		value = l.input[l.start:l.pos]
-	}
-	l.ch <- Token{
-		Value: value,
-		Pos:   l.pos,
-		Error: err,
-	}
-}
-
-func (l *Lexer) Accept(valid string) bool {
-	if strings.ContainsRune(valid, l.Next()) {
-		return true
-	}
-	l.Back()
-
-	return false
-}
-
-func (l *Lexer) AcceptRun(valid string) {
-	for strings.ContainsRune(valid, l.Next()) {
-	}
-	l.Back()
-}
-
-func (l *Lexer) AcceptNotRun(invalid string, greedy bool) {
-	for ch := l.Next(); !strings.ContainsRune(invalid, ch) && ch != 0; ch = l.Next() {
-	}
-	if !greedy {
-		l.Back()
-	}
-}
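
For orientation, here is a minimal sketch of how the reworked, `bufio.Reader`-based API might be driven. It is written as it could appear in a `_test.go` file inside the package, so it can use `*Lexer` directly, and it assumes `Token`, `Typ` and `StateFunc` are declared elsewhere in the package as the code above implies (`Token` with `Typ`, `Value`, `Pos` and `Error` fields; `StateFunc` as `func(*Lexer) StateFunc`). The `lexNumber` state function, the `TypNumber` value and the sample input are hypothetical and not part of this commit.

```go
package lex

import (
	"bufio"
	"fmt"
	"strings"
)

// TypNumber is a hypothetical token kind; the actual values of Typ are
// declared elsewhere in the package and are not shown in this diff.
var TypNumber Typ

// lexNumber is a hypothetical state function: it skips whitespace,
// consumes a run of digits, emits it as one token, and repeats until
// the input is exhausted.
func lexNumber(l *Lexer) StateFunc {
	for l.AcceptSpace() {
	}
	l.Skip() // discard the whitespace read so far

	if !l.AcceptDigit() {
		return nil // no more digits: stop lexing
	}
	for l.AcceptDigit() {
	}
	l.EmitToken(TypNumber)
	return lexNumber
}

// ExampleDo feeds a small input through Do and prints the emitted tokens.
func ExampleDo() {
	in := bufio.NewReader(strings.NewReader("12 345 6\n"))
	for tok := range Do(lexNumber, in) {
		fmt.Printf("%q\n", tok.Value)
	}
	// Output:
	// "12"
	// "345"
	// "6"
}
```

Compared with the deleted `tokenizer.go`, the main change is that input is now streamed through a `*bufio.Reader` and the pending token text is kept in a rune buffer rather than as `start`/`pos` offsets into an input string, which is why `Next` and `Peek` now return an error at end of input instead of a zero rune.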
