path: root/tokenizer.go
author:    Alexander Kiryukhin <a.kiryukhin@mail.ru>  2020-02-13 22:55:13 +0300
committer: Alexander Kiryukhin <a.kiryukhin@mail.ru>  2020-02-13 22:55:13 +0300
commit:    24ca753ba3ab0f0d4fdc413c467bc304da06744f (patch)
tree:      9cdf12a41950369e3784a42173f433a07fda8fd6 /tokenizer.go
initial commit
Diffstat (limited to 'tokenizer.go')
-rw-r--r--  tokenizer.go  242
1 file changed, 242 insertions, 0 deletions
diff --git a/tokenizer.go b/tokenizer.go
new file mode 100644
index 0000000..df4b08f
--- /dev/null
+++ b/tokenizer.go
@@ -0,0 +1,242 @@
+// Copyright (c) 2020 Alexander Kiryukhin <a.kiryukhin@mail.ru>
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package executor
+
+import (
+ "fmt"
+ "strconv"
+)
+
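+// tokenizer splits an expression string into a flat list of tokens:
+// number literals, variables, function names, operators and parentheses.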
+type tokenizer struct {
+ str string
+ numberBuffer string
+ strBuffer string
+ allowNegative bool
+ tkns []*token
+ operators map[string]*Operator
+}
+
+func newTokenizer(str string, operators map[string]*Operator) *tokenizer {
+ return &tokenizer{str: str, numberBuffer: "", strBuffer: "", allowNegative: true, tkns: []*token{}, operators: operators}
+}
+
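+// emptyNumberBufferAsLiteral flushes the pending number buffer as a literal
+// token; it returns an error if the buffer is not a valid float.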
+func (t *tokenizer) emptyNumberBufferAsLiteral() error {
+ if t.numberBuffer != "" {
+ f, err := strconv.ParseFloat(t.numberBuffer, 64)
+ if err != nil {
+ return fmt.Errorf("invalid number %s", t.numberBuffer)
+ }
+ t.tkns = append(t.tkns, newToken(literalType, "", f))
+ }
+ t.numberBuffer = ""
+ return nil
+}
+
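+// emptyStrBufferAsVariable flushes the pending identifier buffer as a variable token.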
+func (t *tokenizer) emptyStrBufferAsVariable() {
+ if t.strBuffer != "" {
+ t.tkns = append(t.tkns, newToken(variableType, t.strBuffer, 0))
+ t.strBuffer = ""
+ }
+}
+
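+// tokenize walks the input string and fills t.tkns, skipping spaces.
+// For example, "2*(x+1)" yields: literal 2, operator "*", "(", variable x,
+// operator "+", literal 1, ")".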
+func (t *tokenizer) tokenize() error {
+ for _, ch := range t.str {
+ if ch == ' ' {
+ continue
+ }
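+ // The tokenizer works byte by byte, so input is assumed to be ASCII;
+ // a multi-byte rune would be truncated here.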
+ ch := byte(ch)
+ switch {
+ case isAlpha(ch):
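+ // A number immediately followed by an identifier ("2x") gets an
+ // implicit "*" operator inserted between them.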
+ if t.numberBuffer != "" {
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.tkns = append(t.tkns, newToken(operatorType, "*", 0))
+ t.numberBuffer = ""
+ }
+ t.allowNegative = false
+ t.strBuffer += string(ch)
+ case isNumber(ch):
+ t.numberBuffer += string(ch)
+ t.allowNegative = false
+ case isDot(ch):
+ t.numberBuffer += string(ch)
+ t.allowNegative = false
+ case isLP(ch):
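+ // An identifier directly followed by "(" becomes a function token;
+ // a number followed by "(" gets an implicit "*".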
+ if t.strBuffer != "" {
+ t.tkns = append(t.tkns, newToken(functionType, t.strBuffer, 0))
+ t.strBuffer = ""
+ } else if t.numberBuffer != "" {
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.tkns = append(t.tkns, newToken(operatorType, "*", 0))
+ t.numberBuffer = ""
+ }
+ t.allowNegative = true
+ t.tkns = append(t.tkns, newToken(leftParenthesisType, "", 0))
+ case isRP(ch):
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ t.allowNegative = false
+ t.tkns = append(t.tkns, newToken(rightParenthesisType, "", 0))
+ case isComma(ch):
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ t.tkns = append(t.tkns, newToken(funcSep, "", 0))
+ t.allowNegative = true
+ default:
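+ // Anything else is an operator character. A "-" in a position where
+ // a negative number may start (expression start, after "(", "," or an
+ // operator) is folded into the number buffer instead.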
+ if t.allowNegative && ch == '-' {
+ t.numberBuffer += "-"
+ t.allowNegative = false
+ continue
+ }
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
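+ // Consecutive operator characters are merged into a single token,
+ // so ">" followed by "=" becomes ">=".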
+ if len(t.tkns) > 0 && t.tkns[len(t.tkns)-1].Type == operatorType {
+ t.tkns[len(t.tkns)-1].SValue += string(ch)
+ } else {
+ t.tkns = append(t.tkns, newToken(operatorType, string(ch), 0))
+ }
+ t.allowNegative = true
+ }
+ }
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ return nil
+}
+
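+// toRPN converts the token list to reverse Polish notation with the
+// shunting-yard algorithm, using each operator's Priority and Assoc.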
+func (t *tokenizer) toRPN() ([]*token, error) {
+ var tkns []*token
+ var stack tokenStack
+ for _, tkn := range t.tkns {
+ switch tkn.Type {
+ case literalType:
+ tkns = append(tkns, tkn)
+ case variableType:
+ tkns = append(tkns, tkn)
+ case functionType:
+ stack.Push(tkn)
+ case funcSep:
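+ // A function argument separator flushes pending operators back to
+ // the opening parenthesis.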
+ for stack.Head().Type != leftParenthesisType {
+ if stack.Head().Type == eof {
+ return nil, ErrInvalidExpression
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ case operatorType:
+ leftOp, ok := t.operators[tkn.SValue]
+ if !ok {
+ return nil, fmt.Errorf("unknown operator: %s", tkn.SValue)
+ }
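+ // Pop operators while the one on top of the stack binds more tightly
+ // than the incoming operator, or equally tightly and the incoming
+ // operator is left-associative (standard shunting-yard ordering).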
+ for {
+ if stack.Head().Type == operatorType {
+ rightOp, ok := t.operators[stack.Head().SValue]
+ if !ok {
+ return nil, fmt.Errorf("unknown operator: %s", stack.Head().SValue)
+ }
+ if leftOp.Priority < rightOp.Priority || (leftOp.Priority == rightOp.Priority && leftOp.Assoc != RightAssoc) {
+ tkns = append(tkns, stack.Pop())
+ continue
+ }
+ }
+ break
+ }
+ stack.Push(tkn)
+ case leftParenthesisType:
+ stack.Push(tkn)
+ case rightParenthesisType:
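+ // A ")" pops operators until the matching "(", then pops a function
+ // token if one directly precedes that parenthesis.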
+ for stack.Head().Type != leftParenthesisType {
+ if stack.Head().Type == eof {
+ return nil, ErrInvalidParenthesis
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ stack.Pop()
+ if stack.Head().Type == functionType {
+ tkns = append(tkns, stack.Pop())
+ }
+ }
+ }
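+ // Drain the remaining operators; a leftover "(" means unbalanced parentheses.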
+ for stack.Head().Type != eof {
+ if stack.Head().Type == leftParenthesisType {
+ return nil, ErrInvalidParenthesis
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ return tkns, nil
+}
+
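+// tokenStack is a simple LIFO stack of tokens used by toRPN.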
+type tokenStack struct {
+ ts []*token
+}
+
+func (ts *tokenStack) Push(t *token) {
+ ts.ts = append(ts.ts, t)
+}
+
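+// Pop removes and returns the top token; on an empty stack it returns an eof sentinel.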
+func (ts *tokenStack) Pop() *token {
+ if len(ts.ts) == 0 {
+ return &token{Type: eof}
+ }
+ var head *token
+ head, ts.ts = ts.ts[len(ts.ts)-1], ts.ts[:len(ts.ts)-1]
+ return head
+}
+
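+// Head returns the top token without removing it; on an empty stack it returns an eof sentinel.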
+func (ts *tokenStack) Head() *token {
+ if len(ts.ts) == 0 {
+ return &token{Type: eof}
+ }
+ return ts.ts[len(ts.ts)-1]
+}
+
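+// Single-byte character classes used by tokenize; input is assumed to be ASCII.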
+func isComma(ch byte) bool {
+ return ch == ','
+}
+
+func isDot(ch byte) bool {
+ return ch == '.'
+}
+
+func isNumber(ch byte) bool {
+ return ch >= '0' && ch <= '9'
+}
+
+func isAlpha(ch byte) bool {
+ return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_'
+}
+
+func isLP(ch byte) bool {
+ return ch == '('
+}
+
+func isRP(ch byte) bool {
+ return ch == ')'
+}