path: root/tokenizer.go
author:    Alexander Kiryukhin <a.kiryukhin@mail.ru>  2020-02-13 22:55:13 +0300
committer: Alexander Kiryukhin <a.kiryukhin@mail.ru>  2020-02-13 22:55:13 +0300
commit:    24ca753ba3ab0f0d4fdc413c467bc304da06744f (patch)
tree:      9cdf12a41950369e3784a42173f433a07fda8fd6 /tokenizer.go
initial commit
Diffstat (limited to 'tokenizer.go')
-rw-r--r--  tokenizer.go  242
1 file changed, 242 insertions, 0 deletions
diff --git a/tokenizer.go b/tokenizer.go
new file mode 100644
index 0000000..df4b08f
--- /dev/null
+++ b/tokenizer.go
@@ -0,0 +1,242 @@
+// Copyright (c) 2020 Alexander Kiryukhin <a.kiryukhin@mail.ru>
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package executor
+
+import (
+ "fmt"
+ "strconv"
+)
+
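+// tokenizer splits an expression string into a flat list of tokens:
+// number literals, variables, function names, operators and parentheses.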
+type tokenizer struct {
+ str string
+ numberBuffer string
+ strBuffer string
+ allowNegative bool
+ tkns []*token
+ operators map[string]*Operator
+}
+
+func newTokenizer(str string, operators map[string]*Operator) *tokenizer {
+ return &tokenizer{str: str, numberBuffer: "", strBuffer: "", allowNegative: true, tkns: []*token{}, operators: operators}
+}
+
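+// emptyNumberBufferAsLiteral flushes the pending number buffer as a literal
+// token; it returns an error if the buffer is not a valid float.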
+func (t *tokenizer) emptyNumberBufferAsLiteral() error {
+ if t.numberBuffer != "" {
+ f, err := strconv.ParseFloat(t.numberBuffer, 64)
+ if err != nil {
+ return fmt.Errorf("invalid number %s", t.numberBuffer)
+ }
+ t.tkns = append(t.tkns, newToken(literalType, "", f))
+ }
+ t.numberBuffer = ""
+ return nil
+}
+
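+// emptyStrBufferAsVariable flushes the pending identifier buffer as a variable token.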
+func (t *tokenizer) emptyStrBufferAsVariable() {
+ if t.strBuffer != "" {
+ t.tkns = append(t.tkns, newToken(variableType, t.strBuffer, 0))
+ t.strBuffer = ""
+ }
+}
+
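+// tokenize walks the input string and fills t.tkns, skipping spaces.
+// For example, "2*(x+1)" yields: literal 2, operator "*", "(", variable x,
+// operator "+", literal 1, ")".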
+func (t *tokenizer) tokenize() error {
+ for _, ch := range t.str {
+ if ch == ' ' {
+ continue
+ }
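+ // The tokenizer works byte by byte, so input is assumed to be ASCII;
+ // a multi-byte rune would be truncated here.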
+ ch := byte(ch)
+ switch {
+ case isAlpha(ch):
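+ // A number immediately followed by an identifier ("2x") gets an
+ // implicit "*" operator inserted between them.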
+ if t.numberBuffer != "" {
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.tkns = append(t.tkns, newToken(operatorType, "*", 0))
+ t.numberBuffer = ""
+ }
+ t.allowNegative = false
+ t.strBuffer += string(ch)
+ case isNumber(ch):
+ t.numberBuffer += string(ch)
+ t.allowNegative = false
+ case isDot(ch):
+ t.numberBuffer += string(ch)
+ t.allowNegative = false
+ case isLP(ch):
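+ // An identifier directly followed by "(" becomes a function token;
+ // a number followed by "(" gets an implicit "*".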
+ if t.strBuffer != "" {
+ t.tkns = append(t.tkns, newToken(functionType, t.strBuffer, 0))
+ t.strBuffer = ""
+ } else if t.numberBuffer != "" {
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.tkns = append(t.tkns, newToken(operatorType, "*", 0))
+ t.numberBuffer = ""
+ }
+ t.allowNegative = true
+ t.tkns = append(t.tkns, newToken(leftParenthesisType, "", 0))
+ case isRP(ch):
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ t.allowNegative = false
+ t.tkns = append(t.tkns, newToken(rightParenthesisType, "", 0))
+ case isComma(ch):
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ t.tkns = append(t.tkns, newToken(funcSep, "", 0))
+ t.allowNegative = true
+ default:
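+ // Anything else is an operator character. A "-" in a position where
+ // a negative number may start (expression start, after "(", "," or an
+ // operator) is folded into the number buffer instead.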
+ if t.allowNegative && ch == '-' {
+ t.numberBuffer += "-"
+ t.allowNegative = false
+ continue
+ }
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
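+ // Consecutive operator characters are merged into a single token,
+ // so ">" followed by "=" becomes ">=".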
+ if len(t.tkns) > 0 && t.tkns[len(t.tkns)-1].Type == operatorType {
+ t.tkns[len(t.tkns)-1].SValue += string(ch)
+ } else {
+ t.tkns = append(t.tkns, newToken(operatorType, string(ch), 0))
+ }
+ t.allowNegative = true
+ }
+ }
+ if err := t.emptyNumberBufferAsLiteral(); err != nil {
+ return err
+ }
+ t.emptyStrBufferAsVariable()
+ return nil
+}
+
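+// toRPN converts the token list to reverse Polish notation with the
+// shunting-yard algorithm, using each operator's Priority and Assoc.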
+func (t *tokenizer) toRPN() ([]*token, error) {
+ var tkns []*token
+ var stack tokenStack
+ for _, tkn := range t.tkns {
+ switch tkn.Type {
+ case literalType:
+ tkns = append(tkns, tkn)
+ case variableType:
+ tkns = append(tkns, tkn)
+ case functionType:
+ stack.Push(tkn)
+ case funcSep:
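+ // A function argument separator flushes pending operators back to
+ // the opening parenthesis.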
+ for stack.Head().Type != leftParenthesisType {
+ if stack.Head().Type == eof {
+ return nil, ErrInvalidExpression
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ case operatorType:
+ leftOp, ok := t.operators[tkn.SValue]
+ if !ok {
+ return nil, fmt.Errorf("unknown operator: %s", tkn.SValue)
+ }
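+ // Pop operators while the one on top of the stack binds more tightly
+ // than the incoming operator, or equally tightly and the incoming
+ // operator is left-associative (standard shunting-yard ordering).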
+ for {
+ if stack.Head().Type == operatorType {
+ rightOp, ok := t.operators[stack.Head().SValue]
+ if !ok {
+ return nil, fmt.Errorf("unknown operator: %s", stack.Head().SValue)
+ }
+ if leftOp.Priority < rightOp.Priority || (leftOp.Priority == rightOp.Priority && leftOp.Assoc != RightAssoc) {
+ tkns = append(tkns, stack.Pop())
+ continue
+ }
+ }
+ break
+ }
+ stack.Push(tkn)
+ case leftParenthesisType:
+ stack.Push(tkn)
+ case rightParenthesisType:
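+ // A ")" pops operators until the matching "(", then pops a function
+ // token if one directly precedes that parenthesis.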
+ for stack.Head().Type != leftParenthesisType {
+ if stack.Head().Type == eof {
+ return nil, ErrInvalidParenthesis
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ stack.Pop()
+ if stack.Head().Type == functionType {
+ tkns = append(tkns, stack.Pop())
+ }
+ }
+ }
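+ // Drain the remaining operators; a leftover "(" means unbalanced parentheses.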
+ for stack.Head().Type != eof {
+ if stack.Head().Type == leftParenthesisType {
+ return nil, ErrInvalidParenthesis
+ }
+ tkns = append(tkns, stack.Pop())
+ }
+ return tkns, nil
+}
+
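+// tokenStack is a simple LIFO stack of tokens used by toRPN.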
+type tokenStack struct {
+ ts []*token
+}
+
+func (ts *tokenStack) Push(t *token) {
+ ts.ts = append(ts.ts, t)
+}
+
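+// Pop removes and returns the top token; on an empty stack it returns an eof sentinel.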
+func (ts *tokenStack) Pop() *token {
+ if len(ts.ts) == 0 {
+ return &token{Type: eof}
+ }
+ var head *token
+ head, ts.ts = ts.ts[len(ts.ts)-1], ts.ts[:len(ts.ts)-1]
+ return head
+}
+
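+// Head returns the top token without removing it; on an empty stack it returns an eof sentinel.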
+func (ts *tokenStack) Head() *token {
+ if len(ts.ts) == 0 {
+ return &token{Type: eof}
+ }
+ return ts.ts[len(ts.ts)-1]
+}
+
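+// Single-byte character classes used by tokenize; input is assumed to be ASCII.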
+func isComma(ch byte) bool {
+ return ch == ','
+}
+
+func isDot(ch byte) bool {
+ return ch == '.'
+}
+
+func isNumber(ch byte) bool {
+ return ch >= '0' && ch <= '9'
+}
+
+func isAlpha(ch byte) bool {
+ return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch == '_'
+}
+
+func isLP(ch byte) bool {
+ return ch == '('
+}
+
+func isRP(ch byte) bool {
+ return ch == ')'
+}