aboutsummaryrefslogtreecommitdiff
path: root/src/NXP/Classes/Tokenizer.php
diff options
context:
space:
mode:
authorAlexander Kiryukhin <a.kiryukhin@mail.ru>2020-05-20 06:04:31 +0300
committerGitHub <noreply@github.com>2020-05-20 06:04:31 +0300
commit949334d6c37d384800c639ba9941e94f5157f5ac (patch)
treeecfbcc7e3033ef1f11c25baca1c9d32f496d8ee1 /src/NXP/Classes/Tokenizer.php
parent01415abc9d7f7401d9f4c09fbbec24930c65a097 (diff)
WIP: New generation (#62)
* Massive refactoring More clean structure Parsing without regular expressions * Cleanup unused imports * Fix version string for Travis * 7.1 downgrade * Fix readme
Diffstat (limited to 'src/NXP/Classes/Tokenizer.php')
-rw-r--r--src/NXP/Classes/Tokenizer.php302
1 files changed, 302 insertions, 0 deletions
diff --git a/src/NXP/Classes/Tokenizer.php b/src/NXP/Classes/Tokenizer.php
new file mode 100644
index 0000000..b72b869
--- /dev/null
+++ b/src/NXP/Classes/Tokenizer.php
@@ -0,0 +1,302 @@
+<?php
+/**
+ * This file is part of the MathExecutor package
+ *
+ * (c) Alexander Kiryukhin
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code
+ */
+
+namespace NXP\Classes;
+
+use NXP\Exception\IncorrectBracketsException;
+use NXP\Exception\UnknownOperatorException;
+use RuntimeException;
+use SplStack;
+
+/**
+ * @author Alexander Kiryukhin <a.kiryukhin@mail.ru>
+ */
+class Tokenizer
+{
+ /**
+ * @var Token[]
+ */
+ public $tokens = [];
+ /**
+ * @var string
+ */
+ private $input = "";
+ /**
+ * @var string
+ */
+ private $numberBuffer = "";
+ /**
+ * @var string
+ */
+ private $stringBuffer = "";
+ /**
+ * @var bool
+ */
+ private $allowNegative = true;
+ /**
+ * @var Operator[]
+ */
+ private $operators = [];
+
+ /**
+ * @var bool
+ */
+ private $inSingleQuotedString = false;
+
+ /**
+ * @var bool
+ */
+ private $inDoubleQuotedString = false;
+
+ /**
+ * Tokenizer constructor.
+ * @param string $input
+ * @param Operator[] $operators
+ */
+ public function __construct(string $input, array $operators)
+ {
+ $this->input = $input;
+ $this->operators = $operators;
+ }
+
+ public function tokenize()
+ {
+ foreach (str_split($this->input, 1) as $ch) {
+ switch (true) {
+ case $this->inSingleQuotedString:
+ if ($ch === "'") {
+ $this->tokens[] = new Token(Token::String, $this->stringBuffer);
+ $this->inSingleQuotedString = false;
+ $this->stringBuffer = "";
+ continue 2;
+ }
+ $this->stringBuffer .= $ch;
+ continue 2;
+ case $this->inDoubleQuotedString:
+ if ($ch === "\"") {
+ $this->tokens[] = new Token(Token::String, $this->stringBuffer);
+ $this->inDoubleQuotedString = false;
+ $this->stringBuffer = "";
+ continue 2;
+ }
+ $this->stringBuffer .= $ch;
+ continue 2;
+ case $ch == " " || $ch == "\n" || $ch == "\r" || $ch == "\t":
+ $this->tokens[] = new Token(Token::Space, "");
+ continue 2;
+ case $this->isNumber($ch):
+ if ($this->stringBuffer != "") {
+ $this->stringBuffer .= $ch;
+ continue 2;
+ }
+ $this->numberBuffer .= $ch;
+ $this->allowNegative = false;
+ break;
+ /** @noinspection PhpMissingBreakStatementInspection */
+ case strtolower($ch) === "e":
+ if ($this->numberBuffer != "" && strpos($this->numberBuffer, ".") !== false) {
+ $this->numberBuffer .= "e";
+ $this->allowNegative = true;
+ break;
+ }
+ case $this->isAlpha($ch):
+ if ($this->numberBuffer != "") {
+ $this->emptyNumberBufferAsLiteral();
+ $this->tokens[] = new Token(Token::Operator, "*");
+ }
+ $this->allowNegative = false;
+ $this->stringBuffer .= $ch;
+ break;
+ case $ch == "\"":
+ $this->inDoubleQuotedString = true;
+ continue 2;
+ case $ch == "'":
+ $this->inSingleQuotedString = true;
+ continue 2;
+
+ case $this->isDot($ch):
+ $this->numberBuffer .= $ch;
+ $this->allowNegative = false;
+ break;
+ case $this->isLP($ch):
+ if ($this->stringBuffer != "") {
+ $this->tokens[] = new Token(Token::Function, $this->stringBuffer);
+ $this->stringBuffer = "";
+ } elseif ($this->numberBuffer != "") {
+ $this->emptyNumberBufferAsLiteral();
+ $this->tokens[] = new Token(Token::Operator, "*");
+ }
+ $this->allowNegative = true;
+ $this->tokens[] = new Token(Token::LeftParenthesis, "");
+ break;
+ case $this->isRP($ch):
+ $this->emptyNumberBufferAsLiteral();
+ $this->emptyStrBufferAsVariable();
+ $this->allowNegative = false;
+ $this->tokens[] = new Token(Token::RightParenthesis, "");
+ break;
+ case $this->isComma($ch):
+ $this->emptyNumberBufferAsLiteral();
+ $this->emptyStrBufferAsVariable();
+ $this->allowNegative = true;
+ $this->tokens[] = new Token(Token::ParamSeparator, "");
+ break;
+ default:
+ if ($this->allowNegative && $ch == "-") {
+ $this->allowNegative = false;
+ $this->numberBuffer .= "-";
+ continue 2;
+ }
+ $this->emptyNumberBufferAsLiteral();
+ $this->emptyStrBufferAsVariable();
+ if (count($this->tokens) > 0) {
+ if ($this->tokens[count($this->tokens) - 1]->type === Token::Operator) {
+ $this->tokens[count($this->tokens) - 1]->value .= $ch;
+ } else {
+ $this->tokens[] = new Token(Token::Operator, $ch);
+ }
+ } else {
+ $this->tokens[] = new Token(Token::Operator, $ch);
+ }
+ $this->allowNegative = true;
+ }
+ }
+ $this->emptyNumberBufferAsLiteral();
+ $this->emptyStrBufferAsVariable();
+ return $this;
+ }
+
+ private function isNumber($ch)
+ {
+ return $ch >= '0' && $ch <= '9';
+ }
+
+ private function isAlpha($ch)
+ {
+ return $ch >= 'a' && $ch <= 'z' || $ch >= 'A' && $ch <= 'Z' || $ch == '_';
+ }
+
+ private function emptyNumberBufferAsLiteral()
+ {
+ if ($this->numberBuffer != "") {
+ $this->tokens[] = new Token(Token::Literal, $this->numberBuffer);
+ $this->numberBuffer = "";
+ }
+ }
+
+ private function isDot($ch)
+ {
+ return $ch == '.';
+ }
+
+ private function isLP($ch)
+ {
+ return $ch == '(';
+ }
+
+ private function isRP($ch)
+ {
+ return $ch == ')';
+ }
+
+ private function emptyStrBufferAsVariable()
+ {
+ if ($this->stringBuffer != "") {
+ $this->tokens[] = new Token(Token::Variable, $this->stringBuffer);
+ $this->stringBuffer = "";
+ }
+ }
+
+ private function isComma($ch)
+ {
+ return $ch == ',';
+ }
+
+ /**
+ * @return Token[] Array of tokens in revers polish notation
+ * @throws IncorrectBracketsException
+ * @throws UnknownOperatorException
+ */
+ public function buildReversePolishNotation()
+ {
+ $tokens = [];
+ /** @var SplStack<Token> $stack */
+ $stack = new SplStack();
+ foreach ($this->tokens as $token) {
+ switch ($token->type) {
+ case Token::Literal:
+ case Token::Variable:
+ case Token::String:
+ $tokens[] = $token;
+ break;
+ case Token::Function:
+ case Token::LeftParenthesis:
+ $stack->push($token);
+ break;
+ case Token::ParamSeparator:
+ while ($stack->top()->type !== Token::LeftParenthesis) {
+ if ($stack->count() === 0) {
+ throw new IncorrectBracketsException();
+ }
+ $tokens[] = $stack->pop();
+ }
+ break;
+ case Token::Operator:
+ if (!array_key_exists($token->value, $this->operators)) {
+ throw new UnknownOperatorException();
+ }
+ $op1 = $this->operators[$token->value];
+ while ($stack->count() > 0 && $stack->top()->type === Token::Operator) {
+ if (!array_key_exists($stack->top()->value, $this->operators)) {
+ throw new UnknownOperatorException();
+ }
+ $op2 = $this->operators[$stack->top()->value];
+ if ($op2->priority >= $op1->priority) {
+ $tokens[] = $stack->pop();
+ continue;
+ }
+ break;
+ }
+ $stack->push($token);
+ break;
+ case Token::RightParenthesis:
+ while (true) {
+ try {
+ $ctoken = $stack->pop();
+ if ($ctoken->type === Token::LeftParenthesis) {
+ break;
+ }
+ $tokens[] = $ctoken;
+ } catch (RuntimeException $e) {
+ throw new IncorrectBracketsException();
+ }
+ }
+ if ($stack->count() > 0 && $stack->top()->type == Token::Function) {
+ $tokens[] = $stack->pop();
+ }
+ break;
+ case Token::Space:
+ //do nothing
+ }
+ }
+ while ($stack->count() !== 0) {
+ if ($stack->top()->type === Token::LeftParenthesis || $stack->top()->type === Token::RightParenthesis) {
+ throw new IncorrectBracketsException();
+ }
+ if ($stack->top()->type === Token::Space) {
+ $stack->pop();
+ continue;
+ }
+ $tokens[] = $stack->pop();
+ }
+ return $tokens;
+ }
+}
+