aboutsummaryrefslogblamecommitdiff
path: root/src/NXP/Classes/Tokenizer.php
blob: caf395f1c511f3b333b4718ab5f2a1ff679c1ac1 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                                                          























































                                                                
                                     
     
                                                     





























                                                                                        
                                                                       








































































                                                                                                  
                                                



                                        
                                               



                                                                                  
                                                        






                                                                             
                                             



                          
                                            



                          
                                            



                          
                                                      






                                                                              
                                               








                                                                
                                                        










































































                                                                                                                    
<?php
/**
 * This file is part of the MathExecutor package
 *
 * (c) Alexander Kiryukhin
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code
 */

namespace NXP\Classes;

use NXP\Exception\IncorrectBracketsException;
use NXP\Exception\UnknownOperatorException;
use RuntimeException;
use SplStack;

/**
 * Splits an expression string into a flat list of tokens and converts that
 * list into reverse Polish notation.
 *
 * @author Alexander Kiryukhin <a.kiryukhin@mail.ru>
 */
class Tokenizer
{
    /**
     * Tokens produced by tokenize(), in the order they were emitted.
     *
     * @var Token[]
     */
    public $tokens = [];
    /**
     * Expression being tokenized.
     *
     * @var string
     */
    private $input = "";
    /**
     * Characters of the numeric literal currently being read.
     *
     * @var string
     */
    private $numberBuffer = "";
    /**
     * Characters of the identifier or quoted string currently being read.
     *
     * @var string
     */
    private $stringBuffer = "";
    /**
     * Whether a "-" at the current position starts a negative number
     * rather than being treated as an operator character.
     *
     * @var bool
     */
    private $allowNegative = true;
    /**
     * Known operators, looked up by the operator token's value.
     *
     * @var Operator[]
     */
    private $operators = [];

    /**
     * True while reading the contents of a '...' string.
     *
     * @var bool
     */
    private $inSingleQuotedString = false;

    /**
     * True while reading the contents of a "..." string.
     *
     * @var bool
     */
    private $inDoubleQuotedString = false;

    /**
     * Tokenizer constructor.
     * @param string $input Expression to tokenize
     * @param Operator[] $operators Operators indexed by their textual representation
     */
    public function __construct(string $input, array $operators)
    {
        $this->input = $input;
        $this->operators = $operators;
    }

    /**
     * Reads the input one byte at a time and fills $this->tokens.
     *
     * @return self The same instance, so calls can be chained
     */
    public function tokenize() : self
    {
        foreach (str_split($this->input, 1) as $ch) {
            // switch (true) runs the first case whose expression is truthy,
            // so the order of the cases below defines their precedence.
            switch (true) {
                case $this->inSingleQuotedString:
                    // The closing quote ends the string; anything else is content.
                    if ($ch === "'") {
                        $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                        $this->inSingleQuotedString = false;
                        $this->stringBuffer = "";
                        continue 2;
                    }
                    $this->stringBuffer .= $ch;
                    continue 2;
                case $this->inDoubleQuotedString:
                    if ($ch === "\"") {
                        $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                        $this->inDoubleQuotedString = false;
                        $this->stringBuffer = "";
                        continue 2;
                    }
                    $this->stringBuffer .= $ch;
                    continue 2;
                case $ch == " " || $ch == "\n" || $ch == "\r" || $ch == "\t":
                    // Pending buffers are not flushed here, so the Space token is
                    // recorded ahead of any literal/variable still being accumulated.
                    $this->tokens[] = new Token(Token::Space, "");
                    continue 2;
                case $this->isNumber($ch):
                    // A digit following identifier characters belongs to the identifier.
                    if ($this->stringBuffer != "") {
                        $this->stringBuffer .= $ch;
                        continue 2;
                    }
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;
                /** @noinspection PhpMissingBreakStatementInspection */
                case strtolower($ch) === "e":
                    // "e" is an exponent marker only when the number read so far
                    // contains a decimal point; the exponent may then be negative.
                    if ($this->numberBuffer != "" && strpos($this->numberBuffer, ".") !== false) {
                        $this->numberBuffer .= "e";
                        $this->allowNegative = true;
                        break;
                    }
                    // Otherwise fall through and treat it as an ordinary letter.
                case $this->isAlpha($ch):
                    // A number directly followed by a letter is an implicit multiplication.
                    if ($this->numberBuffer != "") {
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, "*");
                    }
                    $this->allowNegative = false;
                    $this->stringBuffer .= $ch;
                    break;
                case $ch == "\"":
                    $this->inDoubleQuotedString = true;
                    continue 2;
                case $ch == "'":
                    $this->inSingleQuotedString = true;
                    continue 2;

                case $this->isDot($ch):
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;
                case $this->isLP($ch):
                    if ($this->stringBuffer != "") {
                        // An identifier directly before "(" is a function name.
                        $this->tokens[] = new Token(Token::Function, $this->stringBuffer);
                        $this->stringBuffer = "";
                    } elseif ($this->numberBuffer != "") {
                        // A number directly before "(" is an implicit multiplication.
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, "*");
                    }
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LeftParenthesis, "");
                    break;
                case $this->isRP($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = false;
                    $this->tokens[] = new Token(Token::RightParenthesis, "");
                    break;
                case $this->isComma($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::ParamSeparator, "");
                    break;
                default:
                    // A "-" where a value may start begins a negative number.
                    if ($this->allowNegative && $ch == "-") {
                        $this->allowNegative = false;
                        $this->numberBuffer .= "-";
                        continue 2;
                    }
                    // Every other character is part of an operator.
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    // Adjacent operator characters are merged into the previous
                    // operator token, building multi-character operators.
                    $lastIndex = count($this->tokens) - 1;
                    if ($lastIndex >= 0 && $this->tokens[$lastIndex]->type === Token::Operator) {
                        $this->tokens[$lastIndex]->value .= $ch;
                    } else {
                        $this->tokens[] = new Token(Token::Operator, $ch);
                    }
                    $this->allowNegative = true;
            }
        }
        // Flush whatever is still buffered at the end of the input.
        $this->emptyNumberBufferAsLiteral();
        $this->emptyStrBufferAsVariable();
        return $this;
    }

    /**
     * @param string $ch Single character
     * @return bool True for the ASCII digits 0-9
     */
    private function isNumber(string $ch) : bool
    {
        return $ch >= '0' && $ch <= '9';
    }

    /**
     * @param string $ch Single character
     * @return bool True for ASCII letters and the underscore
     */
    private function isAlpha(string $ch) : bool
    {
        return $ch >= 'a' && $ch <= 'z' || $ch >= 'A' && $ch <= 'Z' || $ch == '_';
    }

    /**
     * Emits the buffered number, if any, as a Literal token and clears the buffer.
     */
    private function emptyNumberBufferAsLiteral() : void
    {
        if ($this->numberBuffer != "") {
            $this->tokens[] = new Token(Token::Literal, $this->numberBuffer);
            $this->numberBuffer = "";
        }
    }

    /**
     * @param string $ch Single character
     * @return bool True for the decimal point
     */
    private function isDot(string $ch) : bool
    {
        return $ch == '.';
    }

    /**
     * @param string $ch Single character
     * @return bool True for a left parenthesis
     */
    private function isLP(string $ch) : bool
    {
        return $ch == '(';
    }

    /**
     * @param string $ch Single character
     * @return bool True for a right parenthesis
     */
    private function isRP(string $ch) : bool
    {
        return $ch == ')';
    }

    /**
     * Emits the buffered identifier, if any, as a Variable token and clears the buffer.
     */
    private function emptyStrBufferAsVariable() : void
    {
        if ($this->stringBuffer != "") {
            $this->tokens[] = new Token(Token::Variable, $this->stringBuffer);
            $this->stringBuffer = "";
        }
    }

    /**
     * @param string $ch Single character
     * @return bool True for the parameter separator
     */
    private function isComma(string $ch) : bool
    {
        return $ch == ',';
    }

    /**
     * Reorders $this->tokens into reverse Polish notation using an operator stack.
     *
     * @return Token[] Array of tokens in reverse Polish notation
     * @throws IncorrectBracketsException When parentheses are unbalanced or a
     *                                    parameter separator appears outside parentheses
     * @throws UnknownOperatorException When an operator token is not in the operator list
     */
    public function buildReversePolishNotation() : array
    {
        $tokens = [];
        /** @var SplStack<Token> $stack */
        $stack = new SplStack();
        foreach ($this->tokens as $token) {
            switch ($token->type) {
                case Token::Literal:
                case Token::Variable:
                case Token::String:
                    // Operands go straight to the output.
                    $tokens[] = $token;
                    break;
                case Token::Function:
                case Token::LeftParenthesis:
                    $stack->push($token);
                    break;
                case Token::ParamSeparator:
                    // Move pending operators to the output until the enclosing "("
                    // is on top. The emptiness check must come before top(), which
                    // throws a RuntimeException when the stack is empty.
                    while ($stack->count() > 0 && $stack->top()->type !== Token::LeftParenthesis) {
                        $tokens[] = $stack->pop();
                    }
                    if ($stack->count() === 0) {
                        // No enclosing "(" was found for this separator.
                        throw new IncorrectBracketsException();
                    }
                    break;
                case Token::Operator:
                    if (!array_key_exists($token->value, $this->operators)) {
                        throw new UnknownOperatorException();
                    }
                    $op1 = $this->operators[$token->value];
                    // Operators already on the stack with a priority greater than or
                    // equal to the incoming one are moved to the output first.
                    while ($stack->count() > 0 && $stack->top()->type === Token::Operator) {
                        if (!array_key_exists($stack->top()->value, $this->operators)) {
                            throw new UnknownOperatorException();
                        }
                        $op2 = $this->operators[$stack->top()->value];
                        if ($op2->priority >= $op1->priority) {
                            $tokens[] = $stack->pop();
                            continue;
                        }
                        break;
                    }
                    $stack->push($token);
                    break;
                case Token::RightParenthesis:
                    // Unwind to the matching "("; pop() on an empty stack raises a
                    // RuntimeException, which means there was no matching "(".
                    while (true) {
                        try {
                            $ctoken = $stack->pop();
                            if ($ctoken->type === Token::LeftParenthesis) {
                                break;
                            }
                            $tokens[] = $ctoken;
                        } catch (RuntimeException $e) {
                            throw new IncorrectBracketsException();
                        }
                    }
                    // A function name directly below the "(" owns this argument list.
                    if ($stack->count() > 0 && $stack->top()->type === Token::Function) {
                        $tokens[] = $stack->pop();
                    }
                    break;
                case Token::Space:
                    // Whitespace carries no meaning in the output.
                    break;
            }
        }
        // Drain what is left; a parenthesis remaining here was never closed.
        while ($stack->count() !== 0) {
            if ($stack->top()->type === Token::LeftParenthesis || $stack->top()->type === Token::RightParenthesis) {
                throw new IncorrectBracketsException();
            }
            if ($stack->top()->type === Token::Space) {
                $stack->pop();
                continue;
            }
            $tokens[] = $stack->pop();
        }
        return $tokens;
    }
}