aboutsummaryrefslogblamecommitdiff
path: root/src/NXP/Classes/Tokenizer.php
blob: f8056c3ab00397d438b40536a5a1149111695e3d (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                                                          
















                                                    
                        


                  
                               


                  
                               





























                                                                
                                     
     
                                                     




                                                                                        
                                                 




                                                 
                                      

                                                                                        
                                                 



                                               

                                                                             

                                          
                                                    





                                                   
                                                                       



                                                                                                    

                              
                               
                                         
                                                      
                                                            
                                                                          



                                                 
                                










                                                       
                                                    
                                                                                          

                                                            
                                                            
                                                                          

                                                
                                                                            




                                                        
                                                                             




                                                        
                                                                           

                          

                                                        

                                                         
                                                                                                       






                                                                                                                 


                                                        






                                                                                                    


                                                                              








                                                
                                                



                                        
                                               



                                                                                  
                                                        
     
                                          
                                                                             
                                     


         
                                             



                          
                                            



                          
                                            



                          
                                                      
     
                                        
                                                                              
                                     


         
                                               








                                                                
                                                        
























                                                                             
                                                                          



                                                                                            
                                                                                     










































                                                                                                                    
<?php
/**
 * This file is part of the MathExecutor package
 *
 * (c) Alexander Kiryukhin
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code
 */

namespace NXP\Classes;

use NXP\Exception\IncorrectBracketsException;
use NXP\Exception\UnknownOperatorException;
use RuntimeException;
use SplStack;

/**
 * Splits an infix expression string into Token objects and reorders them
 * into reverse polish notation (shunting-yard style).
 *
 * Typical usage is `(new Tokenizer($input, $operators))->tokenize()->buildReversePolishNotation()`.
 *
 * @author Alexander Kiryukhin <a.kiryukhin@mail.ru>
 */
class Tokenizer
{
    /**
     * Tokens produced by tokenize(), in the order they appear in the input.
     *
     * @var Token[]
     */
    public $tokens = [];
    /**
     * Raw expression text to tokenize.
     *
     * @var string
     */
    private $input = '';
    /**
     * Characters of the numeric literal currently being read.
     *
     * @var string
     */
    private $numberBuffer = '';
    /**
     * Characters of the identifier or quoted string currently being read.
     *
     * @var string
     */
    private $stringBuffer = '';
    /**
     * Whether a '+' or '-' at the current position is a unary sign rather than a binary operator.
     *
     * @var bool
     */
    private $allowNegative = true;
    /**
     * Known operators, indexed by the operator token value (e.g. '*', 'uNeg').
     *
     * @var Operator[]
     */
    private $operators = [];

    /**
     * True while reading the contents of a '...' string.
     *
     * @var bool
     */
    private $inSingleQuotedString = false;

    /**
     * True while reading the contents of a "..." string.
     *
     * @var bool
     */
    private $inDoubleQuotedString = false;

    /**
     * Tokenizer constructor.
     * @param string $input expression to tokenize
     * @param Operator[] $operators known operators, keyed by operator token value
     */
    public function __construct(string $input, array $operators)
    {
        $this->input = $input;
        $this->operators = $operators;
    }

    /**
     * Scans the input one byte at a time and appends the resulting tokens to $this->tokens.
     *
     * Notable rules:
     *  - a number directly followed by a letter or '(' gets an implicit '*' operator;
     *  - an identifier directly followed by '(' becomes a Function token, otherwise a Variable;
     *  - '+'/'-' become the unary operators 'uPos'/'uNeg' where a sign is allowed;
     *  - consecutive operator characters are merged into one Operator token (e.g. '>=').
     *
     * @return self this instance, to allow call chaining
     */
    public function tokenize() : self
    {
        foreach (str_split($this->input, 1) as $ch) {
            switch (true) {
                case $this->inSingleQuotedString:
                    // Inside '...': everything up to the closing quote is string content.
                    if ($ch === "'") {
                        $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                        $this->inSingleQuotedString = false;
                        $this->stringBuffer = '';
                        continue 2;
                    }
                    $this->stringBuffer .= $ch;
                    continue 2;
                case $this->inDoubleQuotedString:
                    // Inside "...": everything up to the closing quote is string content.
                    if ($ch === '"') {
                        $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                        $this->inDoubleQuotedString = false;
                        $this->stringBuffer = '';
                        continue 2;
                    }
                    $this->stringBuffer .= $ch;
                    continue 2;
                case $ch === ' ' || $ch === "\n" || $ch === "\r" || $ch === "\t":
                    // Whitespace is recorded as a Space token; pending buffers are left untouched.
                    $this->tokens[] = new Token(Token::Space, '');
                    continue 2;
                case $this->isNumber($ch):
                    // A digit after identifier characters belongs to the identifier (e.g. "x1").
                    if ($this->stringBuffer !== '') {
                        $this->stringBuffer .= $ch;
                        continue 2;
                    }
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;
                /** @noinspection PhpMissingBreakStatementInspection */
                case strtolower($ch) === 'e':
                    // 'e'/'E' after a number containing a dot starts an exponent; it is stored lower-cased.
                    if (strlen($this->numberBuffer) && strpos($this->numberBuffer, '.') !== false) {
                        $this->numberBuffer .= 'e';
                        $this->allowNegative = false;
                        break;
                    }
                    // Otherwise treat it as an ordinary letter.
                    // no break
                case $this->isAlpha($ch):
                    // A number directly followed by a letter means implicit multiplication ("2x").
                    if (strlen($this->numberBuffer)) {
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, '*');
                    }
                    $this->allowNegative = false;
                    $this->stringBuffer .= $ch;
                    break;
                case $ch === '"':
                    $this->inDoubleQuotedString = true;
                    continue 2;
                case $ch === "'":
                    $this->inSingleQuotedString = true;
                    continue 2;

                case $this->isDot($ch):
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;
                    break;
                case $this->isLP($ch):
                    if ($this->stringBuffer !== '') {
                        // Identifier followed by '(' is a function call.
                        $this->tokens[] = new Token(Token::Function, $this->stringBuffer);
                        $this->stringBuffer = '';
                    } elseif (strlen($this->numberBuffer)) {
                        // Number followed by '(' means implicit multiplication ("2(3+4)").
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, '*');
                    }
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LeftParenthesis, '');
                    break;
                case $this->isRP($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = false;
                    $this->tokens[] = new Token(Token::RightParenthesis, '');
                    break;
                case $this->isComma($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::ParamSeparator, '');
                    break;
                default:
                    // special case for unary operations
                    if ($ch === '-' || $ch === '+') {
                        if ($this->allowNegative) {
                            $this->allowNegative = false;
                            $this->tokens[] = new Token(Token::Operator, $ch === '-' ? 'uNeg' : 'uPos');
                            continue 2;
                        }
                        // A sign right after the exponent marker belongs to the number.
                        // Keep the sign that was actually typed so "1.5e+3" is not read as "1.5e-3".
                        if (substr($this->numberBuffer, -1) === 'e') {
                            $this->numberBuffer .= $ch;
                            continue 2;
                        }
                    }
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    // '$' only flushes the buffers and produces no token of its own
                    // (presumably an optional variable prefix - confirm against callers).
                    if ($ch !== '$') {
                        $lastIndex = count($this->tokens) - 1;
                        if ($lastIndex >= 0 && $this->tokens[$lastIndex]->type === Token::Operator) {
                            // Extend the previous operator to build multi-character operators.
                            $this->tokens[$lastIndex]->value .= $ch;
                        } else {
                            $this->tokens[] = new Token(Token::Operator, $ch);
                        }
                    }
                    $this->allowNegative = true;
            }
        }
        // Flush whatever is still buffered at the end of the input.
        $this->emptyNumberBufferAsLiteral();
        $this->emptyStrBufferAsVariable();
        return $this;
    }

    /**
     * @param string $ch single character
     * @return bool true for the ASCII digits 0-9
     */
    private function isNumber(string $ch) : bool
    {
        return $ch >= '0' && $ch <= '9';
    }

    /**
     * @param string $ch single character
     * @return bool true for ASCII letters and the underscore
     */
    private function isAlpha(string $ch) : bool
    {
        return $ch >= 'a' && $ch <= 'z' || $ch >= 'A' && $ch <= 'Z' || $ch === '_';
    }

    /**
     * Emits the buffered number (if any) as a Literal token and clears the buffer.
     */
    private function emptyNumberBufferAsLiteral() : void
    {
        if (strlen($this->numberBuffer)) {
            $this->tokens[] = new Token(Token::Literal, $this->numberBuffer);
            $this->numberBuffer = '';
        }
    }

    /**
     * @param string $ch single character
     * @return bool true for the decimal point
     */
    private function isDot(string $ch) : bool
    {
        return $ch === '.';
    }

    /**
     * @param string $ch single character
     * @return bool true for the left parenthesis
     */
    private function isLP(string $ch) : bool
    {
        return $ch === '(';
    }

    /**
     * @param string $ch single character
     * @return bool true for the right parenthesis
     */
    private function isRP(string $ch) : bool
    {
        return $ch === ')';
    }

    /**
     * Emits the buffered identifier (if any) as a Variable token and clears the buffer.
     */
    private function emptyStrBufferAsVariable() : void
    {
        if ($this->stringBuffer !== '') {
            $this->tokens[] = new Token(Token::Variable, $this->stringBuffer);
            $this->stringBuffer = '';
        }
    }

    /**
     * @param string $ch single character
     * @return bool true for the function parameter separator
     */
    private function isComma(string $ch) : bool
    {
        return $ch === ',';
    }

    /**
     * Reorders the tokens collected by tokenize() into reverse polish notation.
     *
     * Operands are emitted immediately; operators, functions and left parentheses
     * wait on a stack and are emitted according to operator priority and bracket
     * nesting. Space tokens are dropped.
     *
     * @return Token[] Array of tokens in reverse polish notation
     * @throws IncorrectBracketsException when parentheses are unbalanced or a parameter
     *                                    separator appears outside of parentheses
     * @throws UnknownOperatorException when an operator token is not in the operator table
     */
    public function buildReversePolishNotation() : array
    {
        $tokens = [];
        /** @var SplStack<Token> $stack */
        $stack = new SplStack();
        foreach ($this->tokens as $token) {
            switch ($token->type) {
                case Token::Literal:
                case Token::Variable:
                case Token::String:
                    $tokens[] = $token;
                    break;
                case Token::Function:
                case Token::LeftParenthesis:
                    $stack->push($token);
                    break;
                case Token::ParamSeparator:
                    // Emit everything belonging to the current argument, stopping at the
                    // enclosing '('. The emptiness check must come before top(), which
                    // throws a plain RuntimeException on an empty stack.
                    while ($stack->count() > 0 && $stack->top()->type !== Token::LeftParenthesis) {
                        $tokens[] = $stack->pop();
                    }
                    if ($stack->count() === 0) {
                        // No enclosing '(' was found for this separator.
                        throw new IncorrectBracketsException();
                    }
                    break;
                case Token::Operator:
                    if (!array_key_exists($token->value, $this->operators)) {
                        throw new UnknownOperatorException($token->value);
                    }
                    $op1 = $this->operators[$token->value];
                    // Emit stacked operators whose priority is at least that of the incoming one.
                    while ($stack->count() > 0 && $stack->top()->type === Token::Operator) {
                        if (!array_key_exists($stack->top()->value, $this->operators)) {
                            throw new UnknownOperatorException($stack->top()->value);
                        }
                        $op2 = $this->operators[$stack->top()->value];
                        if ($op2->priority >= $op1->priority) {
                            $tokens[] = $stack->pop();
                            continue;
                        }
                        break;
                    }
                    $stack->push($token);
                    break;
                case Token::RightParenthesis:
                    // Emit until the matching '(' is found; running out of stack means
                    // there is no matching '('.
                    while (true) {
                        try {
                            $ctoken = $stack->pop();
                            if ($ctoken->type === Token::LeftParenthesis) {
                                break;
                            }
                            $tokens[] = $ctoken;
                        } catch (RuntimeException $e) {
                            throw new IncorrectBracketsException();
                        }
                    }
                    // A function name directly below the '(' owns this parenthesised group.
                    if ($stack->count() > 0 && $stack->top()->type === Token::Function) {
                        $tokens[] = $stack->pop();
                    }
                    break;
                case Token::Space:
                    // Whitespace carries no meaning in the output.
                    break;
            }
        }
        // Emit the remaining operators; a leftover parenthesis means unbalanced brackets.
        while ($stack->count() !== 0) {
            if ($stack->top()->type === Token::LeftParenthesis || $stack->top()->type === Token::RightParenthesis) {
                throw new IncorrectBracketsException();
            }
            if ($stack->top()->type === Token::Space) {
                $stack->pop();
                continue;
            }
            $tokens[] = $stack->pop();
        }
        return $tokens;
    }
}