aboutsummaryrefslogblamecommitdiff
path: root/src/NXP/Classes/Tokenizer.php
blob: a74bff659df7deb66ae25491ef22136d93b694dd (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12











                                                                          









                                                    
                            
                              
 
                               
 





                                       
                               




                                               


                             







                                                                
                                     
     


                                                   

                                                 

















                                                                                            
 

                                   




                                                    
                                               
 
                               
 
                                                 






                                                        
 
                                   















                                                                                            

                                               
 
                               
 






                                                                            
                                                                             

                                                        
                                                                  
 
                               
 
                                          
                                                    
                                                   
 



                                                 
 
                          
                                                                       

                                                                                                      

                                                     
 

                              

                                             
                                         
                                                       
                                                            
                                                                          


                                                 
 
                          

                                
                                                       
 
                               

                                
                                                       
 




                                                 
 
                          
 
                                      
                                                    
                                                                                          
                                                 
                                                             
                                                            
                                                                          

                                                
                                                                            
 
                          
 
                                                      


                                                        
                                                                             
 
                          
 



                                                        
                                                                           
 
                          
 
                        
                                                        
                                                   

                                                         

                                                                                                       


                                                                                                           
                                                                                                                  
                                                       
 

                                       


                                                        




                                                                                                     


                                                                                  


                                                                              





                                                
 
                     


       
                                         
                                       
                                                                
       
                                                        



                                          



                                           
 





                                           
 



                                                                                   
                          
 
                                     







                                                                                   

                                            
 
                          
 
                                           

                                                                            



                                                                   
                                                                  
 
                          
 
                                     
                                                                               
                                                                          

                                                           


                                                                                            
                                                                                     

                                                                      
 

                                                               
 

                                     
 


                                         
 
                          
 



                                                    

                                                                           






                                                                   

                                                                                        





                                                              
                     
 
                          
 



                                  


                                                                                                                    

                                                       

                                                       
                              
 



                                      
 

                       













































                                                                                  
 
<?php
/**
 * This file is part of the MathExecutor package
 *
 * (c) Alexander Kiryukhin
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code
 */

namespace NXP\Classes;

use NXP\Exception\IncorrectBracketsException;
use NXP\Exception\UnknownOperatorException;
use RuntimeException;
use SplStack;

/**
 * @author Alexander Kiryukhin <a.kiryukhin@mail.ru>
 */
class Tokenizer
{
    /** @var array<Token> */
    public array $tokens = [];

    private string $input = '';

    private string $numberBuffer = '';

    private string $stringBuffer = '';

    private bool $allowNegative = true;

    /** @var array<Operator> */
    private array $operators = [];

    private bool $inSingleQuotedString = false;

    private bool $inDoubleQuotedString = false;

    /**
     * Tokenizer constructor.
     * @param Operator[] $operators
     */
    public function __construct(string $input, array $operators)
    {
        $this->input = $input;
        $this->operators = $operators;
    }

    public function tokenize() : self
    {
        $isLastCharEscape = false;

        foreach (\str_split($this->input) as $ch) {
            switch (true) {
                case $this->inSingleQuotedString:
                    if ('\\' === $ch) {
                        if ($isLastCharEscape) {
                            $this->stringBuffer .= '\\';
                            $isLastCharEscape = false;
                        } else {
                            $isLastCharEscape = true;
                        }

                        continue 2;
                    } elseif ("'" === $ch) {
                        if ($isLastCharEscape) {
                            $this->stringBuffer .= "'";
                            $isLastCharEscape = false;
                        } else {
                            $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                            $this->inSingleQuotedString = false;
                            $this->stringBuffer = '';
                        }

                        continue 2;
                    }

                    if ($isLastCharEscape) {
                        $this->stringBuffer .= '\\';
                        $isLastCharEscape = false;
                    }
                    $this->stringBuffer .= $ch;

                    continue 2;

                case $this->inDoubleQuotedString:
                    if ('\\' === $ch) {
                        if ($isLastCharEscape) {
                            $this->stringBuffer .= '\\';
                            $isLastCharEscape = false;
                        } else {
                            $isLastCharEscape = true;
                        }

                        continue 2;
                    } elseif ('"' === $ch) {
                        if ($isLastCharEscape) {
                            $this->stringBuffer .= '"';
                            $isLastCharEscape = false;
                        } else {
                            $this->tokens[] = new Token(Token::String, $this->stringBuffer);
                            $this->inDoubleQuotedString = false;
                            $this->stringBuffer = '';
                        }

                        continue 2;
                    }

                    if ($isLastCharEscape) {
                        $this->stringBuffer .= '\\';
                        $isLastCharEscape = false;
                    }
                    $this->stringBuffer .= $ch;

                    continue 2;

                case '[' === $ch:
                    $this->tokens[] = new Token(Token::Function, 'array');
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LeftParenthesis, '');

                    continue 2;

                case ' ' == $ch || "\n" == $ch || "\r" == $ch || "\t" == $ch:
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->tokens[] = new Token(Token::Space, '');

                    continue 2;

                case $this->isNumber($ch):
                    if ('' != $this->stringBuffer) {
                        $this->stringBuffer .= $ch;

                        continue 2;
                    }
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;

                    break;
                /** @noinspection PhpMissingBreakStatementInspection */
                case 'e' === \strtolower($ch):
                    if (\strlen($this->numberBuffer) && false !== \strpos($this->numberBuffer, '.')) {
                        $this->numberBuffer .= 'e';
                        $this->allowNegative = false;

                        break;
                    }
                // no break
                // Intentionally fall through
                case $this->isAlpha($ch):
                    if (\strlen($this->numberBuffer)) {
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, '*');
                    }
                    $this->allowNegative = false;
                    $this->stringBuffer .= $ch;

                    break;

                case '"' == $ch:
                    $this->inDoubleQuotedString = true;

                    continue 2;

                case "'" == $ch:
                    $this->inSingleQuotedString = true;

                    continue 2;

                case $this->isDot($ch):
                    $this->numberBuffer .= $ch;
                    $this->allowNegative = false;

                    break;

                case $this->isLP($ch):
                    if ('' != $this->stringBuffer) {
                        $this->tokens[] = new Token(Token::Function, $this->stringBuffer);
                        $this->stringBuffer = '';
                    } elseif (\strlen($this->numberBuffer)) {
                        $this->emptyNumberBufferAsLiteral();
                        $this->tokens[] = new Token(Token::Operator, '*');
                    }
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::LeftParenthesis, '');

                    break;

                case $this->isRP($ch) || ']' === $ch :
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = false;
                    $this->tokens[] = new Token(Token::RightParenthesis, '');

                    break;

                case $this->isComma($ch):
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();
                    $this->allowNegative = true;
                    $this->tokens[] = new Token(Token::ParamSeparator, '');

                    break;

                default:
                    // special case for unary operations
                    if ('-' == $ch || '+' == $ch) {
                        if ($this->allowNegative) {
                            $this->allowNegative = false;
                            $this->tokens[] = new Token(Token::Operator, '-' == $ch ? 'uNeg' : 'uPos');

                            continue 2;
                        }
                        // could be in exponent, in which case negative should be added to the numberBuffer
                        if ($this->numberBuffer && 'e' == $this->numberBuffer[\strlen($this->numberBuffer) - 1]) {
                            $this->numberBuffer .= $ch;

                            continue 2;
                        }
                    }
                    $this->emptyNumberBufferAsLiteral();
                    $this->emptyStrBufferAsVariable();

                    if ('$' != $ch) {
                        if (\count($this->tokens) > 0) {
                            if (Token::Operator === $this->tokens[\count($this->tokens) - 1]->type) {
                                $this->tokens[\count($this->tokens) - 1]->value .= $ch;
                            } else {
                                $this->tokens[] = new Token(Token::Operator, $ch);
                            }
                        } else {
                            $this->tokens[] = new Token(Token::Operator, $ch);
                        }
                    }
                    $this->allowNegative = true;
            }
        }
        $this->emptyNumberBufferAsLiteral();
        $this->emptyStrBufferAsVariable();

        return $this;
    }

    /**
     * @throws IncorrectBracketsException
     * @throws UnknownOperatorException
     * @return Token[] Array of tokens in revers polish notation
     */
    public function buildReversePolishNotation() : array
    {
        $tokens = [];
        /** @var SplStack<Token> $stack */
        $stack = new SplStack();
        /**
         * @var SplStack<int> $paramCounter
         */
        $paramCounter = new SplStack();

        foreach ($this->tokens as $token) {
            switch ($token->type) {
                case Token::Literal:
                case Token::Variable:
                case Token::String:
                    $tokens[] = $token;

                    if ($paramCounter->count() > 0 && 0 === $paramCounter->top()) {
                        $paramCounter->push($paramCounter->pop() + 1);
                    }

                    break;

                case Token::Function:
                    if ($paramCounter->count() > 0 && 0 === $paramCounter->top()) {
                        $paramCounter->push($paramCounter->pop() + 1);
                    }
                    $stack->push($token);
                    $paramCounter->push(0);

                    break;

                case Token::LeftParenthesis:
                    $stack->push($token);

                    break;

                case Token::ParamSeparator:
                    while (Token::LeftParenthesis !== $stack->top()->type) {
                        if (0 === $stack->count()) {
                            throw new IncorrectBracketsException();
                        }
                        $tokens[] = $stack->pop();
                    }
                    $paramCounter->push($paramCounter->pop() + 1);

                    break;

                case Token::Operator:
                    if (! \array_key_exists($token->value, $this->operators)) {
                        throw new UnknownOperatorException($token->value);
                    }
                    $op1 = $this->operators[$token->value];

                    while ($stack->count() > 0 && Token::Operator === $stack->top()->type) {
                        if (! \array_key_exists($stack->top()->value, $this->operators)) {
                            throw new UnknownOperatorException($stack->top()->value);
                        }
                        $op2 = $this->operators[$stack->top()->value];

                        if ($op2->priority >= $op1->priority) {
                            $tokens[] = $stack->pop();

                            continue;
                        }

                        break;
                    }
                    $stack->push($token);

                    break;

                case Token::RightParenthesis:
                    while (true) {
                        try {
                            $ctoken = $stack->pop();

                            if (Token::LeftParenthesis === $ctoken->type) {
                                break;
                            }
                            $tokens[] = $ctoken;
                        } catch (RuntimeException $e) {
                            throw new IncorrectBracketsException();
                        }
                    }

                    if ($stack->count() > 0 && Token::Function == $stack->top()->type) {
                        /**
                         * @var Token $f
                         */
                        $f = $stack->pop();
                        $f->paramCount = $paramCounter->pop();
                        $tokens[] = $f;
                    }

                    break;

                case Token::Space:
                    //do nothing
            }
        }

        while (0 !== $stack->count()) {
            if (Token::LeftParenthesis === $stack->top()->type || Token::RightParenthesis === $stack->top()->type) {
                throw new IncorrectBracketsException();
            }

            if (Token::Space === $stack->top()->type) {
                $stack->pop();

                continue;
            }
            $tokens[] = $stack->pop();
        }

        return $tokens;
    }

    private function isNumber(string $ch) : bool
    {
        return $ch >= '0' && $ch <= '9';
    }

    private function isAlpha(string $ch) : bool
    {
        return $ch >= 'a' && $ch <= 'z' || $ch >= 'A' && $ch <= 'Z' || '_' == $ch;
    }

    private function emptyNumberBufferAsLiteral() : void
    {
        if (\strlen($this->numberBuffer)) {
            $this->tokens[] = new Token(Token::Literal, $this->numberBuffer);
            $this->numberBuffer = '';
        }
    }

    private function isDot(string $ch) : bool
    {
        return '.' == $ch;
    }

    private function isLP(string $ch) : bool
    {
        return '(' == $ch;
    }

    private function isRP(string $ch) : bool
    {
        return ')' == $ch;
    }

    private function emptyStrBufferAsVariable() : void
    {
        if ('' != $this->stringBuffer) {
            $this->tokens[] = new Token(Token::Variable, $this->stringBuffer);
            $this->stringBuffer = '';
        }
    }

    private function isComma(string $ch) : bool
    {
        return ',' == $ch;
    }
}