aboutsummaryrefslogblamecommitdiff
path: root/internal/lexer/lexer.go
blob: 342864d3b032e055471ebf4af217ae8e475265ec (plain) (tree)
1
2
3
4
5
6
7
8
9
10
             








                      
                   





                                                        
                                    


                











                           

 

                                     


                                                             
                                            




                                                                  

                                    


                              
                                            




                          
                                     






                                            
                                      



                                                     
                                        



                                

                                   








                                                  


                                                                      







                                                    
                                 









                                                             
                        



                                  
                          




                                                         
                                 





                                                                                    
                                           







                                                             
                                                                 













                                                                      
                                                                  








                                                                       
                                                







                                                                                
                                                     








                                                      
                                

                               
package lexer

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

const eof rune = -1

type Lexem struct {
	Type  lexType // Type of Lexem.
	Value string  // Value of Lexem.
	Start int     // Start position at input string.
	End   int     // End position at input string.
}

//go:generate stringer -type=lexType
type lexType int

const (
	LEOF lexType = iota
	LError
	LObjectStart
	LObjectEnd
	LObjectKey
	LObjectValue
	LArrayStart
	LArrayEnd
	LString
	LNumber
	LBoolean
	LNull
)

// Lexer holds current scanner state.
type Lexer struct {
	Input  string     // Input string.
	Start  int        // Start position of current lexem.
	Pos    int        // Pos at input string.
	Output chan Lexem // Lexems channel.
	width  int        // Width of last rune.
	states stateStack // Stack of states to realize PrevState.
}

// newLexer returns new scanner for input string.
func NewLexer(input string) *Lexer {
	return &Lexer{
		Input:  input,
		Start:  0,
		Pos:    0,
		Output: make(chan Lexem, 2),
		width:  0,
	}
}

// Run lexing.
func (l *Lexer) Run(init stateFunc) {
	for state := init; state != nil; {
		state = state(l)
	}
	close(l.Output)
}

// PopState returns previous state function.
func (l *Lexer) PopState() stateFunc {
	return l.states.Pop()
}

// PushState pushes state before going deeper states.
func (l *Lexer) PushState(s stateFunc) {
	l.states.Push(s)
}

// Emit current lexem to output.
func (l *Lexer) Emit(typ lexType) {
	l.Output <- Lexem{
		Type:  typ,
		Value: l.Input[l.Start:l.Pos],
		Start: l.Start,
		End:   l.Pos,
	}
	l.Start = l.Pos
}

// Errorf produces error lexem and stops scanning.
func (l *Lexer) Errorf(format string, args ...interface{}) stateFunc {
	l.Output <- Lexem{
		Type:  LError,
		Value: fmt.Sprintf(format, args...),
		Start: l.Start,
		End:   l.Pos,
	}
	return nil
}

// Next rune from input.
func (l *Lexer) Next() (r rune) {
	if int(l.Pos) >= len(l.Input) {
		l.width = 0
		return eof
	}
	r, l.width = utf8.DecodeRuneInString(l.Input[l.Pos:])
	l.Pos += l.width
	return r
}

// Back move position to previos rune.
func (l *Lexer) Back() {
	l.Pos -= l.width
}

// Ignore previosly buffered text.
func (l *Lexer) Ignore() {
	l.Start = l.Pos
	l.width = 0
}

// Peek rune at current position without moving position.
func (l *Lexer) Peek() (r rune) {
	r = l.Next()
	l.Back()
	return r
}

// Accept any rune from valid string. Returns true if Next rune was in valid string.
func (l *Lexer) Accept(valid string) bool {
	if strings.ContainsRune(valid, l.Next()) {
		return true
	}
	l.Back()
	return false
}

// AcceptString returns true if given string was at position.
func (l *Lexer) AcceptString(s string, caseInsentive bool) bool {
	input := l.Input[l.Start:]
	if caseInsentive {
		input = strings.ToLower(input)
		s = strings.ToLower(s)
	}
	if strings.HasPrefix(input, s) {
		l.width = 0
		l.Pos += len(s)
		return true
	}
	return false
}

// AcceptAnyOf substrings. Retuns true if any of substrings was found.
func (l *Lexer) AcceptAnyOf(s []string, caseInsentive bool) bool {
	for _, substring := range s {
		if l.AcceptString(substring, caseInsentive) {
			return true
		}
	}
	return false
}

// AcceptWhile passing symbols from input while they at `valid` string.
func (l *Lexer) AcceptWhile(valid string) bool {
	isValid := false
	for l.Accept(valid) {
		isValid = true
	}
	return isValid
}

// AcceptWhileNot passing symbols from input while they NOT in `invalid` string.
func (l *Lexer) AcceptWhileNot(invalid string) bool {
	isValid := false
	for !strings.ContainsRune(invalid, l.Next()) {
		isValid = true
	}
	l.Back()
	return isValid
}

// AtStart returns true if current lexem not empty
func (l *Lexer) AtStart() bool {
	return l.Pos == l.Start
}