aboutsummaryrefslogblamecommitdiff
path: root/telegram/formatter/formatter.go
blob: d6541cba78676b9b3060a41f95a7adbfc2a52698 (plain) (tree)
1
2
3
4
5
6
7
8



                 
                 

                                        
                                            











                                                     

                                    
                             

                                      


                                  
 
                                                                                  










































                                                                                                     















































                                                                                                              















































                                                                                       













                                                                                              
                                                                                 

                                                                     

                                                                                     
                                           
                                                                 

                                                                     




                                                                                             
                                                                                      




                       

                                                                          



                                                




                                                                               

                                                                                   
                                           
                                                                 

                                                                     

                                                                         










                                                                                             








                                                                         
                                                                                                                  
 




                                                              
                                               


















































                                                                                    
package formatter

import (
	"sort"
	"unicode"

	log "github.com/sirupsen/logrus"
	"github.com/zelenin/go-tdlib/client"
)

// Insertion is a piece of markup text to be inserted at a given position
// of the source text.
type Insertion struct {
	// Offset is the position the runes are inserted in front of. Format compares
	// it against a counter that advances by two for code points above U+FFFF,
	// i.e. it is measured in UTF-16 code units rather than in runes.
	Offset int32
	// Runes is the markup text to insert.
	Runes  []rune
}

// InsertionStack contains a sequence of insertions. Format uses it in two
// ways: as a list growing from the start (appended to, then read front to
// back through NewIterator) and as a stack growing from the end (appended
// to, then popped from the tail by rebalance).
type InsertionStack []*Insertion

// Markup delimiters, kept as rune slices so they can be used as Insertion
// payloads without conversion.
var (
	// Bold.
	boldRunesMarkdown = []rune("**")
	boldRunesXEP0393  = []rune("*")

	// Italic; shared by both dialects.
	italicRunes = []rune("_")

	// Strikethrough.
	strikeRunesMarkdown = []rune("~~")
	strikeRunesXEP0393  = []rune("~")

	// Inline code span.
	codeRunes = []rune("`")

	// Opening and closing fence of a preformatted block.
	preRuneStart = []rune("```\n")
	preRuneEnd   = []rune("\n```")
)

// rebalance moves insertions from the tail of s2 (a stack growing from the
// end) onto the end of s (a list growing from the start) for as long as the
// topmost element of s2 sits at or before the given offset. It must be called
// before anything is appended to s at that offset. The updated pair (s, s2)
// is returned.
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
	for {
		top := len(s2) - 1
		// stop when s2 is drained or its topmost element lies past the offset
		if top < 0 || s2[top].Offset > offset {
			return s, s2
		}

		s = append(s, s2[top])
		s2 = s2[:top]
	}
}

// NewIterator returns a closure that yields the stack elements one by one,
// front to back; once the elements are exhausted every further call
// returns nil.
func (s InsertionStack) NewIterator() func() *Insertion {
	next := 0

	return func() *Insertion {
		if next >= len(s) {
			return nil
		}

		current := s[next]
		next++
		return current
	}
}

// SortEntities returns a copy of entities arranged in traversal-ready order:
// ascending by Offset and, for equal offsets, the longer entity first, so that
// an enclosing span precedes the spans nested in it. Entities with identical
// offset and length keep their input order. The input slice is not reordered.
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
	sortedEntities := make([]*client.TextEntity, len(entities))
	copy(sortedEntities, entities)

	// The comparator has to index the slice being sorted: the sort swaps
	// elements of sortedEntities, so positions in the untouched input slice
	// stop matching them after the first swap.
	sort.SliceStable(sortedEntities, func(i, j int) bool {
		a, b := sortedEntities[i], sortedEntities[j]
		if a.Offset != b.Offset {
			return a.Offset < b.Offset
		}
		return a.Length > b.Length
	})
	return sortedEntities
}

// MergeAdjacentEntities merges runs of directly adjacent entities of the same
// kind into a single entity spanning the whole run.
//
// Continuations are only searched for after the entity itself, so the input is
// expected in SortEntities order. A run is left unmerged when another entity
// partially overlaps the merged span (crosses exactly one of its borders).
//
// The entities passed in are never modified: a merged entity is returned as a
// fresh copy with the extended Length, all others are returned as is.
func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity {
	mergedEntities := make([]*client.TextEntity, 0, len(entities))
	excludedIndices := make(map[int]bool)

	for i, entity := range entities {
		if excludedIndices[i] {
			// already absorbed into an earlier entity
			continue
		}

		typ := entity.Type.TextEntityTypeType()
		start := entity.Offset
		end := start + entity.Length

		// collect continuations: same kind, starting exactly where the run ends
		var continuations []int
		for j := i + 1; j < len(entities); j++ {
			candidate := entities[j]
			if candidate.Type.TextEntityTypeType() == typ && candidate.Offset == end {
				end += candidate.Length
				continuations = append(continuations, j)
			}
		}

		if len(continuations) > 0 {
			// check for intersections with other entities
			isIntersecting := false
			for _, other := range entities {
				otherEnd := other.Offset + other.Length
				if (other.Offset < start && otherEnd > start && otherEnd < end) ||
					(other.Offset > start && other.Offset < end && otherEnd > end) {
					isIntersecting = true
					break
				}
			}

			if !isIntersecting {
				// extend a copy, so the caller's entity keeps its own length
				merged := *entity
				merged.Length = end - start
				entity = &merged

				for _, j := range continuations {
					excludedIndices[j] = true
				}
			}
		}

		mergedEntities = append(mergedEntities, entity)
	}

	return mergedEntities
}

// ClaspDirectives to the following span as required by XEP-0393: every entity
// that starts or ends on whitespace is shrunk so that its borders touch
// non-space characters.
//
// Entities consisting of whitespace only are returned unchanged, and so are
// nil entities, entities with a non-positive length and entities that do not
// fit into text (those would otherwise be indexed out of range). The entities
// passed in are never modified; an adjusted entity is returned as a copy.
func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextEntity {
	alignedEntities := make([]*client.TextEntity, len(entities))
	copy(alignedEntities, entities)

	// Entity offsets are counted in UTF-16 code units (see Format), so build a
	// slice that can be indexed by them directly: every code point above the
	// Basic Multilingual Plane takes two units and is therefore stored twice.
	// The byte length is an upper bound for the number of units.
	units := make([]rune, 0, len(text))
	for _, cp := range text {
		units = append(units, cp)
		if cp > 0x0000ffff {
			units = append(units, cp)
		}
	}

	for i, entity := range alignedEntities {
		if entity == nil || entity.Length <= 0 {
			continue
		}

		// 64-bit arithmetic, so that the sum cannot wrap around
		start := int64(entity.Offset)
		end := start + int64(entity.Length)
		if start < 0 || end > int64(len(units)) {
			continue
		}

		// skip the leading whitespace
		first := start
		for first < end && unicode.IsSpace(units[first]) {
			first++
		}
		if first == end {
			// nothing but whitespace: there is no span to clasp to
			continue
		}

		// skip the trailing whitespace
		last := end
		for last > first && unicode.IsSpace(units[last-1]) {
			last--
		}

		if first == start && last == end {
			continue
		}

		aligned := *entity
		aligned.Offset = int32(first)
		aligned.Length = int32(last - first)
		alignedEntities[i] = &aligned
	}

	return alignedEntities
}

// markupBraces builds the pair of insertions wrapping the entity: lbrace is
// placed at the entity start, rbrace right after its end.
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
	opening := &Insertion{Offset: entity.Offset, Runes: lbrace}
	closing := &Insertion{Offset: entity.Offset + entity.Length, Runes: rbrace}

	return opening, closing
}

// EntityToMarkdown generates the wrapping Markdown tags: the insertion opening
// the entity and the insertion closing it. Both are nil for a nil entity, an
// entity without a type and for entity kinds that have no Markdown
// representation here.
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
	if entity == nil || entity.Type == nil {
		return nil, nil
	}

	switch entity.Type.TextEntityTypeType() {
	case client.TypeTextEntityTypeBold:
		return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown)
	case client.TypeTextEntityTypeItalic:
		return markupBraces(entity, italicRunes, italicRunes)
	case client.TypeTextEntityTypeStrikethrough:
		return markupBraces(entity, strikeRunesMarkdown, strikeRunesMarkdown)
	case client.TypeTextEntityTypeCode:
		return markupBraces(entity, codeRunes, codeRunes)
	case client.TypeTextEntityTypePre:
		return markupBraces(entity, preRuneStart, preRuneEnd)
	case client.TypeTextEntityTypePreCode:
		preCode, ok := entity.Type.(*client.TextEntityTypePreCode)
		if !ok || preCode == nil {
			return nil, nil
		}
		// a fenced block has to be closed by a full fence on its own line,
		// not by a single backtick
		return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), preRuneEnd)
	case client.TypeTextEntityTypeTextUrl:
		textURL, ok := entity.Type.(*client.TextEntityTypeTextUrl)
		if !ok || textURL == nil {
			return nil, nil
		}
		return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")"))
	}

	return nil, nil
}

// EntityToXEP0393 generates the wrapping XEP-0393 tags: the insertion opening
// the entity and the insertion closing it. Both are nil for a nil entity, an
// entity without a type and for entity kinds that have no representation here.
func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) {
	if entity == nil || entity.Type == nil {
		return nil, nil
	}

	switch entity.Type.TextEntityTypeType() {
	case client.TypeTextEntityTypeBold:
		return markupBraces(entity, boldRunesXEP0393, boldRunesXEP0393)
	case client.TypeTextEntityTypeItalic:
		return markupBraces(entity, italicRunes, italicRunes)
	case client.TypeTextEntityTypeStrikethrough:
		return markupBraces(entity, strikeRunesXEP0393, strikeRunesXEP0393)
	case client.TypeTextEntityTypeCode:
		return markupBraces(entity, codeRunes, codeRunes)
	case client.TypeTextEntityTypePre:
		return markupBraces(entity, preRuneStart, preRuneEnd)
	case client.TypeTextEntityTypePreCode:
		preCode, ok := entity.Type.(*client.TextEntityTypePreCode)
		if !ok || preCode == nil {
			return nil, nil
		}
		// a preformatted block is terminated by a line of three backticks,
		// not by a single backtick
		return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), preRuneEnd)
	case client.TypeTextEntityTypeTextUrl:
		textURL, ok := entity.Type.(*client.TextEntityTypeTextUrl)
		if !ok || textURL == nil {
			return nil, nil
		}
		// non-standard, Pidgin-specific
		return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">"))
	}

	return nil, nil
}

// Format wraps the text in a markup described by the entities.
//
// The entities may come in any order: they are sorted, adjacent entities of
// the same kind are merged and the borders are moved off whitespace before the
// traversal. entityToMarkup (EntityToMarkdown or EntityToXEP0393, for example)
// converts each entity into its opening and closing insertion; a nil insertion
// is skipped. Entity offsets are treated as UTF-16 code units.
//
// sourceText is returned unchanged when there are no entities or when
// entityToMarkup is nil.
func Format(
	sourceText string,
	entities []*client.TextEntity,
	entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
) string {
	if len(entities) == 0 || entityToMarkup == nil {
		return sourceText
	}

	mergedEntities := SortEntities(ClaspDirectives(sourceText, MergeAdjacentEntities(SortEntities(entities))))

	// every entity yields at most one opening and one closing insertion, and
	// all of them end up in startStack eventually
	startStack := make(InsertionStack, 0, 2*len(mergedEntities))
	endStack := make(InsertionStack, 0, len(mergedEntities))

	// convert entities to a stack of brackets
	var maxEndOffset int32
	for _, entity := range mergedEntities {
		log.Debugf("%#v", entity)
		if entity.Length <= 0 {
			continue
		}

		endOffset := entity.Offset + entity.Length
		if endOffset > maxEndOffset {
			maxEndOffset = endOffset
		}

		// closing brackets located at or before this entity's start have to
		// precede its opening bracket
		startStack, endStack = startStack.rebalance(endStack, entity.Offset)

		startInsertion, endInsertion := entityToMarkup(entity)
		if startInsertion != nil {
			startStack = append(startStack, startInsertion)
		}
		if endInsertion != nil {
			endStack = append(endStack, endInsertion)
		}
	}
	// flush the closing brackets that still remain in endStack
	startStack, endStack = startStack.rebalance(endStack, maxEndOffset)

	// merge brackets into text
	markupRunes := make([]rune, 0, len(sourceText))

	nextInsertion := startStack.NewIterator()
	insertion := nextInsertion()
	var runeI int32

	for _, cp := range sourceText {
		// emit everything that belongs in front of the current character
		for insertion != nil && insertion.Offset <= runeI {
			markupRunes = append(markupRunes, insertion.Runes...)
			insertion = nextInsertion()
		}

		markupRunes = append(markupRunes, cp)
		// skip two UTF-16 code units (not points actually!) if needed
		if cp > 0x0000ffff {
			runeI += 2
		} else {
			runeI++
		}
	}
	// whatever is left belongs to the very end of the text
	for insertion != nil {
		markupRunes = append(markupRunes, insertion.Runes...)
		insertion = nextInsertion()
	}

	return string(markupRunes)
}