diff options
author | bodqhrohro <bodqhrohro@gmail.com> | 2020-01-10 00:16:40 +0300 |
---|---|---|
committer | bodqhrohro <bodqhrohro@gmail.com> | 2020-01-10 00:16:40 +0300 |
commit | 70383bee128b11451211e514200b23f47bf272db (patch) | |
tree | bfc56f26264e206e66e4a6c150f70cf8625e8d66 /telegram/formatter/formatter.go | |
parent | b8fcac6ae24dd5e07f366741f0f282f33b18b503 (diff) |
Convert formatting entities to Markdown
Diffstat (limited to 'telegram/formatter/formatter.go')
-rw-r--r-- | telegram/formatter/formatter.go | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/telegram/formatter/formatter.go b/telegram/formatter/formatter.go new file mode 100644 index 0000000..4b26f83 --- /dev/null +++ b/telegram/formatter/formatter.go @@ -0,0 +1,165 @@ +package formatter + +import ( + "sort" + + log "github.com/sirupsen/logrus" + "github.com/zelenin/go-tdlib/client" +) + +// Insertion is a piece of text in given position +type Insertion struct { + Offset int32 + Runes []rune +} + +// InsertionStack contains the sequence of insertions +// from the start or from the end +type InsertionStack []*Insertion + +var boldRunes = []rune("**") +var italicRunes = []rune("_") +var codeRunes = []rune("\n```\n") +var urlRuneL = []rune("[") + +// rebalance pumps all the values at given offset to current stack (growing +// from start) from given stack (growing from end); should be called +// before any insertions to the current stack at the given offset +func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) { + for len(s2) > 0 && s2[len(s2)-1].Offset <= offset { + s = append(s, s2[len(s2)-1]) + s2 = s2[:len(s2)-1] + } + + return s, s2 +} + +// NewIterator is a second order function that sequentially scans and returns +// stack elements; starts returning nil when elements are ended +func (s InsertionStack) NewIterator() func() *Insertion { + i := -1 + + return func() *Insertion { + i++ + if i < len(s) { + return s[i] + } + return nil + } +} + +// SortEntities arranges the entities in traversal-ready order +func SortEntities(entities []*client.TextEntity) []*client.TextEntity { + sortedEntities := make([]*client.TextEntity, len(entities)) + copy(sortedEntities, entities) + + sort.Slice(sortedEntities, func(i int, j int) bool { + entity1 := entities[i] + entity2 := entities[j] + if entity1.Offset < entity2.Offset { + return true + } else if entity1.Offset == entity2.Offset { + return entity1.Length > entity2.Length + } + return false + }) + return sortedEntities +} + +func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) { + return &Insertion{ + Offset: entity.Offset, + Runes: lbrace, + }, &Insertion{ + Offset: entity.Offset + entity.Length, + Runes: rbrace, + } +} + +// EntityToMarkdown generates the wrapping Markdown tags +func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) { + switch entity.Type.TextEntityTypeType() { + case client.TypeTextEntityTypeBold: + return markupBraces(entity, boldRunes, boldRunes) + case client.TypeTextEntityTypeItalic: + return markupBraces(entity, italicRunes, italicRunes) + case client.TypeTextEntityTypeCode, client.TypeTextEntityTypePre: + return markupBraces(entity, codeRunes, codeRunes) + case client.TypeTextEntityTypePreCode: + preCode, _ := entity.Type.(*client.TextEntityTypePreCode) + return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes) + case client.TypeTextEntityTypeTextUrl: + textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl) + return markupBraces(entity, urlRuneL, []rune("]("+textURL.Url+")")) + } + + return nil, nil +} + +// Format traverses an already sorted list of entities and wraps the text in Markdown +func Format( + sourceText string, + entities []*client.TextEntity, + entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion), +) string { + if len(entities) == 0 { + return sourceText + } + + startStack := make(InsertionStack, 0, len(sourceText)) + endStack := make(InsertionStack, 0, len(sourceText)) + + // convert entities to a stack of brackets + var maxEndOffset int32 + for _, entity := range entities { + log.Debugf("%#v", entity) + if entity.Length <= 0 { + continue + } + + endOffset := entity.Offset + entity.Length + if endOffset > maxEndOffset { + maxEndOffset = endOffset + } + + startStack, endStack = startStack.rebalance(endStack, entity.Offset) + + startInsertion, endInsertion := entityToMarkup(entity) + if startInsertion != nil { + startStack = append(startStack, startInsertion) + } + if endInsertion != nil { + endStack = append(endStack, endInsertion) + } + } + // flush the closing brackets that still remain in endStack + startStack, endStack = startStack.rebalance(endStack, maxEndOffset) + + // merge brackets into text + markupRunes := make([]rune, 0, len(sourceText)) + + nextInsertion := startStack.NewIterator() + insertion := nextInsertion() + var runeI int32 + + for _, cp := range sourceText { + for insertion != nil && insertion.Offset <= runeI { + markupRunes = append(markupRunes, insertion.Runes...) + insertion = nextInsertion() + } + + markupRunes = append(markupRunes, cp) + // skip two UTF-16 code units (not points actually!) if needed + if cp > 0x0000ffff { + runeI += 2 + } else { + runeI++ + } + } + for insertion != nil { + markupRunes = append(markupRunes, insertion.Runes...) + insertion = nextInsertion() + } + + return string(markupRunes) +} |