diff options
author | Bohdan Horbeshko <bodqhrohro@gmail.com> | 2023-11-16 03:38:45 +0300 |
---|---|---|
committer | Bohdan Horbeshko <bodqhrohro@gmail.com> | 2023-11-16 03:38:45 +0300 |
commit | 6bd837911431ef68d23de1bcbb75893edd39a32b (patch) | |
tree | 660d2a497017f027edacedd5d7ab55d75252a924 /telegram/formatter/formatter.go | |
parent | 576acba0d18717da5abc5a232cf92c23c9b8a550 (diff) |
Support blockquotes in formatter
Diffstat (limited to 'telegram/formatter/formatter.go')
-rw-r--r-- | telegram/formatter/formatter.go | 231 |
1 files changed, 186 insertions, 45 deletions
diff --git a/telegram/formatter/formatter.go b/telegram/formatter/formatter.go index 740fa09..9403198 100644 --- a/telegram/formatter/formatter.go +++ b/telegram/formatter/formatter.go @@ -8,15 +8,29 @@ import ( "github.com/zelenin/go-tdlib/client" ) -// Insertion is a piece of text in given position -type Insertion struct { +type insertionType int +const ( + insertionOpening insertionType = iota + insertionClosing + insertionUnpaired +) + +type MarkupModeType int +const ( + MarkupModeXEP0393 MarkupModeType = iota + MarkupModeMarkdown +) + +// insertion is a piece of text in given position +type insertion struct { Offset int32 Runes []rune + Type insertionType } -// InsertionStack contains the sequence of insertions +// insertionStack contains the sequence of insertions // from the start or from the end -type InsertionStack []*Insertion +type insertionStack []*insertion var boldRunesMarkdown = []rune("**") var boldRunesXEP0393 = []rune("*") @@ -26,11 +40,16 @@ var strikeRunesXEP0393 = []rune("~") var codeRunes = []rune("`") var preRuneStart = []rune("```\n") var preRuneEnd = []rune("\n```") +var quoteRunes = []rune("> ") +var newlineRunes = []rune("\n") +var doubleNewlineRunes = []rune("\n\n") +var newlineCode = rune(0x0000000a) +var bmpCeil = rune(0x0000ffff) // rebalance pumps all the values until the given offset to current stack (growing // from start) from given stack (growing from end); should be called // before any insertions to the current stack at the given offset -func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) { +func (s insertionStack) rebalance(s2 insertionStack, offset int32) (insertionStack, insertionStack) { for len(s2) > 0 && s2[len(s2)-1].Offset <= offset { s = append(s, s2[len(s2)-1]) s2 = s2[:len(s2)-1] @@ -41,10 +60,10 @@ func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionSta // NewIterator is a second order function that sequentially scans and returns // stack elements; starts returning nil when elements are ended -func (s InsertionStack) NewIterator() func() *Insertion { +func (s insertionStack) NewIterator() func() *insertion { i := -1 - return func() *Insertion { + return func() *insertion { i++ if i < len(s) { return s[i] @@ -120,21 +139,10 @@ func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity { } // ClaspDirectives to the following span as required by XEP-0393 -func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextEntity { +func ClaspDirectives(doubledRunes []rune, entities []*client.TextEntity) []*client.TextEntity { alignedEntities := make([]*client.TextEntity, len(entities)) copy(alignedEntities, entities) - // transform the source text into a form with uniform runes and code points, - // by duplicating the Basic Multilingual Plane - doubledRunes := make([]rune, 0, len(text)*2) - - for _, cp := range text { - if cp > 0x0000ffff { - doubledRunes = append(doubledRunes, cp, cp) - } else { - doubledRunes = append(doubledRunes, cp) - } - } for i, entity := range alignedEntities { var dirty bool endOffset := entity.Offset + entity.Length @@ -167,18 +175,89 @@ func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextE return alignedEntities } -func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) { - return &Insertion{ +func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) []*insertion { + return []*insertion{ + &insertion{ Offset: entity.Offset, Runes: lbrace, - }, &Insertion{ + Type: insertionOpening, + }, + &insertion{ Offset: entity.Offset + entity.Length, Runes: rbrace, + Type: insertionClosing, + }, + } +} + +func quotePrependNewlines(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { + if len(doubledRunes) == 0 { + return []*insertion{} + } + + startRunes := []rune("\n> ") + if entity.Offset == 0 || doubledRunes[entity.Offset-1] == newlineCode { + startRunes = quoteRunes + } + insertions := []*insertion{ + &insertion{ + Offset: entity.Offset, + Runes: startRunes, + Type: insertionUnpaired, + }, + } + + entityEnd := entity.Offset + entity.Length + entityEndInt := int(entityEnd) + + var wasNewline bool + // last newline is omitted, there's no need to put quote mark after the quote + for i := entity.Offset; i < entityEnd-1; i++ { + isNewline := doubledRunes[i] == newlineCode + if (isNewline && markupMode == MarkupModeXEP0393) || (wasNewline && isNewline && markupMode == MarkupModeMarkdown) { + insertions = append(insertions, &insertion{ + Offset: i+1, + Runes: quoteRunes, + Type: insertionUnpaired, + }) + } + + if isNewline { + wasNewline = true + } else { + wasNewline = false } + } + + var rbrace []rune + if len(doubledRunes) > entityEndInt { + if doubledRunes[entityEnd] == newlineCode { + if markupMode == MarkupModeMarkdown && len(doubledRunes) > entityEndInt+1 && doubledRunes[entityEndInt+1] != newlineCode { + rbrace = newlineRunes + } + } else { + if markupMode == MarkupModeMarkdown { + rbrace = doubleNewlineRunes + } else { + rbrace = newlineRunes + } + } + } + insertions = append(insertions, &insertion{ + Offset: entityEnd, + Runes: rbrace, + Type: insertionClosing, + }) + + return insertions } -// EntityToMarkdown generates the wrapping Markdown tags -func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) { +// entityToMarkdown generates the wrapping Markdown tags +func entityToMarkdown(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { + if entity == nil || entity.Type == nil { + return []*insertion{} + } + switch entity.Type.TextEntityTypeType() { case client.TypeTextEntityTypeBold: return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown) @@ -193,18 +272,20 @@ func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) { case client.TypeTextEntityTypePreCode: preCode, _ := entity.Type.(*client.TextEntityTypePreCode) return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes) + case client.TypeTextEntityTypeBlockQuote: + return quotePrependNewlines(entity, doubledRunes, MarkupModeMarkdown) case client.TypeTextEntityTypeTextUrl: textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl) return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")")) } - return nil, nil + return []*insertion{} } -// EntityToXEP0393 generates the wrapping XEP-0393 tags -func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) { +// entityToXEP0393 generates the wrapping XEP-0393 tags +func entityToXEP0393(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { if entity == nil || entity.Type == nil { - return nil, nil + return []*insertion{} } switch entity.Type.TextEntityTypeType() { @@ -221,29 +302,55 @@ func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) { case client.TypeTextEntityTypePreCode: preCode, _ := entity.Type.(*client.TextEntityTypePreCode) return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes) + case client.TypeTextEntityTypeBlockQuote: + return quotePrependNewlines(entity, doubledRunes, MarkupModeXEP0393) case client.TypeTextEntityTypeTextUrl: textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl) // non-standard, Pidgin-specific return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">")) } - return nil, nil + return []*insertion{} +} + +// transform the source text into a form with uniform runes and code points, +// by duplicating anything beyond the Basic Multilingual Plane +func textToDoubledRunes(text string) []rune { + doubledRunes := make([]rune, 0, len(text)*2) + for _, cp := range text { + if cp > bmpCeil { + doubledRunes = append(doubledRunes, cp, cp) + } else { + doubledRunes = append(doubledRunes, cp) + } + } + + return doubledRunes } // Format traverses an already sorted list of entities and wraps the text in a markup func Format( sourceText string, entities []*client.TextEntity, - entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion), + markupMode MarkupModeType, ) string { if len(entities) == 0 { return sourceText } - mergedEntities := SortEntities(ClaspDirectives(sourceText, MergeAdjacentEntities(SortEntities(entities)))) + var entityToMarkup func(*client.TextEntity, []rune, MarkupModeType) []*insertion + if markupMode == MarkupModeXEP0393 { + entityToMarkup = entityToXEP0393 + } else { + entityToMarkup = entityToMarkdown + } - startStack := make(InsertionStack, 0, len(sourceText)) - endStack := make(InsertionStack, 0, len(sourceText)) + doubledRunes := textToDoubledRunes(sourceText) + + mergedEntities := SortEntities(ClaspDirectives(doubledRunes, MergeAdjacentEntities(SortEntities(entities)))) + + startStack := make(insertionStack, 0, len(sourceText)) + endStack := make(insertionStack, 0, len(sourceText)) // convert entities to a stack of brackets var maxEndOffset int32 @@ -260,36 +367,70 @@ func Format( startStack, endStack = startStack.rebalance(endStack, entity.Offset) - startInsertion, endInsertion := entityToMarkup(entity) - if startInsertion != nil { - startStack = append(startStack, startInsertion) + insertions := entityToMarkup(entity, doubledRunes, markupMode) + if len(insertions) > 1 { + startStack = append(startStack, insertions[0:len(insertions)-1]...) } - if endInsertion != nil { - endStack = append(endStack, endInsertion) + if len(insertions) > 0 { + endStack = append(endStack, insertions[len(insertions)-1]) } } // flush the closing brackets that still remain in endStack startStack, endStack = startStack.rebalance(endStack, maxEndOffset) + // sort unpaired insertions + sort.SliceStable(startStack, func(i int, j int) bool { + ins1 := startStack[i] + ins2 := startStack[j] + if ins1.Type == insertionUnpaired && ins2.Type == insertionUnpaired { + return ins1.Offset < ins2.Offset + } + if ins1.Type == insertionUnpaired { + if ins1.Offset == ins2.Offset { + if ins2.Type == insertionOpening { // > ** + return true + } else if ins2.Type == insertionClosing { // **> + return false + } + } else { + return ins1.Offset < ins2.Offset + } + } + if ins2.Type == insertionUnpaired { + if ins1.Offset == ins2.Offset { + if ins1.Type == insertionOpening { // > ** + return false + } else if ins1.Type == insertionClosing { // **> + return true + } + } else { + return ins1.Offset < ins2.Offset + } + } + return false + }) // merge brackets into text markupRunes := make([]rune, 0, len(sourceText)) nextInsertion := startStack.NewIterator() insertion := nextInsertion() - var runeI int32 + var skipNext bool - for _, cp := range sourceText { - for insertion != nil && insertion.Offset <= runeI { + for i, cp := range doubledRunes { + if skipNext { + skipNext = false + continue + } + + for insertion != nil && int(insertion.Offset) <= i { markupRunes = append(markupRunes, insertion.Runes...) insertion = nextInsertion() } markupRunes = append(markupRunes, cp) // skip two UTF-16 code units (not points actually!) if needed - if cp > 0x0000ffff { - runeI += 2 - } else { - runeI++ + if cp > bmpCeil { + skipNext = true } } for insertion != nil { |