aboutsummaryrefslogtreecommitdiff
path: root/telegram
diff options
context:
space:
mode:
author bodqhrohro <bodqhrohro@gmail.com> 2020-01-10 00:16:40 +0300
committer bodqhrohro <bodqhrohro@gmail.com> 2020-01-10 00:16:40 +0300
commit 70383bee128b11451211e514200b23f47bf272db (patch)
tree bfc56f26264e206e66e4a6c150f70cf8625e8d66 /telegram
parent b8fcac6ae24dd5e07f366741f0f282f33b18b503 (diff)
Convert formatting entities to Markdown
Diffstat (limited to 'telegram')
-rw-r--r-- telegram/formatter/formatter.go 165
-rw-r--r-- telegram/formatter/formatter_test.go 208
-rw-r--r-- telegram/utils.go 44
3 files changed, 410 insertions, 7 deletions
diff --git a/telegram/formatter/formatter.go b/telegram/formatter/formatter.go
new file mode 100644
index 0000000..4b26f83
--- /dev/null
+++ b/telegram/formatter/formatter.go
@@ -0,0 +1,165 @@
+package formatter
+
+import (
+ "sort"
+
+ log "github.com/sirupsen/logrus"
+ "github.com/zelenin/go-tdlib/client"
+)
+
+// Insertion is a piece of markup text (Runes) to be placed at position Offset of the source text
+type Insertion struct {
+ Offset int32 // compared by Format against a running count of UTF-16 code units
+ Runes []rune // the runes emitted into the output at Offset
+}
+
+// InsertionStack contains a sequence of insertions used as a stack that grows
+// either from the start or from the end (see rebalance for how the two kinds are merged)
+type InsertionStack []*Insertion
+
+var boldRunes = []rune("**") // opening and closing marker of bold entities
+var italicRunes = []rune("_") // opening and closing marker of italic entities
+var codeRunes = []rune("\n```\n") // fence around code/pre entities; also the closing fence of pre-code entities
+var urlRuneL = []rune("[") // opening bracket of a Markdown link; the closing part is built per entity from its URL
+
+// rebalance drains s2 (a stack growing from the end) into s (a stack growing
+// from the start): entries are popped off the top of s2 and appended to s for
+// as long as the top entry's Offset does not exceed the given offset.
+// It must be called before anything is pushed to s at that offset.
+// Both updated stacks are returned, s first.
+func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
+	for {
+		top := len(s2) - 1
+		if top < 0 || s2[top].Offset > offset {
+			// nothing left to move, or the next entry belongs further ahead
+			return s, s2
+		}
+
+		s = append(s, s2[top])
+		s2 = s2[:top]
+	}
+}
+
+// NewIterator returns a closure that walks the stack from its first element
+// to its last one, yielding one element per call; once the elements are
+// exhausted every further call yields nil
+func (s InsertionStack) NewIterator() func() *Insertion {
+	next := 0
+
+	return func() *Insertion {
+		if next >= len(s) {
+			return nil
+		}
+
+		current := s[next]
+		next++
+		return current
+	}
+}
+
+// SortEntities returns a copy of entities arranged in traversal-ready order:
+// ascending by Offset, and for equal offsets the longer entity first, so that
+// an enclosing entity always precedes the entities nested in it.
+// The input slice is left untouched; entities with equal offset and length
+// keep their relative input order.
+func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
+	sortedEntities := make([]*client.TextEntity, len(entities))
+	copy(sortedEntities, entities)
+
+	sort.SliceStable(sortedEntities, func(i int, j int) bool {
+		// the indices refer to the slice being sorted, so the comparison
+		// must read from the copy and not from the original slice
+		entity1 := sortedEntities[i]
+		entity2 := sortedEntities[j]
+		if entity1.Offset != entity2.Offset {
+			return entity1.Offset < entity2.Offset
+		}
+		return entity1.Length > entity2.Length
+	})
+	return sortedEntities
+}
+
+// markupBraces builds the pair of insertions that surround an entity:
+// lbrace placed at the entity start and rbrace placed right after its end
+func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
+	opening := &Insertion{Offset: entity.Offset, Runes: lbrace}
+	closing := &Insertion{Offset: entity.Offset + entity.Length, Runes: rbrace}
+
+	return opening, closing
+}
+
+// EntityToMarkdown generates the wrapping Markdown tags for an entity as a
+// pair of insertions (opening, closing):
+//   - bold:      ** ... **
+//   - italic:    _ ... _
+//   - code, pre: a ``` fence on separate lines around the text
+//   - pre-code:  the same fence with the language appended to the opening one
+//   - text URL:  [ ... ](url)
+//
+// For any other entity type, as well as for a nil entity or an entity
+// without a type, (nil, nil) is returned.
+func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
+	if entity == nil || entity.Type == nil {
+		return nil, nil
+	}
+
+	switch entity.Type.TextEntityTypeType() {
+	case client.TypeTextEntityTypeBold:
+		return markupBraces(entity, boldRunes, boldRunes)
+	case client.TypeTextEntityTypeItalic:
+		return markupBraces(entity, italicRunes, italicRunes)
+	case client.TypeTextEntityTypeCode, client.TypeTextEntityTypePre:
+		return markupBraces(entity, codeRunes, codeRunes)
+	case client.TypeTextEntityTypePreCode:
+		preCode, ok := entity.Type.(*client.TextEntityTypePreCode)
+		if !ok || preCode == nil {
+			// the language is unavailable, still render it as a code block
+			return markupBraces(entity, codeRunes, codeRunes)
+		}
+		return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
+	case client.TypeTextEntityTypeTextUrl:
+		textURL, ok := entity.Type.(*client.TextEntityTypeTextUrl)
+		if !ok || textURL == nil {
+			// no URL to link to, leave the text unwrapped
+			return nil, nil
+		}
+		return markupBraces(entity, urlRuneL, []rune("]("+textURL.Url+")"))
+	}
+
+	return nil, nil
+}
+
+// Format traverses an already sorted list of entities (see SortEntities) and
+// wraps the corresponding parts of sourceText in markup.
+//
+// entityToMarkup converts an entity into its opening and closing insertions;
+// either of them may be nil, in which case it is skipped. Entities with a
+// non-positive length are ignored. When there are no entities, sourceText is
+// returned as is.
+//
+// Insertion offsets are matched against a position counted in UTF-16 code
+// units, so a rune outside the Basic Multilingual Plane advances it by two.
+//
+// NOTE: closing insertions are kept on a stack, so the output is well-formed
+// for entities that are disjoint or properly nested; for partially
+// overlapping entities the closing marker of the earlier entity is emitted
+// only after the closing marker of the later one.
+func Format(
+	sourceText string,
+	entities []*client.TextEntity,
+	entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
+) string {
+	if len(entities) == 0 {
+		return sourceText
+	}
+
+	// every entity yields at most one opening and one closing insertion, so
+	// the stacks are sized by the number of entities, not by the text length;
+	// startStack eventually receives the closing insertions too
+	startStack := make(InsertionStack, 0, 2*len(entities))
+	endStack := make(InsertionStack, 0, len(entities))
+
+	// convert entities to a stack of brackets
+	var maxEndOffset int32
+	for _, entity := range entities {
+		log.Debugf("%#v", entity)
+		if entity.Length <= 0 {
+			continue
+		}
+
+		endOffset := entity.Offset + entity.Length
+		if endOffset > maxEndOffset {
+			maxEndOffset = endOffset
+		}
+
+		// closing brackets located at or before this entity's start must be
+		// emitted before its opening bracket
+		startStack, endStack = startStack.rebalance(endStack, entity.Offset)
+
+		startInsertion, endInsertion := entityToMarkup(entity)
+		if startInsertion != nil {
+			startStack = append(startStack, startInsertion)
+		}
+		if endInsertion != nil {
+			endStack = append(endStack, endInsertion)
+		}
+	}
+	// flush the closing brackets that still remain in endStack
+	startStack, endStack = startStack.rebalance(endStack, maxEndOffset)
+
+	// merge brackets into text
+	markupRunes := make([]rune, 0, len(sourceText))
+
+	nextInsertion := startStack.NewIterator()
+	insertion := nextInsertion()
+	var runeI int32
+
+	for _, cp := range sourceText {
+		// emit everything that is due at or before the current position
+		for insertion != nil && insertion.Offset <= runeI {
+			markupRunes = append(markupRunes, insertion.Runes...)
+			insertion = nextInsertion()
+		}
+
+		markupRunes = append(markupRunes, cp)
+		// skip two UTF-16 code units (not points actually!) if needed
+		if cp > 0x0000ffff {
+			runeI += 2
+		} else {
+			runeI++
+		}
+	}
+	// insertions located at or past the end of the text go after it
+	for insertion != nil {
+		markupRunes = append(markupRunes, insertion.Runes...)
+		insertion = nextInsertion()
+	}
+
+	return string(markupRunes)
+}
diff --git a/telegram/formatter/formatter_test.go b/telegram/formatter/formatter_test.go
new file mode 100644
index 0000000..63337d6
--- /dev/null
+++ b/telegram/formatter/formatter_test.go
@@ -0,0 +1,208 @@
+package formatter
+
+import (
+ "testing"
+
+ "github.com/zelenin/go-tdlib/client"
+)
+
+func TestNoFormatting(t *testing.T) {
+ markup := Format("abc\ndef", []*client.TextEntity{}, EntityToMarkdown)
+ if markup != "abc\ndef" {
+ t.Errorf("No formatting expected, but: %v", markup)
+ }
+}
+
+// TestFormattingSimple wraps the last two emoji in bold; every emoji here
+// takes two UTF-16 code units, hence offset 2 and length 4
+func TestFormattingSimple(t *testing.T) {
+	bold := &client.TextEntity{
+		Offset: 2,
+		Length: 4,
+		Type:   &client.TextEntityTypeBold{},
+	}
+
+	got := Format("👙🐧🐖", []*client.TextEntity{bold}, EntityToMarkdown)
+	if want := "👙**🐧🐖**"; got != want {
+		t.Errorf("Wrong simple formatting: %v", got)
+	}
+}
+
+// TestFormattingAdjacent formats two entities that touch each other:
+// an italic one ending exactly where a text URL begins
+func TestFormattingAdjacent(t *testing.T) {
+	italic := &client.TextEntity{
+		Offset: 3,
+		Length: 2,
+		Type:   &client.TextEntityTypeItalic{},
+	}
+	link := &client.TextEntity{
+		Offset: 5,
+		Length: 2,
+		Type: &client.TextEntityTypeTextUrl{
+			Url: "https://narayana.im/",
+		},
+	}
+
+	got := Format("a👙🐧🐖", []*client.TextEntity{italic, link}, EntityToMarkdown)
+	if want := "a👙_🐧_[🐖](https://narayana.im/)"; got != want {
+		t.Errorf("Wrong adjacent formatting: %v", got)
+	}
+}
+
+// TestFormattingAdjacentAndNested combines a bold entity nested at the start
+// of a pre block with an italic entity that starts right after the block
+func TestFormattingAdjacentAndNested(t *testing.T) {
+	pre := &client.TextEntity{
+		Offset: 0,
+		Length: 4,
+		Type:   &client.TextEntityTypePre{},
+	}
+	bold := &client.TextEntity{
+		Offset: 0,
+		Length: 2,
+		Type:   &client.TextEntityTypeBold{},
+	}
+	italic := &client.TextEntity{
+		Offset: 4,
+		Length: 2,
+		Type:   &client.TextEntityTypeItalic{},
+	}
+
+	got := Format("👙🐧🐖", []*client.TextEntity{pre, bold, italic}, EntityToMarkdown)
+	if want := "\n```\n**👙**🐧\n```\n_🐖_"; got != want {
+		t.Errorf("Wrong adjacent&nested formatting: %v", got)
+	}
+}
+
+// TestRebalanceTwoZero rebalances against an empty end stack:
+// the start stack must stay exactly as it was
+func TestRebalanceTwoZero(t *testing.T) {
+	fromStart := InsertionStack{{Offset: 7}, {Offset: 8}}
+	fromEnd := InsertionStack{}
+
+	fromStart, fromEnd = fromStart.rebalance(fromEnd, 7)
+
+	if len(fromStart) != 2 || len(fromEnd) != 0 ||
+		fromStart[0].Offset != 7 || fromStart[1].Offset != 8 {
+		t.Errorf("Wrong rebalance 2–0: %#v %#v", fromStart, fromEnd)
+	}
+}
+
+// TestRebalanceNeeded expects exactly one insertion (offset 9) to be moved
+// from the end stack to the start stack
+func TestRebalanceNeeded(t *testing.T) {
+	fromStart := InsertionStack{{Offset: 7}, {Offset: 8}}
+	fromEnd := InsertionStack{{Offset: 10}, {Offset: 9}}
+
+	fromStart, fromEnd = fromStart.rebalance(fromEnd, 9)
+
+	if len(fromStart) != 3 || len(fromEnd) != 1 ||
+		fromStart[0].Offset != 7 || fromStart[1].Offset != 8 || fromStart[2].Offset != 9 ||
+		fromEnd[0].Offset != 10 {
+		t.Errorf("Wrong rebalance when needed: %#v %#v", fromStart, fromEnd)
+	}
+}
+
+// TestRebalanceNotNeeded uses an offset lying before every entry of the end
+// stack, so both stacks must remain untouched
+func TestRebalanceNotNeeded(t *testing.T) {
+	fromStart := InsertionStack{{Offset: 7}, {Offset: 8}}
+	fromEnd := InsertionStack{{Offset: 10}, {Offset: 9}}
+
+	fromStart, fromEnd = fromStart.rebalance(fromEnd, 8)
+
+	if len(fromStart) != 2 || len(fromEnd) != 2 ||
+		fromStart[0].Offset != 7 || fromStart[1].Offset != 8 ||
+		fromEnd[0].Offset != 10 || fromEnd[1].Offset != 9 {
+		t.Errorf("Wrong rebalance when not needed: %#v %#v", fromStart, fromEnd)
+	}
+}
+
+// TestRebalanceLate uses an offset covering the whole end stack, which must
+// be drained completely into the start stack in ascending offset order
+func TestRebalanceLate(t *testing.T) {
+	fromStart := InsertionStack{{Offset: 7}, {Offset: 8}}
+	fromEnd := InsertionStack{{Offset: 10}, {Offset: 9}}
+
+	fromStart, fromEnd = fromStart.rebalance(fromEnd, 10)
+
+	if len(fromStart) != 4 || len(fromEnd) != 0 ||
+		fromStart[0].Offset != 7 || fromStart[1].Offset != 8 ||
+		fromStart[2].Offset != 9 || fromStart[3].Offset != 10 {
+		t.Errorf("Wrong rebalance when late: %#v %#v", fromStart, fromEnd)
+	}
+}
+
+// TestIteratorEmpty checks that iterating an empty stack yields nil at once
+func TestIteratorEmpty(t *testing.T) {
+	next := InsertionStack{}.NewIterator()
+
+	if got := next(); got != nil {
+		t.Errorf("Empty iterator should return nil but returned %#v", got)
+	}
+}
+
+// TestIterator walks a two-element stack and then keeps calling the iterator
+// to make sure it sticks to nil after the end
+func TestIterator(t *testing.T) {
+	stack := InsertionStack{{Offset: 7}, {Offset: 8}}
+	next := stack.NewIterator()
+
+	if got := next(); got == nil || got.Offset != 7 {
+		t.Errorf("Wrong insertion instead of 7: %#v", got)
+	}
+	if got := next(); got == nil || got.Offset != 8 {
+		t.Errorf("Wrong insertion instead of 8: %#v", got)
+	}
+	if got := next(); got != nil {
+		t.Errorf("nil should be returned after end, %#v instead", got)
+	}
+	if got := next(); got != nil {
+		t.Errorf("Further attempts should return nil too, %#v instead", got)
+	}
+}
+
+// TestSortEntities expects ascending offsets with the longer entity first
+// among the entities sharing an offset
+func TestSortEntities(t *testing.T) {
+	sorted := SortEntities([]*client.TextEntity{
+		{Offset: 3, Length: 2},
+		{Offset: 5, Length: 2},
+		{Offset: 7, Length: 2},
+		{Offset: 6, Length: 1},
+		{Offset: 5, Length: 1},
+	})
+
+	// expected {offset, length} pairs in the resulting order
+	want := [][2]int32{{3, 2}, {5, 2}, {5, 1}, {6, 1}, {7, 2}}
+
+	matches := len(sorted) == len(want)
+	for i := 0; matches && i < len(want); i++ {
+		matches = sorted[i].Offset == want[i][0] && sorted[i].Length == want[i][1]
+	}
+	if !matches {
+		t.Errorf("Wrong sorting order: %#v", sorted)
+	}
+}
+
+// TestSortEmpty checks that sorting an empty set yields an empty set
+func TestSortEmpty(t *testing.T) {
+	sorted := SortEntities([]*client.TextEntity{})
+
+	if len(sorted) != 0 {
+		t.Errorf("Empty entities set sorting error: %#v", sorted)
+	}
+}
diff --git a/telegram/utils.go b/telegram/utils.go
index 8de1f5f..f7e7a28 100644
--- a/telegram/utils.go
+++ b/telegram/utils.go
@@ -15,6 +15,7 @@ import (
"time"
"dev.narayana.im/narayana/telegabber/telegram/cache"
+ "dev.narayana.im/narayana/telegabber/telegram/formatter"
"dev.narayana.im/narayana/telegabber/xmpp/gateway"
log "github.com/sirupsen/logrus"
@@ -281,6 +282,7 @@ func (c *Client) formatContent(file *client.File, filename string) string {
}
func (c *Client) messageToText(message *client.Message) string {
+ markupFunction := formatter.EntityToMarkdown
switch message.Content.MessageContentType() {
case client.TypeMessageSticker:
sticker, _ := message.Content.(*client.MessageSticker)
@@ -318,27 +320,55 @@ func (c *Client) messageToText(message *client.Message) string {
)
case client.TypeMessagePhoto:
photo, _ := message.Content.(*client.MessagePhoto)
- return photo.Caption.Text
+ return formatter.Format(
+ photo.Caption.Text,
+ formatter.SortEntities(photo.Caption.Entities),
+ markupFunction,
+ )
case client.TypeMessageAudio:
audio, _ := message.Content.(*client.MessageAudio)
- return audio.Caption.Text
+ return formatter.Format(
+ audio.Caption.Text,
+ formatter.SortEntities(audio.Caption.Entities),
+ markupFunction,
+ )
case client.TypeMessageVideo:
video, _ := message.Content.(*client.MessageVideo)
- return video.Caption.Text
+ return formatter.Format(
+ video.Caption.Text,
+ formatter.SortEntities(video.Caption.Entities),
+ markupFunction,
+ )
case client.TypeMessageDocument:
document, _ := message.Content.(*client.MessageDocument)
- return document.Caption.Text
+ return formatter.Format(
+ document.Caption.Text,
+ formatter.SortEntities(document.Caption.Entities),
+ markupFunction,
+ )
case client.TypeMessageText:
text, _ := message.Content.(*client.MessageText)
- return text.Text.Text
+ return formatter.Format(
+ text.Text.Text,
+ formatter.SortEntities(text.Text.Entities),
+ markupFunction,
+ )
case client.TypeMessageVoiceNote:
voice, _ := message.Content.(*client.MessageVoiceNote)
- return voice.Caption.Text
+ return formatter.Format(
+ voice.Caption.Text,
+ formatter.SortEntities(voice.Caption.Entities),
+ markupFunction,
+ )
case client.TypeMessageVideoNote:
return ""
case client.TypeMessageAnimation:
animation, _ := message.Content.(*client.MessageAnimation)
- return animation.Caption.Text
+ return formatter.Format(
+ animation.Caption.Text,
+ formatter.SortEntities(animation.Caption.Entities),
+ markupFunction,
+ )
}
return fmt.Sprintf("unknown message (%s)", message.Content.MessageContentType())