From 9fcf8e29214210612d545bed50d7f889800ac639 Mon Sep 17 00:00:00 2001 From: Alexander Kiryukhin Date: Sun, 1 May 2022 21:50:12 +0300 Subject: Initial --- levenshtein/doc.go | 2 + levenshtein/generic.go | 99 ++++++++++++++++++++++++++++++++++++++ levenshtein/generic_test.go | 115 ++++++++++++++++++++++++++++++++++++++++++++ levenshtein/strings.go | 11 +++++ levenshtein/strings_test.go | 64 ++++++++++++++++++++++++ 5 files changed, 291 insertions(+) create mode 100644 levenshtein/doc.go create mode 100644 levenshtein/generic.go create mode 100644 levenshtein/generic_test.go create mode 100644 levenshtein/strings.go create mode 100644 levenshtein/strings_test.go (limited to 'levenshtein') diff --git a/levenshtein/doc.go b/levenshtein/doc.go new file mode 100644 index 0000000..a9d3130 --- /dev/null +++ b/levenshtein/doc.go @@ -0,0 +1,2 @@ +// Пакет с функциями получения редакторских правок по Левенштейну +package levenshtein diff --git a/levenshtein/generic.go b/levenshtein/generic.go new file mode 100644 index 0000000..4aa3ecf --- /dev/null +++ b/levenshtein/generic.go @@ -0,0 +1,99 @@ +package levenshtein + +import ( + "go.neonxp.dev/extra/math" +) + +// Levenshtein возвращает последовательность правок для превращения последовательности элементов s1 в s2 +// с учетом стоимостей операций возвращаемых функцией cost +// TODO в алгоритме не предусмотрена экономия памяти ¯\_(ツ)_/¯ +func Levenshtein[T comparable](s1, s2 []T, cost EditCost[T]) []Edit { + len1 := len(s1) + len2 := len(s2) + if cost == nil { + cost = EditCost[T](func(t EditType, s1 *T, s2 *T) int { + return 1 + }) + } + + dp := make([][]int, len1+1) + for i := range dp { + dp[i] = make([]int, len2+1) + } + dp[0][0] = 0 + for i := 0; i < len1; i++ { + for j := 0; j < len2; j++ { + if i == 0 && j == 0 { + continue + } + switch { + case i == 0: + dp[0][j] = dp[0][j-1] + cost(Insert, nil, &s2[j]) + case j == 0: + dp[i][0] = dp[i-1][0] + cost(Delete, &s1[i], nil) + default: + m1 := dp[i-1][j] + cost(Delete, &s1[i], nil) + m2 := dp[i][j-1] + cost(Insert, nil, &s2[j]) + m3 := dp[i-1][j-1] + cost(Replace, &s1[i], &s2[j]) + dp[i][j] = math.MinScalar(m1, m2, m3) + } + } + } + i, j := len1-1, len2-1 + edits := []Edit{} + for i > 0 && j > 0 { + m1 := dp[i-1][j] + cost(Delete, &s1[i], nil) + m2 := dp[i][j-1] + cost(Insert, nil, &s2[j]) + m3 := dp[i-1][j-1] + cost(Replace, &s1[i], &s2[j]) + min := math.MinScalar(m1, m2, m3) + if min == m1 { + // добавляем удаление символа S1[i] + edits = append(edits, Edit{ + Type: Delete, + Idx1: i, + }) + i-- + } + if min == m2 { + // добавляем вставку символа S2[j] после S1[i] + edits = append(edits, Edit{ + Type: Insert, + Idx1: i, + Idx2: j, + }) + j-- + } + if min == m3 { + // добавляем замену S1[i] на S2[j] + i-- + j-- + if s1[i] != s2[j] { + edits = append(edits, Edit{ + Type: Replace, + Idx1: i, + Idx2: j, + }) + } + } + } + return edits +} + +// EditCost функция возвращающая стоимость действия t над элементами from, to +type EditCost[T comparable] func(t EditType, from *T, to *T) int + +// Edit редакторская правка описывающее одно действие над исходной последовательностью +type Edit struct { + Type EditType // Тип правки: Вставка/Замена/Удаление + Idx1 int // Индекс элемента из первой последовательности + Idx2 int // Индекс элемента из второй последовательности +} + +// EditType тип правки +type EditType int + +const ( + Insert EditType = iota // Вставка + Replace // Замена + Delete // Удаление +) diff --git a/levenshtein/generic_test.go b/levenshtein/generic_test.go new file mode 100644 index 0000000..11c9611 --- /dev/null +++ b/levenshtein/generic_test.go @@ -0,0 +1,115 @@ +package levenshtein + +import ( + "reflect" + "testing" +) + +func TestLevenshtein2(t *testing.T) { + type args struct { + s1 []myType + s2 []myType + cost EditCost[myType] + } + tests := []struct { + name string + args args + want []Edit + }{ + { + name: "test1", + args: args{ + s1: []myType{ + { + a: "a", + }, { + a: "b", + }, { + a: "c", + }, + }, + s2: []myType{ + { + a: "a", + }, { + a: "d", + }, { + a: "c", + }, + }, + cost: EditCost[myType](func(t EditType, from, to *myType) int { + return 1 + }), + }, + want: []Edit{ + { + Type: Replace, + Idx1: 1, + Idx2: 1, + }, + }, + }, + { + name: "test2 - costly replace", + args: args{ + s1: []myType{ + { + a: "a", + }, { + a: "b", + }, { + a: "c", + }, + }, + s2: []myType{ + { + a: "a", + }, { + a: "d", + }, { + a: "c", + }, + }, + cost: EditCost[myType](func(t EditType, from, to *myType) int { + if t == Replace { + return 100 + } + return 1 + }), + }, + want: []Edit{ + { + Type: Delete, + Idx1: 2, + Idx2: 0, + }, + { + Type: Insert, + Idx1: 1, + Idx2: 2, + }, + { + Type: Delete, + Idx1: 1, + Idx2: 0, + }, + { + Type: Insert, + Idx1: 0, + Idx2: 1, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := Levenshtein(tt.args.s1, tt.args.s2, tt.args.cost); !reflect.DeepEqual(got, tt.want) { + t.Errorf("Levenshtein() = %v, want %v", got, tt.want) + } + }) + } +} + +type myType struct { + a string +} diff --git a/levenshtein/strings.go b/levenshtein/strings.go new file mode 100644 index 0000000..a6257d1 --- /dev/null +++ b/levenshtein/strings.go @@ -0,0 +1,11 @@ +package levenshtein + +// String возвращает последовательность правок для превращения строки s1 в строку s2 +func String(s1, s2 string) []Edit { + return Levenshtein([]rune(s1), []rune(s2), nil) +} + +// Strings возвращает последовательность правок для превращения слайса строк s1 в слайс строк s2 +func Strings(s1, s2 []string) []Edit { + return Levenshtein(s1, s2, nil) +} diff --git a/levenshtein/strings_test.go b/levenshtein/strings_test.go new file mode 100644 index 0000000..fe103e4 --- /dev/null +++ b/levenshtein/strings_test.go @@ -0,0 +1,64 @@ +package levenshtein + +import ( + "reflect" + "testing" +) + +func TestString(t *testing.T) { + type args struct { + s1 string + s2 string + } + tests := []struct { + name string + args args + want []Edit + }{ + { + name: "test1", + args: args{ + s1: "абвзгде", + s2: "абвжгде", + }, + want: []Edit{ + { + Type: Replace, + Idx1: 3, + Idx2: 3, + }, + }, + }, + { + name: "test2", + args: args{ + s1: "абавзгд", + s2: "абввжгде", + }, + want: []Edit{ + { + Type: Insert, // абавзгд -> абавзгд[е] + Idx1: 6, + Idx2: 7, + }, + { + Type: Replace, // абав[з]где -> абав[ж]где + Idx1: 4, + Idx2: 4, + }, + { + Type: Replace, // аб[а]вжгде -> аб[в]вжгде + Idx1: 2, + Idx2: 2, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := String(tt.args.s1, tt.args.s2); !reflect.DeepEqual(got, tt.want) { + t.Errorf("LevenshteinString() = %v, want %v", got, tt.want) + } + }) + } +} -- cgit v1.2.3