// Reconstructed from a whitespace-collapsed patch:
//
//	commit  cf07a154d56f87d34568c44c9c28367985a9e935
//	author  Alexander Kiryukhin
//	date    Thu, 10 May 2018 03:03:55 +0300
//	subject Move files / Added travis.yml
//
// The commit moves StemmerRu/stemmer.go to stemmer.go (same blob, index
// f15281b), moves StemmerRu/stemmer_test.go to stemmer_test.go (the fixture
// path changes from path.Join("..", "tests.json") to "tests.json") and adds
// .travis.yml. The sections below hold the resulting files.
//
// Both Go files belong to `package StemmerRu`. stemmer.go imports "strings";
// stemmer_test.go imports "encoding/json", "io/ioutil" and "testing". The
// package clause and import blocks must be restored when the sections are
// split back into separate files.
//
// ----------------------------------------------------------------------------
// .travis.yml
// ----------------------------------------------------------------------------
//
//	language: go
//	go:
//	  - "1.10"
//	  - tip
//
//	before_install:
//	  - go get -t -v ./...
//
//	script:
//	  - go test -race -coverprofile=coverage.txt -covermode=atomic
//
//	after_success:
//	  - bash <(curl -s https://codecov.io/bash)
//
// ----------------------------------------------------------------------------
// stemmer.go
// ----------------------------------------------------------------------------

// Ending tables used by StemWord. For the two-dimensional tables, group [1]
// holds endings that are removed unconditionally and group [0] holds endings
// that are removed only when the letter before them is `а` or `я` (see
// trimSuffix). Within a table the first matching entry wins, so the order of
// the entries is significant.
var (
	perfectiveGerund = [][]string{
		{`в`, `вши`, `вшись`},
		{`ив`, `ивши`, `ившись`, `ыв`, `ывши`, `ывшись`},
	}
	adjective = []string{
		`ее`, `ие`, `ые`, `ое`, `ими`, `ыми`, `ей`, `ий`, `ый`, `ой`, `ем`, `им`, `ым`,
		`ом`, `его`, `ого`, `ему`, `ому`, `их`, `ых`, `ую`, `юю`, `ая`, `яя`, `ою`, `ею`,
	}
	participle = [][]string{
		{`ем`, `нн`, `вш`, `ющ`, `щ`},
		{`ивш`, `ывш`, `ующ`},
	}
	reflexive = []string{`ся`, `сь`}
	verb      = [][]string{
		{
			`ла`, `на`, `ете`, `йте`, `ли`, `й`, `л`, `ем`, `н`, `ло`, `но`, `ет`, `ют`,
			`ны`, `ть`, `ешь`, `нно`,
		},
		{
			`ила`, `ыла`, `ена`, `ейте`, `уйте`, `ите`, `или`, `ыли`, `ей`, `уй`, `ил`,
			`ыл`, `им`, `ым`, `ен`, `ило`, `ыло`, `ено`, `ят`, `ует`, `уют`, `ит`, `ыт`,
			`ены`, `ить`, `ыть`, `ишь`, `ую`, `ю`,
		},
	}
	noun = []string{
		`а`, `ев`, `ов`, `ие`, `ье`, `е`, `иями`, `ями`, `ами`, `еи`, `ии`, `и`, `ией`,
		`ей`, `ой`, `ий`, `й`, `иям`, `ям`, `ием`, `ем`, `ам`, `ом`, `о`, `у`, `ах`,
		`иях`, `ях`, `ы`, `ь`, `ию`, `ью`, `ю`, `ия`, `ья`, `я`,
	}
	superlative  = []string{`ейш`, `ейше`}
	derivational = []string{`ост`, `ость`}

	// vowels lists the lowercase letters isVowel treats as vowels.
	vowels = `аеиоуыэюя`
)

// StemWord returns the stem of a Russian word.
//
// Every `ё` is first replaced by `е`. A word without any vowel is returned
// after that replacement and nothing else. Otherwise the word is split just
// after its first vowel: the head is kept verbatim and the tail goes through
// four steps (the step structure appears to follow the Snowball Russian
// stemming algorithm):
//
//  1. remove a perfective-gerund ending; failing that, remove a reflexive
//     ending and then the first of adjectival, verb or noun ending that
//     matches;
//  2. remove a trailing `и`;
//  3. remove a derivational ending, looking only at the R2 part of the tail;
//  4. collapse a trailing `нн` to `н`; failing that, remove a superlative
//     ending and then collapse `нн`; failing that, remove a trailing `ь`.
//
// The vowel and ending tables contain lowercase letters only and the input is
// not case-folded, so callers are presumably expected to pass lowercase text.
func StemWord(word string) string {
	// strings.Replace(..., -1) rather than strings.ReplaceAll: the CI matrix
	// in .travis.yml still includes Go 1.10.
	word = strings.Replace(word, `ё`, `е`, -1)

	rvPos := getRVPart(word)
	if rvPos == -1 {
		return word
	}

	// R2 is located on the whole word and then re-based onto the tail; an R2
	// that would start inside the head is clamped to the start of the tail.
	r1Pos := getRNPart(word, 0)
	r2Pos := getRNPart(word, r1Pos)
	if r2Pos < rvPos {
		r2Pos = 0
	} else {
		r2Pos -= rvPos
	}

	runes := []rune(word)
	prefix := string(runes[:rvPos])
	suffix := string(runes[rvPos:])

	// Step 1
	suffix, trimmed := trimSuffix(suffix, perfectiveGerund[1], perfectiveGerund[0])
	if !trimmed {
		// Whether a reflexive ending was present does not influence the
		// following steps, so the flag is deliberately discarded.
		suffix, _ = trimSuffix(suffix, reflexive, nil)
		suffix, trimmed = trimAdjectival(suffix)
		if !trimmed {
			suffix, trimmed = trimSuffix(suffix, verb[1], verb[0])
			if !trimmed {
				suffix, _ = trimSuffix(suffix, noun, nil)
			}
		}
	}

	// Step 2
	suffix = strings.TrimSuffix(suffix, `и`)

	// Step 3
	if tail := []rune(suffix); r2Pos < len(tail) {
		r2Suffix, _ := trimSuffix(string(tail[r2Pos:]), derivational, nil)
		suffix = string(tail[:r2Pos]) + r2Suffix
	}

	// Step 4
	suffix, trimmed = trimNN(suffix)
	if !trimmed {
		suffix, trimmed = trimSuffix(suffix, superlative, nil)
		if trimmed {
			suffix, _ = trimNN(suffix)
		} else {
			suffix = strings.TrimSuffix(suffix, `ь`)
		}
	}

	return prefix + suffix
}

// trimNN replaces a trailing `нн` with a single `н` and reports whether it
// did so.
func trimNN(word string) (string, bool) {
	if !strings.HasSuffix(word, `нн`) {
		return word, false
	}

	return strings.TrimSuffix(word, `нн`) + `н`, true
}

// trimAdjectival removes an adjective ending and, when one was removed, also
// a participle ending that preceded it. The boolean reports whether an
// adjective ending was removed; a participle ending is never removed on its
// own.
func trimAdjectival(word string) (string, bool) {
	word, trimmed := trimSuffix(word, adjective, nil)
	if trimmed {
		word, _ = trimSuffix(word, participle[1], participle[0])
	}

	return word, trimmed
}

// trimSuffix removes at most one ending from word and reports whether it
// removed anything.
//
// Entries of suffixes are tried first, in order, and match unconditionally.
// Entries of suffixes2 (which may be nil) are tried afterwards and match only
// when directly preceded by `а` or `я`; that preceding letter is kept in the
// result.
func trimSuffix(word string, suffixes []string, suffixes2 []string) (string, bool) {
	for _, suffix := range suffixes {
		if strings.HasSuffix(word, suffix) {
			return strings.TrimSuffix(word, suffix), true
		}
	}
	// Ranging over a nil slice is a no-op, so no nil check is needed.
	for _, suffix := range suffixes2 {
		if strings.HasSuffix(word, `а`+suffix) || strings.HasSuffix(word, `я`+suffix) {
			return strings.TrimSuffix(word, suffix), true
		}
	}

	return word, false
}

// isVowel reports whether char is one of the letters in vowels.
func isVowel(char rune) bool {
	return strings.ContainsRune(vowels, char)
}

// getRVPart returns the rune index just past the first vowel of word, or -1
// when word contains no vowel.
func getRVPart(word string) int {
	for idx, char := range []rune(word) {
		if isVowel(char) {
			return idx + 1
		}
	}

	return -1
}

// getRNPart scans word from rune index startPos for the first vowel that is
// immediately followed by a non-vowel and returns the rune index just past
// that pair.
//
// A pair is only accepted when at least one more rune follows it; if no such
// pair exists, startPos itself is returned.
func getRNPart(word string, startPos int) int {
	chars := []rune(word)[startPos:]
	for idx, char := range chars {
		if idx+2 >= len(chars) {
			// No later index can satisfy the bound either.
			break
		}
		if isVowel(char) && !isVowel(chars[idx+1]) {
			return startPos + idx + 2
		}
	}

	return startPos
}

// ----------------------------------------------------------------------------
// stemmer_test.go
// ----------------------------------------------------------------------------

// testFile is the JSON fixture read by TestStemWord: a single object mapping
// each source word to its expected stem.
var testFile = `tests.json`

// TestStemWord runs StemWord over every entry of the fixture and reports each
// mismatch together with the expected and the actual stem.
func TestStemWord(t *testing.T) {
	// ioutil.ReadFile rather than os.ReadFile: the CI matrix in .travis.yml
	// still includes Go 1.10.
	file, err := ioutil.ReadFile(testFile)
	if err != nil {
		// Without the fixture there is nothing to check; stop instead of
		// continuing with empty data.
		t.Fatalf("Can't open file %s: %v", testFile, err)
	}

	var tests map[string]string
	if err := json.Unmarshal(file, &tests); err != nil {
		t.Fatalf("Can't parse json: %v", err)
	}

	for source, expected := range tests {
		if result := StemWord(source); expected != result {
			t.Errorf(`Expected "%s" (source: %s) but got "%s"`, expected, source, result)
		}
	}
}