Compatibility with https://github.com/caneroj1/stemmer libraryHEAD master

author: Alexander Kiryukhin <a.kiryukhin@mail.ru> 2019-05-28 13:24:40 +0300
committer: Alexander Kiryukhin <a.kiryukhin@mail.ru> 2019-05-28 13:24:40 +0300
commit: 5c7748b15e2fcc616c89492b85f367750e327d79 (patch)
tree: ec477f84fef127c42c3df0db55ab9792a74d4c99 /stemmer.go
parent: 0911847693eac96cce52f18a6592a93e50464cf2 (diff)
1 files changed, 55 insertions, 3 deletions
diff --git a/stemmer.go b/stemmer.go
index f15281b..80dc592 100644
--- a/stemmer.go
+++ b/stemmer.go
@@ -1,6 +1,7 @@
 package StemmerRu
 
 import (
+	"runtime"
 	"strings"
 )
 
@@ -17,7 +18,7 @@ var (
 	vowels = `аеиоуыэюя`
 )
 
-func StemWord(word string) string {
+func Stem(word string) string {
 
 	word = strings.Replace(word, `ё`, `е`, -1)
 
@@ -27,10 +28,9 @@ func StemWord(word string) string {
 		return word
 	}
 
-
 	R1pos := getRNPart(word, 0)
 	R2pos := getRNPart(word, R1pos)
-	if (R2pos < RVpos) {
+	if R2pos < RVpos {
 		R2pos = 0
 	} else {
 		R2pos -= RVpos
@@ -140,3 +140,55 @@ func getRNPart(word string, startPos int) int {
 
 	return startPos
 }
+
+// Code from https://github.com/caneroj1/stemmer
+
+// StemMultiple accepts a slice of strings and stems each of them.
+func StemMultiple(words []string) (output []string) {
+	output = make([]string, len(words))
+	for idx, word := range words {
+		output[idx] = Stem(word)
+	}
+
+	return
+}
+
+// StemMultipleMutate accepts a pointer to a slice of strings and stems them in place.
+// It modifies the original slice.
+func StemMultipleMutate(words *[]string) {
+	for idx, word := range *words {
+		(*words)[idx] = Stem(word)
+	}
+}
+
+// StemConcurrent accepts a pointer to a slice of strings and stems them in place.
+// It tries to offload the work into multiple threads. It makes no guarantees about
+// the order of the stems in the modified slice.
+func StemConcurrent(words *[]string) {
+	CPUs := runtime.NumCPU()
+	length := len(*words)
+	output := make(chan string)
+	partition := length / CPUs
+
+	var CPU int
+	for CPU = 0; CPU < CPUs; CPU++ {
+		go func(strs []string) {
+			for _, word := range strs {
+				output <- Stem(word)
+			}
+		}((*words)[CPU*partition : (CPU+1)*partition])
+	}
+
+	// if there are leftover words, stem them now
+	if length-(CPU)*partition > 0 {
+		go func(strs []string) {
+			for _, word := range strs {
+				output <- Stem(word)
+			}
+		}((*words)[(CPU)*partition : length])
+	}
+
+	for idx := 0; idx < length; idx++ {
+		(*words)[idx] = <-output
+	}
+}
author	Alexander Kiryukhin <a.kiryukhin@mail.ru>	2019-05-28 13:24:40 +0300
committer	Alexander Kiryukhin <a.kiryukhin@mail.ru>	2019-05-28 13:24:40 +0300
commit	5c7748b15e2fcc616c89492b85f367750e327d79 (patch)
tree	ec477f84fef127c42c3df0db55ab9792a74d4c99 /stemmer.go
parent	0911847693eac96cce52f18a6592a93e50464cf2 (diff)