summaryrefslogtreecommitdiff
path: root/cmd/bash
diff options
context:
space:
mode:
author: NeonXP <i@neonxp.dev> 2023-01-04 18:52:25 +0300
committer: NeonXP <i@neonxp.dev> 2023-01-04 18:52:25 +0300
commit: 5947c1d643dfc077c19d3b4c01e599578e1dfe62 (patch)
tree: 6723c5982626403a507ed25c74711ae74eccbe23 /cmd/bash
Diffstat (limited to 'cmd/bash')
-rw-r--r-- cmd/bash/main.go 98
1 files changed, 98 insertions, 0 deletions
diff --git a/cmd/bash/main.go b/cmd/bash/main.go
new file mode 100644
index 0000000..8ecf2e7
--- /dev/null
+++ b/cmd/bash/main.go
@@ -0,0 +1,98 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/antchfx/htmlquery"
+)
+
+// main scrapes every index page from the highest known page number down to 1,
+// merges the quotes parsed from each page and writes them as a JSON array to
+// db/quotes.json.
+//
+// Pages are fetched concurrently by a bounded number of goroutines. A page that
+// fails to load or parse is logged and skipped; a failure to encode or write
+// the result terminates the program.
+func main() {
+	const (
+		from       = 3472 // highest index page number to fetch
+		maxWorkers = 16   // upper bound on simultaneous page downloads
+	)
+
+	// One slot per page number. Each goroutine writes only its own slot, so the
+	// workers never touch shared memory concurrently (the previous shared
+	// append was a data race) and the final order does not depend on
+	// goroutine scheduling.
+	pages := make([][]quoteElem, from+1)
+
+	// sem limits the number of in-flight requests so thousands of connections
+	// are not opened against the same host at once.
+	sem := make(chan struct{}, maxWorkers)
+
+	var wg sync.WaitGroup
+	for i := from; i >= 1; i-- {
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(i int) {
+			defer wg.Done()
+			defer func() { <-sem }()
+			quotes, err := parsePage(i)
+			if err != nil {
+				log.Printf("page %d: %v", i, err)
+				return
+			}
+			pages[i] = quotes
+		}(i)
+	}
+	wg.Wait()
+
+	// Start from a non-nil slice so an empty result is encoded as [] and not null.
+	all := []quoteElem{}
+	for i := from; i >= 1; i-- {
+		all = append(all, pages[i]...)
+	}
+
+	b, err := json.Marshal(all)
+	if err != nil {
+		log.Fatalf("encoding quotes: %v", err)
+	}
+	// A data file needs no execute bit and should not be world-writable.
+	if err := os.WriteFile("db/quotes.json", b, 0o644); err != nil {
+		log.Fatalf("writing db/quotes.json: %v", err)
+	}
+	log.Println("ok")
+}
+
+// parsePage downloads index page number num and extracts every quote article
+// found on it.
+//
+// The quotes are returned in document order. An error is returned when the
+// page cannot be loaded or one of the XPath queries fails. Articles without a
+// header link are skipped, and an article whose number cannot be parsed is
+// logged and skipped, so a single malformed entry does not discard the page.
+func parsePage(num int) ([]quoteElem, error) {
+	doc, err := htmlquery.LoadURL(fmt.Sprintf("https://xn--80abh7bk0c.xn--p1ai/index/%d", num))
+	if err != nil {
+		return nil, fmt.Errorf("loading page %d: %w", num, err)
+	}
+	articles, err := htmlquery.QueryAll(doc, "/html/body/div[1]/main/section/article")
+	if err != nil {
+		return nil, fmt.Errorf("querying articles on page %d: %w", num, err)
+	}
+
+	quotes := make([]quoteElem, 0, len(articles))
+	for _, article := range articles {
+		header, err := htmlquery.Query(article, "/div/header/a")
+		if err != nil {
+			return nil, fmt.Errorf("querying quote header on page %d: %w", num, err)
+		}
+		// Skip articles without a (non-empty) header link rather than
+		// abandoning the remaining articles on the page.
+		if header == nil || header.FirstChild == nil {
+			continue
+		}
+
+		// The link text is the quote number preceded by a one-byte prefix.
+		// NOTE(review): presumably "#" — confirm against the live markup.
+		label := header.FirstChild.Data
+		digits := ""
+		if label != "" {
+			digits = label[1:]
+		}
+		id, err := strconv.Atoi(digits)
+		if err != nil {
+			log.Printf("page %d: skipping quote with malformed number %q: %v", num, label, err)
+			continue
+		}
+
+		dateNode, err := htmlquery.Query(article, "/div/header/div")
+		if err != nil {
+			return nil, fmt.Errorf("querying quote date on page %d: %w", num, err)
+		}
+		date := ""
+		if dateNode != nil && dateNode.FirstChild != nil {
+			// NOTE(review): "\\n" removes the literal two-character sequence
+			// backslash+n, not newline characters — confirm this is intended.
+			date = strings.Trim(strings.ReplaceAll(dateNode.FirstChild.Data, "\\n", ""), " ")
+		}
+
+		// Collect the direct children of the body div whose DataAtom is zero
+		// and join them with newlines. NOTE(review): this looks intended to
+		// keep text nodes and drop known elements such as <br> — confirm.
+		// A missing or empty body div yields an empty Body.
+		var lines []string
+		if content := htmlquery.FindOne(article, "/div/div"); content != nil {
+			for n := content.FirstChild; n != nil; n = n.NextSibling {
+				if n.DataAtom == 0 {
+					lines = append(lines, n.Data)
+				}
+			}
+		}
+
+		quotes = append(quotes, quoteElem{
+			Body: strings.Trim(strings.Join(lines, "\n"), " \n\t"),
+			Num:  id,
+			Date: date,
+		})
+	}
+	return quotes, nil
+}
+
+// quoteElem is a single scraped quote as it is serialized into the JSON output.
+type quoteElem struct {
+ // Num is the quote number parsed from the article's header link text.
+ Num int `json:"num"`
+ // Body is the quote text: the collected child nodes of the body div joined
+ // with newlines, with surrounding spaces, tabs and newlines trimmed.
+ Body string `json:"body"`
+ // Date is the text of the header's date element with surrounding spaces
+ // trimmed; it is empty when the element is absent.
+ Date string `json:"date"`
+}