diff options
author | NeonXP <i@neonxp.dev> | 2023-01-04 18:52:25 +0300 |
---|---|---|
committer | NeonXP <i@neonxp.dev> | 2023-01-04 18:52:25 +0300 |
commit | 5947c1d643dfc077c19d3b4c01e599578e1dfe62 (patch) | |
tree | 6723c5982626403a507ed25c74711ae74eccbe23 /cmd/bash/main.go |
Diffstat (limited to 'cmd/bash/main.go')
-rw-r--r-- | cmd/bash/main.go | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/cmd/bash/main.go b/cmd/bash/main.go new file mode 100644 index 0000000..8ecf2e7 --- /dev/null +++ b/cmd/bash/main.go @@ -0,0 +1,98 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "os" + "strconv" + "strings" + "sync" + + "github.com/antchfx/htmlquery" +) + +func main() { + all := []quoteElem{} + from := 3472 + wg := sync.WaitGroup{} + for i := from; i >= 1; i-- { + wg.Add(1) + go func(i int) { + defer wg.Done() + quotes, err := parsePage(i) + if err != nil { + log.Println(err) + return + } + all = append(all, quotes...) + }(i) + } + wg.Wait() + b, err := json.Marshal(all) + if err != nil { + panic(err) + } + if err := os.WriteFile("db/quotes.json", b, os.ModePerm); err != nil { + panic(err) + } + log.Println("ok") +} + +func parsePage(num int) ([]quoteElem, error) { + doc, err := htmlquery.LoadURL(fmt.Sprintf("https://xn--80abh7bk0c.xn--p1ai/index/%d", num)) + if err != nil { + return nil, err + } + quotes := []quoteElem{} + quotesList, err := htmlquery.QueryAll(doc, "/html/body/div[1]/main/section/article") + if err != nil { + return nil, err + } + for _, quote := range quotesList { + header, err := htmlquery.Query(quote, "/div/header/a") + if err != nil { + return nil, err + } + if header == nil { + break + } + num, _ := strconv.Atoi(header.FirstChild.Data[1:]) + date, err := htmlquery.Query(quote, "/div/header/div") + dates := "" + if err != nil { + return nil, err + } + if date != nil { + dates = date.FirstChild.Data + dates = strings.Trim(strings.ReplaceAll(dates, "\\n", ""), " ") + } + body := htmlquery.FindOne(quote, "/div/div").FirstChild + text := []string{} + for { + if body.DataAtom == 0 { + text = append(text, body.Data) + } + body = body.NextSibling + if body == nil { + break + } + } + quotes = append(quotes, quoteElem{ + Body: strings.Trim(strings.Join(text, "\n"), " \n\t"), + Num: num, + Date: dates, + }) + quote = quote.NextSibling + if quote == nil { + break + } + } + return quotes, nil +} + +type quoteElem struct { + Num int `json:"num"` + Body string `json:"body"` + Date string `json:"date"` +} |