package main import ( "encoding/json" "fmt" "log" "os" "strconv" "strings" "sync" "github.com/antchfx/htmlquery" ) func main() { all := []quoteElem{} from := 3472 wg := sync.WaitGroup{} for i := from; i >= 1; i-- { wg.Add(1) go func(i int) { defer wg.Done() quotes, err := parsePage(i) if err != nil { log.Println(err) return } all = append(all, quotes...) }(i) } wg.Wait() b, err := json.Marshal(all) if err != nil { panic(err) } if err := os.WriteFile("db/quotes.json", b, os.ModePerm); err != nil { panic(err) } log.Println("ok") } func parsePage(num int) ([]quoteElem, error) { doc, err := htmlquery.LoadURL(fmt.Sprintf("https://xn--80abh7bk0c.xn--p1ai/index/%d", num)) if err != nil { return nil, err } quotes := []quoteElem{} quotesList, err := htmlquery.QueryAll(doc, "/html/body/div[1]/main/section/article") if err != nil { return nil, err } for _, quote := range quotesList { header, err := htmlquery.Query(quote, "/div/header/a") if err != nil { return nil, err } if header == nil { break } num, _ := strconv.Atoi(header.FirstChild.Data[1:]) date, err := htmlquery.Query(quote, "/div/header/div") dates := "" if err != nil { return nil, err } if date != nil { dates = date.FirstChild.Data dates = strings.Trim(strings.ReplaceAll(dates, "\\n", ""), " ") } body := htmlquery.FindOne(quote, "/div/div").FirstChild text := []string{} for { if body.DataAtom == 0 { text = append(text, body.Data) } body = body.NextSibling if body == nil { break } } quotes = append(quotes, quoteElem{ Body: strings.Trim(strings.Join(text, "\n"), " \n\t"), Num: num, Date: dates, }) quote = quote.NextSibling if quote == nil { break } } return quotes, nil } type quoteElem struct { Num int `json:"num"` Body string `json:"body"` Date string `json:"date"` }