package main
import ( "context" "fmt" "log" "os" "strconv" "strings" "sync" "time"
"github.com/chromedp/cdproto/cdp" "github.com/chromedp/chromedp" "github.com/chromedp/chromedp/kb" )
// Scraper configuration.
const (
	// qqID is the QQ account number inserted into the Qzone URL built in main.
	qqID string = "2498742177"

	// savePath is the directory prefix used when building output file names.
	savePath string = "F:/Temp"

	// docRange is an integer setting with value 50.
	// NOTE(review): presumably the number of posts per output file — confirm
	// against the code that batches posts.
	docRange int = 50
)
// Package-level state shared by every function in this file.
var (
	// ctx is the context handed to each chromedp.Run call; it is assigned
	// in init.
	ctx context.Context

	// wg counts the background goroutines that main waits for before
	// returning.
	wg sync.WaitGroup
)
func main() { url := "https://user.qzone.qq.com/" + qqID + "/311" fmt.Println("please log in") visit(url) deal(254, 999) wg.Wait() }
func deal(start int, end int) {
pageNum := ini_load()
if start < 1 { start = 1 } if end > pageNum || end < start { end = pageNum }
textChan := make(chan string, 100000) wg.Add(1) go output(textChan)
turnToPage(start)
for i := start; i <= end; i++ { load() read(textChan) time.Sleep(1 * time.Second) nextPage()
if i == end { fmt.Println("all pages have been read. ") textChan <- "end" } } }
func visit(url string) { err := chromedp.Run(ctx, chromedp.Navigate(url), chromedp.WaitVisible(`#app_canvas_frame`, chromedp.ByID)) check(err) }
func ini_load() int { time.Sleep(2 * time.Second) i := 0 for { i++ var str string err := chromedp.Run(ctx, chromedp.Evaluate("window.scrollBy(0,1000)", nil), chromedp.Sleep(time.Second), chromedp.TextContent(`.//a[@title="末页"]`, &str)) check(err) if len(str) != 0 { fmt.Println("total page: " + str) fmt.Println("initializing completed") pageNum, _ := strconv.Atoi(str) return pageNum } else { if i > 10 { err = fmt.Errorf("scroll error: can't find pageEnd") break } } } return -1 }
func load() { time.Sleep(2 * time.Second) i := 0 for { i++ var str string err := chromedp.Run(ctx, chromedp.Evaluate("window.scrollBy(0,1000)", nil), chromedp.Sleep(time.Second), chromedp.TextContent(`.//p[@class="mod_pagenav_main"]/span[@class="current"]/span`, &str)) check(err) if len(str) != 0 { fmt.Println("* current page: " + str) fmt.Println("loading completed") break } else { if i > 10 { err = fmt.Errorf("scroll error: can't find pageEnd") break } } } }
func read(textChan chan string) { fmt.Println("reading...")
var nodes []*cdp.Node
err := chromedp.Run(ctx, chromedp.Sleep(time.Second), chromedp.Nodes(`.//div[@class='box bgr3']`, &nodes), )
check(err)
fmt.Println("total: ", len(nodes))
for _, node := range nodes {
path := node.FullXPath() path = path[strings.Index(path, "//")+2:]
var text string var share string var date string
textPath := path + "/div[2]/pre" sharePath := path + "/div[3]" datePath := path + "/div[4]/div/span/a"
err = chromedp.Run(ctx, chromedp.TextContent(textPath, &text), chromedp.AttributeValue(sharePath, "class", &share, nil), chromedp.AttributeValue(datePath, "title", &date, nil), ) check(err)
wg.Add(1) go func() { defer wg.Done() if text != "" && share == "md" { textChan <- "<!-- node " + date + "-->\n\n" + text + "\n\n" } }()
} }
func turnToPage(i int) { err := chromedp.Run(ctx, chromedp.SendKeys(`.//span[@class="mod_pagenav_turn"]/input`, strconv.Itoa(i)+kb.Enter), ) check(err) }
func nextPage() { err := chromedp.Run(ctx, chromedp.Click(`//a[@title='下一页']`), ) check(err) }
func output(textChan chan string) { defer wg.Done()
doc := "" k := 0 for i := 0; ; { item, _ := <-textChan if item == "end" { k++ wg.Add(1) go toFile(doc, &k) break } if i < 49 { i++ doc = doc + item } else { k++ wg.Add(1) go toFile(doc, &k) doc = "" i = 0 } } }
func toFile(doc string, k *int) { defer wg.Done()
var fileSavePath string
for { fileSavePath = savePath + "/qzone(" + strconv.Itoa(*k) + ").md" _, err := os.Stat(fileSavePath) if err != nil { break } else { *k++ } }
doc = "---\ntitle: qzone(" + strconv.Itoa(*k) + ")\nlayout: wiki\nwiki: dynamic\ntype: dynamic\norder:\n---\n" + "{% timeline %}\n\n" + doc + "{% endtimeline %}"
file, err := os.OpenFile(fileSavePath, os.O_WRONLY|os.O_CREATE, 0644) check(err)
defer file.Close()
file.WriteString(doc)
fmt.Println("- file " + strconv.Itoa(*k) + " has been written. ") }
func init() { headlessFlag := chromedp.Flag("headless", false) opts := append( chromedp.DefaultExecAllocatorOptions[:], chromedp.NoDefaultBrowserCheck, headlessFlag, chromedp.IgnoreCertErrors, chromedp.Flag("blink-settings", "imagesEnabled=false"), chromedp.DisableGPU, chromedp.NoSandbox, chromedp.NoFirstRun, chromedp.Flag("disable-web-security", true), chromedp.Flag("disable-extensions", true), chromedp.Flag("disable-default-apps", true), chromedp.WindowSize(1280, 1024), chromedp.Flag("run-all-compositor-stages-before-draw", true), chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`), )
allocCtx, _ := chromedp.NewExecAllocator(context.Background(), opts...) ctx, _ = chromedp.NewContext( allocCtx, chromedp.WithLogf(log.Printf), ) chromedp.Run(ctx, make([]chromedp.Action, 0, 1)...) }
// check does nothing when err is nil; otherwise it hands err to log.Fatal.
func check(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
|