Skip to content

Commit

Permalink
实现搜索页视频列表视频爬虫
Browse files Browse the repository at this point in the history
  • Loading branch information
cnbattle committed Jun 20, 2021
1 parent 48426ed commit 22cb078
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 7 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# 抖音推荐列表视频爬虫方案
# 抖音推荐/搜索页视频列表视频爬虫方案

> 最近测试可用时间:`2021.05.29`
> 最近测试可用时间:`2021.06.20`
> adb暂未实现进入搜索页的操作,请根据自身技术栈实现相关点击操作及键入关键词等
> 老版本请切换到`old`分支查看,`old`分支使用anyproxy抓取,更适合大多数人使用
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.16

require (
github.com/ouqiang/goproxy v1.1.0
github.com/spf13/viper v1.7.1
github.com/spf13/viper v1.8.0
gorm.io/driver/sqlite v1.1.4
gorm.io/gorm v1.21.4
gorm.io/gorm v1.21.11
)
1 change: 1 addition & 0 deletions internal/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ func HandleJson(data model.Data) {
for _, item := range data.AwemeList {
// 判断是否是广告 点赞数是否大于设定值
if item.IsAds == true || item.Statistics.DiggCount < config.V.GetInt("smallLike") {
log.Println("数据:", item.Desc, "continue")
continue
}
log.Println("开始处理数据:", item.Desc)
Expand Down
40 changes: 38 additions & 2 deletions internal/proxy/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,44 @@ func (e *EventHandler) BeforeResponse(ctx *goproxy.Context, resp *http.Response,
if err != nil {
return
}
// 处理
if strings.EqualFold(ctx.Req.URL.Path, "/aweme/v1/feed/") {
// /aweme/v1/general/search/single/ 综合搜索
// /aweme/v1/search/item/ 视频
//if strings.EqualFold(ctx.Req.URL.Path, "/aweme/v1/general/search/single/") {
// response, err := ioutil.ReadAll(resp.Body)
// if err != nil {
// log.Println(err)
// return
// }
// // gzip
// body, err := utils.ParseGzip(response)
// if err != nil {
// log.Println(err)
// return
// }
// var filename = "./single.json"
// var f *os.File
// /***************************** 第一种方式: 使用 io.WriteString 写入文件 ***********************************************/
// if utils.CheckFileIsExist(filename) { //如果文件存在
// f, _ = os.OpenFile(filename, os.O_APPEND, 0666) //打开文件
// fmt.Println("文件存在")
// } else {
// f, _ = os.Create(filename) //创建文件
// fmt.Println("文件不存在")
// }
// _, _ = io.WriteString(f, string(body)) //写入文件(字符串)
// //var data model.Data
// //err = json.Unmarshal(body, &data)
// //if err != nil {
// // log.Println(err)
// // return
// //}
// //go core.HandleJson(data)
// // resp.Body 只能读取一次, 读取后必须再放回去
// resp.Body = ioutil.NopCloser(bytes.NewReader(response))
//}

// 处理 推荐列表接口 搜索页视频列表接口
if strings.EqualFold(ctx.Req.URL.Path, "/aweme/v1/feed/") || strings.EqualFold(ctx.Req.URL.Path, "/aweme/v1/search/item/") {
response, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println(err)
Expand Down
3 changes: 2 additions & 1 deletion internal/proxy/proxy.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package proxy

import (
"github.com/cnbattle/douyin/internal/config"
"log"
"net/http"
"time"

"github.com/cnbattle/douyin/internal/config"

"github.com/ouqiang/goproxy"
)

Expand Down
9 changes: 9 additions & 0 deletions internal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"encoding/binary"
"encoding/hex"
"io/ioutil"
"os"
)

// Md5 字符串加密
Expand All @@ -16,6 +17,14 @@ func Md5(str string) string {
return hex.EncodeToString(h.Sum(nil))
}

// CheckFileIsExist reports whether filename exists on disk.
//
// It returns false only when os.Stat fails with a "does not exist" error.
// Every other outcome — a successful Stat, or a Stat failure of a different
// kind — is reported as true.
func CheckFileIsExist(filename string) bool {
	_, statErr := os.Stat(filename)
	return !os.IsNotExist(statErr)
}

// ParseGzip gzip 解压
func ParseGzip(data []byte) ([]byte, error) {
b := new(bytes.Buffer)
Expand Down

0 comments on commit 22cb078

Please sign in to comment.