Parcourir la source

update test case in web module

reghao il y a 1 an
Parent
commit
693f7c0e27

+ 0 - 177
web/src/test/java/cn/reghao/bnt/web/parser/BiliCommentSpiderTest.java

@@ -1,177 +0,0 @@
-package cn.reghao.bnt.web.parser;
-
-import cn.reghao.jutil.jdk.http.WebRequest;
-import cn.reghao.jutil.jdk.http.WebResponse;
-import cn.reghao.jutil.jdk.serializer.JsonConverter;
-import cn.reghao.jutil.jdk.text.TextFile;
-import cn.reghao.jutil.tool.http.DefaultWebRequest;
-import cn.reghao.bnt.browser.chrome.ReqMatcher;
-import cn.reghao.bnt.spider.url.BodyDataType;
-import cn.reghao.bnt.spider.url.Site;
-import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
-import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
-import cn.reghao.bnt.web.parser.model.po.UnparsedData;
-import cn.reghao.bnt.web.parser.model.po.UrlResource;
-import cn.reghao.bnt.web.parser.site.bilibili.BiliCommentDataParser;
-import cn.reghao.bnt.web.parser.site.bilibili.db.mongo.BiliVideoMongo;
-import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliComment;
-import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliVideo;
-import cn.reghao.bnt.web.parser.task.DataProducer;
-import com.google.gson.JsonObject;
-import lombok.extern.slf4j.Slf4j;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.SpringApplication;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.test.context.ActiveProfiles;
-import org.springframework.test.context.junit4.SpringRunner;
-
-import java.util.*;
-
-/**
- * @author reghao
- * @date 2021-03-20 18:02:38
- */
-@Slf4j
-@ActiveProfiles("dev")
-@SpringBootTest(classes = SpringApplication.class)
-@RunWith(SpringRunner.class)
-public class BiliCommentSpiderTest {
-    //*****************************************************************************************************************
-    // 视频评论
-    //*****************************************************************************************************************
-    @Autowired
-    BiliCommentDataParser biliCommentDataParser;
-    @Test
-    public void biliCommentTest() {
-        Set<ReqMatcher> set = new HashSet<>();
-        String pattern1 = "reply/wbi/main";
-        set.add(new ReqMatcher(Site.bilibili, pattern1, BodyDataType.html, biliCommentDataParser));
-
-        //String pattern2 = "x/v2/reply/reply";
-        //set.add(new ReqMatcher(Site.bilibili, pattern2, BodyDataType.html, biliCommentDataParser));
-
-        /*String url = "https://www.bilibili.com/video/BV1bA4m1c7Vm/";
-        AbstractChrome chrome = new ChromeBrowser(false, false);
-        chrome.getAndHandleDynamicPage(url, set);*/
-
-        getBiliComment();
-    }
-
-    @Autowired
-    UrlResourceMongo urlResourceMongo;
-    public void getBiliComment() {
-        WebRequest webRequest = biliWebRequest();
-
-        String site = Site.bilibili.name();
-        String parser = BiliCommentDataParser.class.getSimpleName();
-        List<UrlResource> list = urlResourceMongo.findNotCrawled(site, parser, 1000);
-        for (UrlResource urlResource : list) {
-            try {
-                String url = urlResource.getUrl();
-                WebResponse webResponse = webRequest.get(url);
-                int statusCode = webResponse.getStatusCode();
-                String body = webResponse.getBody();
-                if (statusCode != 200 || body == null) {
-                    log.info("请求 {} 失败", url);
-                    continue;
-                }
-
-                Map<String, Object> map = biliCommentDataParser.parse(url, body);
-                List<BiliComment> biliComment = (List<BiliComment>) map.get("biliComments");
-                if (biliComment != null) {
-                    urlResourceMongo.updateCrawledTime(urlResource, System.currentTimeMillis());
-                } else {
-                    log.info("解析 {} 失败", url);
-                }
-
-                Thread.sleep(1_000);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-        }
-    }
-
-    TextFile textFile = new TextFile();
-    private WebRequest biliWebRequest() {
-        String cookie = textFile.readFile("/home/reghao/Downloads/cookie.txt");
-        if (cookie.isBlank()) {
-            return null;
-        }
-
-        String domain = ".bilibili.com";
-        WebRequest webRequest = new DefaultWebRequest(cookie, domain);
-        String url = "https://api.bilibili.com/x/member/web/account";
-        WebResponse webResponse = webRequest.get(url);
-        int statusCode = webResponse.getStatusCode();
-        String body = webResponse.getBody();
-        if (statusCode != 200 || body == null) {
-            log.error("用户认证失败");
-            return null;
-        }
-
-        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
-        int code = jsonObject.get("code").getAsInt();
-        String msg = jsonObject.get("message").getAsString();
-        if (code != 0) {
-            log.error("用户认证失败");
-            return null;
-        }
-
-        JsonObject jsonObject1 = jsonObject.get("data").getAsJsonObject();
-        long mid = jsonObject1.get("mid").getAsLong();
-        String username = jsonObject1.get("uname").getAsString();
-        String userId = jsonObject1.get("userid").getAsString();
-        return webRequest;
-    }
-
-    @Autowired
-    BiliVideoMongo biliVideoMongo;
-    @Autowired
-    DataProducer dataProducer;
-    @Test
-    public void feedTest() throws InterruptedException {
-        int size = 10_000;
-        List<BiliVideo> list = biliVideoMongo.findByNotFeedVideo(size);
-        for (BiliVideo biliVideo : list) {
-            try {
-                dataProducer.put(biliVideo);
-            } catch (InterruptedException e) {
-                e.printStackTrace();
-            }
-        }
-
-        Thread.sleep(3600*24*7);
-    }
-
-    @Autowired
-    UnparsedDataMongo unparsedDataMongo;
-    @Test
-    public void parseTest() {
-        String site = Site.bilibili.name();
-        String parser = "BiliCommentDataParser";
-        List<UnparsedData> list = unparsedDataMongo.findNotParsed(site, parser, 10_000);
-        int i = 1;
-        for (UnparsedData unparsedData : list) {
-            parserBiliComment(unparsedData);
-            log.info("处理完第 {} 文档", i++);
-        }
-    }
-
-    List<UnparsedData> unparsedDataList = new ArrayList<>();
-    private void parserBiliComment(UnparsedData unparsedData) {
-        String url = unparsedData.getUrl();
-        String body = unparsedData.getData();
-        try {
-            Map<String, Object> result = biliCommentDataParser.parse(url, body);
-            unparsedData.setParsed(1);
-        } catch (Exception e) {
-            e.printStackTrace();
-            unparsedData.setParsed(2);
-        } finally {
-            //unparsedDataMongo.update(unparsedData);
-            //unparsedDataList.add(unparsedData);
-        }
-    }
-}

+ 108 - 0
web/src/test/java/cn/reghao/bnt/web/parser/BiliSpiderTest.java

@@ -1,6 +1,12 @@
 package cn.reghao.bnt.web.parser;
 
 import cn.reghao.bnt.web.WebApplication;
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import cn.reghao.bnt.web.parser.site.bilibili.BiliCommentDataParser;
+import cn.reghao.bnt.web.parser.site.bilibili.db.mongo.BiliVideoMongo;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliComment;
+import cn.reghao.bnt.web.parser.task.DataProducer;
 import cn.reghao.jutil.jdk.http.WebRequest;
 import cn.reghao.jutil.jdk.http.WebResponse;
 import cn.reghao.jutil.jdk.serializer.JsonConverter;
@@ -289,4 +295,106 @@ public class BiliSpiderTest {
         AbstractChrome chrome = new ChromeBrowser(false, false);
         chrome.getAndHandleDynamicPage(url, set);
     }
+
+    //*****************************************************************************************************************
+    // Video comments
+    //*****************************************************************************************************************
+    @Autowired
+    BiliCommentDataParser biliCommentDataParser;
+    /**
+     * Entry point for the video-comment crawl.
+     * <p>
+     * Registers a matcher for the {@code reply/wbi/main} request pattern. While the browser-driven
+     * path below stays commented out the matcher set is not consumed, and the test simply delegates
+     * to {@link #getBiliComment()}.
+     */
+    @Test
+    public void biliCommentTest() {
+        Set<ReqMatcher> matchers = new HashSet<>();
+        ReqMatcher mainReplyMatcher =
+                new ReqMatcher(Site.bilibili, "reply/wbi/main", BodyDataType.html, biliCommentDataParser);
+        matchers.add(mainReplyMatcher);
+
+        // Disabled: second request pattern handled by the same parser.
+        //matchers.add(new ReqMatcher(Site.bilibili, "x/v2/reply/reply", BodyDataType.html, biliCommentDataParser));
+
+        // Disabled: load the video page in Chrome and let the matchers handle the comment requests.
+        //AbstractChrome chrome = new ChromeBrowser(false, false);
+        //chrome.getAndHandleDynamicPage("https://www.bilibili.com/video/BV1bA4m1c7Vm/", matchers);
+
+        getBiliComment();
+    }
+
+    /**
+     * Requests every not-yet-crawled bilibili comment URL returned by
+     * {@code urlResourceMongo.findNotCrawled(site, parser, 1000)}, runs the response body through
+     * {@link BiliCommentDataParser} and stamps the crawl time on the resource when the parse result
+     * contains a {@code "biliComments"} entry.
+     * <p>
+     * Requests are spaced one second apart. The run is skipped when no request client is available
+     * and stops early, with the interrupt flag restored, when the thread is interrupted.
+     */
+    public void getBiliComment() {
+        WebRequest webRequest = biliWebRequest();
+        if (webRequest == null) {
+            // NOTE(review): assumes biliWebRequest() signals a missing cookie / failed login with
+            // null — confirm against its implementation. Without this guard every iteration below
+            // would fail with a NullPointerException.
+            log.error("用户认证失败, 跳过评论抓取");
+            return;
+        }
+
+        String site = Site.bilibili.name();
+        String parser = BiliCommentDataParser.class.getSimpleName();
+        List<UrlResource> list = urlResourceMongo.findNotCrawled(site, parser, 1000);
+        for (UrlResource urlResource : list) {
+            String url = urlResource.getUrl();
+            try {
+                WebResponse webResponse = webRequest.get(url);
+                int statusCode = webResponse.getStatusCode();
+                String body = webResponse.getBody();
+                if (statusCode != 200 || body == null) {
+                    log.info("请求 {} 失败", url);
+                    continue;
+                }
+
+                Map<String, Object> map = biliCommentDataParser.parse(url, body);
+                // The parser hands back an untyped map; only the presence of the entry is used here.
+                @SuppressWarnings("unchecked")
+                List<BiliComment> biliComment = (List<BiliComment>) map.get("biliComments");
+                if (biliComment != null) {
+                    urlResourceMongo.updateCrawledTime(urlResource, System.currentTimeMillis());
+                } else {
+                    log.info("解析 {} 失败", url);
+                }
+
+                // Throttle: one request per second.
+                Thread.sleep(1_000);
+            } catch (InterruptedException e) {
+                // Restore the flag and stop instead of carrying on with the remaining URLs.
+                Thread.currentThread().interrupt();
+                log.warn("抓取评论被中断");
+                return;
+            } catch (Exception e) {
+                // Best effort: a failure on one URL must not abort the rest of the batch.
+                log.error("处理 {} 时发生异常", url, e);
+            }
+        }
+    }
+
+    @Autowired
+    BiliVideoMongo biliVideoMongo;
+    @Autowired
+    DataProducer dataProducer;
+    /**
+     * Loads videos through {@code biliVideoMongo.findByNotFeedVideo(10_000)}, hands each one to the
+     * {@link DataProducer}, then keeps the test thread asleep (presumably so that whatever consumes
+     * the producer has time to run — confirm).
+     *
+     * @throws InterruptedException if the thread is interrupted while feeding or while sleeping
+     */
+    @Test
+    public void feedTest() throws InterruptedException {
+        int size = 10_000;
+        List<BiliVideo> list = biliVideoMongo.findByNotFeedVideo(size);
+        for (BiliVideo biliVideo : list) {
+            // An interrupt is propagated to the caller rather than printed and ignored, so an
+            // interrupted run stops feeding instead of pushing the remaining videos.
+            dataProducer.put(biliVideo);
+        }
+
+        // NOTE(review): Thread.sleep takes milliseconds, so this waits 604_800 ms (about 10 minutes).
+        // The 3600*24*7 factor looks like "seconds in a week" — confirm the intended duration.
+        Thread.sleep(3600*24*7);
+    }
+
+    @Autowired
+    UnparsedDataMongo unparsedDataMongo;
+    /**
+     * Re-runs the comment parser over stored documents returned by
+     * {@code unparsedDataMongo.findNotParsed(site, parser, 10_000)} and logs a running count
+     * after each document.
+     */
+    @Test
+    public void parseTest() {
+        List<UnparsedData> pending =
+                unparsedDataMongo.findNotParsed(Site.bilibili.name(), "BiliCommentDataParser", 10_000);
+        int processed = 0;
+        for (UnparsedData document : pending) {
+            parserBiliComment(document);
+            processed++;
+            log.info("处理完第 {} 文档", processed);
+        }
+    }
+
+    List<UnparsedData> unparsedDataList = new ArrayList<>();
+    /**
+     * Runs the comment parser over one stored document and records the outcome on the object:
+     * {@code parsed = 1} when {@code parse} returns normally, {@code parsed = 2} when it throws.
+     * <p>
+     * The flag is only set in memory; the calls that would write it back are commented out below.
+     *
+     * @param unparsedData stored document providing the URL and the raw body to parse
+     */
+    private void parserBiliComment(UnparsedData unparsedData) {
+        String url = unparsedData.getUrl();
+        String body = unparsedData.getData();
+        try {
+            // Only whether parsing completes matters here; the returned map is not inspected.
+            biliCommentDataParser.parse(url, body);
+            unparsedData.setParsed(1);
+        } catch (Exception e) {
+            // Log through the class logger with the URL so a failure can be traced to its document.
+            log.error("解析 {} 异常", url, e);
+            unparsedData.setParsed(2);
+        }
+        // Write-back is currently disabled:
+        //unparsedDataMongo.update(unparsedData);
+        //unparsedDataList.add(unparsedData);
+    }
 }