Просмотр исходного кода

将 webspider1 项目的 79e8a4c3 版本放到 bnt 项目

reghao 1 год назад
Родитель
Коммит
ce0d5cc2f5
100 измененных файлов с 8756 добавлено и 0 удалено
  1. 51 0
      browser/dependency-reduced-pom.xml
  2. 65 0
      browser/pom.xml
  3. 44 0
      browser/src/main/java/cn/reghao/bnt/browser/SpiderBrowser.java
  4. 11 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/CdpHandler.java
  5. 103 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/event/NetworkEventHandler.java
  6. 130 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/result/GetResponseBodyResultHandler.java
  7. 22 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/result/PageNavigateResultHandler.java
  8. 44 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/msg/CdpMethodResultMessage.java
  9. 26 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/msg/CdpNetworkEventMessage.java
  10. 26 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/CdpReq.java
  11. 24 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkEnable.java
  12. 26 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkGetCookies.java
  13. 22 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkGetResponseBody.java
  14. 23 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/PageNavigate.java
  15. 26 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/PageSetDownloadBehavior.java
  16. 21 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/RemoteTarget.java
  17. 23 0
      browser/src/main/java/cn/reghao/bnt/browser/cdp/req/TargetCreateTarget.java
  18. 298 0
      browser/src/main/java/cn/reghao/bnt/browser/chrome/AbstractChrome.java
  19. 71 0
      browser/src/main/java/cn/reghao/bnt/browser/chrome/ChromeBrowser.java
  20. 79 0
      browser/src/main/java/cn/reghao/bnt/browser/chrome/PageRequest.java
  21. 31 0
      browser/src/main/java/cn/reghao/bnt/browser/chrome/ReqInPage.java
  22. 37 0
      browser/src/main/java/cn/reghao/bnt/browser/chrome/ReqMatcher.java
  23. 52 0
      browser/src/main/java/cn/reghao/bnt/browser/parser/ChromeDataParser.java
  24. 48 0
      browser/src/main/java/cn/reghao/bnt/browser/ws/CdpMessageHandler.java
  25. 9 0
      browser/src/main/java/cn/reghao/bnt/browser/ws/MessageHandler.java
  26. 37 0
      browser/src/main/java/cn/reghao/bnt/browser/ws/WebSocketClient.java
  27. 55 0
      browser/src/main/java/cn/reghao/bnt/browser/ws/WebSocketListenerImpl.java
  28. 14 0
      core/pom.xml
  29. 56 0
      core/src/main/java/cn/reghao/bnt/core/event/EvtCrawl.java
  30. 12 0
      core/src/main/java/cn/reghao/bnt/core/event/EvtNoUrl.java
  31. 18 0
      core/src/main/java/cn/reghao/bnt/core/http/CrawlRequest.java
  32. 25 0
      core/src/main/java/cn/reghao/bnt/core/http/CrawlResponse.java
  33. 109 0
      core/src/main/java/cn/reghao/bnt/core/http/DefaultCrawlRequest.java
  34. 131 0
      core/src/main/java/cn/reghao/bnt/core/http/JdkCrawlRequest.java
  35. 21 0
      core/src/main/java/cn/reghao/bnt/core/parser/DataParser.java
  36. 54 0
      core/src/main/java/cn/reghao/bnt/core/parser/SiteParser.java
  37. 11 0
      core/src/main/java/cn/reghao/bnt/core/url/BodyDataType.java
  38. 148 0
      core/src/main/java/cn/reghao/bnt/core/url/CrawlUrl.java
  39. 73 0
      core/src/main/java/cn/reghao/bnt/core/url/RawData.java
  40. 9 0
      core/src/main/java/cn/reghao/bnt/core/url/Site.java
  41. 23 0
      core/src/main/java/cn/reghao/bnt/core/url/UrlScheduler.java
  42. 7 0
      crawler/Dockerfile
  43. 1 0
      crawler/README.md
  44. 5 0
      crawler/bin/shutdown.sh
  45. 5 0
      crawler/bin/start.sh
  46. 51 0
      crawler/dependency-reduced-pom.xml
  47. 58 0
      crawler/pom.xml
  48. 74 0
      crawler/src/main/java/cn/reghao/bnt/crawler/Crawler.java
  49. 82 0
      crawler/src/main/java/cn/reghao/bnt/crawler/CrawlerContext.java
  50. 21 0
      crawler/src/main/java/cn/reghao/bnt/crawler/SpiderCrawler.java
  51. 115 0
      crawler/src/main/java/cn/reghao/bnt/crawler/impl/HttpUrlScheduler.java
  52. 104 0
      crawler/src/main/java/cn/reghao/bnt/crawler/task/DataProducer.java
  53. 23 0
      crawler/src/main/java/cn/reghao/bnt/crawler/task/SiteParserGetter.java
  54. 3 0
      pom.xml
  55. 82 0
      web/pom.xml
  56. 17 0
      web/src/main/java/cn/reghao/bnt/web/SpringApplication.java
  57. 20 0
      web/src/main/java/cn/reghao/bnt/web/config/AppProperties.java
  58. 64 0
      web/src/main/java/cn/reghao/bnt/web/config/MongoConfig.java
  59. 37 0
      web/src/main/java/cn/reghao/bnt/web/config/OssConsoleClientFactory.java
  60. 2 0
      web/src/main/java/cn/reghao/bnt/web/config/spring/BeansConfig.java
  61. 2442 0
      web/src/main/java/cn/reghao/bnt/web/parser/autogen/BiliDm.java
  62. 102 0
      web/src/main/java/cn/reghao/bnt/web/parser/consumer/RawDataConsumer.java
  63. 54 0
      web/src/main/java/cn/reghao/bnt/web/parser/consumer/RawDataStore.java
  64. 128 0
      web/src/main/java/cn/reghao/bnt/web/parser/consumer/UnparsedDataParser.java
  65. 108 0
      web/src/main/java/cn/reghao/bnt/web/parser/consumer/UnparsedDataStore.java
  66. 26 0
      web/src/main/java/cn/reghao/bnt/web/parser/controller/CrawlerController.java
  67. 31 0
      web/src/main/java/cn/reghao/bnt/web/parser/controller/TaskController.java
  68. 51 0
      web/src/main/java/cn/reghao/bnt/web/parser/controller/UrlController.java
  69. 103 0
      web/src/main/java/cn/reghao/bnt/web/parser/crawler/CrawlerContext.java
  70. 89 0
      web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/DataRecordMongo.java
  71. 290 0
      web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/UnparsedDataMongo.java
  72. 242 0
      web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/UrlResourceMongo.java
  73. 33 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/dto/BiliCategory.java
  74. 14 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/po/DataRecord.java
  75. 52 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/po/UnparsedData.java
  76. 89 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/po/UrlResource.java
  77. 20 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/vo/DataCount.java
  78. 13 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/vo/DayCount.java
  79. 23 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/vo/UnparsedDataStatistics.java
  80. 23 0
      web/src/main/java/cn/reghao/bnt/web/parser/model/vo/UrlResourceStatistics.java
  81. 100 0
      web/src/main/java/cn/reghao/bnt/web/parser/rpc/BiliCache.java
  82. 87 0
      web/src/main/java/cn/reghao/bnt/web/parser/service/StatisticsService.java
  83. 81 0
      web/src/main/java/cn/reghao/bnt/web/parser/service/UrlSchedulerService.java
  84. 234 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliCommentDataParser.java
  85. 46 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliDanmakuDataParser.java
  86. 150 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliPageParser.java
  87. 49 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliRecommendDataParser.java
  88. 214 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliVideoDataParser.java
  89. 35 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/api/BiliCrawlUrl.java
  90. 64 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/api/BiliUrl.java
  91. 150 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliCommentMongo.java
  92. 103 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliDanmakuMongo.java
  93. 109 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliDanmakuUrlMongo.java
  94. 127 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliUserMongo.java
  95. 156 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliVideoMongo.java
  96. 51 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliComment.java
  97. 45 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliDanmaku.java
  98. 44 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliDanmakuUrl.java
  99. 30 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliUser.java
  100. 69 0
      web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliVideo.java

+ 51 - 0
browser/dependency-reduced-pom.xml

@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <parent>
+    <artifactId>bnt</artifactId>
+    <groupId>cn.reghao.bnt</groupId>
+    <version>1.0.0</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>browser</artifactId>
+  <version>1.0.0-SNAPSHOT</version>
+  <build>
+    <finalName>bnt-browser</finalName>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.8.1</version>
+        <configuration>
+          <source>11</source>
+          <target>11</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.2.4</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer>
+                  <mainClass>cn.reghao.bnt.browser.SpiderBrowser</mainClass>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.projectlombok</groupId>
+      <artifactId>lombok</artifactId>
+      <version>1.18.0</version>
+      <scope>provided</scope>
+    </dependency>
+  </dependencies>
+</project>

+ 65 - 0
browser/pom.xml

@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>bnt</artifactId>
+        <groupId>cn.reghao.bnt</groupId>
+        <version>1.0.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>browser</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>cn.reghao.bnt</groupId>
+            <artifactId>core</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>okhttp</artifactId>
+            <!--<version>4.10.0</version>-->
+            <version>3.14.9</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <finalName>bnt-browser</finalName>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>11</source>
+                    <target>11</target>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.4</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>cn.reghao.bnt.browser.SpiderBrowser</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>

+ 44 - 0
browser/src/main/java/cn/reghao/bnt/browser/SpiderBrowser.java

@@ -0,0 +1,44 @@
+package cn.reghao.bnt.browser;
+
+import cn.reghao.bnt.browser.parser.ChromeDataParser;
+import cn.reghao.bnt.browser.chrome.AbstractChrome;
+import cn.reghao.bnt.browser.chrome.ChromeBrowser;
+import cn.reghao.bnt.browser.chrome.ReqMatcher;
+import cn.reghao.bnt.core.url.BodyDataType;
+import cn.reghao.bnt.core.url.Site;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Manual entry point: opens the Tmall home page in a non-headless {@link ChromeBrowser}
+ * without proxy and hands it the request matchers built by {@link #reqMatchers()}.
+ *
+ * @author reghao
+ * @date 2023-09-10 02:24:50
+ */
+public class SpiderBrowser {
+    /** URL pattern -> name of the parser associated with that pattern. */
+    public static Map<String, String> map = new HashMap<>();
+    static ChromeDataParser chromeDataParser = new ChromeDataParser();
+
+    /**
+     * Builds the matchers for the URL patterns of interest and records each
+     * pattern's parser name in {@link #map}.
+     *
+     * @return a new mutable set with one matcher per pattern
+     */
+    static Set<ReqMatcher> reqMatchers() {
+        Set<ReqMatcher> matchers = new HashSet<>();
+        register(matchers, "h5/mtop.taobao.rate.detaillist.get/6.0", "TmallCommentDataParser");
+        register(matchers, "rate.taobao.com/feedRateList.htm", "TaobaoCommentDataParser");
+        return matchers;
+    }
+
+    /** Records {@code pattern -> parserName} and adds a JSON matcher for the pattern. */
+    private static void register(Set<ReqMatcher> matchers, String pattern, String parserName) {
+        map.put(pattern, parserName);
+        // NOTE(review): the matcher is tagged Site.bilibili although the patterns look like
+        // Taobao/Tmall endpoints - confirm this is intended.
+        matchers.add(new ReqMatcher(Site.bilibili, pattern, BodyDataType.json, chromeDataParser));
+    }
+
+    public static void main(String[] args) {
+        Set<ReqMatcher> matchers = reqMatchers();
+
+        // ChromeBrowser(isHeadless = false, enableProxy = false)
+        AbstractChrome chrome = new ChromeBrowser(false, false);
+        chrome.getAndHandleDynamicPage("https://www.tmall.com/", matchers);
+    }
+}

+ 11 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/CdpHandler.java

@@ -0,0 +1,11 @@
+package cn.reghao.bnt.browser.cdp.handler;
+
+import com.google.gson.JsonObject;
+
+/**
+ * Handler for a single Chrome DevTools Protocol (CDP) message that has already
+ * been parsed into a JSON object.
+ *
+ * @author reghao
+ * @date 2021-03-15 09:14:51
+ */
+public interface CdpHandler {
+    void handle(JsonObject cdpMsg); // cdpMsg: the parsed CDP message to process
+}

+ 103 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/event/NetworkEventHandler.java

@@ -0,0 +1,103 @@
+package cn.reghao.bnt.browser.cdp.handler.event;
+
+import cn.reghao.bnt.browser.cdp.handler.CdpHandler;
+import cn.reghao.bnt.browser.cdp.req.NetworkGetResponseBody;
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import cn.reghao.bnt.browser.chrome.ReqInPage;
+import cn.reghao.bnt.browser.chrome.ReqMatcher;
+import cn.reghao.bnt.browser.ws.WebSocketClient;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.*;
+
+/**
+ * Handles Chrome DevTools Protocol (CDP) Network-domain events received over the WebSocket.
+ * <p>
+ * A request whose URL contains one of the page's {@link ReqMatcher} patterns is tracked when
+ * {@code Network.requestWillBeSent} arrives. When {@code Network.loadingFinished} arrives for a
+ * tracked request, a {@code Network.getResponseBody} call is sent to Chrome and registered in
+ * the page's pending get-response-body map so that the reply can be correlated by message id.
+ *
+ * @author reghao
+ * @date 2021-03-15 09:08:55
+ */
+@Slf4j
+public class NetworkEventHandler implements CdpHandler {
+    private final WebSocketClient wsClient;
+    private final PageRequest pageRequest;
+
+    public NetworkEventHandler(WebSocketClient wsClient, PageRequest pageRequest) {
+        this.wsClient = wsClient;
+        this.pageRequest = pageRequest;
+    }
+
+    /**
+     * Dispatches one Network event. Messages without {@code method}, {@code params} or
+     * {@code params.requestId} are ignored instead of failing with a NullPointerException.
+     *
+     * @param cdpMsg the parsed CDP event message
+     */
+    @Override
+    public void handle(JsonObject cdpMsg) {
+        if (!cdpMsg.has("method") || !cdpMsg.has("params")) {
+            return;
+        }
+        String method = cdpMsg.get("method").getAsString();
+        JsonObject params = cdpMsg.get("params").getAsJsonObject();
+        if (!params.has("requestId")) {
+            // every event handled below is keyed by requestId
+            return;
+        }
+
+        String requestId = params.get("requestId").getAsString();
+        // Network event order: requestWillBeSent -> responseReceived -> loadingFinished/loadingFailed
+        switch (method) {
+            case "Network.requestWillBeSent":
+                trackIfMatched(requestId, params);
+                break;
+            case "Network.loadingFinished": {
+                ReqInPage finished = pageRequest.getReqsInNetworkEvent().get(requestId);
+                if (finished != null) {
+                    // a tracked resource finished loading: ask Chrome for its body
+                    getResponseBody(finished);
+                }
+                break;
+            }
+            case "Network.loadingFailed": {
+                // tracked requests are keyed by requestId, so a direct lookup is sufficient
+                ReqInPage failed = pageRequest.getReqsInNetworkEvent().get(requestId);
+                if (failed != null) {
+                    // TODO the request of the current page should be ended here
+                    log.error("chrome 请求 {} 失败...", failed.getRequestUrl());
+                }
+                break;
+            }
+            case "Network.responseReceived":
+            default:
+                break;
+        }
+    }
+
+    /**
+     * Starts tracking the request when its URL contains the pattern of any configured matcher.
+     * When several matchers match, the last one wins because entries are keyed by requestId.
+     */
+    private void trackIfMatched(String requestId, JsonObject params) {
+        String loaderId = params.get("loaderId").getAsString();
+        String requestUrl = params.get("request").getAsJsonObject().get("url").getAsString();
+
+        for (ReqMatcher reqMatcher : pageRequest.getReqMatchers()) {
+            if (requestUrl.contains(reqMatcher.getPattern())) {
+                ReqInPage reqInPage = new ReqInPage(reqMatcher, requestId, loaderId, requestUrl);
+                pageRequest.getReqsInNetworkEvent().put(requestId, reqInPage);
+            }
+        }
+    }
+
+    /**
+     * Sends {@code Network.getResponseBody} for the tracked request and registers it under the
+     * id of the outgoing message, so the reply can be matched back to the request.
+     */
+    private void getResponseBody(ReqInPage reqInPage) {
+        NetworkGetResponseBody getResponseBody = new NetworkGetResponseBody(reqInPage.getRequestId());
+        int id = getResponseBody.getId();
+        reqInPage.setWsReqId(id);
+        // register before sending so the pending entry is already in place when the reply arrives
+        pageRequest.getReqsInGetResponseBody().put(id, reqInPage);
+        wsClient.sendMessage(getResponseBody);
+    }
+}

+ 130 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/result/GetResponseBodyResultHandler.java

@@ -0,0 +1,130 @@
+package cn.reghao.bnt.browser.cdp.handler.result;
+
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import cn.reghao.bnt.browser.chrome.ReqInPage;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.core.url.BodyDataType;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.browser.cdp.handler.CdpHandler;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import lombok.extern.slf4j.Slf4j;
+
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Chrome DevTools Protocol 获取请求响应 body 的 WebSocket 响应消息处理器
+ *
+ * @author reghao
+ * @date 2021-03-15 09:08:55
+ */
+@Slf4j
+public class GetResponseBodyResultHandler implements CdpHandler {
+    private final PageRequest pageRequest;
+
+    public GetResponseBodyResultHandler(PageRequest pageRequest) {
+        this.pageRequest = pageRequest;
+    }
+
+    @Override
+    public void handle(JsonObject cdpMsg) {
+        // 处理 NetworkEventHandler#getResponseBody 中发送的消息
+        int id = cdpMsg.get("id").getAsInt();
+        ReqInPage reqInPage = pageRequest.getReqsInGetResponseBody().get(id);
+        if (reqInPage != null) {
+            if (cdpMsg.get("result") instanceof JsonObject) {
+                handleResponse(reqInPage, cdpMsg.get("result").getAsJsonObject());
+            } else if (cdpMsg.get("error") instanceof JsonObject) {
+                JsonObject error = cdpMsg.get("error").getAsJsonObject();
+                log.error("{} 没有找到 response body", reqInPage.getRequestUrl());
+            }
+        }
+    }
+
+    /**
+     * 获取请求的响应 body
+     *
+     * @param
+     * @return
+     * @date 2021-03-15 上午2:34
+     */
+    private void handleResponse(ReqInPage reqInPage, JsonObject result) {
+        JsonElement bodyElement = result.get("body");
+        if (bodyElement instanceof JsonPrimitive) {
+            String body = bodyElement.getAsString();
+            boolean base64Encoded = result.get("base64Encoded").getAsBoolean();
+            if (base64Encoded) {
+                byte[] bytes = Base64.getUrlDecoder().decode(body);
+                body = new String(bytes, StandardCharsets.UTF_8);
+            }
+
+            if (handle404NotFound(reqInPage.getRequestUrl(), body)) {
+                // 一个页面中,只要有一个需要的资源返回 404,则将所有资源视为 404
+                pageRequest.setNotFound(true);
+            } else {
+                if (!pageRequest.isHandleDirectly()) {
+                    saveResponse(reqInPage, body);
+                } else {
+                    handleResponse(reqInPage, body);
+                }
+            }
+        }
+    }
+
+    /**
+     * 保存响应,返回使用
+     *
+     * @param
+     * @return
+     * @date 2021-08-05 下午9:36
+     */
+    private void saveResponse(ReqInPage reqInPage, String body) {
+        String dataType = reqInPage.getReqMatcher().getDataType();
+        if (dataType.equals(BodyDataType.m3u8.name()) && !body.contains("#EXTINF")) {
+            return;
+        }
+
+        reqInPage.setBody(body);
+        pageRequest.getReqsInGetResponseBodyDone().add(reqInPage);
+    }
+
+    /**
+     * 直接处理响应
+     *
+     * @param
+     * @return
+     * @date 2021-08-05 下午9:36
+     */
+    private synchronized void handleResponse(ReqInPage reqInPage, String body) {
+        String pageUrl = reqInPage.getRequestUrl();
+        DataParser dataParser = reqInPage.getReqMatcher().getDataParser();
+        try {
+            dataParser.parse(pageUrl, body);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * 处理 404
+     *
+     * @param
+     * @return
+     * @date 2021-03-19 下午2:40
+     */
+    private boolean handle404NotFound(String requestUrl, String body) {
+        try {
+            JsonElement errors = JsonConverter.jsonToJsonElement(body).getAsJsonObject().get("errors");
+            if (errors instanceof JsonArray) {
+                log.error("{} 不存在 -> {}", requestUrl, body);
+                return true;
+            }
+        } catch (Exception e) {
+            //
+        }
+        return false;
+    }
+}

+ 22 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/handler/result/PageNavigateResultHandler.java

@@ -0,0 +1,22 @@
+package cn.reghao.bnt.browser.cdp.handler.result;
+
+import cn.reghao.bnt.browser.cdp.handler.CdpHandler;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Handles the WebSocket reply to a Chrome DevTools Protocol {@code Page.navigate} call.
+ * <p>
+ * The reply is only logged at debug level. {@code loaderId} is optional in the reply, so
+ * every member is read defensively.
+ *
+ * @author reghao
+ * @date 2021-03-15 09:08:55
+ */
+@Slf4j
+public class PageNavigateResultHandler implements CdpHandler {
+    /**
+     * @param cdpMsg the parsed reply; ignored when it has no {@code id} or {@code result}
+     */
+    @Override
+    public void handle(JsonObject cdpMsg) {
+        if (!cdpMsg.has("id") || !cdpMsg.has("result")) {
+            return;
+        }
+
+        int id = cdpMsg.get("id").getAsInt();
+        JsonObject result = cdpMsg.get("result").getAsJsonObject();
+        String frameId = textOrNull(result, "frameId");
+        String loaderId = textOrNull(result, "loaderId");
+        log.debug("Page.navigate result: id={}, frameId={}, loaderId={}", id, frameId, loaderId);
+    }
+
+    /** Returns the member as a string, or {@code null} when it is absent or not a primitive. */
+    private static String textOrNull(JsonObject obj, String member) {
+        if (obj.has(member) && obj.get(member).isJsonPrimitive()) {
+            return obj.get(member).getAsString();
+        }
+        return null;
+    }
+}

+ 44 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/msg/CdpMethodResultMessage.java

@@ -0,0 +1,44 @@
+package cn.reghao.bnt.browser.cdp.msg;
+
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import cn.reghao.bnt.browser.cdp.handler.result.GetResponseBodyResultHandler;
+import cn.reghao.bnt.browser.cdp.handler.result.PageNavigateResultHandler;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Chrome DevTools Protocol 方法调用 WebSocket 响应消息
+ *
+ * @author reghao
+ * @date 2021-03-15 09:08:55
+ */
+@Slf4j
+public class CdpMethodResultMessage {
+    private PageNavigateResultHandler pageNavigateResultHandler;
+    private GetResponseBodyResultHandler getResponseBodyResultHandler;
+
+    public CdpMethodResultMessage(PageRequest pageRequest) {
+        this.pageNavigateResultHandler = new PageNavigateResultHandler();
+        this.getResponseBodyResultHandler = new GetResponseBodyResultHandler(pageRequest);
+    }
+
+    public void parse(JsonObject cdpMsg) {
+        if (cdpMsg.get("result") != null) {
+            JsonObject result = cdpMsg.get("result").getAsJsonObject();
+
+            JsonElement frameIdElement = result.get("frameId");
+            if (frameIdElement != null) {
+                pageNavigateResultHandler.handle(cdpMsg);
+            }
+
+            JsonElement bodyElement = result.get("body");
+            if (bodyElement != null) {
+                getResponseBodyResultHandler.handle(cdpMsg);
+            }
+        } else if (cdpMsg.get("error") != null) {
+            JsonObject error = cdpMsg.get("error").getAsJsonObject();
+            log.error("请求错误...");
+        }
+    }
+}

+ 26 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/msg/CdpNetworkEventMessage.java

@@ -0,0 +1,26 @@
+package cn.reghao.bnt.browser.cdp.msg;
+
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import cn.reghao.bnt.browser.cdp.handler.event.NetworkEventHandler;
+import cn.reghao.bnt.browser.ws.WebSocketClient;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Chrome DevTools Protocol WebSocket 网络事件消息
+ *
+ * @author reghao
+ * @date 2021-03-15 09:08:55
+ */
+@Slf4j
+public class CdpNetworkEventMessage {
+    private NetworkEventHandler networkEventHandler;
+
+    public CdpNetworkEventMessage(WebSocketClient wsClient, PageRequest pageRequest) {
+        this.networkEventHandler = new NetworkEventHandler(wsClient, pageRequest);
+    }
+
+    public void parse(JsonObject cdpMsg) {
+        networkEventHandler.handle(cdpMsg);
+    }
+}

+ 26 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/CdpReq.java

@@ -0,0 +1,26 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Base class of all Chrome DevTools Protocol requests: a message id plus the method name.
+ * <p>
+ * Ids are taken from a process-wide sequence so that two requests never share an id.
+ * Replies are correlated with requests by id, and random ids could collide.
+ *
+ * @author reghao
+ * @date 2021-03-14 01:56:32
+ */
+public class CdpReq {
+    private static final int FIRST_ID = 10000;
+    /** Next id to hand out; only accessed inside {@link #nextId()}, which holds the class lock. */
+    private static int idSequence = FIRST_ID;
+
+    private final int id;
+    private final String method;
+
+    /**
+     * @param method the CDP method name, e.g. {@code "Page.navigate"}
+     */
+    public CdpReq(String method) {
+        this.id = nextId();
+        this.method = method;
+    }
+
+    /** @return the id of this request */
+    public int getId() {
+        return this.id;
+    }
+
+    /** Returns the next unused id, restarting at {@code FIRST_ID} before the counter would overflow. */
+    private static synchronized int nextId() {
+        if (idSequence == Integer.MAX_VALUE) {
+            idSequence = FIRST_ID;
+        }
+        return idSequence++;
+    }
+
+    /**
+     * Returns a random integer in {@code [10000, 99999]}. No longer used for request ids;
+     * kept so that existing subclasses calling it keep compiling.
+     */
+    protected int randomInt() {
+        int min = 10000, max = 99999;
+        return ThreadLocalRandom.current().nextInt(min, max + 1);
+    }
+}

+ 24 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkEnable.java

@@ -0,0 +1,24 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+/**
+ * CDP {@code Network.enable} request with fixed buffer-size parameters.
+ *
+ * @author reghao
+ * @date 2021-03-14 04:26:04
+ */
+public class NetworkEnable extends CdpReq {
+    private Params params;
+
+    public NetworkEnable() {
+        super("Network.enable");
+        params = new Params();
+    }
+
+    /** Parameter object of the request; presumably serialized as the CDP {@code params} member. */
+    static class Params {
+        // 100 MiB per resource
+        private long maxResourceBufferSize = 100 * 1024 * 1024;
+        // 200 MiB in total
+        private long maxTotalBufferSize = 200 * 1024 * 1024;
+
+        public Params() {
+        }
+    }
+}

+ 26 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkGetCookies.java

@@ -0,0 +1,26 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * CDP {@code Network.getCookies} request for a single URL.
+ *
+ * @author reghao
+ * @date 2021-03-14 19:23:26
+ */
+public class NetworkGetCookies extends CdpReq {
+    private Params params;
+
+    /**
+     * @param url the only entry placed in the request's {@code urls} list
+     */
+    public NetworkGetCookies(String url) {
+        super("Network.getCookies");
+        params = new Params(url);
+    }
+
+    /** Parameter object holding a mutable one-element URL list. */
+    static class Params {
+        private List<String> urls;
+
+        public Params(String url) {
+            urls = new ArrayList<>();
+            urls.add(url);
+        }
+    }
+}

+ 22 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/NetworkGetResponseBody.java

@@ -0,0 +1,22 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+/**
+ * CDP {@code Network.getResponseBody} request for one network request id.
+ *
+ * @author reghao
+ * @date 2021-03-14 19:23:26
+ */
+public class NetworkGetResponseBody extends CdpReq {
+    private Params params;
+
+    /**
+     * @param requestId id of the network request whose body is wanted
+     */
+    public NetworkGetResponseBody(String requestId) {
+        super("Network.getResponseBody");
+        params = new Params(requestId);
+    }
+
+    /** Parameter object carrying the request id. */
+    static class Params {
+        private String requestId;
+
+        public Params(String id) {
+            requestId = id;
+        }
+    }
+}

+ 23 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/PageNavigate.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+/**
+ * CDP {@code Page.navigate} request: loads the given URL in the page.
+ *
+ * @author reghao
+ * @date 2021-03-14 19:23:26
+ */
+public class PageNavigate extends CdpReq {
+    private Params params;
+
+    /**
+     * @param url the URL to navigate to
+     */
+    public PageNavigate(String url) {
+        super("Page.navigate");
+        params = new Params(url);
+    }
+
+    /** Parameter object carrying the target URL. */
+    static class Params {
+        private String url;
+
+        public Params(String target) {
+            url = target;
+        }
+    }
+}

+ 26 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/PageSetDownloadBehavior.java

@@ -0,0 +1,26 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+/**
+ * CDP {@code Page.setDownloadBehavior} request: configures how file downloads are handled.
+ *
+ * @author reghao
+ * @date 2022-06-02 10:25:21
+ */
+public class PageSetDownloadBehavior extends CdpReq {
+    private Params params;
+
+    /**
+     * @param dir the value sent as {@code downloadPath}
+     */
+    public PageSetDownloadBehavior(String dir) {
+        super("Page.setDownloadBehavior");
+        params = new Params(dir);
+    }
+
+    /** Parameter object; the behavior is always {@code "allowAndName"}. */
+    static class Params {
+        private String behavior = "allowAndName";
+        private String downloadPath;
+
+        public Params(String downloadPath) {
+            this.downloadPath = downloadPath;
+        }
+    }
+}

+ 21 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/RemoteTarget.java

@@ -0,0 +1,21 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+import lombok.Data;
+
+/**
+ * Data format returned by http://localhost:9999/json
+ * (the remote-debugging endpoint that lists the browser's targets).
+ *
+ * @author reghao
+ * @date 2021-03-14 23:42:18
+ */
+@Data
+public class RemoteTarget {
+    private String description;
+    private String devtoolsFrontendUrl;
+    // id of the tab
+    private String id;
+    private String title;
+    private String type;
+    private String url;
+    private String webSocketDebuggerUrl;
+}

+ 23 - 0
browser/src/main/java/cn/reghao/bnt/browser/cdp/req/TargetCreateTarget.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.browser.cdp.req;
+
+/**
+ * CDP {@code Target.createTarget} request: opens a new tab.
+ *
+ * @author reghao
+ * @date 2021-08-07 14:15:40
+ */
+public class TargetCreateTarget extends CdpReq {
+    private Params params;
+
+    /**
+     * @param url the URL sent as the request's {@code url} parameter
+     */
+    public TargetCreateTarget(String url) {
+        super("Target.createTarget");
+        params = new Params(url);
+    }
+
+    /** Parameter object carrying the URL. */
+    static class Params {
+        private String url;
+
+        public Params(String target) {
+            url = target;
+        }
+    }
+}

+ 298 - 0
browser/src/main/java/cn/reghao/bnt/browser/chrome/AbstractChrome.java

@@ -0,0 +1,298 @@
+package cn.reghao.bnt.browser.chrome;
+
+import cn.reghao.bnt.browser.cdp.req.*;
+import cn.reghao.bnt.browser.ws.WebSocketClient;
+import cn.reghao.bnt.browser.cdp.req.*;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.Call;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.Response;
+
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.stream.Collectors;
+
+/**
+ * Chrome 浏览器抽象类
+ *
+ * @author reghao
+ * @date 2021-03-19 13:58:07
+ */
+@Slf4j
+public abstract class AbstractChrome {
+    protected boolean isHeadless;
+    protected boolean enableProxy;
+    protected int reqTimeout;
+    protected int remotePort;
+    protected String tabsApi;
+    protected String wsPrefix;
+    protected Map<String, PageRequest> pageRequestMap = new ConcurrentHashMap<>();
+    protected Map<String, WebSocketClient> wsClientMap = new ConcurrentHashMap<>();
+
+    /**
+     * Stores the launch flags, derives the per-page request timeout and
+     * picks the remote debugging endpoints.
+     *
+     * @param isHeadless  whether Chrome should run headless
+     * @param enableProxy whether pages are loaded through the proxy
+     */
+    public AbstractChrome(boolean isHeadless, boolean enableProxy) {
+        this.isHeadless = isHeadless;
+        this.enableProxy = enableProxy;
+        // Pages that go through the proxy get a 30s timeout, all others 10s.
+        if (enableProxy) {
+            this.reqTimeout = 30;
+        } else {
+            this.reqTimeout = 10;
+        }
+        init();
+    }
+
+    /**
+     * Chooses a random debugging port and builds the tab-list URL and the
+     * WebSocket URL prefix that depend on it.
+     */
+    private void init() {
+        int port = randomPort();
+        this.remotePort = port;
+        this.tabsApi = "http://localhost:" + port + "/json";
+        this.wsPrefix = "ws://localhost:" + port + "/devtools/page/";
+    }
+
+    private int randomPort() {
+        int min = 10000, max = 20000;
+        return ThreadLocalRandom.current().nextInt(min, max + 1);
+    }
+
+    /**
+     * Connects a WebSocket client to the single tab Chrome opens on startup
+     * and registers a {@link PageRequest} for it. Logs an error and does
+     * nothing when the number of startup tabs is not exactly one.
+     */
+    protected void enableWebSocket() {
+        Set<String> startupTabs = tabIds();
+        int tabCount = startupTabs.size();
+        if (tabCount == 1) {
+            String tabId = startupTabs.iterator().next();
+            PageRequest pageRequest = new PageRequest();
+            pageRequestMap.putIfAbsent(tabId, pageRequest);
+            wsClientMap.putIfAbsent(tabId, new WebSocketClient(wsPrefix + tabId, pageRequest));
+            return;
+        }
+
+        log.error("Chrome 启动时默认打开的 tab 不是 1 个,而是 {} 个, 启用 WebSocket 连接失败...", tabCount);
+    }
+
+    private Set<String> tabIds() {
+        OkHttpClient okHttpClient = new OkHttpClient();
+        Request request = new Request.Builder()
+                .url(tabsApi)
+                .get()
+                .build();
+
+        Call call = okHttpClient.newCall(request);
+        try {
+            Response response = call.execute();
+            assert response.body() != null;
+            String body = response.body().string();
+            return JsonConverter.jsonToObjects(body, RemoteTarget.class).stream()
+                    .filter(remoteTarget -> remoteTarget.getType().equals("page"))
+                    .filter(remoteTarget -> {
+                        String url = remoteTarget.getUrl();
+                        return url.equals("data:,") || url.equals("chrome://newtab/");
+                    })
+                    .map(RemoteTarget::getId)
+                    .collect(Collectors.toSet());
+        } catch (Exception e) {
+            log.error("获取 chrome tab 列表失败 -> {}", e.getMessage());
+        }
+        return Collections.emptySet();
+    }
+
+    /**
+     * Opens additional tabs through the single existing WebSocket client
+     * until {@code tabNum} tabs are requested in total, then enables network
+     * debugging on every blank tab reported by Chrome. Logs an error and
+     * returns when there is not exactly one connected client, or when the
+     * number of blank tabs afterwards differs from {@code tabNum}.
+     *
+     * @param tabNum total number of tabs wanted, including the startup tab
+     */
+    protected void openTabs(int tabNum) {
+        if (wsClientMap.size() != 1) {
+            log.error("打开新 tab 失败");
+            return;
+        }
+
+        WebSocketClient wsClient = wsClientMap.values().iterator().next();
+        // The startup tab already exists, so only tabNum - 1 new ones are requested.
+        int remaining = tabNum - 1;
+        while (remaining > 0) {
+            wsClient.sendMessage(new TargetCreateTarget(""));
+            remaining--;
+        }
+
+        Set<String> openedTabs = tabIds();
+        if (openedTabs.size() == tabNum) {
+            for (String openedTabId : openedTabs) {
+                enableTabNetworkDebug(openedTabId);
+            }
+            return;
+        }
+        log.error("打开新 tab 失败");
+    }
+
+    /**
+     * Enables network debugging for a tab.
+     * <p>
+     * Makes sure the tab has a {@link PageRequest} and a connected
+     * {@link WebSocketClient}, then sends {@code Network.enable} to it.
+     * Previously the command was only sent to clients that already existed,
+     * so a tab whose client was created here never had network events
+     * enabled.
+     *
+     * @param tabId id of the tab to enable
+     * @date 2021-08-07 15:40
+     */
+    private void enableTabNetworkDebug(String tabId) {
+        PageRequest pageRequest = pageRequestMap.computeIfAbsent(tabId, id -> new PageRequest());
+
+        WebSocketClient wsClient = wsClientMap.get(tabId);
+        if (wsClient == null) {
+            wsClient = new WebSocketClient(wsPrefix + tabId, pageRequest);
+            wsClientMap.put(tabId, wsClient);
+        }
+        // NOTE(review): relies on OkHttp queueing messages sent before the socket
+        // has finished opening — confirm against the OkHttp version in use.
+        wsClient.sendMessage(new NetworkEnable());
+    }
+
+    /**
+     * @return a new list holding the ids of all tabs that currently have a WebSocket client
+     */
+    public List<String> availTabIds() {
+        List<String> connectedTabIds = new ArrayList<>();
+        connectedTabIds.addAll(wsClientMap.keySet());
+        return connectedTabIds;
+    }
+
+    /**
+     * Loads {@code url} in the given tab and polls once per second, for at
+     * most {@code reqTimeout} seconds, until the matched requests are done.
+     *
+     * @param tabId       id of a tab returned by {@link #availTabIds()}
+     * @param url         page to navigate to
+     * @param autoScroll  whether to call {@link #autoScrollPage()} once loading is done
+     * @param reqMatchers requests to match inside the page
+     * @return the matched requests; an empty list on timeout or interruption;
+     *         {@code null} when the page reports that the resource does not exist
+     * @throws IllegalArgumentException if {@code tabId} has no registered tab
+     */
+    public List<ReqInPage> getDynamicPage(String tabId, String url, boolean autoScroll, Set<ReqMatcher> reqMatchers) {
+        PageRequest pageRequest = pageRequestMap.get(tabId);
+        WebSocketClient wsClient = wsClientMap.get(tabId);
+        if (pageRequest == null || wsClient == null) {
+            // Fail with a clear message instead of an anonymous NullPointerException.
+            throw new IllegalArgumentException("unknown tabId: " + tabId);
+        }
+
+        pageRequest.setReqMatchers(reqMatchers);
+        return navigateAndAwait(wsClient, pageRequest, url, autoScroll);
+    }
+
+    /**
+     * Same as the tab-specific overload, but uses the first registered tab
+     * and switches its {@link PageRequest} to direct handling.
+     *
+     * @param url         page to navigate to
+     * @param autoScroll  whether to call {@link #autoScrollPage()} once loading is done
+     * @param reqMatchers requests to match inside the page
+     * @return the matched requests; an empty list on timeout or interruption;
+     *         {@code null} when the page reports that the resource does not exist
+     */
+    public List<ReqInPage> getDynamicPage(String url, boolean autoScroll, Set<ReqMatcher> reqMatchers) {
+        Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
+        String tabId = entry.getKey();
+        PageRequest pageRequest = entry.getValue();
+        pageRequest.setReqMatchers(reqMatchers);
+        pageRequest.setHandleDirectly(true);
+
+        return navigateAndAwait(wsClientMap.get(tabId), pageRequest, url, autoScroll);
+    }
+
+    /**
+     * Sends the navigate command to Chrome and polls the page state once per second.
+     */
+    private List<ReqInPage> navigateAndAwait(WebSocketClient wsClient, PageRequest pageRequest,
+                                             String url, boolean autoScroll) {
+        // Ask Chrome to open the page.
+        wsClient.sendMessage(new PageNavigate(url));
+        for (int i = 0; i < reqTimeout; i++) {
+            try {
+                Thread.sleep(1_000);
+            } catch (InterruptedException e) {
+                // Restore the flag and stop waiting; looping on would make every later sleep fail at once.
+                Thread.currentThread().interrupt();
+                log.warn("等待页面 {} 加载时线程被中断, 提前结束", url);
+                break;
+            }
+
+            if (pageRequest.isNotFound()) {
+                log.info("{} 中的资源不存在,请开始下一次请求...", url);
+                pageRequest.clearAll();
+                return null;
+            } else if (pageRequest.isLoadDone()) {
+                log.info("请求处理完成,请开始下一次请求...");
+                if (autoScroll) {
+                    autoScrollPage();
+                }
+                return pageRequest.reqsInPage();
+            }
+        }
+
+        pageRequest.clearAll();
+        return Collections.emptyList();
+    }
+
+    /**
+     * Navigates the first registered tab to {@code url} with direct
+     * handling enabled, then keeps the calling thread waiting for up to 24
+     * hours. Returns early, with the interrupt flag restored, when the
+     * thread is interrupted.
+     *
+     * @param url         page to navigate to
+     * @param reqMatchers requests to match inside the page
+     */
+    public void getAndHandleDynamicPage(String url, Set<ReqMatcher> reqMatchers) {
+        Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
+        String tabId = entry.getKey();
+        PageRequest pageRequest = entry.getValue();
+        pageRequest.setReqMatchers(reqMatchers);
+        pageRequest.setHandleDirectly(true);
+
+        wsClientMap.get(tabId).sendMessage(new PageNavigate(url));
+        int timeout = 3600*24;
+        for (int i = 0; i < timeout; i++) {
+            try {
+                Thread.sleep(1_000);
+            } catch (InterruptedException e) {
+                // Swallowing the interrupt would keep this loop alive for the rest of the 24h.
+                Thread.currentThread().interrupt();
+                log.warn("等待页面 {} 时线程被中断, 提前结束", url);
+                return;
+            }
+        }
+    }
+
+    /**
+     * Navigates the first registered tab to {@code url} with direct
+     * handling enabled, then sleeps 10 seconds before returning so the
+     * caller can request the next page.
+     *
+     * @param url         page to navigate to
+     * @param reqMatchers requests to match inside the page
+     */
+    public void getAndHandleDynamicPageAuto(String url, Set<ReqMatcher> reqMatchers) {
+        Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
+        String tabId = entry.getKey();
+        PageRequest pageRequest = entry.getValue();
+        pageRequest.setReqMatchers(reqMatchers);
+        pageRequest.setHandleDirectly(true);
+
+        wsClientMap.get(tabId).sendMessage(new PageNavigate(url));
+
+        try {
+            log.info("休眠 10s 后请求下一个页面");
+            Thread.sleep(10_000);
+        } catch (InterruptedException e) {
+            // Keep the interrupt visible to the caller instead of dropping it.
+            Thread.currentThread().interrupt();
+            log.warn("等待页面 {} 时线程被中断", url);
+        }
+    }
+
+    /**
+     * Navigates the first registered tab to each URL in turn with direct
+     * handling enabled, sleeping 3 seconds after every navigation. Stops
+     * early, with the interrupt flag restored, when the thread is
+     * interrupted.
+     *
+     * @param urls        pages to navigate to
+     * @param reqMatchers requests to match inside each page
+     */
+    @Deprecated
+    public void getAndHandleDynamicPages(Set<String> urls, Set<ReqMatcher> reqMatchers) {
+        Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
+        String tabId = entry.getKey();
+        PageRequest pageRequest = entry.getValue();
+        pageRequest.setReqMatchers(reqMatchers);
+        pageRequest.setHandleDirectly(true);
+        for (String url : urls) {
+            wsClientMap.get(tabId).sendMessage(new PageNavigate(url));
+
+            try {
+                log.info("休眠 3s 后请求下一个页面");
+                Thread.sleep(3_000);
+            } catch (InterruptedException e) {
+                // Without the pause the remaining navigations would fire back to back, so stop here.
+                Thread.currentThread().interrupt();
+                log.warn("等待页面 {} 时线程被中断, 提前结束", url);
+                return;
+            }
+        }
+    }
+
+    /**
+     * Configures the first registered tab to download into {@code dir} and
+     * navigates it to each URL in turn, sleeping 3 seconds after every
+     * navigation. Stops early, with the interrupt flag restored, when the
+     * thread is interrupted.
+     *
+     * @param urls URLs to navigate to
+     * @param dir  directory sent to Chrome as the download path
+     */
+    public void download(Set<String> urls, String dir) {
+        Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
+        String tabId = entry.getKey();
+        PageRequest pageRequest = entry.getValue();
+        // No request matching is needed while downloading.
+        pageRequest.setReqMatchers(new HashSet<>());
+        pageRequest.setHandleDirectly(true);
+
+        WebSocketClient wsClient = wsClientMap.get(tabId);
+        PageSetDownloadBehavior downloadBehavior = new PageSetDownloadBehavior(dir);
+        wsClient.sendMessage(downloadBehavior);
+        for (String url : urls) {
+            wsClient.sendMessage(new PageNavigate(url));
+
+            try {
+                log.info("休眠 3s 后请求下一个页面");
+                Thread.sleep(3_000);
+            } catch (InterruptedException e) {
+                // Without the pause the remaining navigations would fire back to back, so stop here.
+                Thread.currentThread().interrupt();
+                log.warn("等待页面 {} 时线程被中断, 提前结束", url);
+                return;
+            }
+        }
+    }
+
+    /**
+     * Automatically scrolls the page.
+     * Called by the page-loading methods once a page has finished loading
+     * and auto scrolling was requested.
+     *
+     * @date 2021-08-06 09:54
+     */
+    abstract void autoScrollPage();
+}

+ 71 - 0
browser/src/main/java/cn/reghao/bnt/browser/chrome/ChromeBrowser.java

@@ -0,0 +1,71 @@
+package cn.reghao.bnt.browser.chrome;
+
+import cn.reghao.bnt.browser.cdp.req.NetworkGetCookies;
+import lombok.extern.slf4j.Slf4j;
+import java.util.*;
+
+/**
+ * @author reghao
+ * @date 2021-03-15 00:59:58
+ */
+@Slf4j
+public class ChromeBrowser extends AbstractChrome {
+    public ChromeBrowser(int tabNum, boolean isHeadless, boolean enableProxy) {
+        super(isHeadless, enableProxy);
+        init(tabNum);
+    }
+
+    public ChromeBrowser(boolean isHeadless, boolean enableProxy) {
+        super(isHeadless, enableProxy);
+        init(1);
+    }
+
+    private void init(int tabNum) {
+        ProcessBuilder pb = new ProcessBuilder(chromeArguments());
+        try {
+            pb.start();
+            log.info("休眠 5s 等待 Chrome 完全启动...");
+            Thread.sleep(5_000);
+            enableWebSocket();
+            openTabs(tabNum);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    private List<String> chromeArguments() {
+        List<String> cmd = new ArrayList<>();
+        //cmd.add("/usr/bin/google-chrome-unstable");
+        cmd.add("/usr/bin/google-chrome-stable");
+        if (isHeadless) {
+            cmd.add("--headless");
+        }
+        cmd.add("--disable-gpu");
+        cmd.add("--user-data-dir=");
+        cmd.add("--remote-debugging-port=" + remotePort);
+        if (enableProxy) {
+            cmd.add("--proxy-server=socks://127.0.0.1:1080");
+        }
+        return cmd;
+    }
+
+    public String getCookies(String url) {
+        NetworkGetCookies getCookies = new NetworkGetCookies(url);
+        log.info("获取 cookies 的 id -> " + getCookies.getId());
+        //wsClient.sendMessage(getCookies);
+        return null;
+    }
+
+    public void setCookies() {
+    }
+
+    @Override
+    public void autoScrollPage() {
+        log.info("chrome 浏览器休眠 60s 模拟页面滚动...");
+        try {
+            Thread.sleep(60_000);
+        } catch (InterruptedException e) {
+            e.printStackTrace();
+        }
+    }
+}

+ 79 - 0
browser/src/main/java/cn/reghao/bnt/browser/chrome/PageRequest.java

@@ -0,0 +1,79 @@
+package cn.reghao.bnt.browser.chrome;
+
+import lombok.Data;
+
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.stream.Collectors;
+
+/**
+ * A page request, made up of a number of dynamic requests (associated with a
+ * browser tab).
+ *
+ * @author reghao
+ * @date 2021-03-19 20:49:56
+ */
+@Data
+public class PageRequest {
+    // Whether responses in the page are handled directly
+    private boolean isHandleDirectly;
+    private boolean notFound;
+    // Requests that need to be matched in the page
+    private Set<ReqMatcher> reqMatchers;
+    // Requests that have been matched
+    private Map<String, ReqInPage> reqsInNetworkEvent;
+    // Requests whose response still has to be fetched from chrome
+    private Map<Integer, ReqInPage> reqsInGetResponseBody;
+    // Requests whose response has already been fetched from chrome
+    private List<ReqInPage> reqsInGetResponseBodyDone;
+
+    public PageRequest() {
+        this.isHandleDirectly = false;
+        this.notFound = false;
+        this.reqsInNetworkEvent = new ConcurrentHashMap<>();
+        this.reqsInGetResponseBody = new ConcurrentHashMap<>();
+        this.reqsInGetResponseBodyDone = new CopyOnWriteArrayList<>();
+    }
+
+    /**
+     * Whether the page has finished loading.
+     * The page counts as done when the number of distinct matchers among the
+     * completed requests equals the number of matchers to be matched.
+     * NOTE(review): reqMatchers is not initialised by the constructor, so
+     * this throws a NullPointerException unless setReqMatchers was called
+     * with a non-null set first.
+     *
+     * @return true when both matcher counts are equal
+     * @date 2021-08-06 09:51
+     */
+    public boolean isLoadDone() {
+        Set<ReqMatcher> set = reqsInGetResponseBodyDone.stream()
+                .map(ReqInPage::getReqMatcher).collect(Collectors.toSet());
+        // TODO check whether set and reqMatchers contain the same elements
+        return reqMatchers.size() == set.size();
+    }
+
+    /**
+     * Sets the requests to be matched in the page; call before every page
+     * request.
+     *
+     * @param set the matchers for the next page load
+     * @date 2021-08-07 13:50
+     */
+    public synchronized void setReqMatchers(Set<ReqMatcher> set) {
+        reqMatchers = set;
+    }
+
+    public List<ReqInPage> reqsInPage() {
+        List<ReqInPage> list = new ArrayList<>(reqsInGetResponseBodyDone); // copy first, because clearAll() empties the source list
+        clearAll();
+        return list;
+    }
+
+    public void clearAll() {
+        this.notFound = false;
+        clearCache();
+    }
+
+    private void clearCache() {
+        reqsInGetResponseBodyDone.clear();
+        reqsInGetResponseBody.clear();
+        reqsInNetworkEvent.clear();
+    }
+}

+ 31 - 0
browser/src/main/java/cn/reghao/bnt/browser/chrome/ReqInPage.java

@@ -0,0 +1,31 @@
+package cn.reghao.bnt.browser.chrome;
+
+import lombok.Data;
+
+/**
+ * A dynamic request inside a page; the URL is unique within the page.
+ *
+ * @author reghao
+ * @date 2021-03-12 23:34:56
+ */
+@Data
+public class ReqInPage {
+    // WebSocket request id(global unique)
+    private int wsReqId;
+    private ReqMatcher reqMatcher;
+    // requestId from the CDP Network event
+    private String requestId;
+    // All requests loaded by the same page share the same loaderId
+    private String loaderId;
+    // URL matched by the pattern
+    private String requestUrl;
+    private String body;
+    //private String pageUrl;
+
+    public ReqInPage(ReqMatcher reqMatcher, String requestId, String loaderId, String requestUrl) {
+        this.reqMatcher = reqMatcher;
+        this.requestId = requestId;
+        this.loaderId = loaderId;
+        this.requestUrl = requestUrl;
+    }
+}

+ 37 - 0
browser/src/main/java/cn/reghao/bnt/browser/chrome/ReqMatcher.java

@@ -0,0 +1,37 @@
+package cn.reghao.bnt.browser.chrome;
+
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.core.url.BodyDataType;
+import cn.reghao.bnt.core.url.Site;
+import lombok.Data;
+
+/**
+ * Matches a specific request inside a page.
+ * The site and body data type are stored as the names of the enum constants
+ * passed to the constructor.
+ *
+ * @author reghao
+ * @date 2021-08-02 18:15:11
+ */
+@Data
+public class ReqMatcher {
+    private String site;
+    // String matching pattern that uniquely identifies a request
+    private String pattern;
+    private String dataType;
+    private DataParser dataParser;
+    //private String pagePattern;
+
+    public ReqMatcher(Site site, String pattern, BodyDataType bodyDataType, DataParser dataParser) {
+        this.site = site.name();
+        this.pattern = pattern;
+        this.dataType = bodyDataType.name();
+        this.dataParser = dataParser;
+    }
+
+    /*public ReqMatcher(Site site, String pattern, BodyDataType bodyDataType, DataParser dataParser, String pagePattern) {
+        this.site = site.name();
+        this.pattern = pattern;
+        this.dataType = bodyDataType.name();
+        this.dataParser = dataParser;
+        this.pagePattern = pagePattern;
+    }*/
+}

+ 52 - 0
browser/src/main/java/cn/reghao/bnt/browser/parser/ChromeDataParser.java

@@ -0,0 +1,52 @@
+package cn.reghao.bnt.browser.parser;
+
+import cn.reghao.jutil.jdk.http.WebRequest;
+import cn.reghao.jutil.jdk.http.WebResponse;
+import cn.reghao.jutil.jdk.result.WebResult;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.jutil.tool.http.DefaultWebRequest;
+import cn.reghao.bnt.browser.SpiderBrowser;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.core.url.Site;
+import com.google.gson.reflect.TypeToken;
+import lombok.extern.slf4j.Slf4j;
+
+import java.lang.reflect.Type;
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * {@link DataParser} that forwards response bodies captured in Chrome to the
+ * raw-data endpoint stored in {@code api}.
+ * For every entry of {@code SpiderBrowser.map} whose key is contained in the
+ * URL, the URL and body are wrapped in a {@link RawData} and POSTed as JSON.
+ * A non-200 status is ignored silently; a non-zero result code is logged.
+ * {@code parse} itself always returns an empty map.
+ *
+ * @author reghao
+ * @date 2023-09-10 02:22:39
+ */
+@Slf4j
+public class ChromeDataParser implements DataParser {
+    private final WebRequest webRequest = new DefaultWebRequest();
+    private final String api = "http://spider.reghao.cn/api/crawler/rawdata";
+
+    @Override
+    public Map<String, Object> parse(String url, String body) throws InterruptedException {
+        SpiderBrowser.map.forEach((key, value) -> {
+            if (url.contains(key)) {
+                CrawlUrl crawlUrl = new CrawlUrl(Site.taobao.name(), value, url); // NOTE(review): site is hard-coded to taobao whatever the URL — confirm intended
+                RawData rawData = new RawData(crawlUrl, body);
+                WebResponse webResponse = webRequest.postJson(api, JsonConverter.objectToJson(rawData));
+                int statusCode = webResponse.getStatusCode();
+                if (statusCode != 200) {
+                    return;
+                }
+
+                String body1 = webResponse.getBody();
+                Type type = new TypeToken<WebResult<Boolean>>(){}.getType();
+                WebResult<Boolean> webResult = JsonConverter.jsonToObject(body1, type); // NOTE(review): not null-checked before use — verify jsonToObject never returns null
+                if (webResult.getCode() != 0) {
+                    log.error("请求提交失败: {}", webResult.getMsg());
+                }
+            }
+        });
+
+        return Collections.emptyMap();
+    }
+}

+ 48 - 0
browser/src/main/java/cn/reghao/bnt/browser/ws/CdpMessageHandler.java

@@ -0,0 +1,48 @@
+package cn.reghao.bnt.browser.ws;
+
+import cn.reghao.bnt.browser.cdp.msg.CdpMethodResultMessage;
+import cn.reghao.bnt.browser.cdp.msg.CdpNetworkEventMessage;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Chrome DevTools Protocol WebSocket 消息处理器
+ *
+ * @author reghao
+ * @date 2021-03-14 20:26:58
+ */
+@Slf4j
+public class CdpMessageHandler implements MessageHandler {
+    private CdpMethodResultMessage methodResultMessage;
+    private CdpNetworkEventMessage networkEventMessage;
+
+    public CdpMessageHandler(WebSocketClient wsClient, PageRequest pageRequest) {
+        this.methodResultMessage = new CdpMethodResultMessage(pageRequest);
+        this.networkEventMessage = new CdpNetworkEventMessage(wsClient, pageRequest);
+    }
+
+    @Override
+    public void handleMessage(String message) {
+        JsonElement jsonElement = JsonConverter.jsonToJsonElement(message);
+        if (jsonElement instanceof JsonObject) {
+            JsonObject cdpMsg = jsonElement.getAsJsonObject();
+
+            if (cdpMsg.get("result") != null) {
+                //log.info("接收到 Chrome WebSocket 响应消息");
+                methodResultMessage.parse(cdpMsg);
+            }
+
+            JsonElement methodElement = cdpMsg.get("method");
+            if (methodElement != null) {
+                String method = methodElement.getAsString();
+                if (method.startsWith("Network.")) {
+                    //log.info("接收到 Network 事件消息 -> {}", message);
+                    networkEventMessage.parse(cdpMsg);
+                }
+            }
+        }
+    }
+}

+ 9 - 0
browser/src/main/java/cn/reghao/bnt/browser/ws/MessageHandler.java

@@ -0,0 +1,9 @@
+package cn.reghao.bnt.browser.ws;
+
+/**
+ * Callback for text messages; {@code handleMessage} receives the raw
+ * message string.
+ *
+ * @author reghao
+ * @date 2021-03-14 20:26:39
+ */
+public interface MessageHandler {
+    void handleMessage(String message);
+}

+ 37 - 0
browser/src/main/java/cn/reghao/bnt/browser/ws/WebSocketClient.java

@@ -0,0 +1,37 @@
+package cn.reghao.bnt.browser.ws;
+
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.browser.chrome.PageRequest;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.*;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Thin wrapper around an OkHttp {@link WebSocket} connected to one Chrome
+ * tab. Incoming messages are handled by a {@link CdpMessageHandler} bound to
+ * the tab's {@link PageRequest}.
+ *
+ * @author reghao
+ * @date 2021-03-14 18:54:14
+ */
+@Slf4j
+public class WebSocketClient {
+    private final WebSocket webSocket;
+
+    /**
+     * Opens the WebSocket connection.
+     *
+     * @param ws          WebSocket URL of the tab
+     * @param pageRequest page state updated by incoming messages
+     */
+    public WebSocketClient(String ws, PageRequest pageRequest) {
+        Request request = new Request.Builder().get().url(ws).build();
+        OkHttpClient okHttpClient = new OkHttpClient.Builder()
+                .readTimeout(5, TimeUnit.SECONDS)    // read timeout
+                .writeTimeout(5, TimeUnit.SECONDS)   // write timeout
+                .connectTimeout(5, TimeUnit.SECONDS) // connect timeout
+                .build();
+
+        this.webSocket = okHttpClient.newWebSocket(request,
+                new WebSocketListenerImpl(new CdpMessageHandler(this, pageRequest)));
+    }
+
+    /**
+     * Sends a text message and logs a warning when the socket does not
+     * accept it.
+     *
+     * @param message text to send
+     */
+    public void sendMessage(String message) {
+        // send() returns false instead of throwing when the message is not enqueued.
+        if (!webSocket.send(message)) {
+            log.warn("WebSocket 消息发送失败 -> {}", message);
+        }
+    }
+
+    /**
+     * Serializes the object to JSON and sends it as a text message.
+     *
+     * @param object object to serialize and send
+     */
+    public void sendMessage(Object object) {
+        String json = JsonConverter.objectToJson(object);
+        sendMessage(json);
+    }
+}

+ 55 - 0
browser/src/main/java/cn/reghao/bnt/browser/ws/WebSocketListenerImpl.java

@@ -0,0 +1,55 @@
+package cn.reghao.bnt.browser.ws;
+
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.Response;
+import okhttp3.WebSocket;
+import okhttp3.WebSocketListener;
+import okio.ByteString;
+
+/**
+ * OkHttp listener that logs connection lifecycle events and passes text
+ * messages to a {@link MessageHandler}.
+ *
+ * @author reghao
+ * @date 2021-03-14 20:18:24
+ */
+@Slf4j
+public class WebSocketListenerImpl extends WebSocketListener {
+    private final MessageHandler messageHandler;
+
+    public WebSocketListenerImpl(MessageHandler messageHandler) {
+        this.messageHandler = messageHandler;
+    }
+
+    @Override
+    public void onOpen(WebSocket webSocket, Response response) {
+        super.onOpen(webSocket, response);
+        log.info("WebSocket 连接建立...");
+    }
+
+    @Override
+    public void onClosed(WebSocket webSocket, int code, String reason) {
+        super.onClosed(webSocket, code, reason);
+        log.info("WebSocket 连接断开...");
+    }
+
+    @Override
+    public void onFailure(WebSocket webSocket, Throwable throwable, Response response) {
+        // response may be null (e.g. the connection was refused), so it is passed as a
+        // log argument instead of calling toString() on it, which used to throw here.
+        log.error("WebSocket 连接失败 -> {} - {}", throwable.toString(), response, throwable);
+        super.onFailure(webSocket, throwable, response);
+    }
+
+    @Override
+    public void onMessage(WebSocket webSocket, String text) {
+        super.onMessage(webSocket, text);
+        try {
+            messageHandler.handleMessage(text);
+        } catch (Exception e) {
+            // Catch everything so a faulty handler cannot break the listener; keep the stack trace in the log.
+            log.error("处理消息时发生错误 -> {}", e.getMessage(), e);
+        }
+    }
+
+    @Override
+    public void onMessage(WebSocket webSocket, ByteString bytes) {
+        super.onMessage(webSocket, bytes);
+        log.info("接收到服务端字节消息");
+    }
+}

+ 14 - 0
core/pom.xml

@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>bnt</artifactId>
+        <groupId>cn.reghao.bnt</groupId>
+        <version>1.0.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>core</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+</project>

+ 56 - 0
core/src/main/java/cn/reghao/bnt/core/event/EvtCrawl.java

@@ -0,0 +1,56 @@
+package cn.reghao.bnt.core.event;
+
+import cn.reghao.jutil.jdk.event.message.Event;
+
+/**
+ * Event carrying a site, a parser, a URL and a message.
+ * The three-argument constructor sets the message to {@code "ok"}; the
+ * no-argument constructor leaves every field unset.
+ *
+ * @author reghao
+ * @date 2022-02-25 18:52:57
+ */
+public class EvtCrawl extends Event {
+    private String site;
+    private String parser;
+    private String url;
+    private String msg;
+
+    public EvtCrawl() {
+    }
+
+    public EvtCrawl(String site, String parser, String url) {
+        this.site = site;
+        this.parser = parser;
+        this.url = url;
+        this.msg = "ok";
+    }
+
+    public void setSite(String site) {
+        this.site = site;
+    }
+
+    public String getSite() {
+        return site;
+    }
+
+    public void setParser(String parser) {
+        this.parser = parser;
+    }
+
+    public String getParser() {
+        return parser;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setMsg(String msg) {
+        this.msg = msg;
+    }
+
+    public String getMsg() {
+        return msg;
+    }
+}

+ 12 - 0
core/src/main/java/cn/reghao/bnt/core/event/EvtNoUrl.java

@@ -0,0 +1,12 @@
+package cn.reghao.bnt.core.event;
+
+import cn.reghao.jutil.jdk.event.message.Event;
+
+/**
+ * Event type with a site and a parser field.
+ * NOTE(review): the fields have no constructor, accessors or Lombok
+ * annotation in this class, so nothing here sets or reads them — confirm
+ * whether that is intended.
+ *
+ * @author reghao
+ * @date 2021-12-17 17:19:50
+ */
+public class EvtNoUrl extends Event {
+    private String site;
+    private String parser;
+}

+ 18 - 0
core/src/main/java/cn/reghao/bnt/core/http/CrawlRequest.java

@@ -0,0 +1,18 @@
+package cn.reghao.bnt.core.http;
+
+import cn.reghao.bnt.core.url.CrawlUrl;
+
+import java.io.File;
+
+/**
+ * Crawler request: the HTTP operations used by the crawler — a HEAD check
+ * returning an int, GET by plain URL or by {@link CrawlUrl}, and a download
+ * into a file.
+ *
+ * @author reghao
+ * @date 2019-08-01 16:27:55
+ */
+public interface CrawlRequest {
+    int head(String url);
+    CrawlResponse get(String url);
+    CrawlResponse get(CrawlUrl crawlUrl);
+    boolean download(CrawlUrl crawlUrl, File file);
+}

+ 25 - 0
core/src/main/java/cn/reghao/bnt/core/http/CrawlResponse.java

@@ -0,0 +1,25 @@
+package cn.reghao.bnt.core.http;
+
+/**
+ * Crawler response: a read-only pair of status code and response body.
+ *
+ * @author reghao
+ * @date 2019-08-01 16:27:55
+ */
+public class CrawlResponse {
+    private int statusCode;
+    private String body;
+
+    public CrawlResponse(int statusCode, String body) {
+        this.statusCode = statusCode;
+        this.body = body;
+    }
+
+    public int getStatusCode() {
+        return statusCode;
+    }
+
+    public String getBody() {
+        return body;
+    }
+}

+ 109 - 0
core/src/main/java/cn/reghao/bnt/core/http/DefaultCrawlRequest.java

@@ -0,0 +1,109 @@
+package cn.reghao.bnt.core.http;
+
+import cn.reghao.jutil.tool.http.BaseWebRequest;
+import cn.reghao.jutil.jdk.http.util.UserAgents;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import org.apache.http.*;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpHead;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.cookie.Cookie;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.util.EntityUtils;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.text.MessageFormat;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * {@link CrawlRequest} implementation based on Apache HttpClient. The
+ * {@code client}, {@code context} and {@code bodyCharset} members used here
+ * are not declared in this class (presumably inherited from
+ * {@link BaseWebRequest} — verify there).
+ * <p>
+ * Requests that cannot be executed are reported with the pseudo status
+ * code 600 instead of an exception.
+ *
+ * @author reghao
+ * @date 2019-11-29 10:03:18
+ */
+public class DefaultCrawlRequest extends BaseWebRequest implements CrawlRequest {
+    private static final Logger log = Logger.getLogger(DefaultCrawlRequest.class.getName());
+
+    public DefaultCrawlRequest() {
+    }
+
+    /**
+     * Sends a HEAD request.
+     *
+     * @return the HTTP status code, or 600 when the request fails with an I/O error
+     */
+    @Override
+    public int head(String url) {
+        HttpHead head = new HttpHead(url);
+        try (CloseableHttpResponse response = client.execute(head)) {
+            return response.getStatusLine().getStatusCode();
+        } catch (IOException e) {
+            log.info(MessageFormat.format("{0} head 请求失败 -> {1}", url, e.getMessage()));
+        }
+
+        // The resource cannot be reached
+        return 600;
+    }
+
+    /**
+     * Sends a GET request with a desktop User-Agent.
+     */
+    @Override
+    public CrawlResponse get(String url) {
+        HttpGet get = new HttpGet(url);
+        get.setHeader("User-Agent", UserAgents.getDesktopAgent());
+        return execRequest(get, context);
+    }
+
+    /**
+     * Sends a GET request with a mobile User-Agent.
+     * NOTE(review): {@code cookies} is currently not applied to the request —
+     * confirm whether it should be.
+     */
+    public CrawlResponse get(String url, List<Cookie> cookies) {
+        HttpGet get = new HttpGet(url);
+        get.setHeader("User-Agent", UserAgents.getMobileAgent());
+        return execRequest(get, context);
+    }
+
+    /**
+     * Sends a GET request with a desktop User-Agent and, when present, the
+     * referer of the crawl URL.
+     */
+    @Override
+    public CrawlResponse get(CrawlUrl crawlUrl) {
+        HttpGet get = new HttpGet(crawlUrl.getUrl());
+        String referer = crawlUrl.getReferer();
+        if (referer != null) {
+            get.setHeader("Referer", referer);
+        }
+        get.setHeader("User-Agent", UserAgents.getDesktopAgent());
+        return execRequest(get, context);
+    }
+
+    /**
+     * Executes the request and reads the body as text.
+     *
+     * @return status code and body, or status 600 with the exception message when anything fails
+     */
+    private CrawlResponse execRequest(HttpRequestBase request, HttpContext context) {
+        try (CloseableHttpResponse response = client.execute(request, context)) {
+            StatusLine statusLine = response.getStatusLine();
+            int statusCode = statusLine.getStatusCode();
+            String body = EntityUtils.toString(response.getEntity(), Charset.forName(bodyCharset));
+            return new CrawlResponse(statusCode, body);
+        } catch (Exception e) {
+            // TODO should this go into a finally block?
+            return new CrawlResponse(600, e.getMessage());
+        }
+    }
+
+    /**
+     * Downloads the crawl URL into {@code file}.
+     *
+     * @param crawlUrl URL (and optional referer) to download
+     * @param file     destination file, overwritten if it exists
+     * @return {@code true} when the server answered 200 and the body was
+     *         written completely, {@code false} otherwise
+     */
+    @Override
+    public boolean download(CrawlUrl crawlUrl, File file) {
+        String url = crawlUrl.getUrl();
+        String referer = crawlUrl.getReferer();
+        HttpGet get = new HttpGet(url);
+        if (referer != null) {
+            get.setHeader("Referer", referer);
+        }
+        get.setHeader("User-Agent", UserAgents.getDesktopAgent());
+        try (CloseableHttpResponse response = client.execute(get)) {
+            int statusCode = response.getStatusLine().getStatusCode();
+            if (statusCode != 200) {
+                return false;
+            }
+
+            HttpEntity httpEntity = response.getEntity();
+            if (httpEntity == null) {
+                log.info(MessageFormat.format("{0} 下载失败 -> {1}", url, "empty response entity"));
+                return false;
+            }
+
+            // try-with-resources closes the file even when writing fails (the stream used to leak).
+            try (FileOutputStream fout = new FileOutputStream(file)) {
+                // Keep writing to the local file until the server has no more data
+                httpEntity.writeTo(fout);
+            }
+            return true;
+        } catch (IOException e) {
+            log.info(MessageFormat.format("{0} 下载失败 -> {1}", url, e.getMessage()));
+        }
+        return false;
+    }
+}

+ 131 - 0
core/src/main/java/cn/reghao/bnt/core/http/JdkCrawlRequest.java

@@ -0,0 +1,131 @@
+package cn.reghao.bnt.core.http;
+
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.jutil.jdk.http.util.UserAgents;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.text.MessageFormat;
+import java.time.Duration;
+import java.util.logging.Logger;
+
+/**
+ * 爬虫请求
+ *
+ * @author reghao
+ * @date 2022-02-28 15:27:55
+ */
+public class JdkCrawlRequest implements CrawlRequest {
+    private static final Logger log = Logger.getLogger(JdkCrawlRequest.class.getName());
+
+    private final HttpClient client = HttpClient.newBuilder()
+            .version(HttpClient.Version.HTTP_1_1)
+            .build();
+
+    @Override
+    public int head(String url) {
+        HttpRequest.Builder builder = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(Duration.ofSeconds(5))
+                .GET();
+        builder.setHeader("User-Agent", UserAgents.getDesktopAgent());
+
+        try {
+            HttpResponse<String> response = client.send(builder.build(), HttpResponse.BodyHandlers.ofString());
+            return response.statusCode();
+        } catch (Exception e) {
+            log.info(MessageFormat.format("{0} 请求失败 -> {1}", url, e.getMessage()));
+            return 600;
+        }
+    }
+
+    @Override
+    public CrawlResponse get(String url) {
+        HttpRequest.Builder builder = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(Duration.ofSeconds(5))
+                .GET();
+        builder.setHeader("User-Agent", UserAgents.getDesktopAgent());
+
+        try {
+            HttpResponse<String> response = client.send(builder.build(), HttpResponse.BodyHandlers.ofString());
+            int statusCode = response.statusCode();
+            // TODO 处理返回的内容编码, 默认编码是 utf-8
+            String body = response.body();
+            return new CrawlResponse(statusCode, body);
+        } catch (Exception e) {
+            log.info(MessageFormat.format("{0} 请求失败 -> {1}", url, e.getMessage()));
+            return new CrawlResponse(600, e.getMessage());
+        }
+    }
+
+    @Override
+    public CrawlResponse get(CrawlUrl crawlUrl) {
+        String url = crawlUrl.getUrl();
+        HttpRequest.Builder builder = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(Duration.ofSeconds(5))
+                .GET();
+        builder.setHeader("User-Agent", UserAgents.getDesktopAgent());
+        String referer = crawlUrl.getReferer();
+        if (referer != null) {
+            builder.setHeader("Referer", referer);
+        }
+
+        try {
+            HttpResponse<String> response = client.send(builder.build(), HttpResponse.BodyHandlers.ofString());
+            int statusCode = response.statusCode();
+            String body = response.body();
+            return new CrawlResponse(statusCode, body);
+        } catch (Exception e) {
+            log.info(MessageFormat.format("{0} 请求失败 -> {1}", url, e.getMessage()));
+            return new CrawlResponse(600, e.getMessage());
+        }
+    }
+
+    @Override
+    public boolean download(CrawlUrl crawlUrl, File file) {
+        String url = crawlUrl.getUrl();
+        HttpRequest.Builder builder = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(Duration.ofSeconds(30))
+                .GET();
+        builder.setHeader("User-Agent", UserAgents.getDesktopAgent());
+        String referer = crawlUrl.getReferer();
+        if (referer != null) {
+            builder.setHeader("Referer", referer);
+        }
+
+        try {
+            HttpResponse<InputStream> in = client.send(builder.build(), HttpResponse.BodyHandlers.ofInputStream());
+            saveFile(in.body(), file);
+            return true;
+        } catch (Exception e) {
+            log.info(MessageFormat.format("{0} 下载失败 -> {1}", url, e.getMessage()));
+        }
+        return false;
+    }
+
+    private void saveFile(InputStream in, File file) throws IOException {
+        File parentDir = file.getParentFile();
+        if (!parentDir.exists()) {
+            //FileUtils.forceMkdir(parentDir);
+        }
+
+        FileOutputStream fos = new FileOutputStream(file);
+        // 1MiB
+        int len = 1024*1024;
+        byte[] buf = new byte[len];
+        int readLen;
+        while ((readLen = in.read(buf, 0, len)) != -1) {
+            fos.write(buf, 0, readLen);
+        }
+        fos.close();
+    }
+}

+ 21 - 0
core/src/main/java/cn/reghao/bnt/core/parser/DataParser.java

@@ -0,0 +1,21 @@
+package cn.reghao.bnt.core.parser;
+
+import java.util.Map;
+
+/**
+ * Data parser: parses the data fetched by a crawler.
+ *
+ * @author reghao
+ * @date 2019-11-29 17:55:17
+ */
+public interface DataParser {
+    /**
+     * Parses the body fetched from {@code url}.
+     *
+     * @param url  the URL the body came from
+     * @param body the fetched body
+     * @return the parsed data as a map (key/value meaning is defined by the implementation)
+     * @throws Exception if the body cannot be parsed
+     */
+    Map<String, Object> parse(String url, String body) throws Exception;
+
+    /**
+     * Name of this parser. It defaults to the simple name of the implementing class
+     * and should be globally unique.
+     *
+     * @date 2021-02-03 13:55
+     */
+    default String parserName() {
+        return getClass().getSimpleName();
+    }
+}

+ 54 - 0
core/src/main/java/cn/reghao/bnt/core/parser/SiteParser.java

@@ -0,0 +1,54 @@
+package cn.reghao.bnt.core.parser;
+
+import java.io.Serializable;
+
+/**
+ * Immutable pair of a site name and a parser name; usable as a map key.
+ *
+ * @author reghao
+ * @date 2022-05-18 11:55:19
+ */
+public class SiteParser implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    private final String site;
+    private final String parser;
+
+    public SiteParser(String site, String parser) {
+        this.site = site;
+        this.parser = parser;
+    }
+
+    public String getSite() {
+        return site;
+    }
+
+    public String getParser() {
+        return parser;
+    }
+
+    /** Returns {@code "<site>.<parser>"}. */
+    @Override
+    public String toString() {
+        return String.format("%s.%s", site, parser);
+    }
+
+    /**
+     * Null-safe hash over both fields. Instances created by deserialization may carry
+     * null fields, which previously caused a NullPointerException here.
+     * For non-null fields the value is identical to the previous implementation.
+     */
+    @Override
+    public int hashCode() {
+        int result = 17;
+        result = result * 31 + java.util.Objects.hashCode(site);
+        result = result * 31 + java.util.Objects.hashCode(parser);
+        return result;
+    }
+
+    /** Two instances are equal when both site and parser are equal (nulls compare equal to nulls). */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof SiteParser)) {
+            return false;
+        }
+
+        SiteParser o = (SiteParser) other;
+        return java.util.Objects.equals(o.site, site)
+                && java.util.Objects.equals(o.parser, parser);
+    }
+}

+ 11 - 0
core/src/main/java/cn/reghao/bnt/core/url/BodyDataType.java

@@ -0,0 +1,11 @@
+package cn.reghao.bnt.core.url;
+
+/**
+ * Format of the data carried in an HTTP response body.
+ *
+ * @author reghao
+ * @date 2020-04-10 20:56:21
+ */
+public enum BodyDataType {
+    json,
+    html,
+    text,
+    m3u8
+}

+ 148 - 0
core/src/main/java/cn/reghao/bnt/core/url/CrawlUrl.java

@@ -0,0 +1,148 @@
+package cn.reghao.bnt.core.url;
+
+import java.io.Serializable;
+
+/**
+ * Describes one URL to crawl together with the site/parser it belongs to.
+ * <p>
+ * Identity ({@link #equals(Object)} / {@link #hashCode()}) is based on the URL only.
+ *
+ * @author reghao
+ * @date 2020-03-15 13:53:38
+ */
+public class CrawlUrl implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    private String site;
+    // Name of the DataParser implementation class (must be globally unique)
+    private String parser;
+    private String url;
+    // If url is an API endpoint, referer is the URL of the page that issues the API request
+    private String referer;
+    // Data type of the response body, one of the values of the BodyDataType enum
+    private String dataType;
+    // HTTP status code returned when the resource does not exist
+    private int notFoundCode;
+    // HTTP status code returned because of anti-crawling measures
+    private int antiCrawlCode;
+    private String title;
+
+    /** Creates an empty instance; every field keeps its Java default (null / 0). */
+    public CrawlUrl() {
+    }
+
+    /** Creates an instance with notFoundCode 404, antiCrawlCode 403 and no referer, data type or title. */
+    public CrawlUrl(String site, String parser, String url) {
+        this.site = site;
+        this.parser = parser;
+        this.url = url;
+        this.referer = null;
+        this.dataType = null;
+        this.notFoundCode = 404;
+        this.antiCrawlCode = 403;
+        this.title = null;
+    }
+
+    public CrawlUrl(String site, String parser, String url, String referer, String dataType,
+                    int notFoundCode, int antiCrawlCode, String title) {
+        this.site = site;
+        this.parser = parser;
+        this.url = url;
+        this.referer = referer;
+        this.dataType = dataType;
+        this.notFoundCode = notFoundCode;
+        this.antiCrawlCode = antiCrawlCode;
+        this.title = title;
+    }
+
+    /** Sets only url and referer; notFoundCode and antiCrawlCode stay 0. */
+    @Deprecated
+    public CrawlUrl(String url, String referer) {
+        this.url = url;
+        this.referer = referer;
+    }
+
+    @Deprecated
+    public void setSiteAndParser(String site, String parser) {
+        this.site = site;
+        this.parser = parser;
+    }
+
+    public void setSite(String site) {
+        this.site = site;
+    }
+
+    public String getSite() {
+        return site;
+    }
+
+    public void setParser(String parser) {
+        this.parser = parser;
+    }
+
+    public String getParser() {
+        return parser;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setReferer(String referer) {
+        this.referer = referer;
+    }
+
+    public String getReferer() {
+        return referer;
+    }
+
+    public void setDataType(String dataType) {
+        this.dataType = dataType;
+    }
+
+    public String getDataType() {
+        return dataType;
+    }
+
+    public void setNotFoundCode(int notFoundCode) {
+        this.notFoundCode = notFoundCode;
+    }
+
+    public int getNotFoundCode() {
+        return notFoundCode;
+    }
+
+    public void setAntiCrawlCode(int antiCrawlCode) {
+        this.antiCrawlCode = antiCrawlCode;
+    }
+
+    public int getAntiCrawlCode() {
+        return antiCrawlCode;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    /**
+     * Null-safe hash of the URL. The no-arg constructor leaves url null, which previously
+     * made this method throw; for a non-null url the value is unchanged.
+     */
+    @Override
+    public int hashCode() {
+        int result = 17;
+        result = result * 31 + java.util.Objects.hashCode(url);
+        return result;
+    }
+
+    /** Two instances are equal when their URLs are equal (two null URLs compare equal). */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof CrawlUrl)) {
+            return false;
+        }
+
+        CrawlUrl o = (CrawlUrl) other;
+        return java.util.Objects.equals(o.url, url);
+    }
+}

+ 73 - 0
core/src/main/java/cn/reghao/bnt/core/url/RawData.java

@@ -0,0 +1,73 @@
+package cn.reghao.bnt.core.url;
+
+import java.io.Serializable;
+
+/**
+ * Unparsed raw data passed through the DataBus.
+ * <p>
+ * Identity is based on the crawl URL and the data; the crawl time is not part of it.
+ *
+ * @author reghao
+ * @date 2019-12-09 09:05:48
+ */
+public class RawData implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    private CrawlUrl crawlUrl;
+    private String data;
+    // Milliseconds since the epoch, taken from System.currentTimeMillis()
+    private long crawledTime;
+
+    /** Creates an empty instance; every field keeps its Java default (null / 0). */
+    public RawData() {
+    }
+
+    /** Creates an instance stamped with the current time. */
+    public RawData(CrawlUrl crawlUrl, String data) {
+        this.crawlUrl = crawlUrl;
+        this.data = data;
+        this.crawledTime = System.currentTimeMillis();
+    }
+
+    public void setCrawlUrl(CrawlUrl crawlUrl) {
+        this.crawlUrl = crawlUrl;
+    }
+
+    public CrawlUrl getCrawlUrl() {
+        return crawlUrl;
+    }
+
+    public void setData(String data) {
+        this.data = data;
+    }
+
+    public String getData() {
+        return data;
+    }
+
+    public void setCrawledTime(long crawledTime) {
+        this.crawledTime = crawledTime;
+    }
+
+    public long getCrawledTime() {
+        return crawledTime;
+    }
+
+    /**
+     * Null-safe hash over crawlUrl and data. The no-arg constructor leaves both null,
+     * which previously made this method throw; for non-null fields the value is unchanged.
+     */
+    @Override
+    public int hashCode() {
+        int result = 17;
+        result = result * 31 + java.util.Objects.hashCode(crawlUrl);
+        result = result * 31 + java.util.Objects.hashCode(data);
+        return result;
+    }
+
+    /** Two instances are equal when both crawlUrl and data are equal (nulls compare equal to nulls). */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof RawData)) {
+            return false;
+        }
+
+        RawData o = (RawData) other;
+        return java.util.Objects.equals(o.crawlUrl, crawlUrl)
+                && java.util.Objects.equals(o.data, data);
+    }
+}

+ 9 - 0
core/src/main/java/cn/reghao/bnt/core/url/Site.java

@@ -0,0 +1,9 @@
+package cn.reghao.bnt.core.url;
+
+/**
+ * Site identifiers.
+ *
+ * @author reghao
+ * @date 2020-03-15 19:07:39
+ */
+public enum Site {
+    zhihu,
+    weibo,
+    bilibili,
+    taobao
+}

+ 23 - 0
core/src/main/java/cn/reghao/bnt/core/url/UrlScheduler.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.core.url;
+
+import cn.reghao.bnt.core.event.EvtCrawl;
+import cn.reghao.bnt.core.event.EvtNoUrl;
+import cn.reghao.bnt.core.parser.SiteParser;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * URL scheduler: hands out the URLs to crawl and receives crawl results and events.
+ *
+ * @author reghao
+ * @date 2019-08-06 11:24:24
+ */
+public interface UrlScheduler {
+    /** Returns the site/parser pairs known to this scheduler. */
+    List<SiteParser> getSiteParsers();
+
+    /** Returns URLs to crawl for the given site and parser. */
+    Set<CrawlUrl> getUrls(String site, String parser);
+
+    /** Hands crawled raw data to the scheduler. */
+    void putData(RawData rawData);
+
+    /** Reports a crawl event to the scheduler. */
+    void crawlEvent(EvtCrawl evtCrawl);
+
+    /** Reports a "no URL" event; does nothing unless an implementation overrides it. */
+    default void noUrlEvent(EvtNoUrl evtNoUrl) {
+    }
+}

+ 7 - 0
crawler/Dockerfile

@@ -0,0 +1,7 @@
+FROM adoptopenjdk/openjdk11:x86_64-alpine-jre-11.0.15_10
+
+WORKDIR /app
+RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone
+
+COPY target/spider-crawler-exec.jar /app/spider-crawler-exec.jar
+ENTRYPOINT ["java","-Djava.security.egd=file:/dev/./urandom","-jar","/app/spider-crawler-exec.jar"]

+ 1 - 0
crawler/README.md

@@ -0,0 +1 @@
+# crawler

+ 5 - 0
crawler/bin/shutdown.sh

@@ -0,0 +1,5 @@
+#!/bin/bash
+
+app='spider-crawler-exec.jar'
+pid=`jps | grep ${app} | awk '{print $1}'`
+kill -15 ${pid}

+ 5 - 0
crawler/bin/start.sh

@@ -0,0 +1,5 @@
+#!/bin/bash
+# Starts the crawler jar found in the current working directory as a background process;
+# stdout and stderr go to console.log.
+
+app='spider-crawler-exec.jar'
+app_path="$(pwd)/${app}"
+# Quoted so a working directory containing spaces is still passed as one argument.
+nohup java -jar "${app_path}" > console.log 2>&1 &

+ 51 - 0
crawler/dependency-reduced-pom.xml

@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <parent>
+    <artifactId>bnt</artifactId>
+    <groupId>cn.reghao.bnt</groupId>
+    <version>1.0.0</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>crawler</artifactId>
+  <version>1.0.0-SNAPSHOT</version>
+  <build>
+    <finalName>bnt-crawler</finalName>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.8.1</version>
+        <configuration>
+          <source>11</source>
+          <target>11</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.2.4</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer>
+                  <mainClass>cn.reghao.bnt.crawler.SpiderCrawler</mainClass>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+  <dependencies>
+    <dependency>
+      <groupId>org.projectlombok</groupId>
+      <artifactId>lombok</artifactId>
+      <version>1.18.0</version>
+      <scope>provided</scope>
+    </dependency>
+  </dependencies>
+</project>

+ 58 - 0
crawler/pom.xml

@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>bnt</artifactId>
+        <groupId>cn.reghao.bnt</groupId>
+        <version>1.0.0</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>crawler</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>cn.reghao.bnt</groupId>
+            <artifactId>core</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <finalName>bnt-crawler</finalName>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>11</source>
+                    <target>11</target>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.4</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>cn.reghao.bnt.crawler.SpiderCrawler</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>

+ 74 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/Crawler.java

@@ -0,0 +1,74 @@
+package cn.reghao.bnt.crawler;
+
+import cn.reghao.bnt.crawler.task.SiteParserGetter;
+import cn.reghao.jutil.jdk.thread.ThreadPoolWrapper;
+import cn.reghao.bnt.core.http.CrawlRequest;
+import cn.reghao.bnt.core.http.DefaultCrawlRequest;
+import cn.reghao.bnt.core.parser.SiteParser;
+import cn.reghao.bnt.core.url.UrlScheduler;
+import cn.reghao.bnt.crawler.impl.HttpUrlScheduler;
+import cn.reghao.bnt.crawler.task.DataProducer;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Owns the shared crawl resources (HTTP requester, URL scheduler, scheduled thread pool)
+ * and tracks one periodic {@link DataProducer} task per {@link SiteParser}.
+ *
+ * @author reghao
+ * @date 2022-02-25 16:55:41
+ */
+public class Crawler {
+    private final CrawlRequest crawlRequest;
+    private final UrlScheduler urlScheduler;
+    private final ScheduledExecutorService crawlerScheduler;
+    /** The scheduled producer task of every site/parser pair that is currently running. */
+    private final Map<SiteParser, ScheduledFuture<?>> map;
+
+    /**
+     * @param managerUrl base URL handed to the {@link HttpUrlScheduler}
+     * @param maxThread  size passed to the scheduled thread pool
+     */
+    public Crawler(String managerUrl, int maxThread) {
+        this.crawlRequest = new DefaultCrawlRequest();
+        this.urlScheduler = new HttpUrlScheduler(managerUrl);
+        this.crawlerScheduler = ThreadPoolWrapper.scheduledThreadPool("crawler", maxThread);
+        this.map = new ConcurrentHashMap<>();
+    }
+
+    public ScheduledExecutorService getCrawlerScheduler() {
+        return crawlerScheduler;
+    }
+
+    public CrawlRequest getCrawlRequest() {
+        return crawlRequest;
+    }
+
+    public UrlScheduler getUrlScheduler() {
+        return urlScheduler;
+    }
+
+    /** Schedules a {@link SiteParserGetter} immediately and then once per minute. */
+    public void start() {
+        SiteParserGetter siteParserGetter = new SiteParserGetter(urlScheduler, this);
+        crawlerScheduler.scheduleAtFixedRate(siteParserGetter, 0, 1, TimeUnit.MINUTES);
+    }
+
+    /**
+     * Starts a producer for {@code siteParser} (first run after 1 second, then every 3 seconds)
+     * unless one is already registered.
+     */
+    public void start(SiteParser siteParser) {
+        // computeIfAbsent makes "check, schedule, register" one atomic step, so two racing
+        // callers can no longer schedule two producers for the same key.
+        map.computeIfAbsent(siteParser, key ->
+                crawlerScheduler.scheduleAtFixedRate(new DataProducer(this, key), 1, 3, TimeUnit.SECONDS));
+    }
+
+    /** Cancels and unregisters every running producer. */
+    public void stop() {
+        // Removing entry by entry guarantees that every future taken out of the map is also
+        // cancelled; forEach followed by clear() could drop one that was added in between.
+        map.keySet().forEach(this::stop);
+    }
+
+    /** Cancels and unregisters the producer of {@code siteParser}, if there is one. */
+    public void stop(SiteParser siteParser) {
+        ScheduledFuture<?> future = map.remove(siteParser);
+        if (future != null) {
+            future.cancel(true);
+        }
+    }
+}

+ 82 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/CrawlerContext.java

@@ -0,0 +1,82 @@
+package cn.reghao.bnt.crawler;
+
+import cn.reghao.jutil.jdk.thread.ThreadPoolWrapper;
+import cn.reghao.jutil.jdk.util.SingleInstance;
+
+import java.util.logging.Logger;
+
+/**
+ * 数据爬取上下文
+ *
+ * @author reghao
+ * @date 2019-11-29 17:34:01
+ */
+public class CrawlerContext {
+    private static final Logger log = Logger.getLogger(CrawlerContext.class.getName());
+
+    private final Crawler crawler;
+
+    public CrawlerContext(Crawler crawler) {
+        this.crawler = crawler;
+    }
+
+    /**
+     * 开始数据爬取任务
+     *
+     * @param
+     * @return
+     * @date 2020-03-16 下午8:35
+     */
+    public void start() {
+        crawler.start();
+        stopGracefully();
+        keepMainRunning();
+    }
+
+    /**
+     * 主线程保持运行状态
+     *
+     * @param
+     * @return
+     * @date 2021-08-09 上午12:12
+     */
+    private void keepMainRunning() {
+        SingleInstance.onlyOne(60002);
+    }
+
+    /**
+     * 优雅关闭爬虫(进程级别的关闭)
+     *
+     * @date 2019-08-16 下午3:45
+     */
+    private void stopGracefully() {
+        Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook(), "main-shutdown-hook"));
+    }
+
+    /**
+     * 1.可以处理 kill -2 或 kill -15
+     * 2.无法处理 kill -9 和机器断电的情况
+     *
+     * @date 2019-08-16 下午4:07
+     */
+    class ShutdownHook implements Runnable {
+        @Override
+        public void run() {
+            stop();
+        }
+    }
+
+    /**
+     * 1.停止从 UrlScheduler 中获取数据
+     * 2.停止向 DataProducer 提交数据
+     * 3.线程池停止接受新任务,并等待当前执行的任务完成
+     *
+     * @param
+     * @return
+     * @date 2021-04-02 下午11:26
+     */
+    private void stop() {
+        ThreadPoolWrapper.shutdownScheduler(crawler.getCrawlerScheduler());
+        log.info("resource cleared and terminate SpiderCrawler...");
+    }
+}

+ 21 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/SpiderCrawler.java

@@ -0,0 +1,21 @@
+package cn.reghao.bnt.crawler;
+
+import java.util.logging.Logger;
+
+/**
+ * Entry point of the crawl application.
+ *
+ * @author reghao
+ * @date 2020-04-15 09:46:09
+ */
+public class SpiderCrawler {
+    private static final Logger log = Logger.getLogger(SpiderCrawler.class.getName());
+
+    /** Manager endpoint used when none is given on the command line. */
+    private static final String DEFAULT_MANAGER_URL = "http://spider.reghao.cn";
+
+    /**
+     * @param args optional; {@code args[0]} overrides the default manager URL
+     */
+    public static void main(String[] args) {
+        String managerUrl = args.length > 0 ? args[0] : DEFAULT_MANAGER_URL;
+        final Crawler crawler = new Crawler(managerUrl, 10);
+        CrawlerContext crawlerContext = new CrawlerContext(crawler);
+        log.info("starting SpiderCrawler...");
+        // NOTE(review): the start call is disabled in this revision, so nothing is crawled yet;
+        // re-enable it once the migrated crawler is meant to run.
+        //crawlerContext.start();
+    }
+}

+ 115 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/impl/HttpUrlScheduler.java

@@ -0,0 +1,115 @@
+package cn.reghao.bnt.crawler.impl;
+
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.core.url.UrlScheduler;
+import cn.reghao.bnt.core.event.EvtCrawl;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.parser.SiteParser;
+import com.google.gson.JsonObject;
+
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+import java.util.*;
+
+/**
+ * {@link UrlScheduler} that talks to the manager over its HTTP API.
+ * <p>
+ * All calls are best-effort: a failed call is logged and answered with an empty
+ * result instead of an exception.
+ *
+ * @author reghao
+ * @date 2022-05-25 23:44:25
+ */
+public class HttpUrlScheduler implements UrlScheduler {
+    private static final java.util.logging.Logger log =
+            java.util.logging.Logger.getLogger(HttpUrlScheduler.class.getName());
+    /** Request timeout used for every manager call. */
+    private static final Duration TIMEOUT = Duration.ofSeconds(30);
+
+    private final String managerUrl;
+    private final HttpClient client = HttpClient.newBuilder()
+            .version(HttpClient.Version.HTTP_1_1)
+            .build();
+
+    /**
+     * @param managerUrl base URL of the manager, without a trailing slash
+     */
+    public HttpUrlScheduler(String managerUrl) {
+        this.managerUrl = managerUrl;
+    }
+
+    /** Returns the site/parser pairs served by the manager, or an empty list on any failure. */
+    @Override
+    public List<SiteParser> getSiteParsers() {
+        String path = "/api/crawler/siteparsers";
+        try {
+            String data = getData(path);
+            if (data != null) {
+                return JsonConverter.jsonToObjects(data, SiteParser.class);
+            }
+        } catch (Exception e) {
+            reportFailure("GET " + path, e);
+        }
+        return Collections.emptyList();
+    }
+
+    /** Returns the URLs the manager hands out for the given site and parser, or an empty set on any failure. */
+    @Override
+    public Set<CrawlUrl> getUrls(String site, String parser) {
+        String path = "/api/crawler/urls";
+        try {
+            // Encoded so that reserved characters in the names cannot break the query string;
+            // plain concatenation also avoids treating managerUrl as a format string.
+            String query = "?site=" + encode(site) + "&parser=" + encode(parser);
+            String data = getData(path + query);
+            if (data != null) {
+                List<CrawlUrl> list = JsonConverter.jsonToObjects(data, CrawlUrl.class);
+                return new HashSet<>(list);
+            }
+        } catch (Exception e) {
+            reportFailure("GET " + path, e);
+        }
+
+        return Collections.emptySet();
+    }
+
+    /** Posts the raw data as JSON to the manager. */
+    @Override
+    public void putData(RawData rawData) {
+        postJson("/api/crawler/rawdata", JsonConverter.objectToJson(rawData));
+    }
+
+    /** Posts the crawl event as JSON to the manager. */
+    @Override
+    public void crawlEvent(EvtCrawl evtCrawl) {
+        postJson("/api/crawler/event/crawl", JsonConverter.objectToJson(evtCrawl));
+    }
+
+    /**
+     * Performs a GET and unwraps the response envelope.
+     *
+     * @return the JSON text of the envelope's "data" member, or {@code null} when the HTTP
+     *         status is not 200 or the envelope's "code" member is not 0
+     */
+    private String getData(String pathAndQuery) throws java.io.IOException, InterruptedException {
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(managerUrl + pathAndQuery))
+                .timeout(TIMEOUT)
+                .GET()
+                .build();
+        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
+        if (response.statusCode() != 200) {
+            log.warning("GET " + pathAndQuery + " answered HTTP " + response.statusCode());
+            return null;
+        }
+
+        JsonObject envelope = JsonConverter.jsonToJsonElement(response.body()).getAsJsonObject();
+        if (envelope.get("code").getAsInt() != 0) {
+            return null;
+        }
+        return envelope.get("data").toString();
+    }
+
+    /** Posts a JSON payload; a failure is logged and otherwise ignored. */
+    private void postJson(String path, String jsonPayload) {
+        try {
+            HttpRequest request = HttpRequest.newBuilder()
+                    .uri(URI.create(managerUrl + path))
+                    .timeout(TIMEOUT)
+                    .header("Content-Type", "application/json")
+                    .POST(HttpRequest.BodyPublishers.ofString(jsonPayload))
+                    .build();
+            // The response body is never used, so it is discarded instead of buffered.
+            HttpResponse<Void> response = client.send(request, HttpResponse.BodyHandlers.discarding());
+            if (response.statusCode() / 100 != 2) {
+                log.warning("POST " + path + " answered HTTP " + response.statusCode());
+            }
+        } catch (Exception e) {
+            reportFailure("POST " + path, e);
+        }
+    }
+
+    /** URL-encodes a query value; {@code null} is sent as the text "null", as before. */
+    private static String encode(String value) {
+        return java.net.URLEncoder.encode(String.valueOf(value), java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /** Logs a failed manager call and restores the interrupt flag if the failure was an interruption. */
+    private static void reportFailure(String action, Exception e) {
+        if (e instanceof InterruptedException) {
+            // Keep the interruption visible to the thread pool running this call.
+            Thread.currentThread().interrupt();
+        }
+        log.log(java.util.logging.Level.WARNING, action + " failed", e);
+    }
+}

+ 104 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/task/DataProducer.java

@@ -0,0 +1,104 @@
+package cn.reghao.bnt.crawler.task;
+
+import cn.reghao.bnt.crawler.Crawler;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.core.event.EvtCrawl;
+import cn.reghao.bnt.core.http.CrawlRequest;
+import cn.reghao.bnt.core.http.CrawlResponse;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.parser.SiteParser;
+import cn.reghao.bnt.core.url.UrlScheduler;
+
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.Set;
+import java.util.logging.Logger;
+
+/**
+ * Periodic task that crawls the URLs of one site/parser pair, one URL per run.
+ *
+ * @author reghao
+ * @date 2022-05-18 11:38:16
+ */
+public class DataProducer implements Runnable {
+    private static final Logger log = Logger.getLogger(DataProducer.class.getName());
+
+    private final Crawler crawler;
+    private final SiteParser siteParser;
+    private final CrawlRequest crawlRequest;
+    private final UrlScheduler urlScheduler;
+    // Number of failed requests; doubles as the count of runs still to skip while backing off
+    private int failedCount;
+    // True while runs are being skipped by the back-off in antiAntiCrawl()
+    private boolean antiAntiCrawl = false;
+    // URLs fetched from the scheduler that have not been crawled yet
+    private final Queue<CrawlUrl> cachedCrawlUrls = new LinkedList<>();
+
+    public DataProducer(Crawler crawler, SiteParser siteParser) {
+        this.crawler = crawler;
+        this.siteParser = siteParser;
+        this.urlScheduler = crawler.getUrlScheduler();
+        this.crawlRequest = crawler.getCrawlRequest();
+    }
+
+    /**
+     * One scheduling tick: skips while backing off, otherwise crawls one cached URL.
+     * When the cache is empty the tick is spent refilling it; if the scheduler has no
+     * URLs either, this producer asks the crawler to stop it.
+     */
+    @Override
+    public void run() {
+        String site = siteParser.getSite();
+        String parser = siteParser.getParser();
+        try {
+            if (antiAntiCrawl()) {
+                return;
+            }
+
+            CrawlUrl crawlUrl = cachedCrawlUrls.poll();
+            if (crawlUrl == null) {
+                Set<CrawlUrl> set = urlScheduler.getUrls(site, parser);
+                if (!set.isEmpty()) {
+                    cachedCrawlUrls.addAll(set);
+                } else {
+                    crawler.stop(siteParser);
+                }
+                return;
+            }
+            crawl(crawlUrl);
+        } catch (Exception e) {
+            // The exception must not escape (a periodic task is never run again after it throws),
+            // but it should not vanish silently either.
+            log.log(java.util.logging.Level.WARNING,
+                    String.format("crawl task of %s.%s failed", site, parser), e);
+        }
+    }
+
+    /**
+     * Counter-measure against anti-crawling: once more than 10 failures have been counted,
+     * the counter is multiplied by 10 and every following run is skipped (decrementing the
+     * counter each time) until the counter drops below zero.
+     *
+     * @return {@code true} when the current run must be skipped
+     * @date 2021-08-13 14:47
+     */
+    private boolean antiAntiCrawl() {
+        if (failedCount > 10 && !antiAntiCrawl) {
+            failedCount = failedCount * 10;
+            antiAntiCrawl = true;
+        }
+
+        if (failedCount < 0) {
+            antiAntiCrawl = false;
+        }
+
+        if (antiAntiCrawl) {
+            log.severe(String.format("执行 %s 的反反爬虫策略, 还需跳过 %s 次请求", siteParser.getSite(), failedCount));
+            failedCount--;
+        }
+
+        return antiAntiCrawl;
+    }
+
+    /**
+     * Fetches one URL and reports the outcome to the scheduler: the body on 200, the
+     * marker "404" when the resource does not exist, otherwise a failure event carrying
+     * the response body as message.
+     */
+    private void crawl(CrawlUrl crawlUrl) {
+        EvtCrawl evtCrawl = new EvtCrawl(siteParser.getSite(), siteParser.getParser(), crawlUrl.getUrl());
+        CrawlResponse response = crawlRequest.get(crawlUrl);
+        int statusCode = response.getStatusCode();
+        if (statusCode == 200) {
+            urlScheduler.putData(new RawData(crawlUrl, response.getBody()));
+            urlScheduler.crawlEvent(evtCrawl);
+        } else if (statusCode == 404 || statusCode == crawlUrl.getNotFoundCode()) {
+            urlScheduler.putData(new RawData(crawlUrl, "404"));
+            urlScheduler.crawlEvent(evtCrawl);
+        } else {
+            failedCount++;
+            evtCrawl.setMsg(response.getBody());
+            urlScheduler.crawlEvent(evtCrawl);
+        }
+    }
+}

+ 23 - 0
crawler/src/main/java/cn/reghao/bnt/crawler/task/SiteParserGetter.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.crawler.task;
+
+import cn.reghao.bnt.core.url.UrlScheduler;
+import cn.reghao.bnt.crawler.Crawler;
+
+/**
+ * Periodic task that asks the {@link UrlScheduler} for its site/parser pairs and
+ * starts a crawl task for each of them.
+ *
+ * @author reghao
+ * @date 2022-05-18 11:43:36
+ */
+public class SiteParserGetter implements Runnable {
+    private static final java.util.logging.Logger log =
+            java.util.logging.Logger.getLogger(SiteParserGetter.class.getName());
+
+    private final UrlScheduler urlScheduler;
+    private final Crawler crawler;
+
+    public SiteParserGetter(UrlScheduler urlScheduler, Crawler crawler) {
+        this.urlScheduler = urlScheduler;
+        this.crawler = crawler;
+    }
+
+    @Override
+    public void run() {
+        try {
+            urlScheduler.getSiteParsers().forEach(crawler::start);
+        } catch (RuntimeException e) {
+            // A periodic task is never run again once it throws, so one bad poll
+            // (e.g. a null list entry) must not end the polling for good.
+            log.log(java.util.logging.Level.WARNING, "failed to refresh site parsers", e);
+        }
+    }
+}

+ 3 - 0
pom.xml

@@ -13,6 +13,9 @@
         <module>agent</module>
         <module>logstash</module>
         <module>deployer</module>
+        <module>core</module>
+        <module>browser</module>
+        <module>crawler</module>
     </modules>
     <packaging>pom</packaging>
 

+ 82 - 0
web/pom.xml

@@ -163,6 +163,87 @@
             <artifactId>commons-io</artifactId>
             <version>2.6</version>
         </dependency>
+
+        <!-- spider dependencies -->
+        <dependency>
+            <groupId>cn.reghao.jutil</groupId>
+            <artifactId>media</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.bnt</groupId>
+            <artifactId>core</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.bnt</groupId>
+            <artifactId>browser</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.oss</groupId>
+            <artifactId>oss-sdk</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.tnb.account</groupId>
+            <artifactId>account-api</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.tnb.content</groupId>
+            <artifactId>content-api</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.tnb.data</groupId>
+            <artifactId>data-api</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.tnb.user</groupId>
+            <artifactId>user-api</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>cn.reghao.tnb.mall</groupId>
+            <artifactId>mall-api</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.drewnoakes</groupId>
+            <artifactId>metadata-extractor</artifactId>
+            <version>2.18.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-imaging</artifactId>
+            <version>1.0-alpha3</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-data-mongodb</artifactId>
+            <version>2.3.9.RELEASE</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.12.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.dom4j</groupId>
+            <artifactId>dom4j</artifactId>
+            <version>2.1.1</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.google.protobuf</groupId>
+            <artifactId>protobuf-java</artifactId>
+            <version>3.20.0-rc-1</version>
+        </dependency>
     </dependencies>
 
     <profiles>
@@ -190,6 +271,7 @@
                 <directory>src/main/resources</directory>
                 <filtering>true</filtering>
                 <includes>
+                    <include>banner.txt</include>
                     <include>application.yml</include>
                     <include>application-${profile.active}.yml</include>
                     <include>logback-spring.xml</include>

+ 17 - 0
web/src/main/java/cn/reghao/bnt/web/SpringApplication.java

@@ -1,7 +1,11 @@
 package cn.reghao.bnt.web;
 
+import cn.reghao.bnt.web.config.AppProperties;
+import cn.reghao.bnt.web.parser.util.FileCache;
+import cn.reghao.oss.sdk.model.OssConsoleConfig;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.domain.EntityScan;
+import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.ComponentScan;
 import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
 
@@ -13,4 +17,17 @@ public class SpringApplication {
 	public static void main(String[] args) {
 		org.springframework.boot.SpringApplication.run(SpringApplication.class, args);
 	}
+
+	@Bean
+	public FileCache fileCache(AppProperties appProperties) {
+		return new FileCache(appProperties.getCacheDir());
+	}
+
+	/**
+	 * Builds the OSS console configuration from the process environment.
+	 * <p>
+	 * Reads {@code CONSOLE_ENDPOINT}, {@code CONSOLE_ACCESS_KEY_ID} and {@code CONSOLE_ACCESS_KEY_SECRET};
+	 * a variable that is not set is passed on as {@code null}.
+	 *
+	 * @return the configuration holding endpoint, access key id and access key secret
+	 */
+	@Bean
+	public OssConsoleConfig ossConsoleConfig() {
+		return new OssConsoleConfig(
+				System.getenv("CONSOLE_ENDPOINT"),
+				System.getenv("CONSOLE_ACCESS_KEY_ID"),
+				System.getenv("CONSOLE_ACCESS_KEY_SECRET"));
+	}
 }

+ 20 - 0
web/src/main/java/cn/reghao/bnt/web/config/AppProperties.java

@@ -0,0 +1,20 @@
+package cn.reghao.bnt.web.config;
+
+import lombok.Getter;
+import lombok.Setter;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Application settings bound from configuration keys under the {@code app} prefix.
+ * <p>
+ * Getters and setters are generated by Lombok; the setters are what allows the values to be
+ * populated through {@code @ConfigurationProperties} binding.
+ *
+ * @author reghao
+ * @date 2021-12-30 11:01:46
+ */
+@Getter
+@Setter
+@Configuration
+@ConfigurationProperties(prefix = "app")
+public class AppProperties {
+    // OSS endpoint value; its consumers are not visible in this file
+    private String ossEndpoint;
+    // presumably the location of a stored cookie file - TODO confirm against the code that reads it
+    private String cookiePath;
+    // cache directory; passed to the FileCache constructor by the fileCache bean in SpringApplication
+    private String cacheDir;
+}

+ 64 - 0
web/src/main/java/cn/reghao/bnt/web/config/MongoConfig.java

@@ -0,0 +1,64 @@
+package cn.reghao.bnt.web.config;
+
+import com.mongodb.MongoClientSettings;
+import com.mongodb.MongoCredential;
+import com.mongodb.ServerAddress;
+import com.mongodb.client.MongoClient;
+import com.mongodb.client.MongoClients;
+import org.springframework.boot.autoconfigure.mongo.MongoProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.data.mongodb.MongoDatabaseFactory;
+import org.springframework.data.mongodb.core.SimpleMongoClientDatabaseFactory;
+import org.springframework.data.mongodb.core.convert.DbRefResolver;
+import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver;
+import org.springframework.data.mongodb.core.convert.MappingMongoConverter;
+import org.springframework.data.mongodb.core.convert.MongoCustomConversions;
+import org.springframework.data.mongodb.core.mapping.MongoMappingContext;
+
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * MongoDB 客户端配置
+ *
+ * @author reghao
+ * @date 2022-01-27 20:12:54
+ */
+@Configuration
+public class MongoConfig {
+    @Bean
+    public MappingMongoConverter mongoConverter(MongoDatabaseFactory dbFactory, MongoMappingContext context,
+                                                MongoCustomConversions conversions) {
+        DbRefResolver dbRefResolver = new DefaultDbRefResolver(dbFactory);
+        MappingMongoConverter mappingConverter = new MappingMongoConverter(dbRefResolver, context);
+        // remove _class field
+        // mappingConverter.setTypeMapper(new DefaultMongoTypeMapper(null));
+        mappingConverter.setCustomConversions(conversions);
+        return mappingConverter;
+    }
+
+    @Bean
+    public MongoDatabaseFactory mongoDbFactory(MongoProperties mongoProperties) {
+        ServerAddress serverAddress = new ServerAddress(mongoProperties.getHost());
+        MongoClientSettings mongoClientSettings = MongoClientSettings.builder()
+                .applyToSocketSettings(builder ->
+                        builder.readTimeout(0, TimeUnit.SECONDS)
+                                .connectTimeout(0, TimeUnit.SECONDS))
+                .applyToConnectionPoolSettings(builder ->
+                        builder.minSize(10).maxSize(20)
+                                .maxConnectionIdleTime(0, TimeUnit.SECONDS)
+                                .maxConnectionLifeTime(0, TimeUnit.SECONDS))
+                .credential(MongoCredential.createCredential(mongoProperties.getUsername(),
+                        mongoProperties.getAuthenticationDatabase(), mongoProperties.getPassword()))
+                //.applicationName("SpiderParser")
+                .applyToServerSettings(builder ->
+                        builder.minHeartbeatFrequency(10, TimeUnit.SECONDS)
+                                .heartbeatFrequency(20, TimeUnit.SECONDS))
+                .applyToClusterSettings(builder -> builder.hosts(List.of(serverAddress)))
+                .build();
+
+        MongoClient mongoClient = MongoClients.create(mongoClientSettings);
+        return new SimpleMongoClientDatabaseFactory(mongoClient, mongoProperties.getDatabase());
+    }
+}

+ 37 - 0
web/src/main/java/cn/reghao/bnt/web/config/OssConsoleClientFactory.java

@@ -0,0 +1,37 @@
+package cn.reghao.bnt.web.config;
+
+import cn.reghao.oss.sdk.OssConsoleClient;
+import cn.reghao.oss.sdk.model.OssConsoleConfig;
+import org.springframework.stereotype.Component;
+
+/**
+ * Lazily creates and caches a single {@link OssConsoleClient} built from the injected configuration.
+ * <p>
+ * If construction fails, the stack trace is printed and {@code null} is returned; nothing is cached
+ * in that case, so the next call attempts construction again.
+ *
+ * @author reghao
+ * @date 2024-02-24 17:41:42
+ */
+@Component
+public class OssConsoleClientFactory {
+    private final OssConsoleConfig ossConsoleConfig;
+    // created on first successful call to getOssConsoleClient()
+    private OssConsoleClient ossConsoleClient;
+
+    public OssConsoleClientFactory(OssConsoleConfig ossConsoleConfig) {
+        this.ossConsoleConfig = ossConsoleConfig;
+    }
+
+    /**
+     * Returns the cached client, creating it first when none exists yet.
+     *
+     * @return the client, or {@code null} when it could not be created
+     */
+    public OssConsoleClient getOssConsoleClient() {
+        if (ossConsoleClient != null) {
+            return ossConsoleClient;
+        }
+
+        try {
+            ossConsoleClient = new OssConsoleClient(ossConsoleConfig);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+
+        return ossConsoleClient;
+    }
+}

+ 2 - 0
web/src/main/java/cn/reghao/bnt/web/config/spring/BeansConfig.java

@@ -3,6 +3,7 @@ package cn.reghao.bnt.web.config.spring;
 import cn.reghao.jutil.jdk.converter.ByteConverter;
 import cn.reghao.jutil.jdk.http.WebClient;
 import cn.reghao.jutil.jdk.http.WebRequest;
+import cn.reghao.jutil.tool.http.DefaultWebRequest;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import oshi.SystemInfo;
@@ -20,6 +21,7 @@ public class BeansConfig {
 
     @Bean
     public WebRequest webRequest() {
+        // WebClient is the WebRequest implementation actually returned. The DefaultWebRequest
+        // alternative (imported at the top of this file) is kept below, disabled, for reference.
+        //return new DefaultWebRequest();
         return new WebClient();
     }
 

+ 2442 - 0
web/src/main/java/cn/reghao/bnt/web/parser/autogen/BiliDm.java

@@ -0,0 +1,2442 @@
+// Generated by the protocol buffer compiler.  DO NOT EDIT!
+// source: dm.proto
+
+package cn.reghao.bnt.web.parser.autogen;
+
+public final class BiliDm {
+  private BiliDm() {}
+  public static void registerAllExtensions(
+      com.google.protobuf.ExtensionRegistryLite registry) {
+  }
+
+  public static void registerAllExtensions(
+      com.google.protobuf.ExtensionRegistry registry) {
+    registerAllExtensions(
+        (com.google.protobuf.ExtensionRegistryLite) registry);
+  }
+  public interface DmSegMobileReplyOrBuilder extends
+      // @@protoc_insertion_point(interface_extends:DmSegMobileReply)
+      com.google.protobuf.MessageOrBuilder {
+
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    java.util.List<DanmakuElem>
+        getElemsList();
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    DanmakuElem getElems(int index);
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    int getElemsCount();
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    java.util.List<? extends DanmakuElemOrBuilder>
+        getElemsOrBuilderList();
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    DanmakuElemOrBuilder getElemsOrBuilder(
+        int index);
+  }
+  /**
+   * Protobuf type {@code DmSegMobileReply}
+   */
+  public static final class DmSegMobileReply extends
+      com.google.protobuf.GeneratedMessageV3 implements
+      // @@protoc_insertion_point(message_implements:DmSegMobileReply)
+      DmSegMobileReplyOrBuilder {
+  private static final long serialVersionUID = 0L;
+    // Use DmSegMobileReply.newBuilder() to construct.
+    private DmSegMobileReply(com.google.protobuf.GeneratedMessageV3.Builder<?> builder) {
+      super(builder);
+    }
+    private DmSegMobileReply() {
+      elems_ = java.util.Collections.emptyList();
+    }
+
+    @Override
+    @SuppressWarnings({"unused"})
+    protected Object newInstance(
+        UnusedPrivateParameter unused) {
+      return new DmSegMobileReply();
+    }
+
+    @Override
+    public final com.google.protobuf.UnknownFieldSet
+    getUnknownFields() {
+      return this.unknownFields;
+    }
+    private DmSegMobileReply(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      this();
+      if (extensionRegistry == null) {
+        throw new NullPointerException();
+      }
+      int mutable_bitField0_ = 0;
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            case 10: {
+              if (!((mutable_bitField0_ & 0x00000001) != 0)) {
+                elems_ = new java.util.ArrayList<DanmakuElem>();
+                mutable_bitField0_ |= 0x00000001;
+              }
+              elems_.add(
+                  input.readMessage(DanmakuElem.parser(), extensionRegistry));
+              break;
+            }
+            default: {
+              if (!parseUnknownField(
+                  input, unknownFields, extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (com.google.protobuf.UninitializedMessageException e) {
+        throw e.asInvalidProtocolBufferException().setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e).setUnfinishedMessage(this);
+      } finally {
+        if (((mutable_bitField0_ & 0x00000001) != 0)) {
+          elems_ = java.util.Collections.unmodifiableList(elems_);
+        }
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return BiliDm.internal_static_DmSegMobileReply_descriptor;
+    }
+
+    @Override
+    protected FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return BiliDm.internal_static_DmSegMobileReply_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              DmSegMobileReply.class, Builder.class);
+    }
+
+    public static final int ELEMS_FIELD_NUMBER = 1;
+    private java.util.List<DanmakuElem> elems_;
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    @Override
+    public java.util.List<DanmakuElem> getElemsList() {
+      return elems_;
+    }
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    @Override
+    public java.util.List<? extends DanmakuElemOrBuilder>
+        getElemsOrBuilderList() {
+      return elems_;
+    }
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    @Override
+    public int getElemsCount() {
+      return elems_.size();
+    }
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    @Override
+    public DanmakuElem getElems(int index) {
+      return elems_.get(index);
+    }
+    /**
+     * <code>repeated .DanmakuElem elems = 1;</code>
+     */
+    @Override
+    public DanmakuElemOrBuilder getElemsOrBuilder(
+        int index) {
+      return elems_.get(index);
+    }
+
+    private byte memoizedIsInitialized = -1;
+    @Override
+    public final boolean isInitialized() {
+      byte isInitialized = memoizedIsInitialized;
+      if (isInitialized == 1) return true;
+      if (isInitialized == 0) return false;
+
+      memoizedIsInitialized = 1;
+      return true;
+    }
+
+    @Override
+    public void writeTo(com.google.protobuf.CodedOutputStream output)
+                        throws java.io.IOException {
+      for (int i = 0; i < elems_.size(); i++) {
+        output.writeMessage(1, elems_.get(i));
+      }
+      unknownFields.writeTo(output);
+    }
+
+    @Override
+    public int getSerializedSize() {
+      int size = memoizedSize;
+      if (size != -1) return size;
+
+      size = 0;
+      for (int i = 0; i < elems_.size(); i++) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeMessageSize(1, elems_.get(i));
+      }
+      size += unknownFields.getSerializedSize();
+      memoizedSize = size;
+      return size;
+    }
+
+    @Override
+    public boolean equals(final Object obj) {
+      if (obj == this) {
+       return true;
+      }
+      if (!(obj instanceof DmSegMobileReply)) {
+        return super.equals(obj);
+      }
+      DmSegMobileReply other = (DmSegMobileReply) obj;
+
+      if (!getElemsList()
+          .equals(other.getElemsList())) return false;
+      if (!unknownFields.equals(other.unknownFields)) return false;
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      if (memoizedHashCode != 0) {
+        return memoizedHashCode;
+      }
+      int hash = 41;
+      hash = (19 * hash) + getDescriptor().hashCode();
+      if (getElemsCount() > 0) {
+        hash = (37 * hash) + ELEMS_FIELD_NUMBER;
+        hash = (53 * hash) + getElemsList().hashCode();
+      }
+      hash = (29 * hash) + unknownFields.hashCode();
+      memoizedHashCode = hash;
+      return hash;
+    }
+
+    public static DmSegMobileReply parseFrom(
+        java.nio.ByteBuffer data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static DmSegMobileReply parseFrom(
+        java.nio.ByteBuffer data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static DmSegMobileReply parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static DmSegMobileReply parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static DmSegMobileReply parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    public static DmSegMobileReply parseFrom(
+        byte[] data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static DmSegMobileReply parseFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseWithIOException(PARSER, input);
+    }
+    public static DmSegMobileReply parseFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseWithIOException(PARSER, input, extensionRegistry);
+    }
+    public static DmSegMobileReply parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseDelimitedWithIOException(PARSER, input);
+    }
+    public static DmSegMobileReply parseDelimitedFrom(
+        java.io.InputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseDelimitedWithIOException(PARSER, input, extensionRegistry);
+    }
+    public static DmSegMobileReply parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseWithIOException(PARSER, input);
+    }
+    public static DmSegMobileReply parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseWithIOException(PARSER, input, extensionRegistry);
+    }
+
+    @Override
+    public Builder newBuilderForType() { return newBuilder(); }
+    public static Builder newBuilder() {
+      return DEFAULT_INSTANCE.toBuilder();
+    }
+    public static Builder newBuilder(DmSegMobileReply prototype) {
+      return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
+    }
+    @Override
+    public Builder toBuilder() {
+      return this == DEFAULT_INSTANCE
+          ? new Builder() : new Builder().mergeFrom(this);
+    }
+
+    @Override
+    protected Builder newBuilderForType(
+        BuilderParent parent) {
+      Builder builder = new Builder(parent);
+      return builder;
+    }
+    /**
+     * Protobuf type {@code DmSegMobileReply}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessageV3.Builder<Builder> implements
+        // @@protoc_insertion_point(builder_implements:DmSegMobileReply)
+        DmSegMobileReplyOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return BiliDm.internal_static_DmSegMobileReply_descriptor;
+      }
+
+      @Override
+      protected FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return BiliDm.internal_static_DmSegMobileReply_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                DmSegMobileReply.class, Builder.class);
+      }
+
+      // Construct using cn.reghao.bnt.web.parser.autogen.BiliDm.DmSegMobileReply.newBuilder()
+      private Builder() {
+        maybeForceBuilderInitialization();
+      }
+
+      private Builder(
+          BuilderParent parent) {
+        super(parent);
+        maybeForceBuilderInitialization();
+      }
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessageV3
+                .alwaysUseFieldBuilders) {
+          getElemsFieldBuilder();
+        }
+      }
+      @Override
+      public Builder clear() {
+        super.clear();
+        if (elemsBuilder_ == null) {
+          elems_ = java.util.Collections.emptyList();
+          bitField0_ = (bitField0_ & ~0x00000001);
+        } else {
+          elemsBuilder_.clear();
+        }
+        return this;
+      }
+
+      @Override
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return BiliDm.internal_static_DmSegMobileReply_descriptor;
+      }
+
+      @Override
+      public DmSegMobileReply getDefaultInstanceForType() {
+        return DmSegMobileReply.getDefaultInstance();
+      }
+
+      @Override
+      public DmSegMobileReply build() {
+        DmSegMobileReply result = buildPartial();
+        if (!result.isInitialized()) {
+          throw newUninitializedMessageException(result);
+        }
+        return result;
+      }
+
+      @Override
+      public DmSegMobileReply buildPartial() {
+        DmSegMobileReply result = new DmSegMobileReply(this);
+        int from_bitField0_ = bitField0_;
+        if (elemsBuilder_ == null) {
+          if (((bitField0_ & 0x00000001) != 0)) {
+            elems_ = java.util.Collections.unmodifiableList(elems_);
+            bitField0_ = (bitField0_ & ~0x00000001);
+          }
+          result.elems_ = elems_;
+        } else {
+          result.elems_ = elemsBuilder_.build();
+        }
+        onBuilt();
+        return result;
+      }
+
+      @Override
+      public Builder clone() {
+        return super.clone();
+      }
+      @Override
+      public Builder setField(
+          com.google.protobuf.Descriptors.FieldDescriptor field,
+          Object value) {
+        return super.setField(field, value);
+      }
+      @Override
+      public Builder clearField(
+          com.google.protobuf.Descriptors.FieldDescriptor field) {
+        return super.clearField(field);
+      }
+      @Override
+      public Builder clearOneof(
+          com.google.protobuf.Descriptors.OneofDescriptor oneof) {
+        return super.clearOneof(oneof);
+      }
+      @Override
+      public Builder setRepeatedField(
+          com.google.protobuf.Descriptors.FieldDescriptor field,
+          int index, Object value) {
+        return super.setRepeatedField(field, index, value);
+      }
+      @Override
+      public Builder addRepeatedField(
+          com.google.protobuf.Descriptors.FieldDescriptor field,
+          Object value) {
+        return super.addRepeatedField(field, value);
+      }
+      @Override
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (other instanceof DmSegMobileReply) {
+          return mergeFrom((DmSegMobileReply)other);
+        } else {
+          super.mergeFrom(other);
+          return this;
+        }
+      }
+
+      public Builder mergeFrom(DmSegMobileReply other) {
+        if (other == DmSegMobileReply.getDefaultInstance()) return this;
+        if (elemsBuilder_ == null) {
+          if (!other.elems_.isEmpty()) {
+            if (elems_.isEmpty()) {
+              elems_ = other.elems_;
+              bitField0_ = (bitField0_ & ~0x00000001);
+            } else {
+              ensureElemsIsMutable();
+              elems_.addAll(other.elems_);
+            }
+            onChanged();
+          }
+        } else {
+          if (!other.elems_.isEmpty()) {
+            if (elemsBuilder_.isEmpty()) {
+              elemsBuilder_.dispose();
+              elemsBuilder_ = null;
+              elems_ = other.elems_;
+              bitField0_ = (bitField0_ & ~0x00000001);
+              elemsBuilder_ = 
+                com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders ?
+                   getElemsFieldBuilder() : null;
+            } else {
+              elemsBuilder_.addAllMessages(other.elems_);
+            }
+          }
+        }
+        this.mergeUnknownFields(other.unknownFields);
+        onChanged();
+        return this;
+      }
+
+      @Override
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      @Override
+      public Builder mergeFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        DmSegMobileReply parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (DmSegMobileReply) e.getUnfinishedMessage();
+          throw e.unwrapIOException();
+        } finally {
+          if (parsedMessage != null) {
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+      private int bitField0_;
+
+      private java.util.List<DanmakuElem> elems_ =
+        java.util.Collections.emptyList();
+      private void ensureElemsIsMutable() {
+        if (!((bitField0_ & 0x00000001) != 0)) {
+          elems_ = new java.util.ArrayList<DanmakuElem>(elems_);
+          bitField0_ |= 0x00000001;
+         }
+      }
+
+      private com.google.protobuf.RepeatedFieldBuilderV3<
+          DanmakuElem, DanmakuElem.Builder, DanmakuElemOrBuilder> elemsBuilder_;
+
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public java.util.List<DanmakuElem> getElemsList() {
+        if (elemsBuilder_ == null) {
+          return java.util.Collections.unmodifiableList(elems_);
+        } else {
+          return elemsBuilder_.getMessageList();
+        }
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public int getElemsCount() {
+        if (elemsBuilder_ == null) {
+          return elems_.size();
+        } else {
+          return elemsBuilder_.getCount();
+        }
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public DanmakuElem getElems(int index) {
+        if (elemsBuilder_ == null) {
+          return elems_.get(index);
+        } else {
+          return elemsBuilder_.getMessage(index);
+        }
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder setElems(
+          int index, DanmakuElem value) {
+        if (elemsBuilder_ == null) {
+          if (value == null) {
+            throw new NullPointerException();
+          }
+          ensureElemsIsMutable();
+          elems_.set(index, value);
+          onChanged();
+        } else {
+          elemsBuilder_.setMessage(index, value);
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder setElems(
+          int index, DanmakuElem.Builder builderForValue) {
+        if (elemsBuilder_ == null) {
+          ensureElemsIsMutable();
+          elems_.set(index, builderForValue.build());
+          onChanged();
+        } else {
+          elemsBuilder_.setMessage(index, builderForValue.build());
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder addElems(DanmakuElem value) {
+        if (elemsBuilder_ == null) {
+          if (value == null) {
+            throw new NullPointerException();
+          }
+          ensureElemsIsMutable();
+          elems_.add(value);
+          onChanged();
+        } else {
+          elemsBuilder_.addMessage(value);
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder addElems(
+          int index, DanmakuElem value) {
+        if (elemsBuilder_ == null) {
+          if (value == null) {
+            throw new NullPointerException();
+          }
+          ensureElemsIsMutable();
+          elems_.add(index, value);
+          onChanged();
+        } else {
+          elemsBuilder_.addMessage(index, value);
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder addElems(
+          DanmakuElem.Builder builderForValue) {
+        if (elemsBuilder_ == null) {
+          ensureElemsIsMutable();
+          elems_.add(builderForValue.build());
+          onChanged();
+        } else {
+          elemsBuilder_.addMessage(builderForValue.build());
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder addElems(
+          int index, DanmakuElem.Builder builderForValue) {
+        if (elemsBuilder_ == null) {
+          ensureElemsIsMutable();
+          elems_.add(index, builderForValue.build());
+          onChanged();
+        } else {
+          elemsBuilder_.addMessage(index, builderForValue.build());
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public Builder addAllElems(
+          Iterable<? extends DanmakuElem> values) {
+        if (elemsBuilder_ == null) {
+          ensureElemsIsMutable();
+          com.google.protobuf.AbstractMessageLite.Builder.addAll(
+              values, elems_);
+          onChanged();
+        } else {
+          elemsBuilder_.addAllMessages(values);
+        }
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       *
+       * Removes all elements from the {@code elems} field. Without a field
+       * builder the list is replaced by an empty list and bit 0 of
+       * {@code bitField0_} is cleared.
+       *
+       * @return this builder
+       */
+      public Builder clearElems() {
+        if (elemsBuilder_ != null) {
+          elemsBuilder_.clear();
+          return this;
+        }
+        elems_ = java.util.Collections.emptyList();
+        bitField0_ = (bitField0_ & ~0x00000001);
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       *
+       * Removes the element at {@code index} from the {@code elems} field.
+       *
+       * @param index position of the element to remove
+       * @return this builder
+       */
+      public Builder removeElems(int index) {
+        if (elemsBuilder_ != null) {
+          elemsBuilder_.remove(index);
+          return this;
+        }
+        ensureElemsIsMutable();
+        elems_.remove(index);
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public DanmakuElem.Builder getElemsBuilder(
+          int index) {
+        return getElemsFieldBuilder().getBuilder(index);
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public DanmakuElemOrBuilder getElemsOrBuilder(
+          int index) {
+        if (elemsBuilder_ == null) {
+          return elems_.get(index);  } else {
+          return elemsBuilder_.getMessageOrBuilder(index);
+        }
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public java.util.List<? extends DanmakuElemOrBuilder>
+           getElemsOrBuilderList() {
+        if (elemsBuilder_ != null) {
+          return elemsBuilder_.getMessageOrBuilderList();
+        } else {
+          return java.util.Collections.unmodifiableList(elems_);
+        }
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public DanmakuElem.Builder addElemsBuilder() {
+        return getElemsFieldBuilder().addBuilder(
+            DanmakuElem.getDefaultInstance());
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public DanmakuElem.Builder addElemsBuilder(
+          int index) {
+        return getElemsFieldBuilder().addBuilder(
+            index, DanmakuElem.getDefaultInstance());
+      }
+      /**
+       * <code>repeated .DanmakuElem elems = 1;</code>
+       */
+      public java.util.List<DanmakuElem.Builder>
+           getElemsBuilderList() {
+        return getElemsFieldBuilder().getBuilderList();
+      }
+      private com.google.protobuf.RepeatedFieldBuilderV3<
+          DanmakuElem, DanmakuElem.Builder, DanmakuElemOrBuilder>
+          getElemsFieldBuilder() {
+        if (elemsBuilder_ == null) {
+          elemsBuilder_ = new com.google.protobuf.RepeatedFieldBuilderV3<
+              DanmakuElem, DanmakuElem.Builder, DanmakuElemOrBuilder>(
+                  elems_,
+                  ((bitField0_ & 0x00000001) != 0),
+                  getParentForChildren(),
+                  isClean());
+          elems_ = null;
+        }
+        return elemsBuilder_;
+      }
+      @Override
+      public final Builder setUnknownFields(
+          final com.google.protobuf.UnknownFieldSet unknownFields) {
+        return super.setUnknownFields(unknownFields);
+      }
+
+      @Override
+      public final Builder mergeUnknownFields(
+          final com.google.protobuf.UnknownFieldSet unknownFields) {
+        return super.mergeUnknownFields(unknownFields);
+      }
+
+
+      // @@protoc_insertion_point(builder_scope:DmSegMobileReply)
+    }
+
+    // @@protoc_insertion_point(class_scope:DmSegMobileReply)
+    // Single shared default instance, created in the static initializer below
+    // via the no-argument constructor.
+    private static final DmSegMobileReply DEFAULT_INSTANCE;
+    static {
+      DEFAULT_INSTANCE = new DmSegMobileReply();
+    }
+
+    /**
+     * Returns the shared default instance of {@code DmSegMobileReply}.
+     *
+     * @return the default instance
+     */
+    public static DmSegMobileReply getDefaultInstance() {
+      return DmSegMobileReply.DEFAULT_INSTANCE;
+    }
+
+    /** Parser that creates a message by invoking the stream-parsing constructor. */
+    private static final com.google.protobuf.Parser<DmSegMobileReply> PARSER =
+        new com.google.protobuf.AbstractParser<DmSegMobileReply>() {
+          @Override
+          public DmSegMobileReply parsePartialFrom(
+              com.google.protobuf.CodedInputStream input,
+              com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+              throws com.google.protobuf.InvalidProtocolBufferException {
+            return new DmSegMobileReply(input, extensionRegistry);
+          }
+        };
+
+    /**
+     * Returns the parser for {@code DmSegMobileReply}.
+     *
+     * @return the shared parser
+     */
+    public static com.google.protobuf.Parser<DmSegMobileReply> parser() {
+      return DmSegMobileReply.PARSER;
+    }
+
+    /**
+     * Instance-level accessor for the parser of this message type.
+     *
+     * @return the shared parser
+     */
+    @Override
+    public com.google.protobuf.Parser<DmSegMobileReply> getParserForType() {
+      return DmSegMobileReply.PARSER;
+    }
+
+    /**
+     * Instance-level accessor for the default instance of this message type.
+     *
+     * @return the shared default instance
+     */
+    @Override
+    public DmSegMobileReply getDefaultInstanceForType() {
+      return DmSegMobileReply.DEFAULT_INSTANCE;
+    }
+
+  }
+
+  /**
+   * Read-only accessors shared by {@code DanmakuElem} and its builder.
+   * Each string field is exposed both as a {@code String} and as a
+   * {@code ByteString}.
+   */
+  public interface DanmakuElemOrBuilder extends
+      // @@protoc_insertion_point(interface_extends:DanmakuElem)
+      com.google.protobuf.MessageOrBuilder {
+
+    /**
+     * Field <code>int64 id = 1;</code>
+     * @return The id.
+     */
+    long getId();
+
+    /**
+     * Field <code>int32 progress = 2;</code>
+     * @return The progress.
+     */
+    int getProgress();
+
+    /**
+     * Field <code>int32 mode = 3;</code>
+     * @return The mode.
+     */
+    int getMode();
+
+    /**
+     * Field <code>int32 fontsize = 4;</code>
+     * @return The fontsize.
+     */
+    int getFontsize();
+
+    /**
+     * Field <code>uint32 color = 5;</code> (declared uint32, exposed as a Java int).
+     * @return The color.
+     */
+    int getColor();
+
+    /**
+     * Field <code>string midHash = 6;</code>
+     * @return The midHash.
+     */
+    String getMidHash();
+
+    /**
+     * Field <code>string midHash = 6;</code>
+     * @return The bytes for midHash.
+     */
+    com.google.protobuf.ByteString getMidHashBytes();
+
+    /**
+     * Field <code>string content = 7;</code>
+     * @return The content.
+     */
+    String getContent();
+
+    /**
+     * Field <code>string content = 7;</code>
+     * @return The bytes for content.
+     */
+    com.google.protobuf.ByteString getContentBytes();
+
+    /**
+     * Field <code>int64 ctime = 8;</code>
+     * @return The ctime.
+     */
+    long getCtime();
+
+    /**
+     * Field <code>int32 weight = 9;</code>
+     * @return The weight.
+     */
+    int getWeight();
+
+    /**
+     * Field <code>string action = 10;</code>
+     * @return The action.
+     */
+    String getAction();
+
+    /**
+     * Field <code>string action = 10;</code>
+     * @return The bytes for action.
+     */
+    com.google.protobuf.ByteString getActionBytes();
+
+    /**
+     * Field <code>int32 pool = 11;</code>
+     * @return The pool.
+     */
+    int getPool();
+
+    /**
+     * Field <code>string idStr = 12;</code>
+     * @return The idStr.
+     */
+    String getIdStr();
+
+    /**
+     * Field <code>string idStr = 12;</code>
+     * @return The bytes for idStr.
+     */
+    com.google.protobuf.ByteString getIdStrBytes();
+  }
+  /**
+   * Protobuf type {@code DanmakuElem}
+   */
+  public static final class DanmakuElem extends
+      com.google.protobuf.GeneratedMessageV3 implements
+      // @@protoc_insertion_point(message_implements:DanmakuElem)
+      DanmakuElemOrBuilder {
+  private static final long serialVersionUID = 0L;
+    /**
+     * Creates a message from a builder; the builder is handed to the superclass.
+     * Callers should obtain instances through {@code DanmakuElem.newBuilder()}.
+     */
+    private DanmakuElem(com.google.protobuf.GeneratedMessageV3.Builder<?> builder) {
+      super(builder);
+    }
+    /** Creates an empty message with every string field set to the empty string. */
+    private DanmakuElem() {
+      this.midHash_ = "";
+      this.content_ = "";
+      this.action_ = "";
+      this.idStr_ = "";
+    }
+
+    /**
+     * Creates a fresh, empty {@code DanmakuElem}; the parameter is ignored.
+     *
+     * @param unused not used
+     * @return a new empty message
+     */
+    @Override
+    @SuppressWarnings({"unused"})
+    protected Object newInstance(UnusedPrivateParameter unused) {
+      return new DanmakuElem();
+    }
+
+    @Override
+    public final com.google.protobuf.UnknownFieldSet
+    getUnknownFields() {
+      return this.unknownFields;
+    }
+    private DanmakuElem(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      this();
+      if (extensionRegistry == null) {
+        throw new NullPointerException();
+      }
+      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
+          com.google.protobuf.UnknownFieldSet.newBuilder();
+      try {
+        boolean done = false;
+        while (!done) {
+          int tag = input.readTag();
+          switch (tag) {
+            case 0:
+              done = true;
+              break;
+            case 8: {
+
+              id_ = input.readInt64();
+              break;
+            }
+            case 16: {
+
+              progress_ = input.readInt32();
+              break;
+            }
+            case 24: {
+
+              mode_ = input.readInt32();
+              break;
+            }
+            case 32: {
+
+              fontsize_ = input.readInt32();
+              break;
+            }
+            case 40: {
+
+              color_ = input.readUInt32();
+              break;
+            }
+            case 50: {
+              String s = input.readStringRequireUtf8();
+
+              midHash_ = s;
+              break;
+            }
+            case 58: {
+              String s = input.readStringRequireUtf8();
+
+              content_ = s;
+              break;
+            }
+            case 64: {
+
+              ctime_ = input.readInt64();
+              break;
+            }
+            case 72: {
+
+              weight_ = input.readInt32();
+              break;
+            }
+            case 82: {
+              String s = input.readStringRequireUtf8();
+
+              action_ = s;
+              break;
+            }
+            case 88: {
+
+              pool_ = input.readInt32();
+              break;
+            }
+            case 98: {
+              String s = input.readStringRequireUtf8();
+
+              idStr_ = s;
+              break;
+            }
+            default: {
+              if (!parseUnknownField(
+                  input, unknownFields, extensionRegistry, tag)) {
+                done = true;
+              }
+              break;
+            }
+          }
+        }
+      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(this);
+      } catch (com.google.protobuf.UninitializedMessageException e) {
+        throw e.asInvalidProtocolBufferException().setUnfinishedMessage(this);
+      } catch (java.io.IOException e) {
+        throw new com.google.protobuf.InvalidProtocolBufferException(
+            e).setUnfinishedMessage(this);
+      } finally {
+        this.unknownFields = unknownFields.build();
+        makeExtensionsImmutable();
+      }
+    }
+    public static final com.google.protobuf.Descriptors.Descriptor
+        getDescriptor() {
+      return BiliDm.internal_static_DanmakuElem_descriptor;
+    }
+
+    @Override
+    protected FieldAccessorTable
+        internalGetFieldAccessorTable() {
+      return BiliDm.internal_static_DanmakuElem_fieldAccessorTable
+          .ensureFieldAccessorsInitialized(
+              DanmakuElem.class, Builder.class);
+    }
+
+    public static final int ID_FIELD_NUMBER = 1;
+    private long id_;
+    /**
+     * Field <code>int64 id = 1;</code>
+     * @return The id.
+     */
+    @Override
+    public long getId() {
+      return this.id_;
+    }
+
+    public static final int PROGRESS_FIELD_NUMBER = 2;
+    private int progress_;
+    /**
+     * Field <code>int32 progress = 2;</code>
+     * @return The progress.
+     */
+    @Override
+    public int getProgress() {
+      return this.progress_;
+    }
+
+    public static final int MODE_FIELD_NUMBER = 3;
+    private int mode_;
+    /**
+     * Field <code>int32 mode = 3;</code>
+     * @return The mode.
+     */
+    @Override
+    public int getMode() {
+      return this.mode_;
+    }
+
+    public static final int FONTSIZE_FIELD_NUMBER = 4;
+    private int fontsize_;
+    /**
+     * Field <code>int32 fontsize = 4;</code>
+     * @return The fontsize.
+     */
+    @Override
+    public int getFontsize() {
+      return this.fontsize_;
+    }
+
+    public static final int COLOR_FIELD_NUMBER = 5;
+    private int color_;
+    /**
+     * Field <code>uint32 color = 5;</code> (declared uint32, held in a Java int).
+     * @return The color.
+     */
+    @Override
+    public int getColor() {
+      return this.color_;
+    }
+
+    public static final int MIDHASH_FIELD_NUMBER = 6;
+    private volatile Object midHash_;
+    /**
+     * Field <code>string midHash = 6;</code>
+     *
+     * The backing field holds either a {@code String} or a {@code ByteString};
+     * a {@code ByteString} is decoded as UTF-8 and the decoded string is
+     * written back to the field.
+     * @return The midHash.
+     */
+    @Override
+    public String getMidHash() {
+      final Object current = midHash_;
+      if (current instanceof String) {
+        return (String) current;
+      }
+      final String decoded = ((com.google.protobuf.ByteString) current).toStringUtf8();
+      midHash_ = decoded;
+      return decoded;
+    }
+    /**
+     * <code>string midHash = 6;</code>
+     * @return The bytes for midHash.
+     */
+    @Override
+    public com.google.protobuf.ByteString
+        getMidHashBytes() {
+      Object ref = midHash_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (String) ref);
+        midHash_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    public static final int CONTENT_FIELD_NUMBER = 7;
+    private volatile Object content_;
+    /**
+     * Field <code>string content = 7;</code>
+     *
+     * The backing field holds either a {@code String} or a {@code ByteString};
+     * a {@code ByteString} is decoded as UTF-8 and the decoded string is
+     * written back to the field.
+     * @return The content.
+     */
+    @Override
+    public String getContent() {
+      final Object current = content_;
+      if (current instanceof String) {
+        return (String) current;
+      }
+      final String decoded = ((com.google.protobuf.ByteString) current).toStringUtf8();
+      content_ = decoded;
+      return decoded;
+    }
+    /**
+     * <code>string content = 7;</code>
+     * @return The bytes for content.
+     */
+    @Override
+    public com.google.protobuf.ByteString
+        getContentBytes() {
+      Object ref = content_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (String) ref);
+        content_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    public static final int CTIME_FIELD_NUMBER = 8;
+    private long ctime_;
+    /**
+     * Field <code>int64 ctime = 8;</code>
+     * @return The ctime.
+     */
+    @Override
+    public long getCtime() {
+      return this.ctime_;
+    }
+
+    public static final int WEIGHT_FIELD_NUMBER = 9;
+    private int weight_;
+    /**
+     * Field <code>int32 weight = 9;</code>
+     * @return The weight.
+     */
+    @Override
+    public int getWeight() {
+      return this.weight_;
+    }
+
+    public static final int ACTION_FIELD_NUMBER = 10;
+    private volatile Object action_;
+    /**
+     * Field <code>string action = 10;</code>
+     *
+     * The backing field holds either a {@code String} or a {@code ByteString};
+     * a {@code ByteString} is decoded as UTF-8 and the decoded string is
+     * written back to the field.
+     * @return The action.
+     */
+    @Override
+    public String getAction() {
+      final Object current = action_;
+      if (current instanceof String) {
+        return (String) current;
+      }
+      final String decoded = ((com.google.protobuf.ByteString) current).toStringUtf8();
+      action_ = decoded;
+      return decoded;
+    }
+    /**
+     * <code>string action = 10;</code>
+     * @return The bytes for action.
+     */
+    @Override
+    public com.google.protobuf.ByteString
+        getActionBytes() {
+      Object ref = action_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (String) ref);
+        action_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    public static final int POOL_FIELD_NUMBER = 11;
+    private int pool_;
+    /**
+     * Field <code>int32 pool = 11;</code>
+     * @return The pool.
+     */
+    @Override
+    public int getPool() {
+      return this.pool_;
+    }
+
+    public static final int IDSTR_FIELD_NUMBER = 12;
+    private volatile Object idStr_;
+    /**
+     * Field <code>string idStr = 12;</code>
+     *
+     * The backing field holds either a {@code String} or a {@code ByteString};
+     * a {@code ByteString} is decoded as UTF-8 and the decoded string is
+     * written back to the field.
+     * @return The idStr.
+     */
+    @Override
+    public String getIdStr() {
+      final Object current = idStr_;
+      if (current instanceof String) {
+        return (String) current;
+      }
+      final String decoded = ((com.google.protobuf.ByteString) current).toStringUtf8();
+      idStr_ = decoded;
+      return decoded;
+    }
+    /**
+     * <code>string idStr = 12;</code>
+     * @return The bytes for idStr.
+     */
+    @Override
+    public com.google.protobuf.ByteString
+        getIdStrBytes() {
+      Object ref = idStr_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8(
+                (String) ref);
+        idStr_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    private byte memoizedIsInitialized = -1;
+    /**
+     * Reports whether the message is initialized, using the memoized flag:
+     * 1 means true, 0 means false, and any other value is resolved to true
+     * and then memoized as 1.
+     *
+     * @return the memoized or newly resolved initialization state
+     */
+    @Override
+    public final boolean isInitialized() {
+      final byte cached = memoizedIsInitialized;
+      if (cached == 0) {
+        return false;
+      }
+      if (cached != 1) {
+        memoizedIsInitialized = 1;
+      }
+      return true;
+    }
+
+    /**
+     * Serializes this message to {@code output}. Fields are written in
+     * ascending field-number order; numeric fields equal to zero and empty
+     * string fields are omitted. Unknown fields are written last.
+     *
+     * @param output destination stream
+     * @throws java.io.IOException if writing fails
+     */
+    @Override
+    public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException {
+      // Fields 1-5: numeric values.
+      if (id_ != 0L) {
+        output.writeInt64(1, id_);
+      }
+      if (progress_ != 0) {
+        output.writeInt32(2, progress_);
+      }
+      if (mode_ != 0) {
+        output.writeInt32(3, mode_);
+      }
+      if (fontsize_ != 0) {
+        output.writeInt32(4, fontsize_);
+      }
+      if (color_ != 0) {
+        output.writeUInt32(5, color_);
+      }
+      // Fields 6-7: strings.
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(midHash_)) {
+        com.google.protobuf.GeneratedMessageV3.writeString(output, 6, midHash_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(content_)) {
+        com.google.protobuf.GeneratedMessageV3.writeString(output, 7, content_);
+      }
+      // Fields 8-12: mixed numeric and string values.
+      if (ctime_ != 0L) {
+        output.writeInt64(8, ctime_);
+      }
+      if (weight_ != 0) {
+        output.writeInt32(9, weight_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(action_)) {
+        com.google.protobuf.GeneratedMessageV3.writeString(output, 10, action_);
+      }
+      if (pool_ != 0) {
+        output.writeInt32(11, pool_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(idStr_)) {
+        com.google.protobuf.GeneratedMessageV3.writeString(output, 12, idStr_);
+      }
+      unknownFields.writeTo(output);
+    }
+
+    /**
+     * Computes the serialized size of this message and memoizes it. A
+     * memoized value other than -1 is returned directly. Numeric fields equal
+     * to zero and empty string fields contribute nothing.
+     *
+     * @return the serialized size as summed from the per-field size helpers
+     */
+    @Override
+    public int getSerializedSize() {
+      final int cached = memoizedSize;
+      if (cached != -1) {
+        return cached;
+      }
+      int total = 0;
+      if (id_ != 0L) {
+        total += com.google.protobuf.CodedOutputStream.computeInt64Size(1, id_);
+      }
+      if (progress_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeInt32Size(2, progress_);
+      }
+      if (mode_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeInt32Size(3, mode_);
+      }
+      if (fontsize_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeInt32Size(4, fontsize_);
+      }
+      if (color_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeUInt32Size(5, color_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(midHash_)) {
+        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, midHash_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(content_)) {
+        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(7, content_);
+      }
+      if (ctime_ != 0L) {
+        total += com.google.protobuf.CodedOutputStream.computeInt64Size(8, ctime_);
+      }
+      if (weight_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeInt32Size(9, weight_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(action_)) {
+        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(10, action_);
+      }
+      if (pool_ != 0) {
+        total += com.google.protobuf.CodedOutputStream.computeInt32Size(11, pool_);
+      }
+      if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(idStr_)) {
+        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(12, idStr_);
+      }
+      total += unknownFields.getSerializedSize();
+      memoizedSize = total;
+      return total;
+    }
+
+    /**
+     * Compares this message with {@code obj}. Objects that are not
+     * {@code DanmakuElem} instances are delegated to the superclass
+     * comparison; otherwise all twelve fields and the unknown fields must match.
+     *
+     * @param obj object to compare with
+     * @return true when the objects are considered equal
+     */
+    @Override
+    public boolean equals(final Object obj) {
+      if (obj == this) {
+        return true;
+      }
+      if (!(obj instanceof DanmakuElem)) {
+        return super.equals(obj);
+      }
+      final DanmakuElem that = (DanmakuElem) obj;
+      // Short-circuit evaluation keeps the field-by-field comparison order.
+      return getId() == that.getId()
+          && getProgress() == that.getProgress()
+          && getMode() == that.getMode()
+          && getFontsize() == that.getFontsize()
+          && getColor() == that.getColor()
+          && getMidHash().equals(that.getMidHash())
+          && getContent().equals(that.getContent())
+          && getCtime() == that.getCtime()
+          && getWeight() == that.getWeight()
+          && getAction().equals(that.getAction())
+          && getPool() == that.getPool()
+          && getIdStr().equals(that.getIdStr())
+          && unknownFields.equals(that.unknownFields);
+    }
+
+    /**
+     * Computes a hash over the descriptor, every field (number and value) and
+     * the unknown fields. A non-zero memoized hash is returned directly; the
+     * computed hash is memoized before returning.
+     *
+     * @return the hash code
+     */
+    @Override
+    public int hashCode() {
+      if (memoizedHashCode != 0) {
+        return memoizedHashCode;
+      }
+      int h = 41;
+      h = (19 * h) + getDescriptor().hashCode();
+      h = (37 * h) + ID_FIELD_NUMBER;
+      h = (53 * h) + com.google.protobuf.Internal.hashLong(getId());
+      h = (37 * h) + PROGRESS_FIELD_NUMBER;
+      h = (53 * h) + getProgress();
+      h = (37 * h) + MODE_FIELD_NUMBER;
+      h = (53 * h) + getMode();
+      h = (37 * h) + FONTSIZE_FIELD_NUMBER;
+      h = (53 * h) + getFontsize();
+      h = (37 * h) + COLOR_FIELD_NUMBER;
+      h = (53 * h) + getColor();
+      h = (37 * h) + MIDHASH_FIELD_NUMBER;
+      h = (53 * h) + getMidHash().hashCode();
+      h = (37 * h) + CONTENT_FIELD_NUMBER;
+      h = (53 * h) + getContent().hashCode();
+      h = (37 * h) + CTIME_FIELD_NUMBER;
+      h = (53 * h) + com.google.protobuf.Internal.hashLong(getCtime());
+      h = (37 * h) + WEIGHT_FIELD_NUMBER;
+      h = (53 * h) + getWeight();
+      h = (37 * h) + ACTION_FIELD_NUMBER;
+      h = (53 * h) + getAction().hashCode();
+      h = (37 * h) + POOL_FIELD_NUMBER;
+      h = (53 * h) + getPool();
+      h = (37 * h) + IDSTR_FIELD_NUMBER;
+      h = (53 * h) + getIdStr().hashCode();
+      h = (29 * h) + unknownFields.hashCode();
+      memoizedHashCode = h;
+      return h;
+    }
+
+    public static DanmakuElem parseFrom(
+        java.nio.ByteBuffer data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from a {@code ByteBuffer} via {@code PARSER},
+     * passing along the given extension registry.
+     *
+     * @param data serialized message
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws com.google.protobuf.InvalidProtocolBufferException if parsing fails
+     */
+    public static DanmakuElem parseFrom(
+        java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    public static DanmakuElem parseFrom(
+        com.google.protobuf.ByteString data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from a {@code ByteString} via {@code PARSER},
+     * passing along the given extension registry.
+     *
+     * @param data serialized message
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws com.google.protobuf.InvalidProtocolBufferException if parsing fails
+     */
+    public static DanmakuElem parseFrom(
+        com.google.protobuf.ByteString data,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from a byte array via {@code PARSER}.
+     *
+     * @param data serialized message
+     * @return the parsed message
+     * @throws com.google.protobuf.InvalidProtocolBufferException if parsing fails
+     */
+    public static DanmakuElem parseFrom(byte[] data)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from a byte array via {@code PARSER},
+     * passing along the given extension registry.
+     *
+     * @param data serialized message
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws com.google.protobuf.InvalidProtocolBufferException if parsing fails
+     */
+    public static DanmakuElem parseFrom(
+        byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws com.google.protobuf.InvalidProtocolBufferException {
+      return PARSER.parseFrom(data, extensionRegistry);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from an {@code InputStream} through
+     * {@code GeneratedMessageV3.parseWithIOException}.
+     *
+     * @param input stream to read from
+     * @return the parsed message
+     * @throws java.io.IOException if reading or parsing fails
+     */
+    public static DanmakuElem parseFrom(java.io.InputStream input) throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from an {@code InputStream} through
+     * {@code GeneratedMessageV3.parseWithIOException}, passing along the
+     * given extension registry.
+     *
+     * @param input stream to read from
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws java.io.IOException if reading or parsing fails
+     */
+    public static DanmakuElem parseFrom(
+        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
+          PARSER, input, extensionRegistry);
+    }
+    /**
+     * Parses a delimited {@code DanmakuElem} from an {@code InputStream}
+     * through {@code GeneratedMessageV3.parseDelimitedWithIOException}.
+     *
+     * @param input stream to read from
+     * @return the parsed message
+     * @throws java.io.IOException if reading or parsing fails
+     */
+    public static DanmakuElem parseDelimitedFrom(java.io.InputStream input)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input);
+    }
+    /**
+     * Parses a delimited {@code DanmakuElem} from an {@code InputStream}
+     * through {@code GeneratedMessageV3.parseDelimitedWithIOException},
+     * passing along the given extension registry.
+     *
+     * @param input stream to read from
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws java.io.IOException if reading or parsing fails
+     */
+    public static DanmakuElem parseDelimitedFrom(
+        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(
+          PARSER, input, extensionRegistry);
+    }
+    public static DanmakuElem parseFrom(
+        com.google.protobuf.CodedInputStream input)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3
+          .parseWithIOException(PARSER, input);
+    }
+    /**
+     * Parses a {@code DanmakuElem} from a {@code CodedInputStream} through
+     * {@code GeneratedMessageV3.parseWithIOException}, passing along the
+     * given extension registry.
+     *
+     * @param input stream to read from
+     * @param extensionRegistry registry forwarded to the parser
+     * @return the parsed message
+     * @throws java.io.IOException if reading or parsing fails
+     */
+    public static DanmakuElem parseFrom(
+        com.google.protobuf.CodedInputStream input,
+        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+        throws java.io.IOException {
+      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
+          PARSER, input, extensionRegistry);
+    }
+
+    @Override
+    public Builder newBuilderForType() { return newBuilder(); }
+    /**
+     * Creates a builder by converting the default instance to a builder.
+     *
+     * @return a new builder
+     */
+    public static Builder newBuilder() {
+      final DanmakuElem defaults = DEFAULT_INSTANCE;
+      return defaults.toBuilder();
+    }
+    /**
+     * Creates a builder from the default instance and merges
+     * {@code prototype} into it.
+     *
+     * @param prototype message whose values are merged into the new builder
+     * @return the builder returned by the merge
+     */
+    public static Builder newBuilder(DanmakuElem prototype) {
+      final Builder fresh = DEFAULT_INSTANCE.toBuilder();
+      return fresh.mergeFrom(prototype);
+    }
+    /**
+     * Creates a builder for this message. For the default instance a plain
+     * new builder is returned; for any other instance a new builder with
+     * this message merged in is returned.
+     *
+     * @return a builder reflecting this message
+     */
+    @Override
+    public Builder toBuilder() {
+      if (this == DEFAULT_INSTANCE) {
+        return new Builder();
+      }
+      return new Builder().mergeFrom(this);
+    }
+
+    /**
+     * Creates a builder attached to the given parent.
+     *
+     * @param parent parent passed to the builder constructor
+     * @return the new builder
+     */
+    @Override
+    protected Builder newBuilderForType(BuilderParent parent) {
+      return new Builder(parent);
+    }
+    /**
+     * Protobuf type {@code DanmakuElem}
+     */
+    public static final class Builder extends
+        com.google.protobuf.GeneratedMessageV3.Builder<Builder> implements
+        // @@protoc_insertion_point(builder_implements:DanmakuElem)
+        DanmakuElemOrBuilder {
+      public static final com.google.protobuf.Descriptors.Descriptor
+          getDescriptor() {
+        return BiliDm.internal_static_DanmakuElem_descriptor;
+      }
+
+      @Override
+      protected FieldAccessorTable
+          internalGetFieldAccessorTable() {
+        return BiliDm.internal_static_DanmakuElem_fieldAccessorTable
+            .ensureFieldAccessorsInitialized(
+                DanmakuElem.class, Builder.class);
+      }
+
+      /**
+       * Creates a standalone builder. Intended to be obtained through
+       * {@code DanmakuElem.newBuilder()}.
+       */
+      private Builder() {
+        this.maybeForceBuilderInitialization();
+      }
+
+      /**
+       * Creates a builder attached to {@code parent}.
+       *
+       * @param parent parent handed to the superclass constructor
+       */
+      private Builder(BuilderParent parent) {
+        super(parent);
+        this.maybeForceBuilderInitialization();
+      }
+      /**
+       * Hook invoked by both constructors. It checks
+       * {@code alwaysUseFieldBuilders} but performs no work in this builder.
+       */
+      private void maybeForceBuilderInitialization() {
+        if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {
+          // Intentionally empty: nothing is initialized here.
+        }
+      }
+      /**
+       * Resets the builder: after the superclass clear, every numeric field
+       * is set to zero and every string field to the empty string.
+       *
+       * @return this builder
+       */
+      @Override
+      public Builder clear() {
+        super.clear();
+        id_ = 0L;
+        progress_ = 0;
+        mode_ = 0;
+        fontsize_ = 0;
+        color_ = 0;
+        midHash_ = "";
+        content_ = "";
+        ctime_ = 0L;
+        weight_ = 0;
+        action_ = "";
+        pool_ = 0;
+        idStr_ = "";
+        return this;
+      }
+
+      @Override
+      public com.google.protobuf.Descriptors.Descriptor
+          getDescriptorForType() {
+        return BiliDm.internal_static_DanmakuElem_descriptor;
+      }
+
+      /**
+       * Returns the default {@code DanmakuElem} instance.
+       *
+       * @return the default instance
+       */
+      @Override
+      public DanmakuElem getDefaultInstanceForType() {
+        final DanmakuElem defaults = DanmakuElem.getDefaultInstance();
+        return defaults;
+      }
+
+      /**
+       * Builds the message and verifies that it is initialized.
+       *
+       * @return the built message
+       * @throws RuntimeException as produced by
+       *     {@code newUninitializedMessageException} when the result is not initialized
+       */
+      @Override
+      public DanmakuElem build() {
+        final DanmakuElem message = buildPartial();
+        if (message.isInitialized()) {
+          return message;
+        }
+        throw newUninitializedMessageException(message);
+      }
+
+      /**
+       * Builds the message without checking initialization: creates a new
+       * {@code DanmakuElem} from this builder, copies every field value into
+       * it and then signals {@code onBuilt()}.
+       *
+       * @return the built message
+       */
+      @Override
+      public DanmakuElem buildPartial() {
+        final DanmakuElem built = new DanmakuElem(this);
+        built.id_ = this.id_;
+        built.progress_ = this.progress_;
+        built.mode_ = this.mode_;
+        built.fontsize_ = this.fontsize_;
+        built.color_ = this.color_;
+        built.midHash_ = this.midHash_;
+        built.content_ = this.content_;
+        built.ctime_ = this.ctime_;
+        built.weight_ = this.weight_;
+        built.action_ = this.action_;
+        built.pool_ = this.pool_;
+        built.idStr_ = this.idStr_;
+        onBuilt();
+        return built;
+      }
+
+      /**
+       * Clones this builder by delegating to the superclass.
+       *
+       * @return the cloned builder
+       */
+      @Override
+      public Builder clone() {
+        final Builder copy = super.clone();
+        return copy;
+      }
+      /**
+       * Sets a field reflectively by delegating to the superclass.
+       *
+       * @param field descriptor of the field to set
+       * @param value new value
+       * @return the builder returned by the superclass call
+       */
+      @Override
+      public Builder setField(
+          com.google.protobuf.Descriptors.FieldDescriptor field, Object value) {
+        return super.setField(field, value);
+      }
+      /**
+       * Clears a field reflectively by delegating to the superclass.
+       *
+       * @param field descriptor of the field to clear
+       * @return the builder returned by the superclass call
+       */
+      @Override
+      public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) {
+        return super.clearField(field);
+      }
+      /**
+       * Clears a oneof reflectively by delegating to the superclass.
+       *
+       * @param oneof descriptor of the oneof to clear
+       * @return the builder returned by the superclass call
+       */
+      @Override
+      public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) {
+        return super.clearOneof(oneof);
+      }
+      /**
+       * Sets one element of a repeated field reflectively by delegating to
+       * the superclass.
+       *
+       * @param field descriptor of the repeated field
+       * @param index position of the element
+       * @param value new value
+       * @return the builder returned by the superclass call
+       */
+      @Override
+      public Builder setRepeatedField(
+          com.google.protobuf.Descriptors.FieldDescriptor field, int index, Object value) {
+        return super.setRepeatedField(field, index, value);
+      }
+      /**
+       * Appends to a repeated field reflectively by delegating to the superclass.
+       *
+       * @param field descriptor of the repeated field
+       * @param value value to append
+       * @return the builder returned by the superclass call
+       */
+      @Override
+      public Builder addRepeatedField(
+          com.google.protobuf.Descriptors.FieldDescriptor field, Object value) {
+        return super.addRepeatedField(field, value);
+      }
+      /**
+       * Merges {@code other} into this builder. A {@code DanmakuElem} is
+       * routed to the typed overload; any other message goes to the
+       * superclass merge.
+       *
+       * @param other message to merge from
+       * @return this builder (or the typed overload's result)
+       */
+      @Override
+      public Builder mergeFrom(com.google.protobuf.Message other) {
+        if (!(other instanceof DanmakuElem)) {
+          super.mergeFrom(other);
+          return this;
+        }
+        return mergeFrom((DanmakuElem) other);
+      }
+
+      /**
+       * Merges the values of {@code other} into this builder. The default
+       * instance is ignored. Numeric fields are copied when non-zero; string
+       * fields are copied (as the raw stored reference) when non-empty.
+       * Unknown fields are merged last.
+       *
+       * @param other message to merge from
+       * @return this builder
+       */
+      public Builder mergeFrom(DanmakuElem other) {
+        if (other == DanmakuElem.getDefaultInstance()) {
+          return this;
+        }
+        // Fields 1-5: numeric values go through the setters.
+        if (other.getId() != 0L) {
+          setId(other.getId());
+        }
+        if (other.getProgress() != 0) {
+          setProgress(other.getProgress());
+        }
+        if (other.getMode() != 0) {
+          setMode(other.getMode());
+        }
+        if (other.getFontsize() != 0) {
+          setFontsize(other.getFontsize());
+        }
+        if (other.getColor() != 0) {
+          setColor(other.getColor());
+        }
+        // Fields 6-7: strings are assigned directly, followed by a change notification.
+        if (!other.getMidHash().isEmpty()) {
+          midHash_ = other.midHash_;
+          onChanged();
+        }
+        if (!other.getContent().isEmpty()) {
+          content_ = other.content_;
+          onChanged();
+        }
+        // Fields 8-12.
+        if (other.getCtime() != 0L) {
+          setCtime(other.getCtime());
+        }
+        if (other.getWeight() != 0) {
+          setWeight(other.getWeight());
+        }
+        if (!other.getAction().isEmpty()) {
+          action_ = other.action_;
+          onChanged();
+        }
+        if (other.getPool() != 0) {
+          setPool(other.getPool());
+        }
+        if (!other.getIdStr().isEmpty()) {
+          idStr_ = other.idStr_;
+          onChanged();
+        }
+        this.mergeUnknownFields(other.unknownFields);
+        onChanged();
+        return this;
+      }
+
+      @Override
+      public final boolean isInitialized() {
+        return true;
+      }
+
+      @Override
+      public Builder mergeFrom( // Parses a DanmakuElem from `input` and merges it into this builder.
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws java.io.IOException {
+        DanmakuElem parsedMessage = null;
+        try {
+          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
+        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+          parsedMessage = (DanmakuElem) e.getUnfinishedMessage(); // keep the unfinished message so the finally block can merge it
+          throw e.unwrapIOException(); // rethrown as an IOException after the finally block has run
+        } finally {
+          if (parsedMessage != null) { // runs on both the success path and the failure path
+            mergeFrom(parsedMessage);
+          }
+        }
+        return this;
+      }
+
+      private long id_ ;
+      /**
+       * <code>int64 id = 1;</code>
+       * @return The id.
+       */
+      @Override
+      public long getId() {
+        return id_;
+      }
+      /**
+       * <code>int64 id = 1;</code>
+       * @param value The id to set.
+       * @return This builder for chaining.
+       */
+      public Builder setId(long value) {
+        
+        id_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int64 id = 1;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearId() {
+        
+        id_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      private int progress_ ;
+      /**
+       * <code>int32 progress = 2;</code>
+       * @return The progress.
+       */
+      @Override
+      public int getProgress() {
+        return progress_;
+      }
+      /**
+       * <code>int32 progress = 2;</code>
+       * @param value The progress to set.
+       * @return This builder for chaining.
+       */
+      public Builder setProgress(int value) {
+        
+        progress_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int32 progress = 2;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearProgress() {
+        
+        progress_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private int mode_ ;
+      /**
+       * <code>int32 mode = 3;</code>
+       * @return The mode.
+       */
+      @Override
+      public int getMode() {
+        return mode_;
+      }
+      /**
+       * <code>int32 mode = 3;</code>
+       * @param value The mode to set.
+       * @return This builder for chaining.
+       */
+      public Builder setMode(int value) {
+        
+        mode_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int32 mode = 3;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearMode() {
+        
+        mode_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private int fontsize_ ;
+      /**
+       * <code>int32 fontsize = 4;</code>
+       * @return The fontsize.
+       */
+      @Override
+      public int getFontsize() {
+        return fontsize_;
+      }
+      /**
+       * <code>int32 fontsize = 4;</code>
+       * @param value The fontsize to set.
+       * @return This builder for chaining.
+       */
+      public Builder setFontsize(int value) {
+        
+        fontsize_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int32 fontsize = 4;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearFontsize() {
+        
+        fontsize_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private int color_ ;
+      /**
+       * <code>uint32 color = 5;</code>
+       * @return The color.
+       */
+      @Override
+      public int getColor() {
+        return color_;
+      }
+      /**
+       * <code>uint32 color = 5;</code>
+       * @param value The color to set.
+       * @return This builder for chaining.
+       */
+      public Builder setColor(int value) {
+        
+        color_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>uint32 color = 5;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearColor() {
+        
+        color_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private Object midHash_ = "";
+      /**
+       * <code>string midHash = 6;</code>
+       * @return The midHash.
+       */
+      public String getMidHash() {
+        Object ref = midHash_;
+        if (!(ref instanceof String)) {
+          com.google.protobuf.ByteString bs =
+              (com.google.protobuf.ByteString) ref;
+          String s = bs.toStringUtf8();
+          midHash_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      /**
+       * <code>string midHash = 6;</code>
+       * @return The bytes for midHash.
+       */
+      public com.google.protobuf.ByteString
+          getMidHashBytes() {
+        Object ref = midHash_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (String) ref);
+          midHash_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>string midHash = 6;</code>
+       * @param value The midHash to set.
+       * @return This builder for chaining.
+       */
+      public Builder setMidHash(
+          String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  
+        midHash_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string midHash = 6;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearMidHash() {
+        
+        midHash_ = getDefaultInstance().getMidHash();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string midHash = 6;</code>
+       * @param value The bytes for midHash to set.
+       * @return This builder for chaining.
+       */
+      public Builder setMidHashBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  checkByteStringIsUtf8(value);
+        
+        midHash_ = value;
+        onChanged();
+        return this;
+      }
+
+      private Object content_ = "";
+      /**
+       * <code>string content = 7;</code>
+       * @return The content.
+       */
+      public String getContent() {
+        Object ref = content_;
+        if (!(ref instanceof String)) {
+          com.google.protobuf.ByteString bs =
+              (com.google.protobuf.ByteString) ref;
+          String s = bs.toStringUtf8();
+          content_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      /**
+       * <code>string content = 7;</code>
+       * @return The bytes for content.
+       */
+      public com.google.protobuf.ByteString
+          getContentBytes() {
+        Object ref = content_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (String) ref);
+          content_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>string content = 7;</code>
+       * @param value The content to set.
+       * @return This builder for chaining.
+       */
+      public Builder setContent(
+          String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  
+        content_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string content = 7;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearContent() {
+        
+        content_ = getDefaultInstance().getContent();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string content = 7;</code>
+       * @param value The bytes for content to set.
+       * @return This builder for chaining.
+       */
+      public Builder setContentBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  checkByteStringIsUtf8(value);
+        
+        content_ = value;
+        onChanged();
+        return this;
+      }
+
+      private long ctime_ ;
+      /**
+       * <code>int64 ctime = 8;</code>
+       * @return The ctime.
+       */
+      @Override
+      public long getCtime() {
+        return ctime_;
+      }
+      /**
+       * <code>int64 ctime = 8;</code>
+       * @param value The ctime to set.
+       * @return This builder for chaining.
+       */
+      public Builder setCtime(long value) {
+        
+        ctime_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int64 ctime = 8;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearCtime() {
+        
+        ctime_ = 0L;
+        onChanged();
+        return this;
+      }
+
+      private int weight_ ;
+      /**
+       * <code>int32 weight = 9;</code>
+       * @return The weight.
+       */
+      @Override
+      public int getWeight() {
+        return weight_;
+      }
+      /**
+       * <code>int32 weight = 9;</code>
+       * @param value The weight to set.
+       * @return This builder for chaining.
+       */
+      public Builder setWeight(int value) {
+        
+        weight_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int32 weight = 9;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearWeight() {
+        
+        weight_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private Object action_ = "";
+      /**
+       * <code>string action = 10;</code>
+       * @return The action.
+       */
+      public String getAction() {
+        Object ref = action_;
+        if (!(ref instanceof String)) {
+          com.google.protobuf.ByteString bs =
+              (com.google.protobuf.ByteString) ref;
+          String s = bs.toStringUtf8();
+          action_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      /**
+       * <code>string action = 10;</code>
+       * @return The bytes for action.
+       */
+      public com.google.protobuf.ByteString
+          getActionBytes() {
+        Object ref = action_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (String) ref);
+          action_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>string action = 10;</code>
+       * @param value The action to set.
+       * @return This builder for chaining.
+       */
+      public Builder setAction(
+          String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  
+        action_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string action = 10;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearAction() {
+        
+        action_ = getDefaultInstance().getAction();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string action = 10;</code>
+       * @param value The bytes for action to set.
+       * @return This builder for chaining.
+       */
+      public Builder setActionBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  checkByteStringIsUtf8(value);
+        
+        action_ = value;
+        onChanged();
+        return this;
+      }
+
+      private int pool_ ;
+      /**
+       * <code>int32 pool = 11;</code>
+       * @return The pool.
+       */
+      @Override
+      public int getPool() {
+        return pool_;
+      }
+      /**
+       * <code>int32 pool = 11;</code>
+       * @param value The pool to set.
+       * @return This builder for chaining.
+       */
+      public Builder setPool(int value) {
+        
+        pool_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>int32 pool = 11;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearPool() {
+        
+        pool_ = 0;
+        onChanged();
+        return this;
+      }
+
+      private Object idStr_ = "";
+      /**
+       * <code>string idStr = 12;</code>
+       * @return The idStr.
+       */
+      public String getIdStr() {
+        Object ref = idStr_;
+        if (!(ref instanceof String)) {
+          com.google.protobuf.ByteString bs =
+              (com.google.protobuf.ByteString) ref;
+          String s = bs.toStringUtf8();
+          idStr_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      /**
+       * <code>string idStr = 12;</code>
+       * @return The bytes for idStr.
+       */
+      public com.google.protobuf.ByteString
+          getIdStrBytes() {
+        Object ref = idStr_;
+        if (ref instanceof String) {
+          com.google.protobuf.ByteString b = 
+              com.google.protobuf.ByteString.copyFromUtf8(
+                  (String) ref);
+          idStr_ = b;
+          return b;
+        } else {
+          return (com.google.protobuf.ByteString) ref;
+        }
+      }
+      /**
+       * <code>string idStr = 12;</code>
+       * @param value The idStr to set.
+       * @return This builder for chaining.
+       */
+      public Builder setIdStr(
+          String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  
+        idStr_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string idStr = 12;</code>
+       * @return This builder for chaining.
+       */
+      public Builder clearIdStr() {
+        
+        idStr_ = getDefaultInstance().getIdStr();
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>string idStr = 12;</code>
+       * @param value The bytes for idStr to set.
+       * @return This builder for chaining.
+       */
+      public Builder setIdStrBytes(
+          com.google.protobuf.ByteString value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  checkByteStringIsUtf8(value);
+        
+        idStr_ = value;
+        onChanged();
+        return this;
+      }
+      @Override
+      public final Builder setUnknownFields(
+          final com.google.protobuf.UnknownFieldSet unknownFields) {
+        return super.setUnknownFields(unknownFields);
+      }
+
+      @Override
+      public final Builder mergeUnknownFields(
+          final com.google.protobuf.UnknownFieldSet unknownFields) {
+        return super.mergeUnknownFields(unknownFields);
+      }
+
+
+      // @@protoc_insertion_point(builder_scope:DanmakuElem)
+    }
+
+    // @@protoc_insertion_point(class_scope:DanmakuElem)
+    private static final DanmakuElem DEFAULT_INSTANCE;
+    static {
+      DEFAULT_INSTANCE = new DanmakuElem();
+    }
+
+    public static DanmakuElem getDefaultInstance() {
+      return DEFAULT_INSTANCE;
+    }
+
+    private static final com.google.protobuf.Parser<DanmakuElem>
+        PARSER = new com.google.protobuf.AbstractParser<DanmakuElem>() {
+      @Override
+      public DanmakuElem parsePartialFrom(
+          com.google.protobuf.CodedInputStream input,
+          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
+          throws com.google.protobuf.InvalidProtocolBufferException {
+        return new DanmakuElem(input, extensionRegistry);
+      }
+    };
+
+    public static com.google.protobuf.Parser<DanmakuElem> parser() {
+      return PARSER;
+    }
+
+    @Override
+    public com.google.protobuf.Parser<DanmakuElem> getParserForType() {
+      return PARSER;
+    }
+
+    @Override
+    public DanmakuElem getDefaultInstanceForType() {
+      return DEFAULT_INSTANCE;
+    }
+
+  }
+
+  private static final com.google.protobuf.Descriptors.Descriptor
+    internal_static_DmSegMobileReply_descriptor;
+  private static final 
+    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
+      internal_static_DmSegMobileReply_fieldAccessorTable;
+  private static final com.google.protobuf.Descriptors.Descriptor
+    internal_static_DanmakuElem_descriptor;
+  private static final 
+    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
+      internal_static_DanmakuElem_fieldAccessorTable;
+
+  public static com.google.protobuf.Descriptors.FileDescriptor
+      getDescriptor() {
+    return descriptor;
+  }
+  private static  com.google.protobuf.Descriptors.FileDescriptor
+      descriptor;
+  static {
+    String[] descriptorData = {
+      "\n\010dm.proto\"/\n\020DmSegMobileReply\022\033\n\005elems\030" +
+      "\001 \003(\0132\014.DanmakuElem\"\310\001\n\013DanmakuElem\022\n\n\002i" +
+      "d\030\001 \001(\003\022\020\n\010progress\030\002 \001(\005\022\014\n\004mode\030\003 \001(\005\022" +
+      "\020\n\010fontsize\030\004 \001(\005\022\r\n\005color\030\005 \001(\r\022\017\n\007midH" +
+      "ash\030\006 \001(\t\022\017\n\007content\030\007 \001(\t\022\r\n\005ctime\030\010 \001(" +
+      "\003\022\016\n\006weight\030\t \001(\005\022\016\n\006action\030\n \001(\t\022\014\n\004poo" +
+      "l\030\013 \001(\005\022\r\n\005idStr\030\014 \001(\tB2\n&cn.reghao.webs" +
+      "pider.parser.app.autogenB\006BiliDmP\000b\006prot" +
+      "o3"
+    };
+    descriptor = com.google.protobuf.Descriptors.FileDescriptor
+      .internalBuildGeneratedFileFrom(descriptorData,
+        new com.google.protobuf.Descriptors.FileDescriptor[] {
+        });
+    internal_static_DmSegMobileReply_descriptor =
+      getDescriptor().getMessageTypes().get(0);
+    internal_static_DmSegMobileReply_fieldAccessorTable = new
+      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
+        internal_static_DmSegMobileReply_descriptor,
+        new String[] { "Elems", });
+    internal_static_DanmakuElem_descriptor =
+      getDescriptor().getMessageTypes().get(1);
+    internal_static_DanmakuElem_fieldAccessorTable = new
+      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
+        internal_static_DanmakuElem_descriptor,
+        new String[] { "Id", "Progress", "Mode", "Fontsize", "Color", "MidHash", "Content", "Ctime", "Weight", "Action", "Pool", "IdStr", });
+  }
+
+  // @@protoc_insertion_point(outer_class_scope)
+}

+ 102 - 0
web/src/main/java/cn/reghao/bnt/web/parser/consumer/RawDataConsumer.java

@@ -0,0 +1,102 @@
+package cn.reghao.bnt.web.parser.consumer;
+
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ThreadPoolExecutor;
+
+/**
+ * @author reghao
+ * @date 2022-02-27 21:03:38
+ */
+@Slf4j
+public class RawDataConsumer implements Runnable {
+    private final Object monitor;
+    private final UnparsedDataMongo unparsedDataMongo;
+    private final UrlResourceMongo urlResourceMongo;
+    private final RawDataStore rawDataStore;
+    private final ExecutorService threadPool;
+
+    public RawDataConsumer(ExecutorService threadPool, RawDataStore rawDataStore,
+                           UnparsedDataMongo unparsedDataMongo, UrlResourceMongo urlResourceMongo) {
+        this.threadPool = threadPool;
+        this.rawDataStore = rawDataStore;
+        this.monitor = rawDataStore.getMonitor();
+        this.unparsedDataMongo = unparsedDataMongo;
+        this.urlResourceMongo = urlResourceMongo;
+    }
+
+    @Override
+    public void run() {
+        ThreadPoolExecutor tpe = (ThreadPoolExecutor) threadPool;
+        log.info("持久化 RawData...");
+        while (!Thread.interrupted()) {
+            int activeThread = tpe.getActiveCount();
+            if (activeThread < 10) {
+                synchronized (monitor) {
+                    try {
+                        RawData rawData = rawDataStore.get();
+                        if (rawData != null) {
+                            log.info("提交 PersistTask 到线程池...");
+                            threadPool.submit(new RawDataPersistTask(unparsedDataMongo, urlResourceMongo, rawData));
+                        } else {
+                            log.info("调用 monitor.wait() 等待 RawDataStore 中有数据可用");
+                            monitor.wait();
+                        }
+                    } catch (InterruptedException e) {
+                        log.error(e.getMessage());
+                    }
+                }
+            } else {
+                log.info("休眠 1s 等待线程池空闲...");
+                try {
+                    Thread.sleep(1_000);
+                } catch (InterruptedException e) {
+                    log.error(e.getMessage());
+                }
+            }
+        }
+    }
+
+    static class RawDataPersistTask implements Runnable {
+        private final UnparsedDataMongo unparsedDataMongo;
+        private final UrlResourceMongo urlResourceMongo;
+        private final RawData rawData;
+
+        public RawDataPersistTask(UnparsedDataMongo unparsedDataMongo, UrlResourceMongo urlResourceMongo, RawData rawData) {
+            this.unparsedDataMongo = unparsedDataMongo;
+            this.urlResourceMongo = urlResourceMongo;
+            this.rawData = rawData;
+        }
+
+        @Override
+        public void run() {
+            long start = System.currentTimeMillis();
+            CrawlUrl crawlUrl = rawData.getCrawlUrl();
+            String site = crawlUrl.getSite();
+            String parser = crawlUrl.getParser();
+            long crawledTime = rawData.getCrawledTime();
+            UnparsedData unparsedData = new UnparsedData(rawData);
+            try {
+                UrlResource urlResource = new UrlResource(crawlUrl);
+                if (rawData.getData().equals("404")) {
+                    urlResource.setNotFound(true);
+                }
+                urlResource.setCrawledTime(crawledTime);
+
+                // TODO 这两个操作应该在一个事务中
+                urlResourceMongo.update(urlResource);
+                unparsedDataMongo.save(unparsedData);
+            } catch (Exception e) {
+                log.error("{} 的 RawData 持久化数据异常 -> {}", crawlUrl.getUrl(), e.getMessage());
+            }
+            log.info("{} 的 RawData 完成持久化, 耗时 {}ms...", crawlUrl.getUrl(), System.currentTimeMillis()-start);
+        }
+    }
+}

+ 54 - 0
web/src/main/java/cn/reghao/bnt/web/parser/consumer/RawDataStore.java

@@ -0,0 +1,54 @@
+package cn.reghao.bnt.web.parser.consumer;
+
+import cn.reghao.jutil.jdk.thread.ThreadPoolWrapper;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import org.springframework.stereotype.Component;
+
+import java.util.Queue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * In-memory hand-off point for {@link RawData}: producers call {@link #put(RawData)} and the
+ * {@link RawDataConsumer} polls with {@link #get()}, waiting on {@link #getMonitor()} while empty.
+ *
+ * @author reghao
+ * @date 2021-11-19 17:34:54
+ */
+@Component
+public class RawDataStore {
+    private final ExecutorService threadPool = ThreadPoolWrapper.threadPool("RawDataConsumer", 10);
+    private final Queue<RawData> dataQueue = new LinkedBlockingQueue<>();
+    private final Object monitor = new Object();
+    private final RawDataConsumer rawDataConsumer;
+
+    /**
+     * Builds the store together with the consumer that drains it.
+     *
+     * @param unparsedDataMongo storage handed to the consumer
+     * @param urlResourceMongo storage handed to the consumer
+     */
+    public RawDataStore(UnparsedDataMongo unparsedDataMongo, UrlResourceMongo urlResourceMongo) {
+        // The queue and the monitor are already initialised here, which the consumer's constructor relies on.
+        this.rawDataConsumer = new RawDataConsumer(threadPool, this, unparsedDataMongo, urlResourceMongo);
+    }
+
+    /**
+     * Submits the consumer loop to the thread pool. The {@code @PostConstruct} hook is commented
+     * out, so nothing in this class calls this method automatically.
+     */
+    //@PostConstruct
+    public void start() {
+        threadPool.submit(rawDataConsumer);
+    }
+
+    /** @return the lock object shared with the consumer for wait/notify */
+    public Object getMonitor() {
+        return monitor;
+    }
+
+    /**
+     * Enqueues an item and notifies one thread waiting on the monitor.
+     *
+     * @param rawData the item to enqueue
+     */
+    public void put(RawData rawData) {
+        dataQueue.add(rawData);
+        synchronized (monitor) {
+            // wake the consumer thread: data is available
+            monitor.notify();
+        }
+    }
+
+    /** @return the next queued item, or {@code null} when the queue is empty */
+    public RawData get() {
+        return dataQueue.poll();
+    }
+
+    /** @return the number of items currently queued */
+    public int size() {
+        return dataQueue.size();
+    }
+}

+ 128 - 0
web/src/main/java/cn/reghao/bnt/web/parser/consumer/UnparsedDataParser.java

@@ -0,0 +1,128 @@
+package cn.reghao.bnt.web.parser.consumer;
+
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.core.url.RawData;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * @author reghao
+ * @date 2023-09-11 14:50:27
+ */
+@Slf4j
+public class UnparsedDataParser implements Runnable {
+    private final Object monitor;
+    private final UnparsedDataStore unparsedDataStore;
+    private final UnparsedDataMongo unparsedDataMongo;
+    private final Map<String, DataParser> dataParsers;
+    private final ExecutorService threadPool;
+
+    public UnparsedDataParser(ExecutorService threadPool, UnparsedDataStore unparsedDataStore,
+                              UnparsedDataMongo unparsedDataMongo, UrlResourceMongo urlResourceMongo,
+                              List<DataParser> dataParsers) {
+        this.threadPool = threadPool;
+        this.unparsedDataStore = unparsedDataStore;
+        this.monitor = unparsedDataStore.getMonitor();
+        this.unparsedDataMongo = unparsedDataMongo;
+        this.dataParsers = dataParsers.stream().collect(Collectors.toMap(DataParser::parserName, Function.identity()));
+    }
+
+    @Override
+    public void run() {
+        ThreadPoolExecutor tpe = (ThreadPoolExecutor) threadPool;
+        log.info("解析 UnparsedData...");
+        while (!Thread.interrupted()) {
+            try {
+                int activeThread = tpe.getActiveCount();
+                if (activeThread < 10) {
+                    synchronized (monitor) {
+                        Object object = unparsedDataStore.get();
+                        if (object instanceof UnparsedData) {
+                            UnparsedData unparsedData = (UnparsedData) object;
+                            threadPool.submit(new UnparsedDataTask(unparsedData));
+                        } else if (object instanceof RawData) {
+                            RawData rawData = (RawData) object;
+                            threadPool.submit(new RawDataTask(rawData));
+                        } else {
+                            //log.info("调用 monitor.wait() 等待 UnparsedDataStore 中有数据可用");
+                            monitor.wait();
+                        }
+                    }
+                } else {
+                    log.info("休眠 1s 等待线程池空闲...");
+                    Thread.sleep(1_000);
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    class UnparsedDataTask implements Runnable {
+        private final UnparsedData unparsedData;
+
+        public UnparsedDataTask(UnparsedData unparsedData) {
+            this.unparsedData = unparsedData;
+        }
+
+        @Override
+        public void run() {
+            RawData rawData = unparsedData.getRawData();
+            String url = rawData.getCrawlUrl().getUrl();
+            try {
+                if (!rawData.getData().equals("404")) {
+                    String parser = rawData.getCrawlUrl().getParser();
+                    DataParser dataParser = dataParsers.get(parser);
+                    if (dataParser != null) {
+                        String body = rawData.getData();
+                        Map<String, Object> map =  dataParser.parse(url, body);
+                        unparsedData.setParsed(1);
+                    } else {
+                        log.error(" RawData 中的解析器 {} 不存在...", parser);
+                    }
+                } else {
+                    unparsedData.setParsed(2);
+                }
+            } catch (Exception e) {
+                log.error("处理 {} 的 RawData 发生异常 -> {}", rawData.getCrawlUrl().getUrl(), e.getMessage());
+                unparsedData.setParsed(2);
+            } finally {
+                unparsedDataMongo.update(unparsedData);
+            }
+        }
+    }
+
+    class RawDataTask implements Runnable {
+        private final RawData rawData;
+
+        public RawDataTask(RawData rawData) {
+            this.rawData = rawData;
+        }
+
+        @Override
+        public void run() {
+            String url = rawData.getCrawlUrl().getUrl();
+            try {
+                String parser = rawData.getCrawlUrl().getParser();
+                DataParser dataParser = dataParsers.get(parser);
+                if (dataParser != null) {
+                    String body = rawData.getData();
+                    Map<String, Object> map =  dataParser.parse(url, body);
+                } else {
+                    log.error(" RawData 中的解析器 {} 不存在...", parser);
+                }
+            } catch (Exception e) {
+                log.error("处理 {} 的 RawData 发生异常 -> {}", rawData.getCrawlUrl().getUrl(), e.getMessage());
+            }
+        }
+    }
+}

+ 108 - 0
web/src/main/java/cn/reghao/bnt/web/parser/consumer/UnparsedDataStore.java

@@ -0,0 +1,108 @@
+package cn.reghao.bnt.web.parser.consumer;
+
+import cn.reghao.jutil.jdk.thread.ThreadPoolWrapper;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import com.mongodb.client.MongoCursor;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * Bounded in-memory queue (capacity 10 000) between producers of unparsed data and the
+ * {@link UnparsedDataParser} that is created with a reference to this store.
+ * <p>
+ * Every successful {@code put} notifies {@link #getMonitor()}; presumably the consumer
+ * waits on that monitor when the queue is empty (the consumer is not part of this class).
+ *
+ * @author reghao
+ * @date 2023-09-11 14:50:27
+ */
+@Slf4j
+@Component
+public class UnparsedDataStore implements Runnable {
+    private final int threads = 20;
+    private final ExecutorService threadPool = ThreadPoolWrapper.threadPool("rawdata-store", threads);
+    private final LinkedBlockingQueue<Object> dataQueue;
+    private final Object monitor;
+    private final MongoConverter mongoConverter;
+    private final UnparsedDataParser unparsedDataParser;
+    private final UnparsedDataMongo unparsedDataMongo;
+
+    public UnparsedDataStore(MongoConverter mongoConverter, UnparsedDataMongo unparsedDataMongo,
+                             UrlResourceMongo urlResourceMongo, List<DataParser> dataParsers) {
+        this.mongoConverter = mongoConverter;
+        this.unparsedDataMongo = unparsedDataMongo;
+        this.dataQueue = new LinkedBlockingQueue<>(10_000);
+        this.monitor = new Object();
+        this.unparsedDataParser =
+                new UnparsedDataParser(threadPool, this, unparsedDataMongo, urlResourceMongo, dataParsers);
+    }
+
+    /**
+     * Submits the parser task to the thread pool.
+     * The Mongo loader implemented by {@link #run()} is currently not submitted.
+     */
+    //@PostConstruct
+    public void start() {
+        log.info("------------------------------------------------------------");
+        log.info("开始数据解析任务...");
+        log.info("------------------------------------------------------------");
+        threadPool.submit(unparsedDataParser);
+    }
+
+    /**
+     * Loader loop: streams every document with {@code parsed == 0} for the
+     * {@code BiliVideoDataParser} parser into the queue, sleeps one minute and repeats
+     * until the thread is interrupted.
+     */
+    @Override
+    public void run() {
+        String parser = "BiliVideoDataParser";
+        Map<String, Object> filter = new HashMap<>();
+        filter.put("parsed", 0);
+        filter.put("parser", parser);
+
+        int i = 0;
+        while (!Thread.currentThread().isInterrupted()) {
+            try {
+                // The cursor is opened with noCursorTimeout, so it must be closed explicitly.
+                try (MongoCursor<Document> cursor = unparsedDataMongo.getCursor(filter)) {
+                    while (cursor.hasNext()) {
+                        Document document = cursor.next();
+                        UnparsedData unparsedData = mongoConverter.read(UnparsedData.class, document);
+                        this.put(unparsedData);
+                        log.info("第 {} 个文档", ++i);
+                    }
+                }
+
+                log.info("休眠 1min 等待有新数据可用");
+                Thread.sleep(60_000);
+            } catch (InterruptedException e) {
+                // Restore the flag and stop; otherwise the loop could never be cancelled.
+                Thread.currentThread().interrupt();
+                log.warn("UnparsedDataStore loader interrupted, stopping", e);
+                return;
+            }
+        }
+    }
+
+    /**
+     * @return the monitor object that is notified after every enqueue
+     */
+    public Object getMonitor() {
+        return monitor;
+    }
+
+    /**
+     * Enqueues an {@link UnparsedData}, blocking while the queue is full.
+     *
+     * @throws InterruptedException if interrupted while waiting for queue space
+     */
+    public void put(UnparsedData unparsedData) throws InterruptedException {
+        put((Object) unparsedData);
+    }
+
+    /**
+     * Enqueues an arbitrary object, blocking while the queue is full, then notifies the monitor.
+     *
+     * @throws InterruptedException if interrupted while waiting for queue space
+     */
+    public void put(Object object) throws InterruptedException {
+        dataQueue.put(object);
+        synchronized (monitor) {
+            // Tell the consumer thread that data is available.
+            monitor.notify();
+        }
+    }
+
+    /**
+     * @return the head of the queue, or {@code null} when the queue is empty (non-blocking)
+     */
+    public Object get() {
+        return dataQueue.poll();
+    }
+
+    /**
+     * @return the number of queued elements
+     */
+    public int size() {
+        return dataQueue.size();
+    }
+}

+ 26 - 0
web/src/main/java/cn/reghao/bnt/web/parser/controller/CrawlerController.java

@@ -0,0 +1,26 @@
+package cn.reghao.bnt.web.parser.controller;
+
+import cn.reghao.jutil.jdk.result.WebResult;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.web.parser.consumer.UnparsedDataStore;
+import org.springframework.web.bind.annotation.*;
+
+/**
+ * REST endpoint under {@code /api/crawler} that accepts raw crawl results and passes
+ * them to the {@link UnparsedDataStore}.
+ *
+ * @author reghao
+ * @date 2022-05-25 23:46:32
+ */
+@RequestMapping("/api/crawler")
+@RestController
+public class CrawlerController {
+    private final UnparsedDataStore unparsedDataStore;
+
+    public CrawlerController(UnparsedDataStore unparsedDataStore) {
+        this.unparsedDataStore = unparsedDataStore;
+    }
+
+    /**
+     * Hands the posted {@link RawData} to {@code unparsedDataStore.put(...)}.
+     *
+     * @param rawData request body
+     * @return the value of {@code WebResult.success()}
+     * @throws InterruptedException propagated from {@code unparsedDataStore.put(...)}
+     */
+    @PostMapping("/rawdata")
+    public String putRawData(@RequestBody RawData rawData) throws InterruptedException {
+        unparsedDataStore.put(rawData);
+        return WebResult.success();
+    }
+}

+ 31 - 0
web/src/main/java/cn/reghao/bnt/web/parser/controller/TaskController.java

@@ -0,0 +1,31 @@
+package cn.reghao.bnt.web.parser.controller;
+
+import cn.reghao.jutil.jdk.result.WebResult;
+import cn.reghao.bnt.web.parser.task.DataProducer;
+import org.springframework.http.MediaType;
+import org.springframework.web.bind.annotation.*;
+
+/**
+ * REST endpoints under {@code /api/parser/task} backed by a {@link DataProducer}.
+ *
+ * @author reghao
+ * @date 2023-06-15 14:52:56
+ */
+@RequestMapping("/api/parser/task")
+@RestController
+public class TaskController {
+    private final DataProducer dataProducer;
+
+    public TaskController(DataProducer dataProducer) {
+        this.dataProducer = dataProducer;
+    }
+
+    /**
+     * Currently a stub: {@code size} is not used and a success result is returned immediately.
+     *
+     * @param size path variable; ignored by the current implementation
+     * @return the value of {@code WebResult.success()}
+     */
+    @PostMapping(value = "/submit/{size}", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String submitTask(@PathVariable("size") int size) {
+        return WebResult.success();
+    }
+
+    /**
+     * Reports {@code dataProducer.size()} wrapped in a success result.
+     *
+     * @return the value of {@code WebResult.success(total)}
+     */
+    @GetMapping(value = "/running", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String getRunningTasks() {
+        int total = dataProducer.size();
+        return WebResult.success(total);
+    }
+}

+ 51 - 0
web/src/main/java/cn/reghao/bnt/web/parser/controller/UrlController.java

@@ -0,0 +1,51 @@
+package cn.reghao.bnt.web.parser.controller;
+
+import cn.reghao.jutil.jdk.result.WebResult;
+import cn.reghao.bnt.core.event.EvtCrawl;
+import cn.reghao.bnt.core.event.EvtNoUrl;
+import cn.reghao.bnt.core.parser.SiteParser;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.web.parser.service.UrlSchedulerService;
+import org.springframework.http.MediaType;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * REST endpoints under {@code /api/url}; every handler delegates to
+ * {@link UrlSchedulerService} and wraps the outcome with {@code WebResult}.
+ *
+ * @author reghao
+ * @date 2022-05-27 16:50:50
+ */
+@RequestMapping("/api/url")
+@RestController
+public class UrlController {
+    private final UrlSchedulerService urlSchedulerService;
+
+    public UrlController(UrlSchedulerService urlSchedulerService) {
+        this.urlSchedulerService = urlSchedulerService;
+    }
+
+    /**
+     * @return the list from {@code urlSchedulerService.getSiteParsers()} wrapped in a success result
+     */
+    @GetMapping(value = "/parser", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String siteParsers() {
+        List<SiteParser> list = urlSchedulerService.getSiteParsers();
+        return WebResult.success(list);
+    }
+
+    /**
+     * @param site   request parameter {@code site}
+     * @param parser request parameter {@code parser}
+     * @return the set from {@code urlSchedulerService.getUrls(site, parser)} wrapped in a success result
+     */
+    @GetMapping(value = "/get", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String getUrls(@RequestParam("site") String site, @RequestParam("parser") String parser) {
+        Set<CrawlUrl> set = urlSchedulerService.getUrls(site, parser);
+        return WebResult.success(set);
+    }
+
+    /**
+     * Forwards a posted {@link EvtCrawl} to {@code urlSchedulerService.crawlEvent(...)}.
+     *
+     * @return the value of {@code WebResult.success()}
+     */
+    @PostMapping(value = "/event/crawl", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String evtCrawl(@RequestBody EvtCrawl evtCrawl) {
+        urlSchedulerService.crawlEvent(evtCrawl);
+        return WebResult.success();
+    }
+
+    /**
+     * Forwards a posted {@link EvtNoUrl} to {@code urlSchedulerService.noUrlEvent(...)}.
+     *
+     * @return the value of {@code WebResult.success()}
+     */
+    @PostMapping(value = "/event/nourl", produces = MediaType.APPLICATION_JSON_VALUE)
+    public String evtNoUrl(@RequestBody EvtNoUrl evtNoUrl) {
+        urlSchedulerService.noUrlEvent(evtNoUrl);
+        return WebResult.success();
+    }
+}

+ 103 - 0
web/src/main/java/cn/reghao/bnt/web/parser/crawler/CrawlerContext.java

@@ -0,0 +1,103 @@
+package cn.reghao.bnt.web.parser.crawler;
+
+import cn.reghao.jutil.jdk.http.WebRequest;
+import cn.reghao.jutil.jdk.http.WebResponse;
+import cn.reghao.jutil.jdk.text.TextFile;
+import cn.reghao.jutil.tool.http.DefaultWebRequest;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.core.url.Site;
+import cn.reghao.bnt.web.config.AppProperties;
+import cn.reghao.bnt.web.parser.consumer.RawDataStore;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.types.ObjectId;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+
+/**
+ * @author reghao
+ * @date 2022-11-22 19:34:54
+ */
+@Slf4j
+@Component
+public class CrawlerContext {
+    private ObjectId nextId = new ObjectId(0, 0);
+    private final UrlResourceMongo urlResourceMongo;
+    private final WebRequest webRequest;
+    private final RawDataStore rawDataStore;
+
+    public CrawlerContext(UrlResourceMongo urlResourceMongo, AppProperties appProperties, RawDataStore rawDataStore) {
+        this.urlResourceMongo = urlResourceMongo;
+        this.rawDataStore = rawDataStore;
+        TextFile textFile = new TextFile();
+        String cookie = textFile.readFile(appProperties.getCookiePath());
+        this.webRequest = new DefaultWebRequest(cookie, "bilibili.com");
+    }
+
+    //@PostConstruct
+    public void start() {
+        String site = Site.bilibili.name();
+        String parser = "BiliVideoDataParser";
+        String parser1 = "BiliCommentDataParser";
+
+        int pageSize = 100;
+        List<UrlResource> list0 = urlResourceMongo.findNotCrawled(site, parser, pageSize);
+        log.info("开始爬取 CrawlUrl...");
+        list0.forEach(urlResource -> {
+            try {
+                CrawlUrl crawlUrl = urlResource.getCrawlUrl();
+                crawl(crawlUrl);
+                Thread.sleep(3_000);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        });
+        log.info("爬取 CrawlUrl 完成...");
+
+        /*List<UrlResource> list = urlResourceMongo.findNotCrawled(site, parser, pageSize, nextId);
+        while (!list.isEmpty()) {
+            list.forEach(urlResource -> {
+                try {
+                    CrawlUrl crawlUrl = urlResource.getCrawlUrl();
+                    log.info("爬取 CrawlUrl...");
+                    //crawl(crawlUrl);
+                    Thread.sleep(3_000);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            });
+
+            nextId = new ObjectId(list.get(list.size()-1).getId());
+            list = urlResourceMongo.findNotCrawled(site, parser, pageSize, nextId);
+        }*/
+    }
+
+    private void crawl(CrawlUrl crawlUrl) {
+        String url = crawlUrl.getUrl();
+        WebResponse webResponse = webRequest.get(url);
+        int statusCode = webResponse.getStatusCode();
+        if (statusCode != 200) {
+            log.error("请求 {} 失败", url);
+            return;
+        }
+
+        String body = webResponse.getBody();
+        RawData rawData = new RawData(crawlUrl, body);
+        rawDataStore.put(rawData);
+    }
+
+    class ProducerTask implements Runnable {
+        private final CrawlUrl crawlUrl;
+
+        public ProducerTask(CrawlUrl crawlUrl) {
+            this.crawlUrl = crawlUrl;
+        }
+
+        @Override
+        public void run() {
+        }
+    }
+}

+ 89 - 0
web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/DataRecordMongo.java

@@ -0,0 +1,89 @@
+package cn.reghao.bnt.web.parser.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.model.po.DataRecord;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access for the {@code DataRecord} collection.
+ *
+ * @author reghao
+ * @date 2023-03-16 14:19:27
+ */
+@Slf4j
+@Repository
+public class DataRecordMongo implements BaseCrud<DataRecord>, BaseQuery<DataRecord> {
+    private final String colName = "DataRecord";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public DataRecordMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts one record.
+     *
+     * @return always {@code null}
+     */
+    @Override
+    public DataRecord save(DataRecord dataRecord) {
+        mongoTemplate.getCollection(colName).insertOne(toDocument(dataRecord));
+        return null;
+    }
+
+    /**
+     * Inserts all records with an unordered bulk write, so one failing document does not
+     * stop the remaining inserts. A {@code null} or empty list is a no-op.
+     */
+    @Override
+    public void saveAll(List<DataRecord> list) {
+        if (list == null || list.isEmpty()) {
+            // insertMany rejects an empty list with IllegalArgumentException.
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(this::toDocument)
+                .collect(Collectors.toList());
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            log.error(e.getMessage());
+        }
+    }
+
+    /**
+     * Stamps the record's update time and writes it with {@code replaceOne}.
+     */
+    @Override
+    public void update(DataRecord dataRecord) {
+        dataRecord.setUpdateTime(LocalDateTime.now());
+        Document document = toDocument(dataRecord);
+        // NOTE(review): the filter is empty (the url criterion was disabled), so replaceOne
+        // matches an arbitrary document instead of the one passed in. Confirm DataRecord's
+        // key field and restore a filter on it.
+        Document filter = new Document();
+        mongoTemplate.getCollection(colName).replaceOne(filter, document);
+    }
+
+    /**
+     * Removes documents through {@code MongoTemplate#remove}.
+     */
+    @Override
+    public void delete(DataRecord dataRecord) {
+        // NOTE(review): the query has no criteria (the url criterion was disabled), so this
+        // matches every document of the collection mapped to DataRecord, not only the given
+        // record. Confirm DataRecord's key field and restore the criterion.
+        Query query = new Query();
+        mongoTemplate.remove(query, DataRecord.class);
+    }
+
+    /**
+     * @return up to {@code size} records of {@code site} whose {@code parsed} field is 0
+     */
+    public List<DataRecord> findNotParsed(String site, int size) {
+        Query query = new Query()
+                .addCriteria(Criteria.where("site").is(site))
+                .addCriteria(Criteria.where("parsed").is(0))
+                .limit(size);
+        return mongoTemplate.find(query, DataRecord.class, colName);
+    }
+
+    /** Converts a record to a BSON document using the Spring converter. */
+    private Document toDocument(DataRecord dataRecord) {
+        Document doc = new Document();
+        mongoConverter.write(dataRecord, doc);
+        return doc;
+    }
+}

+ 290 - 0
web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/UnparsedDataMongo.java

@@ -0,0 +1,290 @@
+package cn.reghao.bnt.web.parser.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import cn.reghao.bnt.web.parser.model.vo.UnparsedDataStatistics;
+import com.mongodb.BasicDBList;
+import com.mongodb.BasicDBObject;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.MongoCursor;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.bson.types.ObjectId;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.time.LocalDateTime;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access for the {@code UnparsedData} collection: CRUD, "not parsed" queries,
+ * a raw cursor and aggregation statistics.
+ * <p>
+ * Code elsewhere in this project stores the {@code parsed} field as an integer:
+ * 0 = not parsed yet, 1 = parsed, 2 = failed.
+ *
+ * @author reghao
+ * @date 2021-03-13 03:21:27
+ */
+@Slf4j
+@Repository
+public class UnparsedDataMongo implements BaseCrud<UnparsedData>, BaseQuery<UnparsedData> {
+    private final String colName = "UnparsedData";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public UnparsedDataMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts one document.
+     *
+     * @return always {@code null}
+     */
+    @Override
+    public UnparsedData save(UnparsedData unparsedData) {
+        mongoTemplate.getCollection(colName).insertOne(toDocument(unparsedData));
+        return null;
+    }
+
+    /**
+     * Inserts all elements with an unordered bulk write, so one failing document does not
+     * stop the remaining inserts. A {@code null} or empty list is a no-op.
+     */
+    @Override
+    public void saveAll(List<UnparsedData> list) {
+        if (list == null || list.isEmpty()) {
+            // insertMany rejects an empty list with IllegalArgumentException.
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(this::toDocument)
+                .collect(Collectors.toList());
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            log.error(e.getMessage());
+        }
+    }
+
+    /**
+     * Stamps the update time and replaces the stored document that has the same {@code url}.
+     */
+    @Override
+    public void update(UnparsedData unparsedData) {
+        unparsedData.setUpdateTime(LocalDateTime.now());
+        Document filter = new Document("url", unparsedData.getUrl());
+        mongoTemplate.getCollection(colName).replaceOne(filter, toDocument(unparsedData));
+    }
+
+    /**
+     * Deletes the documents whose {@code url} equals the given object's url.
+     */
+    @Override
+    public void delete(UnparsedData unparsedData) {
+        Query query = new Query(Criteria.where("url").is(unparsedData.getUrl()));
+        // Name the collection explicitly, like every read/insert in this class does.
+        mongoTemplate.remove(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * Deletes every document matching all given criteria and logs the deleted count.
+     */
+    public void batchDelete(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        // Name the collection explicitly, like every read/insert in this class does.
+        DeleteResult deleteResult = mongoTemplate.remove(query, UnparsedData.class, colName);
+        long count = deleteResult.getDeletedCount();
+        log.info("删除 {} 条数据",count);
+    }
+
+    /**
+     * @param map field/value pairs combined into an equality filter
+     * @return number of matching documents
+     */
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    /**
+     * @return number of documents matching all given criteria
+     */
+    public long countByCriteria(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.count(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * @return the first document with the given url, or {@code null} when none exists
+     */
+    public UnparsedData findByUrl(String url) {
+        Query query = new Query(Criteria.where("url").is(url));
+        List<UnparsedData> list = mongoTemplate.find(query, UnparsedData.class, colName);
+        return list.isEmpty() ? null : list.get(0);
+    }
+
+    /**
+     * @return every document of the collection
+     */
+    @Deprecated
+    public List<UnparsedData> findAll() {
+        return mongoTemplate.findAll(UnparsedData.class, colName);
+    }
+
+    /**
+     * @return every document matching all given criteria
+     */
+    public List<UnparsedData> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * @return up to {@code size} documents of {@code site} with {@code parsed == 0}
+     */
+    @Deprecated
+    public List<UnparsedData> findNotParsed(String site, int size) {
+        Query query = notParsedQuery(site, null).limit(size);
+        return mongoTemplate.find(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * @return up to {@code size} documents of {@code site}/{@code parser} with {@code parsed == 0}
+     */
+    public List<UnparsedData> findNotParsed(String site, String parser, int size) {
+        Query query = notParsedQuery(site, parser).limit(size);
+        return mongoTemplate.find(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * @return up to {@code size} documents of {@code site} with {@code parsed == 0}
+     *         and {@code _id} greater than {@code lastId}
+     */
+    @Deprecated
+    public List<UnparsedData> findNotParsed(String site, int size, ObjectId lastId) {
+        Query query = notParsedQuery(site, null)
+                .addCriteria(Criteria.where("_id").gt(lastId))
+                .limit(size);
+        return mongoTemplate.find(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * @return up to 1000 documents of {@code site}/{@code parser} with {@code parsed == 0}
+     *         and {@code _id} greater than {@code lastId}
+     */
+    @Deprecated
+    public List<UnparsedData> findNotParsed(String site, String parser, ObjectId lastId) {
+        Query query = notParsedQuery(site, parser)
+                .addCriteria(Criteria.where("_id").gt(lastId))
+                .limit(1000);
+        return mongoTemplate.find(query, UnparsedData.class, colName);
+    }
+
+    /**
+     * Opens a cursor with {@code noCursorTimeout} over the documents matching the
+     * field/value pairs. The caller is responsible for closing the returned cursor.
+     */
+    public MongoCursor<Document> getCursor(Map<String, Object> map) {
+        Document filter = new Document();
+        filter.putAll(map);
+        return mongoTemplate.getCollection(colName).find(filter, Document.class).noCursorTimeout(true).cursor();
+    }
+
+    /**
+     * Groups the collection by the given fields, counts each group and sorts by count descending.
+     * <p>
+     * NOTE(review): the result map is keyed by the group's {@code url} value only, so this
+     * looks intended for {@code fields} containing "url" — confirm with callers.
+     *
+     * @param fields field names used as the {@code $group} key
+     * @return url -> count; the first occurrence of a url wins
+     */
+    public Map<String, Integer> aggregate(List<String> fields) {
+        Document groupByFields = new Document();
+        for (String field : fields) {
+            groupByFields.put(field, "$" + field);
+        }
+
+        Document groupObject = new Document("_id", groupByFields);
+        groupObject.put("total", new BasicDBObject("$sum", 1));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(new Document("$group", groupObject));
+        pipeline.add(new Document("$sort", new BasicDBObject("total", -1)));
+        AggregateIterable<Document> result = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+
+        Map<String, Integer> map = new HashMap<>();
+        for (Document document : result) {
+            Document id = document.get("_id", Document.class);
+            map.putIfAbsent(id.getString("url"), document.getInteger("total"));
+        }
+        return map;
+    }
+
+    /**
+     * @return one entry per (site, parser) pair with the number of documents, largest first
+     */
+    public List<UnparsedDataStatistics> getUnparsedDataStatistics() {
+        Document group = new Document("_id", siteParserKey());
+        group.put("total", new BasicDBObject("$sum", 1));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(new Document("$group", group));
+        pipeline.add(new Document("$sort", new BasicDBObject("total", -1)));
+
+        AggregateIterable<Document> results = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+        List<UnparsedDataStatistics> list = new ArrayList<>();
+        for (Document result : results) {
+            Document id = result.get("_id", Document.class);
+            int total = (Integer) result.get("total");
+
+            UnparsedDataStatistics statistics = new UnparsedDataStatistics();
+            statistics.setTotal(total);
+            statistics.setSite(id.getString("site"));
+            statistics.setParser(id.getString("parser"));
+            list.add(statistics);
+        }
+        return list;
+    }
+
+    /**
+     * Like {@link #getUnparsedDataStatistics()} but additionally splits each group into
+     * not-parsed / parsed / parse-error counts.
+     * <p>
+     * Each counter accepts both the integer status written by the current code (0 / 1 / 2)
+     * and the value this method compared against before (null / true / false), so documents
+     * in either encoding are counted.
+     */
+    public List<UnparsedDataStatistics> getUnparsedDataStatistics1() {
+        Document groupObject = new Document("_id", siteParserKey());
+        groupObject.put("total", new BasicDBObject("$sum", 1));
+        groupObject.put("notParseCount", sumWhereParsedIn(null, 0));
+        groupObject.put("parsedCount", sumWhereParsedIn(true, 1));
+        groupObject.put("parseErrorCount", sumWhereParsedIn(false, 2));
+
+        Document queryGroup = new Document("$group", groupObject);
+        Document sortObject = new Document("$sort", new BasicDBObject("total", -1));
+        List<Document> pipeline = new ArrayList<>(Arrays.asList(queryGroup, sortObject));
+        AggregateIterable<Document> results = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+
+        List<UnparsedDataStatistics> list = new ArrayList<>();
+        for (Document result : results) {
+            Document id = result.get("_id", Document.class);
+            list.add(new UnparsedDataStatistics(
+                    id.getString("site"),
+                    id.getString("parser"),
+                    result.getInteger("total"),
+                    result.getInteger("notParseCount"),
+                    result.getInteger("parsedCount"),
+                    result.getInteger("parseErrorCount")));
+        }
+        return list;
+    }
+
+    /** Converts an entity to a BSON document using the Spring converter. */
+    private Document toDocument(UnparsedData unparsedData) {
+        Document doc = new Document();
+        mongoConverter.write(unparsedData, doc);
+        return doc;
+    }
+
+    /** Builds the "site [+ parser] and parsed == 0" query; {@code parser} may be null to omit it. */
+    private Query notParsedQuery(String site, String parser) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("site").is(site));
+        if (parser != null) {
+            query.addCriteria(Criteria.where("parser").is(parser));
+        }
+        query.addCriteria(Criteria.where("parsed").is(0));
+        return query;
+    }
+
+    /** {@code $group} key made of the site and parser fields. */
+    private static Document siteParserKey() {
+        Document idFields = new Document();
+        idFields.put("site", "$site");
+        idFields.put("parser", "$parser");
+        return idFields;
+    }
+
+    /** Accumulator that adds 1 for each document whose {@code parsed} equals one of the values. */
+    private static Document sumWhereParsedIn(Object legacyValue, Object statusCode) {
+        Document cond = new Document();
+        cond.put("if", new Document("$in", Arrays.asList("$parsed", Arrays.asList(legacyValue, statusCode))));
+        cond.put("then", 1);
+        cond.put("else", 0);
+        return new Document("$sum", new Document("$cond", cond));
+    }
+}

+ 242 - 0
web/src/main/java/cn/reghao/bnt/web/parser/db/mongo/UrlResourceMongo.java

@@ -0,0 +1,242 @@
+package cn.reghao.bnt.web.parser.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.core.parser.SiteParser;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import cn.reghao.bnt.web.parser.model.vo.UrlResourceStatistics;
+import com.mongodb.BasicDBObject;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.MongoCursor;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.bson.types.ObjectId;
+import org.springframework.data.domain.Sort;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * @author reghao
+ * @date 2022-02-27 14:33:02
+ */
+@Slf4j
+@Repository
+public class UrlResourceMongo implements BaseCrud<UrlResource>, BaseQuery<UrlResource> {
+    private final String colName = "UrlResource";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    /**
+     * @param mongoTemplate  used for collection access and typed queries
+     * @param mongoConverter used to convert {@link UrlResource} objects to BSON documents
+     */
+    public UrlResourceMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Converts the resource to a document and inserts it into the collection.
+     *
+     * @return always {@code null}
+     */
+    @Override
+    public UrlResource save(UrlResource urlResource) {
+        Document doc = new Document();
+        mongoConverter.write(urlResource, doc);
+        mongoTemplate.getCollection(colName).insertOne(doc);
+        return null;
+    }
+
+    /**
+     * Inserts all resources with an unordered bulk write, so documents that fail to insert
+     * are skipped while the others are still written. A {@code null} or empty list is a no-op.
+     */
+    @Override
+    public void saveAll(List<UrlResource> list) {
+        if (list == null || list.isEmpty()) {
+            // insertMany rejects an empty list with IllegalArgumentException.
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(urlResource -> {
+                    Document doc = new Document();
+                    mongoConverter.write(urlResource, doc);
+                    return doc;
+                })
+                .collect(Collectors.toList());
+
+        // Unordered: a failing document does not stop the remaining inserts.
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException ignored) {
+            // Deliberately ignored: documents that could not be inserted are skipped.
+        }
+    }
+
+    /**
+     * Stamps the update time and replaces the stored document that has the same {@code url}.
+     */
+    @Override
+    public void update(UrlResource urlResource) {
+        touchAndReplace(urlResource);
+    }
+
+    /**
+     * Sets the crawled time on the resource, then persists it like {@link #update(UrlResource)}.
+     */
+    public void updateCrawledTime(UrlResource urlResource, long crawledTime) {
+        urlResource.setCrawledTime(crawledTime);
+        touchAndReplace(urlResource);
+    }
+
+    /**
+     * Sets the crawled time and the not-found flag on the resource, then persists it like
+     * {@link #update(UrlResource)}.
+     */
+    public void updateSetNotFound(UrlResource urlResource, long crawledTime) {
+        urlResource.setCrawledTime(crawledTime);
+        urlResource.setNotFound(true);
+        touchAndReplace(urlResource);
+    }
+
+    /** Sets updateTime to now and replaces the document matched by the resource's url. */
+    private void touchAndReplace(UrlResource urlResource) {
+        urlResource.setUpdateTime(LocalDateTime.now());
+
+        Document replacement = new Document();
+        mongoConverter.write(urlResource, replacement);
+        Document byUrl = new Document();
+        byUrl.put("url", urlResource.getUrl());
+        mongoTemplate.getCollection(colName).replaceOne(byUrl, replacement);
+    }
+
+    /**
+     * Deletes the document whose {@code _id} equals the resource's id.
+     */
+    @Override
+    public void delete(UrlResource urlResource) {
+        Query query = new Query(Criteria.where("_id").is(urlResource.getId()));
+        // Name the collection explicitly, like every read/insert in this class does.
+        mongoTemplate.remove(query, UrlResource.class, colName);
+    }
+
+    /**
+     * @return the first resource with the given url, or {@code null} when none exists
+     */
+    public UrlResource findByUrl(String url) {
+        Query byUrl = new Query().addCriteria(Criteria.where("url").is(url));
+        List<UrlResource> matches = mongoTemplate.find(byUrl, UrlResource.class, colName);
+        if (matches.isEmpty()) {
+            return null;
+        }
+        return matches.get(0);
+    }
+
+    /**
+     * @return up to {@code pageSize} resources of {@code site}/{@code parser} with
+     *         {@code crawledTime == 0}, most recently updated first
+     */
+    public List<UrlResource> findNotCrawled(String site, String parser, int pageSize) {
+        Query notCrawled = new Query()
+                .addCriteria(Criteria.where("site").is(site))
+                .addCriteria(Criteria.where("parser").is(parser))
+                .addCriteria(Criteria.where("crawledTime").is(0))
+                .with(Sort.by(new Sort.Order(Sort.Direction.DESC, "updateTime")))
+                .skip(0)
+                .limit(pageSize);
+        return mongoTemplate.find(notCrawled, UrlResource.class, colName);
+    }
+
+    /**
+     * Returns the next page of resources of the given site and parser whose
+     * {@code crawledTime} is 0, continuing after {@code lastId}.
+     *
+     * <p>The page is ordered by {@code _id} ascending. Because the page is selected with
+     * {@code _id > lastId}, the id of the last returned element is only a valid resume point
+     * when the results are ordered by that same field; ordering by another field would let
+     * a caller skip or repeat documents.
+     *
+     * @param site     value the {@code site} field must equal
+     * @param parser   value the {@code parser} field must equal
+     * @param pageSize maximum number of resources to return
+     * @param lastId   exclusive lower bound for {@code _id}
+     * @return the matching resources ordered by {@code _id} ascending
+     */
+    public List<UrlResource> findNotCrawled(String site, String parser, int pageSize, ObjectId lastId) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("site").is(site));
+        query.addCriteria(Criteria.where("parser").is(parser));
+        query.addCriteria(Criteria.where("crawledTime").is(0));
+        query.addCriteria(Criteria.where("_id").gt(lastId));
+        // Sort by the cursor field so the last element always carries the largest id seen.
+        query.with(Sort.by(Sort.Direction.ASC, "_id"));
+        query.limit(pageSize);
+        return mongoTemplate.find(query, UrlResource.class, colName);
+    }
+
+    /**
+     * Lists the distinct (site, parser) pairs that still have documents with
+     * {@code crawledTime} equal to 0.
+     *
+     * @return one {@link SiteParser} per distinct pair found by the aggregation
+     */
+    public List<SiteParser> getNotCrawledSiteParsers() {
+        Document match = new Document("$match", new Document("crawledTime", 0));
+        Document groupKey = new Document("site", "$site").append("parser", "$parser");
+        Document group = new Document("$group", new Document("_id", groupKey));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(match);
+        pipeline.add(group);
+
+        AggregateIterable<Document> rows =
+                mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+        List<SiteParser> siteParsers = new ArrayList<>();
+        for (Document row : rows) {
+            // The grouping key is a sub-document holding both fields.
+            Document key = row.get("_id", Document.class);
+            siteParsers.add(new SiteParser(key.getString("site"), key.getString("parser")));
+        }
+        return siteParsers;
+    }
+
+    /**
+     * Counts the documents that satisfy every criterion in the list.
+     *
+     * @param criteriaList criteria added to a single query, in list order
+     * @return the number of matching documents
+     */
+    public long countByCriteria(List<Criteria> criteriaList) {
+        Query query = new Query();
+        for (Criteria criteria : criteriaList) {
+            query.addCriteria(criteria);
+        }
+        return mongoTemplate.count(query, UrlResource.class, colName);
+    }
+
+    /**
+     * Opens a cursor over the documents matching the given field/value filter.
+     *
+     * @param map field names mapped to the values they must equal
+     * @return a cursor over the matching raw documents; the caller should close it when done
+     */
+    public MongoCursor<Document> getCursor(Map<String, Object> map) {
+        Document filter = new Document(map);
+        return mongoTemplate.getCollection(colName)
+                .find(filter, Document.class)
+                .cursor();
+    }
+
+    /**
+     * Counts the stored documents per (site, parser) pair, largest group first.
+     *
+     * <p>Only {@code site}, {@code parser} and {@code total} are populated on the returned
+     * objects; the remaining fields keep their defaults.
+     *
+     * @return one statistics object per (site, parser) pair, ordered by total descending
+     */
+    public List<UrlResourceStatistics> getUrlResourceStatistics() {
+        Document groupKey = new Document("site", "$site").append("parser", "$parser");
+        Document group = new Document("_id", groupKey).append("total", new Document("$sum", 1));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(new Document("$group", group));
+        pipeline.add(new Document("$sort", new Document("total", -1)));
+
+        AggregateIterable<Document> results = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+        List<UrlResourceStatistics> list = new ArrayList<>();
+        for (Document result : results) {
+            Document key = result.get("_id", Document.class);
+            // $sum may come back as a 32-bit or 64-bit number; a hard cast to Integer would
+            // fail with ClassCastException for the latter, so read it through Number.
+            long total = ((Number) result.get("total")).longValue();
+
+            UrlResourceStatistics urlResourceStatistics = new UrlResourceStatistics();
+            urlResourceStatistics.setTotal(total);
+            urlResourceStatistics.setSite(key.getString("site"));
+            urlResourceStatistics.setParser(key.getString("parser"));
+            list.add(urlResourceStatistics);
+        }
+        return list;
+    }
+
+    /**
+     * Groups the stored documents by the given fields and counts each group.
+     *
+     * <p>The map key is always read from the {@code url} component of the grouping key, so the
+     * result is only meaningful when {@code fields} contains {@code "url"}; otherwise every
+     * group maps to the {@code null} key and only the first one is kept.
+     * NOTE(review): confirm whether callers ever pass fields other than "url".
+     *
+     * @param fields names of the document fields to group by
+     * @return group URL mapped to its document count, iterating from the largest count down
+     */
+    public Map<String, Integer> aggregate(List<String> fields) {
+        Document groupByFields = new Document();
+        for (String field : fields) {
+            groupByFields.put(field, "$" + field);
+        }
+
+        Document groupObject = new Document("_id", groupByFields);
+        groupObject.put("total", new BasicDBObject("$sum", 1));
+        Document queryGroup = new Document("$group", groupObject);
+
+        Document sortObject = new Document("$sort", new BasicDBObject("total", -1));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(queryGroup);
+        pipeline.add(sortObject);
+        AggregateIterable<Document> result = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+
+        // An insertion-ordered map keeps the descending order produced by the $sort stage;
+        // a plain HashMap would discard it.
+        Map<String, Integer> map = new java.util.LinkedHashMap<>();
+        for (Document document : result) {
+            Document groupKey = document.get("_id", Document.class);
+            String url = groupKey.getString("url");
+            Integer total = document.getInteger("total");
+            map.putIfAbsent(url, total);
+        }
+
+        return map;
+    }
+}

+ 33 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/dto/BiliCategory.java

@@ -0,0 +1,33 @@
+package cn.reghao.bnt.web.parser.model.dto;
+
+import java.io.Serializable;
+
+/**
+ * Serializable value object tying a Bilibili video id to a pair of category ids.
+ * NOTE(review): {@code pid} is presumably the parent category id and {@code id} the category
+ * id itself — confirm against the producer of these values.
+ *
+ * @author reghao
+ * @date 2023-11-21 23:20:16
+ */
+public class BiliCategory implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    // Bilibili video id (the "BV..." identifier).
+    private String bvId;
+    private Integer pid;
+    private Integer id;
+
+    /**
+     * Creates a category entry for one video.
+     *
+     * @param bvId Bilibili video id
+     * @param pid  value stored as {@code pid}
+     * @param id   value stored as {@code id}
+     */
+    public BiliCategory(String bvId, int pid, int id) {
+        this.bvId = bvId;
+        this.pid = Integer.valueOf(pid);
+        this.id = Integer.valueOf(id);
+    }
+
+    /** @return the Bilibili video id */
+    public String getBvId() {
+        return this.bvId;
+    }
+
+    /** @return the stored {@code pid} value */
+    public Integer getPid() {
+        return this.pid;
+    }
+
+    /** @return the stored {@code id} value */
+    public Integer getId() {
+        return this.id;
+    }
+}

+ 14 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/po/DataRecord.java

@@ -0,0 +1,14 @@
+package cn.reghao.bnt.web.parser.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.Getter;
+
+/**
+ * Persistent object holding a collection name together with a "next id" value.
+ * NOTE(review): presumably a per-collection progress marker recording where processing should
+ * resume — confirm against the code that reads and writes it.
+ *
+ * @author reghao
+ * @date 2023-06-16 14:18:17
+ */
+@Getter
+public class DataRecord extends BaseObject<String> {
+    // Collection name this record refers to (assumed from the field name — TODO confirm).
+    private String colName;
+    // NOTE(review): presumably the id to continue from in that collection — confirm.
+    private String nextId;
+}

+ 52 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/po/UnparsedData.java

@@ -0,0 +1,52 @@
+package cn.reghao.bnt.web.parser.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.RawData;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NoArgsConstructor;
+
+/**
+ * MongoDB storage format for the {@link RawData} held in Redis.
+ *
+ * <p>The fields of the wrapped {@link CrawlUrl} are flattened into this object next to the
+ * response body, so that a {@link RawData} can be rebuilt with {@link #getRawData()}.
+ *
+ * @author reghao
+ * @date 2021-03-17 00:45:37
+ */
+@EqualsAndHashCode(callSuper = true)
+@NoArgsConstructor
+@Data
+public class UnparsedData extends BaseObject<String> {
+    // Fields copied from CrawlUrl
+    private String site;
+    private String parser;
+    private String url;
+    private String referer;
+    private String dataType;
+    private int notFoundCode;
+    private int antiCrawlCode;
+    private String title;
+    // Response body data
+    private String data;
+    // 0 - not parsed yet, 1 - parsed successfully, 2 - parsing failed
+    private Integer parsed;
+
+    /**
+     * Flattens the given raw data into a new, not-yet-parsed record.
+     *
+     * @param rawData crawl URL plus response body to store
+     */
+    public UnparsedData(RawData rawData) {
+        this.data = rawData.getData();
+        this.parsed = 0;
+
+        CrawlUrl source = rawData.getCrawlUrl();
+        this.site = source.getSite();
+        this.parser = source.getParser();
+        this.url = source.getUrl();
+        this.referer = source.getReferer();
+        this.dataType = source.getDataType();
+        this.notFoundCode = source.getNotFoundCode();
+        this.antiCrawlCode = source.getAntiCrawlCode();
+        this.title = source.getTitle();
+    }
+
+    /**
+     * Rebuilds the raw data from the flattened fields.
+     *
+     * @return a new {@link RawData} wrapping a new {@link CrawlUrl} and the stored body
+     */
+    public RawData getRawData() {
+        return new RawData(
+                new CrawlUrl(site, parser, url, referer, dataType, notFoundCode, antiCrawlCode, title),
+                data);
+    }
+}

+ 89 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/po/UrlResource.java

@@ -0,0 +1,89 @@
+package cn.reghao.bnt.web.parser.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import cn.reghao.bnt.core.url.BodyDataType;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.Site;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * MongoDB storage format for the {@link CrawlUrl} held in Redis.
+ *
+ * <p>Two resources are considered equal when their URLs are equal; every other field is
+ * ignored by {@link #equals(Object)} and {@link #hashCode()}.
+ *
+ * @author reghao
+ * @date 2020-03-15 13:53:38
+ */
+@NoArgsConstructor
+@Data
+public class UrlResource extends BaseObject<String> {
+    private String site;
+    private String parser;
+    private String url;
+    private String referer;
+    private String dataType;
+    private int notFoundCode;
+    private int antiCrawlCode;
+    private String title;
+    // Time at which the URL was crawled (millisecond timestamp); 0 until it has been crawled
+    private Long crawledTime;
+    private boolean notFound;
+
+    /**
+     * Creates a resource for data that has not been crawled yet; equivalent to a
+     * {@link CrawlUrl}.
+     *
+     * @param site          site the URL belongs to; stored by enum name
+     * @param parser        name of the parser responsible for the response
+     * @param url           URL to crawl
+     * @param referer       referer stored with the URL
+     * @param dataType      type of the response body; stored by enum name
+     * @param notFoundCode  value stored as {@code notFoundCode}
+     * @param antiCrawlCode value stored as {@code antiCrawlCode}
+     * @param title         title stored with the URL
+     * @date 2021-11-19 7:53 PM
+     */
+    public UrlResource(Site site, String parser, String url, String referer, BodyDataType dataType,
+                       int notFoundCode, int antiCrawlCode, String title) {
+        this.site = site.name();
+        this.parser = parser;
+        this.url = url;
+        this.referer = referer;
+        this.dataType = dataType.name();
+        this.notFoundCode = notFoundCode;
+        this.antiCrawlCode = antiCrawlCode;
+        this.title = title;
+        this.crawledTime = 0L;
+        this.notFound = false;
+    }
+
+    /**
+     * Creates a not-yet-crawled resource from the fields of the given crawl URL.
+     *
+     * @param crawlUrl crawl URL whose fields are copied
+     */
+    public UrlResource(CrawlUrl crawlUrl) {
+        this.site = crawlUrl.getSite();
+        this.parser = crawlUrl.getParser();
+        this.url = crawlUrl.getUrl();
+        this.referer = crawlUrl.getReferer();
+        this.dataType = crawlUrl.getDataType();
+        this.notFoundCode = crawlUrl.getNotFoundCode();
+        this.antiCrawlCode = crawlUrl.getAntiCrawlCode();
+        this.title = crawlUrl.getTitle();
+        this.crawledTime = 0L;
+        this.notFound = false;
+    }
+
+    /**
+     * Rebuilds the crawl URL from the stored fields.
+     *
+     * @return a new {@link CrawlUrl} carrying this resource's URL-related fields
+     */
+    public CrawlUrl getCrawlUrl() {
+        return new CrawlUrl(site, parser, url, referer, dataType, notFoundCode, antiCrawlCode, title);
+    }
+
+    /**
+     * Hash code derived from the URL only, consistent with {@link #equals(Object)}.
+     * A {@code null} URL (possible after the no-arg constructor) hashes like 0 instead of
+     * throwing.
+     */
+    @Override
+    public int hashCode() {
+        int result = 17;
+        result = result * 31 + java.util.Objects.hashCode(url);
+        return result;
+    }
+
+    /**
+     * Compares by URL only; two resources whose URLs are both {@code null} are equal.
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+
+        if (other instanceof UrlResource) {
+            UrlResource o = (UrlResource) other;
+            // Null-safe: the no-arg constructor leaves url unset.
+            return java.util.Objects.equals(o.url, url);
+        } else {
+            return false;
+        }
+    }
+}

+ 20 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/vo/DataCount.java

@@ -0,0 +1,20 @@
+package cn.reghao.bnt.web.parser.model.vo;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * View object carrying record counts for one site/parser pair.
+ * NOTE(review): presumably {@code total} is the sum of {@code processed} and
+ * {@code unprocessed}; nothing in this class enforces it — confirm with the producer.
+ *
+ * @author reghao
+ * @date 2021-09-10 01:01:39
+ */
+@NoArgsConstructor
+@AllArgsConstructor
+@Data
+public class DataCount {
+    // Site the counts belong to
+    private String site;
+    // Parser the counts belong to
+    private String parser;
+    // Count fields; their exact definitions are set by the code that fills this object
+    private long total;
+    private long processed;
+    private long unprocessed;
+}

+ 13 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/vo/DayCount.java

@@ -0,0 +1,13 @@
+package cn.reghao.bnt.web.parser.model.vo;
+
+import lombok.Data;
+
+/**
+ * View object pairing a creation date with a total.
+ * NOTE(review): presumably the number of records created on that day — confirm with the
+ * query that fills it.
+ *
+ * @author reghao
+ * @date 2021-09-15 23:24:33
+ */
+@Data
+public class DayCount {
+    // Creation date as text; the format is decided by the producer (not visible here)
+    private String createDate;
+    // Count associated with createDate
+    private int total;
+}

+ 23 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/vo/UnparsedDataStatistics.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.web.parser.model.vo;
+
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+
+/**
+ * View object with per site/parser counts of stored unparsed data.
+ *
+ * <p>{@code StatisticsService} fills the three parse-state counts with one count query each
+ * on the {@code parsed} field of the stored records.
+ *
+ * @author reghao
+ * @date 2022-05-19 10:01:55
+ */
+@Setter
+@Getter
+@AllArgsConstructor
+@NoArgsConstructor
+public class UnparsedDataStatistics {
+    // Site the counts belong to
+    private String site;
+    // Parser the counts belong to
+    private String parser;
+    // NOTE(review): presumably the number of all records for this site/parser — confirm
+    private long total;
+    // Records counted as not parsed yet
+    private long notParseCount;
+    // Records counted as parsed
+    private long parsedCount;
+    // Records counted as failed to parse
+    private long parseErrorCount;
+}

+ 23 - 0
web/src/main/java/cn/reghao/bnt/web/parser/model/vo/UrlResourceStatistics.java

@@ -0,0 +1,23 @@
+package cn.reghao.bnt.web.parser.model.vo;
+
+import lombok.Getter;
+import lombok.Setter;
+
+import java.time.LocalDateTime;
+
+/**
+ * View object with per site/parser counts of stored URL resources.
+ *
+ * <p>{@code StatisticsService} fills the crawl-state counts with one count query each.
+ *
+ * @author reghao
+ * @date 2022-05-19 10:01:55
+ */
+@Setter
+@Getter
+public class UrlResourceStatistics {
+    // Site the counts belong to
+    private String site;
+    // Parser the counts belong to
+    private String parser;
+    // Number of stored URL resources for this site/parser
+    private long total;
+    // Resources whose crawledTime is 0
+    private long notCrawlCount;
+    // Resources whose crawledTime is not 0
+    private long crawledCount;
+    // Resources flagged notFound
+    private long notFoundCount;
+    // NOTE(review): not set by any code visible here; presumably the latest crawl time — confirm
+    private LocalDateTime lastCrawlTime;
+    // NOTE(review): not set by any code visible here; presumably the latest insert time — confirm
+    private LocalDateTime lastAddTime;
+}

+ 100 - 0
web/src/main/java/cn/reghao/bnt/web/parser/rpc/BiliCache.java

@@ -0,0 +1,100 @@
+package cn.reghao.bnt.web.parser.rpc;
+
+import cn.reghao.bnt.web.parser.model.dto.BiliCategory;
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.model.po.UnparsedData;
+import cn.reghao.bnt.web.parser.task.DataProducer;
+import cn.reghao.bnt.web.parser.site.bilibili.db.mongo.BiliVideoMongo;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliVideo;
+import cn.reghao.bnt.web.parser.task.cache.CacheContext;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * @author reghao
+ * @date 2023-05-31 16:28:13
+ */
+@Slf4j
+@Service
+public class BiliCache {
+    private final CacheContext cacheContext;
+    private final BiliVideoMongo biliVideoMongo;
+    private final DataProducer dataProducer;
+    private final UnparsedDataMongo unparsedDataMongo;
+
+    public BiliCache(CacheContext cacheContext, BiliVideoMongo biliVideoMongo, DataProducer dataProducer,
+                     UnparsedDataMongo unparsedDataMongo) {
+        this.cacheContext = cacheContext;
+        this.biliVideoMongo = biliVideoMongo;
+        this.dataProducer = dataProducer;
+        this.unparsedDataMongo = unparsedDataMongo;
+    }
+
+    public void cacheBiliVideo(String bvId) {
+        if (dataProducer.size() > 10_000) {
+            log.error("当前队列中的元素已超过 10_000 个");
+            return;
+        }
+
+        String url = String.format("https://www.bilibili.com/video/%s", bvId);
+        try {
+            dataProducer.put(url);
+        } catch (InterruptedException e) {
+            e.printStackTrace();
+        }
+    }
+
+    public void cacheBiliVideo(int size) {
+        if (dataProducer.size() > 10_000) {
+            log.error("当前队列中的元素已超过 10_000 个");
+            return;
+        }
+
+        List<BiliVideo> biliVideos = biliVideoMongo.findNotCached(size);
+        List<String> list = biliVideos.stream()
+                .map(biliVideo -> {
+                    String bvId = biliVideo.getBvId();
+                    return String.format("https://www.bilibili.com/video/%s", bvId);
+                }).collect(Collectors.toList());
+    }
+
+    public int getTaskCount() {
+        return cacheContext.getActiveCount();
+    }
+
+    public List<BiliCategory> getCategory(List<String> bvIds) {
+        long start = System.currentTimeMillis();
+        List<BiliCategory> biliCategories = bvIds.stream()
+                .map(bvId -> {
+                    String url = String.format("https://www.bilibili.com/video/%s", bvId);
+                    UnparsedData unparsedData = unparsedDataMongo.findByUrl(url);
+                    if (unparsedData == null) {
+                        return null;
+                    }
+
+                    String html = unparsedData.getData();
+                    Document document = Jsoup.parse(html);
+                    Elements elements = document.select("tag-link");
+                    document.getElementsByClass("tag-link").forEach(element -> {
+                        String href = element.attr("href");
+                        if (href == null) {
+                            return;
+                        }
+
+                        System.out.println();
+                    });
+
+                    return new BiliCategory(bvId, 1, 1);
+                })
+                .collect(Collectors.toList());
+
+        log.info("process cost {}ms", System.currentTimeMillis()-start);
+        return biliCategories;
+    }
+}

+ 87 - 0
web/src/main/java/cn/reghao/bnt/web/parser/service/StatisticsService.java

@@ -0,0 +1,87 @@
+package cn.reghao.bnt.web.parser.service;
+
+import cn.reghao.bnt.web.parser.db.mongo.UnparsedDataMongo;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.vo.UnparsedDataStatistics;
+import cn.reghao.bnt.web.parser.model.vo.UrlResourceStatistics;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Builds per site/parser statistics for stored URL resources and stored unparsed data.
+ *
+ * <p>Each statistics entry is completed with three count queries. The entries are processed
+ * on a parallel stream, so those queries run concurrently.
+ *
+ * @author reghao
+ * @date 2022-05-27 17:12:44
+ */
+@Service
+public class StatisticsService {
+    private final UrlResourceMongo urlResourceMongo;
+    private final UnparsedDataMongo unparsedDataMongo;
+
+    public StatisticsService(UrlResourceMongo urlResourceMongo, UnparsedDataMongo unparsedDataMongo) {
+        this.urlResourceMongo = urlResourceMongo;
+        this.unparsedDataMongo = unparsedDataMongo;
+    }
+
+    /**
+     * @return the URL resource statistics per site/parser, with the not-crawled, crawled and
+     *         not-found counts filled in
+     */
+    public List<UrlResourceStatistics> getUrlResourceStatistics() {
+        return urlResourceMongo.getUrlResourceStatistics().stream()
+                .parallel()
+                .map(this::withCrawlCounts)
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * @return the unparsed data statistics per site/parser, with the not-parsed, parsed and
+     *         parse-error counts filled in
+     */
+    public List<UnparsedDataStatistics> getUnparsedDataStatistics() {
+        return unparsedDataMongo.getUnparsedDataStatistics().stream()
+                .parallel()
+                .map(this::withParseCounts)
+                .collect(Collectors.toList());
+    }
+
+    /** Fills the crawl-state counts of one entry and returns the same instance. */
+    private UrlResourceStatistics withCrawlCounts(UrlResourceStatistics statistics) {
+        String site = statistics.getSite();
+        String parser = statistics.getParser();
+
+        statistics.setNotCrawlCount(urlResourceMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("crawledTime").is(0))));
+        statistics.setCrawledCount(urlResourceMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("crawledTime").ne(0))));
+        statistics.setNotFoundCount(urlResourceMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("notFound").is(true))));
+        return statistics;
+    }
+
+    /**
+     * Fills the parse-state counts of one entry and returns the same instance.
+     *
+     * <p>{@code UnparsedData.parsed} is an Integer (0 - not parsed, 1 - parsed, 2 - failed),
+     * while this service used to query it as null/true/false and therefore missed records
+     * written with the integer encoding. Both encodings are matched so that records of either
+     * shape are counted.
+     */
+    private UnparsedDataStatistics withParseCounts(UnparsedDataStatistics statistics) {
+        String site = statistics.getSite();
+        String parser = statistics.getParser();
+
+        // $in with null also matches documents where the field is missing.
+        statistics.setNotParseCount(unparsedDataMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("parsed").in((Object) null, 0))));
+        statistics.setParsedCount(unparsedDataMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("parsed").in(true, 1))));
+        statistics.setParseErrorCount(unparsedDataMongo.countByCriteria(
+                criteriaFor(site, parser, Criteria.where("parsed").in(false, 2))));
+        return statistics;
+    }
+
+    /** Builds the criteria list "site = ..., parser = ..., plus one extra condition". */
+    private static List<Criteria> criteriaFor(String site, String parser, Criteria condition) {
+        List<Criteria> criteriaList = new ArrayList<>();
+        criteriaList.add(Criteria.where("site").is(site));
+        criteriaList.add(Criteria.where("parser").is(parser));
+        criteriaList.add(condition);
+        return criteriaList;
+    }
+}

+ 81 - 0
web/src/main/java/cn/reghao/bnt/web/parser/service/UrlSchedulerService.java

@@ -0,0 +1,81 @@
+package cn.reghao.bnt.web.parser.service;
+
+import cn.reghao.bnt.core.event.EvtCrawl;
+import cn.reghao.bnt.core.event.EvtNoUrl;
+import cn.reghao.bnt.core.parser.SiteParser;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.RawData;
+import cn.reghao.bnt.core.url.Site;
+import cn.reghao.bnt.core.url.UrlScheduler;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import cn.reghao.bnt.web.parser.consumer.RawDataStore;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.types.ObjectId;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * {@link UrlScheduler} backed by the URL resources stored in MongoDB.
+ *
+ * <p>URLs are handed out page by page. A separate paging cursor (the id of the last resource
+ * handed out) is kept for every site/parser pair, so that paging through one pair does not
+ * move the position of another.
+ *
+ * @author reghao
+ * @date 2022-05-26 15:09:26
+ */
+@Slf4j
+@Service
+public class UrlSchedulerService implements UrlScheduler {
+    /** Maximum number of URLs handed out per call. */
+    private static final int PAGE_SIZE = 10_000;
+    /** Cursor value used when paging starts (or restarts) from the beginning. */
+    private static final ObjectId FIRST_ID = new ObjectId(0, 0);
+
+    private final UrlResourceMongo urlResourceMongo;
+    private final RawDataStore rawDataStore;
+    // Paging cursor per "site/parser" key; only accessed from the synchronized getUrls.
+    private final java.util.Map<String, ObjectId> lastIds = new java.util.HashMap<>();
+
+    public UrlSchedulerService(UrlResourceMongo urlResourceMongo, RawDataStore rawDataStore) {
+        this.urlResourceMongo = urlResourceMongo;
+        this.rawDataStore = rawDataStore;
+    }
+
+    /**
+     * Lists the site/parser pairs to crawl.
+     *
+     * <p>TODO: only Bilibili video data is crawled for now. The list is hard-coded; it could
+     * instead be derived from {@code urlResourceMongo.getNotCrawledSiteParsers()} filtered to
+     * the Bilibili site.
+     *
+     * @return a new list containing the single Bilibili video parser entry
+     */
+    @Override
+    public List<SiteParser> getSiteParsers() {
+        List<SiteParser> list = new ArrayList<>();
+        list.add(new SiteParser(Site.bilibili.name(), "BiliVideoDataParser"));
+        return list;
+    }
+
+    /**
+     * Returns the next page of not-yet-crawled URLs for the given site and parser.
+     *
+     * <p>The cursor of this site/parser pair advances to the id of the last resource of the
+     * page. When a page comes back empty the cursor is reset, so the next call starts from
+     * the beginning again.
+     *
+     * @param site   site to fetch URLs for
+     * @param parser parser to fetch URLs for
+     * @return the crawl URLs of the page; empty when nothing is left after the cursor
+     */
+    @Override
+    public synchronized Set<CrawlUrl> getUrls(String site, String parser) {
+        String cursorKey = site + "/" + parser;
+        ObjectId lastId = lastIds.getOrDefault(cursorKey, FIRST_ID);
+
+        List<UrlResource> list = urlResourceMongo.findNotCrawled(site, parser, PAGE_SIZE, lastId);
+        if (list.isEmpty()) {
+            lastIds.remove(cursorKey);
+        } else {
+            UrlResource last = list.get(list.size() - 1);
+            lastIds.put(cursorKey, new ObjectId(last.getId()));
+        }
+
+        return list.stream().map(UrlResource::getCrawlUrl).collect(Collectors.toSet());
+    }
+
+    /**
+     * Hands crawled raw data over to the raw data store.
+     *
+     * @param rawData crawled data to store
+     */
+    @Override
+    public void putData(RawData rawData) {
+        rawDataStore.put(rawData);
+    }
+
+    /**
+     * Logs a crawl event.
+     *
+     * @param evtCrawl event carrying site, parser, URL and message
+     */
+    @Override
+    public void crawlEvent(EvtCrawl evtCrawl) {
+        String site = evtCrawl.getSite();
+        String parser = evtCrawl.getParser();
+        String url = evtCrawl.getUrl();
+        String msg = evtCrawl.getMsg();
+        log.info("{}.{}: {} -> {}", site, parser, url, msg);
+    }
+
+    /**
+     * Intentionally does nothing: "no URL" events are ignored by this scheduler.
+     *
+     * @param evtNoUrl ignored
+     */
+    @Override
+    public void noUrlEvent(EvtNoUrl evtNoUrl) {
+    }
+}

+ 234 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliCommentDataParser.java

@@ -0,0 +1,234 @@
+package cn.reghao.bnt.web.parser.site.bilibili;
+
+import cn.reghao.bnt.web.config.OssConsoleClientFactory;
+import cn.reghao.jutil.jdk.http.WebRequest;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.jutil.tool.http.DefaultWebRequest;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import cn.reghao.bnt.web.parser.site.bilibili.api.BiliCrawlUrl;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliComment;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliUser;
+import cn.reghao.bnt.web.parser.site.bilibili.util.BvAvConverter;
+import cn.reghao.bnt.web.parser.task.feed.FeedService;
+import cn.reghao.bnt.web.parser.util.FileCache;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Bilibili 视频评论
+ *
+ * @author reghao
+ * @date 2022-02-06 11:28:04
+ */
+@Slf4j
+@Component
+public class BiliCommentDataParser implements DataParser {
+    private final FeedService feedService;
+    private UrlResourceMongo urlResourceMongo;
+    private int coverChannelId = 104;
+    private final WebRequest webRequest;
+    private FileCache fileCache;
+    private OssConsoleClientFactory ossConsoleClientFactory;
+
+    public BiliCommentDataParser(FeedService feedService, UrlResourceMongo urlResourceMongo,
+                                 FileCache fileCache, OssConsoleClientFactory ossConsoleClientFactory) throws Exception {
+        this.feedService = feedService;
+        this.urlResourceMongo = urlResourceMongo;
+        this.webRequest = new DefaultWebRequest();
+        this.fileCache = fileCache;
+        this.ossConsoleClientFactory = ossConsoleClientFactory;
+    }
+
+    @Override
+    public Map<String, Object> parse(String url, String body) throws Exception {
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        int code = jsonObject.get("code").getAsInt();
+        int message = jsonObject.get("message").getAsInt();
+        int ttl = jsonObject.get("ttl").getAsInt();
+        JsonObject data = jsonObject.get("data").getAsJsonObject();
+
+        Map<String, Object> map = new HashMap<>();
+        JsonElement element = data.get("replies");
+        if (element == null) {
+            return map;
+        }
+
+        Set<BiliUser> biliUsers = new HashSet<>();
+        Set<BiliComment> biliComments = new HashSet<>();
+        Set<CrawlUrl> crawlUrls = new HashSet<>();
+
+        JsonElement element0 = data.get("top_replies");
+        if (element0 != null) {
+            JsonArray jsonArray0 = element0.getAsJsonArray();
+            jsonArray0.forEach(jsonElement -> {
+                try {
+                    parseJsonElement(jsonElement, biliUsers, biliComments, crawlUrls);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            });
+        }
+
+        JsonArray jsonArray = element.getAsJsonArray();
+        jsonArray.forEach(jsonElement -> {
+            try {
+                parseJsonElement(jsonElement, biliUsers, biliComments, crawlUrls);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        });
+
+        if (url.contains("x/v2/reply/reply")) {
+            JsonObject page = data.get("page").getAsJsonObject();
+            int num = page.get("num").getAsInt();
+            if (num == 1) {
+                // 评论第一页
+                int size = page.get("size").getAsInt();
+                int count = page.get("count").getAsInt();
+                //int acount = page.get("acount").getAsInt();
+
+                int total = count/size + 1;
+                long avId = jsonArray.get(0).getAsJsonObject().get("oid").getAsLong();
+                boolean childComment = url.split("\\?")[0].endsWith("reply/reply");
+                if (childComment) {
+                    // 子评论
+                    long root = jsonArray.get(0).getAsJsonObject().get("root").getAsLong();
+                    for (int i = 2; i <= total; i++) {
+                        crawlUrls.add(BiliCrawlUrl.childCommentUrl(avId, root, i));
+                    }
+                } else {
+                    for (int i = 2; i <= total; i++) {
+                        crawlUrls.add(BiliCrawlUrl.commentUrl(avId, i));
+                    }
+                }
+            }
+        }
+
+        if (!biliUsers.isEmpty()) {
+            map.put("biliUsers", new ArrayList<>(biliUsers));
+        }
+
+        if (!biliComments.isEmpty()) {
+            map.put("biliComments", new ArrayList<>(biliComments));
+        }
+
+        if (!crawlUrls.isEmpty()) {
+            List<UrlResource> list = crawlUrls.stream().map(UrlResource::new).collect(Collectors.toList());
+            urlResourceMongo.saveAll(list);
+            map.put("crawlUrls", new ArrayList<>(crawlUrls));
+        }
+
+        return map;
+    }
+
+    private void parseJsonElement(JsonElement jsonElement, Set<BiliUser> biliUsers,
+                                  Set<BiliComment> biliComments, Set<CrawlUrl> crawlUrls) throws Exception {
+        JsonObject jsonObject1 = jsonElement.getAsJsonObject();
+        JsonObject member = jsonObject1.get("member").getAsJsonObject();
+        BiliUser biliUser = parseBiliUser(member);
+        biliUsers.add(biliUser);
+
+        long userId = biliUser.getUserId();
+        long rpid = jsonObject1.get("rpid").getAsLong();
+        long root = jsonObject1.get("root").getAsLong();
+        long oid = jsonObject1.get("oid").getAsLong();
+        int like = jsonObject1.get("like").getAsInt();
+        JsonObject content = jsonObject1.get("content").getAsJsonObject();
+        String contentStr = content.get("message").getAsString();
+        JsonElement picturesElement = content.get("pictures");
+        List<String> commentImages = new ArrayList<>();
+        if (picturesElement != null) {
+            JsonArray jsonArray1 = picturesElement.getAsJsonArray();
+            jsonArray1.forEach(jsonElement1 -> {
+                JsonObject imgObject = jsonElement1.getAsJsonObject();
+                String imgSrc = imgObject.get("img_src").getAsString();
+
+                String uuid = UUID.randomUUID().toString().replace("-", "");
+                String objectName = String.format("bili/img/vidcover/%s.jpg", uuid);
+                String coverPath = fileCache.cache(webRequest, imgSrc, objectName);
+                if (coverPath == null) {
+                    log.error("{} 的 cmtimg 缓存失败", imgSrc);
+                    commentImages.add(imgSrc);
+                } else {
+                    /*File coverFile = new File(coverPath);
+                    try {
+                        UploadFileRet uploadFileRet = ossConsoleClient.uploadFile(coverChannelId, coverFile);
+                        if (uploadFileRet == null) {
+                            log.error("{} 的 cmtimg 上传到 oss 失败", imgSrc);
+                            commentImages.add(imgSrc);
+                        } else {
+                            List<ImageInfo> list = ossConsoleClient.getImagesInfo(coverChannelId, List.of(uploadFileRet.getUploadId()));
+                            String coverUrl = list.get(0).getUrl();
+                            if (coverUrl != null) {
+                                commentImages.add(coverUrl);
+                            } else {
+                                commentImages.add(imgSrc);
+                            }
+                        }
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                        commentImages.add(imgSrc);
+                    }*/
+                }
+            });
+        }
+
+        String ctime = jsonObject1.get("ctime").getAsString();
+        long replyTime = Long.parseLong(ctime)*1000;
+
+        JsonObject replyControl = jsonObject1.get("reply_control").getAsJsonObject();
+        String location = "";
+        if (replyControl.get("location") != null) {
+            location = replyControl.get("location").getAsString();
+        }
+
+        String bvId = BvAvConverter.av2bv(oid);
+        BiliComment biliComment = new BiliComment();
+        biliComment.setCommentId(rpid);
+        biliComment.setRootId(root);
+        biliComment.setBvId(bvId);
+        biliComment.setAvId(oid);
+        biliComment.setUserId(userId);
+        biliComment.setReplyContent(contentStr);
+        biliComment.setReplyTime(replyTime);
+        biliComment.setThumbUpCount(like);
+        biliComment.setCommentImages(commentImages);
+        biliComment.setLocation(location);
+        biliComment.setBiliUser(biliUser);
+
+        //feedService.feedComment(biliComment);
+        biliComments.add(biliComment);
+        JsonElement jsonElement1 = jsonObject1.get("replies");
+        if (jsonElement1 instanceof JsonArray) {
+            JsonArray jsonArray1 = jsonElement1.getAsJsonArray();
+            if (jsonArray1.size() != 0) {
+                crawlUrls.add(BiliCrawlUrl.childCommentUrl(oid, rpid, 1));
+            }
+        }
+    }
+
+    private BiliUser parseBiliUser(JsonObject jsonObject) {
+        Long userId = jsonObject.get("mid").getAsLong();
+        String username = jsonObject.get("uname").getAsString();
+        String gender = jsonObject.get("sex").getAsString();
+        String avatar = jsonObject.get("avatar").getAsString();
+        String signature = jsonObject.get("sign").getAsString();
+
+        BiliUser biliUser = new BiliUser();
+        biliUser.setUserId(userId);
+        biliUser.setUsername(username);
+        biliUser.setGender(gender);
+        biliUser.setAvatarUrlOrigin(avatar);
+        biliUser.setSignature(signature);
+        return biliUser;
+    }
+}

+ 46 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliDanmakuDataParser.java

@@ -0,0 +1,46 @@
+package cn.reghao.bnt.web.parser.site.bilibili;
+
+import cn.reghao.jutil.jdk.http.util.UrlFormatter;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliDanmakuUrl;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+
+/**
+ * Parses the Bilibili danmaku history index response and builds one history-segment URL
+ * for every entry of its {@code data} array.
+ *
+ * @author reghao
+ * @date 2022-06-02 10:57:54
+ */
+@Slf4j
+@Component
+public class BiliDanmakuDataParser implements DataParser {
+    /** Template of a history-segment URL; filled with the cid and one entry of the index. */
+    private static final String SEGMENT_URL_FORMAT =
+            "https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid=%s&date=%s";
+
+    /**
+     * Extracts the danmaku history-segment URLs listed in an index response.
+     *
+     * @param url  request URL; its {@code oid} query parameter is parsed as the cid
+     * @param body JSON response body with a {@code code} field and a {@code data} array
+     * @return an empty map when {@code code} is non-zero or the array is empty, otherwise a map
+     *         holding the list of {@link BiliDanmakuUrl} under the key {@code biliDanmakuUrls}
+     * @throws Exception declared by {@link DataParser}
+     */
+    @Override
+    public Map<String, Object> parse(String url, String body) throws Exception {
+        Map<String, Object> result = new HashMap<>();
+        long cid = Long.parseLong(UrlFormatter.getParamValue(url, "oid"));
+        JsonObject root = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        if (root.get("code").getAsInt() != 0) {
+            return result;
+        }
+
+        List<BiliDanmakuUrl> danmakuUrls = new ArrayList<>();
+        root.get("data").getAsJsonArray().forEach(entry ->
+                danmakuUrls.add(new BiliDanmakuUrl(cid,
+                        String.format(SEGMENT_URL_FORMAT, cid, entry.getAsString()))));
+        if (danmakuUrls.isEmpty()) {
+            return result;
+        }
+
+        result.put("biliDanmakuUrls", danmakuUrls);
+        return result;
+    }
+}

+ 150 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliPageParser.java

@@ -0,0 +1,150 @@
+package cn.reghao.bnt.web.parser.site.bilibili;
+
+import cn.reghao.jutil.jdk.http.util.UrlFormatter;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.web.parser.db.mongo.UrlResourceMongo;
+import cn.reghao.bnt.web.parser.model.po.UrlResource;
+import cn.reghao.bnt.web.parser.site.bilibili.api.BiliCrawlUrl;
+import cn.reghao.bnt.web.parser.site.bilibili.db.mongo.BiliDanmakuUrlMongo;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliDanmakuUrl;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Parses several Bilibili JSON API responses and extracts follow-up URLs from them.
+ * <p>
+ * The kind of response is recognised by a fragment of the request URL, see
+ * {@link #parse(String, String)}.
+ *
+ * @author reghao
+ * @date 2022-06-02 10:57:54
+ */
+@Slf4j
+@Component
+public class BiliPageParser implements DataParser {
+    private final UrlResourceMongo urlResourceMongo;
+    private final BiliDanmakuUrlMongo biliDanmakuUrlMongo;
+
+    public BiliPageParser(UrlResourceMongo urlResourceMongo, BiliDanmakuUrlMongo biliDanmakuUrlMongo) {
+        this.urlResourceMongo = urlResourceMongo;
+        this.biliDanmakuUrlMongo = biliDanmakuUrlMongo;
+    }
+
+    /**
+     * Dispatches the response body to the parser matching the request URL.
+     * <p>
+     * URLs that match none of the known fragments are ignored.
+     *
+     * @param url  request URL used to pick the parser
+     * @param body JSON response body
+     * @return always an empty map; results are handled inside the individual parsers
+     * @throws Exception declared by {@link DataParser}
+     */
+    @Override
+    public Map<String, Object> parse(String url, String body) throws Exception {
+        if (url.contains("dm/history/index")) {
+            parseDanmaku(url, body);
+        } else if (url.contains("fav/resource/list")) {
+            parseUserFav(body);
+        } else if (url.contains("web-interface/popular/precious")) {
+            parse3(body);
+        } else if (url.contains("wbi/arc/search")) {
+            parseUserVideo(body);
+        }
+
+        return Collections.emptyMap();
+    }
+
+    /**
+     * Derives the historical danmaku URLs from a history index response.
+     * <p>
+     * NOTE(review): the derived URLs are currently not stored anywhere, because the call
+     * {@code biliDanmakuUrlMongo.saveAll(new ArrayList<>(set))} was disabled in the original code.
+     *
+     * @param url  request URL; its {@code oid} query parameter is parsed as the cid
+     * @param body JSON response body with a {@code code} field and a {@code data} array
+     * @date 2022-06-08 1:59 PM
+     */
+    private void parseDanmaku(String url, String body) {
+        long cid = Long.parseLong(UrlFormatter.getParamValue(url, "oid"));
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        int code = jsonObject.get("code").getAsInt();
+        if (code != 0) {
+            return;
+        }
+
+        Set<BiliDanmakuUrl> set = new HashSet<>();
+        jsonObject.get("data").getAsJsonArray().forEach(jsonElement -> {
+            String yearMonth = jsonElement.getAsString();
+            String url1 = String.format("https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid=%s&date=%s",
+                    cid, yearMonth);
+            set.add(new BiliDanmakuUrl(cid, url1));
+        });
+    }
+
+    /**
+     * Parses a user's favourites list and stores the video page URL of every entry.
+     *
+     * @param body JSON response body; {@code data.medias} is read as an array of objects
+     *             with {@code title} and {@code bvid}
+     * @date 2023-05-29 11:31 AM
+     */
+    private void parseUserFav(String body) {
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        JsonObject dataObject = jsonObject.get("data").getAsJsonObject();
+
+        Set<CrawlUrl> set = new HashSet<>();
+        dataObject.getAsJsonArray("medias").forEach(jsonElement -> {
+            JsonObject jsonObject1 = jsonElement.getAsJsonObject();
+            String title = jsonObject1.get("title").getAsString();
+            String bvId = jsonObject1.get("bvid").getAsString();
+            set.add(BiliCrawlUrl.videoPageUrl(bvId, title));
+        });
+
+        saveCrawlUrls(set);
+    }
+
+    /**
+     * Parses the response of the {@code web-interface/popular/precious} API and stores the
+     * video page URL of every entry in {@code data.list}.
+     * <p>
+     * NOTE(review): the original implementation collected the URLs and then dropped them;
+     * they are now stored the same way the other list parsers of this class store theirs.
+     *
+     * @param body JSON response body
+     */
+    private void parse3(String body) {
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        Set<CrawlUrl> set = new HashSet<>();
+        jsonObject.get("data").getAsJsonObject().get("list").getAsJsonArray().forEach(jsonElement -> {
+            JsonObject jsonObject1 = jsonElement.getAsJsonObject();
+            String title = jsonObject1.get("title").getAsString();
+            String bvId = jsonObject1.get("bvid").getAsString();
+            set.add(BiliCrawlUrl.videoPageUrl(bvId, title));
+        });
+
+        saveCrawlUrls(set);
+    }
+
+    /**
+     * Parses a user's uploaded-videos list and stores the video page URL of every entry.
+     *
+     * @param body JSON response body; {@code data.list.vlist} is read as an array of objects
+     *             with {@code title} and {@code bvid}
+     * @date 2023-05-29 11:31 AM
+     */
+    private void parseUserVideo(String body) {
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        Set<CrawlUrl> set = new HashSet<>();
+        jsonObject.get("data").getAsJsonObject()
+                .get("list").getAsJsonObject()
+                .get("vlist").getAsJsonArray()
+                .forEach(jsonElement -> {
+                    JsonObject jsonObject1 = jsonElement.getAsJsonObject();
+                    String title = jsonObject1.get("title").getAsString();
+                    String bvId = jsonObject1.get("bvid").getAsString();
+                    set.add(BiliCrawlUrl.videoPageUrl(bvId, title));
+                });
+
+        saveCrawlUrls(set);
+    }
+
+    /**
+     * Wraps every crawl URL in a {@link UrlResource} and stores them in one batch.
+     * Nothing is written when the set is empty.
+     *
+     * @param crawlUrls URLs collected by one of the list parsers
+     */
+    private void saveCrawlUrls(Set<CrawlUrl> crawlUrls) {
+        if (crawlUrls.isEmpty()) {
+            return;
+        }
+
+        List<UrlResource> list = crawlUrls.stream().map(UrlResource::new).collect(Collectors.toList());
+        urlResourceMongo.saveAll(list);
+    }
+}

+ 49 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliRecommendDataParser.java

@@ -0,0 +1,49 @@
+package cn.reghao.bnt.web.parser.site.bilibili;
+
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.web.parser.site.bilibili.api.BiliCrawlUrl;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+
+/**
+ * Parses the URLs of the videos recommended on the Bilibili home page.
+ *
+ * @author reghao
+ * @date 2022-08-09 13:57:54
+ */
+@Slf4j
+@Component
+public class BiliRecommendDataParser implements DataParser {
+    /**
+     * Extracts one video page {@link CrawlUrl} per recommended item.
+     * <p>
+     * Items whose {@code bvid} is blank are skipped.
+     *
+     * @param url  request URL (not used)
+     * @param body JSON response body with a {@code code} field and a {@code data.item} array
+     * @return an empty map when {@code code} is non-zero or no URL was found, otherwise a map
+     *         holding the list of {@link CrawlUrl} under the key {@code crawlUrls}
+     * @throws Exception declared by {@link DataParser}
+     */
+    @Override
+    public Map<String, Object> parse(String url, String body) throws Exception {
+        Map<String, Object> map = new HashMap<>();
+        JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
+        if (jsonObject.get("code").getAsInt() != 0) {
+            return map;
+        }
+
+        JsonArray jsonArray = jsonObject.get("data").getAsJsonObject().get("item").getAsJsonArray();
+        List<CrawlUrl> crawlUrls = new ArrayList<>();
+        jsonArray.forEach(jsonElement -> {
+            JsonObject jsonObject1 = jsonElement.getAsJsonObject();
+            String title = jsonObject1.get("title").getAsString();
+            String bvId = jsonObject1.get("bvid").getAsString();
+            if (!bvId.isBlank()) {
+                crawlUrls.add(BiliCrawlUrl.videoPageUrl(bvId, title));
+            }
+        });
+
+        if (!crawlUrls.isEmpty()) {
+            map.put("crawlUrls", crawlUrls);
+        }
+
+        return map;
+    }
+}

+ 214 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/BiliVideoDataParser.java

@@ -0,0 +1,214 @@
+package cn.reghao.bnt.web.parser.site.bilibili;
+
+import cn.reghao.jutil.jdk.converter.DateTimeConverter;
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
+import cn.reghao.bnt.core.parser.DataParser;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliUser;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliVideo;
+import cn.reghao.bnt.web.parser.site.bilibili.model.vo.BiliDash;
+import cn.reghao.bnt.web.parser.site.bilibili.model.vo.DashContent;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.springframework.stereotype.Component;
+
+import java.time.LocalDateTime;
+import java.util.*;
+
+/**
+ * Parses the information embedded in a Bilibili video page
+ * (e.g. https://www.bilibili.com/video/BV1iK4y1m7ff).
+ *
+ * @author reghao
+ * @date 2021-03-12 15:57:54
+ */
+@Slf4j
+@Component
+public class BiliVideoDataParser implements DataParser {
+    /**
+     * Extracts the uploader, the video metadata and, when present, the DASH stream information
+     * from the inline scripts of a video page.
+     *
+     * @param url  URL of the video page
+     * @param body HTML of the video page
+     * @return an empty map when the page carries no {@code window.__INITIAL_STATE__} script,
+     *         otherwise a map with the keys {@code biliUser} and {@code biliVideo}, plus
+     *         {@code biliDash} when a {@code window.__playinfo__} script was found
+     */
+    @Override
+    public Map<String, Object> parse(String url, String body) {
+        Map<String, Object> map = new HashMap<>(5);
+        Document document = Jsoup.parse(body);
+        String initialStateJson = "";
+        String playInfoJson = "";
+        for (Element element : document.select("script")) {
+            String data = element.data();
+            if (data.contains("window.__INITIAL_STATE__=")) {
+                // The JSON is followed by an inline ";(function" snippet that must be cut off.
+                initialStateJson = data.split("window.__INITIAL_STATE__=")[1].split(";\\(function")[0];
+            }
+
+            if (data.contains("window.__playinfo__=")) {
+                playInfoJson = data.split("window.__playinfo__=")[1];
+            }
+        }
+
+        if (initialStateJson.isEmpty()) {
+            log.error("{} 页面没有视频信息...", url);
+            return map;
+        }
+
+        // Tag names are joined with commas; every name, including the last, is followed by one.
+        StringBuilder tags = new StringBuilder();
+        JsonObject initialState = JsonConverter.jsonToJsonElement(initialStateJson).getAsJsonObject();
+        JsonArray tagArray = initialState.get("tags").getAsJsonArray();
+        for (JsonElement jsonElement : tagArray) {
+            JsonObject tagObject = jsonElement.getAsJsonObject();
+            String tagName = tagObject.get("tag_name").getAsString();
+            tags.append(tagName).append(",");
+        }
+
+        JsonObject upData = initialState.getAsJsonObject("upData");
+        BiliUser biliUser = parseBiliUser(upData);
+        map.put("biliUser", biliUser);
+
+        JsonObject videoData = initialState.getAsJsonObject("videoData");
+        BiliVideo biliVideo = parseBiliVideo(url, biliUser.getUserId(), videoData);
+        biliVideo.setTags(tags.toString());
+        biliVideo.setBiliUser(biliUser);
+        map.put("biliVideo", biliVideo);
+
+        // Stream (video link) information
+        String bvId = videoData.get("bvid").getAsString();
+        if (!playInfoJson.isEmpty()) {
+            JsonObject jsonObject = JsonConverter.jsonToJsonElement(playInfoJson).getAsJsonObject();
+            BiliDash biliDash = parseBiliDash(bvId, jsonObject);
+            biliDash.setPageUrl(url);
+            map.put("biliDash", biliDash);
+        }
+        return map;
+    }
+
+    /**
+     * Builds the uploader from the {@code upData} object of the page state.
+     *
+     * @param jsonObject object with the keys {@code mid}, {@code name}, {@code sex},
+     *                   {@code face} and {@code sign}
+     * @return the populated user
+     */
+    private BiliUser parseBiliUser(JsonObject jsonObject) {
+        Long userId = jsonObject.get("mid").getAsLong();
+        String username = jsonObject.get("name").getAsString();
+        String gender = jsonObject.get("sex").getAsString();
+        String avatar = jsonObject.get("face").getAsString();
+        String signature = jsonObject.get("sign").getAsString();
+
+        BiliUser biliUser = new BiliUser();
+        biliUser.setUserId(userId);
+        biliUser.setUsername(username);
+        biliUser.setGender(gender);
+        biliUser.setAvatarUrlOrigin(avatar);
+        biliUser.setSignature(signature);
+        return biliUser;
+    }
+
+    /**
+     * Builds the video metadata from the {@code videoData} object of the page state.
+     *
+     * @param url        URL of the video page
+     * @param userId     id of the uploader
+     * @param jsonObject the {@code videoData} object, including its {@code dimension} and
+     *                   {@code stat} sub-objects
+     * @return the populated video
+     */
+    private BiliVideo parseBiliVideo(String url, Long userId, JsonObject jsonObject) {
+        String title = jsonObject.get("title").getAsString();
+        String description = jsonObject.get("desc").getAsString();
+        String cover = jsonObject.get("pic").getAsString();
+        int duration = jsonObject.get("duration").getAsInt();
+        // pubdate is multiplied by 1000 before conversion (presumably seconds to milliseconds).
+        LocalDateTime pubDate = DateTimeConverter.localDateTime(jsonObject.get("pubdate").getAsLong() * 1000);
+        Long cid = jsonObject.get("cid").getAsLong();
+        Long avId = jsonObject.get("aid").getAsLong();
+        String bvId = jsonObject.get("bvid").getAsString();
+        int tid = jsonObject.get("tid").getAsInt();
+        JsonObject dimensionObj = jsonObject.get("dimension").getAsJsonObject();
+        int width = dimensionObj.get("width").getAsInt();
+        int height = dimensionObj.get("height").getAsInt();
+        boolean horizontal = width > height;
+
+        JsonObject statObject = jsonObject.get("stat").getAsJsonObject();
+        int viewCount = statObject.get("view").getAsInt();
+        int danmakuCount = statObject.get("danmaku").getAsInt();
+        int replyCount = statObject.get("reply").getAsInt();
+        int favoriteCount = statObject.get("favorite").getAsInt();
+        int likeCount = statObject.get("like").getAsInt();
+        int dislikeCount = statObject.get("dislike").getAsInt();
+        int shareCount = statObject.get("share").getAsInt();
+        int coinCount = statObject.get("coin").getAsInt();
+
+        BiliVideo biliVideo = new BiliVideo(bvId, avId, title, description, url, cover, duration, horizontal, pubDate);
+        biliVideo.setCid(cid);
+        biliVideo.setAvId(avId);
+        biliVideo.setRid(tid);
+        biliVideo.setUserId(userId);
+        biliVideo.setView(viewCount);
+        biliVideo.setDanmaku(danmakuCount);
+        biliVideo.setReply(replyCount);
+        biliVideo.setFavorite(favoriteCount);
+        biliVideo.setLike(likeCount);
+        biliVideo.setDislike(dislikeCount);
+        biliVideo.setShare(shareCount);
+        biliVideo.setCoin(coinCount);
+        return biliVideo;
+    }
+
+    /**
+     * Builds the DASH stream description from the {@code window.__playinfo__} object.
+     * <p>
+     * A video entry that cannot be parsed is logged and skipped; a failing audio entry
+     * propagates its exception, as in the original implementation.
+     *
+     * @param bvId       BV id of the video
+     * @param jsonObject the play info object; {@code data.dash} is read
+     * @return the populated DASH description
+     */
+    private BiliDash parseBiliDash(String bvId, JsonObject jsonObject) {
+        JsonObject data = jsonObject.getAsJsonObject("data");
+        JsonObject dash = data.getAsJsonObject("dash");
+
+        int duration = dash.get("duration").getAsInt();
+        double minBufferTime = dash.get("minBufferTime").getAsDouble();
+
+        List<DashContent> videoList = new ArrayList<>();
+        for (JsonElement jsonElement : dash.getAsJsonArray("video")) {
+            try {
+                videoList.add(setDashContent("video", jsonElement));
+            } catch (Exception e) {
+                // Skip the malformed entry so that the remaining streams are still returned.
+                log.error("failed to parse a video stream entry of {}", bvId, e);
+            }
+        }
+
+        List<DashContent> audioList = new ArrayList<>();
+        for (JsonElement jsonElement : dash.getAsJsonArray("audio")) {
+            audioList.add(setDashContent("audio", jsonElement));
+        }
+
+        BiliDash biliDash = new BiliDash();
+        biliDash.setBvId(bvId);
+        biliDash.setDuration(duration);
+        biliDash.setMinBufferTime(minBufferTime);
+        biliDash.setVideo(videoList);
+        biliDash.setAudio(audioList);
+        return biliDash;
+    }
+
+    /**
+     * Builds one {@link DashContent} from an entry of the {@code video} or {@code audio} array.
+     *
+     * @param type        kind of the entry; currently not used by the method
+     * @param jsonElement the array entry, read as a JSON object
+     * @return the populated stream description
+     */
+    private DashContent setDashContent(String type, JsonElement jsonElement) {
+        JsonObject jsonObject = jsonElement.getAsJsonObject();
+        int id = jsonObject.get("id").getAsInt();
+        String baseUrl = jsonObject.get("baseUrl").getAsString();
+        int bandwidth = jsonObject.get("bandwidth").getAsInt();
+        String mimeType = jsonObject.get("mimeType").getAsString();
+        String codecs = jsonObject.get("codecs").getAsString();
+        int width = jsonObject.get("width").getAsInt();
+        int height = jsonObject.get("height").getAsInt();
+        String frameRate = jsonObject.get("frameRate").getAsString();
+        JsonArray jsonArray = jsonObject.get("backupUrl").getAsJsonArray();
+        List<String> list = new ArrayList<>();
+        for (JsonElement backupUrl : jsonArray) {
+            list.add(backupUrl.getAsString());
+        }
+
+        JsonObject segmentBase = jsonObject.get("SegmentBase").getAsJsonObject();
+        String initialization = segmentBase.get("Initialization").getAsString();
+        String indexRange = segmentBase.get("indexRange").getAsString();
+        int codecid = jsonObject.get("codecid").getAsInt();
+
+        DashContent dashContent = new DashContent();
+        dashContent.setId(id);
+        dashContent.setBaseUrl(baseUrl);
+        dashContent.setBackupUrl(list);
+        dashContent.setBandwidth(bandwidth);
+        dashContent.setMimeType(mimeType);
+        dashContent.setCodecs(codecs);
+        dashContent.setWidth(width);
+        dashContent.setHeight(height);
+        dashContent.setFrameRate(frameRate);
+
+        DashContent.SegmentBase segmentBase1 = new DashContent.SegmentBase(initialization, indexRange);
+        dashContent.setSegmentBase(segmentBase1);
+        dashContent.setCodecid(codecid);
+        return dashContent;
+    }
+}

+ 35 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/api/BiliCrawlUrl.java

@@ -0,0 +1,35 @@
+package cn.reghao.bnt.web.parser.site.bilibili.api;
+
+import cn.reghao.bnt.core.url.BodyDataType;
+import cn.reghao.bnt.core.url.CrawlUrl;
+import cn.reghao.bnt.core.url.Site;
+import cn.reghao.bnt.web.parser.site.bilibili.BiliCommentDataParser;
+import cn.reghao.bnt.web.parser.site.bilibili.BiliVideoDataParser;
+
+/**
+ * Factory methods that wrap Bilibili URLs in {@link CrawlUrl} objects, pairing each URL with
+ * the simple class name of the parser that handles its response.
+ *
+ * @author reghao
+ * @date 2021-08-22 00:36:55
+ */
+public class BiliCrawlUrl {
+    static String site = Site.bilibili.name();
+    // Passed to every CrawlUrl as its "not found" and "anti-crawl" codes respectively.
+    static int notFoundCode = 410;
+    static int antiCrawlCode = 418;
+
+    /**
+     * Creates the crawl URL of a video page, handled by {@link BiliVideoDataParser}.
+     *
+     * @param bvId  BV id of the video
+     * @param title title handed to the {@link CrawlUrl}
+     * @return crawl URL with an HTML body type
+     */
+    public static CrawlUrl videoPageUrl(String bvId, String title) {
+        return new CrawlUrl(site, BiliVideoDataParser.class.getSimpleName(), BiliUrl.video(bvId), null,
+                BodyDataType.html.name(), notFoundCode, antiCrawlCode, title);
+    }
+
+    /**
+     * Creates the crawl URL of one page of a video's comments.
+     *
+     * @param avId av id of the video
+     * @param page page number
+     * @return crawl URL with a JSON body type
+     */
+    public static CrawlUrl commentUrl(long avId, int page) {
+        return commentApiUrl(BiliUrl.comment(avId, page));
+    }
+
+    /**
+     * Creates the crawl URL of one page of the replies below a comment.
+     *
+     * @param avId      av id of the video
+     * @param commentId id of the root comment
+     * @param page      page number
+     * @return crawl URL with a JSON body type
+     */
+    public static CrawlUrl childCommentUrl(long avId, long commentId, int page) {
+        return commentApiUrl(BiliUrl.childComment(avId, commentId, page));
+    }
+
+    /** Wraps a comment API URL; both comment endpoints are handled by {@link BiliCommentDataParser}. */
+    private static CrawlUrl commentApiUrl(String url) {
+        String parser = BiliCommentDataParser.class.getSimpleName();
+        return new CrawlUrl(site, parser, url, null, BodyDataType.json.name(), notFoundCode, antiCrawlCode, null);
+    }
+}

+ 64 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/api/BiliUrl.java

@@ -0,0 +1,64 @@
+package cn.reghao.bnt.web.parser.site.bilibili.api;
+
+/**
+ * Builders for the Bilibili page and API URLs used by the crawler.
+ *
+ * @author reghao
+ * @date 2021-08-03 11:11:32
+ */
+public class BiliUrl {
+    /**
+     * @param userId id of the user
+     * @return URL of the account info API for that user
+     */
+    public static String userInfo(Long userId) {
+        return String.format("https://api.bilibili.com/x/space/acc/info?mid=%s&jsonp=jsonp", userId);
+    }
+
+    /**
+     * @param bvId BV id of the video
+     * @return URL of the video page
+     */
+    public static String video(String bvId) {
+        return String.format("https://www.bilibili.com/video/%s", bvId);
+    }
+
+    /**
+     * Site-wide ranking page.
+     *
+     * @return URL of the ranking page
+     * @date 2021-08-12 3:47 PM
+     */
+    public static String rankAll() {
+        return "https://www.bilibili.com/v/popular/rank/all";
+    }
+
+    /**
+     * Videos uploaded by a user, 30 per page, ordered by publication date.
+     *
+     * @param userId id of the user
+     * @param page   page number
+     * @return URL of the upload search API
+     * @date 2021-08-09 3:00 PM
+     */
+    public static String userVideos(long userId, int page) {
+        return "https://api.bilibili.com/x/space/arc/search" +
+                "?mid=" + userId + "&ps=30&tid=0&pn=" + page + "&keyword=&order=pubdate&jsonp=jsonp";
+    }
+
+    /**
+     * One page of the comments of a video.
+     *
+     * @param avId av id of the video
+     * @param page page number
+     * @return URL of the reply API
+     */
+    public static String comment(long avId, int page) {
+        String api = "https://api.bilibili.com/x/v2/reply";
+        String param1 = "?jsonp=jsonp";
+        String param2 = "&type=1";
+        // Sort by most recent send time
+        String param3 = "&sort=0";
+        String param4 = "&oid=" + avId;
+        String param5 = "&pn=" + page;
+        return api + param1 + param2 + param3 + param4 + param5;
+    }
+
+    /**
+     * One page (10 entries) of the replies below a root comment.
+     *
+     * @param avId      av id of the video
+     * @param commentId id of the root comment
+     * @param page      page number
+     * @return URL of the nested reply API
+     */
+    public static String childComment(long avId, long commentId, int page) {
+        String api = "https://api.bilibili.com/x/v2/reply/reply";
+        String param1 = "?jsonp=jsonp";
+        String param2 = "&type=1";
+        String param3 = "&ps=10";
+        String param4 = "&oid=" + avId;
+        String param5 = "&root=" + commentId;
+        String param6 = "&pn=" + page;
+        return api + param1 + param2 + param3 + param4 + param5 + param6;
+    }
+
+    /**
+     * @param cid cid of the video
+     * @return URL of the danmaku XML file
+     */
+    public static String danmu(int cid) {
+        return danmu((long) cid);
+    }
+
+    /**
+     * Same as {@link #danmu(int)} for cids held in a {@code long}, so that callers do not
+     * have to narrow (and possibly truncate) the value.
+     *
+     * @param cid cid of the video
+     * @return URL of the danmaku XML file
+     */
+    public static String danmu(long cid) {
+        return "https://comment.bilibili.com/" + cid + ".xml";
+    }
+}

+ 150 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliCommentMongo.java

@@ -0,0 +1,150 @@
+package cn.reghao.bnt.web.parser.site.bilibili.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliComment;
+import com.mongodb.BasicDBObject;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.AggregateIterable;
+import com.mongodb.client.FindIterable;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * @author reghao
+ * @date 2022-02-25 13:12:37
+ */
+@Slf4j
+@Repository
+public class BiliCommentMongo implements BaseCrud<BiliComment>, BaseQuery<BiliComment> {
+    private final String colName = "BiliComment";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public BiliCommentMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    @Override
+    public BiliComment save(BiliComment biliComment) {
+        Document doc = new Document();
+        mongoConverter.write(biliComment, doc);
+        mongoTemplate.getCollection(colName).insertOne(doc);
+        return null;
+    }
+
+    @Override
+    public void saveAll(List<BiliComment> list) {
+        List<Document> documents = list.stream()
+                .map(t -> {
+                    Document doc = new Document();
+                    mongoConverter.write(t, doc);
+                    return doc;
+                })
+                .collect(Collectors.toList());
+
+        InsertManyOptions options = new InsertManyOptions();
+        options.ordered(false);
+        try {
+            InsertManyResult result = mongoTemplate.getCollection(colName).insertMany(documents, options);
+        }  catch (MongoBulkWriteException ignore) {
+        }
+    }
+
+    @Override
+    public void update(BiliComment biliComment) {
+        Document document = new Document();
+        mongoConverter.write(biliComment, document);
+        Document filter = new Document();
+        filter.put("commentId", biliComment.getCommentId());
+        mongoTemplate.getCollection(colName).replaceOne(filter, document);
+    }
+
+    @Override
+    public void delete(BiliComment biliComment) {
+        DeleteResult deleteResult = mongoTemplate.remove(biliComment, colName);
+    }
+
+    public void batchDelete(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        DeleteResult deleteResult = mongoTemplate.remove(query, BiliComment.class);
+        long count = deleteResult.getDeletedCount();
+        log.info("删除 {} 条数据",count);
+    }
+
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    public long countChildComment(long commentId) {
+        Document filter = new Document();
+        filter.put("rootId", commentId);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    public FindIterable<Document> getIterator() {
+        return mongoTemplate.getCollection(colName).find();
+    }
+
+    public List<BiliComment> findAll() {
+        Query query = new Query();
+        query.limit(10000);
+        return mongoTemplate.find(query, BiliComment.class, colName);
+    }
+
+    public List<BiliComment> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, BiliComment.class, colName);
+    }
+
+    public BiliComment findByCommentId(Long commentId) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("commentId").is(commentId));
+        List<BiliComment> list = mongoTemplate.find(query, BiliComment.class, colName);
+        return list.isEmpty() ? null : list.get(0);
+    }
+
+    public Map<Long, Integer> aggregate(List<String> fields) {
+        Document groupByFields = new Document();
+        fields.forEach(field -> {
+            groupByFields.put(field, "$" + field);
+        });
+
+        Document groupObject = new Document("_id", groupByFields);
+        groupObject.put("total", new BasicDBObject("$sum", 1));
+        Document queryGroup = new Document("$group", groupObject);
+
+        Document sortObject = new Document("$sort", new BasicDBObject("total", -1));
+
+        List<Document> pipeline = new ArrayList<>();
+        pipeline.add(queryGroup);
+        pipeline.add(sortObject);
+        AggregateIterable<Document> result = mongoTemplate.getCollection(colName).aggregate(pipeline).allowDiskUse(true);
+
+        Map<Long, Integer> map = new HashMap<>();
+        for (Document document : result) {
+            Document document1 = document.get("_id", Document.class);
+            Long commentId = document1.getLong("commentId");
+            Integer total = document.getInteger("total");
+            map.putIfAbsent(commentId, total);
+        }
+
+        return map;
+    }
+}

+ 103 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliDanmakuMongo.java

@@ -0,0 +1,103 @@
+package cn.reghao.bnt.web.parser.site.bilibili.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliDanmaku;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access object for {@link BiliDanmaku} documents kept in the {@code BiliDanmaku} collection.
+ *
+ * @author reghao
+ * @date 2023-05-29 11:17:41
+ */
+@Slf4j
+@Repository
+public class BiliDanmakuMongo implements BaseCrud<BiliDanmaku>, BaseQuery<BiliDanmaku> {
+    private final String colName = "BiliDanmaku";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public BiliDanmakuMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts one danmaku.
+     *
+     * @param biliDanmaku danmaku to insert
+     * @return always {@code null}
+     */
+    @Override
+    public BiliDanmaku save(BiliDanmaku biliDanmaku) {
+        Document doc = new Document();
+        mongoConverter.write(biliDanmaku, doc);
+        mongoTemplate.getCollection(colName).insertOne(doc);
+        return null;
+    }
+
+    /**
+     * Inserts the danmaku with an unordered bulk write, so one failing document does not
+     * stop the others from being written.
+     *
+     * @param list danmaku to insert; {@code null} or an empty list is a no-op
+     */
+    @Override
+    public void saveAll(List<BiliDanmaku> list) {
+        // The driver rejects an empty bulk write, so there is nothing to do for an empty batch.
+        if (list == null || list.isEmpty()) {
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(t -> {
+                    Document doc = new Document();
+                    mongoConverter.write(t, doc);
+                    return doc;
+                })
+                .collect(Collectors.toList());
+
+        InsertManyOptions options = new InsertManyOptions();
+        options.ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            // Best effort: documents that could not be written are skipped on purpose.
+            log.debug("bulk insert into {} partially failed: {}", colName, e.getMessage());
+        }
+    }
+
+    /**
+     * Replaces the stored document that has the same {@code _id} as the converted entity.
+     * <p>
+     * The original code used an empty filter, which matches an arbitrary document of the
+     * collection. When the entity converts to a document without an {@code _id}, the update
+     * is skipped instead.
+     *
+     * @param biliDanmaku new state of the danmaku
+     */
+    @Override
+    public void update(BiliDanmaku biliDanmaku) {
+        Document document = new Document();
+        mongoConverter.write(biliDanmaku, document);
+        Object id = document.get("_id");
+        if (id == null) {
+            log.warn("skip update: the BiliDanmaku document has no _id to match on");
+            return;
+        }
+
+        mongoTemplate.getCollection(colName).replaceOne(new Document("_id", id), document);
+    }
+
+    @Override
+    public void delete(BiliDanmaku biliDanmaku) {
+        mongoTemplate.remove(biliDanmaku, colName);
+    }
+
+    /**
+     * Deletes every danmaku matching all of the given criteria and logs the number removed.
+     *
+     * @param criteriaList criteria combined into one query
+     */
+    public void batchDelete(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        // Name the collection explicitly, like every other operation of this class does.
+        DeleteResult deleteResult = mongoTemplate.remove(query, BiliDanmaku.class, colName);
+        long count = deleteResult.getDeletedCount();
+        log.info("删除 {} 条数据",count);
+    }
+
+    /**
+     * @param map field/value pairs that a document must equal
+     * @return number of matching documents
+     */
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    /**
+     * @return at most 10000 danmaku
+     */
+    public List<BiliDanmaku> findAll() {
+        Query query = new Query();
+        query.limit(10000);
+        return mongoTemplate.find(query, BiliDanmaku.class, colName);
+    }
+
+    public List<BiliDanmaku> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, BiliDanmaku.class, colName);
+    }
+}

+ 109 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliDanmakuUrlMongo.java

@@ -0,0 +1,109 @@
+package cn.reghao.bnt.web.parser.site.bilibili.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliDanmakuUrl;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access for {@link BiliDanmakuUrl} entities stored in the {@code BiliDanmakuUrl}
+ * collection.
+ *
+ * @author reghao
+ * @date 2023-05-29 11:17:41
+ */
+@Slf4j
+@Repository
+public class BiliDanmakuUrlMongo implements BaseCrud<BiliDanmakuUrl>, BaseQuery<BiliDanmakuUrl> {
+    private final String colName = "BiliDanmakuUrl";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public BiliDanmakuUrlMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts the given entity as a new document.
+     *
+     * @param biliDanmakuUrl entity to insert
+     * @return always {@code null}; the inserted entity is not handed back
+     */
+    @Override
+    public BiliDanmakuUrl save(BiliDanmakuUrl biliDanmakuUrl) {
+        mongoTemplate.getCollection(colName).insertOne(toDocument(biliDanmakuUrl));
+        return null;
+    }
+
+    /**
+     * Inserts all given entities with an unordered bulk write, so one rejected document does
+     * not stop the remaining ones from being written.
+     *
+     * @param list entities to insert; {@code null} or an empty list is a no-op
+     */
+    @Override
+    public void saveAll(List<BiliDanmakuUrl> list) {
+        // insertMany rejects an empty batch, so bail out before building the request.
+        if (list == null || list.isEmpty()) {
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(this::toDocument)
+                .collect(Collectors.toList());
+
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            // Still best-effort: the accepted documents are already stored. The rejected ones
+            // are reported instead of disappearing silently.
+            log.warn("{} of {} documents were not inserted into {}: {}",
+                    e.getWriteErrors().size(), documents.size(), colName, e.getMessage());
+        }
+    }
+
+    /**
+     * Replaces the stored document of the given entity with its current state.
+     *
+     * <p>The target is selected by {@code url}, the field
+     * {@link BiliDanmakuUrl#equals(Object)} treats as identity. An empty filter would match
+     * every document, so {@code replaceOne} would overwrite an arbitrary one.
+     *
+     * @param biliDanmakuUrl entity whose converted document replaces the stored one
+     */
+    @Override
+    public void update(BiliDanmakuUrl biliDanmakuUrl) {
+        Document filter = new Document();
+        filter.put("url", biliDanmakuUrl.getUrl());
+        mongoTemplate.getCollection(colName).replaceOne(filter, toDocument(biliDanmakuUrl));
+    }
+
+    /**
+     * Removes the given entity from the collection.
+     *
+     * @param biliDanmakuUrl entity handed to {@code MongoTemplate.remove}
+     */
+    @Override
+    public void delete(BiliDanmakuUrl biliDanmakuUrl) {
+        mongoTemplate.remove(biliDanmakuUrl, colName);
+    }
+
+    /**
+     * Deletes every document matching all of the given criteria and logs how many were removed.
+     *
+     * <p>The collection name is passed explicitly so the delete targets {@code colName} like
+     * every other method here, instead of a name derived from the entity class.
+     *
+     * @param criteriaList criteria added to a single query; NOTE(review): an empty list
+     *                     produces an unrestricted query - confirm callers never pass one
+     */
+    public void batchDelete(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        DeleteResult deleteResult = mongoTemplate.remove(query, BiliDanmakuUrl.class, colName);
+        log.info("删除 {} 条数据", deleteResult.getDeletedCount());
+    }
+
+    /**
+     * Counts the documents matching a filter built from the given map.
+     *
+     * @param map entries copied into the filter document
+     * @return number of matching documents
+     */
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    /**
+     * Loads entities from the collection, capped at 10000 documents.
+     *
+     * @return at most 10000 entities
+     */
+    public List<BiliDanmakuUrl> findAll() {
+        Query query = new Query();
+        query.limit(10000);
+        return mongoTemplate.find(query, BiliDanmakuUrl.class, colName);
+    }
+
+    /**
+     * Loads the entities whose {@code cid} equals the given value.
+     *
+     * @param cid value compared against the {@code cid} field
+     * @return matching entities, possibly empty
+     */
+    public List<BiliDanmakuUrl> findByCid(long cid) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("cid").is(cid));
+        return mongoTemplate.find(query, BiliDanmakuUrl.class, colName);
+    }
+
+    /**
+     * Loads the entities matching all of the given criteria.
+     *
+     * @param criteriaList criteria added to a single query
+     * @return matching entities, possibly empty
+     */
+    public List<BiliDanmakuUrl> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, BiliDanmakuUrl.class, colName);
+    }
+
+    /** Converts an entity into the document that gets written to the collection. */
+    private Document toDocument(BiliDanmakuUrl biliDanmakuUrl) {
+        Document doc = new Document();
+        mongoConverter.write(biliDanmakuUrl, doc);
+        return doc;
+    }
+}

+ 127 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliUserMongo.java

@@ -0,0 +1,127 @@
+package cn.reghao.bnt.web.parser.site.bilibili.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliUser;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.DeleteResult;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access for {@link BiliUser} entities stored in the {@code BiliUser} collection.
+ *
+ * @author reghao
+ * @date 2023-05-29 11:17:41
+ */
+@Slf4j
+@Repository
+public class BiliUserMongo implements BaseCrud<BiliUser>, BaseQuery<BiliUser> {
+    private final String colName = "BiliUser";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public BiliUserMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts the user unless a document with the same {@code userId} already exists.
+     *
+     * @param biliUser user to insert
+     * @return always {@code null}, whether or not a document was inserted
+     */
+    @Override
+    public BiliUser save(BiliUser biliUser) {
+        long userId = biliUser.getUserId();
+        if (findByUserId(userId) != null) {
+            return null;
+        }
+
+        mongoTemplate.getCollection(colName).insertOne(toDocument(biliUser));
+        return null;
+    }
+
+    /**
+     * Inserts all given users with an unordered bulk write, so one rejected document does not
+     * stop the remaining ones from being written.
+     *
+     * @param list users to insert; {@code null} or an empty list is a no-op
+     */
+    @Override
+    public void saveAll(List<BiliUser> list) {
+        // insertMany rejects an empty batch, so bail out before building the request.
+        if (list == null || list.isEmpty()) {
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(this::toDocument)
+                .collect(Collectors.toList());
+
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            // Still best-effort: the accepted documents are already stored. The rejected ones
+            // are reported instead of disappearing silently.
+            log.warn("{} of {} documents were not inserted into {}: {}",
+                    e.getWriteErrors().size(), documents.size(), colName, e.getMessage());
+        }
+    }
+
+    /**
+     * Stamps the user with the current time and replaces the stored document that has the
+     * same {@code userId}.
+     *
+     * @param biliUser user to persist; its update time is modified by this call
+     */
+    @Override
+    public void update(BiliUser biliUser) {
+        biliUser.setUpdateTime(LocalDateTime.now());
+
+        Document filter = new Document();
+        filter.put("userId", biliUser.getUserId());
+        mongoTemplate.getCollection(colName).replaceOne(filter, toDocument(biliUser));
+    }
+
+    /**
+     * Removes the given user from the collection.
+     *
+     * @param biliUser entity handed to {@code MongoTemplate.remove}
+     */
+    @Override
+    public void delete(BiliUser biliUser) {
+        mongoTemplate.remove(biliUser, colName);
+    }
+
+    /**
+     * Deletes every document matching all of the given criteria and logs how many were removed.
+     *
+     * <p>The collection name is passed explicitly so the delete targets {@code colName} like
+     * every other method here, instead of a name derived from the entity class.
+     *
+     * @param criteriaList criteria added to a single query; NOTE(review): an empty list
+     *                     produces an unrestricted query - confirm callers never pass one
+     */
+    public void batchDelete(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        DeleteResult deleteResult = mongoTemplate.remove(query, BiliUser.class, colName);
+        log.info("删除 {} 条数据", deleteResult.getDeletedCount());
+    }
+
+    /**
+     * Counts the documents matching a filter built from the given map.
+     *
+     * @param map entries copied into the filter document
+     * @return number of matching documents
+     */
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    /**
+     * Loads users from the collection, capped at 10000 documents.
+     *
+     * @return at most 10000 users
+     */
+    public List<BiliUser> findAll() {
+        Query query = new Query();
+        query.limit(10000);
+        return mongoTemplate.find(query, BiliUser.class, colName);
+    }
+
+    /**
+     * Loads the users matching all of the given criteria.
+     *
+     * @param criteriaList criteria added to a single query
+     * @return matching users, possibly empty
+     */
+    public List<BiliUser> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, BiliUser.class, colName);
+    }
+
+    /**
+     * Looks up a single user by {@code userId}.
+     *
+     * @param userId value compared against the {@code userId} field
+     * @return a matching user, or {@code null} when none exists
+     */
+    public BiliUser findByUserId(long userId) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("userId").is(userId));
+        // findOne fetches a single document instead of loading every match to read the first.
+        return mongoTemplate.findOne(query, BiliUser.class, colName);
+    }
+
+    /**
+     * Loads up to {@code size} users whose {@code feedUser} is {@code false} and whose
+     * {@code avatarUrl} is not null.
+     *
+     * @param size maximum number of users to return
+     * @return matching users, possibly empty
+     */
+    public List<BiliUser> findByNotFeedUser(int size) {
+        Query query = new Query();
+        query.skip(0).limit(size);
+        query.addCriteria(Criteria.where("feedUser").is(false));
+        query.addCriteria(Criteria.where("avatarUrl").ne(null));
+        return mongoTemplate.find(query, BiliUser.class, colName);
+    }
+
+    /** Converts a user into the document that gets written to the collection. */
+    private Document toDocument(BiliUser biliUser) {
+        Document doc = new Document();
+        mongoConverter.write(biliUser, doc);
+        return doc;
+    }
+}

+ 156 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/db/mongo/BiliVideoMongo.java

@@ -0,0 +1,156 @@
+package cn.reghao.bnt.web.parser.site.bilibili.db.mongo;
+
+import cn.reghao.jutil.jdk.db.BaseCrud;
+import cn.reghao.jutil.jdk.db.BaseQuery;
+import cn.reghao.bnt.web.parser.site.bilibili.model.po.BiliVideo;
+import com.mongodb.MongoBulkWriteException;
+import com.mongodb.client.model.InsertManyOptions;
+import com.mongodb.client.result.InsertManyResult;
+import lombok.extern.slf4j.Slf4j;
+import org.bson.Document;
+import org.springframework.data.domain.Sort;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.convert.MongoConverter;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.stereotype.Repository;
+
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * MongoDB access for {@link BiliVideo} entities stored in the {@code BiliVideo} collection.
+ *
+ * @author reghao
+ * @date 2023-05-29 11:17:41
+ */
+@Slf4j
+@Repository
+public class BiliVideoMongo implements BaseCrud<BiliVideo>, BaseQuery<BiliVideo> {
+    private final String colName = "BiliVideo";
+    private final MongoTemplate mongoTemplate;
+    private final MongoConverter mongoConverter;
+
+    public BiliVideoMongo(MongoTemplate mongoTemplate, MongoConverter mongoConverter) {
+        this.mongoTemplate = mongoTemplate;
+        this.mongoConverter = mongoConverter;
+    }
+
+    /**
+     * Inserts the video unless a document with the same {@code bvId} already exists.
+     *
+     * @param biliVideo video to insert
+     * @return always {@code null}, whether or not a document was inserted
+     */
+    @Override
+    public BiliVideo save(BiliVideo biliVideo) {
+        String bvId = biliVideo.getBvId();
+        if (findByBvId(bvId) != null) {
+            return null;
+        }
+
+        mongoTemplate.getCollection(colName).insertOne(toDocument(biliVideo));
+        return null;
+    }
+
+    /**
+     * Inserts all given videos with an unordered bulk write, so one rejected document does not
+     * stop the remaining ones from being written.
+     *
+     * @param list videos to insert; {@code null} or an empty list is a no-op
+     */
+    @Override
+    public void saveAll(List<BiliVideo> list) {
+        // insertMany rejects an empty batch, so bail out before building the request.
+        if (list == null || list.isEmpty()) {
+            return;
+        }
+
+        List<Document> documents = list.stream()
+                .map(this::toDocument)
+                .collect(Collectors.toList());
+
+        InsertManyOptions options = new InsertManyOptions().ordered(false);
+        try {
+            mongoTemplate.getCollection(colName).insertMany(documents, options);
+        } catch (MongoBulkWriteException e) {
+            // Still best-effort: the accepted documents are already stored. The rejected ones
+            // are reported instead of disappearing silently.
+            log.warn("{} of {} documents were not inserted into {}: {}",
+                    e.getWriteErrors().size(), documents.size(), colName, e.getMessage());
+        }
+    }
+
+    /**
+     * Stamps the video with the current time and replaces the stored document that has the
+     * same {@code bvId}.
+     *
+     * @param biliVideo video to persist; its update time is modified by this call
+     */
+    @Override
+    public void update(BiliVideo biliVideo) {
+        biliVideo.setUpdateTime(LocalDateTime.now());
+
+        Document filter = new Document();
+        filter.put("bvId", biliVideo.getBvId());
+        mongoTemplate.getCollection(colName).replaceOne(filter, toDocument(biliVideo));
+    }
+
+    /**
+     * Does nothing: this implementation never removes a video.
+     * NOTE(review): confirm the no-op is intentional and not a missing implementation.
+     *
+     * @param biliVideo ignored
+     */
+    @Override
+    public void delete(BiliVideo biliVideo) {
+    }
+
+    /**
+     * Counts the documents matching a filter built from the given map.
+     *
+     * @param map entries copied into the filter document
+     * @return number of matching documents
+     */
+    public long count(Map<String, Object> map) {
+        Document filter = new Document();
+        map.forEach(filter::append);
+        return mongoTemplate.getCollection(colName).countDocuments(filter);
+    }
+
+    /**
+     * Loads up to 10000 videos whose {@code coverUrl} is null. Despite the name, this is a
+     * filtered query, not a dump of the whole collection.
+     *
+     * @return at most 10000 videos without a {@code coverUrl}
+     */
+    public List<BiliVideo> findAll() {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("coverUrl").is(null));
+        query.limit(10000);
+        return mongoTemplate.find(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Loads the videos matching all of the given criteria.
+     *
+     * @param criteriaList criteria added to a single query
+     * @return matching videos, possibly empty
+     */
+    public List<BiliVideo> findAll(List<Criteria> criteriaList) {
+        Query query = new Query();
+        criteriaList.forEach(query::addCriteria);
+        return mongoTemplate.find(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Looks up a single video by {@code bvId}.
+     *
+     * @param bvId value compared against the {@code bvId} field
+     * @return a matching video, or {@code null} when none exists
+     */
+    public BiliVideo findByBvId(String bvId) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("bvId").is(bvId));
+        // findOne fetches a single document instead of loading every match to read the first.
+        return mongoTemplate.findOne(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Looks up a single video by {@code cid}.
+     *
+     * @param cid value compared against the {@code cid} field
+     * @return a matching video, or {@code null} when none exists
+     */
+    public BiliVideo findByCid(long cid) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("cid").is(cid));
+        return mongoTemplate.findOne(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Loads up to {@code size} videos that have a {@code coverUrl}, no {@code mpdUrl} and a
+     * {@code duration} below 600, ordered by ascending {@code updateTime}.
+     *
+     * @param size maximum number of videos to return
+     * @return matching videos, possibly empty
+     */
+    public List<BiliVideo> findNotCached(int size) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("coverUrl").ne(null));
+        query.addCriteria(Criteria.where("mpdUrl").is(null));
+        query.addCriteria(Criteria.where("duration").lt(600));
+        query.with(Sort.by(new Sort.Order(Sort.Direction.ASC, "updateTime")));
+        query.skip(0).limit(size);
+        return mongoTemplate.find(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Loads up to {@code size} videos that have no {@code mpdUrl} and a {@code duration}
+     * below 300, ordered by ascending {@code updateTime}.
+     *
+     * @param size   maximum number of videos to return
+     * @param lastId currently not used by the query; NOTE(review): the name suggests a
+     *               paging cursor - confirm whether it should restrict the result
+     * @return matching videos, possibly empty
+     */
+    public List<BiliVideo> findNotCached(int size, String lastId) {
+        Query query = new Query();
+        query.addCriteria(Criteria.where("mpdUrl").is(null));
+        query.addCriteria(Criteria.where("duration").lt(300));
+        query.with(Sort.by(new Sort.Order(Sort.Direction.ASC, "updateTime")));
+        query.skip(0).limit(size);
+        return mongoTemplate.find(query, BiliVideo.class, colName);
+    }
+
+    /**
+     * Loads up to {@code size} videos that have a {@code coverUrl}, whose {@code feedVideo}
+     * is {@code false} and whose {@code mpdUrl} is null, ordered by ascending
+     * {@code updateTime}.
+     *
+     * @param size maximum number of videos to return
+     * @return matching videos, possibly empty
+     */
+    public List<BiliVideo> findByNotFeedVideo(int size) {
+        Query query = new Query();
+        query.skip(0).limit(size);
+        query.addCriteria(Criteria.where("coverUrl").ne(null));
+        query.addCriteria(Criteria.where("feedVideo").is(false));
+        query.addCriteria(Criteria.where("mpdUrl").is(null));
+
+        query.with(Sort.by(new Sort.Order(Sort.Direction.ASC, "updateTime")));
+        return mongoTemplate.find(query, BiliVideo.class, colName);
+    }
+
+    /** Converts a video into the document that gets written to the collection. */
+    private Document toDocument(BiliVideo biliVideo) {
+        Document doc = new Document();
+        mongoConverter.write(biliVideo, doc);
+        return doc;
+    }
+}

+ 51 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliComment.java

@@ -0,0 +1,51 @@
+package cn.reghao.bnt.web.parser.site.bilibili.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import org.springframework.data.annotation.Transient;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Comment model of the bilibili parser; two comments are equal when they share the same
+ * {@code commentId}.
+ *
+ * <p>{@code equals} and {@code hashCode} are written by hand, so no explicit
+ * {@code @EqualsAndHashCode} is declared: Lombok generates neither method when both exist.
+ *
+ * @author reghao
+ * @date 2022-02-25 12:47:22
+ */
+@Data
+public class BiliComment extends BaseObject<String> {
+    private Long commentId;
+    private Long rootId;
+    private String bvId;
+    private Long avId;
+    private Long userId;
+    private String replyContent;
+    private Long replyTime;
+    private Integer thumbUpCount;
+    private List<String> commentImages = new ArrayList<>();
+    private String location;
+    @Transient
+    private BiliUser biliUser;
+
+    /**
+     * Hash derived from {@code commentId} only.
+     *
+     * @return the same value as before for a non-null id; a missing id contributes 0 instead
+     *         of raising a {@link NullPointerException}
+     */
+    @Override
+    public int hashCode() {
+        int idHash = commentId == null ? 0 : commentId.hashCode();
+        return 17 * 31 + idHash;
+    }
+
+    /**
+     * Compares by {@code commentId}.
+     *
+     * @param other object to compare with
+     * @return {@code true} for the same instance or another {@code BiliComment} with an equal,
+     *         non-null {@code commentId}; a comment without an id equals only itself
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof BiliComment)) {
+            return false;
+        }
+
+        BiliComment that = (BiliComment) other;
+        return commentId != null && commentId.equals(that.commentId);
+    }
+}

+ 45 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliDanmaku.java

@@ -0,0 +1,45 @@
+package cn.reghao.bnt.web.parser.site.bilibili.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+
+/**
+ * Danmaku model of the bilibili parser; two danmaku are equal when they share the same
+ * {@code dmId}.
+ *
+ * @author reghao
+ * @date 2022-06-02 16:57:29
+ */
+@NoArgsConstructor
+@AllArgsConstructor
+@Getter
+public class BiliDanmaku extends BaseObject<String> {
+    private Long dmId;
+    private Integer mode;
+    private Integer color;
+    private Double progress;
+    private String content;
+    private String bvId;
+    private Long publishAt;
+    private String midHash;
+
+    /**
+     * Hash derived from {@code dmId} only.
+     *
+     * @return the same value as before for a non-null id; a missing id (possible through the
+     *         no-args constructor) contributes 0 instead of raising a
+     *         {@link NullPointerException}
+     */
+    @Override
+    public int hashCode() {
+        int idHash = dmId == null ? 0 : dmId.hashCode();
+        return 17 * 31 + idHash;
+    }
+
+    /**
+     * Compares by {@code dmId}.
+     *
+     * @param other object to compare with
+     * @return {@code true} for the same instance or another {@code BiliDanmaku} with an equal,
+     *         non-null {@code dmId}; a danmaku without an id equals only itself
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof BiliDanmaku)) {
+            return false;
+        }
+
+        BiliDanmaku that = (BiliDanmaku) other;
+        return dmId != null && dmId.equals(that.dmId);
+    }
+}

+ 44 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliDanmakuUrl.java

@@ -0,0 +1,44 @@
+package cn.reghao.bnt.web.parser.site.bilibili.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+
+/**
+ * Danmaku URL record of the bilibili parser; two records are equal when they share the same
+ * {@code url}.
+ *
+ * @author reghao
+ * @date 2022-06-08 14:00:06
+ */
+@NoArgsConstructor
+@Getter
+public class BiliDanmakuUrl extends BaseObject<String> {
+    private Long cid;
+    private String url;
+    private Long crawledTime;
+
+    /**
+     * Creates a record with {@code crawledTime} set to 0.
+     *
+     * @param cid value stored in {@code cid}
+     * @param url value stored in {@code url}
+     */
+    public BiliDanmakuUrl(Long cid, String url) {
+        this.cid = cid;
+        this.url = url;
+        this.crawledTime = 0L;
+    }
+
+    /**
+     * Hash derived from {@code url} only.
+     *
+     * @return the same value as before for a non-null url; a missing url (possible through
+     *         the no-args constructor) contributes 0 instead of raising a
+     *         {@link NullPointerException}
+     */
+    @Override
+    public int hashCode() {
+        int urlHash = url == null ? 0 : url.hashCode();
+        return 17 * 31 + urlHash;
+    }
+
+    /**
+     * Compares by {@code url}.
+     *
+     * @param other object to compare with
+     * @return {@code true} for the same instance or another {@code BiliDanmakuUrl} with an
+     *         equal, non-null {@code url}; a record without a url equals only itself
+     */
+    @Override
+    public boolean equals(Object other) {
+        if (other == this) {
+            return true;
+        }
+        if (!(other instanceof BiliDanmakuUrl)) {
+            return false;
+        }
+
+        BiliDanmakuUrl that = (BiliDanmakuUrl) other;
+        return url != null && url.equals(that.url);
+    }
+}

+ 30 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliUser.java

@@ -0,0 +1,30 @@
+package cn.reghao.bnt.web.parser.site.bilibili.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.Data;
+
+/**
+ * User model of the bilibili parser. Every constructor starts with {@code feedUser} set to
+ * {@code false}.
+ *
+ * @author reghao
+ * @date 2020-08-03 09:47:22
+ */
+@Data
+public class BiliUser extends BaseObject<String> {
+    private Long userId;
+    private String username;
+    private String gender;
+    private String signature;
+    private String avatarUrlOrigin;
+    private String avatarUrl;
+    private Boolean feedUser;
+
+    /** Creates an otherwise empty user with {@code feedUser} set to {@code false}. */
+    public BiliUser() {
+        this.feedUser = false;
+    }
+
+    /**
+     * Creates a user from its id, name and avatar address.
+     *
+     * @param userId    value stored in {@code userId}
+     * @param username  value stored in {@code username}
+     * @param avatarUrl value stored in {@code avatarUrlOrigin}; {@code avatarUrl} itself is
+     *                  left unset
+     */
+    public BiliUser(long userId, String username, String avatarUrl) {
+        // Reuse the no-args constructor for the feedUser default.
+        this();
+        this.avatarUrlOrigin = avatarUrl;
+        this.username = username;
+        this.userId = userId;
+    }
+}

+ 69 - 0
web/src/main/java/cn/reghao/bnt/web/parser/site/bilibili/model/po/BiliVideo.java

@@ -0,0 +1,69 @@
+package cn.reghao.bnt.web.parser.site.bilibili.model.po;
+
+import cn.reghao.jutil.jdk.db.BaseObject;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.time.LocalDateTime;
+
+/**
+ * Video model of the bilibili parser, holding the video's descriptive fields, its counters
+ * and the owning user.
+ *
+ * @author reghao
+ * @date 2021-04-21 17:52:01
+ */
+@Setter
+@Getter
+public class BiliVideo extends BaseObject<String> {
+    private String bvId;
+    private Long avId;
+    private Long cid;
+    private String title;
+    private String description;
+    private String pageUrl;
+    private String coverUrlOrigin;
+    private String coverUrl;
+    // in seconds
+    private Integer duration;
+    private String tags;
+    private Boolean horizontal;
+    private LocalDateTime pubDate;
+    private String mpdUrl;
+    private Boolean feedVideo;
+
+    private Integer categoryPid;
+    private Integer categoryId;
+    private Integer view;
+    private Integer danmaku;
+    private Integer reply;
+    private Integer favorite;
+    private Integer share;
+    private Integer like;
+    private Integer dislike;
+    private Integer coin;
+    private Integer rid;
+
+    private long userId;
+    private BiliUser biliUser;
+
+    /**
+     * Creates a video from its descriptive fields and applies the defaults: {@code feedVideo}
+     * is {@code false}, {@code categoryPid} is 3, {@code categoryId} is 4, and the counters
+     * {@code view}, {@code danmaku}, {@code reply}, {@code favorite}, {@code share},
+     * {@code like} and {@code rid} are 0. {@code dislike} and {@code coin} are left unset.
+     *
+     * @param bvId           value stored in {@code bvId}
+     * @param avId           value stored in {@code avId}
+     * @param title          value stored in {@code title}
+     * @param description    value stored in {@code description}
+     * @param pageUrl        value stored in {@code pageUrl}
+     * @param coverUrlOrigin value stored in {@code coverUrlOrigin}
+     * @param duration       value stored in {@code duration}
+     * @param horizontal     value stored in {@code horizontal}
+     * @param pubDate        value stored in {@code pubDate}
+     */
+    public BiliVideo(String bvId, Long avId, String title, String description, String pageUrl,
+                     String coverUrlOrigin, int duration, Boolean horizontal, LocalDateTime pubDate) {
+        // Defaults that do not depend on the arguments.
+        this.feedVideo = false;
+        this.categoryPid = 3;
+        this.categoryId = 4;
+        this.view = 0;
+        this.danmaku = 0;
+        this.reply = 0;
+        this.favorite = 0;
+        this.share = 0;
+        this.like = 0;
+        this.rid = 0;
+
+        // Values supplied by the caller.
+        this.bvId = bvId;
+        this.avId = avId;
+        this.title = title;
+        this.description = description;
+        this.pageUrl = pageUrl;
+        this.coverUrlOrigin = coverUrlOrigin;
+        this.duration = duration;
+        this.horizontal = horizontal;
+        this.pubDate = pubDate;
+    }
+}

Некоторые файлы не были показаны из-за большого количества измененных файлов