reghao 1 год назад
Родитель
Commit
65cb47e943

+ 0 - 34
browser/src/main/java/cn/reghao/bnt/browser/SpiderBrowser.java

@@ -1,44 +1,10 @@
 package cn.reghao.bnt.browser;
 
-import cn.reghao.bnt.browser.parser.ChromeDataParser;
-import cn.reghao.bnt.browser.chrome.AbstractChrome;
-import cn.reghao.bnt.browser.chrome.ChromeBrowser;
-import cn.reghao.bnt.browser.chrome.ReqMatcher;
-import cn.reghao.bnt.spider.url.BodyDataType;
-import cn.reghao.bnt.spider.url.Site;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
 /**
  * @author reghao
  * @date 2023-09-10 02:24:50
  */
 public class SpiderBrowser { // NOTE(review): after this change only an empty main remains in this class
-    public static Map<String, String> map = new HashMap<>();
-    static ChromeDataParser chromeDataParser = new ChromeDataParser();
-
-    static Set<ReqMatcher> reqMatchers() {
-        Set<ReqMatcher> set = new HashSet<>();
-        String pattern = "h5/mtop.taobao.rate.detaillist.get/6.0";
-        String parser = "TmallCommentDataParser";
-        map.put(pattern, parser);
-        set.add(new ReqMatcher(Site.bilibili, pattern, BodyDataType.json, chromeDataParser));
-
-        String pattern1 = "rate.taobao.com/feedRateList.htm";
-        String parser1 = "TaobaoCommentDataParser";
-        map.put(pattern1, parser1);
-        set.add(new ReqMatcher(Site.bilibili, pattern1, BodyDataType.json, chromeDataParser));
-        return set;
-    }
-
     public static void main(String[] args) { // NOTE(review): body is now empty; the same statements are added to TmallSpiderTest.main
-        Set<ReqMatcher> set = reqMatchers();
-
-        String url = "https://www.tmall.com/";
-        AbstractChrome chrome = new ChromeBrowser(false, false);
-        chrome.getAndHandleDynamicPage(url, set);
     }
 }

+ 3 - 1
browser/src/main/java/cn/reghao/bnt/browser/parser/ChromeDataParser.java

@@ -15,6 +15,7 @@ import lombok.extern.slf4j.Slf4j;
 
 import java.lang.reflect.Type;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -25,10 +26,11 @@ import java.util.Map;
 public class ChromeDataParser implements DataParser {
     private final WebRequest webRequest = new DefaultWebRequest();
     private final String api = "http://spider.reghao.cn/api/crawler/rawdata";
+    private Map<String, String> map = new HashMap<>();
 
     @Override
     public Map<String, Object> parse(String url, String body) throws InterruptedException {
-        SpiderBrowser.map.forEach((key, value) -> {
+        map.forEach((key, value) -> {
             if (url.contains(key)) {
                 CrawlUrl crawlUrl = new CrawlUrl(Site.taobao.name(), value, url);
                 RawData rawData = new RawData(crawlUrl, body);

+ 46 - 0
browser/src/test/java/cn/reghao/bnt/browser/TmallSpiderTest.java

@@ -0,0 +1,46 @@
+package cn.reghao.bnt.browser;
+
+import cn.reghao.bnt.browser.chrome.AbstractChrome;
+import cn.reghao.bnt.browser.chrome.ChromeBrowser;
+import cn.reghao.bnt.browser.chrome.ReqMatcher;
+import cn.reghao.bnt.browser.parser.ChromeDataParser;
+import cn.reghao.bnt.spider.url.BodyDataType;
+import cn.reghao.bnt.spider.url.Site;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Class with a {@code main} method that builds two {@link ReqMatcher}s (one per URL pattern,
+ * each pattern also recorded in {@link #map} next to a parser name) and passes them, together
+ * with the URL {@code https://www.tmall.com/}, to {@code ChromeBrowser.getAndHandleDynamicPage}.
+ *
+ * NOTE(review): {@link #map} is filled by {@link #reqMatchers()} but no visible code reads it.
+ * ChromeDataParser.parse iterates the parser's own private map, which no visible code
+ * populates - confirm that a matched URL can still be resolved to a parser name.
+ *
+ * @author reghao
+ * @date 2023-08-31 00:20:54
+ */
+@Slf4j
+public class TmallSpiderTest {
+    public static Map<String, String> map = new HashMap<>(); // URL pattern -> parser name
+    static ChromeDataParser chromeDataParser = new ChromeDataParser(); // one instance shared by both matchers
+
+    static Set<ReqMatcher> reqMatchers() { // records both patterns in map and returns one matcher per pattern
+        Set<ReqMatcher> set = new HashSet<>();
+        String pattern = "h5/mtop.taobao.rate.detaillist.get/6.0";
+        String parser = "TmallCommentDataParser";
+        map.put(pattern, parser);
+        set.add(new ReqMatcher(Site.bilibili, pattern, BodyDataType.json, chromeDataParser)); // NOTE(review): Site.bilibili paired with a taobao URL pattern - confirm intended
+
+        String pattern1 = "rate.taobao.com/feedRateList.htm";
+        String parser1 = "TaobaoCommentDataParser";
+        map.put(pattern1, parser1);
+        set.add(new ReqMatcher(Site.bilibili, pattern1, BodyDataType.json, chromeDataParser)); // NOTE(review): same Site.bilibili question as above
+        return set;
+    }
+
+    public static void main(String[] args) {
+        Set<ReqMatcher> set = reqMatchers();
+
+        String url = "https://www.tmall.com/";
+        AbstractChrome chrome = new ChromeBrowser(false, false); // meaning of the two boolean flags is not visible here - TODO confirm
+        chrome.getAndHandleDynamicPage(url, set);
+    }
+}