|
|
@@ -1,298 +0,0 @@
|
|
|
-package cn.reghao.bnt.browser.chrome;
|
|
|
-
|
|
|
-import cn.reghao.bnt.browser.cdp.req.*;
|
|
|
-import cn.reghao.bnt.browser.ws.WebSocketClient;
|
|
|
-import cn.reghao.bnt.browser.cdp.req.*;
|
|
|
-import cn.reghao.jutil.jdk.serializer.JsonConverter;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import okhttp3.Call;
|
|
|
-import okhttp3.OkHttpClient;
|
|
|
-import okhttp3.Request;
|
|
|
-import okhttp3.Response;
|
|
|
-
|
|
|
-import java.util.*;
|
|
|
-import java.util.concurrent.ConcurrentHashMap;
|
|
|
-import java.util.concurrent.ThreadLocalRandom;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-/**
|
|
|
- * Chrome 浏览器抽象类
|
|
|
- *
|
|
|
- * @author reghao
|
|
|
- * @date 2021-03-19 13:58:07
|
|
|
- */
|
|
|
-@Slf4j
|
|
|
-public abstract class AbstractChrome {
|
|
|
- protected boolean isHeadless;
|
|
|
- protected boolean enableProxy;
|
|
|
- protected int reqTimeout;
|
|
|
- protected int remotePort;
|
|
|
- protected String tabsApi;
|
|
|
- protected String wsPrefix;
|
|
|
- protected Map<String, PageRequest> pageRequestMap = new ConcurrentHashMap<>();
|
|
|
- protected Map<String, WebSocketClient> wsClientMap = new ConcurrentHashMap<>();
|
|
|
-
|
|
|
- public AbstractChrome(boolean isHeadless, boolean enableProxy) {
|
|
|
- this.isHeadless = isHeadless;
|
|
|
- this.enableProxy = enableProxy;
|
|
|
- // 需要代理的页面超时 30s
|
|
|
- this.reqTimeout = enableProxy ? 30 : 10;
|
|
|
- init();
|
|
|
- }
|
|
|
-
|
|
|
- private void init() {
|
|
|
- this.remotePort = randomPort();
|
|
|
- this.tabsApi = String.format("http://localhost:%s/json", remotePort);
|
|
|
- this.wsPrefix = String.format("ws://localhost:%s/devtools/page/", remotePort);
|
|
|
- }
|
|
|
-
|
|
|
- private int randomPort() {
|
|
|
- int min = 10000, max = 20000;
|
|
|
- return ThreadLocalRandom.current().nextInt(min, max + 1);
|
|
|
- }
|
|
|
-
|
|
|
- protected void enableWebSocket() {
|
|
|
- Set<String> tabIds = tabIds();
|
|
|
- if (tabIds.size() != 1) {
|
|
|
- log.error("Chrome 启动时默认打开的 tab 不是 1 个,而是 {} 个, 启用 WebSocket 连接失败...", tabIds.size());
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- String tabId = tabIds.iterator().next();
|
|
|
- PageRequest pageRequest = new PageRequest();
|
|
|
- pageRequestMap.putIfAbsent(tabId, pageRequest);
|
|
|
- wsClientMap.putIfAbsent(tabId, new WebSocketClient(wsPrefix + tabId, pageRequest));
|
|
|
- }
|
|
|
-
|
|
|
- private Set<String> tabIds() {
|
|
|
- OkHttpClient okHttpClient = new OkHttpClient();
|
|
|
- Request request = new Request.Builder()
|
|
|
- .url(tabsApi)
|
|
|
- .get()
|
|
|
- .build();
|
|
|
-
|
|
|
- Call call = okHttpClient.newCall(request);
|
|
|
- try {
|
|
|
- Response response = call.execute();
|
|
|
- assert response.body() != null;
|
|
|
- String body = response.body().string();
|
|
|
- return JsonConverter.jsonToObjects(body, RemoteTarget.class).stream()
|
|
|
- .filter(remoteTarget -> remoteTarget.getType().equals("page"))
|
|
|
- .filter(remoteTarget -> {
|
|
|
- String url = remoteTarget.getUrl();
|
|
|
- return url.equals("data:,") || url.equals("chrome://newtab/");
|
|
|
- })
|
|
|
- .map(RemoteTarget::getId)
|
|
|
- .collect(Collectors.toSet());
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("获取 chrome tab 列表失败 -> {}", e.getMessage());
|
|
|
- }
|
|
|
- return Collections.emptySet();
|
|
|
- }
|
|
|
-
|
|
|
- protected void openTabs(int tabNum) {
|
|
|
- if (wsClientMap.size() != 1) {
|
|
|
- log.error("打开新 tab 失败");
|
|
|
- return;
|
|
|
- }
|
|
|
- WebSocketClient wsClient = wsClientMap.entrySet().iterator().next().getValue();
|
|
|
- for (int i = 1; i < tabNum; i++) {
|
|
|
- TargetCreateTarget targetCreateTarget = new TargetCreateTarget("");
|
|
|
- wsClient.sendMessage(targetCreateTarget);
|
|
|
- }
|
|
|
-
|
|
|
- Set<String> tabIds = tabIds();
|
|
|
- if (tabIds.size() != tabNum) {
|
|
|
- log.error("打开新 tab 失败");
|
|
|
- return;
|
|
|
- }
|
|
|
- tabIds.forEach(this::enableTabNetworkDebug);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 启用 tab 网络 debug
|
|
|
- *
|
|
|
- * @param
|
|
|
- * @return
|
|
|
- * @date 2021-08-07 下午3:40
|
|
|
- */
|
|
|
- private void enableTabNetworkDebug(String tabId) {
|
|
|
- PageRequest pageRequest = pageRequestMap.get(tabId);
|
|
|
- if (pageRequest == null) {
|
|
|
- pageRequest = new PageRequest();
|
|
|
- pageRequestMap.put(tabId, pageRequest);
|
|
|
- }
|
|
|
-
|
|
|
- NetworkEnable networkEnable = new NetworkEnable();
|
|
|
- WebSocketClient wsClient = wsClientMap.get(tabId);
|
|
|
- if (wsClient == null) {
|
|
|
- wsClientMap.put(tabId, new WebSocketClient(wsPrefix + tabId, pageRequest));
|
|
|
- } else {
|
|
|
- wsClient.sendMessage(networkEnable);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- public List<String> availTabIds() {
|
|
|
- return new ArrayList<>(wsClientMap.keySet());
|
|
|
- }
|
|
|
-
|
|
|
- public List<ReqInPage> getDynamicPage(String tabId, String url, boolean autoScroll, Set<ReqMatcher> reqMatchers) {
|
|
|
- PageRequest pageRequest = pageRequestMap.get(tabId);
|
|
|
- pageRequest.setReqMatchers(reqMatchers);
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- // 向 chrome 发送打开页面事件
|
|
|
- wsClientMap.get(tabId).sendMessage(pageNavigate);
|
|
|
- for (int i = 0; i < reqTimeout; i++) {
|
|
|
- try {
|
|
|
- Thread.sleep(1_000);
|
|
|
- //log.info("请求已用时 {}s...", i+1);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- if (pageRequest.isNotFound()) {
|
|
|
- log.info("{} 中的资源不存在,请开始下一次请求...", url);
|
|
|
- pageRequest.clearAll();
|
|
|
- return null;
|
|
|
- } else if (pageRequest.isLoadDone()) {
|
|
|
- log.info("请求处理完成,请开始下一次请求...");
|
|
|
- if (autoScroll) {
|
|
|
- autoScrollPage();
|
|
|
- }
|
|
|
- return pageRequest.reqsInPage();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pageRequest.clearAll();
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
-
|
|
|
- public List<ReqInPage> getDynamicPage(String url, boolean autoScroll, Set<ReqMatcher> reqMatchers) {
|
|
|
- Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
|
|
|
- String tabId = entry.getKey();
|
|
|
- PageRequest pageRequest = entry.getValue();
|
|
|
- pageRequest.setReqMatchers(reqMatchers);
|
|
|
- pageRequest.setHandleDirectly(true);
|
|
|
-
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- wsClientMap.get(tabId).sendMessage(pageNavigate);
|
|
|
- for (int i = 0; i < reqTimeout; i++) {
|
|
|
- try {
|
|
|
- Thread.sleep(1_000);
|
|
|
- //log.info("请求已用时 {}s...", i+1);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
-
|
|
|
- if (pageRequest.isNotFound()) {
|
|
|
- log.info("{} 中的资源不存在,请开始下一次请求...", url);
|
|
|
- pageRequest.clearAll();
|
|
|
- return null;
|
|
|
- } else if (pageRequest.isLoadDone()) {
|
|
|
- log.info("请求处理完成,请开始下一次请求...");
|
|
|
- if (autoScroll) {
|
|
|
- autoScrollPage();
|
|
|
- }
|
|
|
- return pageRequest.reqsInPage();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pageRequest.clearAll();
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
-
|
|
|
- public void getAndHandleDynamicPage(String url, Set<ReqMatcher> reqMatchers) {
|
|
|
- Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
|
|
|
- String tabId = entry.getKey();
|
|
|
- PageRequest pageRequest = entry.getValue();
|
|
|
- pageRequest.setReqMatchers(reqMatchers);
|
|
|
- pageRequest.setHandleDirectly(true);
|
|
|
-
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- wsClientMap.get(tabId).sendMessage(pageNavigate);
|
|
|
- int timeout = 3600*24;
|
|
|
- for (int i = 0; i < timeout; i++) {
|
|
|
- try {
|
|
|
- Thread.sleep(1_000);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- public void getAndHandleDynamicPageAuto(String url, Set<ReqMatcher> reqMatchers) {
|
|
|
- Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
|
|
|
- String tabId = entry.getKey();
|
|
|
- PageRequest pageRequest = entry.getValue();
|
|
|
- pageRequest.setReqMatchers(reqMatchers);
|
|
|
- pageRequest.setHandleDirectly(true);
|
|
|
-
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- wsClientMap.get(tabId).sendMessage(pageNavigate);
|
|
|
-
|
|
|
- try {
|
|
|
- log.info("休眠 10s 后请求下一个页面");
|
|
|
- Thread.sleep(10_000);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- @Deprecated
|
|
|
- public void getAndHandleDynamicPages(Set<String> urls, Set<ReqMatcher> reqMatchers) {
|
|
|
- Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
|
|
|
- String tabId = entry.getKey();
|
|
|
- PageRequest pageRequest = entry.getValue();
|
|
|
- pageRequest.setReqMatchers(reqMatchers);
|
|
|
- pageRequest.setHandleDirectly(true);
|
|
|
- for (String url : urls) {
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- wsClientMap.get(tabId).sendMessage(pageNavigate);
|
|
|
-
|
|
|
- try {
|
|
|
- log.info("休眠 3s 后请求下一个页面");
|
|
|
- Thread.sleep(3_000);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- public void download(Set<String> urls, String dir) {
|
|
|
- Map.Entry<String, PageRequest> entry = pageRequestMap.entrySet().iterator().next();
|
|
|
- String tabId = entry.getKey();
|
|
|
- PageRequest pageRequest = entry.getValue();
|
|
|
- pageRequest.setReqMatchers(new HashSet<>());
|
|
|
- pageRequest.setHandleDirectly(true);
|
|
|
-
|
|
|
- WebSocketClient wsClient = wsClientMap.get(tabId);
|
|
|
- PageSetDownloadBehavior downloadBehavior = new PageSetDownloadBehavior(dir);
|
|
|
- wsClient.sendMessage(downloadBehavior);
|
|
|
- for (String url : urls) {
|
|
|
- PageNavigate pageNavigate = new PageNavigate(url);
|
|
|
- int id = pageNavigate.getId();
|
|
|
- wsClient.sendMessage(pageNavigate);
|
|
|
-
|
|
|
- try {
|
|
|
- log.info("休眠 3s 后请求下一个页面");
|
|
|
- Thread.sleep(3_000);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 自动滚动页面
|
|
|
- *
|
|
|
- * @param
|
|
|
- * @return
|
|
|
- * @date 2021-08-06 上午9:54
|
|
|
- */
|
|
|
- abstract void autoScrollPage();
|
|
|
-}
|