|
@@ -0,0 +1,121 @@
|
|
|
|
|
+package cn.reghao.tnb.content.app.data.spider.task;
|
|
|
|
|
+
|
|
|
|
|
+import cn.reghao.jutil.jdk.converter.DateTimeConverter;
|
|
|
|
|
+import cn.reghao.jutil.jdk.http.WebRequest;
|
|
|
|
|
+import cn.reghao.jutil.jdk.http.WebResponse;
|
|
|
|
|
+import cn.reghao.jutil.jdk.serializer.JsonConverter;
|
|
|
|
|
+import cn.reghao.jutil.tool.id.SnowFlake;
|
|
|
|
|
+import cn.reghao.tnb.content.app.data.db.mongo.RichTextMongo;
|
|
|
|
|
+import cn.reghao.tnb.content.app.data.model.po.RichText;
|
|
|
|
|
+import com.google.gson.JsonArray;
|
|
|
|
|
+import com.google.gson.JsonElement;
|
|
|
|
|
+import com.google.gson.JsonObject;
|
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
+
|
|
|
|
|
+import java.time.Duration;
|
|
|
|
|
+import java.time.LocalDate;
|
|
|
|
|
+import java.time.LocalDateTime;
|
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
|
+import java.util.HashMap;
|
|
|
|
|
+import java.util.List;
|
|
|
|
|
+import java.util.Map;
|
|
|
|
|
+
|
|
|
|
|
+/**
|
|
|
|
|
+ * @author reghao
|
|
|
|
|
+ * @date 2025-05-06 11:33:42
|
|
|
|
|
+ */
|
|
|
|
|
+@Slf4j
|
|
|
|
|
+public class GetNewsIndexTask implements Runnable {
|
|
|
|
|
+ private final SnowFlake idGenerator = new SnowFlake(1, 1);
|
|
|
|
|
+ private final WebRequest webRequest;
|
|
|
|
|
+ private final RichTextMongo richTextMongo;
|
|
|
|
|
+ private final int pastHours;
|
|
|
|
|
+
|
|
|
|
|
+ public GetNewsIndexTask(WebRequest webRequest, RichTextMongo richTextMongo, int pastHours) {
|
|
|
|
|
+ this.webRequest = webRequest;
|
|
|
|
|
+ this.richTextMongo = richTextMongo;
|
|
|
|
|
+ this.pastHours = pastHours;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void run() {
|
|
|
|
|
+ LocalDateTime current = LocalDate.now().atStartOfDay();
|
|
|
|
|
+ List<Integer> excludeContIds = List.of(30769556,30772115,30772061,30770621,30772060,30771769,30771960,30772059,30772058,30770148,30769570,30767725,30772111,30772110,30767342,30772330);
|
|
|
|
|
+ List<Integer> listRecommendIds = List.of(30769556,30772330,30771960,30771769);
|
|
|
|
|
+
|
|
|
|
|
+ String url1 = "https://api.thepaper.cn/contentapi/nodeCont/getByChannelId";
|
|
|
|
|
+ Map<String, Object> map = new HashMap<>();
|
|
|
|
|
+ map.put("channelId", "25950");
|
|
|
|
|
+ map.put("excludeContIds", excludeContIds);
|
|
|
|
|
+ map.put("listRecommendIds", listRecommendIds);
|
|
|
|
|
+ map.put("pageSize", "20");
|
|
|
|
|
+
|
|
|
|
|
+ int pageNum = 1;
|
|
|
|
|
+ long startTime = System.currentTimeMillis();
|
|
|
|
|
+ while (pageNum < 50) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ map.put("pageNum", String.valueOf(pageNum++));
|
|
|
|
|
+ map.put("startTime", String.valueOf(startTime));
|
|
|
|
|
+
|
|
|
|
|
+ WebResponse webResponse = webRequest.postJson(url1, JsonConverter.objectToJson(map));
|
|
|
|
|
+ if (webResponse.getStatusCode() != 200) {
|
|
|
|
|
+ log.error("get {} failed", url1);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String body = webResponse.getBody();
|
|
|
|
|
+ JsonObject jsonObject = JsonConverter.jsonToJsonElement(body).getAsJsonObject();
|
|
|
|
|
+ int code = jsonObject.get("code").getAsInt();
|
|
|
|
|
+ if (code != 200) {
|
|
|
|
|
+ log.error("get {} failed", url1);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JsonObject dataObject = jsonObject.get("data").getAsJsonObject();
|
|
|
|
|
+ startTime = dataObject.get("startTime").getAsLong();
|
|
|
|
|
+ JsonArray jsonArray = dataObject.get("list").getAsJsonArray();
|
|
|
|
|
+ List<RichText> richTextList = new ArrayList<>();
|
|
|
|
|
+ for (JsonElement jsonElement : jsonArray) {
|
|
|
|
|
+ JsonObject jsonObject1 = jsonElement.getAsJsonObject();
|
|
|
|
|
+ String name = jsonObject1.get("name").getAsString();
|
|
|
|
|
+ String pic = jsonObject1.get("pic").getAsString();
|
|
|
|
|
+ long contId = jsonObject1.get("contId").getAsLong();
|
|
|
|
|
+ long pubTimeLong = jsonObject1.get("pubTimeLong").getAsLong();
|
|
|
|
|
+ String pageUrl = String.format("https://www.thepaper.cn/newsDetail_forward_%s", contId);
|
|
|
|
|
+ RichText richText = richTextMongo.findByOriginalUrl(pageUrl);
|
|
|
|
|
+ if (richText == null) {
|
|
|
|
|
+ long contentId = idGenerator.nextId();
|
|
|
|
|
+ richText = new RichText();
|
|
|
|
|
+ richText.setContentId(contentId);
|
|
|
|
|
+ richText.setOriginalUrl(pageUrl);
|
|
|
|
|
+ richText.setTitle(name);
|
|
|
|
|
+ richText.setCoverUrl(pic);
|
|
|
|
|
+ richText.setPublishAt(pubTimeLong);
|
|
|
|
|
+ richTextList.add(richText);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (!richTextList.isEmpty()) {
|
|
|
|
|
+ richTextMongo.saveAll(richTextList);
|
|
|
|
|
+
|
|
|
|
|
+ richTextList.sort((o1, o2) -> {
|
|
|
|
|
+ long l = o1.getPublishAt() - o2.getPublishAt();
|
|
|
|
|
+ return (int) l;
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ long latestPubTime = richTextList.get(richTextList.size()-1).getPublishAt();
|
|
|
|
|
+ LocalDateTime pubDateTime = DateTimeConverter.localDateTime(latestPubTime);
|
|
|
|
|
+ Duration duration = Duration.between(current, pubDateTime);
|
|
|
|
|
+ if (duration.toHours()+pastHours < 0) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ Thread.sleep(1_000);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ log.info("{} done", this.getClass().getSimpleName());
|
|
|
|
|
+ }
|
|
|
|
|
+}
|