Commit af0bbdcecf9eb4a2de40475ab8709eaf61ae82d4

Authored by 谢茂盛
1 parent 47bdaf78

feat: nature article爬取

sql/webmagic.sql
1 -DROP TABLE IF EXISTS `nature_article`;  
2 -CREATE TABLE `nature_article` ( 1 +DROP TABLE IF EXISTS `article`;
  2 +CREATE TABLE `article` (
3 `id` bigint NOT NULL AUTO_INCREMENT, 3 `id` bigint NOT NULL AUTO_INCREMENT,
  4 + `article_type` varchar(32) DEFAULT NULL COMMENT '文章类型',
4 `article_code` varchar(64) DEFAULT NULL COMMENT '文章标识', 5 `article_code` varchar(64) DEFAULT NULL COMMENT '文章标识',
5 `author_name` varchar(256) DEFAULT NULL COMMENT '作者名称', 6 `author_name` varchar(256) DEFAULT NULL COMMENT '作者名称',
6 `title` varchar(256) DEFAULT NULL COMMENT '文章标题', 7 `title` varchar(256) DEFAULT NULL COMMENT '文章标题',
7 `publish_time` varchar(64) DEFAULT NULL COMMENT '发布时间', 8 `publish_time` varchar(64) DEFAULT NULL COMMENT '发布时间',
8 `email_info` varchar(512) DEFAULT NULL COMMENT '邮箱信息', 9 `email_info` varchar(512) DEFAULT NULL COMMENT '邮箱信息',
  10 + `article_desc` text DEFAULT NULL COMMENT '文章摘要',
  11 + `author_address` text DEFAULT NULL COMMENT '作者地址信息',
  12 + `reference_info` text DEFAULT NULL COMMENT '相关文章引用信息',
9 PRIMARY KEY (`id`) 13 PRIMARY KEY (`id`)
10 -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='nature-文章信息';  
11 -  
12 -  
13 -alter table `nature_article` add column `article_code` varchar(256) DEFAULT NULL COMMENT '文章标识';  
14 \ No newline at end of file 14 \ No newline at end of file
  15 +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='文章信息';
15 \ No newline at end of file 16 \ No newline at end of file
src/main/java/com/canrd/webmagic/controller/NatureArticleController.java renamed to src/main/java/com/canrd/webmagic/controller/ArticleController.java
@@ -6,7 +6,7 @@ import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; @@ -6,7 +6,7 @@ import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
6 import com.canrd.webmagic.domain.vo.NatureArticleVO; 6 import com.canrd.webmagic.domain.vo.NatureArticleVO;
7 import com.canrd.webmagic.processor.NatureSearchPageProcessor; 7 import com.canrd.webmagic.processor.NatureSearchPageProcessor;
8 import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline; 8 import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline;
9 -import com.canrd.webmagic.service.NatureArticleService; 9 +import com.canrd.webmagic.service.ArticleService;
10 import org.springframework.validation.annotation.Validated; 10 import org.springframework.validation.annotation.Validated;
11 import org.springframework.web.bind.annotation.*; 11 import org.springframework.web.bind.annotation.*;
12 import us.codecraft.webmagic.Spider; 12 import us.codecraft.webmagic.Spider;
@@ -21,12 +21,12 @@ import javax.annotation.Resource; @@ -21,12 +21,12 @@ import javax.annotation.Resource;
21 */ 21 */
22 @RestController 22 @RestController
23 @RequestMapping("/nature/article") 23 @RequestMapping("/nature/article")
24 -public class NatureArticleController { 24 +public class ArticleController {
25 /** 25 /**
26 * 服务对象 26 * 服务对象
27 */ 27 */
28 @Resource 28 @Resource
29 - private NatureArticleService natureArticleService; 29 + private ArticleService articleService;
30 30
31 @Resource 31 @Resource
32 private NatureSearchPageProcessor natureSearchPageProcessor; 32 private NatureSearchPageProcessor natureSearchPageProcessor;
@@ -60,7 +60,7 @@ public class NatureArticleController { @@ -60,7 +60,7 @@ public class NatureArticleController {
60 */ 60 */
61 @PostMapping("/list") 61 @PostMapping("/list")
62 public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) { 62 public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {
63 - return natureArticleService.list(natureArticleQueryVO); 63 + return articleService.list(natureArticleQueryVO);
64 } 64 }
65 65
66 /** 66 /**
@@ -71,7 +71,7 @@ public class NatureArticleController { @@ -71,7 +71,7 @@ public class NatureArticleController {
71 */ 71 */
72 @PostMapping("/query_by_id") 72 @PostMapping("/query_by_id")
73 public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { 73 public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
74 - return natureArticleService.queryById(natureArticleQueryVO); 74 + return articleService.queryById(natureArticleQueryVO);
75 } 75 }
76 76
77 /** 77 /**
@@ -82,7 +82,7 @@ public class NatureArticleController { @@ -82,7 +82,7 @@ public class NatureArticleController {
82 */ 82 */
83 @PostMapping("/add") 83 @PostMapping("/add")
84 public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) { 84 public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {
85 - return natureArticleService.add(natureArticleVO); 85 + return articleService.add(natureArticleVO);
86 } 86 }
87 87
88 /** 88 /**
@@ -93,7 +93,7 @@ public class NatureArticleController { @@ -93,7 +93,7 @@ public class NatureArticleController {
93 */ 93 */
94 @PostMapping("/edit") 94 @PostMapping("/edit")
95 public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) { 95 public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {
96 - return natureArticleService.edit(natureArticleVO); 96 + return articleService.edit(natureArticleVO);
97 } 97 }
98 98
99 /** 99 /**
@@ -104,7 +104,7 @@ public class NatureArticleController { @@ -104,7 +104,7 @@ public class NatureArticleController {
104 */ 104 */
105 @PostMapping("/delete_by_id") 105 @PostMapping("/delete_by_id")
106 public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { 106 public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
107 - return natureArticleService.deleteById(natureArticleQueryVO); 107 + return articleService.deleteById(natureArticleQueryVO);
108 } 108 }
109 109
110 } 110 }
src/main/java/com/canrd/webmagic/domain/ArticleTypeEnum.java 0 → 100644
  1 +package com.canrd.webmagic.domain;
  2 +
  3 +import lombok.AllArgsConstructor;
  4 +import lombok.Getter;
  5 +import lombok.NoArgsConstructor;
  6 +
  7 +/**
  8 + * @author: xms
  9 + * @description: TODO
  10 + * @date: 2024/4/11 16:52
  11 + * @version: 1.0
  12 + */
  13 +@Getter
  14 +@AllArgsConstructor
  15 +@NoArgsConstructor
  16 +public enum ArticleTypeEnum {
  17 + NATURE("nature", "nature网址"),
  18 + ;
  19 + private String type;
  20 + private String desc;
  21 +
  22 +}
src/main/java/com/canrd/webmagic/domain/dto/NatureArticleDO.java renamed to src/main/java/com/canrd/webmagic/domain/dto/ArticleDO.java
@@ -12,17 +12,23 @@ import java.io.Serializable; @@ -12,17 +12,23 @@ import java.io.Serializable;
12 * @author makejava 12 * @author makejava
13 * @since 2024-04-07 18:39:38 13 * @since 2024-04-07 18:39:38
14 */ 14 */
15 -@TableName("nature_article") 15 +@TableName("article")
16 @Data 16 @Data
17 @AllArgsConstructor 17 @AllArgsConstructor
18 @ToString 18 @ToString
19 @NoArgsConstructor 19 @NoArgsConstructor
20 @EqualsAndHashCode(callSuper = false) 20 @EqualsAndHashCode(callSuper = false)
21 @SuperBuilder 21 @SuperBuilder
22 -public class NatureArticleDO implements Serializable { 22 +public class ArticleDO implements Serializable {
23 private static final long serialVersionUID = 890672868109538541L; 23 private static final long serialVersionUID = 890672868109538541L;
24 24
25 private Long id; 25 private Long id;
  26 +
  27 + /**
  28 + * 文章类型:ArticleTypeEnum
  29 + */
  30 + private String articleType;
  31 +
26 /** 32 /**
27 * 作者名称 33 * 作者名称
28 */ 34 */
@@ -48,4 +54,19 @@ public class NatureArticleDO implements Serializable { @@ -48,4 +54,19 @@ public class NatureArticleDO implements Serializable {
48 */ 54 */
49 private String emailInfo; 55 private String emailInfo;
50 56
  57 + /**
  58 + * 文章摘要
  59 + */
  60 + private String articleDesc;
  61 +
  62 + /**
  63 + * 作者地址
  64 + */
  65 + private String authorAddress;
  66 +
  67 + /**
  68 + * 相关文章引用信息
  69 + */
  70 + private String referenceInfo;
  71 +
51 } 72 }
src/main/java/com/canrd/webmagic/mapper/NatureArticleMapper.java renamed to src/main/java/com/canrd/webmagic/mapper/ArticleMapper.java
1 package com.canrd.webmagic.mapper; 1 package com.canrd.webmagic.mapper;
2 2
3 import com.baomidou.mybatisplus.core.mapper.BaseMapper; 3 import com.baomidou.mybatisplus.core.mapper.BaseMapper;
4 -import com.canrd.webmagic.domain.dto.NatureArticleDO; 4 +import com.canrd.webmagic.domain.dto.ArticleDO;
5 5
6 /** 6 /**
7 * nature-文章信息(NatureArticle)表数据库访问层 7 * nature-文章信息(NatureArticle)表数据库访问层
@@ -9,7 +9,7 @@ import com.canrd.webmagic.domain.dto.NatureArticleDO; @@ -9,7 +9,7 @@ import com.canrd.webmagic.domain.dto.NatureArticleDO;
9 * @author makejava 9 * @author makejava
10 * @since 2024-04-07 18:39:47 10 * @since 2024-04-07 18:39:47
11 */ 11 */
12 -public interface NatureArticleMapper extends BaseMapper<NatureArticleDO> { 12 +public interface ArticleMapper extends BaseMapper<ArticleDO> {
13 13
14 14
15 } 15 }
src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
@@ -2,10 +2,13 @@ package com.canrd.webmagic.processor; @@ -2,10 +2,13 @@ package com.canrd.webmagic.processor;
2 2
3 import com.alibaba.fastjson.JSONArray; 3 import com.alibaba.fastjson.JSONArray;
4 import com.alibaba.fastjson.JSONObject; 4 import com.alibaba.fastjson.JSONObject;
  5 +import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
5 import com.canrd.webmagic.common.utils.StringUtils; 6 import com.canrd.webmagic.common.utils.StringUtils;
6 -import com.canrd.webmagic.domain.dto.NatureArticleDO; 7 +import com.canrd.webmagic.domain.ArticleTypeEnum;
  8 +import com.canrd.webmagic.domain.dto.ArticleDO;
7 import com.canrd.webmagic.processor.config.Agent; 9 import com.canrd.webmagic.processor.config.Agent;
8 import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline; 10 import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline;
  11 +import lombok.extern.slf4j.Slf4j;
9 import org.springframework.stereotype.Component; 12 import org.springframework.stereotype.Component;
10 import us.codecraft.webmagic.Page; 13 import us.codecraft.webmagic.Page;
11 import us.codecraft.webmagic.Site; 14 import us.codecraft.webmagic.Site;
@@ -15,8 +18,10 @@ import us.codecraft.webmagic.selector.Html; @@ -15,8 +18,10 @@ import us.codecraft.webmagic.selector.Html;
15 import us.codecraft.webmagic.selector.Selectable; 18 import us.codecraft.webmagic.selector.Selectable;
16 import us.codecraft.webmagic.selector.XpathSelector; 19 import us.codecraft.webmagic.selector.XpathSelector;
17 20
  21 +import java.util.ArrayList;
18 import java.util.List; 22 import java.util.List;
19 import java.util.Objects; 23 import java.util.Objects;
  24 +import java.util.stream.Collectors;
20 25
21 /** 26 /**
22 * @author: xms 27 * @author: xms
@@ -24,12 +29,13 @@ import java.util.Objects; @@ -24,12 +29,13 @@ import java.util.Objects;
24 * @date: 2024/4/1 14:19 29 * @date: 2024/4/1 14:19
25 * @version: 1.0 30 * @version: 1.0
26 */ 31 */
  32 +@Slf4j
27 @Component 33 @Component
28 public class NatureSearchPageProcessor implements PageProcessor { 34 public class NatureSearchPageProcessor implements PageProcessor {
29 private String agent = Agent.getRandom(); 35 private String agent = Agent.getRandom();
30 36
31 // 抓取网站的相关配置,包括编码、抓取间隔、重试次数等 37 // 抓取网站的相关配置,包括编码、抓取间隔、重试次数等
32 - private Site site = Site.me().setRetryTimes(3).setSleepTime(100).setUserAgent(agent); 38 + private Site site = Site.me().setRetryTimes(3).setSleepTime(100);
33 39
34 /** 40 /**
35 * 定制爬虫逻辑的核心接口,在这里编写抽取逻辑 41 * 定制爬虫逻辑的核心接口,在这里编写抽取逻辑
@@ -42,17 +48,16 @@ public class NatureSearchPageProcessor implements PageProcessor { @@ -42,17 +48,16 @@ public class NatureSearchPageProcessor implements PageProcessor {
42 doArticleList(page); 48 doArticleList(page);
43 } else if (page.getUrl().get().contains("research-articles")) { 49 } else if (page.getUrl().get().contains("research-articles")) {
44 doArticleList4ReSearch(page); 50 doArticleList4ReSearch(page);
45 - }else { 51 + } else {
46 doArticleContent(page); 52 doArticleContent(page);
47 } 53 }
48 54
49 } 55 }
50 56
51 /** 57 /**
52 - *  
53 * @param page 58 * @param page
54 */ 59 */
55 - private void doArticleList4ReSearch(Page page){ 60 + private void doArticleList4ReSearch(Page page) {
56 String url = page.getUrl().get(); 61 String url = page.getUrl().get();
57 String[] split = url.split("="); 62 String[] split = url.split("=");
58 Integer pageIndex = Integer.parseInt(split[split.length - 1]); 63 Integer pageIndex = Integer.parseInt(split[split.length - 1]);
@@ -79,17 +84,23 @@ public class NatureSearchPageProcessor implements PageProcessor { @@ -79,17 +84,23 @@ public class NatureSearchPageProcessor implements PageProcessor {
79 } 84 }
80 85
81 private void doArticleContent(Page page) { 86 private void doArticleContent(Page page) {
  87 + if (page.getUrl().get().contains("redirect") || !page.getUrl().get().contains("nature")) {
  88 + return;
  89 + }
82 //解析页面 90 //解析页面
83 Html html = page.getHtml(); 91 Html html = page.getHtml();
84 String[] urlArr = page.getUrl().get().split("/"); 92 String[] urlArr = page.getUrl().get().split("/");
85 String articleCode = urlArr[urlArr.length - 1]; 93 String articleCode = urlArr[urlArr.length - 1];
86 Selectable headSelectable = html.xpath("//div[@class='c-article-header']/header"); 94 Selectable headSelectable = html.xpath("//div[@class='c-article-header']/header");
87 List<Selectable> authorEmailSelectables = html.xpath("//p[@id='corresponding-author-list']/a").nodes(); 95 List<Selectable> authorEmailSelectables = html.xpath("//p[@id='corresponding-author-list']/a").nodes();
  96 + Selectable referencesSelectable = html.xpath("//ol[@class='c-article-references']").select(new XpathSelector("li[@class='c-article-references__item js-c-reading-companion-references-item']"));
  97 + Selectable authorAddressSelectable = html.xpath("//ol[@class='c-article-author-affiliation__list']").select(new XpathSelector("li"));
88 98
89 String title = headSelectable.xpath("//div/h1/text()").get(); 99 String title = headSelectable.xpath("//div/h1/text()").get();
90 if (StringUtils.isBlank(title)) { 100 if (StringUtils.isBlank(title)) {
91 title = headSelectable.xpath("//h1/text()").get(); 101 title = headSelectable.xpath("//h1/text()").get();
92 } 102 }
  103 + String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get();
93 String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); 104 String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get();
94 Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']")); 105 Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']"));
95 List<Selectable> authorNodes = authorSelectable.nodes(); 106 List<Selectable> authorNodes = authorSelectable.nodes();
@@ -97,7 +108,41 @@ public class NatureSearchPageProcessor implements PageProcessor { @@ -97,7 +108,41 @@ public class NatureSearchPageProcessor implements PageProcessor {
97 for (Selectable node : authorNodes) { 108 for (Selectable node : authorNodes) {
98 authorName.append(node.xpath("//a/text()")); 109 authorName.append(node.xpath("//a/text()"));
99 } 110 }
100 - JSONArray array = new JSONArray(); 111 +
  112 + JSONArray authorAddress = new JSONArray();
  113 + List<Selectable> authorAddressList = authorAddressSelectable.nodes();
  114 + if (CollectionUtils.isNotEmpty(authorAddressList)) {
  115 + for (Selectable selectable : authorAddressList) {
  116 + String address = selectable.xpath("//p").xpath("//p[@class='c-article-author-affiliation__address']/text()").get();
  117 + String authorNames = selectable.xpath("//p").xpath("//p[@class='c-article-author-affiliation__authors-list']/text()").get();
  118 + JSONObject object = new JSONObject();
  119 + object.put("address", address);
  120 + object.put("authorNames", authorNames);
  121 + authorAddress.add(object);
  122 + }
  123 + }
  124 +
  125 + JSONArray references = new JSONArray();
  126 + List<Selectable> referenceList = referencesSelectable.nodes();
  127 + if (CollectionUtils.isNotEmpty(referenceList)) {
  128 + for (Selectable reference : referenceList) {
  129 + String referenceTitle = reference.xpath("//p").xpath("//p[@class='c-article-references__text']/text()").get();
  130 + List<Selectable> referenceLinks = reference.xpath("//p").xpath("//p[@class='c-article-references__links u-hide-print']").links().nodes();
  131 + List<String> links = new ArrayList<>();
  132 + if (CollectionUtils.isNotEmpty(referenceLinks)) {
  133 + links = referenceLinks.stream().map(x -> x.get()).collect(Collectors.toList());
  134 + }
  135 + JSONObject object = new JSONObject();
  136 + object.put("referenceTitle", referenceTitle);
  137 + object.put("links", links);
  138 + if (CollectionUtils.isNotEmpty(links)) {
  139 + page.addTargetRequests(links.stream().filter(x -> x.contains("nature")).collect(Collectors.toList()));
  140 + }
  141 + references.add(object);
  142 + }
  143 + }
  144 +
  145 + JSONArray authorEmail = new JSONArray();
101 for (Selectable authorEmailSelectable : authorEmailSelectables) { 146 for (Selectable authorEmailSelectable : authorEmailSelectables) {
102 String[] split = authorEmailSelectable.xpath("//a").links().get().split(":"); 147 String[] split = authorEmailSelectable.xpath("//a").links().get().split(":");
103 String email = Objects.isNull(split) ? "" : split[split.length - 1]; 148 String email = Objects.isNull(split) ? "" : split[split.length - 1];
@@ -105,16 +150,21 @@ public class NatureSearchPageProcessor implements PageProcessor { @@ -105,16 +150,21 @@ public class NatureSearchPageProcessor implements PageProcessor {
105 JSONObject jsonObject = new JSONObject(); 150 JSONObject jsonObject = new JSONObject();
106 jsonObject.put("authorEmailName", authorEmailName); 151 jsonObject.put("authorEmailName", authorEmailName);
107 jsonObject.put("email", email); 152 jsonObject.put("email", email);
108 - array.add(jsonObject); 153 + authorEmail.add(jsonObject);
109 } 154 }
110 - System.out.println("code:" + articleCode + ",发布时间:" + publishTime + ",标题:" + title + ",作者:" + authorName + ",邮箱信息:" + array.toJSONString()); 155 + System.out.println("code:" + articleCode + ",发布时间:" + publishTime + ",标题:" + title + ",作者:" + authorName + ",邮箱信息:" + authorEmail.toJSONString());
  156 +
111 157
112 - page.putField("article", NatureArticleDO.builder() 158 + page.putField("article", ArticleDO.builder()
  159 + .articleType(ArticleTypeEnum.NATURE.getType())
113 .articleCode(articleCode) 160 .articleCode(articleCode)
114 .authorName(authorName.toString()) 161 .authorName(authorName.toString())
115 .title(title) 162 .title(title)
116 .publishTime(publishTime) 163 .publishTime(publishTime)
117 - .emailInfo(array.toJSONString()).build()); 164 + .emailInfo(authorEmail.toJSONString())
  165 + .articleDesc(articleDesc)
  166 + .authorAddress(authorAddress.toJSONString())
  167 + .referenceInfo(references.toJSONString()).build());
118 } 168 }
119 169
120 private void doArticleList(Page page) { 170 private void doArticleList(Page page) {
@@ -152,7 +202,7 @@ public class NatureSearchPageProcessor implements PageProcessor { @@ -152,7 +202,7 @@ public class NatureSearchPageProcessor implements PageProcessor {
152 // 创建一个Spider,并把我们的处理器放进去 202 // 创建一个Spider,并把我们的处理器放进去
153 Spider.create(new NatureSearchPageProcessor()) 203 Spider.create(new NatureSearchPageProcessor())
154 // 添加这个Spider要爬取的网页地址 204 // 添加这个Spider要爬取的网页地址
155 - .addUrl("https://www.nature.com/search?q=battery&page=1") 205 + .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=1")
156 .addPipeline(new NatureArticlePipeline()) 206 .addPipeline(new NatureArticlePipeline())
157 // 开启5个线程执行,并开始爬取 207 // 开启5个线程执行,并开始爬取
158 .thread(5).run(); 208 .thread(5).run();
src/main/java/com/canrd/webmagic/processor/config/UpdateIp.java
@@ -31,7 +31,7 @@ public class UpdateIp { @@ -31,7 +31,7 @@ public class UpdateIp {
31 @Autowired 31 @Autowired
32 private RedisTemplate redisTemplate; 32 private RedisTemplate redisTemplate;
33 33
34 - @Scheduled(cron = "*/20 * * * * ?") 34 +// @Scheduled(cron = "*/20 * * * * ?")
35 void update() { 35 void update() {
36 List<String> range = redisTemplate.opsForList().range("ip", 0, -1); 36 List<String> range = redisTemplate.opsForList().range("ip", 0, -1);
37 for (String ip : range) { 37 for (String ip : range) {
@@ -42,7 +42,7 @@ public class UpdateIp { @@ -42,7 +42,7 @@ public class UpdateIp {
42 } 42 }
43 } 43 }
44 44
45 - @Scheduled(cron = "*/15 * * * * ?") 45 +// @Scheduled(cron = "*/15 * * * * ?")
46 void ips() { 46 void ips() {
47 String string = null; 47 String string = null;
48 try { 48 try {
src/main/java/com/canrd/webmagic/processor/pipeline/NatureArticlePipeline.java
1 package com.canrd.webmagic.processor.pipeline; 1 package com.canrd.webmagic.processor.pipeline;
2 2
3 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; 3 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
4 -import com.canrd.webmagic.domain.dto.NatureArticleDO;  
5 -import com.canrd.webmagic.service.NatureArticleService; 4 +import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
  5 +import com.canrd.webmagic.domain.dto.ArticleDO;
  6 +import com.canrd.webmagic.service.ArticleService;
6 import org.springframework.beans.factory.annotation.Autowired; 7 import org.springframework.beans.factory.annotation.Autowired;
7 import org.springframework.stereotype.Component; 8 import org.springframework.stereotype.Component;
8 import us.codecraft.webmagic.ResultItems; 9 import us.codecraft.webmagic.ResultItems;
9 import us.codecraft.webmagic.Task; 10 import us.codecraft.webmagic.Task;
10 import us.codecraft.webmagic.pipeline.Pipeline; 11 import us.codecraft.webmagic.pipeline.Pipeline;
11 12
  13 +import java.util.List;
12 import java.util.Objects; 14 import java.util.Objects;
13 15
14 @Component 16 @Component
15 public class NatureArticlePipeline implements Pipeline { 17 public class NatureArticlePipeline implements Pipeline {
16 18
17 - private NatureArticleService natureArticleService; 19 + private ArticleService articleService;
18 20
19 @Autowired 21 @Autowired
20 - public void setNatureArticleService(NatureArticleService natureArticleService) {  
21 - this.natureArticleService = natureArticleService; 22 + public void setNatureArticleService(ArticleService articleService) {
  23 + this.articleService = articleService;
22 } 24 }
23 25
24 @Override 26 @Override
25 public void process(ResultItems resultItems, Task task) { 27 public void process(ResultItems resultItems, Task task) {
26 - NatureArticleDO articleDO = resultItems.get("article"); 28 + ArticleDO articleDO = resultItems.get("article");
27 if (Objects.nonNull(articleDO)) { 29 if (Objects.nonNull(articleDO)) {
28 - NatureArticleDO natureArticleDO = natureArticleService.getOne(new LambdaQueryWrapper<NatureArticleDO>().eq(NatureArticleDO::getArticleCode, articleDO.getArticleCode()));  
29 - if (Objects.nonNull(natureArticleDO)) { 30 + List<ArticleDO> natureArticleDO = articleService.list(new LambdaQueryWrapper<ArticleDO>().eq(ArticleDO::getArticleCode, articleDO.getArticleCode()));
  31 + if (CollectionUtils.isNotEmpty(natureArticleDO)) {
30 return; 32 return;
31 } 33 }
32 - natureArticleService.save(articleDO); 34 + articleService.save(articleDO);
33 } 35 }
34 } 36 }
35 } 37 }
src/main/java/com/canrd/webmagic/service/NatureArticleService.java renamed to src/main/java/com/canrd/webmagic/service/ArticleService.java
@@ -2,7 +2,7 @@ package com.canrd.webmagic.service; @@ -2,7 +2,7 @@ package com.canrd.webmagic.service;
2 2
3 import com.baomidou.mybatisplus.extension.service.IService; 3 import com.baomidou.mybatisplus.extension.service.IService;
4 import com.canrd.webmagic.common.constant.ServerResult; 4 import com.canrd.webmagic.common.constant.ServerResult;
5 -import com.canrd.webmagic.domain.dto.NatureArticleDO; 5 +import com.canrd.webmagic.domain.dto.ArticleDO;
6 import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; 6 import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
7 import com.canrd.webmagic.domain.vo.NatureArticleVO; 7 import com.canrd.webmagic.domain.vo.NatureArticleVO;
8 8
@@ -12,7 +12,7 @@ import com.canrd.webmagic.domain.vo.NatureArticleVO; @@ -12,7 +12,7 @@ import com.canrd.webmagic.domain.vo.NatureArticleVO;
12 * @author makejava 12 * @author makejava
13 * @since 2024-04-07 18:39:48 13 * @since 2024-04-07 18:39:48
14 */ 14 */
15 -public interface NatureArticleService extends IService<NatureArticleDO> { 15 +public interface ArticleService extends IService<ArticleDO> {
16 16
17 /** 17 /**
18 * 通过ID查询单条数据 18 * 通过ID查询单条数据
src/main/java/com/canrd/webmagic/service/impl/NatureArticleServiceImpl.java renamed to src/main/java/com/canrd/webmagic/service/impl/ArticleServiceImpl.java
@@ -5,11 +5,11 @@ import cn.hutool.core.collection.CollUtil; @@ -5,11 +5,11 @@ import cn.hutool.core.collection.CollUtil;
5 import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; 5 import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
6 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; 6 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
7 import com.canrd.webmagic.common.constant.ServerResult; 7 import com.canrd.webmagic.common.constant.ServerResult;
8 -import com.canrd.webmagic.domain.dto.NatureArticleDO; 8 +import com.canrd.webmagic.domain.dto.ArticleDO;
9 import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; 9 import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
10 import com.canrd.webmagic.domain.vo.NatureArticleVO; 10 import com.canrd.webmagic.domain.vo.NatureArticleVO;
11 -import com.canrd.webmagic.mapper.NatureArticleMapper;  
12 -import com.canrd.webmagic.service.NatureArticleService; 11 +import com.canrd.webmagic.mapper.ArticleMapper;
  12 +import com.canrd.webmagic.service.ArticleService;
13 import lombok.extern.slf4j.Slf4j; 13 import lombok.extern.slf4j.Slf4j;
14 import org.springframework.stereotype.Service; 14 import org.springframework.stereotype.Service;
15 15
@@ -24,7 +24,7 @@ import java.util.Objects; @@ -24,7 +24,7 @@ import java.util.Objects;
24 */ 24 */
25 @Slf4j 25 @Slf4j
26 @Service 26 @Service
27 -public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, NatureArticleDO> implements NatureArticleService { 27 +public class ArticleServiceImpl extends ServiceImpl<ArticleMapper, ArticleDO> implements ArticleService {
28 28
29 29
30 /** 30 /**
@@ -39,11 +39,11 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N @@ -39,11 +39,11 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N
39 if (Objects.isNull(natureArticleQueryVO.getId())) { 39 if (Objects.isNull(natureArticleQueryVO.getId())) {
40 return ServerResult.fail("id 不能为空"); 40 return ServerResult.fail("id 不能为空");
41 } 41 }
42 - NatureArticleDO NatureArticleDo = getById(natureArticleQueryVO.getId());  
43 - if (Objects.isNull(NatureArticleDo)) { 42 + ArticleDO articleDo = getById(natureArticleQueryVO.getId());
  43 + if (Objects.isNull(articleDo)) {
44 return ServerResult.success(null); 44 return ServerResult.success(null);
45 } 45 }
46 - return ServerResult.success(BeanUtil.copyProperties(NatureArticleDo, NatureArticleVO.class)); 46 + return ServerResult.success(BeanUtil.copyProperties(articleDo, NatureArticleVO.class));
47 } 47 }
48 48
49 /** 49 /**
@@ -69,9 +69,9 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N @@ -69,9 +69,9 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N
69 if (Objects.nonNull(natureArticleVO.getId())) { 69 if (Objects.nonNull(natureArticleVO.getId())) {
70 natureArticleVO.setId(null); 70 natureArticleVO.setId(null);
71 } 71 }
72 - NatureArticleDO natureArticleDo = BeanUtil.copyProperties(natureArticleVO, NatureArticleDO.class); 72 + ArticleDO articleDo = BeanUtil.copyProperties(natureArticleVO, ArticleDO.class);
73 73
74 - save(natureArticleDo); 74 + save(articleDo);
75 75
76 return ServerResult.success(); 76 return ServerResult.success();
77 } 77 }
@@ -88,9 +88,9 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N @@ -88,9 +88,9 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N
88 if (Objects.isNull(natureArticleVO.getId())) { 88 if (Objects.isNull(natureArticleVO.getId())) {
89 return ServerResult.fail("id 不能为空"); 89 return ServerResult.fail("id 不能为空");
90 } 90 }
91 - NatureArticleDO natureArticleDo = BeanUtil.copyProperties(natureArticleVO, NatureArticleDO.class); 91 + ArticleDO articleDo = BeanUtil.copyProperties(natureArticleVO, ArticleDO.class);
92 92
93 - updateById(natureArticleDo); 93 + updateById(articleDo);
94 94
95 return ServerResult.success(); 95 return ServerResult.success();
96 } 96 }
@@ -107,13 +107,13 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N @@ -107,13 +107,13 @@ public class NatureArticleServiceImpl extends ServiceImpl<NatureArticleMapper, N
107 if (CollUtil.isEmpty(ids)) { 107 if (CollUtil.isEmpty(ids)) {
108 return ServerResult.fail("ids 参数不能为空"); 108 return ServerResult.fail("ids 参数不能为空");
109 } 109 }
110 - List<NatureArticleDO> natureArticleList = listByIds(ids); 110 + List<ArticleDO> natureArticleList = listByIds(ids);
111 if (CollUtil.isEmpty(natureArticleList)) { 111 if (CollUtil.isEmpty(natureArticleList)) {
112 return ServerResult.success(); 112 return ServerResult.success();
113 } 113 }
114 //todo 校验是否可以逻辑删除 114 //todo 校验是否可以逻辑删除
115 - LambdaUpdateWrapper<NatureArticleDO> updateWrapper = new LambdaUpdateWrapper<NatureArticleDO>()  
116 - .in(NatureArticleDO::getId, ids); 115 + LambdaUpdateWrapper<ArticleDO> updateWrapper = new LambdaUpdateWrapper<ArticleDO>()
  116 + .in(ArticleDO::getId, ids);
117 update(updateWrapper); 117 update(updateWrapper);
118 return ServerResult.success(); 118 return ServerResult.success();
119 } 119 }
src/test/java/com/canrd/webmagic/utils/DateTimeUtilTest.java
@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON; @@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON;
4 import com.alibaba.fastjson.JSONArray; 4 import com.alibaba.fastjson.JSONArray;
5 import com.alibaba.fastjson.JSONObject; 5 import com.alibaba.fastjson.JSONObject;
6 import com.canrd.webmagic.BaseTest; 6 import com.canrd.webmagic.BaseTest;
7 -import com.canrd.webmagic.domain.dto.NatureArticleDO;  
8 -import com.canrd.webmagic.service.NatureArticleService; 7 +import com.canrd.webmagic.domain.dto.ArticleDO;
  8 +import com.canrd.webmagic.service.ArticleService;
9 import org.junit.Test; 9 import org.junit.Test;
10 10
11 import javax.annotation.Resource; 11 import javax.annotation.Resource;
@@ -20,13 +20,13 @@ import java.util.List; @@ -20,13 +20,13 @@ import java.util.List;
20 public class DateTimeUtilTest extends BaseTest { 20 public class DateTimeUtilTest extends BaseTest {
21 21
22 @Resource 22 @Resource
23 - private NatureArticleService natureArticleService; 23 + private ArticleService articleService;
24 24
25 @Test 25 @Test
26 public void export() { 26 public void export() {
27 - List<NatureArticleDO> articleDOList = natureArticleService.list(); 27 + List<ArticleDO> articleDOList = articleService.list();
28 JSONArray array = new JSONArray(); 28 JSONArray array = new JSONArray();
29 - for (NatureArticleDO articleDO : articleDOList) { 29 + for (ArticleDO articleDO : articleDOList) {
30 JSONArray jsonArray = JSONArray.parseArray(articleDO.getEmailInfo()); 30 JSONArray jsonArray = JSONArray.parseArray(articleDO.getEmailInfo());
31 array.addAll(jsonArray); 31 array.addAll(jsonArray);
32 } 32 }