Commit 432582f2e4750ca22442326788bab2419a94e1e1

Authored by 谢茂盛
1 parent a2f96c4a

feat:

1、Selenium 整合
2、Science 网站
src/main/java/com/canrd/webmagic/controller/ArticleController.java
1 package com.canrd.webmagic.controller; 1 package com.canrd.webmagic.controller;
2 2
3 import com.canrd.webmagic.common.constant.ServerResult; 3 import com.canrd.webmagic.common.constant.ServerResult;
4 -import com.canrd.webmagic.common.jsr303.OperateGroup;  
5 -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;  
6 -import com.canrd.webmagic.domain.vo.NatureArticleVO;  
7 import com.canrd.webmagic.processor.NatureSearchPageProcessor; 4 import com.canrd.webmagic.processor.NatureSearchPageProcessor;
8 import com.canrd.webmagic.processor.download.Downloader; 5 import com.canrd.webmagic.processor.download.Downloader;
9 import com.canrd.webmagic.processor.pipeline.ArticlePipeline; 6 import com.canrd.webmagic.processor.pipeline.ArticlePipeline;
10 -import com.canrd.webmagic.service.ArticleService;  
11 -import org.springframework.validation.annotation.Validated;  
12 -import org.springframework.web.bind.annotation.*; 7 +import org.springframework.web.bind.annotation.GetMapping;
  8 +import org.springframework.web.bind.annotation.RequestMapping;
  9 +import org.springframework.web.bind.annotation.RequestParam;
  10 +import org.springframework.web.bind.annotation.RestController;
13 import us.codecraft.webmagic.Spider; 11 import us.codecraft.webmagic.Spider;
14 12
15 import javax.annotation.Resource; 13 import javax.annotation.Resource;
@@ -23,11 +21,6 @@ import javax.annotation.Resource; @@ -23,11 +21,6 @@ import javax.annotation.Resource;
23 @RestController 21 @RestController
24 @RequestMapping("/nature/article") 22 @RequestMapping("/nature/article")
25 public class ArticleController { 23 public class ArticleController {
26 - /**  
27 - * 服务对象  
28 - */  
29 - @Resource  
30 - private ArticleService articleService;  
31 24
32 @Resource 25 @Resource
33 private NatureSearchPageProcessor natureSearchPageProcessor; 26 private NatureSearchPageProcessor natureSearchPageProcessor;
@@ -45,8 +38,6 @@ public class ArticleController { @@ -45,8 +38,6 @@ public class ArticleController {
45 public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) { 38 public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) {
46 for (int i = 1; i <= indexSize; i++) { 39 for (int i = 1; i <= indexSize; i++) {
47 Spider.create(natureSearchPageProcessor) 40 Spider.create(natureSearchPageProcessor)
48 - // 添加这个Spider要爬取的网页地址  
49 - .addUrl("https://www.nature.com/search?q=" + keyword + "&page=" + i)  
50 .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i) 41 .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i)
51 .addPipeline(articlePipeline) 42 .addPipeline(articlePipeline)
52 // .setDownloader(downloader.newIpDownloader()) 43 // .setDownloader(downloader.newIpDownloader())
@@ -56,61 +47,5 @@ public class ArticleController { @@ -56,61 +47,5 @@ public class ArticleController {
56 47
57 return ServerResult.success(); 48 return ServerResult.success();
58 } 49 }
59 -  
60 - /**  
61 - * 分页查询  
62 - *  
63 - * @param natureArticleQueryVO 查询条件  
64 - * @return 查询结果  
65 - */  
66 - @PostMapping("/list")  
67 - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {  
68 - return articleService.list(natureArticleQueryVO);  
69 - }  
70 -  
71 - /**  
72 - * 通过主键查询单条数据  
73 - *  
74 - * @param natureArticleQueryVO 查询条件  
75 - * @return 单条数据  
76 - */  
77 - @PostMapping("/query_by_id")  
78 - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {  
79 - return articleService.queryById(natureArticleQueryVO);  
80 - }  
81 -  
82 - /**  
83 - * 新增数据  
84 - *  
85 - * @param natureArticleVO 数据VO  
86 - * @return 新增结果  
87 - */  
88 - @PostMapping("/add")  
89 - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {  
90 - return articleService.add(natureArticleVO);  
91 - }  
92 -  
93 - /**  
94 - * 编辑数据  
95 - *  
96 - * @param natureArticleVO 数据VO  
97 - * @return 编辑结果  
98 - */  
99 - @PostMapping("/edit")  
100 - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {  
101 - return articleService.edit(natureArticleVO);  
102 - }  
103 -  
104 - /**  
105 - * 删除数据  
106 - *  
107 - * @param natureArticleQueryVO 查询条件  
108 - * @return 删除是否成功  
109 - */  
110 - @PostMapping("/delete_by_id")  
111 - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {  
112 - return articleService.deleteById(natureArticleQueryVO);  
113 - }  
114 -  
115 } 50 }
116 51
src/main/java/com/canrd/webmagic/controller/Science4JournalController.java
1 package com.canrd.webmagic.controller; 1 package com.canrd.webmagic.controller;
2 2
3 import com.canrd.webmagic.common.constant.ServerResult; 3 import com.canrd.webmagic.common.constant.ServerResult;
4 -import com.canrd.webmagic.common.jsr303.OperateGroup;  
5 -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;  
6 -import com.canrd.webmagic.domain.vo.NatureArticleVO;  
7 import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor; 4 import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor;
8 import com.canrd.webmagic.processor.download.SeleniumDownloader; 5 import com.canrd.webmagic.processor.download.SeleniumDownloader;
9 -import com.canrd.webmagic.service.ArticleService;  
10 import org.apache.logging.log4j.core.util.UuidUtil; 6 import org.apache.logging.log4j.core.util.UuidUtil;
11 -import org.springframework.validation.annotation.Validated;  
12 -import org.springframework.web.bind.annotation.*; 7 +import org.springframework.web.bind.annotation.GetMapping;
  8 +import org.springframework.web.bind.annotation.RequestMapping;
  9 +import org.springframework.web.bind.annotation.RequestParam;
  10 +import org.springframework.web.bind.annotation.RestController;
13 import us.codecraft.webmagic.Spider; 11 import us.codecraft.webmagic.Spider;
14 12
15 import javax.annotation.Resource; 13 import javax.annotation.Resource;
@@ -23,11 +21,6 @@ import javax.annotation.Resource; @@ -23,11 +21,6 @@ import javax.annotation.Resource;
23 @RestController 21 @RestController
24 @RequestMapping("/science/journal") 22 @RequestMapping("/science/journal")
25 public class Science4JournalController { 23 public class Science4JournalController {
26 - /**  
27 - * 服务对象  
28 - */  
29 - @Resource  
30 - private ArticleService articleService;  
31 24
32 @Resource 25 @Resource
33 private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor; 26 private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor;
@@ -52,60 +45,6 @@ public class Science4JournalController { @@ -52,60 +45,6 @@ public class Science4JournalController {
52 return ServerResult.success(); 45 return ServerResult.success();
53 } 46 }
54 47
55 - /**  
56 - * 分页查询  
57 - *  
58 - * @param natureArticleQueryVO 查询条件  
59 - * @return 查询结果  
60 - */  
61 - @PostMapping("/list")  
62 - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {  
63 - return articleService.list(natureArticleQueryVO);  
64 - }  
65 -  
66 - /**  
67 - * 通过主键查询单条数据  
68 - *  
69 - * @param natureArticleQueryVO 查询条件  
70 - * @return 单条数据  
71 - */  
72 - @PostMapping("/query_by_id")  
73 - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {  
74 - return articleService.queryById(natureArticleQueryVO);  
75 - }  
76 -  
77 - /**  
78 - * 新增数据  
79 - *  
80 - * @param natureArticleVO 数据VO  
81 - * @return 新增结果  
82 - */  
83 - @PostMapping("/add")  
84 - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {  
85 - return articleService.add(natureArticleVO);  
86 - }  
87 -  
88 - /**  
89 - * 编辑数据  
90 - *  
91 - * @param natureArticleVO 数据VO  
92 - * @return 编辑结果  
93 - */  
94 - @PostMapping("/edit")  
95 - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {  
96 - return articleService.edit(natureArticleVO);  
97 - }  
98 -  
99 - /**  
100 - * 删除数据  
101 - *  
102 - * @param natureArticleQueryVO 查询条件  
103 - * @return 删除是否成功  
104 - */  
105 - @PostMapping("/delete_by_id")  
106 - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {  
107 - return articleService.deleteById(natureArticleQueryVO);  
108 - }  
109 48
110 } 49 }
111 50