Commit 432582f2e4750ca22442326788bab2419a94e1e1
1 parent: a2f96c4a
feat:
1、selenium 整合 2、science 网站
Showing 2 changed files with 8 additions and 134 deletions
src/main/java/com/canrd/webmagic/controller/ArticleController.java
1 | package com.canrd.webmagic.controller; | 1 | package com.canrd.webmagic.controller; |
2 | 2 | ||
3 | import com.canrd.webmagic.common.constant.ServerResult; | 3 | import com.canrd.webmagic.common.constant.ServerResult; |
4 | -import com.canrd.webmagic.common.jsr303.OperateGroup; | ||
5 | -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; | ||
6 | -import com.canrd.webmagic.domain.vo.NatureArticleVO; | ||
7 | import com.canrd.webmagic.processor.NatureSearchPageProcessor; | 4 | import com.canrd.webmagic.processor.NatureSearchPageProcessor; |
8 | import com.canrd.webmagic.processor.download.Downloader; | 5 | import com.canrd.webmagic.processor.download.Downloader; |
9 | import com.canrd.webmagic.processor.pipeline.ArticlePipeline; | 6 | import com.canrd.webmagic.processor.pipeline.ArticlePipeline; |
10 | -import com.canrd.webmagic.service.ArticleService; | ||
11 | -import org.springframework.validation.annotation.Validated; | ||
12 | -import org.springframework.web.bind.annotation.*; | 7 | +import org.springframework.web.bind.annotation.GetMapping; |
8 | +import org.springframework.web.bind.annotation.RequestMapping; | ||
9 | +import org.springframework.web.bind.annotation.RequestParam; | ||
10 | +import org.springframework.web.bind.annotation.RestController; | ||
13 | import us.codecraft.webmagic.Spider; | 11 | import us.codecraft.webmagic.Spider; |
14 | 12 | ||
15 | import javax.annotation.Resource; | 13 | import javax.annotation.Resource; |
@@ -23,11 +21,6 @@ import javax.annotation.Resource; | @@ -23,11 +21,6 @@ import javax.annotation.Resource; | ||
23 | @RestController | 21 | @RestController |
24 | @RequestMapping("/nature/article") | 22 | @RequestMapping("/nature/article") |
25 | public class ArticleController { | 23 | public class ArticleController { |
26 | - /** | ||
27 | - * 服务对象 | ||
28 | - */ | ||
29 | - @Resource | ||
30 | - private ArticleService articleService; | ||
31 | 24 | ||
32 | @Resource | 25 | @Resource |
33 | private NatureSearchPageProcessor natureSearchPageProcessor; | 26 | private NatureSearchPageProcessor natureSearchPageProcessor; |
@@ -45,8 +38,6 @@ public class ArticleController { | @@ -45,8 +38,6 @@ public class ArticleController { | ||
45 | public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) { | 38 | public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) { |
46 | for (int i = 1; i <= indexSize; i++) { | 39 | for (int i = 1; i <= indexSize; i++) { |
47 | Spider.create(natureSearchPageProcessor) | 40 | Spider.create(natureSearchPageProcessor) |
48 | - // 添加这个Spider要爬取的网页地址 | ||
49 | - .addUrl("https://www.nature.com/search?q=" + keyword + "&page=" + i) | ||
50 | .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i) | 41 | .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i) |
51 | .addPipeline(articlePipeline) | 42 | .addPipeline(articlePipeline) |
52 | // .setDownloader(downloader.newIpDownloader()) | 43 | // .setDownloader(downloader.newIpDownloader()) |
@@ -56,61 +47,5 @@ public class ArticleController { | @@ -56,61 +47,5 @@ public class ArticleController { | ||
56 | 47 | ||
57 | return ServerResult.success(); | 48 | return ServerResult.success(); |
58 | } | 49 | } |
59 | - | ||
60 | - /** | ||
61 | - * 分页查询 | ||
62 | - * | ||
63 | - * @param natureArticleQueryVO 查询条件 | ||
64 | - * @return 查询结果 | ||
65 | - */ | ||
66 | - @PostMapping("/list") | ||
67 | - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) { | ||
68 | - return articleService.list(natureArticleQueryVO); | ||
69 | - } | ||
70 | - | ||
71 | - /** | ||
72 | - * 通过主键查询单条数据 | ||
73 | - * | ||
74 | - * @param natureArticleQueryVO 查询条件 | ||
75 | - * @return 单条数据 | ||
76 | - */ | ||
77 | - @PostMapping("/query_by_id") | ||
78 | - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | ||
79 | - return articleService.queryById(natureArticleQueryVO); | ||
80 | - } | ||
81 | - | ||
82 | - /** | ||
83 | - * 新增数据 | ||
84 | - * | ||
85 | - * @param natureArticleVO 数据VO | ||
86 | - * @return 新增结果 | ||
87 | - */ | ||
88 | - @PostMapping("/add") | ||
89 | - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) { | ||
90 | - return articleService.add(natureArticleVO); | ||
91 | - } | ||
92 | - | ||
93 | - /** | ||
94 | - * 编辑数据 | ||
95 | - * | ||
96 | - * @param natureArticleVO 数据VO | ||
97 | - * @return 编辑结果 | ||
98 | - */ | ||
99 | - @PostMapping("/edit") | ||
100 | - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) { | ||
101 | - return articleService.edit(natureArticleVO); | ||
102 | - } | ||
103 | - | ||
104 | - /** | ||
105 | - * 删除数据 | ||
106 | - * | ||
107 | - * @param natureArticleQueryVO 查询条件 | ||
108 | - * @return 删除是否成功 | ||
109 | - */ | ||
110 | - @PostMapping("/delete_by_id") | ||
111 | - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | ||
112 | - return articleService.deleteById(natureArticleQueryVO); | ||
113 | - } | ||
114 | - | ||
115 | } | 50 | } |
116 | 51 |
src/main/java/com/canrd/webmagic/controller/Science4JournalController.java
1 | package com.canrd.webmagic.controller; | 1 | package com.canrd.webmagic.controller; |
2 | 2 | ||
3 | import com.canrd.webmagic.common.constant.ServerResult; | 3 | import com.canrd.webmagic.common.constant.ServerResult; |
4 | -import com.canrd.webmagic.common.jsr303.OperateGroup; | ||
5 | -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; | ||
6 | -import com.canrd.webmagic.domain.vo.NatureArticleVO; | ||
7 | import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor; | 4 | import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor; |
8 | import com.canrd.webmagic.processor.download.SeleniumDownloader; | 5 | import com.canrd.webmagic.processor.download.SeleniumDownloader; |
9 | -import com.canrd.webmagic.service.ArticleService; | ||
10 | import org.apache.logging.log4j.core.util.UuidUtil; | 6 | import org.apache.logging.log4j.core.util.UuidUtil; |
11 | -import org.springframework.validation.annotation.Validated; | ||
12 | -import org.springframework.web.bind.annotation.*; | 7 | +import org.springframework.web.bind.annotation.GetMapping; |
8 | +import org.springframework.web.bind.annotation.RequestMapping; | ||
9 | +import org.springframework.web.bind.annotation.RequestParam; | ||
10 | +import org.springframework.web.bind.annotation.RestController; | ||
13 | import us.codecraft.webmagic.Spider; | 11 | import us.codecraft.webmagic.Spider; |
14 | 12 | ||
15 | import javax.annotation.Resource; | 13 | import javax.annotation.Resource; |
@@ -23,11 +21,6 @@ import javax.annotation.Resource; | @@ -23,11 +21,6 @@ import javax.annotation.Resource; | ||
23 | @RestController | 21 | @RestController |
24 | @RequestMapping("/science/journal") | 22 | @RequestMapping("/science/journal") |
25 | public class Science4JournalController { | 23 | public class Science4JournalController { |
26 | - /** | ||
27 | - * 服务对象 | ||
28 | - */ | ||
29 | - @Resource | ||
30 | - private ArticleService articleService; | ||
31 | 24 | ||
32 | @Resource | 25 | @Resource |
33 | private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor; | 26 | private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor; |
@@ -52,60 +45,6 @@ public class Science4JournalController { | @@ -52,60 +45,6 @@ public class Science4JournalController { | ||
52 | return ServerResult.success(); | 45 | return ServerResult.success(); |
53 | } | 46 | } |
54 | 47 | ||
55 | - /** | ||
56 | - * 分页查询 | ||
57 | - * | ||
58 | - * @param natureArticleQueryVO 查询条件 | ||
59 | - * @return 查询结果 | ||
60 | - */ | ||
61 | - @PostMapping("/list") | ||
62 | - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) { | ||
63 | - return articleService.list(natureArticleQueryVO); | ||
64 | - } | ||
65 | - | ||
66 | - /** | ||
67 | - * 通过主键查询单条数据 | ||
68 | - * | ||
69 | - * @param natureArticleQueryVO 查询条件 | ||
70 | - * @return 单条数据 | ||
71 | - */ | ||
72 | - @PostMapping("/query_by_id") | ||
73 | - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | ||
74 | - return articleService.queryById(natureArticleQueryVO); | ||
75 | - } | ||
76 | - | ||
77 | - /** | ||
78 | - * 新增数据 | ||
79 | - * | ||
80 | - * @param natureArticleVO 数据VO | ||
81 | - * @return 新增结果 | ||
82 | - */ | ||
83 | - @PostMapping("/add") | ||
84 | - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) { | ||
85 | - return articleService.add(natureArticleVO); | ||
86 | - } | ||
87 | - | ||
88 | - /** | ||
89 | - * 编辑数据 | ||
90 | - * | ||
91 | - * @param natureArticleVO 数据VO | ||
92 | - * @return 编辑结果 | ||
93 | - */ | ||
94 | - @PostMapping("/edit") | ||
95 | - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) { | ||
96 | - return articleService.edit(natureArticleVO); | ||
97 | - } | ||
98 | - | ||
99 | - /** | ||
100 | - * 删除数据 | ||
101 | - * | ||
102 | - * @param natureArticleQueryVO 查询条件 | ||
103 | - * @return 删除是否成功 | ||
104 | - */ | ||
105 | - @PostMapping("/delete_by_id") | ||
106 | - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | ||
107 | - return articleService.deleteById(natureArticleQueryVO); | ||
108 | - } | ||
109 | 48 | ||
110 | } | 49 | } |
111 | 50 |