Commit 432582f2e4750ca22442326788bab2419a94e1e1
1 parent a2f96c4a
feat:
1、selenium 整合 2、science 网站
Showing 2 changed files with 8 additions and 134 deletions
src/main/java/com/canrd/webmagic/controller/ArticleController.java
1 | 1 | package com.canrd.webmagic.controller; |
2 | 2 | |
3 | 3 | import com.canrd.webmagic.common.constant.ServerResult; |
4 | -import com.canrd.webmagic.common.jsr303.OperateGroup; | |
5 | -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; | |
6 | -import com.canrd.webmagic.domain.vo.NatureArticleVO; | |
7 | 4 | import com.canrd.webmagic.processor.NatureSearchPageProcessor; |
8 | 5 | import com.canrd.webmagic.processor.download.Downloader; |
9 | 6 | import com.canrd.webmagic.processor.pipeline.ArticlePipeline; |
10 | -import com.canrd.webmagic.service.ArticleService; | |
11 | -import org.springframework.validation.annotation.Validated; | |
12 | -import org.springframework.web.bind.annotation.*; | |
7 | +import org.springframework.web.bind.annotation.GetMapping; | |
8 | +import org.springframework.web.bind.annotation.RequestMapping; | |
9 | +import org.springframework.web.bind.annotation.RequestParam; | |
10 | +import org.springframework.web.bind.annotation.RestController; | |
13 | 11 | import us.codecraft.webmagic.Spider; |
14 | 12 | |
15 | 13 | import javax.annotation.Resource; |
... | ... | @@ -23,11 +21,6 @@ import javax.annotation.Resource; |
23 | 21 | @RestController |
24 | 22 | @RequestMapping("/nature/article") |
25 | 23 | public class ArticleController { |
26 | - /** | |
27 | - * 服务对象 | |
28 | - */ | |
29 | - @Resource | |
30 | - private ArticleService articleService; | |
31 | 24 | |
32 | 25 | @Resource |
33 | 26 | private NatureSearchPageProcessor natureSearchPageProcessor; |
... | ... | @@ -45,8 +38,6 @@ public class ArticleController { |
45 | 38 | public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) { |
46 | 39 | for (int i = 1; i <= indexSize; i++) { |
47 | 40 | Spider.create(natureSearchPageProcessor) |
48 | - // 添加这个Spider要爬取的网页地址 | |
49 | - .addUrl("https://www.nature.com/search?q=" + keyword + "&page=" + i) | |
50 | 41 | .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i) |
51 | 42 | .addPipeline(articlePipeline) |
52 | 43 | // .setDownloader(downloader.newIpDownloader()) |
... | ... | @@ -56,61 +47,5 @@ public class ArticleController { |
56 | 47 | |
57 | 48 | return ServerResult.success(); |
58 | 49 | } |
59 | - | |
60 | - /** | |
61 | - * 分页查询 | |
62 | - * | |
63 | - * @param natureArticleQueryVO 查询条件 | |
64 | - * @return 查询结果 | |
65 | - */ | |
66 | - @PostMapping("/list") | |
67 | - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) { | |
68 | - return articleService.list(natureArticleQueryVO); | |
69 | - } | |
70 | - | |
71 | - /** | |
72 | - * 通过主键查询单条数据 | |
73 | - * | |
74 | - * @param natureArticleQueryVO 查询条件 | |
75 | - * @return 单条数据 | |
76 | - */ | |
77 | - @PostMapping("/query_by_id") | |
78 | - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | |
79 | - return articleService.queryById(natureArticleQueryVO); | |
80 | - } | |
81 | - | |
82 | - /** | |
83 | - * 新增数据 | |
84 | - * | |
85 | - * @param natureArticleVO 数据VO | |
86 | - * @return 新增结果 | |
87 | - */ | |
88 | - @PostMapping("/add") | |
89 | - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) { | |
90 | - return articleService.add(natureArticleVO); | |
91 | - } | |
92 | - | |
93 | - /** | |
94 | - * 编辑数据 | |
95 | - * | |
96 | - * @param natureArticleVO 数据VO | |
97 | - * @return 编辑结果 | |
98 | - */ | |
99 | - @PostMapping("/edit") | |
100 | - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) { | |
101 | - return articleService.edit(natureArticleVO); | |
102 | - } | |
103 | - | |
104 | - /** | |
105 | - * 删除数据 | |
106 | - * | |
107 | - * @param natureArticleQueryVO 查询条件 | |
108 | - * @return 删除是否成功 | |
109 | - */ | |
110 | - @PostMapping("/delete_by_id") | |
111 | - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | |
112 | - return articleService.deleteById(natureArticleQueryVO); | |
113 | - } | |
114 | - | |
115 | 50 | } |
116 | 51 | ... | ... |
src/main/java/com/canrd/webmagic/controller/Science4JournalController.java
1 | 1 | package com.canrd.webmagic.controller; |
2 | 2 | |
3 | 3 | import com.canrd.webmagic.common.constant.ServerResult; |
4 | -import com.canrd.webmagic.common.jsr303.OperateGroup; | |
5 | -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; | |
6 | -import com.canrd.webmagic.domain.vo.NatureArticleVO; | |
7 | 4 | import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor; |
8 | 5 | import com.canrd.webmagic.processor.download.SeleniumDownloader; |
9 | -import com.canrd.webmagic.service.ArticleService; | |
10 | 6 | import org.apache.logging.log4j.core.util.UuidUtil; |
11 | -import org.springframework.validation.annotation.Validated; | |
12 | -import org.springframework.web.bind.annotation.*; | |
7 | +import org.springframework.web.bind.annotation.GetMapping; | |
8 | +import org.springframework.web.bind.annotation.RequestMapping; | |
9 | +import org.springframework.web.bind.annotation.RequestParam; | |
10 | +import org.springframework.web.bind.annotation.RestController; | |
13 | 11 | import us.codecraft.webmagic.Spider; |
14 | 12 | |
15 | 13 | import javax.annotation.Resource; |
... | ... | @@ -23,11 +21,6 @@ import javax.annotation.Resource; |
23 | 21 | @RestController |
24 | 22 | @RequestMapping("/science/journal") |
25 | 23 | public class Science4JournalController { |
26 | - /** | |
27 | - * 服务对象 | |
28 | - */ | |
29 | - @Resource | |
30 | - private ArticleService articleService; | |
31 | 24 | |
32 | 25 | @Resource |
33 | 26 | private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor; |
... | ... | @@ -52,60 +45,6 @@ public class Science4JournalController { |
52 | 45 | return ServerResult.success(); |
53 | 46 | } |
54 | 47 | |
55 | - /** | |
56 | - * 分页查询 | |
57 | - * | |
58 | - * @param natureArticleQueryVO 查询条件 | |
59 | - * @return 查询结果 | |
60 | - */ | |
61 | - @PostMapping("/list") | |
62 | - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) { | |
63 | - return articleService.list(natureArticleQueryVO); | |
64 | - } | |
65 | - | |
66 | - /** | |
67 | - * 通过主键查询单条数据 | |
68 | - * | |
69 | - * @param natureArticleQueryVO 查询条件 | |
70 | - * @return 单条数据 | |
71 | - */ | |
72 | - @PostMapping("/query_by_id") | |
73 | - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | |
74 | - return articleService.queryById(natureArticleQueryVO); | |
75 | - } | |
76 | - | |
77 | - /** | |
78 | - * 新增数据 | |
79 | - * | |
80 | - * @param natureArticleVO 数据VO | |
81 | - * @return 新增结果 | |
82 | - */ | |
83 | - @PostMapping("/add") | |
84 | - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) { | |
85 | - return articleService.add(natureArticleVO); | |
86 | - } | |
87 | - | |
88 | - /** | |
89 | - * 编辑数据 | |
90 | - * | |
91 | - * @param natureArticleVO 数据VO | |
92 | - * @return 编辑结果 | |
93 | - */ | |
94 | - @PostMapping("/edit") | |
95 | - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) { | |
96 | - return articleService.edit(natureArticleVO); | |
97 | - } | |
98 | - | |
99 | - /** | |
100 | - * 删除数据 | |
101 | - * | |
102 | - * @param natureArticleQueryVO 查询条件 | |
103 | - * @return 删除是否成功 | |
104 | - */ | |
105 | - @PostMapping("/delete_by_id") | |
106 | - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) { | |
107 | - return articleService.deleteById(natureArticleQueryVO); | |
108 | - } | |
109 | 48 | |
110 | 49 | } |
111 | 50 | ... | ... |