Commit 432582f2e4750ca22442326788bab2419a94e1e1

Authored by 谢茂盛
1 parent a2f96c4a

feat:

1、selenium 整合
2、science 网站
src/main/java/com/canrd/webmagic/controller/ArticleController.java
1 1 package com.canrd.webmagic.controller;
2 2  
3 3 import com.canrd.webmagic.common.constant.ServerResult;
4   -import com.canrd.webmagic.common.jsr303.OperateGroup;
5   -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
6   -import com.canrd.webmagic.domain.vo.NatureArticleVO;
7 4 import com.canrd.webmagic.processor.NatureSearchPageProcessor;
8 5 import com.canrd.webmagic.processor.download.Downloader;
9 6 import com.canrd.webmagic.processor.pipeline.ArticlePipeline;
10   -import com.canrd.webmagic.service.ArticleService;
11   -import org.springframework.validation.annotation.Validated;
12   -import org.springframework.web.bind.annotation.*;
  7 +import org.springframework.web.bind.annotation.GetMapping;
  8 +import org.springframework.web.bind.annotation.RequestMapping;
  9 +import org.springframework.web.bind.annotation.RequestParam;
  10 +import org.springframework.web.bind.annotation.RestController;
13 11 import us.codecraft.webmagic.Spider;
14 12  
15 13 import javax.annotation.Resource;
... ... @@ -23,11 +21,6 @@ import javax.annotation.Resource;
23 21 @RestController
24 22 @RequestMapping("/nature/article")
25 23 public class ArticleController {
26   - /**
27   - * 服务对象
28   - */
29   - @Resource
30   - private ArticleService articleService;
31 24  
32 25 @Resource
33 26 private NatureSearchPageProcessor natureSearchPageProcessor;
... ... @@ -45,8 +38,6 @@ public class ArticleController {
45 38 public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) {
46 39 for (int i = 1; i <= indexSize; i++) {
47 40 Spider.create(natureSearchPageProcessor)
48   - // 添加这个Spider要爬取的网页地址
49   - .addUrl("https://www.nature.com/search?q=" + keyword + "&page=" + i)
50 41 .addUrl("https://www.nature.com/nature/research-articles?sort=PubDate&page=" + i)
51 42 .addPipeline(articlePipeline)
52 43 // .setDownloader(downloader.newIpDownloader())
... ... @@ -56,61 +47,5 @@ public class ArticleController {
56 47  
57 48 return ServerResult.success();
58 49 }
59   -
60   - /**
61   - * 分页查询
62   - *
63   - * @param natureArticleQueryVO 查询条件
64   - * @return 查询结果
65   - */
66   - @PostMapping("/list")
67   - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {
68   - return articleService.list(natureArticleQueryVO);
69   - }
70   -
71   - /**
72   - * 通过主键查询单条数据
73   - *
74   - * @param natureArticleQueryVO 查询条件
75   - * @return 单条数据
76   - */
77   - @PostMapping("/query_by_id")
78   - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
79   - return articleService.queryById(natureArticleQueryVO);
80   - }
81   -
82   - /**
83   - * 新增数据
84   - *
85   - * @param natureArticleVO 数据VO
86   - * @return 新增结果
87   - */
88   - @PostMapping("/add")
89   - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {
90   - return articleService.add(natureArticleVO);
91   - }
92   -
93   - /**
94   - * 编辑数据
95   - *
96   - * @param natureArticleVO 数据VO
97   - * @return 编辑结果
98   - */
99   - @PostMapping("/edit")
100   - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {
101   - return articleService.edit(natureArticleVO);
102   - }
103   -
104   - /**
105   - * 删除数据
106   - *
107   - * @param natureArticleQueryVO 查询条件
108   - * @return 删除是否成功
109   - */
110   - @PostMapping("/delete_by_id")
111   - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
112   - return articleService.deleteById(natureArticleQueryVO);
113   - }
114   -
115 50 }
116 51  
... ...
src/main/java/com/canrd/webmagic/controller/Science4JournalController.java
1 1 package com.canrd.webmagic.controller;
2 2  
3 3 import com.canrd.webmagic.common.constant.ServerResult;
4   -import com.canrd.webmagic.common.jsr303.OperateGroup;
5   -import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
6   -import com.canrd.webmagic.domain.vo.NatureArticleVO;
7 4 import com.canrd.webmagic.processor.Science4JournalSearchPageProcessor;
8 5 import com.canrd.webmagic.processor.download.SeleniumDownloader;
9   -import com.canrd.webmagic.service.ArticleService;
10 6 import org.apache.logging.log4j.core.util.UuidUtil;
11   -import org.springframework.validation.annotation.Validated;
12   -import org.springframework.web.bind.annotation.*;
  7 +import org.springframework.web.bind.annotation.GetMapping;
  8 +import org.springframework.web.bind.annotation.RequestMapping;
  9 +import org.springframework.web.bind.annotation.RequestParam;
  10 +import org.springframework.web.bind.annotation.RestController;
13 11 import us.codecraft.webmagic.Spider;
14 12  
15 13 import javax.annotation.Resource;
... ... @@ -23,11 +21,6 @@ import javax.annotation.Resource;
23 21 @RestController
24 22 @RequestMapping("/science/journal")
25 23 public class Science4JournalController {
26   - /**
27   - * 服务对象
28   - */
29   - @Resource
30   - private ArticleService articleService;
31 24  
32 25 @Resource
33 26 private Science4JournalSearchPageProcessor science4JournalSearchPageProcessor;
... ... @@ -52,60 +45,6 @@ public class Science4JournalController {
52 45 return ServerResult.success();
53 46 }
54 47  
55   - /**
56   - * 分页查询
57   - *
58   - * @param natureArticleQueryVO 查询条件
59   - * @return 查询结果
60   - */
61   - @PostMapping("/list")
62   - public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {
63   - return articleService.list(natureArticleQueryVO);
64   - }
65   -
66   - /**
67   - * 通过主键查询单条数据
68   - *
69   - * @param natureArticleQueryVO 查询条件
70   - * @return 单条数据
71   - */
72   - @PostMapping("/query_by_id")
73   - public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
74   - return articleService.queryById(natureArticleQueryVO);
75   - }
76   -
77   - /**
78   - * 新增数据
79   - *
80   - * @param natureArticleVO 数据VO
81   - * @return 新增结果
82   - */
83   - @PostMapping("/add")
84   - public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {
85   - return articleService.add(natureArticleVO);
86   - }
87   -
88   - /**
89   - * 编辑数据
90   - *
91   - * @param natureArticleVO 数据VO
92   - * @return 编辑结果
93   - */
94   - @PostMapping("/edit")
95   - public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {
96   - return articleService.edit(natureArticleVO);
97   - }
98   -
99   - /**
100   - * 删除数据
101   - *
102   - * @param natureArticleQueryVO 查询条件
103   - * @return 删除是否成功
104   - */
105   - @PostMapping("/delete_by_id")
106   - public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
107   - return articleService.deleteById(natureArticleQueryVO);
108   - }
109 48  
110 49 }
111 50  
... ...