// ArticleController.java
package com.canrd.webmagic.controller;
import com.canrd.webmagic.common.constant.ServerResult;
import com.canrd.webmagic.common.jsr303.OperateGroup;
import com.canrd.webmagic.domain.vo.NatureArticleQueryVO;
import com.canrd.webmagic.domain.vo.NatureArticleVO;
import com.canrd.webmagic.processor.NatureSearchPageProcessor;
import com.canrd.webmagic.processor.download.Downloader;
import com.canrd.webmagic.processor.pipeline.ArticlePipeline;
import com.canrd.webmagic.service.ArticleService;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import javax.annotation.Resource;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import us.codecraft.webmagic.Spider;
/**
 * REST controller for the nature article information (NatureArticle) table.
 *
 * <p>Exposes an endpoint that launches crawls against nature.com, plus endpoints that
 * delegate list/query/add/edit/delete requests to {@link ArticleService}.
 *
 * @author makejava
 * @since 2024-04-07 18:39:41
 */
@RestController
@RequestMapping("/nature/article")
public class ArticleController {

    /** Prefix of the keyword search URL; the encoded keyword and the page number are appended. */
    private static final String SEARCH_URL_PREFIX = "https://www.nature.com/search?q=";

    /** Prefix of the research-articles listing URL; the page number is appended. */
    private static final String RESEARCH_ARTICLES_URL_PREFIX =
            "https://www.nature.com/nature/research-articles?sort=PubDate&page=";

    /** Number of threads handed to each spider. */
    private static final int CRAWL_THREADS = 5;

    /**
     * Service object that the list/query/add/edit/delete endpoints delegate to.
     */
    @Resource
    private ArticleService articleService;

    @Resource
    private NatureSearchPageProcessor natureSearchPageProcessor;

    @Resource
    private ArticlePipeline articlePipeline;

    // Only referenced by the disabled setDownloader(...) call in start().
    @Resource
    private Downloader downloader;

    /**
     * Crawls pages {@code 1..indexSize}. For every page number a new spider is created with
     * two start URLs: the keyword search results page and the research-articles listing page
     * (the latter does not depend on the keyword). Results go to the article pipeline.
     *
     * <p>The keyword is percent-encoded before it is placed in the query string, so spaces,
     * {@code &}, {@code #} and non-ASCII characters can neither break the URL nor inject
     * additional query parameters.
     *
     * <p>NOTE(review): {@code Spider.run()} is invoked directly on the request thread, so the
     * response is presumably not sent until every page has been crawled; confirm this is intended.
     *
     * @param keyword   search keyword supplied by the caller
     * @param indexSize number of pages to crawl; values below 1 start no crawl
     * @return a success result once the loop has finished
     */
    @GetMapping("/start")
    public ServerResult start(@RequestParam(value = "keyword") String keyword, @RequestParam(value = "indexSize") Integer indexSize) {
        // Encode once; the value is identical for every page.
        String encodedKeyword = encodeQueryValue(keyword);
        for (int page = 1; page <= indexSize; page++) {
            Spider.create(natureSearchPageProcessor)
                    // Start URLs this spider will crawl.
                    .addUrl(SEARCH_URL_PREFIX + encodedKeyword + "&page=" + page)
                    .addUrl(RESEARCH_ARTICLES_URL_PREFIX + page)
                    .addPipeline(articlePipeline)
                    // Custom downloader currently disabled:
                    // .setDownloader(downloader.newIpDownloader())
                    // Run the crawl with CRAWL_THREADS threads.
                    .thread(CRAWL_THREADS)
                    .run();
        }
        return ServerResult.success();
    }

    /**
     * Paged query.
     *
     * @param natureArticleQueryVO query conditions, validated against {@link OperateGroup.List}
     * @return query result
     */
    @PostMapping("/list")
    public ServerResult list(@RequestBody @Validated({OperateGroup.List.class}) NatureArticleQueryVO natureArticleQueryVO) {
        return articleService.list(natureArticleQueryVO);
    }

    /**
     * Queries a single record by primary key.
     *
     * @param natureArticleQueryVO query conditions
     * @return the single record
     */
    @PostMapping("/query_by_id")
    public ServerResult queryById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
        return articleService.queryById(natureArticleQueryVO);
    }

    /**
     * Adds a record.
     *
     * @param natureArticleVO data VO
     * @return result of the add operation
     */
    @PostMapping("/add")
    public ServerResult add(@RequestBody NatureArticleVO natureArticleVO) {
        return articleService.add(natureArticleVO);
    }

    /**
     * Edits a record.
     *
     * @param natureArticleVO data VO
     * @return result of the edit operation
     */
    @PostMapping("/edit")
    public ServerResult edit(@RequestBody NatureArticleVO natureArticleVO) {
        return articleService.edit(natureArticleVO);
    }

    /**
     * Deletes a record.
     *
     * @param natureArticleQueryVO query conditions
     * @return whether the deletion succeeded
     */
    @PostMapping("/delete_by_id")
    public ServerResult deleteById(@RequestBody NatureArticleQueryVO natureArticleQueryVO) {
        return articleService.deleteById(natureArticleQueryVO);
    }

    /**
     * Encodes a value for use inside a URL query string
     * ({@code application/x-www-form-urlencoded}, UTF-8).
     *
     * @param value raw value
     * @return the encoded value
     */
    private static String encodeQueryValue(String value) {
        try {
            // The (String, String) overload is used because it exists on every Java version.
            return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8, so this is unreachable in practice.
            throw new IllegalStateException("UTF-8 encoding is not supported", e);
        }
    }
}