NatureMethodsController.java
1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package com.canrd.webmagic.controller;
import com.canrd.webmagic.common.constant.ServerResult;
import com.canrd.webmagic.processor.NatureMethodsPcoessor;
import com.canrd.webmagic.processor.NatureNanotechnologyProcessor;
import com.canrd.webmagic.processor.pipeline.ArticlePipeline;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.apache.logging.log4j.core.util.UuidUtil;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.scheduler.RedisScheduler;
import javax.annotation.Resource;
/**
 * REST controller that exposes an endpoint for launching the Nature Methods article crawl.
 *
 * <p>All routes of this controller are mounted under {@code /nature-methods/article}.
 */
@RestController
@RequestMapping("/nature-methods/article")
@Api("Nature")
public class NatureMethodsController {

    /** Page address the spider is seeded with. */
    private static final String SEED_URL = "https://www.nature.com/nmeth/";

    /** Host handed to the {@link RedisScheduler} constructor. */
    private static final String REDIS_HOST = "127.0.0.1";

    /** Thread count passed to {@code Spider.thread(int)}. */
    private static final int THREAD_COUNT = 20;

    // Field names are kept unchanged: @Resource may resolve beans by field name.
    @Resource
    private NatureMethodsPcoessor natureMethodsPcoessor;

    @Resource
    private ArticlePipeline articlePipeline;

    /**
     * Handles {@code GET /nature-methods/article/start}: configures a spider for the
     * Nature Methods seed page and runs it.
     *
     * <p>The spider is given a freshly generated time-based UUID, the injected
     * {@link ArticlePipeline}, a {@link RedisScheduler} pointing at {@code 127.0.0.1},
     * and a thread count of 20. The success result is produced only after
     * {@code run()} has returned.
     *
     * <p>NOTE(review): {@code run()} presumably executes the crawl on the request
     * thread, so the HTTP response would be held until the crawl finishes — confirm
     * against the WebMagic {@code Spider} API.
     *
     * @return {@code ServerResult.success()} once {@code run()} has returned
     */
    @GetMapping("/start")
    @ApiOperation("start")
    public ServerResult start() {
        // Each invocation tags its spider with a new time-based UUID.
        String crawlId = UuidUtil.getTimeBasedUuid().toString();

        Spider spider = Spider.create(natureMethodsPcoessor)
                // Page address this spider starts crawling from.
                .addUrl(SEED_URL)
                .setUUID(crawlId)
                .addPipeline(articlePipeline)
                .setScheduler(new RedisScheduler(REDIS_HOST))
                // Request THREAD_COUNT threads for the crawl.
                .thread(THREAD_COUNT);

        // Start crawling.
        spider.run();

        return ServerResult.success();
    }
}