// MatterController.java 1.32 KB
package com.canrd.webmagic.controller;

import com.canrd.webmagic.common.constant.ServerResult;
import com.canrd.webmagic.processor.MatterPagePcoessor;
import com.canrd.webmagic.processor.download.SeleniumDownloader;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.apache.logging.log4j.core.util.UuidUtil;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import us.codecraft.webmagic.Spider;

import javax.annotation.Resource;

/**
 * REST controller mapped under {@code /matter/article} whose single endpoint
 * launches a WebMagic {@link Spider} against the cell.com "Matter" home page.
 */
@RestController
@RequestMapping("/matter/article")
@Api("Matter")
public class MatterController {

    /** Seed URL the spider is given when a crawl is started. */
    private static final String MATTER_HOME_URL = "https://www.cell.com/matter/home";

    /**
     * Thread count requested from the spider.
     * NOTE(review): the previous inline comment spoke of 5 threads while the
     * code has always passed 60 — confirm which value is actually intended.
     */
    private static final int CRAWLER_THREAD_COUNT = 60;

    // The field name is kept as-is (typo included): @Resource can resolve by
    // field name, so renaming it could change which bean gets injected.
    @Resource
    private MatterPagePcoessor matterPragePcoessor;

    @Resource
    private SeleniumDownloader seleniumDownloader;

    /**
     * Builds a new spider around the injected page processor, seeds it with the
     * Matter home URL, tags it with a freshly generated time-based UUID, plugs in
     * the injected {@link SeleniumDownloader}, and starts it.
     *
     * <p>Every call creates and starts another spider; nothing here checks
     * whether an earlier one is still running.
     *
     * @return {@code ServerResult.success()}, returned right after the spider's
     *         {@code start()} call (presumably without waiting for the crawl to
     *         finish — confirm against WebMagic's {@code Spider.start()})
     */
    @GetMapping("/start")
    @ApiOperation("start")
    public ServerResult start() {
        Spider matterSpider = Spider.create(matterPragePcoessor);

        // Register the page this spider should begin crawling from.
        matterSpider.addUrl(MATTER_HOME_URL);

        String crawlId = UuidUtil.getTimeBasedUuid().toString();
        matterSpider.setUUID(crawlId);
        matterSpider.setDownloader(seleniumDownloader);

        // Configure the worker threads, then kick off the crawl.
        matterSpider.thread(CRAWLER_THREAD_COUNT);
        matterSpider.start();

        return ServerResult.success();
    }
}