// ScienceDirectArticlePcoessor.java 1.26 KB
package com.canrd.webmagic.processor;

import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

/**
 * WebMagic {@link PageProcessor} that routes ScienceDirect pages to a handler
 * based on the request URL.
 *
 * <p>Each handler currently does the same thing: it puts the page's
 * {@link Html} into the page's fields under the key {@code "html"}. Pages whose
 * URL matches none of the known patterns are left untouched.
 */
@Slf4j
@Component
public class ScienceDirectArticlePcoessor implements PageProcessor {

    /** Exact URL of the journal landing page, handled by {@link #findSearch(Page)}. */
    private static final String JOURNAL_URL =
            "https://www.sciencedirect.com/journal/nano-today";

    /** Exact URL of the first search request, handled by {@link #getMaxPage(Page)}. */
    private static final String SEARCH_URL =
            "https://www.sciencedirect.com/search?qs=battery";

    /** Fragment identifying a paged search request, handled by {@link #everyPage(Page)}. */
    private static final String PAGED_SEARCH_URL_FRAGMENT =
            "https://www.sciencedirect.com/search?qs=battery&show=100&offset=";

    /** Field key under which the page HTML is stored. */
    private static final String HTML_FIELD = "html";

    /**
     * Dispatches the page to the handler matching its URL.
     *
     * <p>The journal and search URLs must match exactly; the paged search URL
     * only needs to occur somewhere inside the request URL. The checks are
     * evaluated in that order and the first match wins.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        final String requestUrl = page.getUrl().get();

        if (requestUrl.equals(JOURNAL_URL)) {
            findSearch(page);
            return;
        }
        if (requestUrl.equals(SEARCH_URL)) {
            getMaxPage(page);
            return;
        }
        if (requestUrl.contains(PAGED_SEARCH_URL_FRAGMENT)) {
            everyPage(page);
        }
        // Any other URL is intentionally ignored.
    }

    /**
     * Returns the site configuration by delegating to the default
     * implementation declared on {@link PageProcessor}.
     *
     * @return whatever the interface's default {@code getSite()} provides
     */
    @Override
    public Site getSite() {
        return PageProcessor.super.getSite();
    }

    /**
     * Handles the journal landing page by storing its HTML.
     *
     * @param page the journal landing page
     */
    void findSearch(Page page) {
        storeHtml(page);
    }

    /**
     * Handles the first search result page by storing its HTML.
     *
     * <p>NOTE(review): the original body carried a lone {@code move-right}
     * comment here, presumably a hint at the element holding the page count;
     * no extraction is implemented yet - confirm the intent.
     *
     * @param page the first search result page
     */
    void getMaxPage(Page page) {
        storeHtml(page);
    }

    /**
     * Handles one paged search result page by storing its HTML.
     *
     * @param page a paged search result page
     */
    void everyPage(Page page) {
        storeHtml(page);
    }

    /** Puts the page's {@link Html} into the page's fields under {@code "html"}. */
    private void storeHtml(Page page) {
        page.putField(HTML_FIELD, page.getHtml());
    }
}