// IPUqiDataProcessor.java 2.82 KB
package com.canrd.webmagic.processor;

import com.canrd.webmagic.domain.dto.IpDO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

/**
 * WebMagic {@link PageProcessor} for the free proxy listings on ip.uqidata.com.
 *
 * <p>Pages are dispatched on their URL:
 * <ul>
 *   <li>listing pages ({@code .../freeip-N.html}) - the first one seen also enqueues every
 *       listing page found in the pagination bar; each listing page enqueues the detail
 *       pages it links to;</li>
 *   <li>detail pages ({@code .../freeip/...}) - the proxy records are parsed and published
 *       under the result field {@code "ips"}.</li>
 * </ul>
 */
@Component
@Slf4j
public class IPUqiDataProcessor implements PageProcessor {
    /** URL fragment identifying a paginated listing page. */
    private static final String LISTING_URL_PREFIX = "https://ip.uqidata.com/freeip-";

    /** URL fragment identifying a detail page holding the proxy records. */
    private static final String DETAIL_URL_PREFIX = "https://ip.uqidata.com/freeip/";

    /** A record is made of three space-separated tokens: {@code ip:port}, anonymity, region. */
    private static final int TOKENS_PER_RECORD = 3;

    /**
     * Pagination-discovery flag: 0 until {@link #getMaxPage(Page)} has run, 1 afterwards.
     * It is static and never reset inside this class, so discovery happens once per JVM.
     */
    private static int index = 0;

    /**
     * Routes a downloaded page to the matching handler based on its URL.
     *
     * @param page the page handed over by the spider
     */
    @Override
    public void process(Page page) {
        String url = page.getUrl().get();
        if (url.contains(LISTING_URL_PREFIX)) {
            if (index == 0) {
                getMaxPage(page);
            }
            // The first listing page is harvested too: it has already been fetched, so a
            // scheduler that de-duplicates URLs would not deliver it a second time and its
            // detail links would otherwise be lost.
            everyPage(page);
        } else if (url.contains(DETAIL_URL_PREFIX)) {
            getPageDetail(page);
        }
    }

    @Override
    public Site getSite() {
        return PageProcessor.super.getSite();
    }

    /**
     * Reads the pagination bar and enqueues every listing page from 1 up to the highest
     * page number found. Marks discovery as done before parsing, so it runs only once.
     *
     * @param page a listing page containing the pagination bar
     */
    void getMaxPage(Page page) {
        index = 1;
        Html html = page.getHtml();
        List<String> pageList = html.xpath("//div[@class='wp-pagenavi pd25']/a/text()").all();
        // Parsed once up front instead of on every loop iteration.
        int maxPage = lastNumericLabel(pageList);
        if (maxPage < 1) {
            log.warn("No page number found in pagination of {}", page.getUrl().get());
            return;
        }
        for (int i = 1; i <= maxPage; i++) {
            page.addTargetRequest(LISTING_URL_PREFIX + i + ".html");
        }
    }

    /**
     * Enqueues every detail page linked from a listing page.
     *
     * @param page a listing page
     */
    void everyPage(Page page) {
        Html html = page.getHtml();
        log.info(page.getUrl().get());
        List<String> hrefList = html.xpath("//div[@class='entry entry-content freeip']/h2/a/@href").all();
        // The hrefs are prefixed with the scheme before being enqueued.
        hrefList.forEach(href -> page.addTargetRequest("https:" + href));
    }

    /**
     * Parses the proxy records of a detail page and publishes them as a list of
     * {@link IpDO} under the result field {@code "ips"}.
     *
     * <p>Malformed or incomplete records are logged and skipped so that one bad entry does
     * not discard the rest of the page. Nothing is published when no record could be parsed.
     *
     * @param page a detail page
     */
    void getPageDetail(Page page) {
        Html html = page.getHtml();
        String ips = html.xpath("//div[@class='content']/p[@class!='pd25']/text()").get();
        if (ips == null || ips.trim().isEmpty()) {
            log.warn("No proxy text found on {}", page.getUrl().get());
            return;
        }

        String[] ipArray = ips.trim().split(" ");
        List<IpDO> ipDoList = new ArrayList<>();
        int i = 0;
        // Only consume complete triples; a dangling tail is reported below.
        for (; i + TOKENS_PER_RECORD <= ipArray.length; i += TOKENS_PER_RECORD) {
            IpDO ipDO = parseRecord(ipArray[i], ipArray[i + 1], ipArray[i + 2]);
            if (ipDO == null) {
                log.warn("Skipping malformed proxy record '{} {} {}' on {}",
                        ipArray[i], ipArray[i + 1], ipArray[i + 2], page.getUrl().get());
                continue;
            }
            ipDoList.add(ipDO);
        }
        if (i < ipArray.length) {
            log.warn("Ignoring {} trailing token(s) of an incomplete record on {}",
                    ipArray.length - i, page.getUrl().get());
        }

        if (!ipDoList.isEmpty()) {
            page.putField("ips", ipDoList);
        }
    }

    /**
     * Returns the value of the last label that parses as an integer, scanning from the end
     * so that trailing non-numeric links are ignored.
     *
     * @param labels anchor texts of the pagination bar
     * @return the parsed page number, or 0 when no label is numeric
     */
    private static int lastNumericLabel(List<String> labels) {
        for (int i = labels.size() - 1; i >= 0; i--) {
            String label = labels.get(i);
            if (label == null) {
                continue;
            }
            try {
                return Integer.parseInt(label.trim());
            } catch (NumberFormatException ignored) {
                // Not a page number; keep scanning backwards.
            }
        }
        return 0;
    }

    /**
     * Builds one {@link IpDO} from the three tokens of a record.
     *
     * @param endpoint  the {@code ip:port} token
     * @param anonymity the anonymity token, stored unchanged
     * @param rawRegion the region token; digit characters are removed from it
     * @return the populated record, or {@code null} when {@code endpoint} has no
     *         {@code ip:port} shape or its port is not an integer
     */
    private static IpDO parseRecord(String endpoint, String anonymity, String rawRegion) {
        String[] ipPort = endpoint.split(":");
        if (ipPort.length < 2) {
            return null;
        }
        int port;
        try {
            port = Integer.parseInt(ipPort[1]);
        } catch (NumberFormatException e) {
            return null;
        }

        StringBuilder region = new StringBuilder(rawRegion.length());
        for (char c : rawRegion.toCharArray()) {
            if (!Character.isDigit(c)) {
                region.append(c);
            }
        }

        IpDO ipDO = new IpDO();
        ipDO.setIp(ipPort[0]);
        ipDO.setPort(port);
        ipDO.setAnonymity(anonymity);
        ipDO.setRegion(region.toString());
        return ipDO;
    }
}