IPUqiDataProcessor.java
2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package com.canrd.webmagic.processor;
import com.canrd.webmagic.domain.dto.IpDO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
 * WebMagic page processor for the free proxy lists published on ip.uqidata.com.
 *
 * <p>Pages are dispatched by URL:
 * <ul>
 *   <li>the first listing page seen ({@code freeip-N.html}) is only used to read the
 *       pagination bar and enqueue every listing page;</li>
 *   <li>every later listing page enqueues the detail pages it links to;</li>
 *   <li>detail pages ({@code freeip/...}) are parsed into {@link IpDO} objects that are
 *       published under the result field {@code "ips"}.</li>
 * </ul>
 *
 * <p>NOTE(review): the "first listing page" decision is kept in a static, unsynchronised
 * field, so it is shared by every instance and never reset. If the spider runs several
 * threads, or the processor is reused for a second crawl, confirm this is still the
 * intended behaviour.
 */
@Component
@Slf4j
public class IPUqiDataProcessor implements PageProcessor {

    /** URL fragment shared by the paginated listing pages ({@code freeip-N.html}). */
    private static final String LIST_URL_PREFIX = "https://ip.uqidata.com/freeip-";

    /** URL fragment shared by the detail pages. */
    private static final String DETAIL_URL_PREFIX = "https://ip.uqidata.com/freeip/";

    /** Tokens consumed per proxy entry: {@code host:port}, anonymity, region. */
    private static final int TOKENS_PER_ENTRY = 3;

    /** 0 until the first listing page has been handled, 1 afterwards. */
    private static int index = 0;

    /**
     * Routes the page to the handler that matches its URL. Pages whose URL matches
     * none of the known prefixes are ignored.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        String url = page.getUrl().get();
        if (url.contains(LIST_URL_PREFIX) && index == 0) {
            // NOTE(review): this page's own entry links are not extracted here; that relies
            // on the page being re-queued by getMaxPage and not dropped as a duplicate.
            getMaxPage(page);
        } else if (url.contains(LIST_URL_PREFIX) && index == 1) {
            everyPage(page);
        } else if (url.contains(DETAIL_URL_PREFIX)) {
            getPageDetail(page);
        }
    }

    /**
     * Returns the site configuration provided by the {@link PageProcessor} default method.
     *
     * @return the default site configuration
     */
    @Override
    public Site getSite() {
        return PageProcessor.super.getSite();
    }

    /**
     * Reads the highest page number from the pagination bar and enqueues every listing
     * page from 1 up to that number. Marks the first listing page as handled.
     *
     * <p>If the pagination bar is missing or its last label is not a number, a warning
     * is logged and nothing is enqueued.
     *
     * @param page the first listing page
     */
    void getMaxPage(Page page) {
        index = 1;
        Html html = page.getHtml();
        List<String> pageList = html.xpath("//div[@class='wp-pagenavi pd25']/a/text()").all();
        if (pageList == null || pageList.isEmpty()) {
            log.warn("No pagination links found on {}", page.getUrl().get());
            return;
        }

        // Parse once instead of on every loop iteration.
        String lastLabel = pageList.get(pageList.size() - 1);
        int maxPage;
        try {
            maxPage = Integer.parseInt(lastLabel == null ? "" : lastLabel.trim());
        } catch (NumberFormatException e) {
            log.warn("Last pagination label '{}' on {} is not a page number",
                    lastLabel, page.getUrl().get());
            return;
        }

        for (int i = 1; i <= maxPage; i++) {
            page.addTargetRequest(LIST_URL_PREFIX + i + ".html");
        }
    }

    /**
     * Enqueues every detail page linked from a listing page. The extracted hrefs are
     * prefixed with {@code "https:"} before being queued.
     *
     * @param page a listing page
     */
    void everyPage(Page page) {
        Html html = page.getHtml();
        log.info(page.getUrl().get());
        List<String> hrefList = html.xpath("//div[@class='entry entry-content freeip']/h2/a/@href").all();
        for (String href : hrefList) {
            page.addTargetRequest("https:" + href);
        }
    }

    /**
     * Parses the proxy entries of a detail page and stores them in the result field
     * {@code "ips"} as a {@code List<IpDO>}.
     *
     * <p>The extracted text is split on single spaces and consumed three tokens at a
     * time: {@code host:port}, anonymity, region (digits are stripped from the region
     * token). Malformed entries and an incomplete trailing group are skipped with a
     * warning instead of failing the whole page. When no text is found the field is
     * set to an empty list.
     *
     * @param page a detail page
     */
    void getPageDetail(Page page) {
        Html html = page.getHtml();
        String ips = html.xpath("//div[@class='content']/p[@class!='pd25']/text()").get();
        List<IpDO> ipDoList = new ArrayList<>();
        if (ips == null) {
            log.warn("No proxy text found on {}", page.getUrl().get());
            page.putField("ips", ipDoList);
            return;
        }

        String[] ipArray = ips.split(" ");
        // Stop before a trailing group that does not have all three tokens.
        for (int i = 0; i + TOKENS_PER_ENTRY - 1 < ipArray.length; i += TOKENS_PER_ENTRY) {
            String[] ipPort = ipArray[i].split(":");
            if (ipPort.length < 2) {
                log.warn("Skipping entry without host:port '{}' on {}",
                        ipArray[i], page.getUrl().get());
                continue;
            }

            int port;
            try {
                port = Integer.parseInt(ipPort[1]);
            } catch (NumberFormatException e) {
                log.warn("Skipping entry with non-numeric port '{}' on {}",
                        ipArray[i], page.getUrl().get());
                continue;
            }

            IpDO ipDO = new IpDO();
            ipDO.setIp(ipPort[0]);
            ipDO.setPort(port);
            ipDO.setAnonymity(ipArray[i + 1]);
            ipDO.setRegion(stripDigits(ipArray[i + 2]));
            ipDoList.add(ipDO);
        }
        page.putField("ips", ipDoList);
    }

    /** Returns {@code text} with every digit character removed. */
    private static String stripDigits(String text) {
        StringBuilder kept = new StringBuilder(text.length());
        for (char c : text.toCharArray()) {
            if (!Character.isDigit(c)) {
                kept.append(c);
            }
        }
        return kept.toString();
    }
}