package com.canrd.webmagic.processor.config;

import org.apache.commons.io.IOUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

/**
 * Maintains the pool of HTTP proxy addresses ({@code host:port}) stored in the Redis list
 * {@code "ip"}: periodically drops proxies that stopped working and (when enabled) scrapes
 * new candidates from a public free-proxy listing.
 *
 * @author: xms
 * @date: 2024/4/9 10:35
 * @version: 1.0
 */
@Component
public class UpdateIp {

    /** Redis key of the list holding the proxy pool. */
    private static final String IP_LIST_KEY = "ip";

    /** Once the pool holds more than this many entries, the oldest one is evicted on insert. */
    private static final int POOL_SIZE_THRESHOLD = 100;

    /** Timeout for fetching one page of the proxy listing, in milliseconds. */
    private static final int SCRAPE_TIMEOUT_MS = 3000;

    /** Timeout for opening a connection through a candidate proxy, in milliseconds. */
    private static final int PROBE_CONNECT_TIMEOUT_MS = 2000;

    /** Timeout for reading the probe response; without it a stalled proxy blocks forever. */
    private static final int PROBE_READ_TIMEOUT_MS = 5000;

    // Kept as a raw type on purpose: narrowing the generics would change which
    // RedisTemplate bean (and therefore which serializer) Spring injects.
    @SuppressWarnings("rawtypes")
    @Autowired
    private RedisTemplate redisTemplate;

    /**
     * Every 20 seconds, probes each proxy in the pool and removes the ones that no longer work.
     */
    @Scheduled(cron = "*/20 * * * * ?")
    void update() {
        for (String ip : currentIps()) {
            if (ifUseless(ip)) {
                System.err.println(ip + " 从redis移除");
                // count == 0 removes every occurrence of the value from the list.
                redisTemplate.opsForList().remove(IP_LIST_KEY, 0, ip);
            }
        }
    }

    /**
     * Scrapes pages 1..9 of the free-proxy listing, probes every {@code ip:port} found and
     * stores the working ones that are not yet in the pool. Scheduling is currently disabled
     * (see the commented-out annotation below).
     */
    // @Scheduled(cron = "*/15 * * * * ?")
    void ips() {
        try {
            for (int page = 1; page < 10; page++) {
                Document document = Jsoup.connect("https://www.zdaye.com/free/" + page + "/?sAdr=taiwan")
                        .timeout(SCRAPE_TIMEOUT_MS)
                        .get();
                Elements rows = document.selectXpath("//table[@id='ipc']/tbody/tr");
                for (Element row : rows) {
                    Elements cells = row.getElementsByTag("td");
                    if (cells.size() < 2) {
                        // Not a data row (e.g. header/ad row): no ip/port columns to read.
                        continue;
                    }
                    // Read the port from the SAME row as the ip (not from the first row of the table).
                    String uri = cells.get(0).text() + ":" + cells.get(1).text();
                    // Cheap Redis lookup first, so the network probe is skipped for known proxies.
                    if (currentIps().contains(uri) || ifUseless(uri)) {
                        continue;
                    }
                    System.err.println(uri + " 存进redis");
                    store(uri);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Checks whether a proxy is unusable by fetching {@code http://www.baidu.com} through it.
     *
     * @param ip proxy address in {@code host:port} form
     * @return {@code true} if the proxy is useless (null/malformed address, connection or read
     *         failure, or a response that does not contain "baidu"); {@code false} if it works
     */
    boolean ifUseless(String ip) {
        if (ip == null) {
            return true;
        }
        try {
            String[] split = ip.split(":");
            InetSocketAddress addr = new InetSocketAddress(split[0], Integer.parseInt(split[1]));
            Proxy proxy = new Proxy(Proxy.Type.HTTP, addr);
            URLConnection conn = new URL("http://www.baidu.com").openConnection(proxy);
            conn.setConnectTimeout(PROBE_CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(PROBE_READ_TIMEOUT_MS);
            // try-with-resources so the stream is released even when reading fails.
            try (InputStream in = conn.getInputStream()) {
                String body = IOUtils.toString(in, StandardCharsets.UTF_8);
                return !body.contains("baidu");
            }
        } catch (Exception ignored) {
            // Any failure (malformed address, refused connection, timeout, ...) means "unusable".
            return true;
        }
    }

    /** Returns the current content of the proxy list, or an empty list when Redis returns null. */
    @SuppressWarnings("unchecked")
    private List<String> currentIps() {
        List<String> ips = redisTemplate.opsForList().range(IP_LIST_KEY, 0, -1);
        return ips != null ? ips : Collections.<String>emptyList();
    }

    /** Pushes a proxy onto the head of the list, evicting the tail entry when the pool is over the threshold. */
    @SuppressWarnings("unchecked")
    private void store(String uri) {
        Long size = redisTemplate.opsForList().size(IP_LIST_KEY);
        if (size != null && size > POOL_SIZE_THRESHOLD) {
            // rightPopAndLeftPush(src, dst) takes a destination KEY, not a value, so it cannot
            // be used to insert uri; evict explicitly and then push.
            redisTemplate.opsForList().rightPop(IP_LIST_KEY);
        }
        redisTemplate.opsForList().leftPush(IP_LIST_KEY, uri);
    }
}