Commit b1b31dc60cb0fdfde1fea1c80b6fb2493a00ab32
1 parent
836066f9
feat: 配置userAgent和IP代理池
Showing
11 changed files
with
348 additions
and
6 deletions
src/main/java/com/canrd/webmagic/controller/NatureArticleController.java
@@ -4,7 +4,7 @@ import com.canrd.webmagic.common.constant.ServerResult; | @@ -4,7 +4,7 @@ import com.canrd.webmagic.common.constant.ServerResult; | ||
4 | import com.canrd.webmagic.common.jsr303.OperateGroup; | 4 | import com.canrd.webmagic.common.jsr303.OperateGroup; |
5 | import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; | 5 | import com.canrd.webmagic.domain.vo.NatureArticleQueryVO; |
6 | import com.canrd.webmagic.domain.vo.NatureArticleVO; | 6 | import com.canrd.webmagic.domain.vo.NatureArticleVO; |
7 | -import com.canrd.webmagic.processor.NatureArticlePipeline; | 7 | +import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline; |
8 | import com.canrd.webmagic.processor.NatureSearchPageProcessor; | 8 | import com.canrd.webmagic.processor.NatureSearchPageProcessor; |
9 | import com.canrd.webmagic.service.NatureArticleService; | 9 | import com.canrd.webmagic.service.NatureArticleService; |
10 | import org.springframework.validation.annotation.Validated; | 10 | import org.springframework.validation.annotation.Validated; |
src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
@@ -4,6 +4,8 @@ import com.alibaba.fastjson.JSONArray; | @@ -4,6 +4,8 @@ import com.alibaba.fastjson.JSONArray; | ||
4 | import com.alibaba.fastjson.JSONObject; | 4 | import com.alibaba.fastjson.JSONObject; |
5 | import com.canrd.webmagic.common.utils.StringUtils; | 5 | import com.canrd.webmagic.common.utils.StringUtils; |
6 | import com.canrd.webmagic.domain.dto.NatureArticleDO; | 6 | import com.canrd.webmagic.domain.dto.NatureArticleDO; |
7 | +import com.canrd.webmagic.processor.config.Agent; | ||
8 | +import com.canrd.webmagic.processor.pipeline.NatureArticlePipeline; | ||
7 | import org.springframework.stereotype.Component; | 9 | import org.springframework.stereotype.Component; |
8 | import us.codecraft.webmagic.Page; | 10 | import us.codecraft.webmagic.Page; |
9 | import us.codecraft.webmagic.Site; | 11 | import us.codecraft.webmagic.Site; |
@@ -24,9 +26,10 @@ import java.util.Objects; | @@ -24,9 +26,10 @@ import java.util.Objects; | ||
24 | */ | 26 | */ |
25 | @Component | 27 | @Component |
26 | public class NatureSearchPageProcessor implements PageProcessor { | 28 | public class NatureSearchPageProcessor implements PageProcessor { |
29 | + private String agent = Agent.getRandom(); | ||
27 | 30 | ||
28 | // 抓取网站的相关配置,包括编码、抓取间隔、重试次数等 | 31 | // 抓取网站的相关配置,包括编码、抓取间隔、重试次数等 |
29 | - private Site site = Site.me().setRetryTimes(3).setSleepTime(100); | 32 | + private Site site = Site.me().setRetryTimes(3).setSleepTime(100).setUserAgent(agent); |
30 | 33 | ||
31 | /** | 34 | /** |
32 | * 定制爬虫逻辑的核心接口,在这里编写抽取逻辑 | 35 | * 定制爬虫逻辑的核心接口,在这里编写抽取逻辑 |
src/main/java/com/canrd/webmagic/processor/config/Agent.java
0 → 100644
1 | +package com.canrd.webmagic.processor.config; | ||
2 | + | ||
3 | +import cn.hutool.core.io.resource.ClassPathResource; | ||
4 | +import lombok.extern.slf4j.Slf4j; | ||
5 | + | ||
6 | +import java.io.BufferedReader; | ||
7 | +import java.io.IOException; | ||
8 | +import java.io.InputStream; | ||
9 | +import java.io.InputStreamReader; | ||
10 | +import java.util.ArrayList; | ||
11 | +import java.util.List; | ||
12 | +import java.util.Random; | ||
13 | +import java.util.concurrent.locks.ReentrantReadWriteLock; | ||
14 | + | ||
15 | +/** | ||
16 | + * @author: xms | ||
17 | + * @description: TODO | ||
18 | + * @date: 2024/4/9 10:28 | ||
19 | + * @version: 1.0 | ||
20 | + */ | ||
21 | +@Slf4j | ||
22 | +public class Agent { | ||
23 | + private static final String AGENT_FILE_PATH = "user-agent/User-Agents.txt"; | ||
24 | + private static ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); | ||
25 | + private static List<String> agents; | ||
26 | + | ||
27 | + /** | ||
28 | + * @return | ||
29 | + */ | ||
30 | + public static String getRandom() { | ||
31 | + String random = getRandom(null); | ||
32 | + log.info("Agent======================>" + random); | ||
33 | + return random; | ||
34 | + } | ||
35 | + | ||
36 | + /** | ||
37 | + * @param agent | ||
38 | + * @return | ||
39 | + */ | ||
40 | + private static String getRandom(String agent) { | ||
41 | + try { | ||
42 | + lock.readLock().lock(); | ||
43 | + int size = agents.size(); | ||
44 | + if (size == 0) { | ||
45 | + return "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"; | ||
46 | + } | ||
47 | + Random random = new Random(); | ||
48 | + if (null != agent) { | ||
49 | + return agent; | ||
50 | + } else { | ||
51 | + return agents.get(random.nextInt(size)); | ||
52 | + } | ||
53 | + } catch (Exception e) { | ||
54 | + e.printStackTrace(); | ||
55 | + return "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"; | ||
56 | + } finally { | ||
57 | + lock.readLock().unlock(); | ||
58 | + } | ||
59 | + } | ||
60 | + | ||
61 | + static { | ||
62 | + agents = new ArrayList<>(); | ||
63 | + InputStream resourceAsStream = null; | ||
64 | + InputStreamReader inputStreamReader = null; | ||
65 | + BufferedReader bufferedReader = null; | ||
66 | + try { | ||
67 | + resourceAsStream = new ClassPathResource(AGENT_FILE_PATH).getStream(); | ||
68 | + inputStreamReader = new InputStreamReader(resourceAsStream); | ||
69 | + bufferedReader = new BufferedReader(inputStreamReader); | ||
70 | + String len; | ||
71 | + while ((len = bufferedReader.readLine()) != null) { | ||
72 | + if (!len.matches("^#.*")) { | ||
73 | + agents.add(len.trim()); | ||
74 | + } | ||
75 | + } | ||
76 | + } catch (Exception e) { | ||
77 | + e.printStackTrace(); | ||
78 | + } finally { | ||
79 | + if (null != bufferedReader) { | ||
80 | + try { | ||
81 | + bufferedReader.close(); | ||
82 | + } catch (IOException e) { | ||
83 | + e.printStackTrace(); | ||
84 | + } | ||
85 | + } | ||
86 | + if (null != inputStreamReader) { | ||
87 | + try { | ||
88 | + inputStreamReader.close(); | ||
89 | + } catch (IOException e) { | ||
90 | + e.printStackTrace(); | ||
91 | + } | ||
92 | + } | ||
93 | + if (null != resourceAsStream) { | ||
94 | + try { | ||
95 | + resourceAsStream.close(); | ||
96 | + } catch (IOException e) { | ||
97 | + e.printStackTrace(); | ||
98 | + } | ||
99 | + } | ||
100 | + } | ||
101 | + } | ||
102 | +} |
src/main/java/com/canrd/webmagic/processor/config/Downloader.java
0 → 100644
1 | +package com.canrd.webmagic.processor.config; | ||
2 | + | ||
3 | +import lombok.extern.slf4j.Slf4j; | ||
4 | +import org.springframework.beans.factory.annotation.Autowired; | ||
5 | +import org.springframework.data.redis.core.RedisTemplate; | ||
6 | +import org.springframework.stereotype.Component; | ||
7 | +import us.codecraft.webmagic.Request; | ||
8 | +import us.codecraft.webmagic.downloader.HttpClientDownloader; | ||
9 | +import us.codecraft.webmagic.proxy.Proxy; | ||
10 | +import us.codecraft.webmagic.proxy.SimpleProxyProvider; | ||
11 | + | ||
12 | +import java.util.Random; | ||
13 | + | ||
14 | +/** | ||
15 | + * @author: xms | ||
16 | + * @description: TODO | ||
17 | + * @date: 2024/4/9 10:37 | ||
18 | + * @version: 1.0 | ||
19 | + */ | ||
20 | +@Slf4j | ||
21 | +@Component | ||
22 | +public class Downloader { | ||
23 | + private static RedisTemplate redisTemplate; | ||
24 | + | ||
25 | + @Autowired | ||
26 | + Downloader(RedisTemplate redisTemplate) { | ||
27 | + Downloader.redisTemplate = redisTemplate; | ||
28 | + } | ||
29 | + | ||
30 | + /** | ||
31 | + * | ||
32 | + * @return | ||
33 | + */ | ||
34 | + public static HttpClientDownloader newIpDownloader() { | ||
35 | + HttpClientDownloader downloader = new HttpClientDownloader() { | ||
36 | + @Override | ||
37 | + protected void onError(Request request) { | ||
38 | + String[] ips = newIp(); | ||
39 | + setProxyProvider(SimpleProxyProvider.from(new Proxy(ips[0], Integer.parseInt(ips[1])))); | ||
40 | + } | ||
41 | + }; | ||
42 | + return downloader; | ||
43 | + } | ||
44 | + | ||
45 | + static String[] newIp() { | ||
46 | + Long size = redisTemplate.opsForList().size("ip"); | ||
47 | + String ip = redisTemplate.opsForList().index("ip", new Random().nextInt(size.intValue())).toString(); | ||
48 | + log.info("获取ip===========>" + ip); | ||
49 | + String[] ips = ip.split(":"); | ||
50 | + return ips; | ||
51 | + } | ||
52 | +} |
src/main/java/com/canrd/webmagic/processor/config/UpdateIp.java
0 → 100644
1 | +package com.canrd.webmagic.processor.config; | ||
2 | + | ||
3 | +import com.baomidou.mybatisplus.core.toolkit.StringUtils; | ||
4 | +import org.apache.commons.io.IOUtils; | ||
5 | +import org.jsoup.Jsoup; | ||
6 | +import org.jsoup.nodes.Document; | ||
7 | +import org.jsoup.nodes.Element; | ||
8 | +import org.jsoup.select.Elements; | ||
9 | +import org.springframework.beans.factory.annotation.Autowired; | ||
10 | +import org.springframework.data.redis.core.RedisTemplate; | ||
11 | +import org.springframework.scheduling.annotation.Scheduled; | ||
12 | +import org.springframework.stereotype.Component; | ||
13 | + | ||
14 | +import java.io.IOException; | ||
15 | +import java.io.InputStream; | ||
16 | +import java.net.InetSocketAddress; | ||
17 | +import java.net.Proxy; | ||
18 | +import java.net.URL; | ||
19 | +import java.net.URLConnection; | ||
20 | +import java.util.List; | ||
21 | + | ||
22 | +/** | ||
23 | + * @author: xms | ||
24 | + * @description: TODO | ||
25 | + * @date: 2024/4/9 10:35 | ||
26 | + * @version: 1.0 | ||
27 | + */ | ||
28 | +@Component | ||
29 | +public class UpdateIp { | ||
30 | + | ||
31 | + @Autowired | ||
32 | + private RedisTemplate redisTemplate; | ||
33 | + | ||
34 | + @Scheduled(cron = "*/20 * * * * ?") | ||
35 | + void update() { | ||
36 | + List<String> range = redisTemplate.opsForList().range("ip", 0, -1); | ||
37 | + for (String ip : range) { | ||
38 | + if (ifUseless(ip)) { | ||
39 | + System.err.println(ip + " 从redis移除"); | ||
40 | + redisTemplate.opsForList().remove("ip", 0, ip); | ||
41 | + } | ||
42 | + } | ||
43 | + } | ||
44 | + | ||
45 | + @Scheduled(cron = "*/15 * * * * ?") | ||
46 | + void ips() { | ||
47 | + String string = null; | ||
48 | + try { | ||
49 | + Document document = Jsoup.connect("https://www.xicidaili.com/nn").timeout(3000).get(); | ||
50 | + Elements tags = document.select("#ip_list > tbody > tr"); | ||
51 | + for (Element element : tags) { | ||
52 | + //取得ip地址节点 | ||
53 | + Elements tdChilds = element.select("tr > td:nth-child(2)"); | ||
54 | + //取得端口号节点 | ||
55 | + Elements tcpd = element.select("tr > td:nth-child(3)"); | ||
56 | + if (StringUtils.isNotBlank(tdChilds.text()) && StringUtils.isNotBlank(tcpd.text())) { | ||
57 | + string = tdChilds.text() + ":" + tcpd.text(); | ||
58 | + if (!ifUseless(string)) { | ||
59 | + List<String> range = redisTemplate.opsForList().range("ip", 0, -1); | ||
60 | + if (!range.contains(string)) { | ||
61 | + System.err.println(string + " 存进redis"); | ||
62 | + if (redisTemplate.opsForList().size("ip") > 100) { | ||
63 | + redisTemplate.opsForList().rightPopAndLeftPush("ip", string); | ||
64 | + } else { | ||
65 | + redisTemplate.opsForList().leftPush("ip", string); | ||
66 | + } | ||
67 | + } | ||
68 | + } | ||
69 | + } | ||
70 | + } | ||
71 | + } catch (IOException e) { | ||
72 | + e.printStackTrace(); | ||
73 | + } | ||
74 | + } | ||
75 | + | ||
76 | + /** | ||
77 | + * 无效的ip 返回true 有效的ip返回false | ||
78 | + * | ||
79 | + * @param ip | ||
80 | + * @return | ||
81 | + */ | ||
82 | + boolean ifUseless(String ip) { | ||
83 | + String[] split = ip.split(":"); | ||
84 | + URL url = null; | ||
85 | + try { | ||
86 | + url = new URL("http://www.baidu.com"); | ||
87 | + InetSocketAddress addr = new InetSocketAddress(split[0], Integer.parseInt(split[1])); | ||
88 | + Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); | ||
89 | + InputStream in = null; | ||
90 | + try { | ||
91 | + URLConnection conn = url.openConnection(proxy); | ||
92 | + conn.setConnectTimeout(2000); | ||
93 | + in = conn.getInputStream(); | ||
94 | + } catch (Exception e) { | ||
95 | + return true; | ||
96 | + } | ||
97 | + String s = IOUtils.toString(in); | ||
98 | + if (s.indexOf("baidu") > 0) { | ||
99 | + return false; | ||
100 | + } | ||
101 | + return true; | ||
102 | + } catch (Exception e) { | ||
103 | + return true; | ||
104 | + } | ||
105 | + } | ||
106 | +} |
src/main/java/com/canrd/webmagic/processor/NatureArticlePipeline.java renamed to src/main/java/com/canrd/webmagic/processor/pipeline/NatureArticlePipeline.java
1 | -package com.canrd.webmagic.processor; | 1 | +package com.canrd.webmagic.processor.pipeline; |
2 | 2 | ||
3 | import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; | 3 | import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; |
4 | import com.canrd.webmagic.domain.dto.NatureArticleDO; | 4 | import com.canrd.webmagic.domain.dto.NatureArticleDO; |
src/main/resources/log4j2-dev.xml
@@ -66,7 +66,7 @@ | @@ -66,7 +66,7 @@ | ||
66 | <ThresholdFilter level="debug"/> | 66 | <ThresholdFilter level="debug"/> |
67 | <appender-ref ref="RollingFileInfo" /> | 67 | <appender-ref ref="RollingFileInfo" /> |
68 | </logger> | 68 | </logger> |
69 | - <logger name="com.canrd.shop" level="DEBUG" > | 69 | + <logger name="com.canrd.webmagic" level="DEBUG" > |
70 | <ThresholdFilter level="debug"/> | 70 | <ThresholdFilter level="debug"/> |
71 | <appender-ref ref="RollingFileInfo" /> | 71 | <appender-ref ref="RollingFileInfo" /> |
72 | </logger> | 72 | </logger> |
src/main/resources/log4j2-prod.xml
@@ -30,7 +30,7 @@ | @@ -30,7 +30,7 @@ | ||
30 | <!--</console>--> | 30 | <!--</console>--> |
31 | 31 | ||
32 | <!-- 这个会打印出所有的info及以下级别的信息,每次大小超过size,则这size大小的日志会自动存入按年份-月份建立的文件夹下面并进行压缩,作为存档 --> | 32 | <!-- 这个会打印出所有的info及以下级别的信息,每次大小超过size,则这size大小的日志会自动存入按年份-月份建立的文件夹下面并进行压缩,作为存档 --> |
33 | - <RollingFile name="RollingFileInfo" fileName="${sys:logging.path}/logs/overtime.log" filePattern="${sys:logging.path}/logs/$${date:yyyy-MM-dd}/info-%d{yyyy-MM-dd}-%i.log"> | 33 | + <RollingFile name="RollingFileInfo" fileName="${sys:logging.path}/logs/webmagic.log" filePattern="${sys:logging.path}/logs/$${date:yyyy-MM-dd}/info-%d{yyyy-MM-dd}-%i.log"> |
34 | <!--控制台只输出level及以上级别的信息(onMatch),其他的直接拒绝(onMismatch) --> | 34 | <!--控制台只输出level及以上级别的信息(onMatch),其他的直接拒绝(onMismatch) --> |
35 | <ThresholdFilter level="info" onMatch="ACCEPT" onMismatch="DENY" /> | 35 | <ThresholdFilter level="info" onMatch="ACCEPT" onMismatch="DENY" /> |
36 | <!--<Filters>--> | 36 | <!--<Filters>--> |
@@ -65,7 +65,7 @@ | @@ -65,7 +65,7 @@ | ||
65 | <ThresholdFilter level="info"/> | 65 | <ThresholdFilter level="info"/> |
66 | <appender-ref ref="RollingFileInfo" /> | 66 | <appender-ref ref="RollingFileInfo" /> |
67 | </logger> | 67 | </logger> |
68 | - <logger name="com.canrd.shop" level="info" > | 68 | + <logger name="com.canrd.webmagic" level="info" > |
69 | <ThresholdFilter level="info"/> | 69 | <ThresholdFilter level="info"/> |
70 | <appender-ref ref="RollingFileInfo" /> | 70 | <appender-ref ref="RollingFileInfo" /> |
71 | </logger> | 71 | </logger> |
src/main/resources/user-agent/User-Agents.txt
0 → 100644
1 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60 | ||
2 | +Opera/8.0 (Windows NT 5.1; U; en) | ||
3 | +Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50 | ||
4 | +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50 | ||
5 | +Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0 | ||
6 | +Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10 | ||
7 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2 | ||
8 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36 | ||
9 | +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11 | ||
10 | +Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 | ||
11 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36 | ||
12 | +Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko | ||
13 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11 | ||
14 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER | ||
15 | +Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER) | ||
16 | +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER) | ||
17 | +Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400) | ||
18 | +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E) | ||
19 | +Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0 | ||
20 | +Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) | ||
21 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36 | ||
22 | +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36 | ||
0 | \ No newline at end of file | 23 | \ No newline at end of file |
src/test/java/com/canrd/webmagic/BaseTest.java
0 → 100644
1 | +package com.canrd.webmagic; | ||
2 | + | ||
3 | +import org.junit.runner.RunWith; | ||
4 | +import org.springframework.boot.test.context.SpringBootTest; | ||
5 | +import org.springframework.test.context.junit4.SpringRunner; | ||
6 | +import org.springframework.transaction.annotation.Transactional; | ||
7 | + | ||
8 | + | ||
9 | +@RunWith(SpringRunner.class) | ||
10 | +@SpringBootTest(classes = Application.class) | ||
11 | +// Roll back the data produced by the tests | ||
12 | +@Transactional | ||
13 | +public class BaseTest { | ||
14 | + | ||
15 | + | ||
16 | +} |
src/test/java/com/canrd/webmagic/utils/DateTimeUtilTest.java
0 → 100644
1 | +package com.canrd.webmagic.utils; | ||
2 | + | ||
3 | +import com.alibaba.fastjson.JSON; | ||
4 | +import com.alibaba.fastjson.JSONArray; | ||
5 | +import com.alibaba.fastjson.JSONObject; | ||
6 | +import com.canrd.webmagic.BaseTest; | ||
7 | +import com.canrd.webmagic.domain.dto.NatureArticleDO; | ||
8 | +import com.canrd.webmagic.service.NatureArticleService; | ||
9 | +import org.junit.Test; | ||
10 | + | ||
11 | +import javax.annotation.Resource; | ||
12 | +import java.util.List; | ||
13 | + | ||
14 | +/** | ||
15 | + * @author: xms | ||
16 | + * @description: TODO | ||
17 | + * @date: 2023/2/10 14:09 | ||
18 | + * @version: 1.0 | ||
19 | + */ | ||
20 | +public class DateTimeUtilTest extends BaseTest { | ||
21 | + | ||
22 | + @Resource | ||
23 | + private NatureArticleService natureArticleService; | ||
24 | + | ||
25 | + @Test | ||
26 | + public void export() { | ||
27 | + List<NatureArticleDO> articleDOList = natureArticleService.list(); | ||
28 | + JSONArray array = new JSONArray(); | ||
29 | + for (NatureArticleDO articleDO : articleDOList) { | ||
30 | + JSONArray jsonArray = JSONArray.parseArray(articleDO.getEmailInfo()); | ||
31 | + array.addAll(jsonArray); | ||
32 | + } | ||
33 | + System.out.println("联系作者,邮箱"); | ||
34 | + for (Object o : array) { | ||
35 | + JSONObject jsonObject = JSONObject.parseObject(JSON.toJSONString(o)); | ||
36 | + String authorEmailName = jsonObject.getString("authorEmailName"); | ||
37 | + String email = jsonObject.getString("email"); | ||
38 | + System.out.println(authorEmailName + "," + email); | ||
39 | + } | ||
40 | + } | ||
41 | +} |