Commit cfb428111420796fcaf9a213cc9592c209aa6977
1 parent
fc48df03
feat:
1、science 爬取
Showing 1 changed file with 3 additions and 0 deletions
src/main/java/com/canrd/webmagic/processor/Science4JournalArticlePageProcessor.java
@@ -93,6 +93,9 @@ public class Science4JournalArticlePageProcessor implements PageProcessor { | @@ -93,6 +93,9 @@ public class Science4JournalArticlePageProcessor implements PageProcessor { | ||
93 | String givenName = authorEmailSelectable.xpath("//span[@property='givenName']/text()").get(); | 93 | String givenName = authorEmailSelectable.xpath("//span[@property='givenName']/text()").get(); |
94 | String familyName = authorEmailSelectable.xpath("//span[@property='familyName']/text()").get(); | 94 | String familyName = authorEmailSelectable.xpath("//span[@property='familyName']/text()").get(); |
95 | String email = authorEmailSelectable.xpath("//a[@property='email']/text()").get(); | 95 | String email = authorEmailSelectable.xpath("//a[@property='email']/text()").get(); |
96 | + if (StringUtils.isBlank(email)) { | ||
97 | + continue; | ||
98 | + } | ||
96 | 99 | ||
97 | JSONObject jsonObject = new JSONObject(); | 100 | JSONObject jsonObject = new JSONObject(); |
98 | jsonObject.put("authorEmailName", givenName + "" + familyName); | 101 | jsonObject.put("authorEmailName", givenName + "" + familyName); |