Commit cfb428111420796fcaf9a213cc9592c209aa6977

Authored by 谢茂盛
1 parent fc48df03

feat:

1、science 爬取
src/main/java/com/canrd/webmagic/processor/Science4JournalArticlePageProcessor.java
@@ -93,6 +93,9 @@ public class Science4JournalArticlePageProcessor implements PageProcessor {
93 String givenName = authorEmailSelectable.xpath("//span[@property='givenName']/text()").get(); 93 String givenName = authorEmailSelectable.xpath("//span[@property='givenName']/text()").get();
94 String familyName = authorEmailSelectable.xpath("//span[@property='familyName']/text()").get(); 94 String familyName = authorEmailSelectable.xpath("//span[@property='familyName']/text()").get();
95 String email = authorEmailSelectable.xpath("//a[@property='email']/text()").get(); 95 String email = authorEmailSelectable.xpath("//a[@property='email']/text()").get();
  96 + if (StringUtils.isBlank(email)) {
  97 + continue;
  98 + }
96 99
97 JSONObject jsonObject = new JSONObject(); 100 JSONObject jsonObject = new JSONObject();
98 jsonObject.put("authorEmailName", givenName + "" + familyName); 101 jsonObject.put("authorEmailName", givenName + "" + familyName);