Commit 89ec05fb28d18e3ca9cb0f37e0abfa84f12f7cae
1 parent
af0bbdce
nature文章信息爬取,存储db
Showing
1 changed file
with
10 additions
and
1 deletion
src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
... | ... | @@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor { |
101 | 101 | title = headSelectable.xpath("//h1/text()").get(); |
102 | 102 | } |
103 | 103 | String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get(); |
104 | - String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); | |
104 | + String publishTime; | |
105 | + try { | |
106 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); | |
107 | + }catch (Exception e) { | |
108 | + try { | |
109 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(1).xpath("//li/time/text()").get(); | |
110 | + }catch (Exception e1) { | |
111 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(0).xpath("//li/time/text()").get(); | |
112 | + } | |
113 | + } | |
105 | 114 | Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']")); |
106 | 115 | List<Selectable> authorNodes = authorSelectable.nodes(); |
107 | 116 | StringBuffer authorName = new StringBuffer(); | ... | ... |