Commit 89ec05fb28d18e3ca9cb0f37e0abfa84f12f7cae
1 parent
af0bbdce
nature文章信息爬取,存储db
Showing
1 changed file
with
10 additions
and
1 deletion
src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
@@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor { | @@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor { | ||
101 | title = headSelectable.xpath("//h1/text()").get(); | 101 | title = headSelectable.xpath("//h1/text()").get(); |
102 | } | 102 | } |
103 | String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get(); | 103 | String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get(); |
104 | - String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); | 104 | + String publishTime; |
105 | + try { | ||
106 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); | ||
107 | + }catch (Exception e) { | ||
108 | + try { | ||
109 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(1).xpath("//li/time/text()").get(); | ||
110 | + }catch (Exception e1) { | ||
111 | + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(0).xpath("//li/time/text()").get(); | ||
112 | + } | ||
113 | + } | ||
105 | Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']")); | 114 | Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']")); |
106 | List<Selectable> authorNodes = authorSelectable.nodes(); | 115 | List<Selectable> authorNodes = authorSelectable.nodes(); |
107 | StringBuffer authorName = new StringBuffer(); | 116 | StringBuffer authorName = new StringBuffer(); |