Commit 89ec05fb28d18e3ca9cb0f37e0abfa84f12f7cae

Authored by qdlgxiemaosheng
1 parent af0bbdce

nature文章信息爬取,存储db

src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
... ... @@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor {
101 101 title = headSelectable.xpath("//h1/text()").get();
102 102 }
103 103 String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get();
104   - String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get();
  104 + String publishTime;
  105 + try {
  106 + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get();
  107 + }catch (Exception e) {
  108 + try {
  109 + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(1).xpath("//li/time/text()").get();
  110 + }catch (Exception e1) {
  111 + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(0).xpath("//li/time/text()").get();
  112 + }
  113 + }
105 114 Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']"));
106 115 List<Selectable> authorNodes = authorSelectable.nodes();
107 116 StringBuffer authorName = new StringBuffer();
... ...