From 89ec05fb28d18e3ca9cb0f37e0abfa84f12f7cae Mon Sep 17 00:00:00 2001 From: qdlgxiemaosheng <qdlgxiemaosheng@163.com> Date: Thu, 11 Apr 2024 20:49:31 +0800 Subject: [PATCH] nature文章信息爬取,存储db --- src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java b/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java index 838e443..eab35a0 100644 --- a/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java +++ b/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java @@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor { title = headSelectable.xpath("//h1/text()").get(); } String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get(); - String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); + String publishTime; + try { + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get(); + }catch (Exception e) { + try { + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(1).xpath("//li/time/text()").get(); + }catch (Exception e1) { + publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(0).xpath("//li/time/text()").get(); + } + } Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']")); List<Selectable> authorNodes = authorSelectable.nodes(); StringBuffer authorName = new StringBuffer(); -- libgit2 0.23.3