From 89ec05fb28d18e3ca9cb0f37e0abfa84f12f7cae Mon Sep 17 00:00:00 2001
From: qdlgxiemaosheng <qdlgxiemaosheng@163.com>
Date: Thu, 11 Apr 2024 20:49:31 +0800
Subject: [PATCH] nature文章信息爬取,存储db

---
 src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java b/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
index 838e443..eab35a0 100644
--- a/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
+++ b/src/main/java/com/canrd/webmagic/processor/NatureSearchPageProcessor.java
@@ -101,7 +101,16 @@ public class NatureSearchPageProcessor implements PageProcessor {
             title = headSelectable.xpath("//h1/text()").get();
         }
         String articleDesc = html.xpath("//div[@class='c-article-section__content']/p/text()").get();
-        String publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get();
+        String publishTime;
+        try {
+            publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(2).xpath("//li/time/text()").get();
+        }catch (Exception e) {
+            try {
+                publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(1).xpath("//li/time/text()").get();
+            }catch (Exception e1) {
+                publishTime = headSelectable.xpath("//ul").nodes().get(0).xpath("//li").nodes().get(0).xpath("//li/time/text()").get();
+            }
+        }
         Selectable authorSelectable = headSelectable.xpath("//ul").nodes().get(1).select(new XpathSelector("li[@class='c-article-author-list__item']"));
         List<Selectable> authorNodes = authorSelectable.nodes();
         StringBuffer authorName = new StringBuffer();
--
libgit2 0.23.3