Commit b778962166ad64d6415a914f414cc1db6bbccceb

Authored by 谢茂盛
1 parent a24924df

feat:

1、selenium 整合
2、science 网站
src/main/java/com/canrd/webmagic/processor/Science4JournalSearchPageProcessor.java
@@ -73,7 +73,7 @@ public class Science4JournalSearchPageProcessor implements PageProcessor {
 for (int i = 0; i <= nodes.size() - 1; i++) {
     String title = nodes.get(i).xpath("//div[@class='card pb-3 mb-4 border-bottom']/div").xpath("//div[@class='d-flex justify-content-between align-items-end']/div/span/h2/a/text()").get();
     String time = nodes.get(i).xpath("//div[@class='card-meta align-middle mb-2 text-uppercase text-darker-gray']/span").nodes().get(2).xpath("//time/text()").get();
-    String link = nodes.get(0).links().get();
+    String link = nodes.get(i).links().get();
     if (!KeywordUtil.containKeywordsInTitle(title)) {
         SimpleDateFormat formatter = new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH);
         try {