Commit e8c669c786fc7019e541cde43255eb7b3efeec0a

Authored by 谢茂盛
1 parent 432582f2

feat:

1. Selenium integration (selenium 整合)
2. Science website (science 网站)
src/main/java/com/canrd/webmagic/common/utils/KeywordUtil.java
@@ -10,7 +10,7 @@ import java.util.List; @@ -10,7 +10,7 @@ import java.util.List;
10 * @version: 1.0 10 * @version: 1.0
11 */ 11 */
12 public class KeywordUtil { 12 public class KeywordUtil {
13 - public static final String WORDS = "Aluminum foil,Nickel foam,electrolyte,Activated carbon,sodium ion battery,nickel cobalt manganese,binder,conductive carbon black,high voltage lithium nickel manganese oxide,lithium nickel cobalt manganese oxide,high purity manganese dioxide,lithium cobalt oxide,lithium sulfur battery cathode,lithium vanadium phosphate,lithium manganese oxide,graphite,carbon-coated lithium titanate,isolation film,titanium foil,tin foil,zinc foil,weighing paper,Jintang Microporous filter film,sterile filter film,Microporous filter film,Glass rod,glass funnel,high shape weighing bottle,graphene,sodium ferric phosphate phosphate,electrode sheet,sodium electrohard carbon electrode sheet,sodium battery dry cell,lithium battery dry cell,metal negative dry cell,solid electrolyte,solid electrolyte powder,solid electrolyte sheet,titanium aluminum lithium phosphate solid electrolyte,sodium electrohard carbon,dioxopentyl ring,silicon carbon button electrolyte,PTFE powder,sodium alginate,Nanofiber,copper foil,nickel foil,stack assembly machine,gas diffusion electrode test fixture,fixture,flow battery test bench,electrolytic water test bench,catalyst,supercapacitor dry electrode sheet,positive electrode material,negative electrode material,Silver Oxide Battery,Alkaline Battery,Lithium Polymer Battery,Solid State Battery,Lead Acid Battery,NiMH Battery,Lithium-ion Battery,lithium battery,sodium battery,zinc battery,nickel-cadmium cell,zinc-manganese battery,smee cell,fuel cell,solar cell,copper zinc cell,zinc iron cell,alkaline battery,Zinc-carbon battery,lithium metal battery"; 13 + public static final String WORDS = "Aluminum foil,Nickel foam,electrolyte,Activated carbon,sodium ion battery,nickel cobalt manganese,binder,conductive carbon black,high voltage lithium nickel manganese oxide,lithium nickel cobalt manganese oxide,high purity manganese dioxide,lithium cobalt oxide,lithium sulfur battery cathode,lithium vanadium phosphate,lithium manganese 
oxide,graphite,carbon-coated lithium titanate,isolation film,titanium foil,tin foil,zinc foil,weighing paper,Jintang Microporous filter film,sterile filter film,Microporous filter film,Glass rod,glass funnel,high shape weighing bottle,graphene,sodium ferric phosphate phosphate,electrode sheet,sodium electrohard carbon electrode sheet,sodium battery dry cell,lithium battery dry cell,metal negative dry cell,solid electrolyte,solid electrolyte powder,solid electrolyte sheet,titanium aluminum lithium phosphate solid electrolyte,sodium electrohard carbon,dioxopentyl ring,silicon carbon button electrolyte,PTFE powder,sodium alginate,Nanofiber,copper foil,nickel foil,stack assembly machine,gas diffusion electrode test fixture,fixture,flow battery test bench,electrolytic water test bench,catalyst,supercapacitor dry electrode sheet,positive electrode material,negative electrode material,Silver Oxide Battery,Alkaline Battery,Lithium Polymer Battery,Solid State Battery,Lead Acid Battery,NiMH Battery,Lithium-ion Battery,lithium battery,sodium battery,zinc battery,nickel-cadmium cell,zinc-manganese battery,smee cell,fuel cell,solar cell,copper zinc cell,zinc iron cell,alkaline battery,Zinc-carbon battery,lithium metal battery,battery,Developing";
14 14
15 /** 15 /**
16 * @return 16 * @return
@@ -25,6 +25,9 @@ public class KeywordUtil { @@ -25,6 +25,9 @@ public class KeywordUtil {
25 * @return 25 * @return
26 */ 26 */
27 public static boolean containKeywordsInTitle(String title) { 27 public static boolean containKeywordsInTitle(String title) {
  28 + if (StringUtils.isBlank(title)) {
  29 + return false;
  30 + }
28 List<String> keywords = getKeyWordList(); 31 List<String> keywords = getKeyWordList();
29 for (String word : keywords) { 32 for (String word : keywords) {
30 if (title.contains(word)) { 33 if (title.contains(word)) {
src/main/java/com/canrd/webmagic/processor/Science4JournalSearchPageProcessor.java
@@ -75,7 +75,7 @@ public class Science4JournalSearchPageProcessor implements PageProcessor { @@ -75,7 +75,7 @@ public class Science4JournalSearchPageProcessor implements PageProcessor {
75 String title = nodes.get(i).xpath("//div[@class='card pb-3 mb-4 border-bottom']/div").xpath("//div[@class='d-flex justify-content-between align-items-end']/div/span/h2/a/text()").get(); 75 String title = nodes.get(i).xpath("//div[@class='card pb-3 mb-4 border-bottom']/div").xpath("//div[@class='d-flex justify-content-between align-items-end']/div/span/h2/a/text()").get();
76 String time = nodes.get(i).xpath("//div[@class='card-meta align-middle mb-2 text-uppercase text-darker-gray']/span").nodes().get(2).xpath("//time/text()").get(); 76 String time = nodes.get(i).xpath("//div[@class='card-meta align-middle mb-2 text-uppercase text-darker-gray']/span").nodes().get(2).xpath("//time/text()").get();
77 String link = nodes.get(i).links().get(); 77 String link = nodes.get(i).links().get();
78 - if (!KeywordUtil.containKeywordsInTitle(title)) { 78 + if (KeywordUtil.containKeywordsInTitle(title)) {
79 SimpleDateFormat formatter = new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH); 79 SimpleDateFormat formatter = new SimpleDateFormat("dd MMMM yyyy", Locale.ENGLISH);
80 try { 80 try {
81 Date publishTimeDateTime = formatter.parse(time); 81 Date publishTimeDateTime = formatter.parse(time);