diff --git a/src/main/java/com/canrd/webmagic/config/SeleniumConfig.java b/src/main/java/com/canrd/webmagic/config/SeleniumConfig.java index da1ee5c..ff851d4 100644 --- a/src/main/java/com/canrd/webmagic/config/SeleniumConfig.java +++ b/src/main/java/com/canrd/webmagic/config/SeleniumConfig.java @@ -6,6 +6,8 @@ import org.openqa.selenium.chrome.ChromeOptions; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import java.util.Arrays; + /** * @author: xms * @description: TODO @@ -31,6 +33,32 @@ public class SeleniumConfig { // 禁用本地缓存,确保每次访问都从服务器获取 options.addArguments("--disable-application-cache"); + // 禁止策略化 + options.addArguments("--disable-infobars"); + // 解决DevToolsActivePort文件不存在的报错 + options.addArguments("--no-sandbox"); + // 指定浏览器分辨 + options.addArguments("window-size=1920x3000"); + // 谷歌文档提到需要加上这个属性来规避bug + options.addArguments("--disable-gpu"); + // 隐身模式(无痕模式) + options.addArguments("--incognito"); + // 最大化运行(全屏窗口),不设置,取元素会报错 + options.addArguments("--start-maximized"); + // 禁用浏览器正在被自动化程序控制的提示 + options.addArguments("--disable-infobars"); + // 隐藏滚动条, 应对一些特殊页面 + options.addArguments("--hide-scrollbars"); + // 不加载图片, 提升速度 + options.addArguments("blink-settings=imagesEnabled=false"); + // 浏览器不提供可视化页面. linux下如果系统不支持可视化不加这条会启动失败 + options.addArguments("--headless"); + //禁用 blink 特征 + options.addArguments("disable-blink-features=AutomationControlled"); + options.setExperimentalOption("excludeSwitches", Arrays.asList("enable-automation")); + options.setExperimentalOption("useAutomationExtension", false); + options.addArguments("--remote-allow-origins=*"); + String os_name = System.getProperty("os.name"); // 判断是否是windows系统 if (os_name.toLowerCase().startsWith("win")) { diff --git a/src/main/java/com/canrd/webmagic/processor/download/SeleniumDownloader.java b/src/main/java/com/canrd/webmagic/processor/download/SeleniumDownloader.java index cf7cdda..59bb6aa 100644 --- a/src/main/java/com/canrd/webmagic/processor/download/SeleniumDownloader.java +++ b/src/main/java/com/canrd/webmagic/processor/download/SeleniumDownloader.java @@ -28,7 +28,7 @@ import java.util.Map; @Slf4j @Component public class SeleniumDownloader extends AbstractDownloader { - private int sleepTime = 30; + private int sleepTime = 3000; @Resource private SeleniumConfig config; @@ -67,24 +67,13 @@ public class SeleniumDownloader extends AbstractDownloader { webDriver.get(request.getUrl()); try { if (sleepTime > 0) { + //休眠3秒就是为了动态的数据渲染完成后在进行获取 Thread.sleep(sleepTime); } } catch (InterruptedException e) { e.printStackTrace(); } - - /* - * TODO You can add mouse event or other processes - * - * @author: bob.li.0718@gmail.com - */ - try { - //休眠3秒就是为了动态的数据渲染完成后在进行获取 - Thread.sleep(3000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } WebElement webElement = webDriver.findElement(By.xpath("/html")); String content = webElement.getAttribute("outerHTML"); page.setDownloadSuccess(true);