RSelenium 爬取 中检院生物制品批签发信息公示表
# Scrape the NIFDC "biological product lot-release information" notice pages
# with RSelenium.
#
# Prerequisite: a Selenium standalone server must already be listening on
# localhost:4444 (the address/port used below), e.g. started with
#   java -jar /usr/local/bin/selenium-server-standalone-3.9.1.jar
library(RSelenium)
library(rvest)
library(stringr)
library(magrittr)
library(tidyverse)

# Client object for the Selenium server configured above.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444,
  browserName = "firefox"
)

# Open the browser and load the listing page.
remDr$open()
remDr$navigate("http://www.nifdc.org.cn/CL0903/")

# First page: find every link whose text contains the notice title and
# collect the links' href attributes.
webElems <- remDr$findElements(
  using = "partial link text",
  "中检院生物制品批签发信息公示表"
)
links <- unlist(lapply(webElems, function(e) {
  e$getElementAttribute("href")
}))

# Next page: follow the "下一页" (next page) link once and append the hrefs of
# the matching links found there.
nextElem <- remDr$findElement(using = "partial link text", "下一页")
a.elem <- nextElem$getElementAttribute("href")[[1]]
remDr$navigate(a.elem)
nextWebElems <- remDr$findElements(
  using = "partial link text",
  "中检院生物制品批签发信息公示表"
)
links <- c(links, unlist(lapply(nextWebElems, function(e) {
  e$getElementAttribute("href")
})))

# Empty data frame, initialised before the per-link loop below.
dataframe <- data.frame()

# NOTE(review): the loop below is cut off at the end of the visible source, so
# its header is left exactly as found and the remainder can still attach.
# When the full loop is in view, consider seq_along(links): 1:length(links)
# iterates over c(1, 0) if no links were found.
for (i in 1:length(links)) { re