python selenium

七月 13, 2019

import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Start a Firefox session and open the Google home page.
driver = webdriver.Firefox()
driver.get('https://www.google.com/')

# Alternative start pages:
#https://news.google.cn
#https://scholar.google.com/

# Locate the search box (the input named 'q'), type the query and submit it
# with Enter.  find_element(By.NAME, ...) is used instead of
# find_element_by_name, which is deprecated in Selenium 4 and removed in
# later releases; the (by, value) form works on old and new versions alike.
q = driver.find_element(By.NAME, 'q')
q.send_keys('疫苗')
q.send_keys(Keys.RETURN)

#time.sleep(5)
# Parallel lists: titles[k] is the heading text of the result whose link
# target is links[k].
titles = []
links = []

# Scrape three consecutive result pages.
for p in range(3):
    # Parse the HTML the browser is currently showing.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for element in soup.select("#rso div.r a"):
        # Only anchors that wrap an <h3> are kept; the link is appended
        # together with the heading so both lists stay index-aligned.
        for i in element.select('h3'):
            titles.append(i.text)
            links.append(element['href'])

    # The last page has been scraped: do not navigate any further.  Clicking
    # again would be a wasted page load and raises NoSuchElementException
    # when there is no further result page.
    if p == 2:
        break

    time.sleep(1)
    # '下一頁' is the link text of the "next page" link.
    # find_element(By.LINK_TEXT, ...) replaces the deprecated/removed
    # find_element_by_link_text helper.
    driver.find_element(By.LINK_TEXT, '下一頁').click()

# Collect the target of every <a> tag that has an href attribute on the most
# recently parsed page (`soup`), then print them one per line.  The list is
# built once and reused for printing instead of walking the tree twice.
all_href = [anchor['href'] for anchor in soup.find_all('a', href=True)]
for href in all_href:
    print(href)

# Bare expression: its value is only echoed in an interactive session
# (REPL / notebook); when run as a script the result is discarded.
soup.find_all('h3')

搜索此博客

xuefliang

python selenium

评论

发表评论

此博客中的热门博文

windows 命令行下查看端口占用情况的方法

Android 7.0 开启Google Now

Rstudio 使用代理

python selenium

评论

发表评论

此博客中的热门博文

windows 命令行下查看端口占用情况的方法

Android 7.0 开启Google Now

Rstudio 使用代理

windows 命令行下查看端口占用情况的方法