python selenium

import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Launch a Firefox session and open the Google home page.
driver = webdriver.Firefox()
driver.get('https://www.google.com/')

# Other start URLs noted alongside the script:
#   https://news.google.cn
#   https://scholar.google.com/

# Locate the element named "q" (presumably the search box -- confirm against
# the live page), type the query and press Return to submit it.
# The find_element_by_* helpers were removed in Selenium 4; the locator-based
# find_element(By..., value) call works on both Selenium 3 and 4.
q = driver.find_element(By.NAME, 'q')
q.send_keys('疫苗')
q.send_keys(Keys.RETURN)

#time.sleep(5)
# Accumulators filled by the paging loop below; entry k of `titles` and
# entry k of `links` come from the same anchor.
titles = []
links = []

# Scrape three consecutive result pages.
for p in range(3):
    # Parse the HTML the browser currently holds.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for element in soup.select("#rso div.r a"):
        # Record one (title, link) pair per <h3> found inside the anchor, so
        # anchors without an <h3> contribute nothing and both lists stay the
        # same length.
        # NOTE(review): the original paste had lost its indentation; pairing
        # the href with each <h3> is the reconstructed intent -- confirm.
        for heading in element.select('h3'):
            titles.append(heading.text)
            links.append(element['href'])

    # Brief pause before moving on to the next result page.
    time.sleep(1)
    # find_element_by_link_text was removed in Selenium 4; use the
    # locator-based API, which is also available in Selenium 3.
    driver.find_element(By.LINK_TEXT, '下一頁').click()


# Print the href of every anchor in `soup` that carries an href attribute.
for a in soup.find_all('a', href=True):
    print(a['href'])

# Bare expression: the result is discarded when run as a script and is only
# displayed in an interactive session / notebook.
soup.find_all('h3')

# The same hrefs as printed above, collected into a list of strings.
all_href = [anchor['href'] for anchor in soup.find_all('a', href=True)]


评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理