# Python + Selenium: collect Google search result titles and links
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Search Google for a fixed query in Firefox, walk up to three result pages,
# and collect each result's title and link. Afterwards, gather every
# hyperlink and <h3> heading from the last page that was parsed.
titles = []
links = []

driver = webdriver.Firefox()
try:
    driver.get('https://www.google.com/')
    # Other start URLs noted by the author:
    #   https://news.google.cn
    #   https://scholar.google.com/
    q = driver.find_element(By.NAME, 'q')
    q.send_keys('疫苗')
    q.send_keys(Keys.RETURN)

    for page in range(3):
        # Block until the results container exists so page_source is not
        # read while the page is still loading. Raises TimeoutException if
        # it does not appear within 10 seconds.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'rso'))
        )
        soup = BeautifulSoup(driver.page_source, 'lxml')

        # NOTE(review): this selector depends on Google's result markup;
        # verify that it still matches the current page structure.
        for element in soup.select("#rso div.r a"):
            # Only anchors that wrap an <h3> are result links; record the
            # title and the link together so the two lists stay aligned.
            for heading in element.select('h3'):
                titles.append(heading.text)
                links.append(element['href'])

        # Short pause between page requests.
        time.sleep(1)
        try:
            next_link = driver.find_element(By.LINK_TEXT, '下一頁')
        except NoSuchElementException:
            # No further result page: keep what has been collected so far.
            break
        next_link.click()
finally:
    # Always release the browser process, even when scraping fails.
    driver.quit()

# Every hyperlink on the last parsed page, printed one per line.
all_href = [anchor['href'] for anchor in soup.find_all('a', href=True)]
for href in all_href:
    print(href)

# All <h3> headings on the last parsed page (previously computed and discarded).
all_h3 = soup.find_all('h3')
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import pandas as pd
# Search Google for a fixed query in Firefox, walk up to three result pages,
# and collect each result's title and link. Afterwards, gather every
# hyperlink and <h3> heading from the last page that was parsed.
titles = []
links = []

driver = webdriver.Firefox()
try:
    driver.get('https://www.google.com/')
    # Other start URLs noted by the author:
    #   https://news.google.cn
    #   https://scholar.google.com/
    q = driver.find_element(By.NAME, 'q')
    q.send_keys('疫苗')
    q.send_keys(Keys.RETURN)

    for page in range(3):
        # Block until the results container exists so page_source is not
        # read while the page is still loading. Raises TimeoutException if
        # it does not appear within 10 seconds.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'rso'))
        )
        soup = BeautifulSoup(driver.page_source, 'lxml')

        # NOTE(review): this selector depends on Google's result markup;
        # verify that it still matches the current page structure.
        for element in soup.select("#rso div.r a"):
            # Only anchors that wrap an <h3> are result links; record the
            # title and the link together so the two lists stay aligned.
            for heading in element.select('h3'):
                titles.append(heading.text)
                links.append(element['href'])

        # Short pause between page requests.
        time.sleep(1)
        try:
            next_link = driver.find_element(By.LINK_TEXT, '下一頁')
        except NoSuchElementException:
            # No further result page: keep what has been collected so far.
            break
        next_link.click()
finally:
    # Always release the browser process, even when scraping fails.
    driver.quit()

# Every hyperlink on the last parsed page, printed one per line.
all_href = [anchor['href'] for anchor in soup.find_all('a', href=True)]
for href in all_href:
    print(href)

# All <h3> headings on the last parsed page (previously computed and discarded).
all_h3 = soup.find_all('h3')
评论
发表评论