I am new to Python programming, but I have found some different code snippets and have compiled them into the code below. The Python script returns all the right HTML values from the summary array, but no values from the statistics array, because the lookups for those values do not find any matches.
I don't know how to extract the values on the Statistics pane on Yahoo Finance. It is referred to as url2 and key_stats_on_stat in the code.
I hope you are willing to help me out.
import os, sys
import csv
from bs4 import BeautifulSoup
import xlsxwriter
import urllib3
from selenium import webdriver
import pdb
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Row labels looked up on the quote "Summary" page (fetched via `url`).
key_stats_on_main = [
    'Market Cap',
    'PE Ratio (TTM)',
    'EPS (TTM)',
]

# Row labels looked up on the "Statistics" page (fetched via `url2`).
key_stats_on_stat = [
    'Enterprise Value',
    'Trailing P/E',
    'Forward P/E',
    'PEG Ratio (5 yr expected)',
    'Return on Assets',
    'Quarterly Revenue Growth',
    'EBITDA',
    'Diluted EPS',
    'Total Debt/Equity',
    'Current Ratio',
]
# Load the portfolio from stocks.csv: one stock per line, comma-separated.
# Each line becomes a list of its fields; the scraping loop further down uses
# the first field (stock[0]) as the ticker symbol.
stocks_arr = []
with open("stocks.csv", "r") as pfolio_file:  # `with` guarantees the file is closed
    for line in pfolio_file:
        stripped_line = line.strip()
        if not stripped_line:
            # A blank line would otherwise yield [''] and an empty ticker.
            continue
        indv_stock_arr = stripped_line.split(',')
        stocks_arr.append(indv_stock_arr)
print(stocks_arr)
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options

# Configure and start a headless Chrome session, then open one Yahoo Finance
# quote page so that the consent dialog (if any) can be dismissed once before
# the per-ticker scraping starts.
options = Options()
options.add_argument("--headless") # Runs Chrome in headless mode.
options.add_argument('--no-sandbox') # Bypass OS security model
options.add_argument('--disable-gpu') # applicable to windows os only
options.add_argument('start-maximized') #
options.add_argument('disable-infobars')
options.add_argument("--disable-extensions")
# NOTE(review): the path below looks like it lost its backslashes (and the
# user name) when the script was pasted -- adjust it to the real location of
# chromedriver.exe. Newer Selenium 4 releases also expect the driver path to be
# passed through a Service object instead of `executable_path` -- confirm
# against the installed Selenium version.
driver = webdriver.Chrome(options=options, executable_path=r'C:Users""DocumentsPython Scriptschromedriver_win32chromedriver.exe')
driver.get("https://finance.yahoo.com/quote/AMZN/")
try:
    # Click the second button of the form found by this XPath (presumably the
    # "accept" button of the cookie-consent dialog -- verify against the page).
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//html/body/div/div/div/form/div/button[2]'))).click()
except TimeoutException:
    # The dialog is not necessarily shown; when the button never becomes
    # clickable within 10 seconds, carry on instead of aborting the script.
    print('Consent button not found within 10 seconds; continuing without clicking it')
def _label_matches(node_text, label):
    """Return True if a text node holds *label*, optionally followed by a
    parenthesised qualifier (for example a label rendered as "<label> (ttm)").

    Surrounding whitespace is ignored. Text that merely starts with the label,
    such as "<label>/Revenue", is not accepted.
    """
    if not node_text:
        return False
    text = node_text.strip()
    if text == label:
        return True
    if not text.startswith(label):
        return False
    qualifier = text[len(label):].strip()
    return qualifier.startswith('(') and qualifier.endswith(')')


def _find_stat_value(soup, label):
    """Return the text of the value cell in the table row labelled *label*.

    The row is located through the text node carrying the label and the value
    is read from the second <td> of that row as a whole, so it does not matter
    whether the value is wrapped in a <span> or is a plain string.

    Returns None when the label, its enclosing <tr>, or a second cell cannot
    be found.
    """
    # Exact match first (the original lookup), then the tolerant match.
    label_node = soup.find(text=label)
    if label_node is None:
        label_node = soup.find(
            text=lambda node_text: _label_matches(node_text, label))
    if label_node is None:
        return None
    row = label_node.find_parent('tr')
    if row is None:
        return None
    cells = row.find_all('td')
    if len(cells) < 2:
        return None
    return cells[1].get_text(strip=True)


def _page_soup(driver, page_url):
    """Load *page_url* in the browser and parse the rendered <body> HTML."""
    driver.get(page_url)
    inner_html = driver.execute_script("return document.body.innerHTML")
    return BeautifulSoup(inner_html, 'html.parser')


# Scrape every stock of the portfolio. Each entry of stock_info_arr is
# [ticker, <values for key_stats_on_main...>, <values for key_stats_on_stat...>];
# a statistic that cannot be located is recorded as 'N/A'.
stock_info_arr = []
try:
    for stock in stocks_arr:
        ticker = stock[0]
        stock_info = [ticker]
        url = "https://finance.yahoo.com/quote/{0}?p={0}".format(ticker) #Summary
        url2 = "https://finance.yahoo.com/quote/{0}/key-statistics?p={0}".format(ticker) #Statistics
        # Both pages are handled the same way: load, parse, look up each label.
        for page_url, wanted_stats in ((url, key_stats_on_main), (url2, key_stats_on_stat)):
            soup = _page_soup(driver, page_url)
            for stat in wanted_stats:
                page_statnum = _find_stat_value(soup, stat)
                if page_statnum is None:
                    print('Invalid parent for this element')
                    page_statnum = 'N/A'
                else:
                    print(page_statnum)
                stock_info.append(page_statnum)
        stock_info_arr.append(stock_info)
finally:
    # The browser is not used after this loop; quit it even when scraping
    # fails so no Chrome/chromedriver process is left running.
    driver.quit()
print(stock_info_arr)
########## WRITING OUR RESULTS INTO EXCEL
# The header list is the summary labels followed by the statistics labels,
# in the same order in which the values were appended to each stock's row.
# Note that this extends key_stats_on_main in place.
key_stats_on_main.extend(key_stats_on_stat)
workbook = xlsxwriter.Workbook('Stocks01.xlsx')
worksheet = workbook.add_worksheet()

# Header row: labels start at column 1 because column 0 holds the ticker.
for header_col, stat in enumerate(key_stats_on_main, start=1):
    worksheet.write(0, header_col, stat)

# One sheet row per stock, starting directly below the header row.
for sheet_row, our_stock in enumerate(stock_info_arr, start=1):
    for sheet_col, info_bit in enumerate(our_stock):
        worksheet.write(sheet_row, sheet_col, info_bit)

workbook.close()
print('Script completed')
See Questions & Answers for more detail:
os 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…