Budget Automation Using Chrome
Selenium is a browser automation tool. The default browser is Firefox. To run the Selenium WebDriver in the Chrome browser instead, change the settings in amazon_bot.py as follows:
from selenium import webdriver

# Location of the separately downloaded ChromeDriver executable.
chromedriver = "C:\\Users\\Z\\chromedriver"
driver = webdriver.Chrome(chromedriver)
# Fixed: the original URL "https:google.com" lacked the "//" after the scheme.
driver.get("https://google.com")
Install selenium and chromedriver
- Download chromedriver
pip install selenium
In PyCharm, open Python Console
from selenium import webdriver
chromedriver = "C:\\Users\\Z\\chromedriver"
browser = webdriver.Chrome(chromedriver)
browser.get("http://www.seleniumhq.org/")
Store element
browser.find_element_by_link_text('Download')
<selenium.webdriver.remote.webelement.WebElement (session="0a6357a39dd50d90503757857aead1ee", element="0.5927409042043188-1")>
Click on tab 'Download'
elem = browser.find_element_by_link_text('Download')
elem.click()
Click on tab 'Projects'
elem = browser.find_element_by_link_text('Projects')
elem.click()
Navigate to Search bar and enter 'download' in box
searchBar = browser.find_element_by_id('q')
searchBar.send_keys('download')
Press 'Enter' to search
from selenium.webdriver.common.keys import Keys
searchBar.send_keys(Keys.ENTER)
Install the following:
- bs4
- lxml
- requests
- selenium
Set Google API settings
product_price.py
from amazon_bot import AmazonBot
from email_alert import EmailAlert
import gspread
from oauth2client.service_account import ServiceAccountCredentials
class PriceUpdater(object):
    """Fill a Google spreadsheet with Amazon price, URL and name data.

    Item names are read from column 1 of the first worksheet; the first
    value of that column is skipped (presumably a header row) and results
    are written starting at row 2.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with Google and open the first sheet of the spreadsheet.

        Args:
            spreadsheet_name: Title of the Google spreadsheet to open.

        Service-account credentials are loaded from ``client_secret.json``.
        """
        # 1-based worksheet column indexes.
        self.item_col = 1
        # Fixed: this was ``self.price.col``, which raised AttributeError and
        # left ``price_col`` (used by process_item_list) undefined.
        self.price_col = 2
        self.frequency_col = 3
        self.url_col = 4
        self.product_name_col = 5

        scope = ['https://spreadsheets.google.com/feeds',
                 'https://www.googleapis.com/auth/drive']
        creds = ServiceAccountCredentials.from_json_keyfile_name(
            'client_secret.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def process_item_list(self):
        """Look up every listed item on Amazon and write the results back.

        For each item the price, product URL and product name are written
        to their respective columns on the item's row.
        """
        items = self.sheet.col_values(self.item_col)[1:]
        amazon_bot = AmazonBot(items)
        try:
            prices, urls, names = amazon_bot.search_items()
        finally:
            # Always release the browser, even if the search fails.
            amazon_bot.close_session()

        print("Updating spreadsheet.")
        # Results are in item order; the first item lives on row 2.
        for row, (price, url, name) in enumerate(zip(prices, urls, names),
                                                 start=2):
            self.sheet.update_cell(row, self.price_col, price)
            self.sheet.update_cell(row, self.url_col, url)
            self.sheet.update_cell(row, self.product_name_col, name)
# Refresh the "ProductPrice" spreadsheet, then send a notification e-mail.
price_updater = PriceUpdater(spreadsheet_name="ProductPrice")
price_updater.process_item_list()

email = EmailAlert(
    subject="Google Sheets Updated",
    msg="This is a message to let you know that the spreadsheet has been updated.",
)
email.send_email()
amazon_bot.py
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
class AmazonBot(object):
    """Look up items on Amazon.ca with a Selenium-driven Firefox browser.

    For each search term the bot takes the first search result and
    collects its product name, price and product-page URL.
    """

    def __init__(self, items):
        """Start Firefox and load the Amazon home page.

        Args:
            items: Iterable of search terms, one per product to look up.
        """
        self.amazon_url = "https://www.amazon.ca/"
        self.items = items

        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # self.options.add_argument("--headless")
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        firefox_options=self.options)

        # Navigate to the Amazon URL.
        self.driver.get(self.amazon_url)

        # Obtain the source
        self.html = self.driver.page_source
        self.soup = BeautifulSoup(self.html, 'html.parser')
        self.html = self.soup.prettify('utf-8')

    def search_items(self):
        """Search Amazon for every item and collect the first result's data.

        Returns:
            A ``(prices, urls, names)`` tuple of three lists, each holding
            one entry per item in the order of ``self.items``.
        """
        # Fixed: the two initialisers below had lost their ``[]`` and were
        # syntax errors.
        urls = []
        prices = []
        names = []
        for item in self.items:
            print(f"Searching for {item}...")

            self.driver.get(self.amazon_url)
            # select = Select(self.driver.find_element_by_id("searchDropdownDescription"))
            # select.select_by_visible_text('All Departments')

            search_input = self.driver.find_element_by_id("twotabsearchtextbox")
            search_input.send_keys(item)

            time.sleep(2)
            # wait = WebDriverWait(self.driver, self.explicit_wait)
            # wait.until(EC.presence_of_all_elements_located((By.ID, "twotabsearchtextbox")))

            search_button = self.driver.find_element_by_xpath(
                '//*[@id="nav-search"]/form/div[2]/div/input')
            search_button.click()

            time.sleep(2)

            # The first search result carries the product's ASIN, from which
            # the product-page URL is built.
            first_result = self.driver.find_element_by_id("result_0")
            asin = first_result.get_attribute("data-asin")
            url = "https://www.amazon.ca/dp/" + asin

            price = self.get_product_price(url)
            name = self.get_product_name(url)

            prices.append(price)
            urls.append(url)
            names.append(name)

            print(name)
            print(price)
            print(url)

            time.sleep(2)

        return prices, urls, names

    def get_product_price(self, url):
        """Load *url* and return the cleaned product price.

        The elements ``priceblock_ourprice`` and ``priceblock_dealprice``
        are tried in that order; when both exist the deal price wins.

        Returns:
            The price text stripped of everything except digits and dots,
            or ``"Not available"`` when neither element is on the page.
        """
        self.driver.get(url)

        # Fixed: ``price`` used to be unbound when neither element existed,
        # so the "Not available" branch could never be reached.
        price = None
        for element_id in ("priceblock_ourprice", "priceblock_dealprice"):
            try:
                price = self.driver.find_element_by_id(element_id).text
            except NoSuchElementException:
                continue

        if price is None:
            return "Not available"
        return re.sub(r'[^\d.]+', '', price)

    def get_product_name(self, url):
        """Load *url* and return the text of its ``productTitle`` element.

        Returns:
            The product title, or ``"Not available"`` when the element is
            not on the page.
        """
        self.driver.get(url)
        try:
            return self.driver.find_element_by_id("productTitle").text
        except NoSuchElementException:
            # Fixed: previously fell through to an unbound local variable.
            return "Not available"

    def close_session(self):
        """Close the browser session."""
        self.driver.close()
email_alert.py
import config
import smtplib
class EmailAlert(object):
    """Send a plain-text alert e-mail through Gmail's SMTP server.

    The sender address, its password and the recipient address are read
    from the ``config`` module.
    """

    def __init__(self, subject, msg):
        """Store the subject line and body text of the alert."""
        self.subject = subject
        self.msg = msg

    def send_email(self):
        """Send the alert and print whether it succeeded.

        Sending is best-effort: any error is reported on stdout together
        with its reason instead of being raised to the caller.
        """
        message = 'Subject: {}\n\n{}'.format(self.subject, self.msg)
        try:
            # The context manager closes the connection even when a step
            # fails, which the original code did not do.
            with smtplib.SMTP('smtp.gmail.com', 587) as server:
                server.ehlo()
                server.starttls()
                server.login(config.FROM_EMAIL_ADDRESS, config.PASSWORD)
                server.sendmail(config.FROM_EMAIL_ADDRESS,
                                config.TO_EMAIL_ADDRESS,
                                message)
        except Exception as err:
            # Unlike the previous bare ``except``, this lets
            # KeyboardInterrupt/SystemExit through and shows the cause.
            print("Email failed to send: {}".format(err))
        else:
            print("Success: Email sent!")
config.py
# Fill in with your own details.
# Address the alert is sent from; also used as the SMTP login name.
FROM_EMAIL_ADDRESS = ""
# Address that receives the alert.
TO_EMAIL_ADDRESS = ""
# Password used to log in as FROM_EMAIL_ADDRESS.
PASSWORD = ""