lmari’s blog

Data analytics, machine learning & front end development

Budget Automation Using Chrome

f:id:lmari:20190520133315j:plain

Selenium is a browser automation tool. The default browser is Firefox, so the settings in amazon_bot.py need to be changed to run the Selenium WebDriver in the Chrome browser:

 from selenium import webdriver

# Path to the chromedriver executable; adjust it for your machine.
chromedriver = "C:\\Users\\Z\\chromedriver"
driver = webdriver.Chrome(chromedriver)
# A URL needs "//" after the scheme; "https:google.com" is not a valid address.
driver.get("https://google.com")

  Install selenium and chromedriver

pip install selenium

In PyCharm, open Python Console

from selenium import webdriver

# Path to the chromedriver executable; adjust it for your machine.
chromedriver = "C:\\Users\\Z\\chromedriver"
# Start a Chrome session through that driver.
browser = webdriver.Chrome(chromedriver)
# Load the page used by the steps that follow.
browser.get("http://www.seleniumhq.org/")

Store element

# Look up the link whose visible text is 'Download'; the console echoes the returned WebElement.
browser.find_element_by_link_text('Download')

<selenium.webdriver.remote.webelement.WebElement (session="0a6357a39dd50d90503757857aead1ee", element="0.5927409042043188-1")>

Click on tab 'Download'

# Keep a reference to the 'Download' link so it can be clicked.
elem = browser.find_element_by_link_text('Download')
elem.click()

Click on tab 'Projects'

# Re-use the same variable for the 'Projects' link and click it.
elem = browser.find_element_by_link_text('Projects')
elem.click()

Navigate to Search bar and enter 'download' in box

# Locate the search box by its element id 'q'.
searchBar = browser.find_element_by_id('q')
# Type the query into the box; it is submitted in the next step.
searchBar.send_keys('download')

Press 'Enter' to search

from selenium.webdriver.common.keys import Keys
# Send the Enter key to the search box to submit the query.
searchBar.send_keys(Keys.ENTER)

 

Install the following: 

  • bs4
  • lxml
  • requests
  • selenium

Set Google API settings

 

product_price.py

from amazon_bot import AmazonBot

from email_alert import EmailAlert

import gspread

from oauth2client.service_account import ServiceAccountCredentials

class PriceUpdater(object):
    """Refresh the price, URL and product-name columns of a Google Sheet.

    Item names are read from the sheet, looked up with ``AmazonBot``, and the
    results are written back row by row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with Google and open the first worksheet.

        Args:
            spreadsheet_name: Title of the spreadsheet to open. Credentials
                are read from ``client_secret.json`` in the working directory.
        """
        # 1-based column numbers of the worksheet.
        self.item_col = 1
        self.price_col = 2
        self.frequency_col = 3
        self.url_col = 4
        self.product_name_col = 5

        scope = ['https://spreadsheets.google.com/feeds',
                 'https://www.googleapis.com/auth/drive']
        creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def process_item_list(self):
        """Look up every listed item and write the results to the sheet.

        The first row of the item column is skipped (treated as a header),
        so the n-th item's results are written to row n + 2 (0-based n).
        """
        items = self.sheet.col_values(self.item_col)[1:]

        amazon_bot = AmazonBot(items)
        try:
            prices, urls, names = amazon_bot.search_items()
        finally:
            # Always release the browser, even when a lookup fails.
            amazon_bot.close_session()

        print("Updating spreadsheet.")
        # Row 1 holds the header, so results start at row 2.
        for row, (price, url, name) in enumerate(zip(prices, urls, names), start=2):
            self.sheet.update_cell(row, self.price_col, price)
            self.sheet.update_cell(row, self.url_col, url)
            self.sheet.update_cell(row, self.product_name_col, name)

# Refresh the "ProductPrice" spreadsheet with the latest lookups.
price_updater = PriceUpdater("ProductPrice")
price_updater.process_item_list()

# Then send a notification that the update has run.
email = EmailAlert(
    "Google Sheets Updated",
    "This is a message to let you know that the spreadsheet has been updated.",
)
email.send_email()

 

amazon_bot.py

from bs4 import BeautifulSoup

from selenium import webdriver

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.firefox.options import Options

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.ui import Select

from selenium.webdriver.common.by import By

from selenium.common.exceptions import TimeoutException

import re

import time

 

class AmazonBot(object):
    """Look up items on Amazon and collect their price, URL and name.

    A Firefox WebDriver session is opened when the bot is created and is
    reused for every lookup; call ``close_session`` when finished.
    """

    # Placeholder stored when a value cannot be found on the page.
    NOT_AVAILABLE = "Not available"

    # Everything except digits and the decimal point is stripped from prices.
    _NON_PRICE_CHARS = re.compile(r'[^\d.]+')

    def __init__(self, items):
        """Start a Firefox session and load the Amazon home page.

        Args:
            items: Iterable of search terms, one per product to look up.
        """
        self.amazon_url = "https://www.amazon.ca/"
        self.items = items

        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Uncomment to run Firefox headless:
        # self.options.add_argument("--headless")
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        firefox_options=self.options)

        # Navigate to the Amazon URL.
        self.driver.get(self.amazon_url)

        # Keep the parsed landing page and a prettified UTF-8 copy of it.
        self.html = self.driver.page_source
        self.soup = BeautifulSoup(self.html, 'html.parser')
        self.html = self.soup.prettify('utf-8')

    def search_items(self):
        """Search Amazon for every item and collect price, URL and name.

        Returns:
            A ``(prices, urls, names)`` tuple of lists, each holding one entry
            per item in the same order as ``self.items``. When a search
            yields no usable first result, all three entries for that item
            are ``NOT_AVAILABLE`` so the lists stay aligned with the items.
        """
        urls = []
        prices = []
        names = []

        for item in self.items:
            print(f"Searching for {item}...")
            self.driver.get(self.amazon_url)

            search_input = self.driver.find_element_by_id("twotabsearchtextbox")
            search_input.send_keys(item)
            # Fixed pauses give the page time to react before the next step.
            time.sleep(2)

            search_button = self.driver.find_element_by_xpath(
                '//*[@id="nav-search"]/form/div[2]/div/input')
            search_button.click()
            time.sleep(2)

            # The plural lookup returns an empty list instead of raising
            # when the search produced no first result.
            first_hit = self.driver.find_elements_by_id("result_0")
            asin = first_hit[0].get_attribute("data-asin") if first_hit else None

            if asin:
                url = f"{self.amazon_url}dp/{asin}"
                price = self.get_product_price(url)
                name = self.get_product_name(url)
            else:
                url = price = name = self.NOT_AVAILABLE

            prices.append(price)
            urls.append(url)
            names.append(name)

            print(name)
            print(price)
            print(url)
            time.sleep(2)

        return prices, urls, names

    def _element_text(self, element_id):
        """Return the text of the first element with this id, or None."""
        matches = self.driver.find_elements_by_id(element_id)
        return matches[0].text if matches else None

    def get_product_price(self, url):
        """Load the product page and return its price as a bare number string.

        The deal price is preferred over the regular price when both are
        present. Currency symbols, spaces and thousands separators are
        removed.

        Args:
            url: Product page to load.

        Returns:
            The cleaned price text, or ``NOT_AVAILABLE`` when neither
            ``priceblock_dealprice`` nor ``priceblock_ourprice`` exists.
        """
        self.driver.get(url)

        price = self._element_text("priceblock_dealprice")
        if price is None:
            price = self._element_text("priceblock_ourprice")
        if price is None:
            return self.NOT_AVAILABLE

        return self._NON_PRICE_CHARS.sub('', price)

    def get_product_name(self, url):
        """Load the product page and return its title.

        Args:
            url: Product page to load.

        Returns:
            The text of the ``productTitle`` element, or ``NOT_AVAILABLE``
            when the page has no such element.
        """
        self.driver.get(url)

        product_name = self._element_text("productTitle")
        if product_name is None:
            product_name = self.NOT_AVAILABLE

        return product_name

    def close_session(self):
        """End the browser session and shut down the driver."""
        # quit() ends the whole session; close() only closes the current window.
        self.driver.quit()

 

 

email_alert.py

import config

import smtplib

 

class EmailAlert(object):
    """Send a plain-text notification email through Gmail's SMTP server.

    The sender address, recipient address and password are read from the
    ``config`` module.
    """

    def __init__(self, subject, msg):
        """Store the subject line and body text of the alert.

        Args:
            subject: Text for the ``Subject`` header.
            msg: Body of the email.
        """
        self.subject = subject
        self.msg = msg

    def send_email(self):
        """Send the alert and print whether delivery succeeded.

        Delivery is best-effort: SMTP and network errors are reported on
        stdout rather than raised.
        """
        message = 'Subject: {}\n\n{}'.format(self.subject, self.msg)

        try:
            # The context manager closes the connection even when a step fails.
            with smtplib.SMTP('smtp.gmail.com:587') as server:
                server.ehlo()
                server.starttls()
                server.login(config.FROM_EMAIL_ADDRESS, config.PASSWORD)
                server.sendmail(config.FROM_EMAIL_ADDRESS,
                                config.TO_EMAIL_ADDRESS,
                                message)
        except (smtplib.SMTPException, OSError) as err:
            print(f"Email failed to send. ({err})")
        else:
            print("Success: Email sent!")

  

config.py

# fill in with own details

# Address the alert is sent from; also used as the SMTP login name.
FROM_EMAIL_ADDRESS = ""

# Address that receives the alert.
TO_EMAIL_ADDRESS = ""

# Password used to log in as FROM_EMAIL_ADDRESS.
# NOTE(review): avoid committing a real password; consider reading it from the environment.
PASSWORD = ""