From d781928cba2eaca66e594642b313f68b43534214 Mon Sep 17 00:00:00 2001
From: Malcolm Kotok
Date: Sat, 10 Mar 2018 11:43:10 -0500
Subject: [PATCH] initial commit

---
 .gitignore |   2 +
 README.md  |   4 ++
 scraper.py | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 scraper.sh |   5 ++
 stats.csv  |  66 +++++++++++++++++++++++
 5 files changed, 231 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 scraper.py
 create mode 100755 scraper.sh
 create mode 100644 stats.csv

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..adaf3df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# don't share configuration settings
+config.py*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e7655f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# Scraper
+
+This code is used to scrape the my.chevrolet.com
+site and retrieve information on my Chevy Bolt.
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..89e9a6d
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,154 @@
+import os
+import re
+import sys
+from datetime import datetime
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.common.exceptions import TimeoutException
+from selenium.webdriver.support.ui import WebDriverWait
+
+from config import driver_type, driver_path, username, password
+
+
+def main():
+    """Scrape my.chevrolet.com and append one row of stats to stats.csv.
+
+    Returns 0 on success and 1 if the status elements could not be found.
+    """
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+
+    # create webdriver based on config
+    # TODO: Add cmdline arguments for these?
+    if driver_type == "chrome":
+        driver = webdriver.Chrome(driver_path)
+    elif driver_type == "phantomjs":
+        driver = webdriver.PhantomJS(driver_path)
+    else:
+        raise RuntimeError("Unknown driver type: %s" % driver_type)
+
+    # always release the browser, even if login or parsing raises
+    try:
+        return _scrape(driver, script_dir)
+    finally:
+        cleanup(driver, script_dir)
+
+
+def _scrape(driver, script_dir):
+    """function to log in, collect the stats and append them to stats.csv
+
+    Returns 0 on success and 1 if the status elements never loaded.
+    """
+    # log in to my.chevrolet.com
+    driver.get("https://my.chevrolet.com/login")
+    driver.find_element_by_id("Login_Username").send_keys(username)
+    driver.find_element_by_id("Login_Password").send_keys(password)
+    driver.find_element_by_id("Login_Button").click()
+
+    # get status elements within certain time parameters/retries
+    retries = 5
+    timeout = 3*60  # seconds
+    args = None
+    for _ in range(retries):
+        try:
+            args = WebDriverWait(driver, timeout).until(find_status_elements)
+        except TimeoutException:
+            print("Timeout searching for elements -- refreshing...")
+            driver.refresh()
+        else:
+            # find_status_elements returns a tuple of status elements on
+            # success and a list of error boxes otherwise
+            if isinstance(args, tuple):
+                print("Found status elements")
+                break
+            print("Error boxes found -- refreshing...")
+            driver.refresh()
+
+    # anything but the tuple means we never saw the status elements
+    if not isinstance(args, tuple):
+        print("Unable to get stats")
+        return 1
+
+    # parse elements
+    status_box, status_right, info_table = args
+    stats = []
+
+    # get current charge
+    lines = status_box.text.splitlines()
+    stats.append(lines[2])
+
+    # get estimated electric and total range
+    lines = status_right.text.splitlines()
+    for label in ['Estimated Electric Range:',
+                  'Estimated Total Range:']:
+        stats.append(_number_after(lines, label))
+
+    # get vehicle info table stats
+    lines = info_table.text.splitlines()
+    for label in ['Electric Miles',
+                  'Electric Economy',
+                  'Electricity Used',
+                  'Est Fuel Saved',
+                  'Est. CO2 Avoided']:
+        stats.append(_number_after(lines, label))
+
+    # create stats string
+    stats.insert(0, str(datetime.now()))
+    row = ','.join(stats)
+    print(row)
+
+    # write stats to csv; the with-block closes the file after the append
+    csv_path = os.path.join(script_dir, "stats.csv")
+    with open(csv_path, 'a') as csv_file:
+        csv_file.write(row + '\n')
+
+    return 0
+
+
+def _number_after(lines, label):
+    """function to get the number on the line after `label`, commas removed
+
+    Raises ValueError if `label` is missing or no number follows it.
+    """
+    idx = lines.index(label) + 1
+    match = re.search(r"[\d,]+", lines[idx]) if idx < len(lines) else None
+    if match is None:
+        raise ValueError("No number found after %r" % label)
+    return match.group().replace(',', '')
+
+
+def cleanup(driver, script_dir):
+    """function to properly close driver and remove log files"""
+    # quit even if closing the window fails, so the browser never lingers
+    try:
+        driver.close()
+    finally:
+        driver.quit()
+
+    ghostdriverlog = os.path.join(script_dir, "ghostdriver.log")
+    if os.path.isfile(ghostdriverlog):
+        os.remove(ghostdriverlog)
+
+
+def find_status_elements(driver):
+    """function to check if status elements or error boxes have loaded
+
+    Returns a tuple (status_box, status_right, info_table) when all three
+    are present, otherwise the (possibly empty) list of error boxes.
+    """
+    try:
+        status_box = driver.find_element_by_class_name("status-box")
+        status_right = driver.find_element_by_class_name("status-right")
+        info_table = driver.find_element_by_class_name("panel-vehicle-info-table")
+        return status_box, status_right, info_table
+    except NoSuchElementException:
+        # return error elements if any
+        error_selector = "[data-cms-content-id='vehicleProfile_EV_error1']"
+        errors = driver.find_elements_by_css_selector(error_selector)
+        return errors
+
+
+###########################################################################
+
+if __name__ == "__main__":
+    # hand main()'s status to the shell so failures are visible
+    sys.exit(main())
diff --git a/scraper.sh b/scraper.sh
new file mode 100755
index 0000000..a8b856c
--- /dev/null
+++ b/scraper.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+echo "Cron job started at `date`"
+cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1
+python scraper.py
+echo "Cron job completed at `date`"
diff --git a/stats.csv b/stats.csv
new file mode 100644
index 0000000..72568be
--- /dev/null
+++ b/stats.csv
@@ -0,0 +1,66 @@
+datetime,current charge,est electric range,est total range,mileage,economy (Kwh/100mi),electricity used (Kwh),fuel saved (gal),CO2 avoided (lbs)
+2017-12-23 22:00:50.815358,85%,129,129,8106,36,129,312,6056
+2017-12-24
00:01:35.472697,85%,129,129,8106,36,129,312,6056 +2017-12-24 14:32:59.930174,100%,149,175,8106,36,175,312,6056 +2017-12-24 14:51:34.385776,100%,149,175,8106,36,175,312,6056 +2017-12-24 14:57:43.538692,100%,149,175,8106,36,175,312,6056 +2017-12-25 00:00:51.112911,87%,132,155,8127,36,155,313,6073 +2017-12-26 00:01:59.124346,72%,108,108,8170,36,108,315,6105 +2017-12-27 00:01:41.827139,87%,124,146,8196,36,146,316,6124 +2017-12-28 00:01:52.348759,100%,145,116,8196,36,116,316,6124 +2017-12-29 00:01:09.185208,82%,118,139,8217,36,139,316,6140 +2017-12-30 00:01:39.970757,93%,135,135,8224,36,135,317,6145 +2017-12-31 00:01:56.921793,88%,128,151,8248,36,151,318,6163 +2018-01-01 00:01:37.816503,72%,95,77,8279,36,77,319,6186 +2018-01-02 00:02:08.587081,76%,104,104,8297,36,104,320,6200 +2018-01-16 00:02:27.748025,87%,111,130,8324,36,130,321,6220 +2018-01-17 00:01:16.531126,88%,118,140,8351,37,140,322,6240 +2018-01-18 00:01:31.510746,89%,115,135,8381,37,135,323,6263 +2018-01-19 00:01:51.407200,89%,117,87,8408,37,87,324,6283 +2018-01-20 00:01:42.204268,82%,118,139,8450,37,139,325,6314 +2018-01-21 00:01:52.014376,83%,125,147,8481,37,147,327,6337 +2018-01-22 00:01:15.165274,100%,150,177,8481,37,177,327,6337 +2018-01-23 00:01:38.206684,85%,125,101,8508,37,101,328,6357 +2018-01-25 00:01:59.048981,88%,126,148,8561,37,148,330,6398 +2018-01-26 00:01:32.308935,87%,123,90,8591,37,90,331,6420 +2018-01-27 00:02:14.534221,94%,139,113,8603,37,113,331,6429 +2018-01-28 00:01:16.763659,83%,129,129,8632,37,129,332,6450 +2018-01-29 00:01:48.056085,90%,139,165,8647,37,165,333,6462 +2018-01-30 00:02:12.143807,87%,131,100,8674,37,100,334,6482 +2018-02-01 00:01:07.243458,87%,126,148,8730,37,148,336,6524 +2018-02-03 00:01:15.883892,100%,141,114,8800,37,114,339,6577 +2018-02-04 11:32:14.507617,100%,152,98,8846,37,98,341,6611 +2018-02-04 17:42:19.736736,96%,146,172,8854,37,172,341,6617 +2018-02-05 00:02:00.130482,90%,137,112,8862,37,112,341,6623 +2018-02-05 19:14:54.355435,79%,114,114,8889,37,114,342,6643 
+2018-02-06 00:11:55.708906,90%,129,129,8889,37,129,342,6643 +2018-02-07 00:06:01.968330,89%,129,129,8917,37,129,344,6664 +2018-02-08 00:01:58.399934,88%,127,127,8944,37,127,345,6684 +2018-02-09 00:01:57.828682,88%,121,89,8971,37,89,346,6704 +2018-02-10 00:02:23.995437,85%,121,142,9002,37,142,347,6728 +2018-02-12 00:02:24.714102,85%,125,125,9052,38,125,349,6765 +2018-02-13 00:01:36.408885,87%,127,97,9079,38,97,350,6785 +2018-02-14 00:02:06.356713,87%,126,96,9105,38,96,351,6805 +2018-02-15 00:02:20.208228,84%,125,125,9133,38,125,352,6826 +2018-02-16 00:01:55.983832,84%,135,159,9162,38,159,353,6848 +2018-02-17 00:01:49.713725,100%,159,159,9189,38,159,354,6868 +2018-02-18 00:01:36.852508,84%,134,109,9214,38,109,355,6886 +2018-02-19 00:01:52.219627,86%,135,107,9242,38,107,356,6907 +2018-02-20 00:01:50.387337,84%,134,159,9269,38,159,357,6928 +2018-02-21 00:02:07.799953,87%,145,171,9296,38,171,358,6948 +2018-02-22 00:02:29.549972,86%,147,173,9323,38,173,359,6968 +2018-02-23 00:02:14.454823,86%,141,141,9352,38,141,360,6990 +2018-02-24 00:02:37.258386,82%,139,165,9404,38,165,362,7029 +2018-02-25 00:02:01.363765,100%,169,138,9404,38,138,362,7029 +2018-02-26 00:02:42.806172,100%,168,198,9404,38,198,362,7029 +2018-02-27 00:01:56.636890,85%,142,142,9435,38,142,364,7052 +2018-02-28 00:02:34.331466,86%,151,178,9463,38,178,365,7073 +2018-03-01 00:02:05.608700,81%,142,108,9505,38,108,366,7104 +2018-03-03 00:01:59.992545,84%,142,142,9578,38,142,369,7159 +2018-03-04 00:01:36.835058,85%,143,143,9603,38,143,370,7178 +2018-03-05 00:01:38.521116,86%,141,141,9643,38,141,372,7208 +2018-03-06 00:01:31.448443,87%,138,138,9670,38,138,373,7228 +2018-03-07 00:01:35.393821,84%,137,137,9697,38,137,374,7248 +2018-03-08 00:02:13.640529,80%,130,153,9738,38,153,375,7279 +2018-03-09 00:01:29.217477,85%,134,158,9767,38,158,376,7301 +2018-03-10 00:01:23.649098,85%,136,136,9813,38,136,378,7335