mirror of
https://github.com/mkotok/scraper
synced 2024-12-23 04:04:57 -06:00
initial commit
This commit is contained in:
commit
d781928cba
5 changed files with 199 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
# don't share configuration settings
|
||||
config.py*
|
4
README.md
Normal file
4
README.md
Normal file
|
@ -0,0 +1,4 @@
|
|||
# Scraper
|
||||
|
||||
This code is used to scrape the my.chevrolet.com
|
||||
site and retrieve information on my Chevy Bolt.
|
122
scraper.py
Normal file
122
scraper.py
Normal file
|
@ -0,0 +1,122 @@
|
|||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
|
||||
from config import driver_type, driver_path, username, password
|
||||
|
||||
|
||||
def _number_after(lines, label):
    """Return the number printed on the line that follows *label*.

    lines -- list of text lines taken from a page element
    label -- exact text of the line that precedes the value

    The first run of digits/commas on the following line is returned as a
    string with the commas removed.

    Raises ValueError if *label* is not one of the lines or if the following
    line holds no digits, and IndexError if *label* is the last line.
    """
    value_line = lines[lines.index(label) + 1]
    found = re.search(r"[\d,]+", value_line)
    if found is None:
        raise ValueError("No numeric value after %r: %r" % (label, value_line))
    return found.group().replace(',', '')


def main():
    """Log in to my.chevrolet.com, scrape the vehicle stats and log them.

    The stats are printed as one comma-separated row (prefixed with the
    current timestamp) and appended to stats.csv next to this script.

    Returns 0 when a row was written and 1 when the status elements could not
    be found within the retry budget.

    Raises RuntimeError for an unknown driver_type. Errors raised while
    scraping or parsing propagate to the caller, but the webdriver is always
    cleaned up first.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    # create webdriver based on config
    # TODO: Add cmdline arguments for these?
    if driver_type == "chrome":
        driver = webdriver.Chrome(driver_path)
    elif driver_type == "phantomjs":
        driver = webdriver.PhantomJS(driver_path)
    else:
        raise RuntimeError("Unknown driver type: %s" % driver_type)

    # from here on the browser exists, so every exit path (including
    # unexpected exceptions) must go through cleanup()
    try:
        # log in to my.chevrolet.com
        driver.get("https://my.chevrolet.com/login")
        driver.find_element_by_id("Login_Username").send_keys(username)
        driver.find_element_by_id("Login_Password").send_keys(password)
        driver.find_element_by_id("Login_Button").click()

        # get status elements within certain time parameters/retries
        retries = 5
        timeout = 3*60  # seconds
        elements = None
        for _ in range(retries):
            try:
                found = WebDriverWait(driver, timeout).until(find_status_elements)
            except TimeoutException:
                print("Timeout searching for elements -- refreshing...")
                driver.refresh()
                continue

            # find_status_elements returns a 3-tuple on success and the
            # error-box collection otherwise; checking the type as well as
            # the length keeps three error boxes from passing as a success
            if isinstance(found, tuple) and len(found) == 3:
                print("Found status elements")
                elements = found
                break

            print("Error boxes found -- refreshing...")
            driver.refresh()
        else:
            # every attempt timed out or showed error boxes
            print("Unable to get stats")
            return 1

        # parse elements
        status_box, status_right, info_table = elements
        stats = []

        # get current charge (third line of the status box text)
        stats.append(status_box.text.splitlines()[2])

        # NOTE(review): in the committed stats.csv the "est total range" and
        # "electricity used" columns hold identical values in every row, so
        # one of the lookups below may be reading the wrong line -- verify
        # against the live page.

        # get estimated electric and total range
        range_lines = status_right.text.splitlines()
        for label in ('Estimated Electric Range:',
                      'Estimated Total Range:'):
            stats.append(_number_after(range_lines, label))

        # get vehicle info table stats
        table_lines = info_table.text.splitlines()
        for label in ('Electric Miles',
                      'Electric Economy',
                      'Electricity Used',
                      'Est Fuel Saved',
                      'Est. CO2 Avoided'):
            stats.append(_number_after(table_lines, label))

        # create stats string
        stats.insert(0, str(datetime.now()))
        row = ','.join(stats)
        print(row)

        # write stats to csv; the with-block guarantees the row is flushed
        # and the handle closed
        stats_path = os.path.join(script_dir, "stats.csv")
        with open(stats_path, 'a') as stats_file:
            stats_file.write(row + '\n')

        return 0
    finally:
        # clean up on success, on failure and on unexpected errors alike
        cleanup(driver, script_dir)
|
||||
|
||||
|
||||
def cleanup(driver, script_dir):
    """Shut the webdriver down and delete a leftover ghostdriver.log.

    driver     -- webdriver instance to close and quit
    script_dir -- directory that is checked for a ghostdriver.log file
    """
    # close the window, then end the driver session
    driver.close()
    driver.quit()

    log_path = os.path.join(script_dir, "ghostdriver.log")
    if not os.path.isfile(log_path):
        # nothing was left behind
        return
    os.remove(log_path)
|
||||
|
||||
|
||||
def find_status_elements(driver):
    """Wait condition: look up the status elements, else any error boxes.

    Returns a 3-tuple (status box, status right, vehicle info table) when all
    three elements are present. If any of them is missing, returns whatever
    the error-box CSS selector matches instead.
    """
    wanted_classes = ("status-box",
                      "status-right",
                      "panel-vehicle-info-table")
    try:
        # looked up in order; the first missing element aborts the lookup
        return tuple(driver.find_element_by_class_name(class_name)
                     for class_name in wanted_classes)
    except NoSuchElementException:
        # return error elements if any
        return driver.find_elements_by_css_selector(
            "[data-cms-content-id='vehicleProfile_EV_error1']")
|
||||
|
||||
|
||||
###########################################################################
|
||||
|
||||
if __name__ == "__main__":
    # main() returns 0 on success and 1 on failure; hand that to the
    # interpreter so the process exit status reflects the outcome instead of
    # always being 0
    import sys
    sys.exit(main())
|
5
scraper.sh
Executable file
5
scraper.sh
Executable file
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
# Cron wrapper: run scraper.py from the directory this script lives in and
# log start/end timestamps.
echo "Cron job started at $(date)"

# Quote the path so directories containing spaces work, and stop if the cd
# fails instead of running the scraper from the wrong directory.
cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1

python scraper.py
status=$?

echo "Cron job completed at $(date)"

# Exit with the scraper's status rather than that of the final echo.
exit "$status"
|
66
stats.csv
Normal file
66
stats.csv
Normal file
|
@ -0,0 +1,66 @@
|
|||
datetime,current charge,est electric range,est total range,mileage,economy (Kwh/100mi),electricity used (Kwh),fuel saved (gal),CO2 avoided (lbs)
|
||||
2017-12-23 22:00:50.815358,85%,129,129,8106,36,129,312,6056
|
||||
2017-12-24 00:01:35.472697,85%,129,129,8106,36,129,312,6056
|
||||
2017-12-24 14:32:59.930174,100%,149,175,8106,36,175,312,6056
|
||||
2017-12-24 14:51:34.385776,100%,149,175,8106,36,175,312,6056
|
||||
2017-12-24 14:57:43.538692,100%,149,175,8106,36,175,312,6056
|
||||
2017-12-25 00:00:51.112911,87%,132,155,8127,36,155,313,6073
|
||||
2017-12-26 00:01:59.124346,72%,108,108,8170,36,108,315,6105
|
||||
2017-12-27 00:01:41.827139,87%,124,146,8196,36,146,316,6124
|
||||
2017-12-28 00:01:52.348759,100%,145,116,8196,36,116,316,6124
|
||||
2017-12-29 00:01:09.185208,82%,118,139,8217,36,139,316,6140
|
||||
2017-12-30 00:01:39.970757,93%,135,135,8224,36,135,317,6145
|
||||
2017-12-31 00:01:56.921793,88%,128,151,8248,36,151,318,6163
|
||||
2018-01-01 00:01:37.816503,72%,95,77,8279,36,77,319,6186
|
||||
2018-01-02 00:02:08.587081,76%,104,104,8297,36,104,320,6200
|
||||
2018-01-16 00:02:27.748025,87%,111,130,8324,36,130,321,6220
|
||||
2018-01-17 00:01:16.531126,88%,118,140,8351,37,140,322,6240
|
||||
2018-01-18 00:01:31.510746,89%,115,135,8381,37,135,323,6263
|
||||
2018-01-19 00:01:51.407200,89%,117,87,8408,37,87,324,6283
|
||||
2018-01-20 00:01:42.204268,82%,118,139,8450,37,139,325,6314
|
||||
2018-01-21 00:01:52.014376,83%,125,147,8481,37,147,327,6337
|
||||
2018-01-22 00:01:15.165274,100%,150,177,8481,37,177,327,6337
|
||||
2018-01-23 00:01:38.206684,85%,125,101,8508,37,101,328,6357
|
||||
2018-01-25 00:01:59.048981,88%,126,148,8561,37,148,330,6398
|
||||
2018-01-26 00:01:32.308935,87%,123,90,8591,37,90,331,6420
|
||||
2018-01-27 00:02:14.534221,94%,139,113,8603,37,113,331,6429
|
||||
2018-01-28 00:01:16.763659,83%,129,129,8632,37,129,332,6450
|
||||
2018-01-29 00:01:48.056085,90%,139,165,8647,37,165,333,6462
|
||||
2018-01-30 00:02:12.143807,87%,131,100,8674,37,100,334,6482
|
||||
2018-02-01 00:01:07.243458,87%,126,148,8730,37,148,336,6524
|
||||
2018-02-03 00:01:15.883892,100%,141,114,8800,37,114,339,6577
|
||||
2018-02-04 11:32:14.507617,100%,152,98,8846,37,98,341,6611
|
||||
2018-02-04 17:42:19.736736,96%,146,172,8854,37,172,341,6617
|
||||
2018-02-05 00:02:00.130482,90%,137,112,8862,37,112,341,6623
|
||||
2018-02-05 19:14:54.355435,79%,114,114,8889,37,114,342,6643
|
||||
2018-02-06 00:11:55.708906,90%,129,129,8889,37,129,342,6643
|
||||
2018-02-07 00:06:01.968330,89%,129,129,8917,37,129,344,6664
|
||||
2018-02-08 00:01:58.399934,88%,127,127,8944,37,127,345,6684
|
||||
2018-02-09 00:01:57.828682,88%,121,89,8971,37,89,346,6704
|
||||
2018-02-10 00:02:23.995437,85%,121,142,9002,37,142,347,6728
|
||||
2018-02-12 00:02:24.714102,85%,125,125,9052,38,125,349,6765
|
||||
2018-02-13 00:01:36.408885,87%,127,97,9079,38,97,350,6785
|
||||
2018-02-14 00:02:06.356713,87%,126,96,9105,38,96,351,6805
|
||||
2018-02-15 00:02:20.208228,84%,125,125,9133,38,125,352,6826
|
||||
2018-02-16 00:01:55.983832,84%,135,159,9162,38,159,353,6848
|
||||
2018-02-17 00:01:49.713725,100%,159,159,9189,38,159,354,6868
|
||||
2018-02-18 00:01:36.852508,84%,134,109,9214,38,109,355,6886
|
||||
2018-02-19 00:01:52.219627,86%,135,107,9242,38,107,356,6907
|
||||
2018-02-20 00:01:50.387337,84%,134,159,9269,38,159,357,6928
|
||||
2018-02-21 00:02:07.799953,87%,145,171,9296,38,171,358,6948
|
||||
2018-02-22 00:02:29.549972,86%,147,173,9323,38,173,359,6968
|
||||
2018-02-23 00:02:14.454823,86%,141,141,9352,38,141,360,6990
|
||||
2018-02-24 00:02:37.258386,82%,139,165,9404,38,165,362,7029
|
||||
2018-02-25 00:02:01.363765,100%,169,138,9404,38,138,362,7029
|
||||
2018-02-26 00:02:42.806172,100%,168,198,9404,38,198,362,7029
|
||||
2018-02-27 00:01:56.636890,85%,142,142,9435,38,142,364,7052
|
||||
2018-02-28 00:02:34.331466,86%,151,178,9463,38,178,365,7073
|
||||
2018-03-01 00:02:05.608700,81%,142,108,9505,38,108,366,7104
|
||||
2018-03-03 00:01:59.992545,84%,142,142,9578,38,142,369,7159
|
||||
2018-03-04 00:01:36.835058,85%,143,143,9603,38,143,370,7178
|
||||
2018-03-05 00:01:38.521116,86%,141,141,9643,38,141,372,7208
|
||||
2018-03-06 00:01:31.448443,87%,138,138,9670,38,138,373,7228
|
||||
2018-03-07 00:01:35.393821,84%,137,137,9697,38,137,374,7248
|
||||
2018-03-08 00:02:13.640529,80%,130,153,9738,38,153,375,7279
|
||||
2018-03-09 00:01:29.217477,85%,134,158,9767,38,158,376,7301
|
||||
2018-03-10 00:01:23.649098,85%,136,136,9813,38,136,378,7335
|
|
Loading…
Reference in a new issue