Move DwdScraper to a class

This commit is contained in:
clerie 2022-02-28 17:02:01 +01:00
parent f7cb2ca86e
commit cf620b86bc
1 changed file with 98 additions and 92 deletions

View File

@@ -7,33 +7,36 @@ import zipfile
import datetime import datetime
from config.db import db as config_db from config.db import db as config_db
def cleanup_value(v): class DwdScraper:
def run(self):
def cleanup_value(v):
if int(v) == -999: if int(v) == -999:
return None return None
return v return v
conn = psycopg2.connect(config_db["uri"]) conn = psycopg2.connect(config_db["uri"])
cur = conn.cursor() cur = conn.cursor()
cur.execute("SELECT min(dwd_last_update) FROM stations;") cur.execute("SELECT min(dwd_last_update) FROM stations;")
last_date = cur.fetchall()[0][0] last_date = cur.fetchall()[0][0]
print(last_date) print(last_date)
if not last_date is None: if not last_date is None:
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date]) cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date])
else: else:
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;") cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;")
last_station = cur.fetchone() last_station = cur.fetchone()
print(last_station) print(last_station)
curr_station_id = last_station[0] curr_station_id = last_station[0]
curr_station_dwd_id = last_station[4] curr_station_dwd_id = last_station[4]
print(curr_station_dwd_id) print(curr_station_dwd_id)
r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True) r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True)
print(r.url) print(r.url)
if r.status_code == 200: if r.status_code == 200:
zip = zipfile.ZipFile(io.BytesIO(r.content)) zip = zipfile.ZipFile(io.BytesIO(r.content))
files = zip.namelist() files = zip.namelist()
print(files) print(files)
@@ -102,9 +105,12 @@ if r.status_code == 200:
else: else:
print("ignored") print("ignored")
cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id]) cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id])
conn.commit() conn.commit()
cur.close() cur.close()
conn.close() conn.close()
if __name__ == "__main__":
DwdScraper().run()