Move DwdScraper to a class

This commit is contained in:
clerie 2022-02-28 17:02:01 +01:00
parent f7cb2ca86e
commit cf620b86bc

View File

@ -7,33 +7,36 @@ import zipfile
import datetime
from config.db import db as config_db
def cleanup_value(v):
class DwdScraper:
def run(self):
def cleanup_value(v):
if int(v) == -999:
return None
return v
conn = psycopg2.connect(config_db["uri"])
cur = conn.cursor()
conn = psycopg2.connect(config_db["uri"])
cur = conn.cursor()
cur.execute("SELECT min(dwd_last_update) FROM stations;")
last_date = cur.fetchall()[0][0]
print(last_date)
cur.execute("SELECT min(dwd_last_update) FROM stations;")
last_date = cur.fetchall()[0][0]
print(last_date)
if not last_date is None:
if not last_date is None:
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date])
else:
else:
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;")
last_station = cur.fetchone()
print(last_station)
last_station = cur.fetchone()
print(last_station)
curr_station_id = last_station[0]
curr_station_dwd_id = last_station[4]
curr_station_id = last_station[0]
curr_station_dwd_id = last_station[4]
print(curr_station_dwd_id)
r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True)
print(r.url)
if r.status_code == 200:
print(curr_station_dwd_id)
r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True)
print(r.url)
if r.status_code == 200:
zip = zipfile.ZipFile(io.BytesIO(r.content))
files = zip.namelist()
print(files)
@ -102,9 +105,12 @@ if r.status_code == 200:
else:
print("ignored")
cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id])
conn.commit()
cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id])
conn.commit()
cur.close()
conn.close()
cur.close()
conn.close()
if __name__ == "__main__":
DwdScraper().run()