Move DwdScraper to a class
This commit is contained in:
parent
f7cb2ca86e
commit
cf620b86bc
@ -7,33 +7,36 @@ import zipfile
|
|||||||
import datetime
|
import datetime
|
||||||
from config.db import db as config_db
|
from config.db import db as config_db
|
||||||
|
|
||||||
def cleanup_value(v):
|
class DwdScraper:
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
def cleanup_value(v):
|
||||||
if int(v) == -999:
|
if int(v) == -999:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return v
|
return v
|
||||||
|
|
||||||
conn = psycopg2.connect(config_db["uri"])
|
conn = psycopg2.connect(config_db["uri"])
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
|
|
||||||
cur.execute("SELECT min(dwd_last_update) FROM stations;")
|
cur.execute("SELECT min(dwd_last_update) FROM stations;")
|
||||||
last_date = cur.fetchall()[0][0]
|
last_date = cur.fetchall()[0][0]
|
||||||
print(last_date)
|
print(last_date)
|
||||||
|
|
||||||
if not last_date is None:
|
if not last_date is None:
|
||||||
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date])
|
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date])
|
||||||
else:
|
else:
|
||||||
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;")
|
cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;")
|
||||||
last_station = cur.fetchone()
|
last_station = cur.fetchone()
|
||||||
print(last_station)
|
print(last_station)
|
||||||
|
|
||||||
curr_station_id = last_station[0]
|
curr_station_id = last_station[0]
|
||||||
curr_station_dwd_id = last_station[4]
|
curr_station_dwd_id = last_station[4]
|
||||||
|
|
||||||
print(curr_station_dwd_id)
|
print(curr_station_dwd_id)
|
||||||
r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True)
|
r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True)
|
||||||
print(r.url)
|
print(r.url)
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
zip = zipfile.ZipFile(io.BytesIO(r.content))
|
zip = zipfile.ZipFile(io.BytesIO(r.content))
|
||||||
files = zip.namelist()
|
files = zip.namelist()
|
||||||
print(files)
|
print(files)
|
||||||
@ -102,9 +105,12 @@ if r.status_code == 200:
|
|||||||
else:
|
else:
|
||||||
print("ignored")
|
print("ignored")
|
||||||
|
|
||||||
cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id])
|
cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id])
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
cur.close()
|
cur.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
DwdScraper().run()
|
||||||
|
Loading…
Reference in New Issue
Block a user