diff --git a/scraper/daily_import_climate.py b/scraper/daily_import_climate.py index 57f6830..cb63c3b 100755 --- a/scraper/daily_import_climate.py +++ b/scraper/daily_import_climate.py @@ -10,24 +10,17 @@ from config.db import db as config_db class DwdScraper: def run(self): - + last_station = self.get_station_to_update() + print("checking station", last_station) with psycopg2.connect(config_db["uri"]) as conn: with conn.cursor() as cur: - cur.execute("SELECT min(dwd_last_update) FROM stations;") - last_date = cur.fetchall()[0][0] - print(last_date) + cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_id = %s;", [last_station]) + current_station = cur.fetchone() - if not last_date is None: - cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date]) - else: - cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;") - last_station = cur.fetchone() - print(last_station) - - curr_station_id = last_station[0] - curr_station_dwd_id = last_station[4] + curr_station_id = current_station[0] + curr_station_dwd_id = current_station[4] print(curr_station_dwd_id) r = requests.get('https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/tageswerte_KL_' + str(curr_station_dwd_id) + '_akt.zip', stream=True) @@ -104,6 +97,25 @@ class DwdScraper: cur.execute("UPDATE stations SET dwd_last_update = %s WHERE id = %s;", [datetime.datetime.today().isoformat(), curr_station_id]) conn.commit() + def get_station_to_update(self): + """ + Returns DWD ID of station that should get updated soon. + """ + + with psycopg2.connect(config_db["uri"]) as conn: + with conn.cursor() as cur: + + cur.execute("SELECT min(dwd_last_update) FROM stations;") + last_date = cur.fetchall()[0][0] + + if not last_date is None: + cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update = %s LIMIT 1;", [last_date]) + else: + cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_last_update IS NULL LIMIT 1;") + last_station = cur.fetchone() + + return last_station[4] + def cleanup_value(self, v): if int(v) == -999: return None