#! /usr/bin/env python3

import psycopg2
import requests
import io
import zipfile
import datetime
from config.db import db as config_db

class DwdScraper:
    """Import recent daily climate observations from DWD open data into Postgres.

    Each call to run() handles exactly one station: the one whose
    ``dwd_last_update`` is oldest (or not set at all). Its "recent" daily KL
    archive is downloaded, rows that are not yet present in the ``climate``
    table are inserted, and the station's ``dwd_last_update`` is stamped.
    """

    # Download URL of the per-station archive; ``{station}`` is the DWD id.
    _ARCHIVE_URL = (
        "https://opendata.dwd.de/climate_environment/CDC/observations_germany/"
        "climate/daily/kl/recent/tageswerte_KL_{station}_akt.zip"
    )

    # Seconds to wait for the DWD server before giving up on the download.
    _REQUEST_TIMEOUT = 60

    # Measurement columns in the order they appear in a data line (after the
    # station id and the date), paired with the converter for the raw text.
    _MEASUREMENT_COLUMNS = (
        ("qn_3", int),
        ("fx", float),
        ("fm", float),
        ("qn_4", int),
        ("rsk", float),
        ("rskf", int),
        ("sdk", float),
        ("shk_tag", float),
        ("nm", float),
        ("vpm", float),
        ("pm", float),
        ("tmk", float),
        ("upm", float),
        ("txk", float),
        ("tnk", float),
        ("tgk", float),
    )

    def run(self):
        """Update the climate data of the station that is next in line.

        Does nothing (besides printing a note) when there is no station to
        update. The station's ``dwd_last_update`` is stamped even when the
        download fails or the archive contains no data file, so that the next
        invocation moves on to another station.
        """
        last_station = self.get_station_to_update()
        if last_station is None:
            print("no station to update")
            return
        print("checking station", last_station)

        conn = psycopg2.connect(config_db["uri"])
        try:
            # ``with conn`` commits on success and rolls back on an exception,
            # but it does NOT close the connection - hence the try/finally.
            with conn:
                with conn.cursor() as cur:
                    cur.execute("SELECT id, name, lat, lon, dwd_id, dwd_last_update FROM stations WHERE dwd_id = %s;", [last_station])
                    current_station = cur.fetchone()
                    if current_station is None:
                        print("station", last_station, "not found")
                        return

                    curr_station_id = current_station[0]
                    curr_station_dwd_id = current_station[4]

                    content = self._download_archive(curr_station_dwd_id)
                    if content is not None:
                        self._import_archive(cur, curr_station_id, content)

                    cur.execute("UPDATE stations SET dwd_last_update = %s WHERE dwd_id = %s;", [datetime.datetime.today().isoformat(), last_station])
        finally:
            conn.close()

    def get_station_to_update(self):
        """
            Returns DWD ID of station that should get updated soon.

            Stations that were never updated (``dwd_last_update`` is NULL)
            come first, then the station with the oldest update timestamp.
            Returns None when no station with a DWD ID exists.
        """
        conn = psycopg2.connect(config_db["uri"])
        try:
            with conn:
                with conn.cursor() as cur:
                    # NULLS FIRST is essential: min() skips NULLs, so picking
                    # the minimum timestamp would never select stations that
                    # have not been updated yet.
                    cur.execute("SELECT dwd_id FROM stations WHERE dwd_id IS NOT NULL ORDER BY dwd_last_update ASC NULLS FIRST LIMIT 1;")
                    row = cur.fetchone()
        finally:
            conn.close()

        return None if row is None else row[0]

    def cleanup_value(self, v):
        """Return None if *v* is the missing-value marker -999, else *v* unchanged.

        The comparison is exact (no truncation), so e.g. -999.5 is kept.
        """
        if float(v) == -999:
            return None

        return v

    def _download_archive(self, dwd_id):
        """Fetch the station's zip archive; return its bytes or None on a non-200 reply."""
        print(dwd_id)
        # Archive names use a zero-padded, 5-digit station id; zfill leaves
        # ids that are already padded untouched.
        url = self._ARCHIVE_URL.format(station=str(dwd_id).zfill(5))
        r = requests.get(url, timeout=self._REQUEST_TIMEOUT)
        print(r.url)
        if r.status_code != 200:
            return None
        return r.content

    def _import_archive(self, cur, station_id, content):
        """Insert every data line of the archive's climate file that is not stored yet."""
        with zipfile.ZipFile(io.BytesIO(content)) as archive:
            files = archive.namelist()
            print(files)
            files_climate = [f for f in files if f.startswith("produkt_klima_")]
            print(files_climate)
            if not files_climate:
                print("no climate data file in archive")
                return

            with archive.open(files_climate[0]) as data_file:
                for line_no, raw_line in enumerate(data_file):
                    text = raw_line.decode('utf-8').strip()
                    # Line 0 is the column header; blank lines carry no data.
                    if line_no == 0 or not text:
                        continue
                    self._store_record(cur, station_id, self._parse_line(text))

    def _parse_line(self, line):
        """Turn one ';'-separated data line into a dict keyed by column name.

        Raises ValueError when the line has fewer fields than expected or a
        field cannot be converted.
        """
        fields = [field.strip() for field in line.split(";")]
        expected = 2 + len(self._MEASUREMENT_COLUMNS)
        if len(fields) < expected:
            raise ValueError("expected at least %d fields, got %d: %r" % (expected, len(fields), line))

        raw_date = fields[1]
        record = {
            "dwd_id": fields[0],
            # Date is formatted as YYYYMMDD.
            "date": datetime.date(int(raw_date[0:4]), int(raw_date[4:6]), int(raw_date[6:8])),
        }
        for (name, convert), raw_value in zip(self._MEASUREMENT_COLUMNS, fields[2:]):
            record[name] = self.cleanup_value(convert(raw_value))
        return record

    def _store_record(self, cur, station_id, record):
        """Insert *record* for the station unless a row for that date already exists."""
        iso_date = record["date"].isoformat()
        print(station_id, iso_date)

        cur.execute("SELECT id FROM climate WHERE station = %s AND date = %s; ", [station_id, iso_date])
        if cur.fetchone() is not None:
            print("ignored")
            return

        values = [record[name] for name, _ in self._MEASUREMENT_COLUMNS]
        cur.execute("INSERT INTO climate (station, date, qn_3, fx, fm, qn_4, rsk, rskf, sdk, shk_tag, nm, vpm, pm, tmk, upm, txk, tnk, tgk) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                    [station_id, iso_date] + values)
        print("imported")

if __name__ == "__main__":
    # Script entry point: build a scraper and perform one update run.
    scraper = DwdScraper()
    scraper.run()