from __future__ import annotations

import csv
import http.client
from urllib import request
from dataclasses import dataclass
from collections.abc import Iterable

import psycopg2
from psycopg2 import sql


@dataclass
class ReservoirDayData:
    """A representation of a single row of data from waterdatafortexas for Canyon Lake.

    The date is kept as a `str` (YYYY-MM-DD) to make it easy to export; the numeric
    columns are parsed to `float`, with empty water_level and surface_area values
    stored as None.

    Attributes:
        date: YYYY-MM-DD, when the data was captured
        water_level: feet above vertical datum
        surface_area: water coverage of the lake in acres
        reservoir_storage: actual storage at measured lake elevation
        conservation_storage: reservoir storage - dead pool capacity
            (note: conservation storage is capped at conservation capacity)
        percent_full: 100 * conservation storage / conservation capacity
        conservation_capacity: storage at conservation pool elevation - dead pool capacity
        dead_pool_capacity: storage at dead pool elevation
    """

    date: str
    water_level: float | None
    surface_area: float | None
    reservoir_storage: float
    conservation_storage: float
    percent_full: float
    conservation_capacity: float
    dead_pool_capacity: float

    @classmethod
    def from_csv_data(cls, data: list[str]) -> Iterable[ReservoirDayData]:
        """Parse CSV lines (header included) into ReservoirDayData rows."""
        # Strip comment lines from the CSV before handing it to DictReader
        data = [row for row in data if not row.startswith("#")]
        for row in csv.DictReader(data, delimiter=",", strict=True):
            date = row["date"]
            # water_level and surface_area can be blank; store those as None
            water_level = float(row["water_level"]) if row["water_level"] else None
            surface_area = float(row["surface_area"]) if row["surface_area"] else None
            reservoir_storage = float(row["reservoir_storage"])
            conservation_storage = float(row["conservation_storage"])
            percent_full = float(row["percent_full"])
            conservation_capacity = float(row["conservation_capacity"])
            dead_pool_capacity = float(row["dead_pool_capacity"])
            yield ReservoirDayData(
                date,
                water_level,
                surface_area,
                reservoir_storage,
                conservation_storage,
                percent_full,
                conservation_capacity,
                dead_pool_capacity,
            )

    def save_to_db(self, conn: psycopg2.extensions.connection) -> None:
        """Insert this row into the `waterdata` table, skipping duplicates."""
        with conn.cursor() as cur:
            cur.execute(
                sql.SQL(
                    "INSERT INTO {} VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING"
                ).format(sql.Identifier("waterdata")),
                [
                    self.date,
                    self.water_level,
                    self.surface_area,
                    self.reservoir_storage,
                    self.conservation_storage,
                    self.percent_full,
                    self.conservation_capacity,
                    self.dead_pool_capacity,
                ],
            )
        conn.commit()

    @classmethod
    def scrape(
        cls,
        url: str = "https://www.waterdatafortexas.org/reservoirs/individual/canyon.csv",
    ) -> Iterable[ReservoirDayData]:
        """Download the reservoir CSV and yield parsed rows."""
        data: http.client.HTTPResponse
        with request.urlopen(url) as data:
            # readlines() drains the response before it closes, so the generator
            # returned by from_csv_data stays usable after the `with` block
            return cls.from_csv_data([row.decode("utf-8") for row in data.readlines()])


if __name__ == "__main__":
    for row in ReservoirDayData.scrape():
        print(row)
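
# A minimal usage sketch for the database path above. The DSN and the schema of
# the "waterdata" table are assumptions (this module only fixes the table name);
# ON CONFLICT DO NOTHING only skips duplicates if the table carries a unique
# constraint, e.g. on the date column. Adjusted to a real database, the
# scrape-and-store loop would look roughly like:
#
#     conn = psycopg2.connect("dbname=reservoirs user=postgres")  # hypothetical DSN
#     for day in ReservoirDayData.scrape():
#         day.save_to_db(conn)
#     conn.close()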