college/Summer-2024/CS-3443/LakeWatch/LakeWatchScraper/lakewatchscraper/waterdata/reservoir.py

95 lines
3.4 KiB
Python

from __future__ import annotations
import csv
import http.client
from urllib import request
from dataclasses import dataclass
from collections.abc import Iterable
import psycopg2
from psycopg2 import sql
@dataclass
class ReservoirDayData:
"""A Representation of a single row of data from waterdatafortexas for Canyon Lake
Data is kept as `str` types to make it easier for export. Very little or no actual parsing of
the data should be done.
Attributes:
date: YYYY-MM-DD, when the data was captured
water_level: feet above vertical datum
surface_area: water coverage of the lake in acres
reservoir_storage: actual storage at measured lake elevation
conservation_storage: reservoir storage - dead pool capacity (note: conservation storage is capped at conservation capacity)
percent_full: 100 * conservation storage/conservation capacity
conservation_capacity: storage at conservation pool elevation - dead pool capacity
dead_pool_capacity: storage at dead pool elevation
"""
date: str
water_level: float | None
surface_area: float | None
reservoir_storage: float
conservation_storage: float
percent_full: float
conservation_capacity: float
dead_pool_capacity: float
@classmethod
def from_csv_data(cls, data: list[str]) -> Iterable[ReservoirDayData]:
# Strip comment lines from CSV
data = [row for row in data if not row.startswith("#")]
for row in csv.DictReader(data, delimiter=",", strict=True):
date = row["date"]
water_level = float(row["water_level"]) if row["water_level"] else None
surface_area = float(row["surface_area"]) if row["surface_area"] else None
reservoir_storage = float(row["reservoir_storage"])
conservation_storage = float(row["conservation_storage"])
percent_full = float(row["percent_full"])
conservation_capacity = float(row["conservation_capacity"])
dead_pool_capacity = float(row["dead_pool_capacity"])
yield ReservoirDayData(
date,
water_level,
surface_area,
reservoir_storage,
conservation_storage,
percent_full,
conservation_capacity,
dead_pool_capacity,
)
def save_to_db(self, conn: psycopg2.extensions.connection):
with conn.cursor() as cur:
cur.execute(
sql.SQL(
"INSERT INTO {} values (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING"
).format(sql.Identifier("waterdata")),
[
self.date,
self.water_level,
self.surface_area,
self.reservoir_storage,
self.conservation_storage,
self.percent_full,
self.conservation_capacity,
self.dead_pool_capacity,
],
)
conn.commit()
@classmethod
def scrape(
cls,
url: str = "https://www.waterdatafortexas.org/reservoirs/individual/canyon.csv",
):
data: http.client.HTTPResponse
with request.urlopen(url) as data:
return cls.from_csv_data([row.decode("utf-8") for row in data.readlines()])
if __name__ == "__main__":
for row in ReservoirDayData.scrape():
print(row)