from __future__ import annotations from pathlib import Path from typing import List, Dict, Any import pandas as pd from django.conf import settings from .base import DataProvider class CsvDataProvider(DataProvider): """ Simple provider reading local CSVs under BASE_DIR/data/. Expected layout (current): SAMI: data/sami/population.csv -> cols: city, N data/sami/{indicator}.csv -> cols: city, value Sites (competition POIs - DENUE-like): data/denue/{city}_{business}.csv -> cols: name, lat, lon, category (name/category optional) Sites (demand pop grid): data/popgrid/{city}_grid.csv -> cols: cell_id, lat, lon, pop """ def __init__(self, base_dir: str | Path | None = None): self.base_dir = Path(base_dir) if base_dir else Path(settings.BASE_DIR) / "data" def _exists(self, *parts: str) -> bool: return (self.base_dir.joinpath(*parts)).exists() # ---------- Common ---------- def health(self) -> Dict[str, Any]: missing = [] # We can only check basics here. if not self._exists("sami", "population.csv"): missing.append("data/sami/population.csv") ok = len(missing) == 0 return { "provider": "csv-data", "ok": ok, "base_dir": str(self.base_dir), "missing": missing, } # ---------- SAMI ---------- def indicator(self, indicator: str, cities: List[str]) -> pd.DataFrame: pop_path = self.base_dir / "sami" / "population.csv" ind_path = self.base_dir / "sami" / f"{indicator}.csv" pop = pd.read_csv(pop_path) # cols: city, N ind = pd.read_csv(ind_path) # cols: city, value df = pd.merge(ind, pop, on="city", how="inner") if cities: df = df[df["city"].isin(cities)].copy() # Ensure numeric df["value"] = pd.to_numeric(df["value"], errors="coerce") df["N"] = pd.to_numeric(df["N"], errors="coerce") df = df.dropna(subset=["value", "N"]) return df[["city", "value", "N"]] # ---------- Sites: competition (POIs) ---------- def denue(self, city: str, business: str) -> pd.DataFrame: """ Reads POIs from data/denue/{city}_{business}.csv Expected columns: - lat (float), lon (float) - name (str, optional), category (str, optional) """ path = self.base_dir / "denue" / f"{city}_{business}.csv" if not path.exists(): return pd.DataFrame(columns=["name", "lat", "lon", "category"]) df = pd.read_csv(path) # minimal columns if "lat" not in df.columns or "lon" not in df.columns: return pd.DataFrame(columns=["name", "lat", "lon", "category"]) # quick cleaning df["lat"] = pd.to_numeric(df["lat"], errors="coerce") df["lon"] = pd.to_numeric(df["lon"], errors="coerce") df = df.dropna(subset=["lat", "lon"]).copy() if "name" not in df.columns: df["name"] = None if "category" not in df.columns: df["category"] = business return df[["name", "lat", "lon", "category"]] # ---------- Sites: demand (population grid) ---------- def popgrid(self, city: str) -> pd.DataFrame: """ Loads population grid points from data/popgrid/{city}_grid.csv Required columns: lat, lon, pop Optional: cell_id """ path = self.base_dir / "popgrid" / f"{city}_grid.csv" if not path.exists(): return pd.DataFrame(columns=["cell_id", "lat", "lon", "pop"]) df = pd.read_csv(path) for col in ["lat", "lon", "pop"]: if col not in df.columns: return pd.DataFrame(columns=["cell_id", "lat", "lon", "pop"]) # numeric & drop invalid df["lat"] = pd.to_numeric(df["lat"], errors="coerce") df["lon"] = pd.to_numeric(df["lon"], errors="coerce") df["pop"] = pd.to_numeric(df["pop"], errors="coerce") df = df.dropna(subset=["lat", "lon", "pop"]).copy() if "cell_id" not in df.columns: df["cell_id"] = None return df[["cell_id", "lat", "lon", "pop"]] # ---------- Optional: city boundary ---------- def city_boundary(self, city: str) -> Dict[str, Any]: # Not implemented yet; return empty dict. return {} # ---------- Backwards compatibility alias ---------- # Some earlier code used "grid(city)" for population grid. def grid(self, city: str) -> pd.DataFrame: return self.popgrid(city)