Ekaropolus 0eb2b393f2
All checks were successful
continuous-integration/drone/push Build is passing
SAMI Functionality add
2025-09-16 16:18:45 -06:00

118 lines
4.5 KiB
Python

from __future__ import annotations
from pathlib import Path
from typing import List, Dict, Any
import pandas as pd
from django.conf import settings
from .base import DataProvider
class CsvDataProvider(DataProvider):
    """
    Simple provider reading local CSVs under BASE_DIR/data/.

    Expected layout (current):

    SAMI:
        data/sami/population.csv   -> cols: city, N
        data/sami/{indicator}.csv  -> cols: city, value
    Sites (competition POIs - DENUE-like):
        data/denue/{city}_{business}.csv -> cols: name, lat, lon, category
                                            (name/category optional)
    Sites (demand pop grid):
        data/popgrid/{city}_grid.csv -> cols: cell_id, lat, lon, pop
    """

    # Output schemas. Kept in one place so the "nothing usable" return paths
    # and the normal return paths of each loader cannot drift apart.
    _DENUE_COLUMNS = ("name", "lat", "lon", "category")
    _POPGRID_COLUMNS = ("cell_id", "lat", "lon", "pop")

    def __init__(self, base_dir: str | Path | None = None):
        """
        Create a provider rooted at ``base_dir``.

        When ``base_dir`` is omitted (or falsy) the root defaults to
        ``settings.BASE_DIR / "data"``.
        """
        self.base_dir = Path(base_dir) if base_dir else Path(settings.BASE_DIR) / "data"

    def _exists(self, *parts: str) -> bool:
        """Return True if the path ``base_dir/parts...`` exists on disk."""
        return self.base_dir.joinpath(*parts).exists()

    @staticmethod
    def _read_csv_or_none(path: Path) -> pd.DataFrame | None:
        """
        Read ``path`` as CSV, or return None when there is nothing to read.

        "Nothing to read" means the file does not exist or pandas reports it
        as empty (no header, no rows). Other parse errors still propagate.
        """
        if not path.exists():
            return None
        try:
            return pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file is treated like a missing file so the
            # best-effort loaders can return their empty schema.
            return None

    # ---------- Common ----------
    def health(self) -> Dict[str, Any]:
        """
        Report whether the minimum required data files are present.

        Only ``sami/population.csv`` is checked; per-indicator, DENUE and
        population-grid files depend on request parameters and cannot be
        verified up front.

        Returns a dict with keys ``provider``, ``ok``, ``base_dir`` and
        ``missing`` (list of missing file descriptions).
        """
        missing = []
        # We can only check basics here.
        if not self._exists("sami", "population.csv"):
            missing.append("data/sami/population.csv")
        return {
            "provider": "csv-data",
            "ok": not missing,
            "base_dir": str(self.base_dir),
            "missing": missing,
        }

    # ---------- SAMI ----------
    def indicator(self, indicator: str, cities: List[str]) -> pd.DataFrame:
        """
        Load one SAMI indicator joined with city population.

        Reads ``sami/population.csv`` (cols: city, N) and
        ``sami/{indicator}.csv`` (cols: city, value), inner-joins them on
        ``city`` and optionally restricts the result to ``cities``. Rows whose
        ``value`` or ``N`` is not numeric are dropped.

        Returns a DataFrame with columns ``city``, ``value``, ``N``.

        Raises:
            FileNotFoundError: if either CSV is missing.
            KeyError: if a required column is absent from either CSV.
        """
        # NOTE(review): `indicator` is interpolated into a file name; if it can
        # originate from request input it should be validated by the caller —
        # confirm against call sites.
        sami_dir = self.base_dir / "sami"
        pop = pd.read_csv(sami_dir / "population.csv")  # cols: city, N
        ind = pd.read_csv(sami_dir / f"{indicator}.csv")  # cols: city, value

        # Project to the needed columns *before* merging. Otherwise any extra
        # column shared by both files (e.g. an "N" column in the indicator
        # file) is suffixed to N_x / N_y by the merge and the lookups below
        # fail with KeyError.
        df = pd.merge(
            ind[["city", "value"]],
            pop[["city", "N"]],
            on="city",
            how="inner",
        )
        if cities:
            df = df[df["city"].isin(cities)].copy()

        # Ensure numeric; unparseable entries become NaN and are dropped.
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df["N"] = pd.to_numeric(df["N"], errors="coerce")
        df = df.dropna(subset=["value", "N"])
        return df[["city", "value", "N"]]

    # ---------- Sites: competition (POIs) ----------
    def denue(self, city: str, business: str) -> pd.DataFrame:
        """
        Reads POIs from data/denue/{city}_{business}.csv

        Expected columns:
        - lat (float), lon (float)
        - name (str, optional), category (str, optional)

        Best-effort: if the file is missing, empty, or lacks ``lat``/``lon``,
        an empty DataFrame with the output columns is returned instead of
        raising. Rows with non-numeric coordinates are dropped. A missing
        ``name`` column is filled with None; a missing ``category`` column is
        filled with ``business``.

        Returns a DataFrame with columns ``name``, ``lat``, ``lon``,
        ``category``.
        """
        # NOTE(review): `city` / `business` become part of a file name —
        # confirm they are validated upstream if they come from user input.
        columns = list(self._DENUE_COLUMNS)
        df = self._read_csv_or_none(self.base_dir / "denue" / f"{city}_{business}.csv")
        # minimal columns
        if df is None or not {"lat", "lon"}.issubset(df.columns):
            return pd.DataFrame(columns=columns)

        # quick cleaning
        df["lat"] = pd.to_numeric(df["lat"], errors="coerce")
        df["lon"] = pd.to_numeric(df["lon"], errors="coerce")
        df = df.dropna(subset=["lat", "lon"]).copy()
        if "name" not in df.columns:
            df["name"] = None
        if "category" not in df.columns:
            df["category"] = business
        return df[columns]

    # ---------- Sites: demand (population grid) ----------
    def popgrid(self, city: str) -> pd.DataFrame:
        """
        Loads population grid points from data/popgrid/{city}_grid.csv

        Required columns: lat, lon, pop
        Optional: cell_id

        Best-effort: if the file is missing, empty, or lacks a required
        column, an empty DataFrame with the output columns is returned
        instead of raising. Rows with a non-numeric ``lat``, ``lon`` or
        ``pop`` are dropped. A missing ``cell_id`` column is filled with None.

        Returns a DataFrame with columns ``cell_id``, ``lat``, ``lon``,
        ``pop``.
        """
        # NOTE(review): `city` becomes part of a file name — confirm it is
        # validated upstream if it comes from user input.
        columns = list(self._POPGRID_COLUMNS)
        required = ["lat", "lon", "pop"]
        df = self._read_csv_or_none(self.base_dir / "popgrid" / f"{city}_grid.csv")
        if df is None or not set(required).issubset(df.columns):
            return pd.DataFrame(columns=columns)

        # numeric & drop invalid
        for col in required:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna(subset=required).copy()
        if "cell_id" not in df.columns:
            df["cell_id"] = None
        return df[columns]

    # ---------- Optional: city boundary ----------
    def city_boundary(self, city: str) -> Dict[str, Any]:
        """Return the boundary for ``city``; not implemented yet, always ``{}``."""
        # Not implemented yet; return empty dict.
        return {}

    # ---------- Backwards compatibility alias ----------
    # Some earlier code used "grid(city)" for population grid.
    def grid(self, city: str) -> pd.DataFrame:
        """Backwards-compatible alias for :meth:`popgrid`."""
        return self.popgrid(city)