# Listing metadata: 118 lines, 4.5 KiB, Python.
from __future__ import annotations
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
import pandas as pd
|
|
from django.conf import settings
|
|
|
|
from .base import DataProvider
|
|
|
|
|
|
class CsvDataProvider(DataProvider):
    """
    Simple provider reading local CSVs under BASE_DIR/data/.

    Expected layout (current):
      SAMI:
        data/sami/population.csv          -> cols: city, N
        data/sami/{indicator}.csv         -> cols: city, value

      Sites (competition POIs - DENUE-like):
        data/denue/{city}_{business}.csv  -> cols: name, lat, lon, category
                                             (name/category optional)

      Sites (demand pop grid):
        data/popgrid/{city}_grid.csv      -> cols: cell_id, lat, lon, pop
    """

    # NOTE(review): `indicator`, `city` and `business` are interpolated into
    # file names without validation. If they can originate from request
    # input, they should be validated upstream - confirm against callers.

    # Output schemas, shared by the "no data" and the normal return paths so
    # both always expose the same columns in the same order.
    _DENUE_COLUMNS = ("name", "lat", "lon", "category")
    _POPGRID_COLUMNS = ("cell_id", "lat", "lon", "pop")

    def __init__(self, base_dir: str | Path | None = None):
        """Store the data root.

        Args:
            base_dir: Directory holding the ``sami``/``denue``/``popgrid``
                sub-folders. When falsy (``None`` or ``""``), defaults to
                ``settings.BASE_DIR / "data"``.
        """
        self.base_dir = Path(base_dir) if base_dir else Path(settings.BASE_DIR) / "data"

    def _exists(self, *parts: str) -> bool:
        """Return True if ``base_dir`` joined with *parts* exists on disk."""
        return self.base_dir.joinpath(*parts).exists()

    @staticmethod
    def _read_numeric_csv(path: Path, required: List[str]) -> pd.DataFrame | None:
        """Load *path* and keep only rows whose *required* columns are numeric.

        Args:
            path: CSV file to read.
            required: Columns that must be present; each is coerced with
                ``pd.to_numeric(errors="coerce")`` and rows where any of them
                ends up NaN are dropped.

        Returns:
            A fresh DataFrame (all original columns kept), or ``None`` when
            the file does not exist, is empty, or lacks a required column.
        """
        if not path.exists():
            return None
        try:
            frame = pd.read_csv(path)
        except pd.errors.EmptyDataError:
            # A zero-byte file carries no data: treat it like a missing file
            # rather than letting the parser error escape.
            return None
        if not set(required).issubset(frame.columns):
            return None
        for col in required:
            frame[col] = pd.to_numeric(frame[col], errors="coerce")
        return frame.dropna(subset=required).copy()

    # ---------- Common ----------
    def health(self) -> Dict[str, Any]:
        """Report whether the basic data files are present.

        Only ``sami/population.csv`` is checked here.

        Returns:
            Dict with keys ``provider``, ``ok`` (True when nothing is
            missing), ``base_dir`` (as a string) and ``missing`` (list of
            the missing relative paths).
        """
        missing = []
        # We can only check basics here.
        if not self._exists("sami", "population.csv"):
            missing.append("data/sami/population.csv")
        return {
            "provider": "csv-data",
            "ok": not missing,
            "base_dir": str(self.base_dir),
            "missing": missing,
        }

    # ---------- SAMI ----------
    def indicator(self, indicator: str, cities: List[str]) -> pd.DataFrame:
        """Join an indicator table with the population table on ``city``.

        Args:
            indicator: Base name of the CSV under ``sami/`` (without ``.csv``).
            cities: Cities to keep; when empty/falsy, all cities are returned.

        Returns:
            DataFrame with columns ``city``, ``value``, ``N``; rows whose
            ``value`` or ``N`` is not numeric are dropped.

        Raises:
            FileNotFoundError: propagated from ``pd.read_csv`` when either
                CSV is missing.
            KeyError: when a required column (``city``/``N`` in population,
                ``city``/``value`` in the indicator file) is absent.
        """
        sami_dir = self.base_dir / "sami"
        pop = pd.read_csv(sami_dir / "population.csv")  # cols: city, N
        ind = pd.read_csv(sami_dir / f"{indicator}.csv")  # cols: city, value

        # Project to the needed columns before merging: otherwise a stray
        # "N" column in the indicator file (or "value" in population) gets
        # suffixed by merge and the lookups below fail.
        df = pd.merge(
            ind[["city", "value"]],
            pop[["city", "N"]],
            on="city",
            how="inner",
        )
        if cities:
            df = df[df["city"].isin(cities)].copy()

        # Ensure numeric
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df["N"] = pd.to_numeric(df["N"], errors="coerce")
        df = df.dropna(subset=["value", "N"])
        return df[["city", "value", "N"]]

    # ---------- Sites: competition (POIs) ----------
    def denue(self, city: str, business: str) -> pd.DataFrame:
        """
        Reads POIs from data/denue/{city}_{business}.csv

        Expected columns:
          - lat (float), lon (float)
          - name (str, optional), category (str, optional)

        Returns a DataFrame with columns name, lat, lon, category. Rows with
        non-numeric lat/lon are dropped. A missing ``name`` column is filled
        with None and a missing ``category`` column with *business*. When the
        file is missing, empty, or lacks lat/lon, an empty frame with the
        same columns is returned.
        """
        path = self.base_dir / "denue" / f"{city}_{business}.csv"
        df = self._read_numeric_csv(path, ["lat", "lon"])
        if df is None:
            return pd.DataFrame(columns=list(self._DENUE_COLUMNS))

        if "name" not in df.columns:
            df["name"] = None
        if "category" not in df.columns:
            df["category"] = business
        return df[list(self._DENUE_COLUMNS)]

    # ---------- Sites: demand (population grid) ----------
    def popgrid(self, city: str) -> pd.DataFrame:
        """
        Loads population grid points from data/popgrid/{city}_grid.csv

        Required columns: lat, lon, pop
        Optional: cell_id

        Returns a DataFrame with columns cell_id, lat, lon, pop. Rows with
        non-numeric lat/lon/pop are dropped and a missing ``cell_id`` column
        is filled with None. When the file is missing, empty, or lacks a
        required column, an empty frame with the same columns is returned.
        """
        path = self.base_dir / "popgrid" / f"{city}_grid.csv"
        df = self._read_numeric_csv(path, ["lat", "lon", "pop"])
        if df is None:
            return pd.DataFrame(columns=list(self._POPGRID_COLUMNS))

        if "cell_id" not in df.columns:
            df["cell_id"] = None
        return df[list(self._POPGRID_COLUMNS)]

    # ---------- Optional: city boundary ----------
    def city_boundary(self, city: str) -> Dict[str, Any]:
        """Return the boundary for *city*; not implemented yet, always ``{}``."""
        return {}

    # ---------- Backwards compatibility alias ----------
    # Some earlier code used "grid(city)" for population grid.
    def grid(self, city: str) -> pd.DataFrame:
        """Alias of :meth:`popgrid`, kept for older callers."""
        return self.popgrid(city)