# Source code for geoprior.scripts.make_boundary

r"""Script helpers for building study-area boundary artifacts."""

from __future__ import annotations

import argparse
from pathlib import Path
from typing import TYPE_CHECKING, Any

import numpy as np
import pandas as pd

from . import config as cfg
from . import utils

if TYPE_CHECKING:
    import geopandas as gpd  # noqa


# City names used throughout this script. Each is looked up in
# ``cfg.CITY_CANON`` by its short key ("ns" / "zh"); the literal on the
# right is the fallback when that key is absent from the mapping.
_CITY_A = cfg.CITY_CANON.get("ns", "Nansha")
_CITY_B = cfg.CITY_CANON.get("zh", "Zhongshan")


def _require_geopandas():
    """Import geopandas lazily and return the module.

    Returns
    -------
    module
        The imported ``geopandas`` module.

    Raises
    ------
    SystemExit
        If importing geopandas fails for any reason; the original
        exception is chained as the cause and echoed in the message.
    """
    try:
        import geopandas
    except Exception as exc:
        # Any import-time failure is reported as a clean CLI exit.
        message = (
            "Need geopandas installed for boundary export. "
            f"Error: {exc}"
        )
        raise SystemExit(message) from exc
    else:
        return geopandas


def _require_shapely_boundary_tools():
    """Import the shapely pieces needed to build hull polygons.

    Returns
    -------
    tuple
        ``(MultiPoint, concave_hull)`` — the ``shapely.geometry``
        class and the top-level ``shapely.concave_hull`` function,
        in that order.

    Raises
    ------
    SystemExit
        If either import fails; the original exception is chained
        as the cause and echoed in the message.
    """
    try:
        from shapely import concave_hull as hull_fn
        from shapely.geometry import MultiPoint as multipoint_cls
    except Exception as exc:
        # Any import-time failure is reported as a clean CLI exit.
        message = (
            "Need shapely installed for boundary export. "
            f"Error: {exc}"
        )
        raise SystemExit(message) from exc
    else:
        return multipoint_cls, hull_fn


def _pick_paths(
    art: utils.Artifacts,
    split: str,
) -> tuple[Path | None, Path | None]:
    """Choose the (eval, future) forecast CSV pair for ``split``.

    Parameters
    ----------
    art
        Object exposing ``forecast_val_csv``, ``forecast_future_csv``,
        ``forecast_test_csv`` and ``forecast_test_future_csv``.
    split
        ``"val"`` or ``"test"`` selects that pair unconditionally.
        Any other value prefers the test pair when both of its
        entries are not ``None`` and otherwise uses the validation
        pair.

    Returns
    -------
    tuple
        ``(eval_csv, future_csv)``; either entry may be ``None``.
    """

    def _val_pair():
        return art.forecast_val_csv, art.forecast_future_csv

    def _test_pair():
        return art.forecast_test_csv, art.forecast_test_future_csv

    if split == "val":
        return _val_pair()
    if split == "test":
        return _test_pair()

    # Fallback mode: the test pair wins only when it is complete.
    test_ready = (
        art.forecast_test_csv is not None
        and art.forecast_test_future_csv is not None
    )
    return _test_pair() if test_ready else _val_pair()


def _load_xy(path: str) -> np.ndarray:
    """Read a CSV and return its finite ``(coord_x, coord_y)`` rows.

    Parameters
    ----------
    path
        Location of the CSV; passed through ``utils.as_path``.

    Returns
    -------
    numpy.ndarray
        Float array with two columns (x, y). Rows where either
        coordinate is non-numeric, NaN or infinite are dropped.

    Raises
    ------
    KeyError
        If ``coord_x`` or ``coord_y`` is not a column after
        ``utils.ensure_columns`` has run.
    """
    frame = pd.read_csv(utils.as_path(path))
    # NOTE(review): the return value is discarded, so this presumably
    # renames aliased columns in place — confirm against utils.
    utils.ensure_columns(frame, aliases=cfg._BASE_ALIASES)

    wanted = ("coord_x", "coord_y")
    absent = [name for name in wanted if name not in frame.columns]
    if absent:
        # Report the first missing column, in coord_x/coord_y order.
        raise KeyError(f"{path}: missing {absent[0]}")

    # Unparseable cells become NaN and are filtered out below.
    columns = [
        pd.to_numeric(frame[name], errors="coerce").to_numpy(float)
        for name in wanted
    ]
    coords = np.column_stack(columns)
    keep = np.isfinite(coords).all(axis=1)
    return coords[keep]


def _resolve_city(
    *,
    city: str,
    src: str | None,
    eval_csv: str | None,
    future_csv: str | None,
    split: str,
) -> dict[str, Any]:
    """Build the job record (name + CSV paths) for one city.

    Explicit ``eval_csv`` and ``future_csv`` take precedence when both
    are given; otherwise the pair is discovered under ``src`` via
    ``utils.detect_artifacts`` and ``_pick_paths``.

    Returns
    -------
    dict
        Keys ``"name"``, ``"eval_csv"`` and ``"future_csv"``; the two
        paths are stored as strings.

    Raises
    ------
    ValueError
        If neither a complete explicit pair nor ``src`` is provided.
    FileNotFoundError
        If discovery under ``src`` yields ``None`` for either file.
    """
    if eval_csv and future_csv:
        return {
            "name": city,
            "eval_csv": str(utils.as_path(eval_csv)),
            "future_csv": str(utils.as_path(future_csv)),
        }

    if not src:
        raise ValueError(
            f"{city}: provide --*-src or --*-eval/--*-future"
        )

    artifacts = utils.detect_artifacts(src)
    eval_path, future_path = _pick_paths(artifacts, split)

    if eval_path is None or future_path is None:
        raise FileNotFoundError(
            f"{city}: missing eval/future under {src}"
        )

    return {
        "name": city,
        "eval_csv": str(eval_path),
        "future_csv": str(future_path),
    }


def _poly_from_points(
    xy: np.ndarray,
    *,
    method: str,
    alpha: float,
):
    """Build a hull geometry around a set of points.

    Parameters
    ----------
    xy
        Iterable of coordinate rows; each row is converted to a tuple
        and fed to ``shapely.geometry.MultiPoint``.
    method
        ``"convex"`` returns the convex hull. Any other value requests
        a concave hull.
    alpha
        Passed to ``shapely.concave_hull`` as ``ratio`` (after
        ``float()`` conversion). Ignored for the convex method.

    Returns
    -------
    shapely geometry
        The requested hull. If the concave hull computation raises,
        the convex hull is returned instead and a ``RuntimeWarning``
        is emitted so the downgrade is not silent.

    Raises
    ------
    SystemExit
        If the shapely tools cannot be imported.
    """
    import warnings

    MultiPoint, concave_hull = (
        _require_shapely_boundary_tools()
    )

    pts = MultiPoint([tuple(p) for p in xy])

    if method == "convex":
        return pts.convex_hull

    try:
        return concave_hull(pts, ratio=float(alpha))
    except Exception as exc:
        # Best-effort fallback is kept, but tell the user that the
        # geometry they get is not the one they asked for.
        warnings.warn(
            "Concave hull failed "
            f"({type(exc).__name__}: {exc}); "
            "falling back to convex hull.",
            RuntimeWarning,
            stacklevel=2,
        )
        return pts.convex_hull


def _make_boundary_gdf(
    *,
    city: str,
    poly: Any,
):
    """Wrap one polygon in a single-row GeoDataFrame.

    Parameters
    ----------
    city
        Value stored in the ``city`` column.
    poly
        Geometry stored in the ``geometry`` column.

    Returns
    -------
    geopandas.GeoDataFrame
        One row with columns ``city`` and ``geometry``. The frame is
        created with ``crs=None``, i.e. no CRS is attached.

    Raises
    ------
    SystemExit
        If geopandas cannot be imported.
    """
    geopandas = _require_geopandas()
    record = {"city": city, "geometry": poly}
    return geopandas.GeoDataFrame(
        [record],
        geometry="geometry",
        crs=None,
    )


def _slug_city(city: str) -> str:
    """Return a filename-friendly form of ``city``.

    The value is converted to ``str``, stripped of surrounding
    whitespace, lower-cased, and every space character is replaced
    by an underscore.
    """
    cleaned = str(city).strip().lower()
    # Splitting on a single space and re-joining maps each space to
    # exactly one underscore (consecutive spaces are not collapsed).
    return "_".join(cleaned.split(" "))


def _out_stem(
    out_arg: str,
    *,
    city: str,
) -> Path:
    """Compute the per-city output stem and create its directory.

    Parameters
    ----------
    out_arg
        Raw ``--out`` value; resolved through
        ``utils.resolve_out_out``.
    city
        City name; its slug is appended to the stem as ``_<slug>``.

    Returns
    -------
    pathlib.Path
        ``<parent>/<name>_<slug>`` where any trailing suffix of the
        resolved path has been removed first. The parent directory
        is created if it does not exist.
    """
    # NOTE(review): presumably maps bare names into the scripts
    # output folder — confirm against utils.resolve_out_out.
    resolved = utils.resolve_out_out(out_arg)

    # Drop a trailing extension so callers can add their own.
    trimmed = resolved.with_suffix("") if resolved.suffix else resolved

    target = trimmed.parent / f"{trimmed.name}_{_slug_city(city)}"
    target.parent.mkdir(parents=True, exist_ok=True)
    return target


def _write_checked(
    gdf: Any,
    path: Path,
    *,
    driver: str | None = None,
) -> Path:
    """Write ``gdf`` to ``path`` and verify the file now exists.

    Parameters
    ----------
    gdf
        Object exposing ``to_file(path, ...)``.
    path
        Destination file; its parent directory is created if needed.
    driver
        Forwarded to ``to_file`` as ``driver=`` when not ``None``;
        otherwise ``to_file`` is called with the path only.

    Returns
    -------
    pathlib.Path
        The resolved destination path.

    Raises
    ------
    FileNotFoundError
        If ``to_file`` returns but ``path`` does not exist.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    # Only pass ``driver`` through when the caller chose one.
    options = {} if driver is None else {"driver": driver}
    gdf.to_file(path, **options)

    if path.exists():
        return path.resolve()

    raise FileNotFoundError(
        "Boundary export reported success but "
        f"file is missing: {path}"
    )


def _with_ext(stem: Path, ext: str) -> Path:
    """Return ``stem`` with ``ext`` appended to its last component."""
    # ``Path.with_suffix`` would replace everything after the last dot
    # in the name, so a stem such as ``run.1_nansha`` would collapse to
    # ``run.geojson`` (and collide across cities). Appending keeps the
    # full stem intact.
    return stem.parent / f"{stem.name}{ext}"


def make_boundary_main(
    argv: list[str] | None = None,
    *,
    prog: str | None = None,
) -> list[Path]:
    """Build and export a boundary polygon for each selected city.

    For every city the eval and future forecast CSVs are loaded,
    their coordinates are stacked, a hull polygon is computed, and
    the result is written as GeoJSON and/or Shapefile.

    Parameters
    ----------
    argv
        Command-line arguments; ``None`` lets argparse read
        ``sys.argv``.
    prog
        Program name shown in help; defaults to ``"make-boundary"``.

    Returns
    -------
    list of pathlib.Path
        Resolved paths of all files written, in write order.

    Raises
    ------
    ValueError, FileNotFoundError
        Propagated from ``_resolve_city`` when inputs for a city
        cannot be determined, and from ``_write_checked`` when an
        export leaves no file behind.
    SystemExit
        On argparse errors or missing geopandas/shapely.
    """
    ap = argparse.ArgumentParser(
        prog=prog or "make-boundary",
        description=(
            "Create a boundary polygon from forecast points."
        ),
    )
    utils.add_city_flags(ap, default_both=True)

    ap.add_argument("--ns-src", type=str, default=None)
    ap.add_argument("--zh-src", type=str, default=None)

    ap.add_argument("--ns-eval", type=str, default=None)
    ap.add_argument("--zh-eval", type=str, default=None)
    ap.add_argument("--ns-future", type=str, default=None)
    ap.add_argument("--zh-future", type=str, default=None)

    ap.add_argument(
        "--split",
        choices=["auto", "val", "test"],
        default="auto",
    )
    ap.add_argument(
        "--method",
        choices=["convex", "concave"],
        default="convex",
    )
    ap.add_argument(
        "--alpha",
        type=float,
        default=0.2,
        help="Concave hull ratio (0..1).",
    )
    ap.add_argument(
        "--format",
        choices=["geojson", "shp", "both"],
        default="geojson",
    )
    ap.add_argument(
        "--out",
        type=str,
        default="boundary",
        help=(
            "Output stem/path. Bare names go to "
            "scripts/out; explicit folders are kept."
        ),
    )

    args = ap.parse_args(argv)

    utils.ensure_script_dirs()

    # Fall back to both cities when the resolver yields nothing.
    cities0 = utils.resolve_cities(args) or [
        _CITY_A,
        _CITY_B,
    ]

    # Resolve inputs for every city up front so that missing files
    # are reported before any output is written.
    jobs: list[dict[str, Any]] = []
    if _CITY_A in cities0:
        jobs.append(
            _resolve_city(
                city=_CITY_A,
                src=args.ns_src,
                eval_csv=args.ns_eval,
                future_csv=args.ns_future,
                split=args.split,
            )
        )
    if _CITY_B in cities0:
        jobs.append(
            _resolve_city(
                city=_CITY_B,
                src=args.zh_src,
                eval_csv=args.zh_eval,
                future_csv=args.zh_future,
                split=args.split,
            )
        )

    written: list[Path] = []

    for j in jobs:
        city = str(j["name"])

        # The boundary covers both the eval and the future points.
        xy1 = _load_xy(j["eval_csv"])
        xy2 = _load_xy(j["future_csv"])
        xy = np.vstack([xy1, xy2])

        poly = _poly_from_points(
            xy,
            method=str(args.method),
            alpha=float(args.alpha),
        )
        gdf = _make_boundary_gdf(
            city=city,
            poly=poly,
        )

        stem = _out_stem(args.out, city=city)

        if args.format in ("geojson", "both"):
            p = _write_checked(
                gdf,
                _with_ext(stem, ".geojson"),
                driver="GeoJSON",
            )
            written.append(p)
            print(f"[OK] wrote {p}")

        if args.format in ("shp", "both"):
            # No explicit driver: the writer picks one from ".shp".
            p = _write_checked(
                gdf,
                _with_ext(stem, ".shp"),
            )
            written.append(p)
            print(f"[OK] wrote {p}")

    return written
def main(
    argv: list[str] | None = None,
) -> None:
    """Console entry point: run ``make_boundary_main`` and drop its result.

    Parameters
    ----------
    argv
        Command-line arguments forwarded unchanged; ``None`` lets
        argparse read ``sys.argv``.
    """
    # The list of written paths is only useful to programmatic callers.
    _ = make_boundary_main(argv)
    return None
# Run the CLI only when this module is executed as the main program.
if __name__ == "__main__":
    main()