Source code for geoprior.cli.sm3_collect_summaries

# SPDX-License-Identifier: Apache-2.0
# GeoPrior-v3 - https://github.com/earthai-tech/geoprior-v3
# Copyright (c) 2026-present
# Author: LKouadio <https://lkouadio.com>

"""
Collect SM3 per-regime summaries into one combined table.

Expected structure under ``--suite-root`` (scanned recursively)::

    sm3_tau_<reg>_50/
        sm3_synth_summary.csv
    sm3_both_<reg>_50/
        sm3_synth_summary.csv

Writes:
- ``--out-csv``: combined CSV (long format)
- ``--out-json``: combined JSON (records)

Example
-------
.. code-block:: bash

   python nat.com/sm3_collect_summaries.py \
     --suite-root results/sm3_tau_suite_20260303-120000 \
     --out-csv results/.../combined.csv \
     --out-json results/.../combined.json
"""

from __future__ import annotations

import argparse
import json
import re
from pathlib import Path

import pandas as pd


def infer_regime(folder_name: str) -> str:
    """Extract the regime label from an SM3 run-folder name.

    Folder names ending in ``sm3_tau_<reg>_50`` or ``sm3_both_<reg>_50``
    yield ``<reg>``. Any name that does not fit that pattern is returned
    unchanged, so callers always get a usable label.

    Parameters
    ----------
    folder_name : str
        Name of the run directory (not a full path).

    Returns
    -------
    str
        The ``<reg>`` part of the name, or ``folder_name`` itself when the
        pattern is absent.
    """
    match = re.search(r"sm3_(?:tau|both)_(.+?)_50$", folder_name)
    return folder_name if match is None else match.group(1)
def _nan_to_none(value):
    """Return ``None`` for a floating-point NaN, any other value unchanged."""
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return None
    return value


def _load_summaries(root: Path) -> list[pd.DataFrame]:
    """Read every usable ``sm3_synth_summary.csv`` found under *root*.

    Each returned frame has ``regime`` and ``run_dir`` prepended as its
    first two columns. Files that cannot be read, are empty, or lack a
    ``metric`` column are reported on stdout and skipped.
    """
    frames = []
    # Sorted so the combined table does not depend on the order in which
    # the filesystem happens to list directories.
    for csv_path in sorted(root.rglob("sm3_synth_summary.csv")):
        run_dir = csv_path.parent
        regime = infer_regime(run_dir.name)

        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            # Best-effort collection: one bad file must not abort the run.
            print(f"[skip] failed to read {csv_path}: {e}")
            continue

        if df.empty or "metric" not in df.columns:
            print(f"[skip] unexpected format: {csv_path}")
            continue

        df = df.copy()
        df.insert(0, "regime", regime)
        df.insert(1, "run_dir", str(run_dir))
        frames.append(df)
    return frames


def main(argv: list[str] | None = None) -> None:
    """Combine all SM3 per-regime summaries into one CSV and one JSON file.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments (``--suite-root``, ``--out-csv``,
        ``--out-json``; all required). When ``None`` (the default) the
        arguments are taken from ``sys.argv``.

    Raises
    ------
    FileNotFoundError
        If the suite root does not exist.
    RuntimeError
        If no usable ``sm3_synth_summary.csv`` is collected.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--suite-root", required=True)
    ap.add_argument("--out-csv", required=True)
    ap.add_argument("--out-json", required=True)
    args = ap.parse_args(argv)

    root = Path(args.suite_root).expanduser().resolve()
    if not root.exists():
        raise FileNotFoundError(
            f"Suite root not found: {root}"
        )

    rows = _load_summaries(root)
    if not rows:
        raise RuntimeError(
            "No sm3_synth_summary.csv files found under suite root."
        )

    out = pd.concat(rows, ignore_index=True)

    # Sort for readability
    sort_cols = [
        c for c in ["metric", "regime"] if c in out.columns
    ]
    if sort_cols:
        out = out.sort_values(sort_cols).reset_index(
            drop=True
        )

    out_csv = Path(args.out_csv).expanduser().resolve()
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(out_csv, index=False)

    # Missing cells are NaN in the frame; json.dump would write them as the
    # bare token ``NaN``, which strict JSON parsers reject. Emit null instead.
    records = [
        {key: _nan_to_none(val) for key, val in rec.items()}
        for rec in out.to_dict("records")
    ]

    out_json = Path(args.out_json).expanduser().resolve()
    out_json.parent.mkdir(parents=True, exist_ok=True)
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

    print("[OK] wrote:", str(out_csv))
    print("[OK] wrote:", str(out_json))
# Script entry point: run the collector only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()