Source code for geoprior.cli.sm3_collect_summaries
# SPDX-License-Identifier: Apache-2.0
# GeoPrior-v3 - https://github.com/earthai-tech/geoprior-v3
# Copyright (c) 2026-present
# Author: LKouadio <https://lkouadio.com>
"""
Collect SM3 per-regime summaries into one combined table.

Expected structure under ``--suite-root``::

    sm3_tau_<reg>_50/
        sm3_synth_summary.csv

Writes:

- ``--out-csv``: combined CSV (long format)
- ``--out-json``: combined JSON (records)

Example
-------
.. code-block:: bash

    python nat.com/sm3_collect_summaries.py \
        --suite-root results/sm3_tau_suite_20260303-120000 \
        --out-csv results/.../combined.csv \
        --out-json results/.../combined.json
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
import pandas as pd
[docs]
def infer_regime(folder_name: str) -> str:
    """Extract the regime label from an SM3 run-folder name.

    Names ending in ``sm3_tau_<reg>_50`` or ``sm3_both_<reg>_50``
    yield ``<reg>``; any other name is returned unchanged.
    """
    pattern = re.compile(r"sm3_(?:tau|both)_(.+?)_50$")
    hit = pattern.search(folder_name)
    return folder_name if hit is None else hit.group(1)
[docs]
def main() -> None:
    """Collect every SM3 summary under a suite root into one table.

    Command-line arguments (all required):

    ``--suite-root``
        Directory searched recursively for ``sm3_synth_summary.csv``.
    ``--out-csv``
        Destination of the combined CSV.
    ``--out-json``
        Destination of the combined JSON (a list of records).

    Each usable summary gets two leading columns: ``regime`` (from
    ``infer_regime`` applied to the name of the folder holding the
    file) and ``run_dir`` (that folder's path). Files that cannot be
    read, are empty, or lack a ``metric`` column are skipped with a
    ``[skip]`` message. Parent directories of both outputs are
    created when missing.

    Raises
    ------
    FileNotFoundError
        If the suite root does not exist.
    RuntimeError
        If no usable summary file is found under the suite root.
    """
    ap = argparse.ArgumentParser(
        description=(
            "Collect SM3 per-regime summaries into one "
            "combined table."
        )
    )
    ap.add_argument(
        "--suite-root",
        required=True,
        help="directory searched for sm3_synth_summary.csv",
    )
    ap.add_argument(
        "--out-csv",
        required=True,
        help="path of the combined CSV to write",
    )
    ap.add_argument(
        "--out-json",
        required=True,
        help="path of the combined JSON to write",
    )
    args = ap.parse_args()

    root = Path(args.suite_root).expanduser().resolve()
    if not root.exists():
        raise FileNotFoundError(
            f"Suite root not found: {root}"
        )

    frames = []
    n_found = 0
    # rglob() yields paths in filesystem order; sorting makes the
    # combined output reproducible across runs and machines.
    for p in sorted(root.rglob("sm3_synth_summary.csv")):
        n_found += 1
        run_dir = p.parent
        regime = infer_regime(run_dir.name)
        try:
            df = pd.read_csv(p)
        except Exception as e:
            # Deliberately best-effort: one unreadable summary
            # must not abort the whole collection.
            print(f"[skip] failed to read {p}: {e}")
            continue
        if df.empty or "metric" not in df.columns:
            print(f"[skip] unexpected format: {p}")
            continue
        df.insert(0, "regime", regime)
        df.insert(1, "run_dir", str(run_dir))
        frames.append(df)

    if not frames:
        if n_found:
            # Files were present but all were skipped; saying
            # "none found" here would be misleading.
            raise RuntimeError(
                f"Found {n_found} sm3_synth_summary.csv file(s) "
                "under suite root, but none could be used."
            )
        raise RuntimeError(
            "No sm3_synth_summary.csv files found under suite root."
        )

    out = pd.concat(frames, ignore_index=True)

    # Sort for readability; a stable sort keeps the discovery
    # order among rows that tie on the sort keys.
    sort_cols = [
        c for c in ["metric", "regime"] if c in out.columns
    ]
    if sort_cols:
        out = out.sort_values(
            sort_cols, kind="mergesort"
        ).reset_index(drop=True)

    out_csv = Path(args.out_csv).expanduser().resolve()
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(out_csv, index=False)

    # Missing cells (empty CSV fields, or columns present in only
    # some summaries) would be dumped as the bare token NaN, which
    # is not valid JSON; emit null instead. Infinite values are
    # left unchanged.
    records = [
        {
            key: (None if pd.isna(value) else value)
            for key, value in rec.items()
        }
        for rec in out.to_dict("records")
    ]
    out_json = Path(args.out_json).expanduser().resolve()
    out_json.parent.mkdir(parents=True, exist_ok=True)
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

    print("[OK] wrote:", str(out_csv))
    print("[OK] wrote:", str(out_json))
# Run the collector only when executed as a script, not on import.
if __name__ == "__main__":
    main()