
Commit c032d76

refactor: modularize training pipeline and stabilize model initialization
- Refactor train.py by extracting logic into training submodules:
  - arguments.py: CLI parameter definitions
  - builder.py: model and criterion setup
  - checkpoint.py: robust saving and loading logic
- Fix the learning-rate plateau by replacing disjoint schedulers with SequentialLR, which properly chains the linear warmup and cosine decay phases.
- Simplify the SpatialTranscriptFormer architecture:
  - Remove the redundant log_temperature parameter to reduce gradient variance.
  - L1-normalize the MSigDB pathway weight initialization to prevent exponential prediction explosion at startup.
- Enhance load_checkpoint with robust error handling for EOFError (corrupted files) and ValueError (architecture/optimizer mismatches) to ensure graceful fallbacks.
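The scheduler change described above — chaining a linear warmup into cosine decay — can be sketched with PyTorch's stock `SequentialLR`. The base LR, warmup length, and epoch count below are placeholders, not the values train.py actually uses:

```python
import torch
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR

model = torch.nn.Linear(4, 4)  # stand-in for the real model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

warmup_epochs, total_epochs = 10, 100  # placeholder values
warmup = LinearLR(optimizer, start_factor=0.01, total_iters=warmup_epochs)
decay = CosineAnnealingLR(optimizer, T_max=total_epochs - warmup_epochs)

# SequentialLR hands off from warmup to decay at the milestone epoch,
# avoiding the LR discontinuity that two disjoint schedulers can produce.
scheduler = SequentialLR(
    optimizer, schedulers=[warmup, decay], milestones=[warmup_epochs]
)

lrs = []
for _ in range(total_epochs):
    optimizer.step()  # training step elided
    scheduler.step()
    lrs.append(optimizer.param_groups[0]["lr"])
```

Stepping once per epoch, the LR ramps up over the first `warmup_epochs` steps and then decays smoothly to near zero, with no jump at the handoff.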
1 parent 63d0912 commit c032d76

File tree

16 files changed (+1225, −1499 lines)

README.md

Lines changed: 4 additions & 4 deletions
````diff
@@ -62,11 +62,11 @@ stf-download --species "Homo sapiens" --local_dir hest_data
 We provide presets for baseline models and scaled versions of the SpatialTranscriptFormer.
 
 ```bash
-# Recommended: Run the Interaction model with 4 transformer layers
-python scripts/run_preset.py --preset stf_interaction_l4
+# Recommended: Run the Interaction model (Small)
+python scripts/run_preset.py --preset stf_small
 
-# Run the lightweight 2-layer version
-python scripts/run_preset.py --preset stf_interaction_l2
+# Run the lightweight Tiny version
+python scripts/run_preset.py --preset stf_tiny
 
 # Run baselines
 python scripts/run_preset.py --preset he2rna_baseline
````

docs/MODELS.md

Lines changed: 3 additions & 7 deletions
````diff
@@ -34,12 +34,8 @@ The SpatialTranscriptFormer models the **interaction between biological pathways
 By default, the model operates in **Full Interaction** mode where all four information flows are active. Users can selectively disable any combination using the `--interactions` flag to explore architectural variants:
 
 ```bash
-# Default: Full Interaction (all quadrants enabled)
---interactions p2p p2h h2p h2h
-
-# Pathway Bottleneck: block H↔H to force all inter-patch
-# communication through the pathway bottleneck
---interactions p2p p2h h2p
+# Default: Small Interaction (CTransPath, 4 layers)
+python scripts/run_preset.py --preset stf_small
 ```
 
 > [!TIP]
@@ -53,7 +49,7 @@ Three additional design principles support these interactions:
 
 - **Biological Initialisation** — The gene reconstruction weights are initialised from MSigDB Hallmark gene sets, providing a biologically-grounded starting point that the model refines during training.
 
-### 2.2 Spatial Learning
+## 2.2 Spatial Learning
 
 The spatial relationships of gene expression are central to this model. It is not sufficient to predict correct expression magnitudes at each spot independently — the model must capture **where** on the tissue pathways are active and how that spatial pattern varies across the slide. Two mechanisms enforce this:
````
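The "Biological Initialisation" principle above pairs with the commit's L1-normalisation fix. A minimal sketch of the idea — the binary membership masks and their 5% density are placeholders, since the real initialisation reads Hallmark gene sets from MSigDB:

```python
import torch

torch.manual_seed(0)

# Placeholder MSigDB membership masks: a binary (pathways x genes) matrix.
# The real code derives this from the Hallmark gene sets; 5% density is made up.
num_pathways, num_genes = 50, 1000
membership = (torch.rand(num_pathways, num_genes) < 0.05).float()

# L1-normalise each pathway row so its weights sum to 1. Without this,
# pathways with many member genes start with a large summed weight, which
# the commit message blames for exponential prediction blow-ups at startup.
row_sums = membership.sum(dim=1, keepdim=True).clamp(min=1.0)
weights = membership / row_sums
```

After normalisation every pathway contributes a bounded total weight regardless of how many genes it contains, so the initial forward pass stays on a sane scale.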

docs/TRAINING_GUIDE.md

Lines changed: 14 additions & 15 deletions
````diff
@@ -147,23 +147,22 @@ python -m spatial_transcript_former.train \
 
 > **Note**: Without `--pathway-init`, the model disables the `AuxiliaryPathwayLoss` and relies entirely on the main reconstruction objectives and the L1 sparsity penalty. (I am yet to obtain results with this method)...
-### Robust Counting: ZINB + Auxiliary Loss
+### Recommended: Using Presets
 
-For raw count data with high sparsity, using the ZINB distribution and auxiliary pathway supervision is recommended.
+For most cases, it is recommended to use the provided presets:
 
 ```bash
-python -m spatial_transcript_former.train \
-  --data-dir A:\hest_data \
-  --model interaction \
-  --backbone ctranspath \
-  --pathway-init \
-  --loss zinb \
-  --pathway-loss-weight 0.5 \
-  --lr 5e-5 \
-  --batch-size 4 \
-  --whole-slide \
-  --precomputed \
-  --epochs 200
+# Tiny (2 layers, 256 dim)
+python scripts/run_preset.py --preset stf_tiny
+
+# Small (4 layers, 384 dim) - Recommended
+python scripts/run_preset.py --preset stf_small
+
+# Medium (6 layers, 512 dim)
+python scripts/run_preset.py --preset stf_medium
+
+# Large (12 layers, 768 dim)
+python scripts/run_preset.py --preset stf_large
 ```
 
 ### Choosing Interaction Modes
@@ -201,7 +200,7 @@ Submit with:
 sbatch hpc/array_train.slurm
 ```
 
-### Collecting Results
+### Collecting Results (Currently broken!)
 
 After experiments complete, aggregate all `results_summary.json` files into a comparison table:
````

scripts/inspect_outputs.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import torch
2+
import json
3+
import os
4+
import argparse
5+
import numpy as np
6+
from spatial_transcript_former.models import SpatialTranscriptFormer
7+
from spatial_transcript_former.data.utils import get_sample_ids, setup_dataloaders
8+
9+
10+
class Args:
11+
pass
12+
13+
14+
args = Args()
15+
args.data_dir = "A:\\hest_data"
16+
args.epochs = 2000
17+
args.output_dir = "runs/stf_tiny"
18+
args.model = "interaction"
19+
args.backbone = "ctranspath"
20+
args.precomputed = True
21+
args.whole_slide = True
22+
args.pathway_init = True
23+
args.use_amp = True
24+
args.log_transform = True
25+
args.loss = "mse_pcc"
26+
args.resume = True
27+
args.n_layers = 2
28+
args.token_dim = 256
29+
args.n_heads = 4
30+
args.batch_size = 1
31+
args.vis_sample = "TENX29"
32+
args.max_samples = 1
33+
args.organ = None
34+
args.num_genes = 1000
35+
args.n_neighbors = 6
36+
args.use_global_context = False
37+
args.global_context_size = 0
38+
args.augment = False
39+
args.feature_dir = None
40+
args.seed = 42
41+
args.warmup_epochs = 10
42+
args.sparsity_lambda = 0.0
43+
44+
device = "cuda" if torch.cuda.is_available() else "cpu"
45+
46+
genes_path = "global_genes.json"
47+
with open(genes_path, "r") as f:
48+
gene_list = json.load(f)[:1000]
49+
args.num_genes = len(gene_list)
50+
51+
final_ids = get_sample_ids(
52+
args.data_dir, precomputed=args.precomputed, backbone=args.backbone, max_samples=1
53+
)
54+
train_loader, _ = setup_dataloaders(args, final_ids, [])
55+
56+
model = SpatialTranscriptFormer(
57+
num_genes=args.num_genes,
58+
backbone_name=args.backbone,
59+
pretrained=False,
60+
token_dim=args.token_dim,
61+
n_heads=args.n_heads,
62+
n_layers=args.n_layers,
63+
num_pathways=50,
64+
use_spatial_pe=True,
65+
output_mode="counts",
66+
)
67+
68+
ckpt_path = os.path.join(args.output_dir, "latest_model_interaction.pth")
69+
if os.path.exists(ckpt_path):
70+
print("Loading", ckpt_path)
71+
ckpt = torch.load(ckpt_path, map_location=device, weights_only=True)
72+
model.load_state_dict(ckpt["model_state_dict"], strict=False)
73+
else:
74+
print("No ckpt found!")
75+
76+
model.to(device)
77+
model.eval()
78+
79+
with torch.no_grad():
80+
for batch in train_loader:
81+
feats, genes, coords, mask = [x.to(device) for x in batch]
82+
out = model(feats, rel_coords=coords, mask=mask, return_dense=True)
83+
preds = out
84+
85+
preds = torch.expm1(preds) if args.log_transform else preds
86+
targets = torch.expm1(genes) if args.log_transform else genes
87+
88+
patch_idx = None
89+
for i in range(mask.shape[1]):
90+
if not mask[0, i]:
91+
patch_idx = i
92+
break
93+
94+
with open(
95+
"C:/Users/wispy/.gemini/antigravity/brain/6a31ec6d-2f34-4f97-96b8-e437c2640219/model_output_sample.md",
96+
"w",
97+
) as f:
98+
f.write("# Model Output Sample (stf_tiny with simplifications)\n\n")
99+
if patch_idx is not None:
100+
f.write("### Target vs Prediction for a Single Valid Patch\n")
101+
f.write("Showing the first 20 genes (absolute expression counts).\n\n")
102+
103+
f.write("| Gene Index | Target Count (True) | Predicted Count |\n")
104+
f.write("|------------|----------------------|-----------------|\n")
105+
106+
t_vals = targets[0, patch_idx, :20].cpu().numpy()
107+
p_vals = preds[0, patch_idx, :20].cpu().numpy()
108+
109+
for i in range(20):
110+
f.write(f"| {i} | {t_vals[i]:.2f} | {p_vals[i]:.2f} |\n")
111+
112+
f.write("\n### Summary Statistics Across All Patches in Batch\n")
113+
f.write(f"- Target Mean: {targets[~mask].mean().item():.4f}\n")
114+
f.write(f"- Target Max: {targets[~mask].max().item():.4f}\n")
115+
f.write(f"- Pred Mean: {preds[~mask].mean().item():.4f}\n")
116+
f.write(f"- Pred Max: {preds[~mask].max().item():.4f}\n")
117+
f.write(f"- Pred Min: {preds[~mask].min().item():.4f}\n")
118+
else:
119+
f.write("No valid patches found in sample.\n")
120+
121+
print("Sample logic written to artifact.")
122+
break
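inspect_outputs.py undoes the training-time `log1p` transform with `torch.expm1` before reporting counts. The round trip is exact up to floating point:

```python
import torch

counts = torch.tensor([0.0, 1.0, 9.0, 99.0])
logged = torch.log1p(counts)     # what a --log-transform pipeline would store
recovered = torch.expm1(logged)  # the inverse applied before reporting counts
assert torch.allclose(recovered, counts)
```

Using `log1p`/`expm1` instead of `log`/`exp` keeps zero counts (the overwhelming majority in sparse spatial data) mapped exactly to zero.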

scripts/migrate_logs_to_sqlite.py

Lines changed: 29 additions & 0 deletions
```python
import os
import pandas as pd
import sqlite3
import argparse


def migrate_csv_to_sqlite(run_dir):
    csv_path = os.path.join(run_dir, "training_log.csv")
    db_path = os.path.join(run_dir, "training_logs.sqlite")

    if not os.path.exists(csv_path):
        print(f"No CSV found at {csv_path}")
        return

    print(f"Migrating {csv_path} to {db_path}...")
    df = pd.read_csv(csv_path)

    with sqlite3.connect(db_path) as conn:
        df.to_sql("metrics", conn, if_exists="replace", index=False)
    print("Done!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--run-dir", type=str, required=True, help="Path to run directory"
    )
    args = parser.parse_args()
    migrate_csv_to_sqlite(args.run_dir)
```
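Once migrated, the `metrics` table can be queried directly with the stdlib `sqlite3` module. The column names below (`epoch`, `train_loss`, `val_loss`) are assumptions — they depend on whatever header training_log.csv actually carries:

```python
import sqlite3

# In-memory stand-in for training_logs.sqlite; the epoch/train_loss/val_loss
# columns are assumed, mirroring a typical training log.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE metrics (epoch INTEGER, train_loss REAL, val_loss REAL)")
conn.executemany(
    "INSERT INTO metrics VALUES (?, ?, ?)",
    [(1, 0.90, 0.95), (2, 0.70, 0.80), (3, 0.60, 0.85)],
)

# The kind of question that is awkward against a growing CSV but trivial in
# SQL: which epoch had the best validation loss?
best_epoch, best_val = conn.execute(
    "SELECT epoch, val_loss FROM metrics ORDER BY val_loss LIMIT 1"
).fetchone()
print(best_epoch, best_val)
```

This is the payoff of the migration: ad-hoc aggregation without re-parsing the whole CSV on every query.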

scripts/predict_sample.py

Lines changed: 112 additions & 0 deletions
```python
#!/usr/bin/env python
import argparse
import os
import torch
import json
from spatial_transcript_former.visualization import run_inference_plot


# Dummy class to hold loaded arguments
class RunArgs:
    def __init__(self, **entries):
        self.__dict__.update(entries)


def parse_args():
    parser = argparse.ArgumentParser("Predict sample pathways")
    parser.add_argument(
        "--sample-id",
        required=True,
        type=str,
        help="Sample ID to run inference on (e.g. TENX156)",
    )
    parser.add_argument(
        "--run-dir",
        required=True,
        type=str,
        help="Directory containing model weights and args.json",
    )
    parser.add_argument(
        "--output-dir", type=str, default=".", help="Where to save the output plot"
    )
    parser.add_argument(
        "--epoch", type=int, default=0, help="Epoch number to label the plot with"
    )
    return parser.parse_args()


def main():
    cli_args = parse_args()

    # Load args from run_dir
    args_path = os.path.join(cli_args.run_dir, "results_summary.json")
    if not os.path.exists(args_path):
        raise FileNotFoundError(f"Missing {args_path}")

    with open(args_path, "r") as f:
        summary_dict = json.load(f)
        run_args_dict = summary_dict.get("config", {})

    run_args = RunArgs(**run_args_dict)
    run_args.output_dir = cli_args.output_dir
    run_args.run_dir = cli_args.run_dir

    # Optional arguments that might be missing from older args.json
    if not hasattr(run_args, "log_transform"):
        run_args.log_transform = False

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Re-initialize the model based on run_args
    if run_args.model == "baseline":
        from spatial_transcript_former.models import SpatialTranscriptFormer

        model = SpatialTranscriptFormer(
            backbone=run_args.backbone,
            num_genes=run_args.num_genes,
            dropout=run_args.dropout,
            n_neighbors=run_args.n_neighbors,
        )
    elif run_args.model == "interaction":
        from spatial_transcript_former.models import SpatialTranscriptFormer

        model = SpatialTranscriptFormer(
            num_genes=run_args.num_genes,
            backbone_name=run_args.backbone,
            pretrained=run_args.pretrained,
            token_dim=getattr(run_args, "token_dim", 384),
            n_heads=getattr(run_args, "n_heads", 6),
            n_layers=getattr(run_args, "n_layers", 4),
            num_pathways=getattr(run_args, "num_pathways", 0),
            use_spatial_pe=getattr(run_args, "use_spatial_pe", True),
            output_mode="zinb" if getattr(run_args, "loss", "") == "zinb" else "counts",
            interactions=getattr(run_args, "interactions", None),
        )
    else:
        raise ValueError(f"Unknown model type: {run_args.model}")

    model.to(device)

    # Note: we explicitly load the *best* model if it exists, otherwise the latest
    ckpt_path = os.path.join(cli_args.run_dir, f"best_model_{run_args.model}.pth")
    if not os.path.exists(ckpt_path):
        ckpt_path = os.path.join(cli_args.run_dir, f"latest_model_{run_args.model}.pth")

    if os.path.exists(ckpt_path):
        print(f"Loading checkpoint from {ckpt_path}...")
        checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
        if "model_state_dict" in checkpoint:
            model.load_state_dict(checkpoint["model_state_dict"])
        else:
            model.load_state_dict(checkpoint)
    else:
        print(
            f"Warning: No checkpoint found in {cli_args.run_dir}. Using untrained model."
        )

    print(f"Running inference for sample {cli_args.sample_id}...")
    run_inference_plot(model, run_args, cli_args.sample_id, cli_args.epoch, device)


if __name__ == "__main__":
    main()
```
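The commit message describes hardening `load_checkpoint` against corrupted files (EOFError) and architecture mismatches (ValueError), of which predict_sample.py's plain load is the happy path. A minimal sketch of that fallback pattern — `load_checkpoint_safely` is a hypothetical name, and the real logic lives in the new training/checkpoint.py:

```python
import os
import torch


def load_checkpoint_safely(model, ckpt_path, device="cpu"):
    """Hypothetical sketch of graceful checkpoint fallback; the real
    load_checkpoint in training/checkpoint.py may differ in signature."""
    if not os.path.exists(ckpt_path):
        print(f"No checkpoint at {ckpt_path}; starting from scratch.")
        return False
    try:
        ckpt = torch.load(ckpt_path, map_location=device, weights_only=True)
        # Accept both wrapped ({"model_state_dict": ...}) and bare state dicts
        state = ckpt.get("model_state_dict", ckpt) if isinstance(ckpt, dict) else ckpt
        model.load_state_dict(state, strict=False)
        return True
    except EOFError:
        print(f"{ckpt_path} looks truncated or corrupted; ignoring it.")
    except (ValueError, RuntimeError) as err:
        print(f"{ckpt_path} does not match this model: {err}")
    return False
```

Returning a boolean instead of raising lets the caller fall back to an untrained model, mirroring the warning branch in predict_sample.py.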
