diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e2ed4f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +detgpt-env/ +env/ +venv/ + + +__pycache__/ +*.pyc + + +.vscode/ +.idea/ + + +*.pt +*.pth +*.jsonl diff --git a/notebooks/colab_gpu_reproducibility.ipynb b/notebooks/colab_gpu_reproducibility.ipynb new file mode 100644 index 0000000..6f0c34e --- /dev/null +++ b/notebooks/colab_gpu_reproducibility.ipynb @@ -0,0 +1,209 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Verifiable LLM Baseline — GPU Reproducibility (Phase 3) on Google Colab\n", + "\n", + "This notebook extends the **CPU determinism baseline** to a **CUDA GPU**, executing\n", + "the Phase 3 goal from the README: *strict GPU determinism* via deterministic cuDNN\n", + "and a pinned cuBLAS workspace.\n", + "\n", + "It runs two things on the GPU:\n", + "1. **Segmented audit** (`reproducibility.py`) — the 5 falsifiability scenarios. Scenario 1\n", + " (clean replay) must pass bitwise; scenarios 2–5 must be caught.\n", + "2. **Fresh-vs-fresh** (`gpu_reproducibility_test.py`) — two from-scratch runs on the\n", + " same GPU must produce bitwise-identical weights.\n", + "\n", + "> **Before you run:** set the runtime to GPU — *Runtime → Change runtime type → Hardware\n", + "> accelerator → GPU (T4 is fine)*, then *Runtime → Run all*." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Confirm a GPU is attached" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Get the code and pin determinism\n", + "\n", + "Set `REPO_URL` to your fork/repo. (Alternatively, mount Drive or upload the `src/`\n", + "folder — see the commented fallbacks.)\n", + "\n", + "`CUBLAS_WORKSPACE_CONFIG` is set here *before* any CUDA op so cuBLAS uses a fixed\n", + "reduction order. 
`src/device.py` also sets it on import, so the scripts are safe even\n", + "if you skip this — but setting it now keeps the in-kernel `torch` import deterministic too." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Must be set before the first CUDA matmul (read once at CUDA context creation).\n", + "os.environ[\"CUBLAS_WORKSPACE_CONFIG\"] = \":4096:8\"\n", + "\n", + "# ---- Option A: clone from GitHub (set this to your repo) ----\n", + "REPO_URL = \"https://github.com//Verifiable-LLM-Baseline.git\"\n", + "REPO_DIR = \"Verifiable-LLM-Baseline\"\n", + "\n", + "if not os.path.isdir(REPO_DIR):\n", + " !git clone --depth 1 $REPO_URL $REPO_DIR\n", + "\n", + "# ---- Option B (fallback): mount Google Drive and point REPO_DIR at your copy ----\n", + "# from google.colab import drive\n", + "# drive.mount('/content/drive')\n", + "# REPO_DIR = '/content/drive/MyDrive/Verifiable-LLM-Baseline'\n", + "\n", + "# ---- Option C (fallback): upload a zip of the repo ----\n", + "# from google.colab import files; files.upload() # then: !unzip -q Verifiable-LLM-Baseline.zip\n", + "\n", + "SRC_DIR = os.path.join(REPO_DIR, \"src\")\n", + "assert os.path.isdir(SRC_DIR), f\"src/ not found at {SRC_DIR} — fix REPO_URL or use a fallback.\"\n", + "\n", + "# Colab ships a CUDA-enabled torch already; only ensure the light deps.\n", + "!pip install -q \"numpy==2.4.3\" \"tqdm==4.67.3\"\n", + "print(\"src ready at:\", SRC_DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Environment fingerprint\n", + "\n", + "Determinism guarantees hold **within one GPU model**. The device name below is part of\n", + "the trust anchor — a different GPU (or a different cuBLAS/cuDNN version) may produce a\n", + "different, but still internally reproducible, set of bits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "print(\"torch :\", torch.__version__)\n", + "print(\"CUDA available :\", torch.cuda.is_available())\n", + "print(\"CUDA runtime :\", torch.version.cuda)\n", + "if torch.cuda.is_available():\n", + " print(\"GPU :\", torch.cuda.get_device_name(0))\n", + " print(\"cuDNN :\", torch.backends.cudnn.version())\n", + "print(\"CUBLAS workspace:\", os.environ.get(\"CUBLAS_WORKSPACE_CONFIG\"))\n", + "assert torch.cuda.is_available(), \"No GPU — set Runtime → Change runtime type → GPU.\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Phase 3 — segmented audit on GPU (5 scenarios)\n", + "\n", + "Expected: **Scenario 1 PASSES** (clean replay is bitwise deterministic on this GPU);\n", + "**Scenarios 2–5 FAIL** (wrong seed, injected gradient noise, post-training sabotage,\n", + "and a tampered checkpoint file are all detected)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cd \"$SRC_DIR\" && python reproducibility.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Fresh-vs-fresh — bitwise GPU reproducibility\n", + "\n", + "Trains from scratch twice on this GPU and checks the two runs are bitwise identical.\n", + "Appends a proof block to `proofs/device_determinism_log.txt`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cd \"$SRC_DIR\" && python gpu_reproducibility_test.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Eval + sealed global manifest\n", + "\n", + "Runs the deterministic held-out eval and seals the end-to-end pipeline hash\n", + "(environment + config + dataset + checkpoint + eval)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cd \"$SRC_DIR\" && python eval.py && python global_manifest.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpreting the results\n", + "\n", + "- **Same GPU → same bits.** A passing Scenario 1 and a passing fresh-vs-fresh test show\n", + " the software entropy is fully controlled on this device.\n", + "- **Different GPU → different (but reproducible) bits.** Checkpoint hashes produced on a\n", + " T4 will not match those from an A100 or from CPU. That cross-hardware drift is the\n", + " Phase 2 quantity this baseline is built to measure — not a failure.\n", + "- **If you hit a `nondeterministic ... CUDA` error**, an op without a deterministic\n", + " kernel was reached under `torch.use_deterministic_algorithms(True)`. That is a genuine\n", + " finding worth recording — it pinpoints exactly where hardware entropy enters.\n", + "- **RNG coverage:** GPU dropout draws from the CUDA generator, so the checkpoint now\n", + " serializes CUDA RNG state alongside CPU/NumPy/Python state — without it, a resumed\n", + " replay would diverge at the first dropout mask." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "colab_gpu_reproducibility.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/proofs/cpu_determinism_log.txt b/proofs/cpu_determinism_log.txt new file mode 100644 index 0000000..75ed131 --- /dev/null +++ b/proofs/cpu_determinism_log.txt @@ -0,0 +1,316 @@ + +started with linear model, +output 1: + +# verified that seeds can be set + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/main.py +seed set to 99 +Tensor X: +tensor([[ 0.6127, -1.1754, -0.7646], + [-0.6666, 0.7444, -0.6453], + [-1.3890, -0.2730, 0.9406]]) +Deterministic enabled: True +Input: tensor([0, 1, 2, 3]) +Target: tensor([1, 2, 3, 0]) + +output 2: + +# verified that hashes match in a linear model + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/main.py +seed set to 99 +Tensor X: +tensor([[ 0.6127, -1.1754, -0.7646], + [-0.6666, 0.7444, -0.6453], + [-1.3890, -0.2730, 0.9406]]) +Deterministic enabled: True +Input: tensor([0, 1, 2, 3]) +Target: tensor([1, 2, 3, 0]) +Step 0, Loss:1.1843723058700562 +Step 1, Loss:1.0755757093429565 +Step 2, Loss:0.9717903733253479 +Step 3, Loss:0.8734948039054871 +Step 4, Loss:0.7810313105583191 +FINAL MODEL HASH: cb0e3c0a1d3b9a74769e65a057c415354c3d8fe8faa8afb7ba944489a4c6ed24 + +output 3: + +# added in reproducability_test.py to automate the verification + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility +Input: tensor([0, 1, 2, 3]) +Target: tensor([1, 2, 3, 0]) +Step 0, Loss:1.676896333694458 +Step 1, Loss:1.5588276386260986 +Step 2, 
Loss:1.4446043968200684 +Step 3, Loss:1.3343052864074707 +Step 4, Loss:1.2279143333435059 +FINAL MODEL HASH: b02447baea14334d778597b8616c03afdbc8baa1ecc0e734ce2d3e5431ef9de9 +running reproducability test +seed set to 99 +seed set to 99 +Loss curves identical: True +Bitwise parameter match: True + +Success!: Full deterministic gradient flow verified. + --- + +upgraded model to NanoGPT, +output 1: + +#verified deterministic gradient for NanoGPT + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility +Input: tensor([[0, 1, 2, 3]]) +Target: tensor([[1, 2, 3, 0]]) +running reproducability test +seed set to 99 +seed set to 99 +Loss curves identical: True +Bitwise parameter match: True + +Success!: Full deterministic gradient flow verified. + +output 2: + +#verified SEGMENTED AUDIT VERIFICATION for NanoGPT + + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility +Input: tensor([[0, 1, 2, 3]]) +Target: tensor([[1, 2, 3, 0]]) + + SEGMENTED AUDIT VERIFICATION + +[Running Prover: Steps 0 to 10] +seed set to 99 + ~> Prover saved checkpoint at step 5 + +[Running Auditor: Steps 5 to 10 with checkpoint] +seed set to 99 + ~> Auditor loaded checkpoint from step 5 + +[Verifying Telemetry Trajectories] +Step 5 | Prover Loss: 0.424115 | Auditor Loss: 0.424115 ok +Step 6 | Prover Loss: 0.339101 | Auditor Loss: 0.339101 ok +Step 7 | Prover Loss: 0.276479 | Auditor Loss: 0.276479 ok +Step 8 | Prover Loss: 0.228812 | Auditor Loss: 0.228812 ok +Step 9 | Prover Loss: 0.192136 | Auditor Loss: 0.192136 ok + + (❁´◡`❁) +AUDIT PASSED: Segment replay is bitwise deterministic. 
+ +Output 3: + +#Failure scenarios, prove the deterministic route is on the right path + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility.py +Input: tensor([[0, 1, 2, 3]]) +Target: tensor([[1, 2, 3, 0]]) + + Scenario 1: CLEAN AUDIT +seed set to 99 + ~> Prover saved checkpoint at step 5 + ~> Auditor loaded checkpoint from step 5 + +[Verifying: CLEAN AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740216 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635828 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757840 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + (❁´◡`❁) CLEAN AUDIT PASSED: Segment replay is bitwise deterministic. + + Scenario 2: BAD SEED +seed set to 42 + ~> Tampered auditor loaded checkpoint with BAD seed (42) + +[Verifying: BAD SEED AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.45238802 | Δ 1.50e-02 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36516875 | Δ 1.19e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31266588 | Δ 1.49e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25075468 | Δ 2.54e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20944712 | Δ 1.27e-03 FAILED + + (╯°□°)╯︵ ┻━┻ BAD SEED AUDIT FAILED: Trajectories diverged. + + Scenario 3: NOISE INJECTED +seed set to 99 + ~> Tampered auditor loaded checkpoint with GOOD seed but will add secret noise to gradients + +[Verifying: NOISY WEIGHTS AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46379220 | Δ 3.61e-03 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36285883 | Δ 3.50e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31422427 | Δ 1.34e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25160202 | Δ 3.39e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20851545 | Δ 2.20e-03 FAILED + + (╯°□°)╯︵ ┻━┻ NOISY WEIGHTS AUDIT FAILED: Trajectories diverged. 
+ +output 4: + +# added scenario 4 + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility.py +Input: tensor([[0, 1, 2, 3]]) +Target: tensor([[1, 2, 3, 0]]) + + Scenario 1: CLEAN AUDIT +seed set to 99 + ~> Prover saved checkpoint at step 5 + ~> Auditor loaded checkpoint & RNG states from step 5 + +[Verifying: CLEAN AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740216 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635828 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757840 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + (❁ ´◡`❁) CLEAN AUDIT PASSED: Segment replay is bitwise deterministic. + + Scenario 2: BAD SEED +seed set to 42 + ~> Tampered auditor loaded checkpoint with BAD seed (42) + +[Verifying: BAD SEED AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.45238802 | Δ 1.50e-02 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36516875 | Δ 1.19e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31266588 | Δ 1.49e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25075468 | Δ 2.54e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20944712 | Δ 1.27e-03 FAILED + + Hash mismatch! Prover hash: 0fe9e61625b6c268 // Auditor hash: 275820158c363012 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ BAD SEED AUDIT FAILED: Trajectories diverged. + + Scenario 3: NOISE INJECTED +seed set to 99 + ~> Tampered auditor loaded checkpoint with GOOD seed but will add secret noise to gradients + +[Verifying: NOISY WEIGHTS AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46379220 | Δ 3.61e-03 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36285883 | Δ 3.50e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31422427 | Δ 1.34e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25160202 | Δ 3.39e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20851545 | Δ 2.20e-03 FAILED + + Hash mismatch! 
Prover hash: 0fe9e61625b6c268 // Auditor hash: d9e944f2d70374c5 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ NOISY WEIGHTS AUDIT FAILED: Trajectories diverged. + + Scenario 4: POST-TRAINING WEIGHT SABOTAGE + ~> Post-sabotage auditor loaded checkpoint correctly + ~> Weights silently mutated after training completed + +[Verifying: POST-TRAINING SABOTAGE AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740216 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635828 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757840 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + Hash mismatch! Prover hash: 0fe9e61625b6c268 // Auditor hash: 03b1c8316eaf2b01 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ POST-TRAINING SABOTAGE AUDIT FAILED: Trajectories diverged. + +output 5: + +#added global manifest + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/global_manifest.py +Generating The Global Verification Manifest... 
+ + ༼ つ ◕_◕ ༽つ Global Manifest Sealed: +{ + "1_environment_hash": "8196650d82fe04afca4d4348975f612bc6967698b87973cc9e299f58357db470", + "2_training_config_hash": "e6ff2b2428cafadb7f570001afa71ea3a6396cc03a0228388a7ec870cd4a6ce6", + "3_dataset_hash": "e25dc0179960dcc446a7309b23e341f8218f64d009535cfa7dc692b23c5ea8db", + "4_model_checkpoint_hash": "1abbaa044e36e9835fdb950f652d915df77f2d8db09769c6475870dbd25d91bb", + "5_eval_manifest_hash": "0631d96765e2019979b6d10e68dafc7e3f502cbb0857cfe2a8edb99caf390284", + "99_GLOBAL_PIPELINE_HASH": "b696f2f6b6143745bb4a71bd43cd5fc187470a3bafc1276b07804541c2304bbf" +} + +output 6: + +# added Scenario 5: BROKEN SEAL AUDIT + +(detgpt-env) PS C:\Users\ryoar\Desktop\GSOC> & c:\Users\ryoar\Desktop\GSOC\detgpt-env\Scripts\python.exe c:/Users/ryoar/Desktop/GSOC/src/reproducibility.py +Input: tensor([[0, 1, 2, 3]]) +Target: tensor([[1, 2, 3, 0]]) + + Scenario 1: CLEAN AUDIT +seed set to 99 + ~> Prover saved checkpoint at step 5 + ~> Auditor loaded checkpoint & RNG states from step 5 + +[Verifying: CLEAN AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740216 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635828 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757840 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + (❁ ´◡`❁) CLEAN AUDIT PASSED: Segment replay is bitwise deterministic. + + Scenario 2: BAD SEED +seed set to 42 + ~> Tampered auditor loaded checkpoint with BAD seed (42) + +[Verifying: BAD SEED AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.45238802 | Δ 1.50e-02 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36516875 | Δ 1.19e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31266588 | Δ 1.49e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25075468 | Δ 2.54e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20944712 | Δ 1.27e-03 FAILED + + Hash mismatch! 
Prover hash: 0fe9e61625b6c268 // Auditor hash: 275820158c363012 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ BAD SEED AUDIT FAILED: Trajectories diverged. + + Scenario 3: NOISE INJECTED +seed set to 99 + ~> Tampered auditor loaded checkpoint with GOOD seed but will add secret noise to gradients + +[Verifying: NOISY WEIGHTS AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46379220 | Δ 3.61e-03 FAILED +Step 6 | Prover: 0.36635828 | Auditor: 0.36285883 | Δ 3.50e-03 FAILED +Step 7 | Prover: 0.32757840 | Auditor: 0.31422427 | Δ 1.34e-02 FAILED +Step 8 | Prover: 0.24821112 | Auditor: 0.25160202 | Δ 3.39e-03 FAILED +Step 9 | Prover: 0.21071491 | Auditor: 0.20851545 | Δ 2.20e-03 FAILED + + Hash mismatch! Prover hash: 0fe9e61625b6c268 // Auditor hash: d9e944f2d70374c5 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ NOISY WEIGHTS AUDIT FAILED: Trajectories diverged. + + Scenario 4: POST-TRAINING WEIGHT SABOTAGE + ~> Post-sabotage auditor loaded checkpoint correctly + ~> Weights silently mutated after training completed + +[Verifying: POST-TRAINING SABOTAGE AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740216 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635828 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757840 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + Hash mismatch! Prover hash: 0fe9e61625b6c268 // Auditor hash: 03b1c8316eaf2b01 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ POST-TRAINING SABOTAGE AUDIT FAILED: Trajectories diverged. 
+ + Scenario 5: MODIFIED CHECKPOINT FILE (BROKEN SEAL) + ~> Attacker corrupted weights and saved to corrupted_checkpoint.pt + ~> Auditor loaded checkpoint & RNG states from step 5 + +[Verifying: BROKEN SEAL AUDIT] +Step 5 | Prover: 0.46740216 | Auditor: 0.46740210 | PASSED +Step 6 | Prover: 0.36635828 | Auditor: 0.36635825 | PASSED +Step 7 | Prover: 0.32757840 | Auditor: 0.32757837 | PASSED +Step 8 | Prover: 0.24821112 | Auditor: 0.24821112 | PASSED +Step 9 | Prover: 0.21071491 | Auditor: 0.21071491 | PASSED + + Hash mismatch! Prover hash: 0fe9e61625b6c268 // Auditor hash: 5a1b407ed0c82001 [HASH ERROR] + + (╯°□°)╯︵ ┻━┻ BROKEN SEAL AUDIT FAILED: Trajectories diverged. \ No newline at end of file diff --git a/proofs/device_determinism_log.txt b/proofs/device_determinism_log.txt new file mode 100644 index 0000000..f0a17ad --- /dev/null +++ b/proofs/device_determinism_log.txt @@ -0,0 +1,52 @@ +{ + "device": "cpu", + "device_name": "cpu", + "torch": "2.12.0+cpu", + "cuda": null, + "os": "Windows-11-10.0.22621-SP0", + "cublas_workspace_config": ":4096:8", + "final_loss": 0.2107149064540863, + "loss_curves_identical": true, + "model_hash_run1": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "model_hash_run2": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "bitwise_reproducible": true +} +{ + "device": "cpu", + "device_name": "cpu", + "torch": "2.12.0+cpu", + "accelerator_version": null, + "os": "Windows-11-10.0.22621-SP0", + "cublas_workspace_config": ":4096:8", + "final_loss": 0.2107149064540863, + "loss_curves_identical": true, + "model_hash_run1": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "model_hash_run2": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "bitwise_reproducible": true +} +{ + "device": "cpu", + "device_name": "cpu", + "torch": "2.12.0+cpu", + "accelerator_version": null, + "os": "Windows-11-10.0.22621-SP0", + "cublas_workspace_config": ":4096:8", + 
"final_loss": 0.2107149064540863, + "loss_curves_identical": true, + "model_hash_run1": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "model_hash_run2": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "bitwise_reproducible": true +} +{ + "device": "cpu", + "device_name": "cpu", + "torch": "2.12.0+cpu", + "accelerator_version": null, + "os": "Windows-11-10.0.22621-SP0", + "cublas_workspace_config": ":4096:8", + "final_loss": 0.2107149064540863, + "loss_curves_identical": true, + "model_hash_run1": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "model_hash_run2": "0fe9e61625b6c26825256b805aa6c862162df654644609e2e96e2efc672e473d", + "bitwise_reproducible": true +} diff --git a/proofs/env_fingerprint.json b/proofs/env_fingerprint.json new file mode 100644 index 0000000..804ea7c --- /dev/null +++ b/proofs/env_fingerprint.json @@ -0,0 +1,6 @@ +{ + "torch": "2.10.0+cpu", + "python": "3.14.0 (tags/v3.14.0:ebf955d, Oct 7 2025, 10:15:03) [MSC v.1944 64 bit (AMD64)]", + "os": "Windows-11-10.0.22621-SP0", + "cpu": "Intel64 Family 6 Model 140 Stepping 1, GenuineIntel" +} \ No newline at end of file diff --git a/proofs/eval_manifest.json b/proofs/eval_manifest.json new file mode 100644 index 0000000..bb9553a --- /dev/null +++ b/proofs/eval_manifest.json @@ -0,0 +1,7 @@ +{ + "model_checkpoint_hash": "299273af1196e69c7cd64ca27000183990e0b3b3dbf7909cdad7bd5d06291d52", + "eval_dataset": "held_out_dummy_text", + "eval_loss": 0.4604153, + "perplexity": 1.58473, + "eval_manifest_hash": "aba5aa0c0a6add82cfd32c7eed9469aafa847bce419e0d572296090cdc60c27f" +} \ No newline at end of file diff --git a/proofs/pipeline_manifest.json b/proofs/pipeline_manifest.json new file mode 100644 index 0000000..cb42853 --- /dev/null +++ b/proofs/pipeline_manifest.json @@ -0,0 +1,8 @@ +{ + "1_environment_hash": "8196650d82fe04afca4d4348975f612bc6967698b87973cc9e299f58357db470", + "2_training_config_hash": 
"e6ff2b2428cafadb7f570001afa71ea3a6396cc03a0228388a7ec870cd4a6ce6",
+  "3_dataset_hash": "e25dc0179960dcc446a7309b23e341f8218f64d009535cfa7dc692b23c5ea8db",
+  "4_model_checkpoint_hash": "3fe9958cd9d56adadbb6075e46582d31233be3a6f67da450c6ad308717d154eb",
+  "5_eval_manifest_hash": "0631d96765e2019979b6d10e68dafc7e3f502cbb0857cfe2a8edb99caf390284",
+  "99_GLOBAL_PIPELINE_HASH": "f2859a26bf311f5a183c632a50258e98e69ef909bbdeba9ce58147f0d7ac176c"
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..45989b6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+torch==2.10.0
+numpy==2.4.3
+tqdm==4.67.3
+
+# This default torch wheel is CPU-only (or CUDA, depending on your platform).
+# For an Intel GPU (Iris Xe / Arc), install the XPU build instead — see README
+# "Locally on an Intel GPU":
+# pip install torch --index-url https://download.pytorch.org/whl/xpu
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..b5f35a7
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,20 @@
+import json
+import hashlib
+
+# The Immutable Training Configuration
+TRAIN_CONFIG = {
+    "embed_dim": 16,
+    "num_heads": 2,
+    "max_seq_len": 32,
+    "dropout": 0.1,
+    "lr": 0.01,
+    "optimizer": "Adam",
+    "seed": 99,
+    "total_steps": 10,
+    "checkpoint_step": 5
+}
+
+def get_config_hash():
+    """Returns a deterministic SHA-256 hash of the configuration dict."""
+    encoded = json.dumps(TRAIN_CONFIG, sort_keys=True).encode()
+    return hashlib.sha256(encoded).hexdigest()
\ No newline at end of file
diff --git a/src/dataset.py b/src/dataset.py
new file mode 100644
index 0000000..cfad8e0
--- /dev/null
+++ b/src/dataset.py
@@ -0,0 +1,38 @@
+import torch
+
+class TinyDataset:
+    """Toy character corpus: the string "abcd" repeated, encoded as token ids."""
+
+    def __init__(self):
+        self.vocab = ['a', 'b', 'c', 'd']
+        self.vocab_size = len(self.vocab)
+
+        self.data = "abcdabcdabcdabcd"
+
+        self.stoi = {ch: i for i, ch in enumerate(self.vocab)}
+        self.itos = {i: ch for ch, i in self.stoi.items()}
+        # Misspelled original attribute name, kept so existing callers keep working.
+        self.iots = self.itos
+
+        self.encoded = torch.tensor(
+            [self.stoi[ch] for ch in self.data],
+            dtype=torch.long
+        )
+
+    def get_batch(self, block_size=4):
+        """Return the first ``block_size`` tokens and their next-token targets.
+
+        Both tensors get a leading batch dimension of 1. The window always
+        starts at position 0 of ``self.encoded``; nothing is sampled.
+
+        Raises:
+            ValueError: if ``block_size`` is outside ``[1, len(self.encoded) - 1]``.
+        """
+        if not (1 <= block_size < len(self.encoded)):
+            raise ValueError(
+                f"block_size must be in [1, {len(self.encoded) - 1}], got {block_size}"
+            )
+        # unsqueeze(0) adds the batch dimension (the earlier linear model used the bare slices)
+        x = self.encoded[:block_size].unsqueeze(0)
+        y = self.encoded[1:block_size+1].unsqueeze(0)
+        return x, y
\ No newline at end of file
diff --git a/src/device.py b/src/device.py
new file mode 100644
index 0000000..71a6c13
--- /dev/null
+++ b/src/device.py
@@ -0,0 +1,119 @@
+"""Device selection and accelerator determinism configuration (Phase 3).
+
+This module centralizes everything that differs between the CPU baseline and an
+accelerator run. It supports two accelerator backends:
+
+* **CUDA** (NVIDIA GPUs) — deterministic cuDNN + a pinned cuBLAS workspace.
+* **XPU** (Intel GPUs, e.g. Iris Xe / Arc, via the oneAPI backend) — determinism
+  rides on ``torch.use_deterministic_algorithms(True)``; there is no cuBLAS-style
+  workspace knob, so bitwise reproducibility on XPU is best-effort.
+
+Importing this module has one important side effect: it pins the cuBLAS workspace
+*before* the first CUDA op, which is a hard requirement for deterministic matmuls
+on CUDA >= 10.2. The env var is ignored by the XPU/CPU backends, so it is safe to
+set unconditionally.
+"""
+
+import os
+import warnings
+
+# cuBLAS chooses its GEMM (matmul) reduction order based on a workspace it
+# allocates lazily on the first CUDA call. A fixed workspace forces a single,
+# reproducible reduction order. Read once at CUDA context creation, so it MUST be
+# set before any tensor touches a CUDA device. Harmless on XPU/CPU.
+# https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility
+os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
+
+import torch
+
+
+def _xpu_available():
+    """True when a usable Intel XPU backend is present."""
+    return hasattr(torch, "xpu") and torch.xpu.is_available()
+
+
+def accelerator_module():
+    """Return the active accelerator's namespace (``torch.cuda`` or ``torch.xpu``).
+
+    CUDA is preferred when both are present; ``None`` means CPU-only. Every
+    accelerator-specific call in this module goes through here, so adding a new
+    backend is a one-line change.
+    """
+    if torch.cuda.is_available():
+        return torch.cuda
+    if _xpu_available():
+        return torch.xpu
+    return None
+
+
+def get_device():
+    """Return the best available device: CUDA, else Intel XPU, else CPU.
+
+    The same code path runs on all three; only the floating-point reduction order
+    (the hardware entropy under study) differs.
+    """
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    if _xpu_available():
+        return torch.device("xpu")
+    return torch.device("cpu")
+
+
+def device_name(device=None):
+    """Human-readable name for a device (for fingerprints/logs)."""
+    device = device or get_device()
+    if device.type == "cuda":
+        return torch.cuda.get_device_name(device)
+    if device.type == "xpu":
+        return torch.xpu.get_device_name(device)
+    return "cpu"
+
+
+def seed_accelerators(seed):
+    """Seed every generator on the active accelerator (CUDA or XPU). No-op on CPU."""
+    accel = accelerator_module()
+    if accel is not None:
+        accel.manual_seed_all(seed)
+
+
+def accel_rng_state():
+    """Accelerator RNG state tagged with its backend, or ``None`` on CPU.
+
+    Dropout (kept active at 0.1) draws from the accelerator's generator on GPU, so
+    this state must be serialized alongside the CPU/NumPy/Python RNG for a
+    segmented replay to stay deterministic. The backend tag lets a resume safely
+    skip state that was captured on a different backend (e.g. CUDA vs XPU).
+    """
+    accel = accelerator_module()
+    if accel is None:
+        return None
+    return {"backend": get_device().type, "state": accel.get_rng_state_all()}
+
+
+def restore_accel_rng_state(saved):
+    """Restore state from :func:`accel_rng_state`. No-op on CPU or on mismatch."""
+    accel = accelerator_module()
+    if not saved or accel is None:
+        return
+    current = get_device().type
+    if saved.get("backend") != current:
+        warnings.warn(
+            f"Skipping accelerator RNG restore: checkpoint backend "
+            f"{saved.get('backend')!r} != current backend {current!r}."
+        )
+        return
+    # Generator.set_state only accepts CPU ByteTensors, but torch.load(...,
+    # map_location=<accelerator>) moves saved states onto the device; undo that.
+    accel.set_rng_state_all([state.cpu() for state in saved["state"]])
+
+
+def configure_determinism():
+    """Apply backend-appropriate determinism settings.
+
+    ``torch.use_deterministic_algorithms(True)`` is backend-agnostic; the cuDNN
+    flags only matter on CUDA, and there is no XPU equivalent.
+    """
+    torch.use_deterministic_algorithms(True)
+    if torch.cuda.is_available():
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
diff --git a/src/eval.py b/src/eval.py
new file mode 100644
index 0000000..260e706
--- /dev/null
+++ b/src/eval.py
@@ -0,0 +1,89 @@
+import os
+import torch
+import torch.nn.functional as F
+import json
+import math
+import hashlib
+from model import TinyGPT
+from dataset import TinyDataset
+from main import set_seed
+from config import TRAIN_CONFIG
+from device import get_device
+
+DEVICE = get_device()
+
+def hash_model(model):
+    h = hashlib.sha256()
+    for p in model.parameters():
+        h.update(p.data.cpu().numpy().tobytes())
+    return h.hexdigest()
+
+def hash_dict(d):
+    encoded = json.dumps(d, sort_keys=True).encode()
+    return hashlib.sha256(encoded).hexdigest()
+
+if __name__ == "__main__":
+    set_seed(TRAIN_CONFIG["seed"])
+
+    dataset = TinyDataset()
+    model = TinyGPT(
+        vocab_size=dataset.vocab_size,
+        embed_dim=TRAIN_CONFIG["embed_dim"],
+        num_heads=TRAIN_CONFIG["num_heads"],
+        max_seq_len=TRAIN_CONFIG["max_seq_len"],
+        dropout=TRAIN_CONFIG["dropout"]
+    ).to(DEVICE)
+
+    # 
Hash the raw checkpoint bytes before loading (NOTE(review): file_hash is not compared to anything below)
+    checkpoint_path = "mid_checkpoint.pt"
+    with open(checkpoint_path, "rb") as f:
+        file_hash = hashlib.sha256(f.read()).hexdigest()
+
+    # Load checkpoint (contains model, optimizer, and RNG states)
+    # weights_only=False (full unpickling) is required for non-tensor state (RNG, metadata)
+    # NOTE(review): file_hash above is never checked against a trusted value, so it detects no tampering yet
+    checkpoint = torch.load(checkpoint_path, weights_only=False, map_location=DEVICE)
+    model.load_state_dict(checkpoint['model'])
+    model.eval() # disabling dropout for eval as results must be deterministic
+
+    model_hash = hash_model(model)
+
+    # Verify the stored seal (hash of the loaded parameters) if the checkpoint carries one
+    if 'checkpoint_hash' in checkpoint:
+        if model_hash != checkpoint['checkpoint_hash']:
+            raise RuntimeError(f"Checkpoint integrity check failed. Expected: {checkpoint['checkpoint_hash'][:16]}..., Got: {model_hash[:16]}...")
+
+    print(f" ~> Model loaded | checkpoint hash: {model_hash[:16]}...")
+
+    # Eval batch. NOTE(review): get_batch() returns the fixed first window of TinyDataset.data; confirm it is truly held out
+    x, y = dataset.get_batch()
+    x, y = x.to(DEVICE), y.to(DEVICE)
+
+    with torch.no_grad():
+        logits = model(x)
+        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1))
+
+    perplexity = math.exp(loss.item())
+
+    print(f" ~> Eval loss: {loss.item():.8f}")
+    print(f" ~> Perplexity: {perplexity:.5f}")
+
+    eval_data_hash = hashlib.sha256(dataset.encoded.numpy().tobytes()).hexdigest()
+
+    # Build manifest — hash is computed over content, not including itself
+    manifest = {
+        "model_checkpoint_hash": model_hash,
+        "eval_dataset": eval_data_hash,
+        "eval_loss": loss.item(),
+        "perplexity": perplexity,
+    }
+    manifest["eval_manifest_hash"] = hash_dict(manifest)
+
+    proofs_dir = os.path.join(os.path.dirname(__file__), "..", "proofs")
+    os.makedirs(proofs_dir, exist_ok=True)
+    manifest_path = os.path.join(proofs_dir, "eval_manifest.json")
+    with open(manifest_path, "w") as f:
+        json.dump(manifest, f, indent=2)
+
+    print(f"\n ~> Manifest saved to 
def _find_eval_manifest():
    """Return the path of an existing ``eval_manifest.json``, or ``None``."""
    # eval.py writes its manifest to ``<src>/../proofs``; the bare filename in
    # the working directory is kept as a fallback for manually placed copies.
    proofs_copy = os.path.join(
        os.path.dirname(__file__), "..", "proofs", "eval_manifest.json"
    )
    for candidate in (proofs_copy, "eval_manifest.json"):
        if os.path.isfile(candidate):
            return candidate
    return None


def generate_global_manifest():
    """Build, seal and write ``pipeline_manifest.json``.

    The manifest combines five SHA-256 digests (environment fingerprint,
    training config, encoded dataset, checkpoint file bytes, eval manifest) and
    then adds ``99_GLOBAL_PIPELINE_HASH``, a digest over those five entries.
    The result is written to ``pipeline_manifest.json`` in the working
    directory and echoed to stdout.

    Raises:
        RuntimeError: if ``eval_manifest.json`` cannot be found (neither in the
            ``proofs`` directory nor in the working directory), or if
            ``mid_checkpoint.pt`` is missing or empty.
    """
    eval_manifest_path = _find_eval_manifest()
    if eval_manifest_path is None:
        raise RuntimeError("Missing eval_manifest.json. Please run src/eval.py first to generate the evaluation hashes.")

    print("Generating The Global Verification Manifest...")

    # 1. Environment Fingerprint
    env_fingerprint = {
        "torch": torch.__version__,
        "python": sys.version.split(' ')[0],
        "os": platform.platform()
    }
    env_hash = hash_dict(env_fingerprint)

    # 2. Configuration Hash
    config_hash = get_config_hash()

    # 3. Dataset Hash
    dataset = TinyDataset()
    dataset_hash = hashlib.sha256(dataset.encoded.numpy().tobytes()).hexdigest()

    # 4. Model Hash (checkpoint file hash - must exist and be fully written)
    checkpoint_path = "mid_checkpoint.pt"
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f"Missing {checkpoint_path}. Please run src/main.py first to generate the checkpoint.")
    with open(checkpoint_path, "rb") as f:
        checkpoint_bytes = f.read()
    if len(checkpoint_bytes) == 0:
        raise RuntimeError(f"{checkpoint_path} is empty. Checkpoint may not be fully written.")
    model_hash = hashlib.sha256(checkpoint_bytes).hexdigest()

    # 5. Eval Manifest Hash (run eval.py before this script)
    with open(eval_manifest_path, "r") as f:
        eval_manifest = json.load(f)
    eval_hash = hash_dict(eval_manifest)

    # 6. Build the Vault
    global_manifest = {
        "1_environment_hash": env_hash,
        "2_training_config_hash": config_hash,
        "3_dataset_hash": dataset_hash,
        "4_model_checkpoint_hash": model_hash,
        "5_eval_manifest_hash": eval_hash,
    }

    # 7. Seal the Vault (digest covers the five entries above, not itself)
    global_manifest["99_GLOBAL_PIPELINE_HASH"] = hash_dict(global_manifest)

    with open("pipeline_manifest.json", "w") as f:
        json.dump(global_manifest, f, indent=2)

    print("\n ༼ つ ◕_◕ ༽つ Global Manifest Sealed:")
    print(json.dumps(global_manifest, indent=2))
def main():
    """Train twice from scratch on DEVICE and report whether the runs agree.

    Two runs agree when their loss lists are equal, every parameter pair is
    ``torch.equal``, and the two model hashes match. The comparison is printed
    and a proof record is appended via :func:`_write_proof`.

    Returns:
        ``True`` when the two runs are identical by all three checks.
    """
    print(f"\n=== Fresh-vs-fresh determinism on {DEVICE.type.upper()} "
          f"({device_name(DEVICE)}) | torch {torch.__version__} ===")

    model1, losses1 = train_once()
    model2, losses2 = train_once()

    losses_match = losses1 == losses2
    params_match = all(
        torch.equal(p1, p2)
        for p1, p2 in zip(model1.parameters(), model2.parameters())
    )
    hash1, hash2 = hash_model(model1), hash_model(model2)

    print(f"\nFinal loss (run 1): {losses1[-1]:.8f}")
    print(f"Final loss (run 2): {losses2[-1]:.8f}")
    print(f"Loss curves identical: {losses_match}")
    print(f"Bitwise parameter match: {params_match}")
    print(f"Model hash (run 1): {hash1[:16]}...")
    print(f"Model hash (run 2): {hash2[:16]}...")

    ok = losses_match and params_match and (hash1 == hash2)
    if ok:
        print("\n(❁ ´◡`❁) PASSED: same device is bitwise reproducible.")
    else:
        print("\n(╯°□°)╯︵ ┻━┻ FAILED: entropy detected on this device.")

    _write_proof(losses1, losses2, hash1, hash2, ok)
    return ok


def _write_proof(losses1, losses2, hash1, hash2, ok):
    """Append a structured proof block next to the CPU determinism log.

    Args:
        losses1: Per-step losses of the first run (its last entry is recorded).
        losses2: Per-step losses of the second run.
        hash1: Model hash of the first run.
        hash2: Model hash of the second run.
        ok: Overall verdict computed by :func:`main`.
    """
    proofs_dir = os.path.join(os.path.dirname(__file__), "..", "proofs")
    os.makedirs(proofs_dir, exist_ok=True)

    # Toolkit version of whichever accelerator backend is active; None on CPU.
    accelerator_version = None
    if DEVICE.type == "cuda":
        accelerator_version = torch.version.cuda
    elif DEVICE.type == "xpu":
        accelerator_version = getattr(torch.version, "xpu", None)

    record = {
        "device": DEVICE.type,
        "device_name": device_name(DEVICE),
        "torch": torch.__version__,
        "accelerator_version": accelerator_version,
        "os": platform.platform(),
        "cublas_workspace_config": os.environ.get("CUBLAS_WORKSPACE_CONFIG"),
        "final_loss": losses1[-1],
        "loss_curves_identical": losses1 == losses2,
        "model_hash_run1": hash1,
        "model_hash_run2": hash2,
        "bitwise_reproducible": ok,
    }
    log_path = os.path.join(proofs_dir, "device_determinism_log.txt")
    with open(log_path, "a") as f:
        f.write(json.dumps(record, indent=2) + "\n")
    print(f"\n ~> Proof appended to {os.path.normpath(log_path)}")


if __name__ == "__main__":
    # Surface the verdict as the process exit status so a failed run is
    # visible to shells and CI, not only in the printed output.
    raise SystemExit(0 if main() else 1)
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention in which a position only attends to itself and earlier positions."""

    # Type hint for the dynamically registered buffer
    bias: torch.Tensor

    def __init__(self, embed_dim, num_heads, max_seq_len, dropout=0.1):
        super().__init__()
        assert embed_dim % num_heads == 0
        # Submodules are created in this exact order: c_attn, c_proj, dropout.
        self.c_attn = nn.Linear(embed_dim, 3 * embed_dim)   # fused q/k/v projection
        self.c_proj = nn.Linear(embed_dim, embed_dim)       # output projection
        self.attn_dropout = nn.Dropout(dropout)
        self.n_head = num_heads
        self.n_embd = embed_dim

        # Lower-triangular mask of ones, shaped (1, 1, max_seq_len, max_seq_len).
        causal_mask = torch.tril(torch.ones(max_seq_len, max_seq_len))
        self.register_buffer("bias", causal_mask.view(1, 1, max_seq_len, max_seq_len))

    def forward(self, x):
        """Apply masked attention to ``x`` of size (B, T, C) and return the same size."""
        batch, seq_len, width = x.size()
        head_dim = width // self.n_head

        def to_heads(tensor):
            # (B, T, C) -> (B, n_head, T, head_dim)
            return tensor.view(batch, seq_len, self.n_head, head_dim).transpose(1, 2)

        query, key, value = self.c_attn(x).split(self.n_embd, dim=2)
        key = to_heads(key)
        query = to_heads(query)
        value = to_heads(value)

        scores = (query @ key.transpose(-2, -1)) * (1.0 / math.sqrt(head_dim))
        # Positions where the mask is 0 lie in the future: exclude them.
        scores = scores.masked_fill(self.bias[:, :, :seq_len, :seq_len] == 0, float('-inf'))
        weights = self.attn_dropout(F.softmax(scores, dim=-1))

        mixed = weights @ value
        merged = mixed.transpose(1, 2).contiguous().view(batch, seq_len, width)
        return self.c_proj(merged)


class Block(nn.Module):
    """Pre-norm transformer block: attention and an MLP, each added back residually."""

    def __init__(self, embed_dim, num_heads, max_seq_len, dropout=0.1):
        super().__init__()
        self.ln_1 = nn.LayerNorm(embed_dim)
        self.attn = CausalSelfAttention(embed_dim, num_heads, max_seq_len, dropout=dropout)
        self.ln_2 = nn.LayerNorm(embed_dim)
        hidden_dim = 4 * embed_dim
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, embed_dim),
        )

    def forward(self, x):
        """Return ``x`` after the attention and MLP residual updates."""
        after_attention = x + self.attn(self.ln_1(x))
        return after_attention + self.mlp(self.ln_2(after_attention))


class TinyGPT(nn.Module):
    """Single-block GPT: token + position embeddings, one Block, final norm, LM head."""

    def __init__(self, vocab_size, embed_dim=16, num_heads=2, max_seq_len=32, dropout=0.1):
        super().__init__()
        self.max_seq_len = max_seq_len
        # Entry order (wte, wpe, h, ln_f) is kept as-is: it fixes both the
        # construction order and the parameter order of the model.
        self.transformer = nn.ModuleDict(dict(
            wte=nn.Embedding(vocab_size, embed_dim),
            wpe=nn.Embedding(max_seq_len, embed_dim),
            h=nn.ModuleList([Block(embed_dim, num_heads, max_seq_len, dropout)]),
            ln_f=nn.LayerNorm(embed_dim),
        ))
        self.lm_head = nn.Linear(embed_dim, vocab_size, bias=False)

    def forward(self, idx):
        """Map token ids of size (B, T) to logits of size (B, T, vocab_size).

        Raises:
            ValueError: if T is larger than ``max_seq_len``.
        """
        _, seq_len = idx.size()
        if seq_len > self.max_seq_len:
            raise ValueError(f"Input sequence length {seq_len} exceeds maximum sequence length {self.max_seq_len}")

        # ModuleDict lookups are cast purely for the benefit of the type checker.
        token_table = typing.cast(nn.Embedding, self.transformer["wte"])
        position_table = typing.cast(nn.Embedding, self.transformer["wpe"])
        blocks = typing.cast(nn.ModuleList, self.transformer["h"])
        final_norm = typing.cast(nn.LayerNorm, self.transformer["ln_f"])

        positions = torch.arange(0, seq_len, dtype=torch.long, device=idx.device)
        hidden = token_table(idx) + position_table(positions)

        for layer in blocks:
            hidden = layer(hidden)

        return self.lm_head(final_norm(hidden))
def run_training_segment(start_step, end_step, checkpoint_path_to_load=None, log_file="audit.jsonl", seed=None, tamper_weights=False):
    """Train TinyGPT over ``[start_step, end_step)`` and log telemetry for every step.

    Without a checkpoint this is the prover path: the run is seeded through
    ``set_seed`` and, right after the optimizer step for
    ``TRAIN_CONFIG["checkpoint_step"] - 1``, writes ``mid_checkpoint.pt``
    (weights, optimizer state, all RNG states and a hash of the weights).

    With ``checkpoint_path_to_load`` this is the auditor path: weights,
    optimizer state and RNG states are restored from the file and training
    resumes from ``start_step``.

    Args:
        start_step: First step index to execute.
        end_step: One past the last step; ``None`` means
            ``TRAIN_CONFIG["total_steps"]``.
        checkpoint_path_to_load: Checkpoint to resume from, or ``None``.
        log_file: JSONL telemetry file (truncated by ``TelemetryLogger``).
        seed: Seed for a from-scratch run; ``None`` means
            ``TRAIN_CONFIG["seed"]``. Not used when resuming.
        tamper_weights: Accepted for interface compatibility; not read here.

    Returns:
        The trained model.

    Raises:
        RuntimeError: if the checkpoint carries a ``checkpoint_hash`` that does
            not match the hash of the weights it contains.
    """
    # Function-scope import: the module header only imports the device/RNG helpers.
    from device import configure_determinism

    active_seed = seed if seed is not None else TRAIN_CONFIG["seed"]
    active_end_step = end_step if end_step is not None else TRAIN_CONFIG["total_steps"]

    if checkpoint_path_to_load:
        # A replay must run under the same determinism settings as the prover
        # even when no from-scratch run happened earlier in this process.
        configure_determinism()
    else:
        set_seed(active_seed)

    dataset = TinyDataset()

    model = TinyGPT(
        vocab_size=dataset.vocab_size,
        embed_dim=TRAIN_CONFIG["embed_dim"],
        num_heads=TRAIN_CONFIG["num_heads"],
        max_seq_len=TRAIN_CONFIG["max_seq_len"],
        dropout=TRAIN_CONFIG["dropout"]
    ).to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=TRAIN_CONFIG["lr"])
    logger = TelemetryLogger(filepath=log_file)

    if checkpoint_path_to_load:
        # SECURITY NOTE: weights_only=False unpickles arbitrary objects (needed
        # for the numpy/python RNG states). Only load checkpoints from a
        # trusted source; the embedded hash below covers the weights only.
        # map_location="cpu" lets a GPU-written checkpoint load on any host;
        # load_state_dict then copies values onto the model's own device.
        checkpoint = torch.load(checkpoint_path_to_load, weights_only=False, map_location="cpu")
        model.load_state_dict(checkpoint['model'])

        # verifying cryptographic seal on model weights
        if 'checkpoint_hash' in checkpoint:
            loaded_hash = logger.hash_model(model)
            if loaded_hash != checkpoint['checkpoint_hash']:
                print("\n FATAL ALERT! : Cryptographic seal broken! Checkpoint file was tampered with.")
                print(f" Expected: {checkpoint['checkpoint_hash'][:16]}...")
                print(f" Got: {loaded_hash[:16]}...\n")
                raise RuntimeError("Checkpoint integrity check failed")

        optimizer.load_state_dict(checkpoint['optimizer'])
        torch.set_rng_state(checkpoint['rng_state'])
        restore_accel_rng_state(checkpoint.get('accel_rng_state'))  # CUDA/XPU dropout RNG
        np.random.set_state(checkpoint['numpy_rng'])
        random.setstate(checkpoint['python_rng'])
        print(f" ~> Auditor loaded checkpoint & RNG states from step {start_step}")

    x, y = dataset.get_batch()
    x, y = x.to(DEVICE), y.to(DEVICE)

    for step in range(start_step, active_end_step):
        logits = model(x)
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        logger.log_step(step, loss.item(), model)

        # Only the prover (from-scratch run) writes the mid-run checkpoint.
        if not checkpoint_path_to_load and step == (TRAIN_CONFIG["checkpoint_step"] - 1):

            current_model_hash = logger.hash_model(model)

            torch.save({
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'rng_state': torch.get_rng_state(),
                'accel_rng_state': accel_rng_state(),  # None on CPU; tagged per-backend on GPU
                'numpy_rng': np.random.get_state(),
                'python_rng': random.getstate(),
                'checkpoint_hash': current_model_hash
            }, "mid_checkpoint.pt")
            print(f" ~> Prover saved checkpoint at step {TRAIN_CONFIG['checkpoint_step']}")

    return model
def secret_noise_auditor(log_file="secret_noise_log.jsonl"):
    """Negative scenario: faithful restore, but gradients are perturbed before each optimizer step.

    The checkpoint, optimizer state and RNG states are all restored correctly;
    the only deviation from an honest replay is Gaussian noise scaled by 1e-10
    that is added to every gradient.

    Args:
        log_file: JSONL telemetry file for this run.

    Returns:
        The model after the perturbed replay.
    """
    #test 2: correct checkpoint, correct seed, but secret noise added to gradients

    set_seed(TRAIN_CONFIG["seed"]) #GOOD SEED

    dataset = TinyDataset()
    model = TinyGPT(
        vocab_size=dataset.vocab_size,
        embed_dim=TRAIN_CONFIG["embed_dim"],
        num_heads=TRAIN_CONFIG["num_heads"],
        max_seq_len=TRAIN_CONFIG["max_seq_len"],
        dropout=TRAIN_CONFIG["dropout"],
    ).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=TRAIN_CONFIG["lr"])
    logger = TelemetryLogger(filepath=log_file)

    # SECURITY NOTE: weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    # map_location="cpu" lets a GPU-written checkpoint load on any host;
    # load_state_dict copies the values onto the model's own device.
    checkpoint = torch.load("mid_checkpoint.pt", weights_only=False, map_location="cpu")
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # Restore RNG states to prevent replay drift
    torch.set_rng_state(checkpoint['rng_state'])
    restore_accel_rng_state(checkpoint.get('accel_rng_state'))
    np.random.set_state(checkpoint['numpy_rng'])
    random.setstate(checkpoint['python_rng'])

    print(" ~> Tampered auditor loaded checkpoint with GOOD seed but will add secret noise to gradients")

    x, y = dataset.get_batch()
    x, y = x.to(DEVICE), y.to(DEVICE)

    for step in range(TRAIN_CONFIG["checkpoint_step"], TRAIN_CONFIG["total_steps"]):
        logits = model(x)
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1))
        optimizer.zero_grad()
        loss.backward()

        # Add secret noise to gradients
        with torch.no_grad():
            for p in model.parameters():
                if p.grad is not None:
                    p.grad += torch.randn_like(p.grad) * 1e-10  # Small noise

        optimizer.step()
        logger.log_step(step, loss.item(), model)
    return model
def verify(prover_segment, auditor_logs, prover_hash, auditor_hash, label="AUDIT"):
    """Compare a prover's telemetry segment and final hash against an auditor's replay.

    Telemetry records are compared pairwise: ``step`` must be equal, while
    ``loss``, ``grad_norm`` and ``param_norm`` must agree within a relative
    tolerance of 1e-6. The final model hashes must be exactly equal.

    Args:
        prover_segment: Prover telemetry records (dicts) for the audited steps.
        auditor_logs: Auditor telemetry records for the same steps.
        prover_hash: Hash of the prover's final model.
        auditor_hash: Hash of the auditor's final model.
        label: Name used in the printed report.

    Returns:
        A verdict dict with keys ``telemetry_match``, ``hash_match``,
        ``passed``, ``prover_hash``, ``auditor_hash`` and ``label``. The same
        shape is returned on every path, including a log-length mismatch
        (where ``telemetry_match`` and ``passed`` are ``False``). Note that the
        dict itself is always truthy: inspect ``["passed"]``.
    """
    print(f"\n[Verifying: {label}]")

    hash_match = (prover_hash == auditor_hash)

    def build_verdict(telemetry_match):
        # Single place that defines the shape of the returned verdict.
        return {
            'telemetry_match': telemetry_match,
            'hash_match': hash_match,
            'passed': telemetry_match and hash_match,
            'prover_hash': prover_hash,
            'auditor_hash': auditor_hash,
            'label': label
        }

    if len(prover_segment) != len(auditor_logs):
        print(f"Log length mismatch — prover: {len(prover_segment)}, auditor: {len(auditor_logs)}")
        return build_verdict(False)

    match = True
    for p, a in zip(prover_segment, auditor_logs):
        step_match = p['step'] == a['step']
        loss_match = math.isclose(p['loss'], a['loss'], rel_tol=1e-6)
        grad_match = math.isclose(p['grad_norm'], a['grad_norm'], rel_tol=1e-6)
        param_match = math.isclose(p['param_norm'], a['param_norm'], rel_tol=1e-6)
        step_ok = step_match and loss_match and grad_match and param_match

        if not step_ok:
            match = False
            delta = abs(p['loss'] - a['loss'])
            print(f"Step {p['step']} | Prover: {p['loss']:.8f} | Auditor: {a['loss']:.8f} | Δ {delta:.2e} FAILED")
        else:
            print(f"Step {p['step']} | Prover: {p['loss']:.8f} | Auditor: {a['loss']:.8f} | PASSED")

    if not hash_match:
        print(f"\n Hash mismatch! Prover hash: {prover_hash[:16]} // Auditor hash: {auditor_hash[:16]} [HASH ERROR]")

    if match and hash_match:
        print(f"\n (❁ ´◡`❁) {label} PASSED: Segment replay is bitwise deterministic.")
    else:
        print(f"\n (╯°□°)╯︵ ┻━┻ {label} FAILED: Trajectories diverged.")

    # Return full verification verdict instead of just telemetry match
    return build_verdict(match)
+ + verify(prover_logs[CP_STEP:TOT_STEP], tampered_logs, hash_model(prover_model), hash_model(bad_model), label="BAD SEED AUDIT") + + # Test 2: Noisy weights: should fail + print("\n Scenario 3: NOISE INJECTED") + noisey_model = secret_noise_auditor() + + with open("secret_noise_log.jsonl") as f: + noisy_logs = [json.loads(line) for line in f] + + verify(prover_logs[CP_STEP:TOT_STEP], noisy_logs, hash_model(prover_model), hash_model(noisey_model), label="NOISY WEIGHTS AUDIT") + + # Test 3: Post-training sabotage, hash fail + print("\n Scenario 4: POST-TRAINING WEIGHT SABOTAGE") + sabotage_model = sabotage_auditor() + + with open("post_sabotage_log.jsonl") as f: + post_sabotage_logs = [json.loads(line) for line in f] + + verify( + prover_logs[CP_STEP:TOT_STEP], post_sabotage_logs, + hash_model(prover_model), hash_model(sabotage_model), + label="POST-TRAINING SABOTAGE AUDIT" + ) + + # Test 4: Tampered Checkpoint File (Broken Seal) + print("\n Scenario 5: MODIFIED CHECKPOINT FILE (BROKEN SEAL)") + broken_seal_model = broken_seal_auditor() + + with open("broken_seal_log.jsonl") as f: + broken_seal_logs = [json.loads(line) for line in f] + + verify( + prover_logs[CP_STEP:TOT_STEP], broken_seal_logs, + hash_model(prover_model), hash_model(broken_seal_model), + label="BROKEN SEAL AUDIT" + ) + +# Uncomment the following lines to run only the Segmented audit verification: +''' +if __name__ == "__main__": + fingerprint = { + "torch": torch.__version__, + "python": sys.version, + "os": platform.platform(), + "cpu": platform.processor() +} + with open("env_fingerprint.json", "w") as f: + json.dump(fingerprint, f, indent=2) + + print("\n SEGMENTED AUDIT VERIFICATION ") + + print("\n[Running Prover: Steps 0 to 10]") + run_training_segment(start_step = 0, end_step = 10, log_file="prover_log.jsonl") + + print("\n[Running Auditor: Steps 5 to 10 with checkpoint]") + run_training_segment(start_step = 5, end_step = 10, checkpoint_path_to_load="mid_checkpoint.pt", 
log_file="auditor_log.jsonl") + + print("\n[Verifying Telemetry Trajectories]") + with open("prover_log.jsonl", "r") as f: + prover_logs = [json.loads(line) for line in f.readlines()] + with open("auditor_log.jsonl", "r") as f: + auditor_logs = [json.loads(line) for line in f.readlines()] + + prover_segment = prover_logs[5:10] + + if len(prover_segment) != 5 or len(auditor_logs) != 5: + print(f"Log length mismatch — prover_segment: {len(prover_segment)}, auditor: {len(auditor_logs)}") + print("Cannot verify. Check for crashes or early exits in training.") + else: + match = True + + for p, a in zip(prover_segment, auditor_logs): + step_match = p['step'] == a['step'] + loss_match = math.isclose(p['loss'], a['loss'], rel_tol=1e-6) + grad_match = math.isclose(p['grad_norm'], a['grad_norm'], rel_tol=1e-6) + param_match = math.isclose(p['param_norm'], a['param_norm'], rel_tol=1e-6) + step_ok = step_match and loss_match and grad_match and param_match + if not step_ok: + match = False + status = "ok" if step_ok else "error" + print(f"Step {p['step']} | Prover Loss: {p['loss']:.6f} | Auditor Loss: {a['loss']:.6f} {status}") + + if match: + print("\n (❁ ´◡`❁) \nAUDIT PASSED: Segment replay is bitwise deterministic.") + else: + print("\n (╯°□°)╯︵ ┻━┻ \nAUDIT FAILED: Trajectories diverged.") + +''' +#Reproducibility test for tinyGPT without Segment Verification test +#uncomment this block and comment the others to not have segment verification +''' +import torch +#for linear model: from model import TinyModel +from model import TinyGPT +from dataset import TinyDataset +import torch.nn.functional as F +from main import set_seed + +def train_once(): + set_seed(99) + + dataset = TinyDataset() + #for linear model: model = TinyModel(vocab_size=dataset.vocab_size) + model = TinyGPT(vocab_size=dataset.vocab_size) + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + + losses = [] + x, y = dataset.get_batch() + + for step in range(5): + logits = model(x) + #for linear model: 
class TelemetryLogger:
    """Writes one JSON record per training step to a JSONL file and hashes model weights."""

    def __init__(self, filepath="audit_log.jsonl"):
        self.filepath = filepath

        # Opening in write mode empties any log left over from an earlier run.
        with open(self.filepath, 'w'):
            pass

    def log_step(self, step, loss, model):
        """Append the step's loss and global gradient/parameter L2 norms to the log.

        Args:
            step: Step index to record.
            loss: Loss value to record (stored as given).
            model: Module whose parameters (and their gradients, where present)
                are measured.

        Returns:
            The record dict that was written.
        """
        grad_sq_total = 0.0
        param_sq_total = 0.0

        for tensor in model.parameters():
            param_sq_total += tensor.data.norm(2).item() ** 2
            if tensor.grad is None:
                continue  # parameters without a gradient add nothing to grad_norm
            grad_sq_total += tensor.grad.data.norm(2).item() ** 2

        record = dict(
            step=step,
            loss=loss,
            grad_norm=grad_sq_total ** 0.5,
            param_norm=param_sq_total ** 0.5,
        )

        line = json.dumps(record) + '\n'
        with open(self.filepath, 'a') as sink:
            sink.write(line)

        return record

    def hash_model(self, model):
        """Return the SHA-256 hex digest of all parameter bytes, in ``model.parameters()`` order."""
        digest = hashlib.sha256()
        for tensor in model.parameters():
            raw = tensor.data.cpu().numpy().tobytes()
            digest.update(raw)
        return digest.hexdigest()