diff --git a/templates/config.json b/templates/config.json index 4d013eb0..c795a792 100644 --- a/templates/config.json +++ b/templates/config.json @@ -3749,6 +3749,52 @@ }, "tags": ["LLM Inference & Model Serving", "Developer Tools", "AI Agents"] }, +{ + "id": "stable-worldmodel", + "name": "galilai-group/stable-worldmodel", + "description": "A platform for reproducible world model research and evaluation", + "repo": "https://github.com/Phala-Network/phala-cloud/tree/main/templates/prebuilt/stable-worldmodel", + "author": "galilai-group", + "icon": "stable-worldmodel.png", + "envs": [ + { + "key": "STABLE_WORLDMODEL_VERSION", + "required": false, + "default": "0.1.0", + "description": "Pinned stable-worldmodel Python package version installed by the verifier." + }, + { + "key": "PYTORCH_CPU_VERSION", + "required": false, + "default": "2.7.1+cpu", + "description": "CPU-only PyTorch wheel version installed from the official PyTorch CPU wheel index before stable-worldmodel." + }, + { + "key": "TORCHVISION_CPU_VERSION", + "required": false, + "default": "0.22.1+cpu", + "description": "CPU-only TorchVision wheel version matching the default PyTorch version." + }, + { + "key": "OPENCV_PYTHON_HEADLESS_VERSION", + "required": false, + "default": "4.12.0.88", + "description": "Headless OpenCV compatibility package version used because upstream imports visual wrappers at package import time." + }, + { + "key": "IMAGEIO_VERSION", + "required": false, + "default": "2.37.0", + "description": "ImageIO compatibility package version used because upstream imports visual wrapper helpers at package import time." 
+ } + ], + "defaultResource": { + "vCPU": 1, + "memory": 2048, + "diskSize": 20 + }, + "tags": ["AI Apps & Workflows", "Developer Tools", "AI Agents"] + }, { "id": "deepspeed", "name": "deepspeedai/DeepSpeed", diff --git a/templates/icons/stable-worldmodel.png b/templates/icons/stable-worldmodel.png new file mode 100644 index 00000000..f0f57dcc Binary files /dev/null and b/templates/icons/stable-worldmodel.png differ diff --git a/templates/prebuilt/stable-worldmodel/README.md b/templates/prebuilt/stable-worldmodel/README.md new file mode 100644 index 00000000..d4e9a802 --- /dev/null +++ b/templates/prebuilt/stable-worldmodel/README.md @@ -0,0 +1,135 @@ +# galilai-group/stable-worldmodel on Phala Cloud + +This template deploys a CPU-safe `stable-worldmodel` verifier behind a public Caddy proxy. It installs the real upstream Python package, imports the package, verifies local variation-space primitives, runs a tiny deterministic CEM planning solver against an in-memory cost fixture, and exposes JSON endpoints for deployment smoke tests. + +The default service does not create Gymnasium environments, collect datasets, inspect private caches, download datasets, download checkpoints, load model weights, train world models, evaluate policies, call hosted model providers, require browser auth, or require credentials. It is scoped for small CPU-only Phala Cloud deployments. + +## Metadata + +- Template id: `stable-worldmodel` +- Display name: `galilai-group/stable-worldmodel` +- Category: AI Apps & Workflows +- Upstream repo: `https://github.com/galilai-group/stable-worldmodel` +- Upstream docs: `https://galilai-group.github.io/stable-worldmodel/` +- Upstream package: `stable-worldmodel==0.1.0` +- Upstream author: GalilAI group / `galilai-group` +- Icon source: `https://github.com/galilai-group.png`, the GitHub organization avatar. The upstream README, docs tree, and MkDocs config were inspected and no dedicated logo, icon, favicon, or SVG asset was present. 
+ +## What Runs By Default + +- `app`: an internal Python HTTP verifier on `APP_PORT=8000`. On startup it installs CPU-only PyTorch and TorchVision wheels, installs `stable-worldmodel`, imports the package, checks the package API and environment registry, and serves smoke-test endpoints. +- `proxy`: a public Caddy reverse proxy. It is the only service with a host port mapping and exposes `8080:80`. + +The `/demo` endpoint exercises safe local primitives from the real package: + +- `stable_worldmodel.spaces.Discrete`, `RGBBox`, and `Dict` +- `stable_worldmodel.policy.PlanConfig` +- `stable_worldmodel.solver.CEMSolver` +- the `swm --version` CLI path through `python -m stable_worldmodel.cli --version` + +The solver demo uses a local quadratic cost fixture and tiny tensors only. It is not a real model-serving endpoint. + +## Deployment + +1. Deploy the `stable-worldmodel` template on Phala Cloud. +2. Keep the default CPU resources for the verifier. +3. Open the public endpoint on port `8080`. +4. Check `https://<your-app-endpoint>/healthz` after startup completes, replacing `<your-app-endpoint>` with the public hostname Phala Cloud assigns to port `8080`. + +The first startup downloads Python wheels from the public package indexes. No persistent volume is required because the verifier does not store datasets, checkpoints, model weights, or generated artifacts. + +## Environment Variables + +No credentials are required by the default verifier. + +| Variable | Default | Required | Description | +| --- | --- | --- | --- | +| `APP_PORT` | `8000` | No | Internal Python app port. Caddy proxies to this port; the public host port is `8080`. | +| `STABLE_WORLDMODEL_VERSION` | `0.1.0` | No | Pinned `stable-worldmodel` package version installed by the verifier. | +| `PYTORCH_CPU_VERSION` | `2.7.1+cpu` | No | CPU-only PyTorch wheel version installed from the official PyTorch CPU wheel index before `stable-worldmodel`. | +| `TORCHVISION_CPU_VERSION` | `0.22.1+cpu` | No | CPU-only TorchVision wheel version matching the default PyTorch version. 
| +| `OPENCV_PYTHON_HEADLESS_VERSION` | `4.12.0.88` | No | Headless OpenCV compatibility package used because upstream imports visual wrappers at package import time. | +| `IMAGEIO_VERSION` | `2.37.0` | No | ImageIO compatibility package used because upstream imports visual wrapper helpers at package import time. | +| `STABLEWM_HOME` | `/tmp/stable_worldmodel` | No | Cache root used by upstream helpers if users later add dataset or checkpoint code. The default verifier does not populate it. | +| `HF_HUB_DISABLE_TELEMETRY` | `1` | No | Disables Hugging Face telemetry in case users extend the verifier with Hub-aware code. | +| `WANDB_DISABLED` | `true` | No | Keeps Weights & Biases disabled for the default verifier. | +| `WANDB_MODE` | `offline` | No | Keeps Weights & Biases offline for the default verifier. | +| `CUDA_VISIBLE_DEVICES` | empty | No | Hides CUDA devices from the default runtime. CUDA is not required. | + +Provider, dataset, checkpoint, and telemetry credentials such as `HF_TOKEN`, `HUGGING_FACE_HUB_TOKEN`, `WANDB_API_KEY`, `OPENAI_API_KEY`, or private storage tokens are intentionally not required and are not consumed by the default demo. Add secrets only if you replace this verifier with a real training, evaluation, dataset, or inference workflow. + +## Exposed Endpoints + +The public HTTP API is available through Caddy on port `8080`. + +- `GET /healthz`: returns HTTP 200 when the real package imports and the package/API checks pass. +- `GET /demo`: runs deterministic local `stable-worldmodel` space, planning config, CEM solver, and CLI checks. It returns explicit booleans showing that no training, evaluation, model download, checkpoint loading, dataset download, provider call, credential requirement, or GPU requirement is active. +- `GET /v1/models`: returns an OpenAI-shaped metadata list with `stable-worldmodel/no-model-download-demo`. It is metadata only; the default template does not host or load a model. 
+- `GET /`: returns the same readiness payload as `/healthz`. + +## Smoke Verification + +Run these checks after deployment to verify the CPU-safe demo: + +```bash +curl -i http://localhost:8080/healthz +curl -fsS http://localhost:8080/demo +curl -fsS http://localhost:8080/v1/models +``` + +Expected results: + +- `GET /healthz` returns `200 OK`. +- `/demo` contains `"ok": true`. +- `/demo.demo.runtime.model_downloads` is `false`. +- `/demo.demo.runtime.training_started` is `false`. +- `/demo.demo.runtime.provider_api_calls` is `false`. +- `/v1/models` includes `stable-worldmodel/no-model-download-demo`. + +For a local Compose smoke test from the parent monorepo worktree: + +```bash +docker compose -f sdks/templates/prebuilt/stable-worldmodel/docker-compose.yml up -d +curl -fsS http://localhost:8080/healthz +curl -fsS http://localhost:8080/demo +curl -fsS http://localhost:8080/v1/models +docker compose -f sdks/templates/prebuilt/stable-worldmodel/docker-compose.yml down +``` + +Template validation commands from the parent monorepo worktree: + +```bash +python3 sdks/templates/validate.py +git -C sdks diff --check origin/main...HEAD +docker compose -f sdks/templates/prebuilt/stable-worldmodel/docker-compose.yml config >/dev/null +``` + +## Production Notes + +`stable-worldmodel` is a research library for data collection, world-model training, and evaluation with model-predictive control. The upstream quick start and CLI docs show workflows such as `World(...)`, `world.collect(...)`, `swm envs`, dataset inspection, checkpoint listing, and planning with `CEMSolver`. Real workloads can require environment extras, simulator dependencies, datasets, checkpoints, GPU or accelerator resources, large storage, reproducibility controls, and experiment tracking. + +Before adapting this template for production research: + +- Pick the exact environment suite and install only the required upstream extras or simulator packages. 
+- Size CPU, memory, disk, and GPU resources around the selected environment, batch size, model, dataset format, and checkpoint storage. +- Use named volumes for datasets, caches, checkpoints, and logs when persistence is needed. +- Review dataset, simulator, model, and checkpoint licenses before downloading or serving artifacts. +- Store credentials through Phala Cloud secrets or required environment variables; never hardcode tokens in `docker-compose.yml` or this README. +- Add authentication before exposing dataset, checkpoint, training, evaluation, or file-management endpoints. +- Replace the verifier with an explicit training, evaluation, or serving command and document any required datasets, checkpoints, credentials, and accelerator assumptions. + +## Security Notes + +- Only Caddy publishes a host port: `8080:80`. +- The app service is internal and uses `expose`, not `ports`. +- The template does not use privileged mode, host networking, host IPC, host PID, Docker socket mounts, host bind mounts, external `env_file`, real secrets, or private credentials. +- The default endpoints are unauthenticated metadata and smoke-test endpoints only. +- The default runtime path does not download datasets, model weights, or checkpoints and does not call external model providers. + +## Cleanup + +```bash +docker compose -f sdks/templates/prebuilt/stable-worldmodel/docker-compose.yml down +``` + +No named volumes are created by this template. 
diff --git a/templates/prebuilt/stable-worldmodel/docker-compose.yml b/templates/prebuilt/stable-worldmodel/docker-compose.yml
new file mode 100644
index 00000000..05bb9683
--- /dev/null
+++ b/templates/prebuilt/stable-worldmodel/docker-compose.yml
@@ -0,0 +1,452 @@
+# CPU-only stable-worldmodel verifier. Only the Caddy proxy publishes a host port.
+services:
+  app:
+    image: ghcr.io/astral-sh/uv:python3.11-bookworm-slim
+    restart: unless-stopped
+    init: true
+    # Internal only: the proxy reaches this port over the "internal" network.
+    expose:
+      - "8000"
+    environment:
+      APP_PORT: "8000"
+      STABLE_WORLDMODEL_VERSION: ${STABLE_WORLDMODEL_VERSION:-0.1.0}
+      PYTORCH_CPU_VERSION: ${PYTORCH_CPU_VERSION:-2.7.1+cpu}
+      TORCHVISION_CPU_VERSION: ${TORCHVISION_CPU_VERSION:-0.22.1+cpu}
+      OPENCV_PYTHON_HEADLESS_VERSION: ${OPENCV_PYTHON_HEADLESS_VERSION:-4.12.0.88}
+      IMAGEIO_VERSION: ${IMAGEIO_VERSION:-2.37.0}
+      CUDA_VISIBLE_DEVICES: ""
+      HF_HUB_DISABLE_TELEMETRY: "1"
+      STABLEWM_HOME: /tmp/stable_worldmodel
+      WANDB_DISABLED: "true"
+      WANDB_MODE: offline
+      PYTHONDONTWRITEBYTECODE: "1"
+      PYTHONUNBUFFERED: "1"
+      UV_SYSTEM_PYTHON: "1"
+    configs:
+      - source: stable_worldmodel_demo_app
+        target: /opt/stable-worldmodel-demo/main.py
+    command:
+      - /bin/sh
+      - -lc
+      - |
+        set -eu
+        uv pip install --system --no-cache --index-url https://download.pytorch.org/whl/cpu "torch==$${PYTORCH_CPU_VERSION}" "torchvision==$${TORCHVISION_CPU_VERSION}"
+        uv pip install --system --no-cache "stable-worldmodel==$${STABLE_WORLDMODEL_VERSION}" "opencv-python-headless==$${OPENCV_PYTHON_HEADLESS_VERSION}" "imageio==$${IMAGEIO_VERSION}"
+        exec python /opt/stable-worldmodel-demo/main.py
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "python -c \"import os, sys, urllib.request; port = os.environ.get('APP_PORT', '8000'); response = urllib.request.urlopen(f'http://127.0.0.1:{port}/healthz', timeout=5); sys.exit(0 if response.status == 200 else 1)\"",
+        ]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      # The command above installs wheels before the server starts, so allow a long warm-up.
+      start_period: 300s
+    networks:
+      - internal
+
+  proxy:
+    image: caddy:2.8
+    restart: unless-stopped
+    ports:
+      - "8080:80"
+    configs:
+      - source: caddy_config
+        target: /etc/caddy/Caddyfile
+    # Start the proxy only after the app healthcheck reports healthy.
+    depends_on:
+      app:
+        condition: service_healthy
+    networks:
+      - internal
+
+configs:
+  caddy_config:
+    content: |
+      :80 {
+        encode zstd gzip
+        header {
+          X-Content-Type-Options "nosniff"
+          Referrer-Policy "no-referrer"
+          -Server
+        }
+        reverse_proxy app:8000
+      }
+
+  stable_worldmodel_demo_app:
+    content: |
+      import contextlib
+      import importlib
+      import importlib.metadata
+      import io
+      import json
+      import os
+      import platform
+      import subprocess
+      import sys
+      import threading
+      import time
+      import warnings
+      from http import HTTPStatus
+      from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+      from urllib.parse import urlparse
+
+
+      SERVICE = "stable-worldmodel-demo"
+      PACKAGE_NAME = "stable-worldmodel"
+      MODULE_NAME = "stable_worldmodel"
+      UPSTREAM = "https://github.com/galilai-group/stable-worldmodel"
+      STARTED_AT = time.time()
+      # Captured at import time, before any redirect_stdout(), so request logs
+      # always reach the real stdout instead of a per-request capture buffer.
+      LOG_STREAM = sys.stdout
+      # contextlib.redirect_stdout() swaps the process-wide sys.stdout, so
+      # concurrent handler threads must take turns holding a redirect.
+      STDOUT_REDIRECT_LOCK = threading.Lock()
+
+
+      def package_version(name):
+          """Return the installed version of distribution `name`, or an "unavailable: ..." note."""
+          try:
+              return importlib.metadata.version(name)
+          except Exception as exc:
+              return f"unavailable: {type(exc).__name__}: {exc}"
+
+
+      def runtime_flags():
+          """Return the fixed, all-False flags listing work the default verifier never performs."""
+          return {
+              "credentials_required": False,
+              "provider_api_calls": False,
+              "model_downloads": False,
+              "checkpoint_downloads": False,
+              "checkpoint_loaded": False,
+              "dataset_downloads": False,
+              "environment_rollouts": False,
+              "training_started": False,
+              "evaluation_started": False,
+              "gpu_or_cuda_required": False,
+          }
+
+
+      def import_package_status():
+          """Import stable-worldmodel and its dependencies and record API checks; exceptions land in "error"."""
+          result = {
+              "package": PACKAGE_NAME,
+              "distribution_version": package_version(PACKAGE_NAME),
+              "module_version": None,
+              "import_ok": False,
+              "checks": {},
+              "environment_registry": {},
+              "dependencies": {},
+              "error": None,
+          }
+
+          try:
+              warnings.filterwarnings(
+                  "ignore",
+                  message="ale-py not found.*",
+                  category=UserWarning,
+              )
+              swm = importlib.import_module(MODULE_NAME)
+              torch = importlib.import_module("torch")
+              torchvision = importlib.import_module("torchvision")
+              numpy = importlib.import_module("numpy")
+              cv2 = importlib.import_module("cv2")
+              imageio = importlib.import_module("imageio")
+
+              from stable_worldmodel.policy import PlanConfig
+              from stable_worldmodel.solver import CEMSolver
+
+              worlds = sorted(getattr(swm.envs, "WORLDS", []))
+              result["module_version"] = getattr(swm, "__version__", None)
+              result["environment_registry"] = {
+                  "registered_worlds": len(worlds),
+                  "sample_worlds": worlds[:8],
+                  "rollouts_started": False,
+              }
+              result["dependencies"] = {
+                  "torch": getattr(torch, "__version__", None),
+                  "torchvision": getattr(torchvision, "__version__", None),
+                  "numpy": getattr(numpy, "__version__", None),
+                  "opencv_python": getattr(cv2, "__version__", None),
+                  "imageio": getattr(imageio, "__version__", None),
+                  "lancedb": package_version("lancedb"),
+                  "pyarrow": package_version("pyarrow"),
+              }
+              result["checks"] = {
+                  "top_level_import": swm is not None,
+                  "world_api_present": hasattr(swm, "World"),
+                  "spaces_module_present": hasattr(swm, "spaces"),
+                  "policy_plan_config_present": isinstance(PlanConfig, type),
+                  "cem_solver_present": isinstance(CEMSolver, type),
+                  "world_registry_nonempty": len(worlds) > 0,
+                  "torch_cuda_available": bool(torch.cuda.is_available()),
+                  "torch_cuda_required": False,
+              }
+              result["import_ok"] = (
+                  result["checks"]["top_level_import"]
+                  and result["checks"]["world_api_present"]
+                  and result["checks"]["spaces_module_present"]
+                  and result["checks"]["policy_plan_config_present"]
+                  and result["checks"]["cem_solver_present"]
+                  and result["checks"]["world_registry_nonempty"]
+                  and not result["checks"]["torch_cuda_required"]
+              )
+          except Exception as exc:
+              result["error"] = f"{type(exc).__name__}: {exc}"
+
+          return result
+
+
+      IMPORT_STATUS = import_package_status()
+
+
+      def run_demo_checks():
+          """Run the local spaces, CEM solver, and CLI checks; return a failure payload if the import failed."""
+          if not IMPORT_STATUS["import_ok"]:
+              return {
+                  "ok": False,
+                  "error": IMPORT_STATUS["error"] or "stable-worldmodel import checks failed",
+                  "runtime": runtime_flags(),
+              }
+
+          result = {
+              "ok": False,
+              "checks": {},
+              "spaces_demo": None,
+              "solver_demo": None,
+              "cli_demo": None,
+              "runtime": runtime_flags(),
+              "error": None,
+          }
+
+          try:
+              import numpy as np
+              import stable_worldmodel as swm
+              import torch
+              from gymnasium import spaces as gym_spaces
+              from stable_worldmodel.policy import PlanConfig
+              from stable_worldmodel.solver import CEMSolver
+
+              discrete = swm.spaces.Discrete(
+                  6,
+                  init_value=2,
+                  constrain_fn=lambda value: int(value) % 2 == 0,
+              )
+              rgb = swm.spaces.RGBBox(
+                  init_value=np.array([64, 128, 192], dtype=np.uint8),
+              )
+              variation_space = swm.spaces.Dict(
+                  {
+                      "mode": discrete,
+                      "color": rgb,
+                  },
+                  sampling_order=["mode", "color"],
+              )
+              variation_space.seed(1234)
+              discrete_sample = discrete.sample(max_tries=50)
+
+              class QuadraticCostModel:
+                  def get_cost(self, info_dict, action_candidates):
+                      return action_candidates.pow(2).sum(dim=(-1, -2))
+
+              solver = CEMSolver(
+                  model=QuadraticCostModel(),
+                  batch_size=1,
+                  num_samples=12,
+                  n_steps=2,
+                  topk=4,
+                  seed=7,
+              )
+              action_space = gym_spaces.Box(
+                  low=-1,
+                  high=1,
+                  shape=(1, 2),
+                  dtype=np.float32,
+              )
+              plan_config = PlanConfig(horizon=3, receding_horizon=1)
+              solver.configure(
+                  action_space=action_space,
+                  n_envs=1,
+                  config=plan_config,
+              )
+              stdout_buffer = io.StringIO()
+              # Hold the lock so overlapping /demo requests cannot restore sys.stdout out of order.
+              with STDOUT_REDIRECT_LOCK, contextlib.redirect_stdout(stdout_buffer):
+                  solver_output = solver({"state": torch.zeros(1, 1, 2)})
+              actions = solver_output["actions"].detach().cpu().numpy()
+
+              cli_env = dict(os.environ)
+              cli_env["PYTHONWARNINGS"] = "ignore:ale-py not found:UserWarning"
+              cli = subprocess.run(
+                  [sys.executable, "-m", "stable_worldmodel.cli", "--version"],
+                  env=cli_env,
+                  text=True,
+                  stdout=subprocess.PIPE,
+                  stderr=subprocess.PIPE,
+                  timeout=30,
+                  check=False,
+              )
+
+              result["spaces_demo"] = {
+                  "variation_names": variation_space.names(),
+                  "sampling_order": variation_space.sampling_order,
+                  "initial_value": {
+                      "mode": int(variation_space.init_value["mode"]),
+                      "color": variation_space.init_value["color"].tolist(),
+                  },
+                  "sampled_even_mode": int(discrete_sample),
+                  "check_ok": bool(variation_space.check(debug=True)),
+              }
+              result["solver_demo"] = {
+                  "solver": "CEMSolver",
+                  "cost_model": "local quadratic cost fixture",
+                  "plan_config": {
+                      "horizon": plan_config.horizon,
+                      "receding_horizon": plan_config.receding_horizon,
+                      "history_len": plan_config.history_len,
+                      "action_block": plan_config.action_block,
+                      "plan_len": plan_config.plan_len,
+                  },
+                  "action_shape": list(actions.shape),
+                  "actions": actions.round(4).tolist(),
+                  "costs": [float(value) for value in solver_output["costs"]],
+                  "stdout": stdout_buffer.getvalue().strip(),
+              }
+              result["cli_demo"] = {
+                  "command": "python -m stable_worldmodel.cli --version",
+                  "returncode": cli.returncode,
+                  "stdout": cli.stdout.strip(),
+                  "stderr": cli.stderr.strip(),
+              }
+              result["checks"] = {
+                  "spaces_check_ok": result["spaces_demo"]["check_ok"],
+                  "solver_actions_shape_ok": result["solver_demo"]["action_shape"] == [1, 3, 2],
+                  "cli_version_ok": cli.returncode == 0 and package_version(PACKAGE_NAME) in cli.stdout,
+                  "no_cuda_required": not result["runtime"]["gpu_or_cuda_required"],
+                  "no_remote_runtime_work": not any(result["runtime"].values()),
+              }
+              result["ok"] = all(result["checks"].values())
+          except Exception as exc:
+              result["error"] = f"{type(exc).__name__}: {exc}"
+
+          return result
+
+
+      def base_payload():
+          """Return the service metadata fields shared by the readiness and /demo responses."""
+          return {
+              "service": SERVICE,
+              "upstream": UPSTREAM,
+              "python": sys.version.split()[0],
+              "platform": platform.platform(),
+              "uptime_seconds": round(time.time() - STARTED_AT, 3),
+              "package": IMPORT_STATUS,
+          }
+
+
+      class Handler(BaseHTTPRequestHandler):
+          """Serve the JSON endpoints: /, /healthz, /demo, and /v1/models."""
+
+          server_version = "stable-worldmodel-demo/1.0"
+
+          def log_message(self, fmt, *args):
+              """Log each request to the real stdout, unaffected by redirect_stdout() in other threads."""
+              print("%s - %s" % (self.address_string(), fmt % args), file=LOG_STREAM, flush=True)
+
+          def respond_json(self, status, payload):
+              """Send `payload` as a sorted-key JSON body with the given HTTP status."""
+              body = json.dumps(payload, sort_keys=True).encode("utf-8")
+              self.send_response(status)
+              self.send_header("Content-Type", "application/json")
+              self.send_header("Content-Length", str(len(body)))
+              self.end_headers()
+              self.wfile.write(body)
+
+          def do_GET(self):
+              """Dispatch GET requests by path; unknown paths get a JSON 404 listing the endpoints."""
+              path = urlparse(self.path).path
+
+              if path in ("/", "/healthz"):
+                  payload = base_payload()
+                  payload.update(
+                      {
+                          "ok": IMPORT_STATUS["import_ok"],
+                          "status": "ready" if IMPORT_STATUS["import_ok"] else "unhealthy",
+                          "runtime": runtime_flags(),
+                          "endpoints": ["/healthz", "/demo", "/v1/models"],
+                      }
+                  )
+                  status = HTTPStatus.OK if IMPORT_STATUS["import_ok"] else HTTPStatus.SERVICE_UNAVAILABLE
+                  self.respond_json(status, payload)
+                  return
+
+              if path == "/demo":
+                  demo = run_demo_checks()
+                  payload = base_payload()
+                  payload.update(
+                      {
+                          "ok": demo["ok"],
+                          "name": "stable-worldmodel local package and solver verifier",
+                          "description": (
+                              "Installs and imports the real stable-worldmodel package, "
+                              "checks local variation-space primitives, runs a tiny CEM "
+                              "solver against a deterministic in-memory cost fixture, "
+                              "and verifies the CLI version without datasets, model "
+                              "weights, provider calls, or environment rollouts."
+                          ),
+                          "demo": demo,
+                      }
+                  )
+                  status = HTTPStatus.OK if demo["ok"] else HTTPStatus.SERVICE_UNAVAILABLE
+                  self.respond_json(status, payload)
+                  return
+
+              if path == "/v1/models":
+                  self.respond_json(
+                      HTTPStatus.OK,
+                      {
+                          "object": "list",
+                          "data": [
+                              {
+                                  "id": "stable-worldmodel/no-model-download-demo",
+                                  "object": "model",
+                                  "created": 0,
+                                  "owned_by": "galilai-group",
+                                  "description": (
+                                      "Metadata placeholder for the CPU-safe stable-worldmodel "
+                                      "verifier. The default template does not host, download, "
+                                      "train, evaluate, or load a world model."
+                                  ),
+                              }
+                          ],
+                      },
+                  )
+                  return
+
+              self.respond_json(
+                  HTTPStatus.NOT_FOUND,
+                  {
+                      "error": "not_found",
+                      "endpoints": ["/healthz", "/demo", "/v1/models"],
+                  },
+              )
+
+
+      def main():
+          """Bind 0.0.0.0 on APP_PORT (default 8000) and serve requests until the process exits."""
+          port = int(os.environ.get("APP_PORT", "8000"))
+          server = ThreadingHTTPServer(("0.0.0.0", port), Handler)
+          print(f"{SERVICE} listening on 0.0.0.0:{port}", flush=True)
+          server.serve_forever()
+
+
+      if __name__ == "__main__":
+          main()
+
+networks:
+  internal:
+    driver: bridge