diff --git a/flake.nix b/flake.nix index 78abe0e..9c5ce3b 100644 --- a/flake.nix +++ b/flake.nix @@ -56,6 +56,8 @@ frigate = ./modules/frigate.nix; hetzner-bare-metal = ./modules/presets/hetzner-bare-metal.nix; public-frigate = ./modules/presets/public-frigate.nix; + frigate-edge = ./modules/presets/frigate-edge.nix; + wireguard-mesh = ./modules/wireguard-mesh.nix; # Batteries-included entry point. Bundles nix-bitcoin so the # consumer needs only `roost` in their flake inputs to deploy a @@ -102,6 +104,34 @@ inherit pkgs extraModules; roost = self; }; + + # Two-node test of the wireguard-mesh module. Boots two VMs on + # the test driver's shared virtual network, brings up the mesh, + # and verifies cross-mesh reachability + firewall scoping. + mkMeshTest = + { + pkgs, + extraModules ? [ ], + }: + import ./test/mesh.nix { + inherit pkgs extraModules; + roost = self; + }; + + # Two-VM end-to-end test for the frigate-edge preset. Boots a + # full nix-bitcoin stack on the `backend` node (with the + # public-frigate exposeBackends option enabled) and a slim + # frigate-edge consumer on the `edge` node, then exercises the + # edge's Electrum listeners. + mkRegtestEdgeE2E = + { + pkgs, + extraModules ? [ ], + }: + import ./test/regtest-edge.nix { + inherit pkgs extraModules; + roost = self; + }; }; checks = forAllLinux (system: { @@ -112,6 +142,12 @@ regtest-preset = self.lib.mkRegtestPresetE2E { pkgs = pkgsFor system; }; + regtest-edge = self.lib.mkRegtestEdgeE2E { + pkgs = pkgsFor system; + }; + wireguard-mesh = self.lib.mkMeshTest { + pkgs = pkgsFor system; + }; }); templates.default = { diff --git a/modules/_internal/frigate-tls-acme.nix b/modules/_internal/frigate-tls-acme.nix new file mode 100644 index 0000000..7504fb7 --- /dev/null +++ b/modules/_internal/frigate-tls-acme.nix @@ -0,0 +1,145 @@ +{ + config, + lib, + pkgs, + ... +}: + +# Internal helper: TLS + ACME wiring shared between the `public-frigate` +# and `frigate-edge` presets. 
Not exported via `nixosModules` and not +# part of the stable API — the options below are flagged `internal`. +# +# A parent preset enables this module and feeds it `host` + `tls`. The +# module materializes `services.frigate.sslCert` / `sslKey`, ACME via +# webroot when an email is set, the nginx vhost serving the HTTP-01 +# challenge, the PKCS#8 key conversion frigate's TLS loader requires, +# and the systemd ordering that prevents frigate from racing the +# initial cert issuance. + +let + cfg = config.services._roost.frigate-tls-acme; + + certFile = + if cfg.tls.certificateFile != null then + cfg.tls.certificateFile + else + "/var/lib/acme/${cfg.host}/fullchain.pem"; + + keyFile = if cfg.tls.keyFile != null then cfg.tls.keyFile else "/var/lib/acme/${cfg.host}/key.pem"; +in +{ + options.services._roost.frigate-tls-acme = with lib; { + enable = mkOption { + type = types.bool; + default = false; + internal = true; + description = "Enable shared TLS + ACME wiring. Set by a parent preset, not by hand."; + }; + + host = mkOption { + type = types.str; + internal = true; + }; + + tls = { + acmeEmail = mkOption { + type = types.nullOr types.str; + default = null; + internal = true; + }; + certificateFile = mkOption { + type = types.nullOr types.path; + default = null; + internal = true; + }; + keyFile = mkOption { + type = types.nullOr types.path; + default = null; + internal = true; + }; + }; + }; + + config = lib.mkIf cfg.enable ( + lib.mkMerge [ + { + assertions = [ + { + assertion = + (cfg.tls.acmeEmail == null) || (cfg.tls.certificateFile == null && cfg.tls.keyFile == null); + message = '' + tls.acmeEmail is mutually exclusive with tls.certificateFile / tls.keyFile. + ''; + } + { + assertion = + (cfg.tls.acmeEmail != null) || (cfg.tls.certificateFile != null && cfg.tls.keyFile != null); + message = '' + TLS requires either tls.acmeEmail (ACME-issued) or both tls.certificateFile + and tls.keyFile (operator-managed). 
+ ''; + } + ]; + + services.frigate.sslCert = certFile; + services.frigate.sslKey = keyFile; + services.frigate.extraSupplementaryGroups = lib.optional (cfg.tls.acmeEmail != null) "acme"; + } + + (lib.mkIf (cfg.tls.acmeEmail != null) { + security.acme = { + acceptTerms = true; + defaults.email = cfg.tls.acmeEmail; + }; + + # Manage the cert directly via `webroot` HTTP-01 rather than + # nginx's `enableACME` shorthand. The shorthand auto-registers + # nginx (and `nginx-config-reload.service` as root) as cert + # consumers and adds an assertion that the cert be readable by + # both — but our cert lives in the `acme` group for frigate, + # and neither nginx nor the reload service joins it. nginx + # here only needs to serve the HTTP-01 challenge files lego + # drops into the webroot; it never touches the issued cert. + # + # postRun: frigate's TLS loader only accepts PKCS#8 + # (`BEGIN PRIVATE KEY`), but lego emits EC keys in SEC1 + # (`BEGIN EC PRIVATE KEY`) and RSA keys in PKCS#1 + # (`BEGIN RSA PRIVATE KEY`). Convert key.pem in place after + # each issuance/renewal so frigate can parse it. Runs as root + # in the cert directory; `chown acme:acme` keeps the file + # owned the way NixOS would have set it. Idempotent — running + # `openssl pkcs8 -topk8` on an already-PKCS#8 key is a no-op. 
+ security.acme.certs.${cfg.host} = { + domain = cfg.host; + webroot = "/var/lib/acme/acme-challenge"; + group = "acme"; + reloadServices = [ "frigate.service" ]; + postRun = '' + umask 0027 + ${pkgs.openssl}/bin/openssl pkcs8 -topk8 -nocrypt \ + -in key.pem -out key.pem.pkcs8 + chown acme:acme key.pem.pkcs8 + mv key.pem.pkcs8 key.pem + ''; + }; + + services.nginx = { + enable = true; + virtualHosts.${cfg.host} = { + locations."/.well-known/acme-challenge/".root = "/var/lib/acme/acme-challenge"; + locations."/".return = "404"; + }; + }; + + networking.firewall.allowedTCPPorts = [ 80 ]; + + # Block frigate startup until the cert exists, otherwise it + # crash-loops on a missing `fullchain.pem` during a fresh + # deploy. `wants` (not `requires`) so a transient acme failure + # later does not take frigate down with it. + systemd.services.frigate.after = [ "acme-${cfg.host}.service" ]; + systemd.services.frigate.wants = [ "acme-${cfg.host}.service" ]; + }) + ] + ); +} diff --git a/modules/presets/frigate-edge.nix b/modules/presets/frigate-edge.nix new file mode 100644 index 0000000..899a193 --- /dev/null +++ b/modules/presets/frigate-edge.nix @@ -0,0 +1,161 @@ +{ + config, + lib, + ... +}: + +# Edge-mode Frigate: TLS + ACME + frigate, with bitcoind and fulcrum +# living on another host. The consumer points `backend.bitcoind.rpcUrl`, +# `backend.bitcoind.zmqSequenceEndpoint`, and `backend.electrumUrl` at +# the remote endpoints — typically over a private WireGuard mesh (see +# `roost.nixosModules.wireguard-mesh`) — and supplies a credentials +# file containing `user:password` for the bitcoind RPC. +# +# This preset is intentionally narrow: no nix-bitcoin, no local +# services.bitcoind or services.fulcrum, no `manage` flags. If you want +# everything on one box, use `public-frigate` (or `nixosModules.default`) +# instead. 
+ +let + cfg = config.services.frigate-edge; +in +{ + imports = [ + ../frigate.nix + ../_internal/frigate-tls-acme.nix + ]; + + options.services.frigate-edge = with lib; { + enable = mkEnableOption "edge-mode public Frigate (TLS + ACME, backends on another host)"; + + host = mkOption { + type = types.str; + example = "albatross.example.com"; + description = '' + Public DNS name for this frigate node. Advertised in the Electrum + `server.features` response, used as the SAN clients validate + against the served TLS certificate, and — when `tls.acmeEmail` + is set — as the `security.acme.certs.` identifier. + ''; + }; + + network = mkOption { + type = types.enum [ + "mainnet" + "testnet" + "testnet4" + "signet" + "regtest" + ]; + default = "mainnet"; + }; + + publicPort = mkOption { + type = types.port; + default = 50002; + description = '' + Public TLS port. 50002 is the convention for Electrum-over-SSL. + ''; + }; + + tls = { + acmeEmail = mkOption { + type = types.nullOr types.str; + default = null; + example = "ops@example.com"; + description = '' + Email address for Let's Encrypt registration. Setting it enables + ACME for `host`. Mutually exclusive with manual cert/key files. + ''; + }; + + certificateFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to a TLS certificate. Required when not using ACME."; + }; + + keyFile = mkOption { + type = types.nullOr types.path; + default = null; + description = "Path to the matching PKCS#8 TLS private key. Required when not using ACME."; + }; + }; + + backend = { + bitcoind = { + rpcUrl = mkOption { + type = types.str; + example = "http://10.42.0.1:8332"; + description = '' + URL of the bitcoind JSON-RPC endpoint on the backend host. + Plain `http://` is fine when the transport is a private + mesh; do not expose the backend RPC to the public internet. 
+ ''; + }; + + authCredentialFile = mkOption { + type = types.path; + description = '' + File on disk containing literally `user:password` for the + bitcoind RPC user. Loaded via systemd `LoadCredential` and + substituted into frigate's config.toml at service start; + never read by the frigate process directly. Typically an + agenix-decrypted path under `/run/agenix/`. + + The corresponding rpcauth line (`user:salt$hash`) lives on + the backend host's bitcoin.conf. Generate the pair once via + bitcoind's `rpcauth.py`. + ''; + }; + + zmqSequenceEndpoint = mkOption { + type = types.str; + example = "tcp://10.42.0.1:28336"; + description = '' + URL of the bitcoind ZMQ `sequence` publisher on the backend + host. Frigate subscribes for sub-100ms mempool ingestion. + ''; + }; + }; + + electrumUrl = mkOption { + type = types.str; + example = "tcp://10.42.0.1:60001"; + description = '' + URL of the backing Electrum server (fulcrum) on the backend + host. Frigate proxies non-silent-payments queries here. + ''; + }; + }; + }; + + config = lib.mkIf cfg.enable { + # TLS + ACME wiring is shared with public-frigate; delegate to the + # private helper module. TLS-mutex assertions live there. + services._roost.frigate-tls-acme = { + enable = true; + inherit (cfg) host tls; + }; + + services.frigate = { + enable = true; + host = cfg.host; + network = cfg.network; + # Plaintext listener stays on loopback. All public traffic + # arrives via the TLS listener below. 
+ tcp = "tcp://127.0.0.1:50001"; + ssl = "ssl://0.0.0.0:${toString cfg.publicPort}"; + bitcoind = { + enable = true; + server = cfg.backend.bitcoind.rpcUrl; + authType = "USERPASS"; + authCredentialFile = cfg.backend.bitcoind.authCredentialFile; + zmqSequenceEndpoint = cfg.backend.bitcoind.zmqSequenceEndpoint; + }; + electrumBackend = cfg.backend.electrumUrl; + }; + + networking.firewall.allowedTCPPorts = [ cfg.publicPort ]; + }; +} diff --git a/modules/presets/public-frigate.nix b/modules/presets/public-frigate.nix index 23d8fba..9a0528a 100644 --- a/modules/presets/public-frigate.nix +++ b/modules/presets/public-frigate.nix @@ -15,22 +15,25 @@ let # `electrumBackend` URL can't drift apart. backendPort = 60001; - # ZMQ `sequence` publisher endpoint. Bitcoin Core opens the socket - # (via `zmqpubsequence=...`) and Frigate subscribes to it (via - # `core.zmqSequenceEndpoint`). Both sides must match exactly. + # The local frigate process always reads ZMQ off loopback; that's a + # constant. When `exposeBackends` is on, bitcoind additionally binds + # the same socket on the mesh address so edge consumers can subscribe + # — see the publish endpoint below. zmqSequenceEndpoint = "tcp://127.0.0.1:28336"; - # When ACME issues the cert, the files live under `/var/lib/acme//`. - # When the consumer brings their own, we point straight at their files. - certFile = - if cfg.tls.certificateFile != null then - cfg.tls.certificateFile - else - "/var/lib/acme/${cfg.host}/fullchain.pem"; - keyFile = if cfg.tls.keyFile != null then cfg.tls.keyFile else "/var/lib/acme/${cfg.host}/key.pem"; + # Where bitcoind opens the ZMQ socket. With no edge consumers, bind + # to loopback only. With `exposeBackends.enable`, bind to 0.0.0.0 so + # both local frigate (via 127.0.0.1) and remote edge frigate (via + # `bindAddress`) can subscribe; the firewall scopes outside access + # to `exposeBackends.interface` only. 
+ zmqPublishBind = if cfg.exposeBackends.enable then "0.0.0.0" else "127.0.0.1"; + zmqPublishEndpoint = "tcp://${zmqPublishBind}:28336"; in { - imports = [ ../frigate.nix ]; + imports = [ + ../frigate.nix + ../_internal/frigate-tls-acme.nix + ]; options.services.public-frigate = with lib; { enable = mkEnableOption "public-facing Frigate silent payments server"; @@ -113,6 +116,61 @@ in ''; }; + exposeBackends = { + enable = mkEnableOption "expose bitcoind RPC/ZMQ and fulcrum for edge consumers"; + + bindAddress = mkOption { + type = types.str; + example = "10.42.0.1"; + description = '' + Additional address bitcoind RPC, ZMQ sequence, and fulcrum + bind to (in addition to their loopback defaults). Typically + this host's mesh IP — see `roost.nixosModules.wireguard-mesh`. + ''; + }; + + interface = mkOption { + type = types.str; + example = "wg0"; + description = '' + Interface name used to scope the firewall rules that open the + backend ports. Only traffic arriving on this interface is + accepted; the backends remain unreachable from the public + internet. + ''; + }; + + allowedPeers = mkOption { + type = types.listOf types.str; + example = [ "10.42.0.2/32" ]; + description = '' + Source CIDRs added to bitcoind's `rpcallowip`. Must include + every edge consumer's mesh IP (/32) that needs to talk to the + backends. Loopback is always allowed. + ''; + }; + + rpcAuth = { + user = mkOption { + type = types.str; + example = "frigate-edge"; + description = "RPC user name added to bitcoind for edge consumers."; + }; + + passwordHMAC = mkOption { + type = types.str; + example = "f7efda5c189b999524f151318c0c86$d5b51b3beffbc02b724e5d095828e0bc8b2456e9ac8757ae3211a5d9b16a22ae"; + description = '' + Literal `salt$hash` portion of an rpcauth line, as produced + by bitcoind's `rpcauth.py`. Committed to nix config — the + HMAC is one-way derived from the password; only the + corresponding plaintext is a secret (lives on the edge + consumer). 
+ ''; + }; + }; + }; + # Sentinel attribute, mirroring nix-bitcoin's `secure-node-preset-enabled`. # Lets downstream modules and tests detect activation without re-checking # every individual service. @@ -127,6 +185,15 @@ in lib.mkMerge [ { services.public-frigate.preset-enabled = { }; } + # TLS + ACME wiring is shared with frigate-edge; delegate to the + # private helper module. TLS-mutex assertions live there too. + { + services._roost.frigate-tls-acme = { + enable = true; + inherit (cfg) host tls; + }; + } + { assertions = [ { @@ -150,22 +217,6 @@ in and enable it, or set services.public-frigate.fulcrum.manage = true. ''; } - { - assertion = - (cfg.tls.acmeEmail == null) || (cfg.tls.certificateFile == null && cfg.tls.keyFile == null); - message = '' - services.public-frigate.tls.acmeEmail is mutually exclusive with - tls.certificateFile / tls.keyFile. - ''; - } - { - assertion = - (cfg.tls.acmeEmail != null) || (cfg.tls.certificateFile != null && cfg.tls.keyFile != null); - message = '' - services.public-frigate requires either tls.acmeEmail (for ACME) - or both tls.certificateFile and tls.keyFile (for a manual cert). - ''; - } ]; } @@ -197,14 +248,15 @@ in # all public traffic comes in over `ssl`. The backend Electrum # server (fulcrum/electrs/etc.) listens on a non-conflicting port # so frigate can occupy the canonical Electrum ports. + # + # `sslCert`, `sslKey` and `extraSupplementaryGroups` are set by + # the shared TLS+ACME helper (imported above). services.frigate = { enable = true; host = cfg.host; network = cfg.network; tcp = "tcp://127.0.0.1:50001"; ssl = "ssl://0.0.0.0:${toString cfg.publicPort}"; - sslCert = certFile; - sslKey = keyFile; bitcoind = { enable = true; server = "http://127.0.0.1:8332"; @@ -213,11 +265,6 @@ in inherit zmqSequenceEndpoint; }; electrumBackend = "tcp://127.0.0.1:${toString backendPort}"; - # ACME-issued certs live in /var/lib/acme// owned by the - # `acme` group. 
Frigate reads them at startup, so its service - # needs the group. Skipped for manual-cert deployments where - # the operator has already arranged read access. - extraSupplementaryGroups = lib.optional (cfg.tls.acmeEmail != null) "acme"; }; users.users.frigate.extraGroups = [ "bitcoin" ]; @@ -260,73 +307,76 @@ in # the nix-bitcoin module already assigns the string. (lib.mkIf cfg.bitcoind.manage { services.bitcoind.extraConfig = '' - zmqpubsequence=${zmqSequenceEndpoint} + zmqpubsequence=${zmqPublishEndpoint} ''; systemd.services.bitcoind.serviceConfig.RestrictAddressFamilies = lib.mkForce "AF_UNIX AF_INET AF_INET6 AF_NETLINK"; }) - # ACME path: a minimal HTTP vhost on port 80 hosts the HTTP-01 - # challenge so Let's Encrypt can verify domain ownership. NixOS's - # `enableACME` wires `security.acme.certs.` and the challenge - # location automatically; the `404` covers anything else hitting - # this vhost. nginx is only here for ACME — TLS termination for - # the Electrum stream is frigate's job. - (lib.mkIf (cfg.tls.acmeEmail != null) { - security.acme = { - acceptTerms = true; - defaults.email = cfg.tls.acmeEmail; - }; - - # Manage the cert directly via `webroot` HTTP-01 rather than - # nginx's `enableACME` shorthand. The shorthand auto-registers - # nginx (and `nginx-config-reload.service` as root) as cert - # consumers and adds an assertion that the cert be readable by - # both — but our cert lives in the `acme` group for frigate, - # and neither nginx nor the reload service joins it. nginx - # here only needs to serve the HTTP-01 challenge files lego - # drops into the webroot; it never touches the issued cert. - # - # postRun: frigate's TLS loader only accepts PKCS#8 - # (`BEGIN PRIVATE KEY`), but lego emits EC keys in SEC1 - # (`BEGIN EC PRIVATE KEY`) and RSA keys in PKCS#1 - # (`BEGIN RSA PRIVATE KEY`). Convert key.pem in place after - # each issuance/renewal so frigate can parse it. 
Runs as root - # in the cert directory; `chown acme:acme` keeps the file - # owned the way NixOS would have set it. Idempotent — running - # `openssl pkcs8 -topk8` on an already-PKCS#8 key is a no-op. - security.acme.certs.${cfg.host} = { - domain = cfg.host; - webroot = "/var/lib/acme/acme-challenge"; - group = "acme"; - reloadServices = [ "frigate.service" ]; - postRun = '' - umask 0027 - ${pkgs.openssl}/bin/openssl pkcs8 -topk8 -nocrypt \ - -in key.pem -out key.pem.pkcs8 - chown acme:acme key.pem.pkcs8 - mv key.pem.pkcs8 key.pem - ''; - }; + # exposeBackends: bind bitcoind RPC + ZMQ + fulcrum on the mesh + # interface for an edge consumer. Only honored when the preset is + # managing those services locally — exposing services we don't + # manage would be a contract violation. + # + # bitcoind RPC: nix-bitcoin's `rpc.address` is single-valued, so + # we keep the typed loopback default and append a second + # `rpcbind=` via extraConfig. bitcoind accepts repeated rpcbind + # lines and binds each one. + # + # ZMQ: the publish endpoint above (`zmqPublishEndpoint`) already + # flips to 0.0.0.0 when exposeBackends is on — no extraConfig + # work needed here for ZMQ. + # + # fulcrum: same single-bind option pattern as bitcoind RPC. The + # typed `address` stays on loopback; an extra `tcp = ...` line is + # appended via `extraConfig` for the mesh address. + (lib.mkIf cfg.exposeBackends.enable { + assertions = [ + { + assertion = cfg.bitcoind.manage; + message = '' + services.public-frigate.exposeBackends.enable requires + services.public-frigate.bitcoind.manage = true. The preset + cannot expose a bitcoind it does not configure. + ''; + } + { + assertion = cfg.fulcrum.manage; + message = '' + services.public-frigate.exposeBackends.enable requires + services.public-frigate.fulcrum.manage = true. The preset + cannot expose a fulcrum it does not configure. 
+ ''; + } + ]; - services.nginx = { - enable = true; - virtualHosts.${cfg.host} = { - locations."/.well-known/acme-challenge/".root = "/var/lib/acme/acme-challenge"; - locations."/".return = "404"; + services.bitcoind = { + rpc.allowip = [ "127.0.0.1" ] ++ cfg.exposeBackends.allowedPeers; + rpc.users.${cfg.exposeBackends.rpcAuth.user} = { + inherit (cfg.exposeBackends.rpcAuth) passwordHMAC; }; + extraConfig = '' + rpcbind=${cfg.exposeBackends.bindAddress} + ''; }; - networking.firewall.allowedTCPPorts = [ 80 ]; + services.fulcrum.extraConfig = '' + tcp = ${cfg.exposeBackends.bindAddress}:${toString backendPort} + ''; - # Block frigate startup until the cert exists, otherwise it - # crash-loops on a missing `fullchain.pem` during a fresh - # deploy. `wants` (not `requires`) so a transient acme failure - # later doesn't take frigate down with it. List values under - # `systemd.services.` accumulate via module merging, so - # this composes with the bitcoind/fulcrum deps above. - systemd.services.frigate.after = [ "acme-${cfg.host}.service" ]; - systemd.services.frigate.wants = [ "acme-${cfg.host}.service" ]; + # Scope the open ports to the mesh interface only. Outside + # traffic (e.g. the public internet on eth0) is dropped at + # INPUT by NixOS's default-deny firewall posture. + # + # Pull bitcoind's RPC port from config rather than hardcoding + # `8332`. nix-bitcoin's `rpc.port` default tracks the chain + # (8332 mainnet, 18443 regtest, 18332 testnet, etc.), and the + # firewall has to match wherever bitcoind actually listens. + networking.firewall.interfaces.${cfg.exposeBackends.interface}.allowedTCPPorts = [ + config.services.bitcoind.rpc.port + 28336 + backendPort + ]; }) ] ); diff --git a/modules/wireguard-mesh.nix b/modules/wireguard-mesh.nix new file mode 100644 index 0000000..11065dd --- /dev/null +++ b/modules/wireguard-mesh.nix @@ -0,0 +1,182 @@ +{ + config, + lib, + ... +}: + +# Thin WireGuard-mesh wrapper around `networking.wireguard.interfaces`. 
+# +# The same `peers` block is intended to be defined identically on every +# member host; only `thisHost` and `privateKeyFile` differ per node. The +# module enumerates the peer set, drops the entry matching `thisHost`, +# and emits a wireguard peer for each remainder. Adding a third node +# becomes a one-place edit: a new entry in `peers` plus `thisHost` on +# the new host. +# +# The mesh is point-to-point with /32 peer allowedIPs — no subnets get +# routed through. Public exposure (mesh interface ↔ consumer services) +# is the consumer's concern (scope via `networking.firewall.interfaces.<name>`). + +let + cfg = config.services.roost.wireguard-mesh; + + # CIDR like "10.42.0.0/24" -> prefix length "24". Falls back to "32" + # if the input isn't parseable, which the assertion below catches. + cidrParts = builtins.match "([0-9.]+)/([0-9]+)" cfg.meshCidr; + cidrPrefixLen = if cidrParts == null then "32" else builtins.elemAt cidrParts 1; + + thisPeer = cfg.peers.${cfg.thisHost} or null; + interfaceAddress = lib.optionalString (thisPeer != null) "${thisPeer.meshIp}/${cidrPrefixLen}"; + + otherPeers = lib.filterAttrs (name: _: name != cfg.thisHost) cfg.peers; + + toWgPeer = + peer: + { + publicKey = peer.publicKey; + endpoint = peer.endpoint; + allowedIPs = [ "${peer.meshIp}/32" ]; + } + // lib.optionalAttrs (peer.persistentKeepalive != null) { + inherit (peer) persistentKeepalive; + }; +in +{ + options.services.roost.wireguard-mesh = with lib; { + enable = mkEnableOption "WireGuard mesh between roost hosts"; + + interface = mkOption { + type = types.str; + default = "wg0"; + description = '' + Name of the wireguard interface to create. Override if `wg0` is + already in use on the host for another purpose. + ''; + }; + + thisHost = mkOption { + type = types.str; + description = '' + Short name of the current host within the mesh. Must be a key of + `peers`. The mesh IP for this host is `peers..meshIp`. 
+ ''; + }; + + privateKeyFile = mkOption { + type = types.path; + description = '' + Path to this host's WireGuard private key (typically an + agenix-decrypted path under /run/agenix/). The file must be + readable by root and contain a single base64-encoded key as + produced by `wg genkey`. + ''; + }; + + port = mkOption { + type = types.port; + default = 51820; + description = "UDP port WireGuard listens on. Opened in the firewall."; + }; + + meshCidr = mkOption { + type = types.str; + example = "10.42.0.0/24"; + description = '' + CIDR covering every `peers.*.meshIp`. Only the prefix length is + used (to size the wireguard interface address); the network + portion is informational and documented for operators. + ''; + }; + + mtu = mkOption { + type = types.nullOr types.int; + default = null; + description = '' + Override the WireGuard interface MTU. Null = the upstream + default (1420). Lower this only if the path MTU between mesh + members is below 1500. + ''; + }; + + peers = mkOption { + description = '' + All mesh members keyed by short host name. Define identically + on every member; the module skips the entry matching `thisHost` + when emitting wireguard peers. + ''; + type = types.attrsOf ( + types.submodule { + options = { + publicKey = mkOption { + type = types.str; + description = "Base64 WireGuard public key as produced by `wg pubkey`."; + }; + endpoint = mkOption { + type = types.str; + example = "1.2.3.4:51820"; + description = '' + Public reachable endpoint of this peer (`ip:port` or + `hostname:port`). DNS is resolved once by `wg` at + interface setup — if the address can change, configure + `networking.wireguard.dynamicEndpointRefreshSeconds` at + the consumer level. + ''; + }; + meshIp = mkOption { + type = types.str; + example = "10.42.0.1"; + description = "Mesh-side IPv4 address for this peer. 
Must fall inside `meshCidr`."; + }; + persistentKeepalive = mkOption { + type = types.nullOr types.int; + default = 25; + description = '' + Seconds between keepalive packets to this peer. 25 is the + conventional "always-on" value — harmless on bare-metal + links and useful behind NAT or any stateful middlebox. + Null disables keepalives. + ''; + }; + }; + } + ); + }; + }; + + config = lib.mkIf cfg.enable { + assertions = [ + { + assertion = cfg.peers ? ${cfg.thisHost}; + message = '' + services.roost.wireguard-mesh.thisHost ("${cfg.thisHost}") must name + an entry in services.roost.wireguard-mesh.peers. Existing peers: + ${lib.concatStringsSep ", " (lib.attrNames cfg.peers)}. + ''; + } + { + assertion = cidrParts != null; + message = '' + services.roost.wireguard-mesh.meshCidr ("${cfg.meshCidr}") is not a + valid IPv4 CIDR. Expected form: "10.42.0.0/24". + ''; + } + { + assertion = (builtins.length (lib.attrNames cfg.peers)) >= 2; + message = '' + services.roost.wireguard-mesh.peers must have at least two members + (this host + at least one remote). A single-node mesh is a no-op. + ''; + } + ]; + + networking.wireguard.interfaces.${cfg.interface} = { + ips = [ interfaceAddress ]; + listenPort = cfg.port; + privateKeyFile = toString cfg.privateKeyFile; + mtu = cfg.mtu; + peers = lib.mapAttrsToList (_name: toWgPeer) otherPeers; + }; + + networking.firewall.allowedUDPPorts = [ cfg.port ]; + }; +} diff --git a/test/mesh.nix b/test/mesh.nix new file mode 100644 index 0000000..1fb5b06 --- /dev/null +++ b/test/mesh.nix @@ -0,0 +1,105 @@ +{ + pkgs, + roost, + extraModules ? [ ], +}: + +# Two-node nixosTest for the `wireguard-mesh` module. Both VMs sit on +# the same virtual network; the mesh interface is layered on top, with +# /32 allowedIPs scoping the peer relationships. The test asserts that +# mesh IPs reach each other and that the firewall opens the WireGuard +# UDP port automatically. The module's assertions (e.g. a misconfigured +# `thisHost`) are not exercised by this test. 
+# +# Keypairs below are throwaway test fixtures generated specifically for +# this file. They have no relationship to production hosts and are +# committed deliberately so the test stays pure (no IFD). +let + testKeys = { + a = { + privateKey = "AF3qED26m1FhgY3yn7gvBKP76qPcKoej0oTVaetMZkU="; + publicKey = "PRbUI7dXfSREqCH9twFOaugCW5OrTl2T4RU55F6YGHU="; + }; + b = { + privateKey = "MAN5lxJ4l3bTug+rxk7YMhmIhoPy/13BspwvLnJHUVw="; + publicKey = "vufhiWpCvP7C8LpG9WjXqJk78KJUYDGHcl5Wn3I2xSU="; + }; + }; + + # The wireguard module wants a path on disk, not a literal key. Drop + # each test key into the nix store and reference it by path. Store + # files are world-readable; acceptable only for throwaway test keys. + privFor = + name: + pkgs.writeTextFile { + name = "wg-mesh-test-${name}.priv"; + text = testKeys.${name}.privateKey; + }; + + meshPeers = { + a = { + publicKey = testKeys.a.publicKey; + # `nodes.<name>.networking.primaryIPAddress` is the canonical way + # to reference a VM's primary NIC address inside a nixosTest, but + # we cannot reference that from inside `nodes.*` (cyclic). The + # test framework assigns 192.168.<vlan>.<nodeNumber> starting at + # nodeNumber 1, in sorted node-name order: nodeA = .1, nodeB = .2. + endpoint = "192.168.1.1:51820"; + meshIp = "10.42.0.1"; + }; + b = { + publicKey = testKeys.b.publicKey; + endpoint = "192.168.1.2:51820"; + meshIp = "10.42.0.2"; + }; + }; + + mkNode = name: { + imports = [ + roost.nixosModules.wireguard-mesh + ] + ++ extraModules; + + services.roost.wireguard-mesh = { + enable = true; + thisHost = name; + privateKeyFile = privFor name; + meshCidr = "10.42.0.0/24"; + peers = meshPeers; + }; + }; +in +pkgs.testers.runNixOSTest { + name = "wireguard-mesh"; + + nodes.nodeA = mkNode "a"; + nodes.nodeB = mkNode "b"; + + testScript = '' + start_all() + + # The upstream wireguard module creates a target `wireguard-wg0` + # that waits for the interface service AND every peer service. 
+ # Waiting on the bare interface service returns too early — the + # peers aren't in the kernel yet, so ping fails with "Required key + # not available" until the peer services finish. + nodeA.wait_for_unit("wireguard-wg0.target") + nodeB.wait_for_unit("wireguard-wg0.target") + + # The interface should be up with the configured /24 address. The + # `ip addr show` output includes "10.42.0.1/24" for nodeA only. + nodeA.succeed("ip -4 addr show wg0 | grep -q 10.42.0.1/24") + nodeB.succeed("ip -4 addr show wg0 | grep -q 10.42.0.2/24") + + # Cross-mesh reachability. The first handshake can take a moment + # after both ends finish their boot, so allow a few attempts. + nodeA.wait_until_succeeds("ping -c 1 -W 1 10.42.0.2", timeout=30) + nodeB.wait_until_succeeds("ping -c 1 -W 1 10.42.0.1", timeout=30) + + # Firewall should have UDP 51820 open. Confirm by inspecting the + # iptables ruleset rather than poking the port from outside, which + # would race against the in-progress handshake. + nodeA.succeed("iptables-save | grep -E -- '--dport 51820'") + nodeB.succeed("iptables-save | grep -E -- '--dport 51820'") + ''; +} diff --git a/test/regtest-edge.nix b/test/regtest-edge.nix new file mode 100644 index 0000000..101545d --- /dev/null +++ b/test/regtest-edge.nix @@ -0,0 +1,247 @@ +{ + pkgs, + roost, + extraModules ? [ ], +}: + +# Two-VM end-to-end test for the frigate-edge preset. +# +# backend VM: nix-bitcoin + public-frigate (full local stack) with +# `exposeBackends` enabled so bitcoind RPC/ZMQ and fulcrum +# also listen on the shared subnet for the edge. +# edge VM: frigate-edge consuming the backend's services over the +# shared network. ACME is off (manual cert) so the edge +# boots without DNS or a real CA. +# +# WireGuard is intentionally not in the loop here — that's covered by +# `test/mesh.nix`. 
This test exists to verify the frigate-edge preset's +# wiring (USERPASS auth, remote ZMQ, remote electrum, ACME bypass via +# manual cert) and the matching `exposeBackends` bind logic on the +# backend side. +# +# The bitcoind RPC password is a fixed test fixture; the rpcauth HMAC +# below was computed from it via `bitcoind/share/rpcauth/rpcauth.py +# frigate-edge testpassword`. Both halves are committed deliberately so +# the test stays pure (no IFD, no out-of-band state). +let + selfSignedCert = + pkgs.runCommand "test-self-signed-cert" + { + nativeBuildInputs = [ pkgs.openssl ]; + } + '' + openssl req -x509 -newkey rsa:2048 -nodes \ + -keyout key.pem -out cert.pem \ + -days 1 -subj "/CN=test.local" + install -d $out + install -m 0644 cert.pem $out/cert.pem + install -m 0644 key.pem $out/key.pem + ''; + + rpcUser = "frigate-edge"; + rpcPassword = "testpassword"; + + # `salt$hash` form bitcoind expects. The HMAC is computed as + # HMAC-SHA256 with the salt's *literal UTF-8 bytes* as the key (not + # the hex-decoded bytes) — same algorithm bitcoind/share/rpcauth/rpcauth.py + # implements: `hmac.new(salt.encode("utf-8"), password.encode("utf-8"), "SHA256")`. + # Committable — derives one-way from the plaintext. + rpcPasswordHMAC = "2316d0a5e8ee6339ffb4d86c983bb421$34cc4776187170b359d40928b25deb28ea2bfc436c96fdd0db7150ec5211de85"; + + # `user:password` line the edge feeds frigate via LoadCredential. + authCredentialFile = pkgs.writeText "edge-bitcoind-auth" "${rpcUser}:${rpcPassword}"; + + # The nixosTest framework assigns 192.168.<vlan>.<nodeNumber> + # starting at nodeNumber 1, in sorted node-name order: `backend` + # sorts first so it ends up at .1, and `edge` at .2. Hardcoded + # here because the edge config needs to reference the backend's + # address before the test driver has wired up the topology. + backendIp = "192.168.1.1"; +in +pkgs.testers.runNixOSTest { + name = "regtest-edge"; + + nodes.backend = + { + config, + pkgs, + lib, + ... 
+ }: + { + imports = [ + roost.nixosModules.default + ] + ++ extraModules; + + services.public-frigate = { + enable = true; + host = "backend.test.local"; + network = "regtest"; + tls.certificateFile = "${selfSignedCert}/cert.pem"; + tls.keyFile = "${selfSignedCert}/key.pem"; + + exposeBackends = { + enable = true; + bindAddress = backendIp; + interface = "eth1"; + allowedPeers = [ "192.168.1.0/24" ]; + rpcAuth = { + user = rpcUser; + passwordHMAC = rpcPasswordHMAC; + }; + }; + }; + + # Same regtest plumbing as `regtest-preset.nix`. See that file for + # the per-knob rationale. + services.bitcoind = { + regtest = true; + dbCache = lib.mkForce 100; + disablewallet = lib.mkForce false; + extraConfig = '' + maxtipage=2147483647 + ''; + }; + services.frigate.bitcoind.cookieDir = lib.mkForce "/var/lib/bitcoind/regtest"; + services.frigate.computeBackend = lib.mkForce "CPU"; + services.frigate.bitcoind.server = lib.mkForce "http://127.0.0.1:${toString config.services.bitcoind.rpc.port}"; + + networking.firewall.allowedTCPPorts = [ 50001 ]; + + virtualisation.cores = 4; + virtualisation.memorySize = 4096; + }; + + nodes.edge = + { + config, + pkgs, + lib, + ... + }: + { + imports = [ + roost.nixosModules.frigate-edge + ] + ++ extraModules; + + services.frigate-edge = { + enable = true; + host = "edge.test.local"; + network = "regtest"; + tls.certificateFile = "${selfSignedCert}/cert.pem"; + tls.keyFile = "${selfSignedCert}/key.pem"; + + backend = { + bitcoind = { + rpcUrl = "http://${backendIp}:18443"; + inherit authCredentialFile; + zmqSequenceEndpoint = "tcp://${backendIp}:28336"; + }; + electrumUrl = "tcp://${backendIp}:60001"; + }; + }; + + # GPU isn't present in the test VM; pin to CPU compute. Matches + # what regtest-preset does on the backend. + services.frigate.computeBackend = lib.mkForce "CPU"; + + # The probe below pipes JSON-RPC into `nc -q 3` to bound how long + # nc waits after stdin EOF. 
`-q` is a netcat-openbsd extension; + # NixOS's default nc supports `-z` but not `-q`, so without this + # package the probe silently emits nothing and the loop times out. + environment.systemPackages = [ pkgs.netcat-openbsd ]; + + virtualisation.cores = 2; + virtualisation.memorySize = 2048; + }; + + testScript = + { nodes, ... }: + let + cli = "bitcoin-cli -regtest -datadir=/var/lib/bitcoind"; + in + '' + start_all() + + # Backend comes up first; mine the chain so the edge has something + # real to scan. + backend.wait_for_unit("bitcoind.service") + backend.wait_until_succeeds("${cli} getblockchaininfo", timeout=30) + + backend.succeed("${cli} createwallet test") + addr = backend.succeed("${cli} -rpcwallet=test getnewaddress").strip() + backend.succeed(f"${cli} generatetoaddress 101 {addr}") + + backend.wait_until_succeeds( + "${cli} getblockchaininfo | grep -q '\"initialblockdownload\": false'", + timeout=30, + ) + + backend.wait_for_unit("fulcrum.service") + backend.wait_for_open_port(60001, addr="${backendIp}") + + # bitcoind RPC and ZMQ should also be reachable from the second + # interface thanks to exposeBackends. + backend.wait_for_open_port(18443, addr="${backendIp}") + backend.wait_for_open_port(28336, addr="${backendIp}") + + # Edge can talk to the backend via the shared subnet. + edge.wait_until_succeeds("nc -z ${backendIp} 60001", timeout=30) + edge.wait_until_succeeds("nc -z ${backendIp} 18443", timeout=30) + + # Frigate-edge should authenticate against bitcoind (USERPASS, + # plaintext fed via LoadCredential), subscribe to remote ZMQ, and + # accept Electrum traffic on both its plaintext and TLS listeners. + # + # `wait_for_unit` accepts the unit in the brief `activating` state + # of a restart cycle, so it's not a strong "frigate is up" signal. + # The port-open check is the real gate. 
Bound it tightly enough + # that a stuck restart loop surfaces fast, and dump the unit + # journal on failure so the actual error (auth, DNS, ZMQ) is + # visible in CI output instead of just "port never opened". + edge.wait_for_unit("frigate.service") + + import time + deadline = time.time() + 60 + while time.time() < deadline: + if edge.execute("ss -tln | grep -q ':50001 '")[0] == 0: + break + time.sleep(2) + else: + journal, _ = edge.execute("journalctl -u frigate -n 50 --no-pager") + raise Exception( + f"frigate-edge did not bind port 50001 within 60s. " + f"Last 50 journal lines:\n{journal}" + ) + edge.wait_for_open_port(50002, timeout=10) + + import time + deadline = time.time() + 120 + probe = ( + "{ echo '{\"jsonrpc\":\"2.0\",\"id\":0,\"method\":\"server.version\",\"params\":[\"test\",\"1.4\"]}'" + "; echo '{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"server.features\",\"params\":[]}'" + "; echo '{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"blockchain.headers.subscribe\",\"params\":[]}'; }" + " | nc -q 3 127.0.0.1 50001" + ) + internal = "" + while time.time() < deadline: + _status, internal = edge.execute(probe) + print(f"frigate-edge plaintext probe ({len(internal)}B): {internal!r}") + if "edge.test.local" in internal and '"height":101' in internal: + break + time.sleep(2) + else: + raise Exception( + f"frigate-edge plaintext probe never returned expected content. " + f"Last response: {internal!r}" + ) + + assert "edge.test.local" in internal, f"edge server.features missing host: {internal}" + assert '"height":101' in internal, ( + f"edge blockchain.headers.subscribe missing height:101 — fulcrum proxy " + f"or remote backend wiring broken: {internal}" + ) + ''; +}