yetanotherco · ColoCarletti · Apr 10, 2026 · Apr 22, 2026 · Apr 24, 2026 · Apr 28, 2026
diff --git a/spec/.gitignore b/spec/.gitignore
@@ -1,2 +1,3 @@
 dist/*
+interaction_count.json
 ebook.pdf
diff --git a/spec/about_ecalls.typ b/spec/about_ecalls.typ
@@ -22,3 +22,13 @@ When `ECALL` is executed, it is assumed that:
 - the return value is written to `A0`,
 where `A0`-`A7` are symbolic names for the registers `x10`-`x17`
 #footnote([RISC-V - Register sets; en.wikipedia.org, #link("https://web.archive.org/web/20260209053447/https://en.wikipedia.org/wiki/RISC-V#Register_sets")[[src]]]).
+
+= ECALL number overview
+
+We provide a list of supported ECALL numbers.
+Negative numbers (represented as 2s complement 64-bit numbers), are used for our own custom accelerators/extensions.
+
+/ 64: `write` (@commit)
+/ 93: `exit` (@halt)
+/ -1: `SHA256` (@sha256)
+/ -2: `KECCAK` (@keccak)
diff --git a/spec/bitwise.typ b/spec/bitwise.typ
@@ -37,8 +37,6 @@ This chip adds the following interactions to the lookup:
 
 = Notes/Optimizations
 The following ideas may prove to be optimizations for the #bitwise chip:
-+ Extend `IS_BYTE[X]` to `ARE_BYTES[X, Y]`, such that two bytes are range checked at once. 
-  When only a single check is required, one can still execute `IS_BYTE[X] := ARE_BYTES[X, 0]`.
 + Drop `MSB8` column, and instead define the `MSB8` lookup as `MSB8<X> := MSB16[256X]`.
   Note: currently, `MSB8` also implicity range checks the input `X` (the lookup fails if `X` is not a `Byte`).
   This optimization should only be executed when all chips leveraging `MSB8` do _not_ need this implicit range check.

diff --git a/spec/book.typ b/spec/book.typ
@@ -18,6 +18,7 @@
     )),
     ("TEMPLATES", (
       ("is_bit.typ", [`IS_BIT` template], <isbit>),
+      ("is_byte.typ", [`IS_BYTE` template], <isbyte>),
       ("sign.typ", [`SIGN` template], <sign>),
       ("add.typ", [`ADD`/`SUB` template], <add>),
       ("neg.typ", [`NEG` template], <neg>),
@@ -42,6 +43,8 @@
       ("about_ecalls.typ", [About `ECALL`], <ecall>),
       ("halt.typ", [`HALT` chip], <halt>),
       ("commit.typ", [`COMMIT` chip], <commit>),
+      ("sha256.typ", [`SHA256` accelerator], <sha256>),
+      ("keccak.typ", [`KECCAK` accelerator], <keccak>),
     ))
   )
 )
@@ -105,6 +108,7 @@
 
 // Invisibly include another chapter, so that its labels can be resolved
 #let xref-include(f) = {
+  show ref: none
   context {
     place(hide(box(width: auto, height: 0%, strip-all(include "/" + f))))
   }
@@ -142,7 +146,7 @@
         } else {
           rf.supplement
         }
-        [#supplement#numbering(fig.numbering, ..counter.at(lbl))]
+        [#supplement #numbering(fig.numbering, ..counter.at(lbl))]
       }
       cross-link("/" + ch, reference: shiroa-label, link-content)
     }
@@ -169,13 +173,22 @@
         }
       })
       let cond() = _toplevel.final() == file
-      project.with(..args, title: context meta_sections.find(x => x.at(0) == _toplevel.final()).at(1), cond: cond)([
-        #show ref: it => context if _toplevel.final() == file {
-          xref(it)
-        }
+      show ref: it => context if cond() { xref(it) }
+      let title = context {
+        // Strip raw, because shiroa already makes the title raw
+        show raw: it => it.text
+        meta_sections.find(x => x.at(0) == _toplevel.final()).at(1)
+      }
+      project.with(..args, title: title, description: plain-text(meta_sections.find(x => x.at(0) == file).at(1)), cond: cond)([
         #context _xref-included.final().pairs().map(((key, value)) => context if value and cond() {
           xref-include(key)
         }).join()
+        #metadata(json("interaction_count.json").sum(default: (:)))<interaction_count>
+
+        #let chapter-index = meta_sections.position(x => x.at(0) == file) + 1
+        #set heading(numbering: (..args) => [#chapter-index.#numbering("1.1", ..args)])
+        #counter(heading).update(0)
+
         #body
       ])
     }

diff --git a/spec/build_shiroa.sh b/spec/build_shiroa.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# cd into the script directory
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Clean up potential old file
+rm -f interaction_count.json
+
+# Always clean up after ourselves
+trap 'rm -f interaction_count.json' EXIT
+
+# Query the ebook version for the proper counts
+typst query ebook.typ '<interaction_count>' --field value > interaction_count.json
+
+# Check if there's enough memory available for a parallel shiroa build
+# 20GiB as comfortable baseline
+available_kb=$(awk '/MemAvailable/ { print $2 }' /proc/meminfo)
+required_kb=$((20 * 1024 * 1024))
+if [ "$available_kb" -lt "$required_kb" ]; then
+  echo "Falling back to single-thread"
+  export RAYON_NUM_THREADS=1
+fi
+
+# And build
+shiroa build
diff --git a/spec/chip.typ b/spec/chip.typ
@@ -14,12 +14,15 @@
   .map(pair => pair.at(1))
   .flatten()
   .map(var => {
-    let (label, factor) = if type(var.type) == array {
-      (var.type.at(0), var.type.at(1))
-    } else {
-      (var.type, 1)
+    let (factor, var_type) = (1, var.type)
+    while type(var_type) == array {
+      assert(var_type.len() == 2, message: "invalid var (sub)type length: " + repr(var.type))
+      assert(type(var_type.at(1)) == int, message: "invalid var (sub)type length: " + repr(var.type))
+      factor *= var_type.at(1)
+      var_type = var_type.at(0)
     }
-    config.variables.types.filter(type => type.label == label).first().subtypes.len() * factor
+
+    config.variables.types.filter(type => type.label == var_type).first().subtypes.len() * factor
   })
   .sum()
 }
@@ -48,14 +51,19 @@
 // store it as metadata under the `<interaction_count>` label
 // with tag `chip.name`. This tag is overwritten by `name` when specified.
 #let set_nr_interactions(chip, name: none) = {
+  // Skip when building shiroa, since the web/chapter structure fails to converge properly
+  import "book.typ": is-shiroa
+  if is-shiroa {
+    return
+  }
   if name == none {
-      name = chip.name
-    }
+    name = chip.name
+  }
 
   let constraints = chip
     .constraints
     .values()
-    .flatten()
+    .sum(default: ())
 
   // nr. of direct interactions
   let nr-direct-interactions = constraints
@@ -290,7 +298,7 @@
   } else if type(groups) == str {
     groups = (groups,)
   }
-  assert(groups.all(group => group in all_groups), message: "unknown group")
+  assert(groups.all(group => group in all_groups), message: "unknown group: " + repr(groups))
   let selected_constraints = groups.map(g => ((g): chip.constraints.at(g))).join()
 
   // Find the group definition in the constraint_groups

diff --git a/spec/cpu.typ b/spec/cpu.typ
@@ -56,16 +56,26 @@ The ALU functionality is then obtained through judicious dispatching to the corr
 
 #render_constraint_table(chip, config, groups: "alu")
 
-== Memory
+== Memory<cpu:memory>
 
 The interactions with the memory, both for register loading and storing, as for `LOAD` and `STORE` instructions are handled.
 Note that since registers need no byte-addressing, we store them in the memory argument with `Word` limbs.
-The timestamps are ensured to be disjoint for disjoint memory locations.
+The `pc` register behaves very predictably with respect to its timestamps and when it is being read,
+so for performance reasons, we inline its memory interactions directly into the #cpu chip.
+
+Potentially overlapping memory accesses are ensured to have disjoint timestamps.
 One consequence of that is that `next_pc` is written at `timestamp + 1`
-to ensure the access is disjoint with the `pc` read into `rv1` as part of the `AUIPC` instruction.
+to ensure the access is disjoint with the `pc` read into `rv1` as part of the `AUIPC` instruction (see @cpu:c:read_rv1 and @decode:decoding-overview).
+Constraints regarding whether `pc_double_read` corresponds to an `AUIPC` instruction are not necessary,
+as regardless of its value, the old timestamp is guaranteed smaller than the new timestamp,
+and the integrity of the memory argument therefore ensures the correctness of this bit.
 
 #render_constraint_table(chip, config, groups: "mem")
 
+=== Potential optimizations
+
+- `double_pc_read` could be integrated into decoding, so that `AUIPC` could set `read_register1 = 0` and no extra MEMW access for `rv1` is needed at this point.
+
 == System
 
 The interactions with the wider system.

diff --git a/spec/decode.typ b/spec/decode.typ
@@ -39,7 +39,7 @@ Given that `CPU` asserts that `EBREAK = 0` (see @cpu:c:ebreak_traps), using this
 Note moreover that the `pc` is set to $7$.
 This value is the _smallest odd number_ (i.e., not reachable during regular execution) that is more than _$4$_ (i.e., the max `pc`-increment) greater than _$1$_ (i.e., the `pc`-value used in the #link(<cpu-padding-decode-row>)[additional instruction] referred to by `CPU`-padding lines).
 
-= Decoding
+= Decoding<decode:decoding-overview>
 For the purposes of explaining decoding, we decompress #decode's `packed_decode` variable into its constituent variables.
 Note that the below table is _not_ used in practice: it is solely used for the purposes of this explanation.
 
@@ -64,12 +64,6 @@ For the purpose of brevity and readability, the table uses the following rules-o
 
 Further clarification is provided in the notes following the table.
 
-== C-type instructions
-The `RV64C` extension for compressed instructions specifies that \~50% of all instructions can be represented using a 16-bit instruction (rather than 32-bits), saving \~25% in code size.
-This execution of assembly code is _not_ agnostic to an instruction's compression state; after executing a compressed instruction, the `pc` should be incremented by $2$ rather than $4$.
-To indicate an instruction is provided in compressed form, the `c_type` flag is introduced.
-*This flag should be set to $1$ whenever the decoded instruction is provided in compressed form and $0$ otherwise.*
-
 /// Add a reference to one or more notes following this table.
 #let ref_note(..refs) = {
   super("[" + refs.pos().map(r => ref(r)).join(",") + "]")
@@ -152,6 +146,12 @@ To indicate an instruction is provided in compressed form, the `c_type` flag is
 
 #decoding_table(decoding)
 
+== C-type instructions
+The `RV64C` extension for compressed instructions specifies that \~50% of all instructions can be represented using a 16-bit instruction (rather than 32-bits), saving \~25% in code size.
+This execution of assembly code is _not_ agnostic to an instruction's compression state; after executing a compressed instruction, the `pc` should be incremented by $2$ rather than $4$.
+To indicate an instruction is provided in compressed form, the `c_type` flag is introduced.
+*This flag should be set to $1$ whenever the decoded instruction is provided in compressed form and $0$ otherwise.*
+
 // Construct a note that can be referenced through `lbl`
 #let referenceable_note(lbl, note) = {
   show figure: (it) => align(left, [#it])

diff --git a/spec/expr.typ b/spec/expr.typ
@@ -1,20 +1,26 @@
 // Types and array types
 // <type> ::= str
-//          | [str, int]
+//          | [<type>, int]
 
 // Check that a type expression is structurally valid, without validating against a set of known base types
 #let check_array_type(typ) = {
-  assert(type(typ.at(0)) == str, message: "Array types need to have a regular type as base")
-  assert(type(typ.at(1)) == int, message: "Array types need to have a constant dimension")
+  while type(typ) == array {
+    assert(typ.len() == 2, message: "Array types must specify two parameters")
+    assert(type(typ.at(1)) == int, message: "Array types need to have a constant dimension")
+    typ = typ.at(0)
+  }
+  assert(type(typ) == str, message: "Array types need to have a regular type as base")
 }
 
 // Render a type to code
 #let type_to_code(typ) = {
-  if type(typ) == array {
-    check_array_type(typ)
-    return raw(typ.at(0) + "[" + str(typ.at(1)) + "]")
-  } else if type(typ) == str {
-    return raw(typ)
+  let label = ""
+  while type(typ) == array {
+    label += "[" + str(typ.at(1)) + "]"
+    typ = typ.at(0)
+  }
+  if type(typ) == str {
+    return raw(typ + label)
   } else {
     assert(false, message: "Unknown format for type: " + repr(typ))
   }
@@ -54,12 +60,13 @@
   "cast": 3, // cast
   "mul": 4,  // *
   "div": 5,  // /
-  "sum": 6,  // Σ
-  "not": 7,  // not
-  "sub": 8,  // -
-  "add": 9,  // +  
-  "eq": 10,   // = and :=
-  "MAX": 11, // <the void outside every expression>
+  "mod": 6,  // mod
+  "sum": 7,  // Σ
+  "not": 8,  // not
+  "sub": 9,  // -
+  "add": 10,  // +  
+  "eq": 11,   // = and :=
+  "MAX": 12, // <the void outside every expression>
 )
 
 // Mutual recursion through a trick from https://github.com/typst/typst/issues/744
@@ -97,6 +104,13 @@
     "not": (pp, rec, e) => cwrap(rec(PREC.not, 1) + ` - ` + rec(PREC.not, e.at(1)), pp < PREC.not),
     "+": (pp, rec, e) => cwrap(e.slice(1).map(rec.with(PREC.add)).join(` + `), pp < PREC.add),
     "sum": (pp, rec, e) => assert(false, message: "sum is unsupported in code."),
+    "mod": (pp, rec, e) => {
+      assert(e.len() == 3 and type(e.at(2)) == int, message: "Invalid mod expr: " + repr(e))
+      cwrap(
+        rec(PREC.mod, e.at(1)) + ` % ` + rec(PREC.mod, e.at(2)), 
+        pp <= PREC.mod
+      ) 
+    },
     "*": (pp, rec, e) => {
       if e.len() == 3 and type(e.at(1)) == int and type(e.at(2)) == str and e.at(2).len() == 1 {
         // multiplication of a constant with one-letter variable. 
@@ -165,6 +179,13 @@
         pp <= PREC.sub
       )
     },
+    "mod": (pp, rec, e) => {
+      assert(e.len() == 3 and type(e.at(2)) == int, message: "Invalid mod expr: " + repr(e))
+      mwrap(
+        $#rec(PREC.mod, e.at(1)) mod #rec(PREC.mod, e.at(2))$, 
+        pp <= PREC.mod
+      )
+    },
     "*": (pp, rec, e) => {
       if e.len() == 3 and type(e.at(1)) == int and type(e.at(2)) == str and e.at(2).len() == 1 {
         // multiplication of a constant with one-letter variable. 

diff --git a/spec/is_byte.typ b/spec/is_byte.typ
@@ -0,0 +1,22 @@
+#import "/book.typ": book-page
+#import "/src.typ": load_config, load_chip
+#import "/chip.typ": render_chip_variable_table, render_constraint_table, compute_nr_interactions, total_nr_variables, total_nr_instantiated_columns
+
+#let config = load_config()
+#let chip = load_chip("src/is_byte.toml", config)
+
+#show: book-page(chip.name)
+#let is_byte = raw(chip.name)
+
+#is_byte is a constraint template that is used to assert that a variable lies in the range $[0, 255]$ under the condition that `cond` is non-zero. Note: when `cond` is omitted, it defaults to $1$.
+
+When a chip leverages this template twice or more, implementors are encouraged to merge pairs of #is_byte interactions with identical conditions into `ARE_BYTES` interactions; the #is_byte template is included for convenience of notation, and to complete the specification of chips that use an odd number of #is_byte range checks.
+
+= Variables
+#let nr_interactions = compute_nr_interactions(chip)
+
+The #is_byte template leverages #nr_interactions interaction(s):
+#render_chip_variable_table(chip, config)
+
+= Constraints
+#render_constraint_table(chip, config)