diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml index 93f102699..6458a2f43 100644 --- a/.github/workflows/unified-docker.yml +++ b/.github/workflows/unified-docker.yml @@ -31,6 +31,11 @@ on: type: boolean required: false default: true + build_cuda13: + description: "Build CUDA 13 image" + type: boolean + required: false + default: true build_vulkan: description: "Build Vulkan image" type: boolean @@ -59,6 +64,9 @@ jobs: if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_cuda }}" == "true" ]]; then backends+=("cuda") fi + if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_cuda13 }}" == "true" ]]; then + backends+=("cuda13") + fi if [[ "${{ github.event_name }}" == "schedule" ]] || [[ "${{ inputs.build_vulkan }}" == "true" ]]; then backends+=("vulkan") fi @@ -68,11 +76,20 @@ jobs: build: needs: setup if: ${{ needs.setup.outputs.matrix != '[]' }} - runs-on: ubuntu-latest + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: backend: ${{ fromJSON(needs.setup.outputs.matrix) }} + platform: ["linux/amd64", "linux/arm64"] + include: + - platform: linux/amd64 + arch: amd64 + runner: ubuntu-latest + - platform: linux/arm64 + arch: arm64 + runner: ubuntu-24.04-arm + steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 @@ -104,14 +121,14 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build unified Docker image (${{ matrix.backend }}) + - name: Build unified Docker image (${{ matrix.backend }} - ${{ matrix.platform }}) env: LLAMA_REF: ${{ inputs.llama_cpp_ref || 'master' }} WHISPER_REF: ${{ inputs.whisper_ref || 'master' }} SD_REF: ${{ inputs.sd_ref || 'master' }} IK_LLAMA_REF: ${{ inputs.ik_llama_ref || 'main' }} LS_VERSION: ${{ inputs.llama_swap_version || 'main' }} - DOCKER_IMAGE_TAG: ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }} + DOCKER_IMAGE_TAG: ghcr.io/${{ github.repository }}:unified-${{ matrix.backend }}-${{ matrix.arch }} # When running under act, use the local builder that has warm ccache. # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder # created by setup-buildx-action above. @@ -123,14 +140,82 @@ jobs: - name: Push to GitHub Container Registry if: ${{ !env.ACT && (github.event_name == 'schedule' || inputs.push_to_ghcr == true) }} run: | - BASE_TAG="ghcr.io/mostlygeek/llama-swap:unified-${{ matrix.backend }}" + BASE_TAG="ghcr.io/${{ github.repository }}:unified-${{ matrix.backend }}-${{ matrix.arch }}" DATE_TAG=$(date -u +%Y-%m-%d) docker push "${BASE_TAG}" docker tag "${BASE_TAG}" "${BASE_TAG}-${DATE_TAG}" docker push "${BASE_TAG}-${DATE_TAG}" + - name: Build unified Rootless Docker image (${{ matrix.backend }} - ${{ matrix.platform }}) + env: + DOCKER_IMAGE_TAG: ghcr.io/${{ github.repository }}:unified-${{ matrix.backend }}-${{ matrix.arch }} + # When running under act, use the local builder that has warm ccache. + # On GitHub Actions, BUILDX_BUILDER is unset so docker uses the builder + # created by setup-buildx-action above. + BUILDX_BUILDER: ${{ env.ACT == 'true' && 'llama-swap-builder' || '' }} + run: | + chmod +x docker/unified/build-image-rootless.sh + docker/unified/build-image-rootless.sh --${{ matrix.backend }} + + - name: Push Rootless to GitHub Container Registry + if: ${{ !env.ACT && (github.event_name == 'schedule' || inputs.push_to_ghcr == true) }} + run: | + BASE_TAG="ghcr.io/${{ github.repository }}:unified-${{ matrix.backend }}-${{ matrix.arch }}" + DATE_TAG=$(date -u +%Y-%m-%d) + ROOTLESS_TAG="${BASE_TAG}-rootless" docker push "${ROOTLESS_TAG}" docker tag "${ROOTLESS_TAG}" "${ROOTLESS_TAG}-${DATE_TAG}" docker push "${ROOTLESS_TAG}-${DATE_TAG}" + + merge-manifests: + needs: + - setup + - build + if: ${{ github.event_name == 'schedule' || inputs.push_to_ghcr == true }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + backend: ${{ fromJSON(needs.setup.outputs.matrix) }} + + steps: + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set date tag + run: echo "DATE_TAG=$(date -u +%Y-%m-%d)" >> $GITHUB_ENV + + - name: Create and push multi-arch manifest + if: ${{ !env.ACT }} + run: | + BASE_TAG="ghcr.io/${{ github.repository }}:unified-${{ matrix.backend }}" + BASE_TAG_DATE="${BASE_TAG}-${DATE_TAG}" + AMD64_TAG="${BASE_TAG}-amd64" + ARM64_TAG="${BASE_TAG}-arm64" + + ROOTLESS_TAG="${BASE_TAG}-rootless" + ROOTLESS_TAG_DATE="${ROOTLESS_TAG}-${DATE_TAG}" + ROOTLESS_AMD64_TAG="${AMD64_TAG}-rootless" + ROOTLESS_ARM64_TAG="${ARM64_TAG}-rootless" + + docker buildx imagetools create -t "${BASE_TAG}" \ + "${AMD64_TAG}" \ + "${ARM64_TAG}" + + docker buildx imagetools create -t "${BASE_TAG_DATE}" \ + "${AMD64_TAG}" \ + "${ARM64_TAG}" + + docker buildx imagetools create -t "${ROOTLESS_TAG}" \ + "${ROOTLESS_AMD64_TAG}" \ + "${ROOTLESS_ARM64_TAG}" + + docker buildx imagetools create -t "${ROOTLESS_TAG_DATE}" \ + "${ROOTLESS_AMD64_TAG}" \ + "${ROOTLESS_ARM64_TAG}" diff --git a/docker/unified/Dockerfile b/docker/unified/Dockerfile index 1848289d1..7e33881a7 100644 --- a/docker/unified/Dockerfile +++ b/docker/unified/Dockerfile @@ -10,10 +10,12 @@ # and installing binaries. Build stages are independent for cache efficiency. ARG BACKEND=cuda +ARG CUDA_VERSION=12.9.1 +ARG UBUNTU_VERSION=24.04 # ── Builder bases ────────────────────────────────────────────────────── -FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS builder-base-cuda +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder-base-cuda ARG CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" ENV DEBIAN_FRONTEND=noninteractive @@ -24,14 +26,18 @@ ENV PATH="/usr/lib/ccache:${PATH}" RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential cmake git python3 python3-pip libssl-dev \ - curl ca-certificates ccache make wget \ + curl ca-certificates ccache make wget nodejs npm \ && rm -rf /var/lib/apt/lists/* +RUN npm install -g pnpm@latest-10 + WORKDIR /build +FROM builder-base-cuda AS builder-base-cuda13 + # ── -FROM ubuntu:24.04 AS builder-base-vulkan +FROM ubuntu:${UBUNTU_VERSION} AS builder-base-vulkan ENV DEBIAN_FRONTEND=noninteractive ENV CCACHE_DIR=/ccache @@ -40,11 +46,14 @@ ENV PATH="/usr/lib/ccache:${PATH}" RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential cmake git python3 python3-pip libssl-dev \ - curl ca-certificates ccache make wget software-properties-common \ + curl ca-certificates ccache make wget nodejs npm \ + software-properties-common \ libvulkan-dev glslang-tools spirv-tools vulkan-validationlayers glslc \ spirv-headers \ && rm -rf /var/lib/apt/lists/* +RUN npm install -g pnpm@latest-10 + WORKDIR /build # ── Select builder base by BACKEND ──────────────────────────────────── @@ -57,7 +66,7 @@ FROM builder-base AS whisper-build ARG BACKEND=cuda ARG WHISPER_COMMIT_HASH=master COPY install-whisper.sh /build/ -RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ +RUN --mount=type=cache,id=ccache-${BACKEND},target=${CCACHE_DIR} \ --mount=type=cache,id=whisper-${BACKEND},target=/src/whisper.cpp/build \ BACKEND=${BACKEND} bash /build/install-whisper.sh "${WHISPER_COMMIT_HASH}" @@ -67,7 +76,7 @@ FROM builder-base AS sd-build ARG BACKEND=cuda ARG SD_COMMIT_HASH=master COPY install-sd.sh /build/ -RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ +RUN --mount=type=cache,id=ccache-${BACKEND},target=${CCACHE_DIR} \ --mount=type=cache,id=sd-${BACKEND},target=/src/stable-diffusion.cpp/build \ BACKEND=${BACKEND} bash /build/install-sd.sh "${SD_COMMIT_HASH}" @@ -77,7 +86,7 @@ FROM builder-base AS llama-build ARG BACKEND=cuda ARG LLAMA_COMMIT_HASH=master COPY install-llama.sh /build/ -RUN --mount=type=cache,id=ccache-${BACKEND},target=/ccache \ +RUN --mount=type=cache,id=ccache-${BACKEND},target=${CCACHE_DIR} \ --mount=type=cache,id=llama-${BACKEND},target=/src/llama.cpp/build \ BACKEND=${BACKEND} bash /build/install-llama.sh "${LLAMA_COMMIT_HASH}" @@ -95,10 +104,12 @@ RUN mkdir -p /install/bin FROM builder-base-cuda AS ik-llama-cuda ARG IK_LLAMA_COMMIT_HASH=main COPY install-ik-llama.sh /build/ -RUN --mount=type=cache,id=ccache-cuda,target=/ccache \ +RUN --mount=type=cache,id=ccache-cuda,target=${CCACHE_DIR} \ --mount=type=cache,id=ik-llama-cuda,target=/src/ik_llama.cpp/build \ bash /build/install-ik-llama.sh "${IK_LLAMA_COMMIT_HASH}" +FROM ik-llama-cuda AS ik-llama-cuda13 + ARG BACKEND=cuda FROM ik-llama-${BACKEND} AS ik-llama-build @@ -111,7 +122,7 @@ RUN bash /build/install-llama-swap.sh "${LS_VERSION}" # ── Runtime bases ───────────────────────────────────────────────────── -FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS runtime-cuda +FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime-cuda ENV DEBIAN_FRONTEND=noninteractive ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" @@ -125,9 +136,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so COPY --from=builder-base-cuda /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 +FROM runtime-cuda AS runtime-cuda13 + # ── -FROM ubuntu:24.04 AS runtime-vulkan +FROM ubuntu:${UBUNTU_VERSION} AS runtime-vulkan ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/local/bin:${PATH}" diff --git a/docker/unified/build-image-rootless.sh b/docker/unified/build-image-rootless.sh new file mode 100755 index 000000000..30d9cce7c --- /dev/null +++ b/docker/unified/build-image-rootless.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# +# Build script for unified container with version pinning +# +# Usage: +# ./build-image-rootless.sh --cuda # Build CUDA image +# ./build-image-rootless.sh --vulkan # Build Vulkan image +# ./build-image-rootless.sh --cuda --no-cache # Build without cache +# LLAMA_REF=b1234 ./build-image-rootless.sh --vulkan # Pin llama.cpp to a commit hash +# LLAMA_REF=v1.2.3 ./build-image-rootless.sh --cuda # Pin llama.cpp to a tag +# WHISPER_REF=v1.0.0 ./build-image-rootless.sh --vulkan # Pin whisper.cpp to a tag +# SD_REF=master ./build-image-rootless.sh --cuda # Pin stable-diffusion.cpp to a branch +# LS_VERSION=170 ./build-image-rootless.sh --cuda # Override llama-swap version +# IK_LLAMA_REF=main ./build-image-rootless.sh --cuda # Pin ik_llama.cpp to main branch (CUDA only) +# + +set -euo pipefail + +BACKEND="" +NO_CACHE=false + +for arg in "$@"; do + case $arg in + --cuda) + BACKEND="cuda" + ;; + --cuda13) + BACKEND="cuda13" + ;; + --vulkan) + BACKEND="vulkan" + ;; + --no-cache) + NO_CACHE=true + ;; + --help|-h) + echo "Usage: ./build-image-rootless.sh --cuda|--vulkan" + echo "" + echo "Options:" + echo " --cuda Build CUDA image (NVIDIA GPUs)" + echo " --cuda13 Build CUDA 13 image (NVIDIA GPUs)" + echo " --vulkan Build Vulkan image (AMD GPUs and compatible hardware)" + echo " --no-cache Force rebuild without using Docker cache" + echo " --help, -h Show this help message" + echo "" + echo "Environment variables:" + echo " DOCKER_IMAGE_TAG Set custom image tag (default: llama-swap:unified-cuda or llama-swap:unified-vulkan)" + exit 0 + ;; + esac +done + +if [[ -z "$BACKEND" ]]; then + echo "Error: No backend specified. Please use --cuda, --cuda13, or --vulkan." + echo "" + echo "Usage: ./build-image-rootless.sh --cuda|--cuda13|--vulkan" + exit 1 +fi + +ARCH=$(uname -m) +case "$ARCH" in + x86_64) ARCH="amd64" ;; + aarch64|arm64) ARCH="arm64" ;; + *) echo "FATAL: Unsupported architecture: $ARCH" >&2; exit 1 ;; +esac + +DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}-${ARCH}}" + +echo "" +echo "==========================================" +echo "Building rootless image..." +echo "==========================================" +echo "" + +ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless" +BUILD_ARGS=() +if [[ "$NO_CACHE" == "true" ]]; then + BUILD_ARGS+=(--no-cache) +fi +docker buildx build "${BUILD_ARGS[@]}" --load -t "${ROOTLESS_TAG}" - < environment variable > default list +CMAKE_CUDA_ARCHITECTURES="${CLI_CUDA_ARCHITECTURES:-${CMAKE_CUDA_ARCHITECTURES:-75;86;89;120;121}}" +CUDA_VERSION="${CUDA_VERSION:-12.9.1}" +if [[ "$BACKEND" == "cuda" || "$BACKEND" == "cuda13" ]]; then + IS_CUDA_BACKEND=true +else + IS_CUDA_BACKEND=false +fi + +if [[ "$BACKEND" == "cuda" ]]; then + CUDA_VERSION="12.9.1" + if [[ -z "$CLI_CUDA_ARCHITECTURES" ]]; then + # For CUDA 12, default to older set of architectures + CMAKE_CUDA_ARCHITECTURES="60;61;75;86;89" + fi +fi + +if [[ "$BACKEND" == "cuda13" ]]; then + CUDA_VERSION="13.2.0" + if [[ -z "$CLI_CUDA_ARCHITECTURES" ]]; then + # For CUDA 13, default to a more modern set of architectures + CMAKE_CUDA_ARCHITECTURES="86;89;120;121" + fi +fi + + +ARCH=$(uname -m) +case "$ARCH" in + x86_64) ARCH="amd64" ;; + aarch64|arm64) ARCH="arm64" ;; + *) echo "FATAL: Unsupported architecture: $ARCH" >&2; exit 1 ;; +esac + +DOCKER_IMAGE_TAG="${DOCKER_IMAGE_TAG:-llama-swap:unified-${BACKEND}-${ARCH}}" +GITHUB_REPOSITORY="${GITHUB_REPOSITORY:-mostlygeek/llama-swap}" + # Git repository URLs LLAMA_REPO="https://github.com/ggml-org/llama.cpp.git" @@ -156,7 +206,7 @@ else fi # Resolve ik_llama.cpp ref (CUDA only) -if [[ "$BACKEND" == "cuda" ]]; then +if [[ "$IS_CUDA_BACKEND" == true ]]; then if [[ -n "${IK_LLAMA_REF:-}" ]]; then IK_LLAMA_HASH=$(resolve_ref "${IK_LLAMA_REPO}" "${IK_LLAMA_REF}") || exit 1 echo "ik_llama.cpp: ${IK_LLAMA_REF} -> ${IK_LLAMA_HASH}" @@ -201,6 +251,8 @@ BUILD_ARGS=( --build-arg "SD_COMMIT_HASH=${SD_HASH}" --build-arg "IK_LLAMA_COMMIT_HASH=${IK_LLAMA_HASH}" --build-arg "LS_VERSION=${LS_HASH}" + --build-arg "CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" + --build-arg "CUDA_VERSION=${CUDA_VERSION}" -t "${DOCKER_IMAGE_TAG}" -f "${SCRIPT_DIR}/Dockerfile" ) @@ -209,7 +261,7 @@ if [[ "$NO_CACHE" == true ]]; then BUILD_ARGS+=(--no-cache) echo "Note: Building without cache" elif [[ "${GITHUB_ACTIONS:-}" == "true" && "${ACT:-}" != "true" ]]; then - CACHE_REF="ghcr.io/mostlygeek/llama-swap:unified-${BACKEND}-cache" + CACHE_REF="ghcr.io/${GITHUB_REPOSITORY}:unified-${BACKEND}-cache-${ARCH}" BUILD_ARGS+=( --cache-from "type=registry,ref=${CACHE_REF}" --cache-to "type=registry,ref=${CACHE_REF},mode=max" @@ -226,7 +278,7 @@ echo "==========================================" echo "" EXPECTED_BINARIES=(llama-server llama-cli whisper-server whisper-cli sd-server sd-cli llama-swap) -if [[ "$BACKEND" == "cuda" ]]; then +if [[ "$IS_CUDA_BACKEND" == true ]]; then EXPECTED_BINARIES+=(ik-llama-server) fi @@ -249,30 +301,11 @@ if [[ ${#MISSING_BINARIES[@]} -gt 0 ]]; then fi VERIFIED_LIST="llama-server, llama-cli, whisper-server, whisper-cli, sd-server, sd-cli, llama-swap" -if [[ "$BACKEND" == "cuda" ]]; then +if [[ "$IS_CUDA_BACKEND" == true ]]; then VERIFIED_LIST="${VERIFIED_LIST}, ik-llama-server" fi echo "All expected binaries verified: ${VERIFIED_LIST}" -echo "" -echo "==========================================" -echo "Building rootless image..." -echo "==========================================" -echo "" - -ROOTLESS_TAG="${DOCKER_IMAGE_TAG}-rootless" -docker buildx build --load -t "${ROOTLESS_TAG}" - <&2; exit 1 ;; +esac + +if [ "$ARCH" = "arm64" ]; then + CMAKE_FLAGS+=(-DGGML_ARCH_FLAGS="-march=armv8.2-a+dotprod+fp16") +fi + rm -rf build/CMakeCache.txt build/CMakeFiles 2>/dev/null || true echo "=== Building ik_llama.cpp ===" diff --git a/docker/unified/install-llama.sh b/docker/unified/install-llama.sh index ddc75cbaa..8e1b5b921 100755 --- a/docker/unified/install-llama.sh +++ b/docker/unified/install-llama.sh @@ -29,7 +29,7 @@ CMAKE_FLAGS=( -DLLAMA_BUILD_TESTS=OFF ) -if [ "$BACKEND" = "cuda" ]; then +if [[ "$BACKEND" = "cuda" || "$BACKEND" == "cuda13" ]]; then CMAKE_FLAGS+=( -DGGML_CUDA=ON -DGGML_VULKAN=OFF diff --git a/docker/unified/install-sd.sh b/docker/unified/install-sd.sh index 54fdb5214..512ac6930 100755 --- a/docker/unified/install-sd.sh +++ b/docker/unified/install-sd.sh @@ -27,9 +27,10 @@ CMAKE_FLAGS=( -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DSD_BUILD_EXAMPLES=ON + -DSD_SERVER_BUILD_FRONTEND=ON ) -if [ "$BACKEND" = "cuda" ]; then +if [[ "$BACKEND" = "cuda" || "$BACKEND" == "cuda13" ]]; then CMAKE_FLAGS+=( -DGGML_CUDA=ON -DGGML_VULKAN=OFF diff --git a/docker/unified/install-whisper.sh b/docker/unified/install-whisper.sh index 2e0feb8df..467029cb1 100755 --- a/docker/unified/install-whisper.sh +++ b/docker/unified/install-whisper.sh @@ -27,7 +27,7 @@ CMAKE_FLAGS=( -DCMAKE_CXX_COMPILER_LAUNCHER=ccache ) -if [ "$BACKEND" = "cuda" ]; then +if [[ "$BACKEND" = "cuda" || "$BACKEND" == "cuda13" ]]; then CMAKE_FLAGS+=( -DGGML_CUDA=ON -DGGML_VULKAN=OFF