diff --git a/docker/Dockerfile b/docker/Dockerfile index 245713dca6..bfb1195a49 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -58,6 +58,8 @@ ENV UV_PROJECT_ENVIRONMENT=/opt/venv ENV UV_CACHE_DIR=/opt/uv_cache ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH" ENV UV_LINK_MODE=copy +# Make the system FFmpeg discoverable by source-built Python deps (e.g. PyAV). +ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig RUN uv venv ${UV_PROJECT_ENVIRONMENT} --python /usr/bin/python3.13 --system-site-packages --seed FROM build AS nemo_curator_dep @@ -93,9 +95,9 @@ COPY nemo_curator/__init__.py nemo_curator/package_info.py /opt/Curator/nemo_cur # Cap ninja parallelism for flash-attn's NVCC build at min(nproc, 16); default of nproc OOMs at ~2 GB/job. RUN export MAX_JOBS=$(( $(nproc) > 16 ? 16 : $(nproc) )) && \ if [ -n "${CURATOR_EXTRA}" ]; then \ - uv sync --link-mode copy --locked --extra ${CURATOR_EXTRA} ${CURATOR_GROUPS} --no-cache; \ + uv sync --link-mode copy --locked --no-binary-package av --extra ${CURATOR_EXTRA} ${CURATOR_GROUPS} --no-cache; \ else \ - uv sync --link-mode copy --locked ${CURATOR_GROUPS} --no-cache; \ + uv sync --link-mode copy --locked --no-binary-package av ${CURATOR_GROUPS} --no-cache; \ fi && \ find /opt/venv -type d -path "*ray/_private/runtime_env/agent/thirdparty_files/aiohttp*" -exec rm -rf {} + && \ # Address GHSA-72hv-8253-57qq: jackson-core bundled in ray_dist.jar (jackson-core 2.16.1 < 2.18.6) diff --git a/docker/common/install_ffmpeg.sh b/docker/common/install_ffmpeg.sh index 81a4652c20..96ed366618 100644 --- a/docker/common/install_ffmpeg.sh +++ b/docker/common/install_ffmpeg.sh @@ -15,12 +15,12 @@ #!/bin/bash set -xeuo pipefail # Exit immediately if a command exits with a non-zero status -FFMPEG_VERSION=8.0.1 +FFMPEG_BRANCH=n8.0.1 NVCODEC_VERSION=12.1.14.0 for i in "$@"; do case $i in - --FFMPEG_VERSION=?*) FFMPEG_VERSION="${i#*=}";; + --FFMPEG_BRANCH=?*) FFMPEG_BRANCH="${i#*=}";; --NVCODEC_VERSION=?*) NVCODEC_VERSION="${i#*=}";; *) ;; 
esac @@ -31,62 +31,75 @@ done export DEBIAN_FRONTEND=noninteractive apt-get update apt-get install -y \ - libcrypt-dev \ autoconf \ automake \ build-essential \ + ca-certificates \ cmake \ - libaom-dev \ - libass-dev \ - libdav1d-dev \ - libdrm-dev \ - libfreetype6-dev \ - libgnutls28-dev \ + git \ + libcrypt-dev \ + libgl1 \ + libglib2.0-0t64 \ libnuma-dev \ - libopenh264-dev \ libtool \ - libva-dev \ - libvorbis-dev \ libvpx-dev \ - libwebp-dev \ nasm \ pkg-config \ - vainfo \ wget \ yasm \ zlib1g-dev -# Install NVCODEC +# Install nv-codec-headers (NVENC + NVDEC bridge to the NVIDIA driver) wget -O /tmp/nv-codec-headers.tar.gz https://github.com/FFmpeg/nv-codec-headers/releases/download/n${NVCODEC_VERSION}/nv-codec-headers-${NVCODEC_VERSION}.tar.gz tar xzvf /tmp/nv-codec-headers.tar.gz -C /tmp/ cd /tmp/nv-codec-headers-${NVCODEC_VERSION} make make install -# Install FFMPEG -wget -O /tmp/ffmpeg-snapshot.tar.bz2 https://www.ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.bz2 -tar xjvf /tmp/ffmpeg-snapshot.tar.bz2 -C /tmp/ -cd /tmp/ffmpeg-${FFMPEG_VERSION} -PATH="/usr/local/cuda/bin:$PATH" ./configure \ - --prefix=/usr/local \ - --enable-nonfree \ - --enable-cuda-nvcc \ - --enable-libnpp \ - --enable-libopenh264 \ - --enable-libaom \ - --enable-libdav1d \ - --enable-libvorbis \ - --enable-libvpx \ - --enable-libwebp \ - --enable-vaapi \ - --extra-cflags=-I/usr/local/cuda/include \ - --extra-ldflags=-L/usr/local/cuda/lib64 \ - --extra-libs=-lpthread \ - --extra-libs=-lm \ - --disable-static \ +# Build FFmpeg from upstream git, branch ${FFMPEG_BRANCH} (n8.0.1): +# - --disable-everything strips ALL components by default (encoders, +# decoders, muxers, demuxers, parsers, bsfs, hwaccels, filters, protocols) +# and we re-enable only what's needed. +# - --enable-version3 selects LGPLv3+. +# - Encoders: only NVENC (h264/hevc/av1) and libvpx-vp9 + rawvideo. +# - Decoders: NVDEC variants for h264/hevc/av1/vp9 + software vp8/vp9 + +# mpeg1/2/4 + libvpx_vp9 + rawvideo. 
NO software h264/hevc/av1. +# - Shared-linked: installs libav*.so to /usr/local/lib so PyAV and other +# source-built Python deps can pkg-config against this build. +cd /tmp +git clone --depth 1 --branch "${FFMPEG_BRANCH}" --recurse-submodules https://git.ffmpeg.org/ffmpeg.git +cd ffmpeg +PKG_CONFIG_PATH="/usr/local/lib/pkgconfig" ./configure \ + --prefix="/usr/local" \ --enable-shared \ + --disable-static \ + --extra-cflags="-I/usr/local/cuda/include" \ + --extra-ldflags="-L/usr/local/cuda/lib64" \ + --extra-libs="-lpthread -lm" \ + --ld="g++" \ + --enable-version3 \ + --disable-everything \ + --disable-network \ --disable-doc \ - --disable-debug + --disable-ffplay \ + --disable-vaapi \ + --disable-vdpau \ + --disable-dxva2 \ + --disable-libdrm \ + --enable-encoder=rawvideo,libvpx_vp9,h264_nvenc,hevc_nvenc,av1_nvenc \ + --enable-decoder=rawvideo,libvpx_vp9,vp9,vp8,h264_cuvid,hevc_cuvid,av1_cuvid,mpeg1video,mpeg2video,mpeg4 \ + --enable-muxer=mp4,rawvideo,image2pipe \ + --enable-demuxer=mov,mp4,m4a,3gp,3g2,mj2,avi,matroska,webm,image2,image2pipe \ + --enable-parser=h264,hevc,av1,vp8,vp9 \ + --enable-bsf=h264_mp4toannexb,hevc_mp4toannexb \ + --enable-protocol=file,pipe \ + --enable-filter=scale,format,null,copy \ + --enable-libvpx \ + --enable-cuda \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc \ + --enable-ffnvcodec make -j$(nproc) make install ldconfig diff --git a/docker/common/install_h264_support.sh b/docker/common/install_h264_support.sh new file mode 100755 index 0000000000..7a933e09c3 --- /dev/null +++ b/docker/common/install_h264_support.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Opt-in installer that adds software h264/hevc/av1 decoder support to the +# Curator container by recompiling FFmpeg from source with those decoders +# added to the existing allowlist. +# +# The default Curator image deliberately ships an NVDEC-only ffmpeg so that +# every h264/hevc/av1 decode goes through the GPU. That breaks ffprobe-based +# metadata extraction in CPU-only Ray actors (VideoReader, ClipWriter), which +# is why this opt-in exists. +# +# Run inside a running container, e.g.: +# docker exec <container> bash /opt/Curator/docker/common/install_h264_support.sh +# +# Behaviour: +# - Recompiles FFmpeg from upstream git, same tag/branch as install_ffmpeg.sh, +# with the existing allowlist plus --enable-decoder=h264,hevc,av1. +# - Optionally also enables the libopenh264 software h264 ENCODER (Cisco's +# free-license OpenH264 binary; opt-in via --with-libopenh264). +# - Replaces /usr/local/bin/{ffmpeg,ffprobe} in place. +# - Default stays LGPLv3 (only FFmpeg-internal native decoders); with +# --with-libopenh264 the resulting binary additionally links Cisco's +# OpenH264 (BSD-2-Clause; see https://www.openh264.org/BINARY_LICENSE.txt). +# - Takes ~5-10 min. +# +# Keep FFMPEG_BRANCH and NVCODEC_VERSION in sync with docker/common/install_ffmpeg.sh. + +set -euo pipefail + +FFMPEG_BRANCH=n8.0.1 +NVCODEC_VERSION=12.1.14.0 +WITH_LIBOPENH264=0 + +usage() { + cat <<'EOF' +Usage: install_h264_support.sh [--with-libopenh264] [--FFMPEG_BRANCH=] + [--NVCODEC_VERSION=] + +Recompiles FFmpeg from source with software h264/hevc/av1 decoders enabled. 
+ +Options: + --with-libopenh264 Also enable the libopenh264 software h264 ENCODER + (Cisco's OpenH264 binary; required by Curator's + --transcode-encoder=libopenh264 path). + --FFMPEG_BRANCH= FFmpeg upstream tag/branch (default: n8.0.1). + --NVCODEC_VERSION= nv-codec-headers release version (default: 12.1.14.0). + -h, --help Show this help. + +License notice: + Default mode enables only FFmpeg's internal h264/hevc/av1 decoders (LGPL). + With --with-libopenh264 the build additionally links Cisco's OpenH264 + binary (BSD-2-Clause + Cisco-distributed binary license; see + https://www.openh264.org/BINARY_LICENSE.txt). By running this script you + are responsible for any license obligations the resulting binaries impose + on your distribution. +EOF +} + +for arg in "$@"; do + case $arg in + --with-libopenh264) WITH_LIBOPENH264=1 ;; + --FFMPEG_BRANCH=?*) FFMPEG_BRANCH="${arg#*=}" ;; + --NVCODEC_VERSION=?*) NVCODEC_VERSION="${arg#*=}" ;; + -h|--help) usage; exit 0 ;; + *) echo "Unknown argument: $arg" >&2; usage >&2; exit 2 ;; + esac +done + +if [ "$(id -u)" -ne 0 ]; then + echo "ERROR: must be run as root inside the container." >&2 + exit 1 +fi + +echo "==> install_h264_support.sh: rebuilding ffmpeg ${FFMPEG_BRANCH}" +echo " Decoders added: h264, hevc, av1 (software)" +if [ "$WITH_LIBOPENH264" -eq 1 ]; then + echo " Encoder added: libopenh264 (Cisco OpenH264 binary)" +fi +echo " NOTE: This expands the container's codec footprint beyond the default" +echo " NVDEC-only allowlist. License obligations of the resulting binaries" +echo " are the user's responsibility." +echo + +export DEBIAN_FRONTEND=noninteractive +apt-get update +apt_packages=( + autoconf automake build-essential ca-certificates cmake git + libcrypt-dev libnuma-dev libtool libvpx-dev nasm pkg-config + wget yasm zlib1g-dev +) +if [ "$WITH_LIBOPENH264" -eq 1 ]; then + apt_packages+=(libopenh264-dev) +fi +apt-get install -y "${apt_packages[@]}" + +if [ ! 
-f /usr/local/include/ffnvcodec/dynlink_loader.h ]; then + wget -O /tmp/nv-codec-headers.tar.gz \ + "https://github.com/FFmpeg/nv-codec-headers/releases/download/n${NVCODEC_VERSION}/nv-codec-headers-${NVCODEC_VERSION}.tar.gz" + tar xzf /tmp/nv-codec-headers.tar.gz -C /tmp/ + (cd "/tmp/nv-codec-headers-${NVCODEC_VERSION}" && make && make install) +fi + +cd /tmp +rm -rf ffmpeg +git clone --depth 1 --branch "${FFMPEG_BRANCH}" --recurse-submodules \ + https://git.ffmpeg.org/ffmpeg.git +cd ffmpeg + +# Configure mirrors install_ffmpeg.sh exactly, with the decoder allowlist +# extended to include software h264/hevc/av1, and optionally the libopenh264 +# software h264 encoder. +encoder_list="rawvideo,libvpx_vp9,h264_nvenc,hevc_nvenc,av1_nvenc" +extra_configure_flags=() +if [ "$WITH_LIBOPENH264" -eq 1 ]; then + encoder_list="${encoder_list},libopenh264" + extra_configure_flags+=(--enable-libopenh264) +fi + +PKG_CONFIG_PATH="/usr/local/lib/pkgconfig" ./configure \ + --prefix="/usr/local" \ + --enable-shared \ + --disable-static \ + --extra-cflags="-I/usr/local/cuda/include" \ + --extra-ldflags="-L/usr/local/cuda/lib64" \ + --extra-libs="-lpthread -lm" \ + --ld="g++" \ + --enable-version3 \ + --disable-everything \ + --disable-network \ + --disable-doc \ + --disable-ffplay \ + --disable-vaapi \ + --disable-vdpau \ + --disable-dxva2 \ + --disable-libdrm \ + --enable-encoder="${encoder_list}" \ + --enable-decoder=rawvideo,libvpx_vp9,vp9,vp8,h264_cuvid,hevc_cuvid,av1_cuvid,mpeg1video,mpeg2video,mpeg4,h264,hevc,av1 \ + --enable-muxer=mp4,rawvideo,image2pipe \ + --enable-demuxer=mov,mp4,m4a,3gp,3g2,mj2,avi,matroska,webm,image2,image2pipe \ + --enable-parser=h264,hevc,av1,vp8,vp9 \ + --enable-bsf=h264_mp4toannexb,hevc_mp4toannexb \ + --enable-protocol=file,pipe \ + --enable-filter=scale,format,null,copy \ + --enable-libvpx \ + --enable-cuda \ + --enable-cuvid \ + --enable-nvdec \ + --enable-nvenc \ + --enable-ffnvcodec \ + "${extra_configure_flags[@]}" +make -j"$(nproc)" +make 
install +ldconfig + +cd / +rm -rf /tmp/ffmpeg /tmp/nv-codec-headers* +echo +if [ "$WITH_LIBOPENH264" -eq 1 ]; then + echo "==> Done. /usr/local/bin/{ffmpeg,ffprobe} now include software h264/hevc/av1" + echo " decoders and the libopenh264 software h264 encoder." +else + echo "==> Done. /usr/local/bin/{ffmpeg,ffprobe} now include software h264/hevc/av1 decoders." +fi diff --git a/docs/admin/installation.md b/docs/admin/installation.md index b779a6af67..a20696219f 100644 --- a/docs/admin/installation.md +++ b/docs/admin/installation.md @@ -12,7 +12,7 @@ modality: "universal" # Installation Guide -This guide covers installing NeMo Curator with support for **all modalities** and verifying your installation is working correctly. +This guide covers installing NeMo Curator with support for **all modalities** and verifying that your installation is working correctly. ## Before You Start @@ -28,7 +28,7 @@ For comprehensive system requirements and production deployment specifications, - **GPU** (optional): NVIDIA GPU with 16GB+ VRAM for acceleration - **CUDA 12** (required for `audio_cuda12`, `video_cuda12`, `image_cuda12`, and `text_cuda12` extras) -### Development vs Production +### Development vs. Production | Use Case | Requirements | See | |----------|-------------|-----| @@ -96,7 +96,7 @@ uv sync --all-extras --all-groups :::{tab-item} Container Installation (Recommended for Video/Audio) -NeMo Curator is available as a standalone container on NGC: https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo-curator. The container includes NeMo Curator with all dependencies pre-installed, including FFmpeg with NVENC support. +NeMo Curator is available as a standalone container on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo-curator). The container includes NeMo Curator with all dependencies pre-installed, including FFmpeg with NVENC support. 
```bash # Pull the container from NGC @@ -138,13 +138,13 @@ docker run --gpus all -it --rm nemo-curator:latest ### Install FFmpeg and Encoders (Required for Video) -Curator’s video pipelines rely on `FFmpeg` for decoding and encoding. If you plan to encode clips (for example, using `--transcode-encoder libopenh264` or `h264_nvenc`), install `FFmpeg` with the corresponding encoders. +Curator’s video pipelines rely on `FFmpeg` for decoding and encoding. If you plan to encode clips (using `--transcode-encoder h264_nvenc` or `--transcode-encoder libvpx-vp9`), install `FFmpeg` with NVENC and libvpx-vp9 support. The maintained install script bundles both. ::::{tab-set} :::{tab-item} Debian/Ubuntu (Script) -Use the maintained script in the repository to build and install `FFmpeg` with `libopenh264` and NVIDIA NVENC support. The script enables `--enable-libopenh264`, `--enable-cuda-nvcc`, and `--enable-libnpp`. +Use the maintained script in the repository to build and install `FFmpeg` with NVIDIA NVENC and `libvpx-vp9` support. The script enables `--enable-nvenc`, `--enable-nvdec`, `--enable-cuvid`, and `--enable-libvpx`. - Script source: [docker/common/install_ffmpeg.sh](https://github.com/NVIDIA-NeMo/Curator/blob/main/docker/common/install_ffmpeg.sh) @@ -158,11 +158,11 @@ sudo bash install_ffmpeg.sh :::{tab-item} Verify Installation -Confirm that `FFmpeg` is on your `PATH` and that at least one H.264 encoder is available: +Confirm that `FFmpeg` is on your `PATH` and that at least one supported encoder is available: ```bash ffmpeg -hide_banner -version | head -n 5 -ffmpeg -encoders | grep -E "h264_nvenc|libopenh264|libx264" | cat +ffmpeg -encoders | grep -E "h264_nvenc|libvpx-vp9" | cat ``` If encoders are missing, reinstall `FFmpeg` with the required options or use the Debian/Ubuntu script above. 
@@ -174,6 +174,85 @@ If encoders are missing, reinstall `FFmpeg` with the required options or use the **FFmpeg build requires CUDA toolkit (nvcc):** If you encounter `ERROR: failed checking for nvcc` during FFmpeg installation, ensure that the CUDA toolkit is installed and `nvcc` is available on your `PATH`. You can verify with `nvcc --version`. If using the NeMo Curator container, FFmpeg is pre-installed with NVENC support. ``` +````{important} +**Processing H.264/HEVC/AV1 inputs? You might still need a software decoder — even with NVENC/NVDEC.** + +Curator's pipeline runs `ffprobe` for metadata extraction inside CPU-only Ray actors (`VideoReader` and `ClipWriter`). Those actors don't have GPU visibility, so the bundled `h264_cuvid` / `hevc_cuvid` / `av1_cuvid` decoders can't be opened from there. Without a software decoder, `ffprobe` exits non-zero and your h264/hevc/av1 inputs are silently skipped (you'll see a `SoftwareCodecMissingError` in the logs). + +**Recommended fix:** run the bundled installer inside the container — no image rebuild needed: + +```bash +bash /opt/Curator/docker/common/install_h264_support.sh +``` + +See [Software H.264/HEVC/AV1 Codec Support](#software-h264hevcav1-codec-support-advanced) below for the full picture (other paths, license notes, opt-in `libopenh264` encoder). +```` + +### Software H.264/HEVC/AV1 Codec Support (Advanced) + +Curator's default FFmpeg build deliberately ships **NVDEC-only** decoders for `h264`, `hevc`, and `av1`, and **excludes** software H.264/HEVC encoders (`libopenh264`, `libx264`, `libx265`). This keeps the codec footprint tight and routes every H.264/HEVC/AV1 decode through the GPU. + +You may need to add software codec support in two cases: + +- **H.264 inputs in CPU-only pipeline stages.** `VideoReader` and `ClipWriter` invoke `ffprobe` from CPU-only Ray actors that can't see the GPU; they need a software `h264`/`hevc`/`av1` decoder to extract metadata. 
Without it you'll get a `SoftwareCodecMissingError` pointing back here. +- **H.264 software encoding** (for example, on GPUs without an NVENC encoder block such as A100 or H100, when VP9 isn't acceptable). + +#### Option 1: Run the bundled installer inside the container (Recommended) + +The repository ships a runtime opt-in script that recompiles FFmpeg with software h264/hevc/av1 decoders enabled, optionally including the `libopenh264` encoder. It runs **inside an existing container** — no image rebuild required. + +```bash +# Inside the container — adds h264/hevc/av1 software decoders only (LGPLv3): +bash /opt/Curator/docker/common/install_h264_support.sh + +# Same plus the libopenh264 software h264 ENCODER, so --transcode-encoder=libopenh264 works: +bash /opt/Curator/docker/common/install_h264_support.sh --with-libopenh264 +``` + +The build takes ~5–10 minutes, replaces `/usr/local/bin/{ffmpeg,ffprobe}` in place, and pins to the same FFmpeg tag as the image build. Script source: [docker/common/install_h264_support.sh](https://github.com/NVIDIA-NeMo/Curator/blob/main/docker/common/install_h264_support.sh). + +License notice: the default mode adds only FFmpeg-internal decoders (LGPL). With `--with-libopenh264` the binary additionally links Cisco's OpenH264 (BSD-2-Clause + Cisco-distributed binary license — see https://www.openh264.org/BINARY_LICENSE.txt). You are responsible for any license obligations the resulting binaries impose on your distribution. + +#### Option 2: Use the System FFmpeg + +If you're not using the Curator container, most Linux distributions ship FFmpeg with `libx264` (and sometimes `libopenh264`) preinstalled: + +```bash +sudo apt-get install -y ffmpeg +ffmpeg -hide_banner -encoders | grep -E "libx264|libopenh264" +``` + +Make sure the `ffmpeg` on your `PATH` is the one you want — it must shadow Curator's bundled build. 
+ +#### Option 3: Edit `install_ffmpeg.sh` and Rebuild the Image + +For users distributing customized images, edit [`docker/common/install_ffmpeg.sh`](https://github.com/NVIDIA-NeMo/Curator/blob/main/docker/common/install_ffmpeg.sh) before building the container: + +- For software h264/hevc/av1 decoders: append `h264,hevc,av1` to the `--enable-decoder=...` list. +- For `libopenh264` encoder: add `libopenh264-dev` to the apt list, `libopenh264` to `--enable-encoder=...`, and `--enable-libopenh264` to the configure flags. +- For `libx264` encoder: add `libx264-dev` to the apt list and `--enable-libx264 --enable-gpl` to the configure flags. Note that `--enable-gpl` makes the resulting FFmpeg binary GPL-licensed. + +Then rebuild your image. + +#### Use the Encoder in `ClipTranscodingStage` + +`libopenh264` is accepted by `ClipTranscodingStage` out of the box. At setup time, the stage probes the local FFmpeg build and raises a clear error pointing back to this section if the encoder is not actually compiled in. Once your FFmpeg build includes it, just pass: + +```bash +python video_split_clip_example.py ... --transcode-encoder libopenh264 +``` + +For other custom encoders not in `SUPPORTED_ENCODERS` (for example, `libx264`), edit `nemo_curator/stages/video/clipping/clip_extraction_stages.py` to extend the tuple, and add the encoder name to the `--transcode-encoder` argparse `choices` list in `tutorials/video/getting-started/video_split_clip_example.py`: + +```python +SUPPORTED_ENCODERS = ("h264_nvenc", "libvpx-vp9", "libopenh264", "libx264") # add yours +``` + +#### Caveats + +- **Default options for these encoders are not tuned.** `ClipTranscodingStage` only sets quality presets for `h264_nvenc` and `libvpx-vp9`. Other encoders run with FFmpeg defaults, which may produce different quality/size trade-offs than you expect — see [Configure encoders](../curate-video/process-data/transcoding.md#configure) for how to pass an explicit bitrate. 
+- **The NeMo Curator team does not test custom encoder configurations.** Issues filed against custom encoder builds may be closed. + --- ## Package Extras @@ -198,7 +277,7 @@ NeMo Curator provides several installation extras to install only the components - CPU-only audio curation with NeMo Toolkit ASR * - **audio_cuda12** - `uv pip install nemo-curator[audio_cuda12]` - - GPU-accelerated audio curation. When using `uv`, requires `transformers==4.55.2` override. + - GPU-accelerated audio curation. When using `uv`, requires a `transformers==4.55.2` override. * - **image_cpu** - `uv pip install nemo-curator[image_cpu]` - CPU-only image processing @@ -219,11 +298,11 @@ NeMo Curator provides several installation extras to install only the components --- -## Installation Verification +## Verify Your Installation After installation, verify that NeMo Curator is working correctly: -### 1. Basic Import Test +### 1. Test Basic Imports ```python # Test basic imports @@ -236,7 +315,7 @@ from nemo_curator.tasks import DocumentBatch print("✓ Core modules imported successfully") ``` -### 2. GPU Availability Check +### 2. Check GPU Availability If you installed GPU support, verify GPU access: @@ -249,7 +328,7 @@ try: print(f"✓ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB") else: print("⚠ No GPU detected") - + # Check cuDF for GPU deduplication import cudf print("✓ cuDF available for GPU-accelerated deduplication") diff --git a/docs/curate-video/index.md b/docs/curate-video/index.md index 82d873bfab..e272b6f72c 100644 --- a/docs/curate-video/index.md +++ b/docs/curate-video/index.md @@ -48,7 +48,7 @@ Get oriented and prepare your environment so you can start curating videos with :::{grid-item-card} {octicon}`database;1.5em;sd-mr-1` Concepts :link: about-concepts-video :link-type: ref -Learn about the architecture, stages, pipelines, and data flow for video curation +Learn about the architecture, stages, pipelines, and data flow for video curation. 
+++ {bdg-secondary}`stages` {bdg-secondary}`pipelines` @@ -121,8 +121,6 @@ Encode clips to H.264 using CPU or GPU encoders and tune performance. +++ {bdg-primary}`clips` {bdg-secondary}`h264_nvenc` -{bdg-secondary}`libopenh264` -{bdg-secondary}`libx264` ::: :::{grid-item-card} {octicon}`filter;1.5em;sd-mr-1` Filter Clips and Frames diff --git a/docs/curate-video/process-data/index.md b/docs/curate-video/process-data/index.md index 6e49781525..63d46dea78 100644 --- a/docs/curate-video/process-data/index.md +++ b/docs/curate-video/process-data/index.md @@ -14,7 +14,7 @@ modality: "video-only" Use NeMo Curator stages to split videos into clips, encode them, generate embeddings or captions, and remove duplicates. -## How it Works +## How It Works Create a `Pipeline` and add stages for clip extraction, optional re-encoding and filtering, embeddings or captions, previews, and writing outputs. Each stage is modular and configurable to match your quality and performance needs. @@ -42,7 +42,6 @@ Encode clips to H.264 using CPU or GPU encoders and tune performance. +++ {bdg-primary}`clips` {bdg-secondary}`h264_nvenc` -{bdg-secondary}`libopenh264` ::: :::{grid-item-card} {octicon}`filter;1.5em;sd-mr-1` Filter Clips and Frames @@ -89,7 +88,7 @@ Produce clip captions and optional preview images for review workflows. :::{grid-item-card} {octicon}`git-branch;1.5em;sd-mr-1` Remove Duplicate Embeddings :link: video-process-dedup :link-type: ref -Remove near-duplicates using semantic clustering and similarity with generated embeddings. +Remove near-duplicates using semantic clustering and pairwise similarity of generated embeddings. 
+++ {bdg-primary}`clips` {bdg-secondary}`semantic` diff --git a/docs/curate-video/process-data/transcoding.md b/docs/curate-video/process-data/transcoding.md index 089be396f4..2828a28f90 100644 --- a/docs/curate-video/process-data/transcoding.md +++ b/docs/curate-video/process-data/transcoding.md @@ -41,7 +41,7 @@ from nemo_curator.stages.video.clipping.clip_extraction_stages import FixedStrid pipe = Pipeline(name="transcode_example") pipe.add_stage(FixedStrideExtractorStage(clip_len_s=10.0, clip_stride_s=10.0)) -pipe.add_stage(ClipTranscodingStage(encoder="libopenh264", encode_batch_size=16, encoder_threads=1, verbose=True)) +pipe.add_stage(ClipTranscodingStage(encoder="h264_nvenc", encode_batch_size=16, encoder_threads=1, verbose=True)) pipe.run() ``` @@ -53,7 +53,7 @@ pipe.run() python -m ray_curator.examples.video.video_split_clip_example \ ... --transcode-encoder h264_nvenc \ - --transcode-use-hwaccel \ + --transcode-use-hwaccel ``` ::: @@ -67,19 +67,33 @@ python -m ray_curator.examples.video.video_split_clip_example \ * - Encoder - Hardware - Description -* - `libx264` - - CPU - - Widely available, high quality, CPU-based. -* - `libopenh264` - - CPU - - Good quality and throughput balance. Often faster than `libx264` at similar presets. * - `h264_nvenc` - NVIDIA GPU (NVENC) - Uses NVENC for high-throughput H.264 encoding on NVIDIA GPU hardware. +* - `libvpx-vp9` + - CPU + - VP9 software encoder. Use as a fallback on GPUs without NVENC silicon (e.g., A100/H100). Slower than NVENC; produces VP9 in `.mp4`. Emits a one-time performance advisory at construction. +* - `libopenh264` + - CPU + - H.264 software encoder. **Not bundled with Curator's default FFmpeg build** — opt in by running `bash /opt/Curator/docker/common/install_h264_support.sh --with-libopenh264` inside the container, or providing a system FFmpeg that includes it. The stage probes at setup time and raises with a docs link if missing. 
See [Software H.264/HEVC/AV1 Codec Support](../../admin/installation.md#software-h264hevcav1-codec-support-advanced). ``` ```{tip} -On systems with supported NVIDIA GPU hardware and an `ffmpeg` build with NVENC, `h264_nvenc` can significantly increase throughput. Refer to the verification steps below to confirm NVENC availability. +On systems with supported NVIDIA GPU hardware and an `ffmpeg` build with NVENC, `h264_nvenc` can significantly increase throughput. Refer to the verification steps below to confirm NVENC availability. On GPUs without an NVENC encoder block (such as A100 and H100), use `libvpx-vp9` instead — it runs entirely on CPU and has no proprietary licensing constraints. +``` + +````{note} +**Need software H.264 (libopenh264 / libx264)?** Curator's default FFmpeg build excludes them for licensing reasons. The fastest path inside the container is the bundled installer: + +```bash +bash /opt/Curator/docker/common/install_h264_support.sh --with-libopenh264 +``` + +For other paths (system FFmpeg, custom image rebuild) see [Software H.264/HEVC/AV1 Codec Support](../../admin/installation.md#software-h264hevcav1-codec-support-advanced). +```` + +```{note} +**Got `SoftwareCodecMissingError` on an h264 input?** Curator's CPU-only `VideoReader` and `ClipWriter` stages call `ffprobe`, which needs a software `h264` decoder — not available in the default NVDEC-only build. Install software decoder support with `bash /opt/Curator/docker/common/install_h264_support.sh`, or pick a transcode encoder whose output codec the system FFmpeg can software-decode (e.g., `--transcode-encoder libvpx-vp9`). See [Software H.264/HEVC/AV1 Codec Support](../../admin/installation.md#software-h264hevcav1-codec-support-advanced). 
``` ### Verify `ffmpeg`/NVENC Support @@ -102,7 +116,7 @@ Use `ClipTranscodingStage` to control encoder choice, batching, and acceleration from nemo_curator.stages.video.clipping.clip_extraction_stages import ClipTranscodingStage transcode = ClipTranscodingStage( - encoder="h264_nvenc", # or "libopenh264", "libx264" + encoder="h264_nvenc", use_hwaccel=True, # enable NVENC when using h264_nvenc encoder_threads=1, # CPU thread count for CPU encoders encode_batch_size=16, # number of clips per encode batch @@ -121,9 +135,9 @@ transcode = ClipTranscodingStage( * - Parameter - Description * - `encoder` - - Selects the encoding backend. Recommended defaults: `libopenh264` (CPU) or `h264_nvenc` (GPU). + - Selects the encoding backend. Supported values: `h264_nvenc` (GPU, requires NVENC), `libvpx-vp9` (CPU fallback for non-NVENC GPUs such as A100/H100), and `libopenh264` (CPU; opt-in via `install_h264_support.sh --with-libopenh264` — see [Software H.264/HEVC/AV1 Codec Support](../../admin/installation.md#software-h264hevcav1-codec-support-advanced)). * - `use_hwaccel` - - Enable when using GPU encoders like `h264_nvenc`. + - Enable when using `h264_nvenc`. Not valid with `libvpx-vp9` or `libopenh264`. * - `encoder_threads` - CPU threads per worker for CPU encoders. Increase to use more CPU. * - `encode_batch_size` diff --git a/docs/curate-video/tutorials/beginner.md b/docs/curate-video/tutorials/beginner.md index acda9d4999..db96e7bf92 100644 --- a/docs/curate-video/tutorials/beginner.md +++ b/docs/curate-video/tutorials/beginner.md @@ -46,7 +46,7 @@ flowchart LR - **Data units**: Input videos → clip windows → frames → embeddings + files. - **Common choices**: - **Splitting**: fixed stride vs. 
scene-change (TransNetV2) - - **Encoding**: `libopenh264`, `h264_nvenc`, or `libx264` + - **Encoding**: `h264_nvenc` (NVENC-equipped GPU) or `libvpx-vp9` (CPU fallback for non-NVENC GPUs such as A100/H100) - **Embeddings**: Cosmos-Embed1 - **Outputs**: Clips (mp4), previews (optional), and parquet embeddings for downstream tasks (such as semantic duplicate removal). @@ -109,7 +109,7 @@ pipeline.add_stage( ::::{tab-set} -:::{tab-item} Fixed stride +:::{tab-item} Fixed Stride ```python pipeline.add_stage( @@ -124,7 +124,7 @@ pipeline.add_stage( ::: -:::{tab-item} TransNetV2 (scene change) +:::{tab-item} TransNetV2 (Scene Change) ```python from nemo_curator.stages.video.clipping.video_frame_extraction import VideoFrameExtractionStage @@ -152,16 +152,16 @@ pipeline.add_stage( ### Encode Clips -Convert clip buffers to H.264 using the selected encoder and settings. Refer to [Clip Encoding](video-process-transcoding) for encoder choices and NVENC setup. +Convert clip buffers using the selected encoder and settings. Choose `h264_nvenc` on NVENC-equipped GPUs or `libvpx-vp9` (CPU) on GPUs without NVENC such as A100/H100. Refer to [Clip Encoding](video-process-transcoding) for encoder details and NVENC setup. 
```python pipeline.add_stage( ClipTranscodingStage( num_cpus_per_worker=6.0, - encoder="libopenh264", + encoder="h264_nvenc", # or "libvpx-vp9" for non-NVENC GPUs encoder_threads=1, encode_batch_size=16, - use_hwaccel=False, + use_hwaccel=True, # must be False with "libvpx-vp9" use_input_bit_rate=False, num_clips_per_chunk=32, verbose=True, diff --git a/docs/curate-video/tutorials/split-dedup.md b/docs/curate-video/tutorials/split-dedup.md index 2147d0bddd..d1d5d93c21 100644 --- a/docs/curate-video/tutorials/split-dedup.md +++ b/docs/curate-video/tutorials/split-dedup.md @@ -22,7 +22,7 @@ Learn how to run the splitting pipeline to generate clips and embeddings, then r ## Before You Start -- Complete the [Get Started guide](gs-video). +- Complete the {ref}`Get Started with Video Curation <gs-video>` guide. 
--- @@ -38,7 +38,7 @@ python tutorials/video/getting-started/video_split_clip_example.py \ --splitting-algorithm fixed_stride \ --fixed-stride-split-duration 10.0 \ --embedding-algorithm cosmos-embed1-224p \ - --transcode-encoder libopenh264 \ + --transcode-encoder h264_nvenc \ --verbose ``` @@ -60,7 +60,7 @@ The pipeline writes embeddings to Parquet with two columns: ::::{tab-set} -:::{tab-item} Directory layout +:::{tab-item} Directory Layout ```text $OUT_DIR/ @@ -80,7 +80,7 @@ embedding: list # length = 768 for Cosmos-Embed1 ::: -:::{tab-item} Sample row +:::{tab-item} Sample Row ```json {"id": "a3f2b0c1-7d64-4a1e-9f2b-8b0f6d1e2c33", "embedding": [0.0123, -0.0456, 0.0031, 0.1279]} @@ -88,7 +88,7 @@ embedding: list # length = 768 for Cosmos-Embed1 ::: -:::{tab-item} Read example +:::{tab-item} Read Example ```python import pyarrow.parquet as pq @@ -149,9 +149,9 @@ pipe.add_stage( pipe.run() ``` -`which_to_keep` selects the representative within each cluster: "hard" keeps outliers far from the centroid, "easy" keeps the nearest to the centroid, and "random" ignores distance and picks randomly. +`which_to_keep` selects the representative within each cluster: `hard` keeps outliers far from the centroid, `easy` keeps the nearest to the centroid, and `random` ignores distance and picks randomly. -`sim_metric` sets the distance used for similarity: "cosine" uses cosine distance (1 − cosine similarity), while "l2" uses Euclidean distance. +`sim_metric` sets the distance used for similarity: `cosine` uses cosine distance (1 − cosine similarity), while `l2` uses Euclidean distance. `pairwise_batch_size` controls how many items are processed per GPU batch during pairwise similarity; larger values can be faster but require more GPU memory. 
diff --git a/docs/get-started/video.md b/docs/get-started/video.md index 74cac57df4..9c8dc02dce 100644 --- a/docs/get-started/video.md +++ b/docs/get-started/video.md @@ -58,9 +58,9 @@ To use NeMo Curator's video curation capabilities, ensure your system meets thes - Reduced configuration (lower batch sizes, FP8): ~21GB VRAM #### Software Dependencies -* **FFmpeg 8.0+** with H.264 encoding support - - GPU encoder: `h264_nvenc` (recommended for performance) - - CPU encoders: `libopenh264` or `libx264` (fallback options) +* **FFmpeg 8.0+** with one of the following encoders: + - GPU encoder: `h264_nvenc` (recommended for performance; requires an NVENC-equipped GPU — note that A100 and H100 do **not** include NVENC) + - CPU encoder: `libvpx-vp9` (for non-NVENC GPUs; produces VP9 in `.mp4`) :::{tip} If `uv` is not installed, refer to the [Installation Guide](../admin/installation.md) for setup instructions, or install it quickly with: @@ -122,13 +122,13 @@ For details on container environments and configurations, see [Container Environ ## Install FFmpeg and Encoders -Curator’s video pipelines rely on `FFmpeg` for decoding and encoding. If you plan to encode clips (for example, using `--transcode-encoder libopenh264` or `h264_nvenc`), install `FFmpeg` with the corresponding encoders. +Curator’s video pipelines rely on `FFmpeg` for decoding and encoding. If you plan to encode clips (using `--transcode-encoder h264_nvenc` or `--transcode-encoder libvpx-vp9`), install `FFmpeg` with NVENC and `libvpx-vp9` support. The maintained install script bundles both. ::::{tab-set} :::{tab-item} Debian/Ubuntu (Script) -Use the maintained script in the repository to build and install `FFmpeg` with `libopenh264` and NVIDIA NVENC support. The script enables `--enable-libopenh264`, `--enable-cuda-nvcc`, and `--enable-libnpp`. +Use the maintained script in the repository to build and install `FFmpeg` with NVIDIA NVENC and `libvpx-vp9` support. 
The script enables `--enable-cuda-nvcc`, `--enable-libnpp`, and `--enable-libvpx`. - Script source: [docker/common/install_ffmpeg.sh](https://github.com/NVIDIA-NeMo/Curator/blob/main/docker/common/install_ffmpeg.sh) @@ -146,7 +146,7 @@ Confirm that `FFmpeg` is on your `PATH` and that at least one H.264 encoder is a ```bash ffmpeg -hide_banner -version | head -n 5 -ffmpeg -encoders | grep -E "h264_nvenc|libopenh264|libx264" | cat +ffmpeg -encoders | grep -E "h264_nvenc|libvpx-vp9" | cat ``` If encoders are missing, reinstall `FFmpeg` with the required options or use the Debian/Ubuntu script above. @@ -155,11 +155,24 @@ If encoders are missing, reinstall `FFmpeg` with the required options or use the :::: +````{important} +**Processing H.264/HEVC/AV1 inputs? You might still need a software decoder — even with NVENC/NVDEC.** + +Curator runs `ffprobe` inside CPU-only Ray actors (`VideoReader`, `ClipWriter`) for metadata extraction. Those actors can't open NVDEC decoders, so without a software h264/hevc/av1 decoder your inputs are skipped (look for `SoftwareCodecMissingError` in the logs). + +Run the bundled installer inside the container to add software decoder support — no image rebuild needed: + +```bash +bash /opt/Curator/docker/common/install_h264_support.sh +``` + +See [Software H.264/HEVC/AV1 Codec Support](../admin/installation.md#software-h264hevcav1-codec-support-advanced) for the full picture. +```` + Refer to [Clip Encoding](video-process-transcoding) to choose encoders and verify NVENC support on your system. ### Available Models - Embeddings convert each video clip into a numeric vector that captures visual and semantic content. Curator uses these vectors to: - Remove near-duplicate clips during duplicate removal @@ -221,7 +234,7 @@ Organize input videos and output locations before running the pipeline.
## Run the Splitting Pipeline Example -Use the example script from https://github.com/NVIDIA-NeMo/Curator/tree/main/tutorials/video/getting-started to read videos, split into clips, and write outputs. This runs a Ray pipeline with `XennaExecutor` under the hood. +Use the [example script](https://github.com/NVIDIA-NeMo/Curator/tree/main/tutorials/video/getting-started) to read videos, split into clips, and write outputs. This runs a Ray pipeline with `XennaExecutor` under the hood. ```bash python tutorials/video/getting-started/video_split_clip_example.py \ @@ -231,7 +244,7 @@ python tutorials/video/getting-started/video_split_clip_example.py \ --splitting-algorithm fixed_stride \ --fixed-stride-split-duration 10.0 \ --embedding-algorithm cosmos-embed1-224p \ - --transcode-encoder libopenh264 \ + --transcode-encoder h264_nvenc \ --verbose ``` @@ -239,7 +252,7 @@ python tutorials/video/getting-started/video_split_clip_example.py \ 1. Reads all video files from `$DATA_DIR` 2. Splits each video into 10-second clips using fixed stride 3. Generates embeddings using Cosmos-Embed1-224p model -4. Encodes clips using libopenh264 codec +4. Encodes clips using the `h264_nvenc` encoder 5.
Writes output clips and metadata to `$OUT_DIR` ```{tip} @@ -250,8 +263,8 @@ python tutorials/video/getting-started/video_split_clip_example.py \ --splitting-algorithm fixed_stride --fixed-stride-split-duration 10.0 --embedding-algorithm cosmos-embed1-224p - --transcode-encoder libopenh264' > my_config.txt - + --transcode-encoder h264_nvenc' > my_config.txt + python tutorials/video/getting-started/video_split_clip_example.py @my_config.txt ``` @@ -266,14 +279,14 @@ python tutorials/video/getting-started/video_split_clip_example.py \ | **Embedding** | | `--embedding-algorithm` | `cosmos-embed1-224p`, `cosmos-embed1-336p`, `cosmos-embed1-448p` | Embedding model to use | | **Encoding** | -| `--transcode-encoder` | `h264_nvenc`, `libopenh264`, `libx264` | Video encoder for output clips | -| `--transcode-use-hwaccel` | Flag | Enable hardware acceleration for encoding | +| `--transcode-encoder` | `h264_nvenc`, `libvpx-vp9`, `libopenh264` | Video encoder for output clips. Use `libvpx-vp9` (CPU) on GPUs without NVENC such as A100/H100. `libopenh264` is opt-in — run `install_h264_support.sh --with-libopenh264` inside the container or provide a system FFmpeg that includes it. See [Software H.264/HEVC/AV1 Codec Support](../admin/installation.md#software-h264hevcav1-codec-support-advanced). | +| `--transcode-use-hwaccel` | Flag | Enable hardware acceleration for encoding (only valid with `h264_nvenc`). 
| | **Optional Features** | | `--generate-captions` | Flag | Generate text captions for each clip | | `--generate-previews` | Flag | Create preview images for each clip | | `--verbose` | Flag | Enable detailed logging output | -### Understanding Pipeline Output +### Understand Pipeline Output After successful execution, the output directory will contain: diff --git a/nemo_curator/stages/video/clipping/clip_extraction_stages.py b/nemo_curator/stages/video/clipping/clip_extraction_stages.py index 6814f384b7..21cb31c5ea 100644 --- a/nemo_curator/stages/video/clipping/clip_extraction_stages.py +++ b/nemo_curator/stages/video/clipping/clip_extraction_stages.py @@ -14,6 +14,7 @@ import copy import pathlib +import shutil import subprocess import uuid from dataclasses import dataclass @@ -29,13 +30,29 @@ from nemo_curator.utils import grouping from nemo_curator.utils.operation_utils import make_pipeline_temporary_dir +SUPPORTED_ENCODERS = ("h264_nvenc", "libvpx-vp9", "libopenh264") + +_BYO_H264_DOCS_URL = ( + "https://github.com/NVIDIA-NeMo/Curator/blob/main/docs/admin/installation.md" + "#software-h264hevcav1-codec-support-advanced" +) + @dataclass class ClipTranscodingStage(ProcessingStage[VideoTask, VideoTask]): """Stage that transcodes video clips into a standardized format. - This stage handles the conversion of video clips using FFmpeg, supporting both - software (libx264, libopenh264) and hardware (NVENC) encoding with configurable parameters. + This stage handles the conversion of video clips using FFmpeg. Supported + encoders: + + - ``h264_nvenc`` — hardware H.264 via NVENC (recommended; requires an + NVENC-equipped NVIDIA GPU — note that A100/H100 do not include NVENC). + - ``libvpx-vp9`` — royalty-free VP9 software encoder (CPU fallback for + non-NVENC GPUs). Significantly slower; emits a perf advisory. + - ``libopenh264`` — H.264 software encoder. Not bundled with Curator's + FFmpeg build for licensing reasons; users must install it themselves. 
+ The stage probes for it at setup time and raises a clear error pointing + to the docs if it is not available. Args: num_cpus_per_worker: Number of CPUs per worker. @@ -43,7 +60,7 @@ class ClipTranscodingStage(ProcessingStage[VideoTask, VideoTask]): encoder_threads: Number of threads per encoder. encode_batch_size: Number of clips to encode in parallel. nb_streams_per_gpu: Number of streams per GPU. - use_hwaccel: Whether to use hardware acceleration. + use_hwaccel: Whether to use hardware acceleration. Only valid with `h264_nvenc`. use_input_bit_rate: Whether to use input video bit rate. num_clips_per_chunk: Number of clips per chunk. If the number of clips is larger than this, the clips will be split into chunks, and created VideoTasks for each chunk. verbose: Whether to print verbose logs. @@ -51,7 +68,7 @@ class ClipTranscodingStage(ProcessingStage[VideoTask, VideoTask]): """ num_cpus_per_worker: float = 6.0 - encoder: str = "libx264" + encoder: str = "h264_nvenc" encoder_threads: int = 1 encode_batch_size: int = 16 nb_streams_per_gpu: int = 3 @@ -69,9 +86,44 @@ def setup(self, worker_metadata: WorkerMetadata | None = None) -> None: # noqa: Args: worker_metadata (WorkerMetadata, optional): Information about the worker (provided by some backends) """ - if self.encoder not in {"libopenh264", "libx264", "h264_nvenc"}: - error_msg = f"Expected encoder of `libopenh264`, `libx264`, or `h264_nvenc`. Got {self.encoder}" + if self.encoder not in SUPPORTED_ENCODERS: + error_msg = f"Expected encoder in {SUPPORTED_ENCODERS}. 
Got {self.encoder}" raise ValueError(error_msg) + if self.encoder == "libvpx-vp9" and self.use_hwaccel: + error_msg = "use_hwaccel is not supported with libvpx-vp9 (CPU encoder)" + raise ValueError(error_msg) + if self.encoder == "libopenh264": + self._verify_libopenh264_available() + + @staticmethod + def _verify_libopenh264_available() -> None: + """Probe the local FFmpeg build for libopenh264 support.""" + ffmpeg_bin = shutil.which("ffmpeg") + if ffmpeg_bin is None: + error_msg = ( + "Could not find `ffmpeg` on PATH while verifying libopenh264 support. " + f"Install FFmpeg and ensure it is on PATH. See {_BYO_H264_DOCS_URL}" + ) + raise RuntimeError(error_msg) + try: + result = subprocess.run( # noqa: S603 + [ffmpeg_bin, "-hide_banner", "-encoders"], + capture_output=True, + text=True, + check=False, + timeout=10, + ) + except subprocess.TimeoutExpired as e: + error_msg = f"`ffmpeg -encoders` timed out while verifying libopenh264 support. See {_BYO_H264_DOCS_URL}" + raise RuntimeError(error_msg) from e + if "libopenh264" not in result.stdout: + error_msg = ( + "encoder='libopenh264' was requested but the local FFmpeg build " + "does not include it. Curator does not ship libopenh264 due to " + "its patent-license redistribution model. To enable it, install " + f"a libopenh264-enabled FFmpeg yourself — see {_BYO_H264_DOCS_URL}" + ) + raise RuntimeError(error_msg) def __post_init__(self) -> None: """Post-initialization method called after all fields are set.""" @@ -84,6 +136,14 @@ def __post_init__(self) -> None: else: self.resources = Resources(cpus=self.num_cpus_per_worker) + if self.encoder == "libvpx-vp9": + logger.warning( + "ClipTranscodingStage: libvpx-vp9 is significantly slower than " + "h264_nvenc and libopenh264. If your GPU has NVENC, prefer " + "encoder='h264_nvenc'. 
To use libopenh264 instead, see " + f"{_BYO_H264_DOCS_URL}" + ) + def inputs(self) -> tuple[list[str], list[str]]: return ["data"], ["source_bytes"] @@ -232,11 +292,8 @@ def _add_decoder_threads(self, command: list[str]) -> None: def _add_hwaccel_options(self, command: list[str]) -> None: """Add hardware acceleration options to command.""" - if self.use_hwaccel: - if self.encoder == "h264_nvenc": - command.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"]) - else: - command.extend(["-hwaccel", "auto"]) + if self.use_hwaccel and self.encoder == "h264_nvenc": + command.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"]) def _add_input_options(self, command: list[str], clip: Clip, video_filename: str, index: int) -> None: """Add input options to command.""" @@ -263,6 +320,8 @@ def _add_video_encoding_options(self, command: list[str], use_bit_rate: str | No if self.encoder == "h264_nvenc": self._add_nvenc_options(command, force_pix_fmt) + elif self.encoder == "libvpx-vp9": + self._add_libvpx_vp9_options(command, use_bit_rate, force_pix_fmt) def _add_nvenc_options(self, command: list[str], force_pix_fmt: bool) -> None: """Add NVENC-specific encoding options.""" @@ -288,6 +347,28 @@ def _add_nvenc_options(self, command: list[str], force_pix_fmt: bool) -> None: if force_pix_fmt: command.extend(["-pix_fmt", "yuv420p"]) + def _add_libvpx_vp9_options(self, command: list[str], use_bit_rate: str | None, force_pix_fmt: bool) -> None: + """Add libvpx-vp9 (CPU) encoding options.""" + # Constant-quality mode when no explicit bitrate is requested. + # libvpx-vp9 requires `-b:v 0` to honor `-crf` exactly. 
+ if use_bit_rate is None: + command.extend(["-b:v", "0", "-crf", "31"]) + command.extend( + [ + "-deadline", + "good", + "-cpu-used", + "4", + "-row-mt", + "1", + "-tile-columns", + "2", + ] + ) + + if force_pix_fmt: + command.extend(["-pix_fmt", "yuv420p"]) + def _add_output_options(self, command: list[str], clip: Clip, index: int) -> None: """Add output options to command.""" # Add encoder threads diff --git a/nemo_curator/stages/video/filtering/motion_vector_backend.py b/nemo_curator/stages/video/filtering/motion_vector_backend.py index 6f73c07596..f1707cf577 100644 --- a/nemo_curator/stages/video/filtering/motion_vector_backend.py +++ b/nemo_curator/stages/video/filtering/motion_vector_backend.py @@ -28,6 +28,21 @@ _MIN_SIDE_RESOLUTION = 256 +def _resolve_export_mvs_flag() -> int: + """Return the EXPORT_MVS bitflag, accepting either the PyAV >=15 lowercase + name (``export_mvs``) or the PyAV <=13 uppercase name (``EXPORT_MVS``). + + The enum member was renamed between PyAV 13 and 15. Tests for both branches + pin this contract so a future PyAV bump that renames it again surfaces as + a failed unit test rather than silently zero motion vectors at runtime. + """ + flags2 = av.codec.context.Flags2 + flag = getattr(flags2, "export_mvs", None) + if flag is None: + flag = flags2.EXPORT_MVS # type: ignore[attr-defined] + return flag + + class VideoResolutionTooSmallError(Exception): """Exception raised when video resolution is below the minimum required size. @@ -184,8 +199,8 @@ def decode_for_motion( # noqa: C901 with cast("av.container.InputContainer", av.open(video, metadata_errors="ignore")) as input_container: stream = input_container.streams.video[0] ctx = stream.codec_context - # Set this flag to return motion vectors - ctx.flags2 |= av.codec.context.Flags2.EXPORT_MVS + # Request motion-vector side data from the decoder. 
+ ctx.flags2 |= _resolve_export_mvs_flag() ctx.thread_type = av.codec.context.ThreadType.AUTO ctx.thread_count = thread_count mv_data = [] diff --git a/nemo_curator/stages/video/io/video_reader.py b/nemo_curator/stages/video/io/video_reader.py index b78145474f..0606c0c5c0 100644 --- a/nemo_curator/stages/video/io/video_reader.py +++ b/nemo_curator/stages/video/io/video_reader.py @@ -25,6 +25,7 @@ from nemo_curator.tasks.file_group import FileGroupTask from nemo_curator.tasks.video import Video, VideoTask from nemo_curator.utils.client_utils import FSPath, is_remote_url +from nemo_curator.utils.decoder_utils import SoftwareCodecMissingError @dataclass @@ -172,6 +173,9 @@ def _extract_and_validate_metadata(self, video: Video) -> bool: """ try: video.populate_metadata() + except SoftwareCodecMissingError as e: + logger.error(f"Skipping {video.input_video}: software codec missing for this stage. {e}") + return False except Exception as e: # noqa: BLE001 logger.warning(f"Failed to extract metadata for {video.input_video}: {e}") return False diff --git a/nemo_curator/utils/decoder_utils.py b/nemo_curator/utils/decoder_utils.py index 3d3441f251..b4bd645db8 100644 --- a/nemo_curator/utils/decoder_utils.py +++ b/nemo_curator/utils/decoder_utils.py @@ -38,6 +38,61 @@ from nemo_curator.utils.operation_utils import make_pipeline_named_temporary_file +class SoftwareCodecMissingError(RuntimeError): + """Raised when ffprobe fails to open a stream because the FFmpeg build lacks + a software decoder for the input codec (Curator's default image ships + NVDEC-only h264/hevc/av1 decoders). + + Carries the detected codec name (e.g. ``"h264"``) so callers can produce a + targeted, actionable message. + """ + + def __init__(self, message: str, codec: str | None = None) -> None: + super().__init__(message) + self.codec = codec + + +# MP4 sample-description FOURCCs for codecs that the default Curator image +# can only decode via NVDEC. Used by the heuristic header sniff below. 
+_MP4_GPU_ONLY_CODEC_TAGS: dict[bytes, str] = { + b"avc1": "h264", + b"avc3": "h264", + b"hev1": "hevc", + b"hvc1": "hevc", + b"av01": "av1", +} + + +def _detect_codec_from_mp4_header(path: Path, *, scan_bytes: int = 1_048_576) -> str | None: + """Heuristically detect the video codec of an MP4/MOV file by scanning its + first ``scan_bytes`` for known sample-description FOURCC tags. + + This avoids invoking ffprobe (which is what's failing) and is intentionally + permissive: a substring match in the header is enough for diagnostic + purposes. Returns the codec name or ``None`` if no known tag was found or + the file could not be read. + """ + try: + with Path(path).open("rb") as fh: + head = fh.read(scan_bytes) + except OSError: + return None + for tag, codec in _MP4_GPU_ONLY_CODEC_TAGS.items(): + if tag in head: + return codec + return None + + +# Substrings in ffprobe's stderr that indicate the failure was a codec/CUDA +# initialization problem rather than e.g. a missing/corrupt file. +_CODEC_OPEN_FAILURE_SIGNALS: tuple[str, ...] = ( + "CUDA_ERROR_NO_DEVICE", + "no CUDA-capable device", + "Failed loading nvcuvid", + "Cannot load libnvcuvid", +) + + class Resolution(NamedTuple): """Container for video frame dimensions. @@ -112,7 +167,7 @@ def to_str(self) -> str: return f"{self.extraction_policy!s}-{int(self.target_fps * 1000)}" -def extract_video_metadata(video: str | bytes) -> VideoMetadata: +def extract_video_metadata(video: str | bytes) -> VideoMetadata: # noqa: C901, PLR0912 """Extract metadata from a video file using ffprobe. Args: @@ -142,7 +197,22 @@ def extract_video_metadata(video: str | bytes) -> VideoMetadata: error_msg = f"{real_video_path} not found!" 
raise FileNotFoundError(error_msg) cmd.append(real_video_path.as_posix()) - result = subprocess.run(cmd, input=inp, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) # noqa: UP022, S603 + try: + result = subprocess.run(cmd, input=inp, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) # noqa: UP022, S603 + except subprocess.CalledProcessError as e: + stderr = (e.stderr or b"").decode(errors="replace") + if any(signal in stderr for signal in _CODEC_OPEN_FAILURE_SIGNALS): + codec = _detect_codec_from_mp4_header(real_video_path) + msg = ( + f"ffprobe could not open the video codec for {real_video_path}" + f"{f' (detected {codec})' if codec else ''}. " + "The container's ffprobe ships NVDEC-only h264/hevc/av1 decoders, " + "and no GPU is visible to this stage. To process h264/hevc/av1 inputs, " + "install full ffmpeg inside the container with:\n" + " bash /opt/Curator/docker/common/install_h264_support.sh" + ) + raise SoftwareCodecMissingError(msg, codec=codec) from e + raise video_info = json.loads(result.stdout) video_stream, audio_codec = None, None diff --git a/pyproject.toml b/pyproject.toml index a0ab3fac3e..a501f5b91b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -213,7 +213,7 @@ text_cuda12 = [ # Video Curation Dependencies video_cpu = [ - "av==13.1.0", + "av==15.1.0", "opencv-python", "torchvision", "einops", @@ -316,6 +316,10 @@ managed = true default-groups = ["dev", "test"] index-strategy = "unsafe-best-match" no-build-isolation-package = ["flash-attn"] +# NOTE: PyAV is forced to source-build only inside the Docker image (so it +# links against the same FFmpeg as /usr/local/bin/ffmpeg). On bare runners +# without FFmpeg 7+ headers the source build fails, so the constraint lives +# in docker/Dockerfile's `uv sync --no-binary-package av` instead of here. 
constraint-dependencies = [ "aiohttp>=3.13.3", # Addresses CVE GHSA-6mq8-rvhq-8wgg "cryptography>=46.0.6", # Address CVE GHSA-m959-cc7f-wv43 diff --git a/tests/stages/video/clipping/test_clip_transcoding_stage.py b/tests/stages/video/clipping/test_clip_transcoding_stage.py index 35a054adf5..0aadd0f7e2 100644 --- a/tests/stages/video/clipping/test_clip_transcoding_stage.py +++ b/tests/stages/video/clipping/test_clip_transcoding_stage.py @@ -22,7 +22,6 @@ import pytest from nemo_curator.backends.base import WorkerMetadata -from nemo_curator.stages.resources import Resources from nemo_curator.stages.video.clipping.clip_extraction_stages import ClipTranscodingStage from nemo_curator.tasks.video import Clip, Video, VideoMetadata, VideoTask @@ -46,7 +45,7 @@ def setup_method(self) -> None: """Set up test fixtures.""" self.stage = ClipTranscodingStage( num_cpus_per_worker=4.0, - encoder="libx264", + encoder="h264_nvenc", encoder_threads=2, encode_batch_size=8, use_hwaccel=False, @@ -99,9 +98,71 @@ def test_setup_invalid_encoder(self) -> None: """Test setup with invalid encoder raises ValueError.""" stage = ClipTranscodingStage(encoder="invalid_encoder") - with pytest.raises(ValueError, match="Expected encoder of"): + with pytest.raises(ValueError, match="Expected encoder in"): stage.setup() + def test_setup_libvpx_vp9_valid(self) -> None: + """Test setup accepts libvpx-vp9 as a valid encoder.""" + stage = ClipTranscodingStage(encoder="libvpx-vp9", use_hwaccel=False) + # Should not raise + stage.setup() + + def test_setup_libvpx_vp9_with_hwaccel_raises(self) -> None: + """Test setup rejects libvpx-vp9 combined with use_hwaccel=True.""" + stage = ClipTranscodingStage(encoder="libvpx-vp9", use_hwaccel=True) + + with pytest.raises(ValueError, match="use_hwaccel is not supported with libvpx-vp9"): + stage.setup() + + @patch("nemo_curator.stages.video.clipping.clip_extraction_stages.subprocess.run") + 
@patch("nemo_curator.stages.video.clipping.clip_extraction_stages.shutil.which") + def test_setup_libopenh264_available_passes(self, mock_which: MagicMock, mock_run: MagicMock) -> None: + """libopenh264 is accepted when the local FFmpeg build advertises it.""" + mock_which.return_value = "/usr/local/bin/ffmpeg" + mock_run.return_value = MagicMock(stdout="V..... libopenh264 OpenH264 H.264", returncode=0) + stage = ClipTranscodingStage(encoder="libopenh264") + stage.setup() # should not raise + mock_run.assert_called_once() + # Verify the probe used `ffmpeg -hide_banner -encoders` with the resolved path + cmd = mock_run.call_args.args[0] + assert cmd[0] == "/usr/local/bin/ffmpeg" + assert "-encoders" in cmd + + @patch("nemo_curator.stages.video.clipping.clip_extraction_stages.subprocess.run") + @patch("nemo_curator.stages.video.clipping.clip_extraction_stages.shutil.which") + def test_setup_libopenh264_unavailable_raises(self, mock_which: MagicMock, mock_run: MagicMock) -> None: + """libopenh264 raises a clear error when the local FFmpeg build lacks it.""" + mock_which.return_value = "/usr/local/bin/ffmpeg" + mock_run.return_value = MagicMock(stdout="V..... 
h264_nvenc NVIDIA NVENC", returncode=0) + stage = ClipTranscodingStage(encoder="libopenh264") + with pytest.raises(RuntimeError, match=r"libopenh264.*does not include it"): + stage.setup() + + @patch("nemo_curator.stages.video.clipping.clip_extraction_stages.shutil.which") + def test_setup_libopenh264_ffmpeg_missing_raises(self, mock_which: MagicMock) -> None: + """A missing FFmpeg binary surfaces as a RuntimeError pointing to the docs.""" + mock_which.return_value = None + stage = ClipTranscodingStage(encoder="libopenh264") + with pytest.raises(RuntimeError, match=r"Could not find `ffmpeg` on PATH"): + stage.setup() + + def test_post_init_libvpx_vp9_emits_perf_warning(self) -> None: + """Constructing a libvpx-vp9 stage logs the perf advisory.""" + import nemo_curator.stages.video.clipping.clip_extraction_stages as ces + + with patch.object(ces, "logger") as mock_logger: + ClipTranscodingStage(encoder="libvpx-vp9") + mock_logger.warning.assert_called_once() + assert "libvpx-vp9 is significantly slower" in mock_logger.warning.call_args[0][0] + + def test_post_init_h264_nvenc_no_perf_warning(self) -> None: + """h264_nvenc construction does not trigger the VP9 perf advisory.""" + import nemo_curator.stages.video.clipping.clip_extraction_stages as ces + + with patch.object(ces, "logger") as mock_logger: + ClipTranscodingStage(encoder="h264_nvenc") + assert not any("libvpx-vp9 is significantly slower" in str(c) for c in mock_logger.warning.call_args_list) + def test_ray_stage_spec(self) -> None: """Test that ray_stage_spec returns the correct values.""" spec = self.stage.ray_stage_spec() @@ -112,14 +173,6 @@ def test_ray_stage_spec(self) -> None: assert RayStageSpecKeys.IS_FANOUT_STAGE in spec assert spec[RayStageSpecKeys.IS_FANOUT_STAGE] is True - def test_resources_cpu_encoder(self) -> None: - """Test resource requirements for CPU encoders.""" - stage = ClipTranscodingStage(encoder="libx264", use_hwaccel=False, num_cpus_per_worker=6.0) - - resources = stage.resources 
- assert isinstance(resources, Resources) - assert resources.cpus == 6.0 - def test_process_no_clips(self) -> None: """Test processing when video has no clips.""" self.mock_video.clips = [] @@ -300,6 +353,22 @@ def test_resources_fractional_gpu_allocation(self) -> None: stage_multi = ClipTranscodingStage(use_hwaccel=True, encoder="h264_nvenc", nb_streams_per_gpu=4) assert stage_multi.resources.gpus == 0.25 + def test_resources_libvpx_vp9_uses_cpu(self) -> None: + """Test that libvpx-vp9 allocates CPU resources, not GPU.""" + stage = ClipTranscodingStage(encoder="libvpx-vp9", use_hwaccel=False, num_cpus_per_worker=8.0) + assert stage.resources.cpus == 8.0 + assert stage.resources.gpus == 0 + + def test_add_hwaccel_options_libvpx_vp9_ignored(self) -> None: + """Test that hwaccel options are not added for libvpx-vp9 even if requested.""" + command: list[str] = [] + # Bypass setup-time validation by constructing without use_hwaccel, + # then assert the command builder is also defensive. + stage = ClipTranscodingStage(encoder="libvpx-vp9", use_hwaccel=False) + stage.use_hwaccel = True # simulate misconfiguration + stage._add_hwaccel_options(command) + assert "-hwaccel" not in command + def test_add_input_options(self) -> None: """Test adding input options to FFmpeg command.""" command = [] @@ -317,7 +386,7 @@ def test_add_input_options(self) -> None: assert "-map" in command assert "0:v:0" in command assert "-c:v" in command - assert "libx264" in command + assert "h264_nvenc" in command def test_add_video_encoding_options_no_bitrate(self) -> None: """Test adding video encoding options without bit rate.""" @@ -338,6 +407,50 @@ def test_add_video_encoding_options_with_bitrate(self) -> None: assert "-b:v" in command assert "5000K" in command + def test_add_video_encoding_options_libvpx_vp9_crf_mode(self) -> None: + """Test that libvpx-vp9 emits CRF-mode options when no bitrate is given.""" + stage = ClipTranscodingStage(encoder="libvpx-vp9") + command: list[str] = [] + + 
stage._add_video_encoding_options(command, None, False) + + # CRF mode: -b:v 0 plus -crf + assert "-b:v" in command + assert "0" in command + assert "-crf" in command + # VP9 threading/speed knobs + assert "-row-mt" in command + assert "-tile-columns" in command + assert "-deadline" in command + assert "-cpu-used" in command + # Must not contain NVENC-only flags + assert "-rc:v" not in command + assert "-cq:v" not in command + assert "-tune" not in command + + def test_add_video_encoding_options_libvpx_vp9_with_bitrate(self) -> None: + """Test that libvpx-vp9 honors an explicit bitrate (skips CRF mode).""" + stage = ClipTranscodingStage(encoder="libvpx-vp9") + command: list[str] = [] + + stage._add_video_encoding_options(command, "5000K", False) + + # Bitrate path: -b:v 5000K, no -crf + assert "5000K" in command + assert "-crf" not in command + # Threading/speed knobs still present + assert "-row-mt" in command + + def test_add_video_encoding_options_libvpx_vp9_force_pix_fmt(self) -> None: + """Test that libvpx-vp9 forces yuv420p when force_pix_fmt is True.""" + stage = ClipTranscodingStage(encoder="libvpx-vp9") + command: list[str] = [] + + stage._add_video_encoding_options(command, None, True) + + assert "-pix_fmt" in command + assert "yuv420p" in command + def test_add_output_options(self) -> None: """Test adding output options to FFmpeg command.""" command = [] diff --git a/tests/stages/video/filtering/test_motion_vector_backend.py b/tests/stages/video/filtering/test_motion_vector_backend.py index bef19b8fe5..ba8d889f75 100644 --- a/tests/stages/video/filtering/test_motion_vector_backend.py +++ b/tests/stages/video/filtering/test_motion_vector_backend.py @@ -13,12 +13,15 @@ # limitations under the License. 
import io +import types from unittest.mock import Mock, patch +import av import numpy as np import pytest import torch +from nemo_curator.stages.video.filtering import motion_vector_backend from nemo_curator.stages.video.filtering.motion_vector_backend import ( DecodedData, MotionInfo, @@ -260,12 +263,13 @@ def test_successful_decode(self): mock_open.return_value = mock_container - with patch("av.sidedata.sidedata.Type.MOTION_VECTORS", create=True): - result = decode_for_motion(mock_video) + # PyAV >= 15 ships av.sidedata.sidedata.Type.MOTION_VECTORS as a real enum + # member, so we no longer need (and cannot) patch it in. + result = decode_for_motion(mock_video) - assert isinstance(result, DecodedData) - assert len(result.frames) == 1 - assert result.frame_size == torch.Size([480, 640, 3]) + assert isinstance(result, DecodedData) + assert len(result.frames) == 1 + assert result.frame_size == torch.Size([480, 640, 3]) def test_no_motion_vectors(self): """Test decode with no motion vectors.""" @@ -356,11 +360,33 @@ def test_custom_parameters(self): mock_open.return_value = mock_container - with patch("av.sidedata.sidedata.Type.MOTION_VECTORS", create=True): - result = decode_for_motion(mock_video, thread_count=8, target_fps=5.0, target_duration_ratio=0.3) + # PyAV >= 15 ships av.sidedata.sidedata.Type.MOTION_VECTORS as a real enum + # member, so we no longer need (and cannot) patch it in. + result = decode_for_motion(mock_video, thread_count=8, target_fps=5.0, target_duration_ratio=0.3) + + assert isinstance(result, DecodedData) + assert result.frame_size == torch.Size([480, 640, 3]) + + +class TestResolveExportMvsFlag: + """Pin PyAV API-drift compat for the EXPORT_MVS bitflag. + + PyAV <=13 exposed it as ``Flags2.EXPORT_MVS``; PyAV >=15 renamed it to the + lowercase ``Flags2.export_mvs``. A future rename would silently produce + zero motion vectors at runtime; these tests catch it as a unit-test failure. 
+ """ + + def test_prefers_lowercase_export_mvs(self, monkeypatch: pytest.MonkeyPatch) -> None: + # PyAV >=15 path: only the lowercase name exists. + fake_flags2 = types.SimpleNamespace(export_mvs=42) + monkeypatch.setattr(av.codec.context, "Flags2", fake_flags2) + assert motion_vector_backend._resolve_export_mvs_flag() == 42 - assert isinstance(result, DecodedData) - assert result.frame_size == torch.Size([480, 640, 3]) + def test_falls_back_to_uppercase_export_mvs(self, monkeypatch: pytest.MonkeyPatch) -> None: + # PyAV <=13 path: only the uppercase name exists. + fake_flags2 = types.SimpleNamespace(EXPORT_MVS=84) + monkeypatch.setattr(av.codec.context, "Flags2", fake_flags2) + assert motion_vector_backend._resolve_export_mvs_flag() == 84 class TestCheckIfSmallMotion: diff --git a/tests/utils/test_decoder_utils.py b/tests/utils/test_decoder_utils.py index f8ccf0a8a0..1d2345a20c 100644 --- a/tests/utils/test_decoder_utils.py +++ b/tests/utils/test_decoder_utils.py @@ -17,6 +17,7 @@ import io import json import pathlib +import subprocess import tempfile from unittest.mock import Mock, patch @@ -27,7 +28,9 @@ FrameExtractionPolicy, FrameExtractionSignature, Resolution, + SoftwareCodecMissingError, VideoMetadata, + _detect_codec_from_mp4_header, _make_video_stream, decode_video_cpu, extract_frames, @@ -252,6 +255,114 @@ def test_extract_video_metadata_file_not_found(self) -> None: with pytest.raises(FileNotFoundError, match="not found"): extract_video_metadata(non_existent_path) + @pytest.mark.parametrize( + "stderr_signal", + [ + b"[CUDA @ 0x0] cu->cuInit(0) failed -> CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected", + b"[h264_cuvid] no CUDA-capable device is detected", + b"[h264_cuvid] Cannot load libnvcuvid.so.1", + b"[h264_cuvid] Failed loading nvcuvid.", + ], + ) + @patch("subprocess.run") + def test_extract_video_metadata_raises_software_codec_missing( + self, mock_subprocess: Mock, stderr_signal: bytes + ) -> None: + """When ffprobe fails because the 
codec/CUDA cannot be opened, raise + SoftwareCodecMissingError with a hint pointing at install_h264_support.sh.""" + mock_subprocess.side_effect = subprocess.CalledProcessError( + returncode=1, cmd=["ffprobe"], stderr=stderr_signal + ) + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + # Minimal mp4 header containing an avc1 (h264) sample-description tag + # so that _detect_codec_from_mp4_header reports "h264". + tmp.write(b"\x00\x00\x00\x18ftypisom\x00\x00\x02\x00isomiso2avc1mp41") + tmp.write(b"\x00" * 64) + tmp_path = tmp.name + try: + with pytest.raises(SoftwareCodecMissingError) as excinfo: + extract_video_metadata(tmp_path) + assert excinfo.value.codec == "h264" + assert "install_h264_support.sh" in str(excinfo.value) + finally: + pathlib.Path(tmp_path).unlink() + + @pytest.mark.parametrize( + "stderr_signal", + [ + # A generic "Could not open codec" without any CUDA signal must NOT + # be remapped — common cause is a corrupt file or unsupported codec + # profile, neither of which install_h264_support.sh fixes. 
+ b"[vp9 @ 0x0] Could not open codec for input stream 0", + b"some other generic ffprobe failure", + b"Invalid data found when processing input", + ], + ) + @patch("subprocess.run") + def test_extract_video_metadata_reraises_unrelated_failure( + self, mock_subprocess: Mock, stderr_signal: bytes + ) -> None: + """ffprobe failures unrelated to NVDEC/CUDA must not be remapped.""" + mock_subprocess.side_effect = subprocess.CalledProcessError( + returncode=1, cmd=["ffprobe"], stderr=stderr_signal + ) + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + tmp_path = tmp.name + try: + with pytest.raises(subprocess.CalledProcessError): + extract_video_metadata(tmp_path) + finally: + pathlib.Path(tmp_path).unlink() + + +class TestSoftwareCodecMissingError: + """SoftwareCodecMissingError carries an actionable message and the detected codec.""" + + def test_inherits_runtime_error_and_carries_codec(self) -> None: + err = SoftwareCodecMissingError("oops", codec="hevc") + assert isinstance(err, RuntimeError) + assert err.codec == "hevc" + assert str(err) == "oops" + + def test_codec_defaults_to_none(self) -> None: + err = SoftwareCodecMissingError("oops") + assert err.codec is None + + +class TestDetectCodecFromMp4Header: + """_detect_codec_from_mp4_header is a heuristic FOURCC scan over the file head.""" + + @pytest.mark.parametrize( + ("tag", "expected"), + [ + (b"avc1", "h264"), + (b"avc3", "h264"), + (b"hev1", "hevc"), + (b"hvc1", "hevc"), + (b"av01", "av1"), + ], + ) + def test_detects_known_tags(self, tag: bytes, expected: str) -> None: + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + tmp.write(b"\x00" * 32 + tag + b"\x00" * 32) + tmp_path = pathlib.Path(tmp.name) + try: + assert _detect_codec_from_mp4_header(tmp_path) == expected + finally: + tmp_path.unlink() + + def test_returns_none_for_unknown_content(self) -> None: + with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp: + tmp.write(b"plain bytes with no 
codec FOURCC") + tmp_path = pathlib.Path(tmp.name) + try: + assert _detect_codec_from_mp4_header(tmp_path) is None + finally: + tmp_path.unlink() + + def test_returns_none_for_unreadable_path(self) -> None: + assert _detect_codec_from_mp4_header(pathlib.Path("/path/that/does/not/exist.mp4")) is None + @patch("subprocess.run") @patch("nemo_curator.utils.decoder_utils.make_pipeline_named_temporary_file") def test_extract_video_metadata_no_video_stream(self, mock_temp_file: Mock, mock_subprocess: Mock) -> None: diff --git a/tutorials/video/getting-started/README.md b/tutorials/video/getting-started/README.md index c4a4f95a4f..bafdc6d89b 100644 --- a/tutorials/video/getting-started/README.md +++ b/tutorials/video/getting-started/README.md @@ -50,10 +50,10 @@ python video_split_clip_example.py \ --transnetv2-min-length-s 2.0 \ --transnetv2-max-length-s 10.0 \ --embedding-algorithm cosmos-embed1-224p \ - --transcode-encoder libopenh264 \ + --transcode-encoder h264_nvenc \ --verbose ``` -This example demonstrates a more advanced workflow than the minimal example by using scene-aware splitting with the TransNetV2 algorithm (which detects scene boundaries instead of fixed intervals), applies the Cosmos-Embed1 embedding model to each clip, transcodes the output using the `libopenh264` encoder, and enables verbose logging for more detailed output. +This example demonstrates a more advanced workflow than the minimal example by using scene-aware splitting with the TransNetV2 algorithm (which detects scene boundaries instead of fixed intervals), applies the Cosmos-Embed1 embedding model to each clip, transcodes the output using the `h264_nvenc` encoder, and enables verbose logging for more detailed output. On GPUs without NVENC (such as A100/H100), pass `--transcode-encoder libvpx-vp9` instead — VP9 is a royalty-free CPU encoder that produces clips in the same `.mp4` container. 
**Full pipeline with captions and filtering**: ```bash diff --git a/tutorials/video/getting-started/video_split_clip_example.py b/tutorials/video/getting-started/video_split_clip_example.py index f1be6c19b4..ddbdf7ec95 100644 --- a/tutorials/video/getting-started/video_split_clip_example.py +++ b/tutorials/video/getting-started/video_split_clip_example.py @@ -13,6 +13,10 @@ # limitations under the License. import argparse +import re +import shutil +import subprocess +import sys from nemo_curator.backends.xenna import XennaExecutor from nemo_curator.pipeline import Pipeline @@ -234,7 +238,57 @@ def create_video_splitting_pipeline(args: argparse.Namespace) -> Pipeline: # no return pipeline +# Encoders that produce h264 clip output. ClipWriter's metadata extraction +# runs ffprobe in a CPU-only Ray actor, so it needs a software h264 decoder +# (NVDEC-only h264 won't work without GPU visibility in that actor). +_H264_PRODUCING_ENCODERS = frozenset({"h264_nvenc", "libopenh264"}) + +# Matches the ` V..... h264 ` row in `ffmpeg -decoders`, excluding `h264_cuvid` etc. +_H264_SW_DECODER_LINE = re.compile(r"^\s+V\S*\s+h264\s") + + +def _h264_software_decoder_available() -> bool: + if shutil.which("ffmpeg") is None: + return False + try: + out = subprocess.run( + ["ffmpeg", "-hide_banner", "-decoders"], # noqa: S607 + check=True, + capture_output=True, + text=True, + timeout=10, + ).stdout + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError): + return False + return any(_H264_SW_DECODER_LINE.match(line) for line in out.splitlines()) + + +def _preflight_check_h264_decoder(encoder: str) -> None: + """Fail-fast if the chosen transcode encoder produces h264 but the system + ffmpeg lacks a software h264 decoder — ClipWriter would otherwise crash on + every transcoded clip in a CPU-only Ray actor. 
+ """ + if encoder not in _H264_PRODUCING_ENCODERS: + return + if _h264_software_decoder_available(): + return + msg = ( + f"\nERROR: --transcode-encoder={encoder} produces h264 clips, but the " + "container's ffmpeg does not include a software h264 decoder.\n" + "ClipWriter's metadata extraction (ffprobe in a CPU-only Ray actor) " + "will fail on every transcoded clip.\n\n" + "Fix one of:\n" + " 1. Install software h264/hevc/av1 decoders inside the container:\n" + " bash /opt/Curator/docker/common/install_h264_support.sh\n" + " 2. Pick a transcode encoder whose output codec the system ffmpeg " + "can software-decode (e.g. --transcode-encoder libvpx-vp9).\n" + ) + print(msg, file=sys.stderr) + sys.exit(2) + + def main(args: argparse.Namespace) -> None: + _preflight_check_h264_decoder(args.transcode_encoder) pipeline = create_video_splitting_pipeline(args) # Print pipeline description @@ -382,9 +436,15 @@ def create_video_splitting_argparser() -> argparse.ArgumentParser: # noqa: PLR0 parser.add_argument( "--transcode-encoder", type=str, - default="libopenh264", - choices=["libopenh264", "h264_nvenc", "libx264"], - help="Codec for transcoding clips; None to skip transcoding.", + default="h264_nvenc", + choices=["h264_nvenc", "libvpx-vp9", "libopenh264"], + help=( + "Codec for transcoding clips. Use `h264_nvenc` on NVENC-equipped GPUs; " + "use `libvpx-vp9` (CPU) as a royalty-free fallback on GPUs without NVENC " + "such as A100/H100; `libopenh264` is accepted but requires a user-" + "installed FFmpeg build (Curator does not ship it — see the " + "Bring-Your-Own H.264 docs)." 
+ ), ) parser.add_argument( "--transcode-encoder-threads", diff --git a/uv.lock b/uv.lock index fd786a09ae..557e349df7 100644 --- a/uv.lock +++ b/uv.lock @@ -570,28 +570,31 @@ wheels = [ [[package]] name = "av" -version = "13.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0c/9d/486d31e76784cc0ad943f420c5e05867263b32b37e2f4b0f7f22fdc1ca3a/av-13.1.0.tar.gz", hash = "sha256:d3da736c55847d8596eb8c26c60e036f193001db3bc5c10da8665622d906c17e", size = 3957908, upload-time = "2024-10-06T04:54:57.507Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/54/c4227080c9700384db90072ace70d89b6a288b3748bd2ec0e32580a49e7f/av-13.1.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:867385e6701464a5c95903e24d2e0df1c7e0dbf211ed91d0ce639cd687373e10", size = 24255112, upload-time = "2024-10-06T04:52:48.49Z" }, - { url = "https://files.pythonhosted.org/packages/32/4a/eb9348231655ca99b200b380f4edbceff7358c927a285badcc84b18fb1c9/av-13.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cb7a3f319401a46b0017771268ff4928501e77cf00b1a2aa0721e20b2fd1146e", size = 19467930, upload-time = "2024-10-06T04:52:52.118Z" }, - { url = "https://files.pythonhosted.org/packages/14/c7/48c80252bdbc3a75a54dd205a7fab8f613914009b9e5416202757208e040/av-13.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad904f860147bceaca65b0d3174a8153f35c570d465161d210f1879970b15559", size = 32207671, upload-time = "2024-10-06T04:52:55.82Z" }, - { url = "https://files.pythonhosted.org/packages/f9/66/3332c7fa8c43b65680a94f279ea3e832b5500de3a1392bac6112881e984b/av-13.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a906e017b29d0eb80d9ccf7a98d19268122da792dbb68eb741cfebba156e6aed", size = 31520911, upload-time = "2024-10-06T04:52:59.231Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/bb/2e03acb9b27591d97f700a3a6c27cfd1bc53fa148177747eda8a70cca1e9/av-13.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce894d7847897da7be63277a0875bd93c51327134ac226c67978de014c7979f", size = 34048399, upload-time = "2024-10-06T04:53:03.934Z" }, - { url = "https://files.pythonhosted.org/packages/85/44/527aa3b65947d42cfe829326026edf0cd1a8c459390076034be275616c36/av-13.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:384bcdb5fc3238a263a5a25cc9efc690859fa4148cc4b07e00fae927178db22a", size = 25779569, upload-time = "2024-10-06T04:53:07.582Z" }, - { url = "https://files.pythonhosted.org/packages/9b/aa/4bdd8ce59173574fc6e0c282c71ee6f96fca82643d97bf172bc4cb5a5674/av-13.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:261dbc3f4b55f4f8f3375b10b2258fca7f2ab7a6365c01bc65e77a0d5327a195", size = 24268674, upload-time = "2024-10-06T04:53:11.251Z" }, - { url = "https://files.pythonhosted.org/packages/17/b4/b267dd5bad99eed49ec6731827c6bcb5ab03864bf732a7ebb81e3df79911/av-13.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83d259ef86b9054eb914bc7c6a7f6092a6d75cb939295e70ee979cfd92a67b99", size = 19475617, upload-time = "2024-10-06T04:53:13.832Z" }, - { url = "https://files.pythonhosted.org/packages/68/32/4209e51f54d7b54a1feb576d309c671ed1ff437b54fcc4ec68c239199e0a/av-13.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3b4d3ca159eceab97e3c0fb08fe756520fb95508417f76e48198fda2a5b0806", size = 32468873, upload-time = "2024-10-06T04:53:17.639Z" }, - { url = "https://files.pythonhosted.org/packages/b6/d8/c174da5f06b24f3c9e36f91fd02a7411c39da9ce792c17964260d4be675e/av-13.1.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40e8f757e373b73a2dc4640852a00cce4a4a92ef19b2e642a96d6994cd1fffbf", size = 31818484, upload-time = "2024-10-06T04:53:21.509Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/22/0dd8d1d5cad415772bb707d16aea8b81cf75d340d11d3668eea43468c730/av-13.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8aaec2c0bfd024359db3821d679009d4e637e1bee0321d20f61c54ed6b20f41", size = 34398652, upload-time = "2024-10-06T04:53:25.798Z" }, - { url = "https://files.pythonhosted.org/packages/7b/ff/48fa68888b8d5bae36d915556ff18f9e5fdc6b5ff5ae23dc4904c9713168/av-13.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:5ea0deab0e6a739cb742fba2a3983d8102f7516a3cdf3c46669f3cac0ed1f351", size = 25781343, upload-time = "2024-10-06T04:53:29.577Z" }, - { url = "https://files.pythonhosted.org/packages/82/6e/cdce12e534570df37d3fdcb3a74851d39e9ab79d388f3174dea9785a011a/av-13.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:47642ebaebfe20519b2391bd5b7c38b596efcd052bfd09c8d33058f94ddd0fd6", size = 24229340, upload-time = "2024-10-06T04:53:33.25Z" }, - { url = "https://files.pythonhosted.org/packages/7c/88/5359aeada9ea509426f2db63b6531833824a1b02470667b103479ddea7ae/av-13.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2f079c2daa3ae06557b3f6e9bed4fb9c876e8012175bec645ccd007199a302db", size = 19436445, upload-time = "2024-10-06T04:53:36.573Z" }, - { url = "https://files.pythonhosted.org/packages/b4/d4/64995e5b800476c86dae4ea1444a0eac44e2c4985fac6401b08401e2df11/av-13.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f0de8252deeeb1887637e88d4d9d18514e5cfe276bdb9e6ca8e9eef89d1667a", size = 32120549, upload-time = "2024-10-06T04:53:39.752Z" }, - { url = "https://files.pythonhosted.org/packages/68/76/9910694cf87d2d308d851f5b2b5c5b20f7f55411f596e2c158fb13bf84a3/av-13.1.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9ad0024f4def11b0cedfeee478fa6c6fd7ed3955e13387e0f27261fdda6121b4", size = 31495305, upload-time = "2024-10-06T04:53:43.661Z" }, - { url = 
"https://files.pythonhosted.org/packages/6a/a8/cd92de947b9595a0eb2c64e6f7ba295aac2687972050ae092173c2f6ea0c/av-13.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb88e2590eaed45233eb117f1dfab1a43ed9a997b2c46da9f08468dd00f14895", size = 34065325, upload-time = "2024-10-06T04:53:47.25Z" }, - { url = "https://files.pythonhosted.org/packages/9d/d0/9869fcbd66422df2033d4b78a663e3c64aa6fe7eb9189c811d60f69d9871/av-13.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:c927e4fa4f6aeed4340b3e3b16b237d7cb743e5c1a55b92307407590ca4112aa", size = 25754728, upload-time = "2024-10-06T04:53:50.603Z" }, +version = "15.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/c3/83e6e73d1592bc54436eae0bc61704ae0cff0c3cfbde7b58af9ed67ebb49/av-15.1.0.tar.gz", hash = "sha256:39cda2dc810e11c1938f8cb5759c41d6b630550236b3365790e67a313660ec85", size = 3774192, upload-time = "2025-08-30T04:41:56.076Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/58/4e44cf6939be7aba96a4abce024e1be11ba7539ecac74d09369b8c03aa05/av-15.1.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b785948762a8d45fc58fc24a20251496829ace1817e9a7a508a348d6de2182c3", size = 21767323, upload-time = "2025-08-30T04:39:37.989Z" }, + { url = "https://files.pythonhosted.org/packages/9b/f6/a946544cdb49f6d892d2761b1d61a8bc6ce912fe57ba06769bdc640c0a7f/av-15.1.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:9c7131494a3a318612b4ee4db98fe5bc50eb705f6b6536127c7ab776c524fd8b", size = 26946268, upload-time = "2025-08-30T04:39:40.601Z" }, + { url = "https://files.pythonhosted.org/packages/70/7c/b33513c0af73d0033af59a98f035b521c5b93445a6af7e9efbf41a6e8383/av-15.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2b9623ae848625c59213b610c8665817924f913580c7c5c91e0dc18936deb00d", size = 38062118, upload-time = "2025-08-30T04:39:43.928Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/95/31b7fb34f9fea7c7389240364194f4f56ad2d460095038cc720f50a90bb3/av-15.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c8ef597087db560514617143532b1fafc4825ebb2dda9a22418f548b113a0cc7", size = 39571086, upload-time = "2025-08-30T04:39:47.109Z" }, + { url = "https://files.pythonhosted.org/packages/e7/b0/7b0b45474a4e90c35c11d0032947d8b3c7386872957ce29c6f12add69a74/av-15.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:08eac47a90ebae1e2bd5935f400dd515166019bab4ff5b03c4625fa6ac3a0a5e", size = 40112634, upload-time = "2025-08-30T04:39:50.981Z" }, + { url = "https://files.pythonhosted.org/packages/aa/04/038b94bc9a1ee10a451c867d4a2fc91e845f83bfc2dae9df25893abcb57f/av-15.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d3f66ff200ea166e606cb3c5cb1bd2fc714effbec2e262a5d67ce60450c8234a", size = 40878695, upload-time = "2025-08-30T04:39:54.493Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3d/9f8f96c0deeaaf648485a3dbd1699b2f0580f2ce8a36cb616c0138ba7615/av-15.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:57b99544d91121b8bea570e4ddf61700f679a6b677c1f37966bc1a22e1d4cd5c", size = 31335683, upload-time = "2025-08-30T04:39:57.861Z" }, + { url = "https://files.pythonhosted.org/packages/d1/58/de78b276d20db6ffcd4371283df771721a833ba525a3d57e753d00a9fe79/av-15.1.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:40c5df37f4c354ab8190c6fd68dab7881d112f527906f64ca73da4c252a58cee", size = 21760991, upload-time = "2025-08-30T04:40:00.801Z" }, + { url = "https://files.pythonhosted.org/packages/56/cc/45f85775304ae60b66976360d82ba5b152ad3fd91f9267d5020a51e9a828/av-15.1.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:af455ce65ada3d361f80c90c810d9bced4db5655ab9aa513024d6c71c5c476d5", size = 26953097, upload-time = "2025-08-30T04:40:03.998Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/f8/2d781e5e71d02fc829487e775ccb1185e72f95340d05f2e84eb57a11e093/av-15.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86226d2474c80c3393fa07a9c366106029ae500716098b72b3ec3f67205524c3", size = 38319710, upload-time = "2025-08-30T04:40:07.701Z" }, + { url = "https://files.pythonhosted.org/packages/ac/13/37737ef2193e83862ccacff23580c39de251da456a1bf0459e762cca273c/av-15.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:11326f197e7001c4ca53a83b2dbc67fd39ddff8cdf62ce6be3b22d9f3f9338bd", size = 39915519, upload-time = "2025-08-30T04:40:11.066Z" }, + { url = "https://files.pythonhosted.org/packages/26/e9/e8032c7b8f2a4129a03f63f896544f8b7cf068e2db2950326fa2400d5c47/av-15.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a631ea879cc553080ee62874f4284765c42ba08ee0279851a98a85e2ceb3cc8d", size = 40286166, upload-time = "2025-08-30T04:40:14.561Z" }, + { url = "https://files.pythonhosted.org/packages/e2/23/612c0fd809444d04b8387a2dfd942ccc77829507bd78a387ff65a9d98c24/av-15.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8f383949b010c3e731c245f80351d19dc0c08f345e194fc46becb1cb279be3ff", size = 41150592, upload-time = "2025-08-30T04:40:17.951Z" }, + { url = "https://files.pythonhosted.org/packages/15/74/6f8e38a3b0aea5f28e72813672ff45b64615f2c69e6a4a558718c95edb9f/av-15.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d5921aa45f4c1f8c1a8d8185eb347e02aa4c3071278a2e2dd56368d54433d643", size = 31336093, upload-time = "2025-08-30T04:40:21.393Z" }, + { url = "https://files.pythonhosted.org/packages/2e/bc/78b2ffa8235eeffc29aa4a8cc47b02e660cfec32f601f39a00975fb06d0e/av-15.1.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:2f77853c3119c59d1bff4214ccbe46e3133eccff85ed96adee51c68684443f4e", size = 21726244, upload-time = "2025-08-30T04:40:24.14Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/99/66d69453a2dce028e6e8ebea085d90e880aac03d3a3ab7d8ec16755ffd75/av-15.1.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:c0bc4471c156a0a1c70a607502434f477bc8dfe085eef905e55b4b0d66bcd3a5", size = 26918663, upload-time = "2025-08-30T04:40:27.557Z" }, + { url = "https://files.pythonhosted.org/packages/fa/51/1a7dfbeda71f2772bc46d758af0e7fab1cc8388ce4bc7f24aecbc4bfd764/av-15.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:37839d4fa1407f047af82560dfc0f94d8d6266071eff49e1cbe16c4483054621", size = 38041408, upload-time = "2025-08-30T04:40:30.811Z" }, + { url = "https://files.pythonhosted.org/packages/d7/97/2c4e0288ad4359b6064cb06ae79c2ff3a84ac73d27e91f2161b75fcd86fa/av-15.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:729179cd8622815e8b6f6854d13a806fe710576e08895c77e5e4ad254609de9a", size = 39642563, upload-time = "2025-08-30T04:40:34.617Z" }, + { url = "https://files.pythonhosted.org/packages/ea/94/2362502149e276d00957edabcc201a5f4d5109a8a7b4fd30793714a532f3/av-15.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4abdf085bfa4eec318efccff567831b361ea56c045cc38366811552e3127c665", size = 40022119, upload-time = "2025-08-30T04:40:37.703Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/1a0ce1b3835d9728da0a7a54aeffaa0a2b1a88405eaed9322efd55212a54/av-15.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f985661644879e4520d28a995fcb2afeb951bc15a1d51412eb8e5f36da85b6fe", size = 40885158, upload-time = "2025-08-30T04:40:40.952Z" }, + { url = "https://files.pythonhosted.org/packages/30/e6/054bb64e424d90b77ed5fc6a7358e4013fb436154c998fc90a89a186313f/av-15.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:7d7804a44c8048bb4b014a99353dd124663a12cd1d4613ba2bd3b457c3b1d539", size = 31312256, upload-time = "2025-08-30T04:40:44.224Z" }, ] [[package]] @@ -5494,7 +5497,7 @@ requires-dist = [ { name = "ai-dynamo", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 
'inference-server'", specifier = "==1.1.0" }, { name = "aiohttp", marker = "extra == 'translation-nmt'", specifier = ">=3.13.3" }, { name = "albumentations", marker = "extra == 'interleaved-cpu'" }, - { name = "av", marker = "extra == 'video-cpu'", specifier = "==13.1.0" }, + { name = "av", marker = "extra == 'video-cpu'", specifier = "==15.1.0" }, { name = "beautifulsoup4", marker = "extra == 'text-cpu'" }, { name = "boto3", marker = "extra == 'inference-server'", specifier = ">=1.35" }, { name = "boto3", marker = "extra == 'math-cpu'", specifier = ">=1.35" },