diff --git a/benchmarks/commit0/run_infer.py b/benchmarks/commit0/run_infer.py index 36af180a5..90e964911 100644 --- a/benchmarks/commit0/run_infer.py +++ b/benchmarks/commit0/run_infer.py @@ -24,7 +24,7 @@ construct_eval_output_dir, get_default_on_result_writer, ) -from benchmarks.utils.image_utils import create_docker_workspace, image_exists +from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import ( EvalInstance, @@ -213,7 +213,7 @@ def prepare_workspace( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry. " "Run 'benchmarks/commit0/build_images.py --push' to build and push it first." diff --git a/benchmarks/gaia/run_infer.py b/benchmarks/gaia/run_infer.py index fa0317a8e..36e472cbf 100644 --- a/benchmarks/gaia/run_infer.py +++ b/benchmarks/gaia/run_infer.py @@ -27,7 +27,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import create_docker_workspace, image_exists +from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput from benchmarks.utils.version import SDK_SHORT_SHA @@ -182,7 +182,7 @@ def prepare_workspace( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-gaia-binary" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry. " f"Run 'benchmarks/gaia/build_images.py --push' to build and push it first." diff --git a/benchmarks/multiswebench/run_infer.py b/benchmarks/multiswebench/run_infer.py index ec5137a2d..833fcdcc4 100644 --- a/benchmarks/multiswebench/run_infer.py +++ b/benchmarks/multiswebench/run_infer.py @@ -26,7 +26,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import image_exists +from benchmarks.utils.image_utils import remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import ( EvalInstance, @@ -234,7 +234,7 @@ def prepare_workspace( agent_server_image = ( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry, " "make sure to build, push it, and make it public accessible before using remote workspace." diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index cae96b871..5ace5419b 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -22,7 +22,7 @@ run_docker_build_layer, ) from benchmarks.utils.dataset import get_dataset -from benchmarks.utils.image_utils import image_exists +from benchmarks.utils.image_utils import remote_image_exists from openhands.sdk import get_logger @@ -94,7 +94,7 @@ def wrap_image(agent_image: str, push: bool = False) -> BuildOutput: For pushes, verify the base tag exists in the registry. For local builds, assume the tag is available locally or resolvable by Docker during buildx. """ - if push and not image_exists(agent_image): + if push and not remote_image_exists(agent_image): return BuildOutput( base_image=agent_image, tags=[], diff --git a/benchmarks/swebench/run_infer.py b/benchmarks/swebench/run_infer.py index 231de93e9..5021db16e 100644 --- a/benchmarks/swebench/run_infer.py +++ b/benchmarks/swebench/run_infer.py @@ -26,7 +26,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import image_exists +from benchmarks.utils.image_utils import remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import ( EvalInstance, @@ -195,7 +195,7 @@ def prepare_workspace( agent_server_image = ( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry, " "make sure to build, push it, and make it public accessible before using remote workspace." diff --git a/benchmarks/swebenchmultimodal/run_infer.py b/benchmarks/swebenchmultimodal/run_infer.py index 6a1b533a7..fdf5382ad 100644 --- a/benchmarks/swebenchmultimodal/run_infer.py +++ b/benchmarks/swebenchmultimodal/run_infer.py @@ -24,7 +24,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import image_exists +from benchmarks.utils.image_utils import remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import ( EvalInstance, @@ -187,7 +187,7 @@ def prepare_workspace( agent_server_image = ( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry, " "make sure to build, push it, and make it public accessible before using remote workspace." diff --git a/benchmarks/swefficiency/run_infer.py b/benchmarks/swefficiency/run_infer.py index cf5a902d2..f559d7f6e 100644 --- a/benchmarks/swefficiency/run_infer.py +++ b/benchmarks/swefficiency/run_infer.py @@ -21,7 +21,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import image_exists +from benchmarks.utils.image_utils import remote_image_exists from benchmarks.utils.models import ( EvalInstance, EvalMetadata, @@ -248,7 +248,7 @@ def prepare_workspace( remote_agent_image = ( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(remote_agent_image): + if not remote_image_exists(remote_agent_image): raise RuntimeError( f"Agent server image {remote_agent_image} does not exist in container registry, " "make sure to build, push it, and make it public accessible before using remote workspace." diff --git a/benchmarks/swtbench/build_eval_env_images.py b/benchmarks/swtbench/build_eval_env_images.py index fde30ed9c..2f0ea9862 100644 --- a/benchmarks/swtbench/build_eval_env_images.py +++ b/benchmarks/swtbench/build_eval_env_images.py @@ -12,7 +12,7 @@ from benchmarks.swtbench.config import EVAL_DEFAULTS from benchmarks.swtbench.image_utils import ensure_swt_bench_repo from benchmarks.utils.dataset import get_dataset -from benchmarks.utils.image_utils import image_exists as remote_image_exists +from benchmarks.utils.image_utils import remote_image_exists from openhands.sdk import get_logger diff --git a/benchmarks/swtbench/run_infer.py b/benchmarks/swtbench/run_infer.py index 36b78f265..132949427 100644 --- a/benchmarks/swtbench/run_infer.py +++ b/benchmarks/swtbench/run_infer.py @@ -18,7 +18,7 @@ get_default_on_result_writer, ) from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response -from benchmarks.utils.image_utils import create_docker_workspace, image_exists +from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists from benchmarks.utils.llm_config import load_llm_config from benchmarks.utils.models import ( EvalInstance, @@ -188,7 +188,7 @@ def prepare_workspace( agent_server_image = ( f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}" ) - if not image_exists(agent_server_image): + if not remote_image_exists(agent_server_image): raise RuntimeError( f"Agent server image {agent_server_image} does not exist in container registry, " "make sure to build, push it, and make it public accessible before using remote workspace." diff --git a/benchmarks/utils/build_utils.py b/benchmarks/utils/build_utils.py index df5c10fa7..f61110729 100644 --- a/benchmarks/utils/build_utils.py +++ b/benchmarks/utils/build_utils.py @@ -27,7 +27,7 @@ maybe_reset_buildkit, ) from benchmarks.utils.constants import EVAL_AGENT_SERVER_IMAGE -from benchmarks.utils.image_utils import image_exists, local_image_exists +from benchmarks.utils.image_utils import local_image_exists, remote_image_exists from openhands.agent_server.docker.build import BuildOptions, TargetType, build from openhands.sdk import get_logger @@ -300,7 +300,7 @@ def build_image( ) for t in opts.all_tags: # Check if image exists or not - if image_exists(t): + if remote_image_exists(t): logger.info("Image %s already exists. Skipping build.", t) return BuildOutput(base_image=base_image, tags=[t], error=None) tags = build(opts) diff --git a/benchmarks/utils/image_utils.py b/benchmarks/utils/image_utils.py index bbbecb74f..467074cb9 100644 --- a/benchmarks/utils/image_utils.py +++ b/benchmarks/utils/image_utils.py @@ -120,12 +120,13 @@ def create_docker_workspace( ) -def image_exists( +def remote_image_exists( image_ref: str, gh_username: str | None = None, gh_pat: str | None = None, # GitHub PAT with read:packages for private GHCR docker_token: str | None = None, # Docker Hub JWT if you already have one ) -> bool: + """Check if a Docker image exists in a remote registry.""" registry, repo, ref = _parse(image_ref) headers = {"Accept": ACCEPT} @@ -167,5 +168,5 @@ def image_exists( gh_user = sys.argv[2] if len(sys.argv) > 2 else None gh_pat = sys.argv[3] if len(sys.argv) > 3 else None - ok = image_exists(image, gh_username=gh_user, gh_pat=gh_pat) + ok = remote_image_exists(image, gh_username=gh_user, gh_pat=gh_pat) print(f"{image} -> {'✅ exists' if ok else '❌ not found or unauthorized'}")