Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/commit0/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
construct_eval_output_dir,
get_default_on_result_writer,
)
from benchmarks.utils.image_utils import create_docker_workspace, image_exists
from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -213,7 +213,7 @@ def prepare_workspace(
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)

if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry. "
"Run 'benchmarks/commit0/build_images.py --push' to build and push it first."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/gaia/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import create_docker_workspace, image_exists
from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
from benchmarks.utils.version import SDK_SHORT_SHA
Expand Down Expand Up @@ -182,7 +182,7 @@ def prepare_workspace(
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-gaia-binary"
)

if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry. "
f"Run 'benchmarks/gaia/build_images.py --push' to build and push it first."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/multiswebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -234,7 +234,7 @@ def prepare_workspace(
agent_server_image = (
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)
if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry, "
"make sure to build, push it, and make it public accessible before using remote workspace."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/swebench/build_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
run_docker_build_layer,
)
from benchmarks.utils.dataset import get_dataset
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.image_utils import remote_image_exists
from openhands.sdk import get_logger


Expand Down Expand Up @@ -94,7 +94,7 @@ def wrap_image(agent_image: str, push: bool = False) -> BuildOutput:
For pushes, verify the base tag exists in the registry. For local builds,
assume the tag is available locally or resolvable by Docker during buildx.
"""
if push and not image_exists(agent_image):
if push and not remote_image_exists(agent_image):
return BuildOutput(
base_image=agent_image,
tags=[],
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/swebench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -195,7 +195,7 @@ def prepare_workspace(
agent_server_image = (
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)
if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry, "
"make sure to build, push it, and make it public accessible before using remote workspace."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/swebenchmultimodal/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -187,7 +187,7 @@ def prepare_workspace(
agent_server_image = (
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)
if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry, "
"make sure to build, push it, and make it public accessible before using remote workspace."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/swefficiency/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import image_exists
from benchmarks.utils.image_utils import remote_image_exists
from benchmarks.utils.models import (
EvalInstance,
EvalMetadata,
Expand Down Expand Up @@ -248,7 +248,7 @@ def prepare_workspace(
remote_agent_image = (
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)
if not image_exists(remote_agent_image):
if not remote_image_exists(remote_agent_image):
raise RuntimeError(
f"Agent server image {remote_agent_image} does not exist in container registry, "
"make sure to build, push it, and make it public accessible before using remote workspace."
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/swtbench/build_eval_env_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from benchmarks.swtbench.config import EVAL_DEFAULTS
from benchmarks.swtbench.image_utils import ensure_swt_bench_repo
from benchmarks.utils.dataset import get_dataset
from benchmarks.utils.image_utils import image_exists as remote_image_exists
from benchmarks.utils.image_utils import remote_image_exists
from openhands.sdk import get_logger


Expand Down
4 changes: 2 additions & 2 deletions benchmarks/swtbench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
get_default_on_result_writer,
)
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
from benchmarks.utils.image_utils import create_docker_workspace, image_exists
from benchmarks.utils.image_utils import create_docker_workspace, remote_image_exists
from benchmarks.utils.llm_config import load_llm_config
from benchmarks.utils.models import (
EvalInstance,
Expand Down Expand Up @@ -188,7 +188,7 @@ def prepare_workspace(
agent_server_image = (
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-{custom_tag}{suffix}"
)
if not image_exists(agent_server_image):
if not remote_image_exists(agent_server_image):
raise RuntimeError(
f"Agent server image {agent_server_image} does not exist in container registry, "
"make sure to build, push it, and make it public accessible before using remote workspace."
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/utils/build_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
maybe_reset_buildkit,
)
from benchmarks.utils.constants import EVAL_AGENT_SERVER_IMAGE
from benchmarks.utils.image_utils import image_exists, local_image_exists
from benchmarks.utils.image_utils import local_image_exists, remote_image_exists
from openhands.agent_server.docker.build import BuildOptions, TargetType, build
from openhands.sdk import get_logger

Expand Down Expand Up @@ -300,7 +300,7 @@ def build_image(
)
for t in opts.all_tags:
# Check if image exists or not
if image_exists(t):
if remote_image_exists(t):
logger.info("Image %s already exists. Skipping build.", t)
return BuildOutput(base_image=base_image, tags=[t], error=None)
tags = build(opts)
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/utils/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,13 @@ def create_docker_workspace(
)


def image_exists(
def remote_image_exists(
image_ref: str,
gh_username: str | None = None,
gh_pat: str | None = None, # GitHub PAT with read:packages for private GHCR
docker_token: str | None = None, # Docker Hub JWT if you already have one
) -> bool:
"""Check if a Docker image exists in a remote registry."""
registry, repo, ref = _parse(image_ref)
headers = {"Accept": ACCEPT}

Expand Down Expand Up @@ -167,5 +168,5 @@ def image_exists(
gh_user = sys.argv[2] if len(sys.argv) > 2 else None
gh_pat = sys.argv[3] if len(sys.argv) > 3 else None

ok = image_exists(image, gh_username=gh_user, gh_pat=gh_pat)
ok = remote_image_exists(image, gh_username=gh_user, gh_pat=gh_pat)
print(f"{image} -> {'✅ exists' if ok else '❌ not found or unauthorized'}")
Loading