Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from apache_beam.ml.inference.base import RunInference
from apache_beam.testing.test_pipeline import TestPipeline

pytest.importorskip("vertexai", reason="Vertex AI dependencies not available")

# pylint: disable=ungrouped-imports
try:
from apache_beam.examples.inference import vertex_ai_image_classification
Expand All @@ -53,8 +55,8 @@
_INVOKE_OUTPUT_DIR = "gs://apache-beam-ml/testing/outputs/vertex_invoke"


@pytest.mark.vertex_ai_postcommit
class VertexAIInference(unittest.TestCase):
@pytest.mark.vertex_ai_postcommit
def test_vertex_ai_run_flower_image_classification(self):
output_file = '/'.join([_OUTPUT_DIR, str(uuid.uuid4()), 'output.txt'])

Expand All @@ -73,7 +75,6 @@ def test_vertex_ai_run_flower_image_classification(self):
test_pipeline.get_full_options_as_args(**extra_opts))
self.assertEqual(FileSystems().exists(output_file), True)

@pytest.mark.vertex_ai_postcommit
@unittest.skipIf(
not _INVOKE_ENDPOINT_ID,
"Invoke endpoint not configured. Set _INVOKE_ENDPOINT_ID.")
Expand Down
17 changes: 9 additions & 8 deletions sdks/python/apache_beam/ml/rag/embeddings/vertex_ai_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import unittest
import zlib

import pytest

import apache_beam as beam
from apache_beam.ml.rag.types import Chunk
from apache_beam.ml.rag.types import Content
Expand All @@ -33,16 +35,19 @@
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to

pytest.importorskip("vertexai", reason="Vertex AI dependencies not available")

# pylint: disable=ungrouped-imports
try:
import vertexai # pylint: disable=unused-import

from apache_beam.ml.rag.embeddings.vertex_ai import VertexAIImageEmbeddings
from apache_beam.ml.rag.embeddings.vertex_ai import VertexAITextEmbeddings
from apache_beam.ml.rag.embeddings.vertex_ai import _create_image_adapter
VERTEX_AI_AVAILABLE = True
except ImportError:
VERTEX_AI_AVAILABLE = False
VertexAIImageEmbeddings = None # type: ignore
VertexAITextEmbeddings = None # type: ignore
_create_image_adapter = None # type: ignore


def chunk_approximately_equals(expected, actual):
Expand All @@ -58,8 +63,7 @@ def chunk_approximately_equals(expected, actual):
all(isinstance(x, float) for x in actual.embedding.dense_embedding))


@unittest.skipIf(
not VERTEX_AI_AVAILABLE, "Vertex AI dependencies not available")
@pytest.mark.vertex_ai_postcommit
class VertexAITextEmbeddingsTest(unittest.TestCase):
def setUp(self):
self.artifact_location = tempfile.mkdtemp(prefix='vertex_ai_')
Expand Down Expand Up @@ -113,8 +117,6 @@ def test_embedding_pipeline(self):
embeddings, equal_to(expected, equals_fn=chunk_approximately_equals))


@unittest.skipIf(
not VERTEX_AI_AVAILABLE, "Vertex AI dependencies not available")
class VertexAIImageAdapterTest(unittest.TestCase):
def test_image_adapter_missing_content(self):
adapter = _create_image_adapter()
Expand Down Expand Up @@ -146,8 +148,7 @@ def test_image_adapter_output(self):
self.assertEqual(result[0].embedding.dense_embedding, [0.1, 0.2, 0.3])


@unittest.skipIf(
not VERTEX_AI_AVAILABLE, "Vertex AI dependencies not available")
@pytest.mark.vertex_ai_postcommit
class VertexAIImageEmbeddingsTest(unittest.TestCase):
def setUp(self):
self.artifact_location = tempfile.mkdtemp(prefix='vertex_ai_img_')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@
import unittest
import uuid

import pytest

import apache_beam as beam
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.transforms import base
from apache_beam.ml.transforms.base import MLTransform

pytest.importorskip("vertexai", reason="Vertex AI dependencies not available")

# pylint: disable=ungrouped-imports
# isort: off
try:
Expand Down Expand Up @@ -58,8 +62,7 @@
model_name: str = "text-embedding-005"


@unittest.skipIf(
VertexAITextEmbeddings is None, 'Vertex AI Python SDK is not installed.')
@pytest.mark.vertex_ai_postcommit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that adding
vertex_ai_postcommit would make these tests run on Dataflow, instead of Beam direct runner:

"collect": "vertex_ai_postcommit" ,

Was that your intent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is intentional. These Vertex AI tests are integration tests, so I kept them running in the Dataflow vertexAIInferenceTest suite rather than in the standard DirectRunner unit-test jobs. The vertex_ai_postcommit marker ensures the Dataflow Gradle task collects them, and at the same time local runs can skip them by marker if needed.

Copy link
Contributor

@tvalentyn tvalentyn Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK — we typically name Dataflow tests with an _it_test (integration test) suffix in the file name. In this case I'd suggest we also rename this file to vertex_ai_it_test.py.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for example, in this particular case: we already import


try:

  from vertexai.vision_models import Image
  from vertexai.vision_models import Video
  from vertexai.vision_models import VideoSegmentConfig

I would then ask: why do we also need to try to import vertexai? Doesn't a successful `from vertexai.vision_models import Image` already imply that vertexai is importable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay sure i will rename it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah — the importorskip call runs at collection time and only checks whether the SDK is present at all. The test file's own imports run when the test executes and cover the specific APIs each test needs. So the skip is one generic availability check, while the test file still does its own imports for the APIs it uses.

class VertexAIEmbeddingsTest(unittest.TestCase):
def setUp(self) -> None:
self.artifact_location = tempfile.mkdtemp(prefix='_vertex_ai_test')
Expand Down Expand Up @@ -261,8 +264,7 @@ def test_mltransform_to_ptransform_with_vertex(self):
ptransform_list[i]._model_handler._underlying.model_name, model_name)


@unittest.skipIf(
VertexAIImageEmbeddings is None, 'Vertex AI Python SDK is not installed.')
@pytest.mark.vertex_ai_postcommit
class VertexAIImageEmbeddingsTest(unittest.TestCase):
def setUp(self) -> None:
self.artifact_location = tempfile.mkdtemp(prefix='_vertex_ai_image_test')
Expand Down Expand Up @@ -308,9 +310,7 @@ def _make_text_chunk(input: str) -> Chunk:
return Chunk(content=Content(text=input))


@unittest.skipIf(
VertexAIMultiModalEmbeddings is None,
'Vertex AI Python SDK is not installed.')
@pytest.mark.vertex_ai_postcommit
class VertexAIMultiModalEmbeddingsTest(unittest.TestCase):
def setUp(self) -> None:
self.artifact_location = tempfile.mkdtemp(
Expand Down
50 changes: 23 additions & 27 deletions sdks/python/test-suites/tox/py310/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,9 @@ project.tasks.register("postCommitPyDep") {}
// For versions that we would like to prioritize for testing,
// for example versions released in a timeframe of last 1-2 years.

toxTask "testPy310pyarrow-9", "py310-pyarrow-9", "${posargs}"
test.dependsOn "testPy310pyarrow-9"
postCommitPyDep.dependsOn "testPy310pyarrow-9"

toxTask "testPy310pyarrow-10", "py310-pyarrow-10", "${posargs}"
test.dependsOn "testPy310pyarrow-10"
postCommitPyDep.dependsOn "testPy310pyarrow-10"

toxTask "testPy310pyarrow-11", "py310-pyarrow-11", "${posargs}"
test.dependsOn "testPy310pyarrow-11"
postCommitPyDep.dependsOn "testPy310pyarrow-11"

toxTask "testPy310pyarrow-12", "py310-pyarrow-12", "${posargs}"
test.dependsOn "testPy310pyarrow-12"
postCommitPyDep.dependsOn "testPy310pyarrow-12"

toxTask "testPy310pyarrow-13", "py310-pyarrow-13", "${posargs}"
test.dependsOn "testPy310pyarrow-13"
postCommitPyDep.dependsOn "testPy310pyarrow-13"

toxTask "testPy310pyarrow-14", "py310-pyarrow-14", "${posargs}"
test.dependsOn "testPy310pyarrow-14"
postCommitPyDep.dependsOn "testPy310pyarrow-14"
toxTask "testPy310pyarrow-6", "py310-pyarrow-6", "${posargs}"
test.dependsOn "testPy310pyarrow-6"
postCommitPyDep.dependsOn "testPy310pyarrow-6"

toxTask "testPy310pyarrow-15", "py310-pyarrow-15", "${posargs}"
test.dependsOn "testPy310pyarrow-15"
Expand All @@ -89,6 +69,26 @@ toxTask "testPy310pyarrow-18", "py310-pyarrow-18", "${posargs}"
test.dependsOn "testPy310pyarrow-18"
postCommitPyDep.dependsOn "testPy310pyarrow-18"

toxTask "testPy310pyarrow-19", "py310-pyarrow-19", "${posargs}"
test.dependsOn "testPy310pyarrow-19"
postCommitPyDep.dependsOn "testPy310pyarrow-19"

toxTask "testPy310pyarrow-20", "py310-pyarrow-20", "${posargs}"
test.dependsOn "testPy310pyarrow-20"
postCommitPyDep.dependsOn "testPy310pyarrow-20"

toxTask "testPy310pyarrow-21", "py310-pyarrow-21", "${posargs}"
test.dependsOn "testPy310pyarrow-21"
postCommitPyDep.dependsOn "testPy310pyarrow-21"

toxTask "testPy310pyarrow-22", "py310-pyarrow-22", "${posargs}"
test.dependsOn "testPy310pyarrow-22"
postCommitPyDep.dependsOn "testPy310pyarrow-22"

toxTask "testPy310pyarrow-23", "py310-pyarrow-23", "${posargs}"
test.dependsOn "testPy310pyarrow-23"
postCommitPyDep.dependsOn "testPy310pyarrow-23"

// Create a test task for each supported minor version of pandas
toxTask "testPy310pandas-14", "py310-pandas-14", "${posargs}"
test.dependsOn "testPy310pandas-14"
Expand Down Expand Up @@ -159,10 +159,6 @@ toxTask "testPy310transformers-447", "py310-transformers-447", "${posargs}"
test.dependsOn "testPy310transformers-447"
postCommitPyDep.dependsOn "testPy310transformers-447"

toxTask "testPy310transformers-448", "py310-transformers-448", "${posargs}"
test.dependsOn "testPy310transformers-448"
postCommitPyDep.dependsOn "testPy310transformers-448"

toxTask "testPy310transformers-latest", "py310-transformers-latest", "${posargs}"
test.dependsOn "testPy310transformers-latest"
postCommitPyDep.dependsOn "testPy310transformers-latest"
Expand Down
7 changes: 4 additions & 3 deletions sdks/python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ deps =
pip==25.0.1
accelerate>=1.6.0
onnx<1.19.0
pip_pre = False
setenv =
extras = test,gcp,dataframe,ml_test
commands =
Expand Down Expand Up @@ -512,16 +513,16 @@ commands =
# Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories.
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_xgboost {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret'

[testenv:py{310,311}-transformers-{428,447,448,latest}]
[testenv:py{310,311}-transformers-{428,447,latest}]
deps =
# Environment dependencies are defined in the `setenv` section and installed in the `commands` section.
extras = test,gcp,ml_test
pip_pre = False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happens if we don't do this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It keeps tox from installing pre-release versions of dependencies, which tends to make the test environments more stable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that is true; but the point of using --pre was to test upcoming releases to have a heads-up when things might start breaking before things actually break.

are test environments unstable because pip cannot resolve dependencies or for some other reasons (pre-release deps have bugs, etc.)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if using --pre becomes too much of a hassle for us, its ok to disable, just wanted to understand the reasons.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — mostly it was pip having trouble resolving dependencies when pre-releases were available. I saw installs fail or become flaky, so I turned it off to make CI more stable.

extras = test
setenv =
COMMON_DEPS = tensorflow==2.12.0 protobuf==4.25.5 pip==25.0.1
# sentence-transformers 2.2.2 is the latest version that supports transformers 4.28.x
428: DEPS = sentence-transformers==2.2.2 'transformers>=4.28.0,<4.29.0' 'torch>=1.9.0,<1.14.0'
447: DEPS = 'transformers>=4.47.0,<4.48.0' 'torch>=1.9.0,<1.14.0'
455: DEPS = 'transformers>=4.55.0,<4.56.0' 'torch>=2.0.0,<2.1.0'
latest: DEPS = 'transformers>=4.55.0' 'torch>=2.0.0' 'accelerate>=1.6.0'
commands =
/bin/sh -c "pip install .[{extras}] {env:DEPS} {env:COMMON_DEPS}"
Expand Down
Loading