diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 222805e..9b75f71 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -45,6 +45,7 @@ jobs: TITILER_PGSTAC_API_CUSTOM_DOMAIN_NAME: ${{ vars.TITILER_PGSTAC_API_CUSTOM_DOMAIN_NAME }} USER_STAC_ITEM_GEN_ROLE_ARN: ${{ vars.USER_STAC_ITEM_GEN_ROLE_ARN }} USER_STAC_INBOUND_TOPIC_ARNS: ${{ vars.USER_STAC_INBOUND_TOPIC_ARNS }} + USER_STAC_COLLECTION_ID_REGISTRY: ${{ vars.USER_STAC_COLLECTION_ID_REGISTRY }} USER_STAC_STAC_API_CUSTOM_DOMAIN_NAME: ${{ vars.USER_STAC_STAC_API_CUSTOM_DOMAIN_NAME }} USER_STAC_TITILER_PGSTAC_API_CUSTOM_DOMAIN_NAME: ${{ vars.USER_STAC_TITILER_PGSTAC_API_CUSTOM_DOMAIN_NAME }} WEB_ACL_ARN: ${{ vars.WEB_ACL_ARN }} diff --git a/cdk/PgStacInfra.ts b/cdk/PgStacInfra.ts index 7717597..481b053 100644 --- a/cdk/PgStacInfra.ts +++ b/cdk/PgStacInfra.ts @@ -462,6 +462,7 @@ export class PgStacInfra extends Stack { subnetSelection: apiSubnetSelection, batchSize: 500, lambdaTimeoutSeconds: 300, + maxBatchingWindowMinutes: 5, environment: { CREATE_COLLECTIONS_IF_MISSING: "TRUE", }, @@ -503,6 +504,8 @@ export class PgStacInfra extends Stack { itemLoadTopicArn: stacLoader.topic.topicArn, roleArn: dpsStacItemGenConfig.itemGenRoleArn, inboundTopicArns: dpsStacItemGenConfig.inboundTopicArns, + userStacCollectionIdRegistry: + dpsStacItemGenConfig.userStacCollectionIdRegistry, vpc, subnetSelection: apiSubnetSelection, stage, @@ -662,6 +665,7 @@ export interface Props extends StackProps { dpsStacItemGenConfig?: { itemGenRoleArn: string; inboundTopicArns?: string[]; + userStacCollectionIdRegistry?: Record; }; addStactoolsItemGenerator?: boolean | undefined; } diff --git a/cdk/app.ts b/cdk/app.ts index d48f717..3a3e133 100644 --- a/cdk/app.ts +++ b/cdk/app.ts @@ -27,6 +27,7 @@ const { tags, titilerDataAccessRoleArn, titilerPgStacApiCustomDomainName, + userStacCollectionIdRegistry, userStacInboundTopicArns, userStacItemGenRoleArn, userStacStacApiCustomDomainName, @@ -124,6 +125,7 @@ const userInfrastructure = new PgStacInfra(app, buildStackName("userSTAC"), { dpsStacItemGenConfig: { itemGenRoleArn: userStacItemGenRoleArn, inboundTopicArns: userStacInboundTopicArns, + userStacCollectionIdRegistry, }, }), terminationProtection: false, diff --git a/cdk/config.ts b/cdk/config.ts index 42410e5..995e791 100644 --- a/cdk/config.ts +++ b/cdk/config.ts @@ -22,6 +22,7 @@ export class Config { readonly webAclArn: string; readonly userStacItemGenRoleArn: string; readonly userStacInboundTopicArns: string[] | undefined; + readonly userStacCollectionIdRegistry: Record | undefined; readonly userStacStacApiCustomDomainName: string | undefined; readonly userStacTitilerPgStacApiCustomDomainName: string | undefined; @@ -133,6 +134,21 @@ export class Config { } else { this.userStacInboundTopicArns = undefined; } + + if (process.env.USER_STAC_COLLECTION_ID_REGISTRY) { + try { + this.userStacCollectionIdRegistry = JSON.parse( + process.env.USER_STAC_COLLECTION_ID_REGISTRY, + ) as Record; + } catch (error) { + throw new Error( + `Invalid JSON format for USER_STAC_COLLECTION_ID_REGISTRY: ${error}. ` + + `Expected format: {"collection-id": ["user1", "user2"]}` + ); + } + } else { + this.userStacCollectionIdRegistry = undefined; + } } /** diff --git a/cdk/constructs/DpsStacItemGenerator/index.ts b/cdk/constructs/DpsStacItemGenerator/index.ts index 6fd15c3..9516d2e 100644 --- a/cdk/constructs/DpsStacItemGenerator/index.ts +++ b/cdk/constructs/DpsStacItemGenerator/index.ts @@ -102,6 +102,21 @@ export interface DpsStacItemGeneratorProps { readonly inboundTopicArns?: string[]; readonly roleArn: string; + /** + * Registry mapping collection ID patterns to lists of authorized usernames. + * + * When a DPS job's STAC items already carry a collection ID and the + * submitting user is listed as authorized for that collection ID pattern, + * the item's collection ID is preserved instead of being replaced with the + * deterministic ID. Keys support glob wildcards (e.g. "maap-prefix-*"). + * + * @example + * { "my-collection": ["user1", "user2"], "maap-*": ["user3"] } + * + * @default {} (all items receive the deterministic collection ID) + */ + readonly userStacCollectionIdRegistry?: Record; + /** * Deployment stage for naming resources and exports. * @@ -187,6 +202,11 @@ export class DpsStacItemGenerator extends Construct { environment: { ITEM_LOAD_TOPIC_ARN: props.itemLoadTopicArn, LOG_LEVEL: "INFO", + ...(props.userStacCollectionIdRegistry && { + USER_STAC_COLLECTION_ID_REGISTRY: JSON.stringify( + props.userStacCollectionIdRegistry, + ), + }), ...props.environment, }, }); diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml b/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml index fedcdac..cf068ca 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml +++ b/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ [dependency-groups] dev = [ + "boto3>=1.42.89", "httpx>=0.28.1", "pytest>=8.3.5", "pytest-mock>=3.14.0", diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/handler.py b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/handler.py index dcb7df0..7696b9b 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/handler.py +++ b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/handler.py @@ -31,6 +31,31 @@ logger.addHandler(log_handler) +def _load_collection_id_registry(raw: str) -> dict[str, list[str]]: + """Parse USER_STAC_COLLECTION_ID_REGISTRY JSON string into a registry dict. + + Args: + raw: JSON string mapping collection ID patterns to lists of authorized + usernames. An empty JSON object ("{}") disables all overrides. + + Returns: + Parsed registry dict, or an empty dict if parsing fails. + """ + try: + return json.loads(raw) + except json.JSONDecodeError: + logger.warning( + "Failed to parse USER_STAC_COLLECTION_ID_REGISTRY, using empty map: %s", + raw, + ) + return {} + + +COLLECTION_ID_REGISTRY: dict[str, list[str]] = _load_collection_id_registry( + os.environ.get("USER_STAC_COLLECTION_ID_REGISTRY", "{}") +) + + def get_topic_arn() -> str: item_load_topic_arn = os.environ.get("ITEM_LOAD_TOPIC_ARN") if not item_load_topic_arn: @@ -119,7 +144,10 @@ def handler( logger.debug(f"[{message_id}] SNS Message content: {message_str}") catalog_json_key = get_catalog_json_key(message_str) - for stac_item in get_stac_items(catalog_json_key): + for stac_item in get_stac_items( + catalog_json_key, + collection_id_registry=COLLECTION_ID_REGISTRY, + ): stac_item_json = stac_item.model_dump_json() item_load_topic_arn = get_topic_arn() diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py index 0dbe718..e135307 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py +++ b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py @@ -1,3 +1,4 @@ +import fnmatch import json import logging import re @@ -77,10 +78,49 @@ def load_met_json(bucket: str, job_output_prefix: str) -> Optional[Dict[str, str ) -def get_stac_items(catalog_json_key: str) -> Generator[Item, Any, Any]: +def is_authorized( + username: str, + collection_id: str, + registry: dict[str, list[str]], +) -> bool: + """Return True if username is authorized to publish to collection_id. + + Each key in registry is a collection ID pattern (exact string or glob + wildcard using fnmatch syntax). A user is authorized when their username + appears in the list for any pattern that matches collection_id. + + Args: + username: The DPS job submitter's username. + collection_id: The collection ID the item declares. + registry: Mapping of collection ID patterns to authorized usernames. + + Returns: + True if the username is authorized for the given collection ID. """ - Yield STAC items out of a catalog.json + for pattern, authorized_users in registry.items(): + if fnmatch.fnmatch(collection_id, pattern) and username in authorized_users: + return True + return False + + +def get_stac_items( + catalog_json_key: str, + collection_id_registry: dict[str, list[str]] | None = None, +) -> Generator[Item, Any, Any]: + """Yield STAC items out of a catalog.json. + + If collection_id_registry is provided, items whose existing collection ID + is authorized for the submitting user are published as-is. All other items + receive a deterministic collection ID derived from DPS job metadata. + + Args: + catalog_json_key: S3 URI of the catalog.json file. + collection_id_registry: Optional mapping of collection ID patterns to + lists of authorized usernames. When omitted, all items receive the + deterministic collection ID. """ + registry = collection_id_registry or {} + job_output_prefix = get_dps_output_prefix(catalog_json_key) if not job_output_prefix: raise ValueError( @@ -95,15 +135,25 @@ def get_stac_items(catalog_json_key: str) -> Generator[Item, Any, Any]: f"could not locate the .met.json file with the DPS job outputs in {job_output_prefix}" ) - collection_id = slugify( + deterministic_collection_id = slugify( COLLECTION_ID_FORMAT.format(**job_metadata), regex_pattern=r"[/\?#%& ]+" ) + username = job_metadata.get("username", "") catalog = pystac.Catalog.from_file(catalog_json_key) catalog.make_all_asset_hrefs_absolute() for item in catalog.get_all_items(): item_dict = item.to_dict() - item_dict["collection"] = collection_id + item_collection_id = item_dict.get("collection") + + if item_collection_id and is_authorized(username, item_collection_id, registry): + logger.info( + "Preserving user-specified collection %s for user %s", + item_collection_id, + username, + ) + else: + item_dict["collection"] = deterministic_collection_id yield Item(**item_dict) diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py index 77e505a..b0350db 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py +++ b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py @@ -2,11 +2,46 @@ import pystac import pytest -from dps_stac_item_generator.item import get_stac_items -from pystac.errors import STACValidationError +from dps_stac_item_generator.item import get_stac_items, is_authorized from stac_pydantic.item import Item +class TestIsAuthorized: + """Test cases for is_authorized helper.""" + + def test_exact_match_authorized(self): + registry = {"my-collection": ["user1", "user2"]} + assert is_authorized("user1", "my-collection", registry) is True + + def test_exact_match_wrong_user(self): + registry = {"my-collection": ["user1"]} + assert is_authorized("user2", "my-collection", registry) is False + + def test_exact_match_wrong_collection(self): + registry = {"my-collection": ["user1"]} + assert is_authorized("user1", "other-collection", registry) is False + + def test_wildcard_match_authorized(self): + registry = {"maap-*": ["user3"]} + assert is_authorized("user3", "maap-sentinel-2", registry) is True + + def test_wildcard_match_wrong_user(self): + registry = {"maap-*": ["user3"]} + assert is_authorized("user1", "maap-sentinel-2", registry) is False + + def test_wildcard_no_match(self): + registry = {"maap-*": ["user1"]} + assert is_authorized("user1", "other-prefix-data", registry) is False + + def test_empty_registry(self): + assert is_authorized("user1", "any-collection", {}) is False + + def test_multiple_patterns_first_match_wins(self): + registry = {"exact-collection": ["user1"], "exact-*": ["user2"]} + assert is_authorized("user1", "exact-collection", registry) is True + assert is_authorized("user2", "exact-collection", registry) is True + + class TestGetStacItems: """Test cases for get_stac_items function.""" @@ -33,8 +68,6 @@ def mock_catalog(self): "assets": {}, "stac_extensions": [], } - item1.validate.return_value = None - item2 = MagicMock() item2.to_dict.return_value = { "type": "Feature", @@ -53,8 +86,6 @@ def mock_catalog(self): "assets": {}, "stac_extensions": [], } - item2.validate.return_value = None - catalog.get_all_items.return_value = [item1, item2] catalog.make_all_asset_hrefs_absolute.return_value = None @@ -153,25 +184,6 @@ def test_get_stac_items_load_met_json_called_correctly( "test-bucket", "2023/01/15/10/30/45/123456/" ) - def test_get_stac_items_validation_called(self, mock_catalog, mock_job_metadata): - """Test that validation is called on each item.""" - catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" - - with ( - patch( - "dps_stac_item_generator.item.pystac.Catalog.from_file", - return_value=mock_catalog, - ), - patch( - "dps_stac_item_generator.item.load_met_json", - return_value=mock_job_metadata, - ), - ): - list(get_stac_items(catalog_s3_key)) - - for item in mock_catalog.get_all_items.return_value: - item.validate.assert_called_once() - def test_get_stac_items_empty_catalog(self, mock_job_metadata): """Test handling of catalog with no items.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" @@ -212,13 +224,10 @@ def test_get_stac_items_catalog_loading_failure(self, mock_job_metadata): with pytest.raises(Exception, match="Failed to load catalog"): list(get_stac_items(catalog_s3_key)) - def test_get_stac_items_validation_failure(self, mock_catalog, mock_job_metadata): - """Test handling of item validation failure.""" + def test_get_stac_items_generator_behavior(self, mock_catalog, mock_job_metadata): + """Test that get_stac_items returns a generator and yields items lazily.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" - items = mock_catalog.get_all_items.return_value - items[0].validate.side_effect = Exception("Validation failed") - with ( patch( "dps_stac_item_generator.item.pystac.Catalog.from_file", @@ -229,12 +238,42 @@ def test_get_stac_items_validation_failure(self, mock_catalog, mock_job_metadata return_value=mock_job_metadata, ), ): - with pytest.raises(Exception, match="Validation failed"): + items_generator = get_stac_items(catalog_s3_key) + + assert hasattr(items_generator, "__iter__") + assert hasattr(items_generator, "__next__") + + items = list(items_generator) + assert len(items) == 2 + + mock_catalog.make_all_asset_hrefs_absolute.assert_called_once() + mock_catalog.get_all_items.assert_called_once() + + def test_get_stac_items_invalid_catalog_json(self, mock_job_metadata): + """Test handling of invalid catalog.json file.""" + catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" + + with ( + patch( + "dps_stac_item_generator.item.load_met_json", + return_value=mock_job_metadata, + ), + patch( + "dps_stac_item_generator.item.pystac.Catalog.from_file", + side_effect=Exception("Failed to parse catalog.json: invalid format"), + ), + ): + with pytest.raises( + Exception, match="Failed to parse catalog.json: invalid format" + ): list(get_stac_items(catalog_s3_key)) - def test_get_stac_items_generator_behavior(self, mock_catalog, mock_job_metadata): - """Test that get_stac_items returns a generator and yields items lazily.""" + def test_santitize_collection_id(self, mock_catalog, mock_job_metadata): + """Test that collection ID is sanitized correctly.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" + mock_job_metadata["username"] = "user/name" + mock_job_metadata["algorithm_name"] = "algo?name" + expected_collection_id = "user-name__algo-name__0.1__test" with ( patch( @@ -246,26 +285,15 @@ def test_get_stac_items_generator_behavior(self, mock_catalog, mock_job_metadata return_value=mock_job_metadata, ), ): - items_generator = get_stac_items(catalog_s3_key) - - assert hasattr(items_generator, "__iter__") - assert hasattr(items_generator, "__next__") - - items = list(items_generator) - assert len(items) == 2 + items = list(get_stac_items(catalog_s3_key)) - mock_catalog.make_all_asset_hrefs_absolute.assert_called_once() - mock_catalog.get_all_items.assert_called_once() + for item in items: + assert item.collection == expected_collection_id - def test_get_stac_items_stac_validation_error( - self, mock_catalog, mock_job_metadata - ): - """Test handling of STACValidationError during item validation.""" + def test_authorized_collection_id_preserved(self, mock_catalog, mock_job_metadata): + """Items keep their existing collection ID when the user is authorized.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" - - items = mock_catalog.get_all_items.return_value - validation_error_msg = "Item does not conform to STAC specification" - items[0].validate.side_effect = STACValidationError(validation_error_msg) + registry = {"test-collection": ["superman"]} with ( patch( @@ -277,36 +305,56 @@ def test_get_stac_items_stac_validation_error( return_value=mock_job_metadata, ), ): - with pytest.raises(STACValidationError, match=validation_error_msg): - list(get_stac_items(catalog_s3_key)) + items = list(get_stac_items(catalog_s3_key, collection_id_registry=registry)) - items[0].validate.assert_called_once() + for item in items: + assert item.collection == "test-collection" - def test_get_stac_items_invalid_catalog_json(self, mock_job_metadata): - """Test handling of invalid catalog.json file.""" + def test_unauthorized_collection_id_replaced(self, mock_catalog, mock_job_metadata): + """Items get the deterministic ID when the user is not authorized.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" + registry = {"test-collection": ["other-user"]} + expected_collection_id = "superman__awesome-algo__0.1__test" with ( + patch( + "dps_stac_item_generator.item.pystac.Catalog.from_file", + return_value=mock_catalog, + ), patch( "dps_stac_item_generator.item.load_met_json", return_value=mock_job_metadata, ), + ): + items = list(get_stac_items(catalog_s3_key, collection_id_registry=registry)) + + for item in items: + assert item.collection == expected_collection_id + + def test_wildcard_registry_pattern(self, mock_catalog, mock_job_metadata): + """Items keep their collection ID when matched by a wildcard pattern.""" + catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" + registry = {"test-*": ["superman"]} + + with ( patch( "dps_stac_item_generator.item.pystac.Catalog.from_file", - side_effect=Exception("Failed to parse catalog.json: invalid format"), + return_value=mock_catalog, + ), + patch( + "dps_stac_item_generator.item.load_met_json", + return_value=mock_job_metadata, ), ): - with pytest.raises( - Exception, match="Failed to parse catalog.json: invalid format" - ): - list(get_stac_items(catalog_s3_key)) + items = list(get_stac_items(catalog_s3_key, collection_id_registry=registry)) - def test_santitize_collection_id(self, mock_catalog, mock_job_metadata): - """Test that collection ID is sanitized correctly.""" + for item in items: + assert item.collection == "test-collection" + + def test_empty_registry_uses_deterministic_id(self, mock_catalog, mock_job_metadata): + """An empty registry results in the deterministic collection ID for all items.""" catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json" - mock_job_metadata["username"] = "user/name" - mock_job_metadata["algorithm_name"] = "algo?name" - expected_collection_id = "user-name__algo-name__0.1__test" + expected_collection_id = "superman__awesome-algo__0.1__test" with ( patch( @@ -318,7 +366,7 @@ def test_santitize_collection_id(self, mock_catalog, mock_job_metadata): return_value=mock_job_metadata, ), ): - items = list(get_stac_items(catalog_s3_key)) + items = list(get_stac_items(catalog_s3_key, collection_id_registry={})) - for item in items: - assert item.collection == expected_collection_id + for item in items: + assert item.collection == expected_collection_id diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item_gen_handler.py b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item_gen_handler.py index 95716ef..857f9b2 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item_gen_handler.py +++ b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item_gen_handler.py @@ -1,10 +1,12 @@ import json import logging import os -from unittest.mock import patch +from unittest.mock import MagicMock, patch +import pystac import pytest from dps_stac_item_generator import handler as item_gen_handler +from dps_stac_item_generator.handler import _load_collection_id_registry from stac_pydantic.item import Item @@ -32,14 +34,12 @@ def mock_sns_client(mocker): mock_client_instance = mocker.MagicMock() mock_client_instance.publish.return_value = {"MessageId": "fake-sns-message-id"} - mock_boto_client = patch( + mocker.patch( "dps_stac_item_generator.handler.boto3.client", return_value=mock_client_instance, - ).start() - - yield mock_client_instance + ) - mock_boto_client.stop() + return mock_client_instance @pytest.fixture @@ -61,17 +61,15 @@ def mock_get_stac_items(mocker): } mock_item = Item(**mock_item_dict) - mock_func = patch( + mock_func = mocker.patch( "dps_stac_item_generator.handler.get_stac_items", return_value=[mock_item] - ).start() + ) mock_func.mock_item = mock_item mock_func.mock_item_dict = mock_item_dict mock_func.mock_item_json = mock_item.model_dump_json() - yield mock_func - - mock_func.stop() + return mock_func def create_sqs_event_with_s3_notification(s3_events: list[dict]) -> dict: @@ -141,7 +139,10 @@ def test_handler_success_single_message( assert result is None - mock_get_stac_items.assert_called_once_with(expected_s3_uri) + mock_get_stac_items.assert_called_once_with( + expected_s3_uri, + collection_id_registry=item_gen_handler.COLLECTION_ID_REGISTRY, + ) mock_sns_client.publish.assert_called_once_with( TopicArn=os.environ["ITEM_LOAD_TOPIC_ARN"], @@ -187,8 +188,14 @@ def test_handler_success_multiple_messages( assert mock_sns_client.publish.call_count == 2 expected_calls = [ - mocker.call("s3://test-catalog-bucket-1/path1/catalog.json"), - mocker.call("s3://test-catalog-bucket-2/path2/catalog.json"), + mocker.call( + "s3://test-catalog-bucket-1/path1/catalog.json", + collection_id_registry=item_gen_handler.COLLECTION_ID_REGISTRY, + ), + mocker.call( + "s3://test-catalog-bucket-2/path2/catalog.json", + collection_id_registry=item_gen_handler.COLLECTION_ID_REGISTRY, + ), ] mock_get_stac_items.assert_has_calls(expected_calls) @@ -475,6 +482,29 @@ def test_handler_multiple_items_from_catalog( assert "Publishing STAC item item3" in caplog.text +class TestLoadCollectionIdRegistry: + """Test cases for _load_collection_id_registry helper.""" + + def test_valid_json_parsed_correctly(self): + raw = '{"my-collection": ["user1", "user2"], "maap-*": ["user3"]}' + result = _load_collection_id_registry(raw) + assert result == {"my-collection": ["user1", "user2"], "maap-*": ["user3"]} + + def test_empty_object_returns_empty_dict(self): + assert _load_collection_id_registry("{}") == {} + + def test_malformed_json_returns_empty_dict(self, caplog): + with caplog.at_level(logging.WARNING): + result = _load_collection_id_registry("not-valid-json") + assert result == {} + assert "USER_STAC_COLLECTION_ID_REGISTRY" in caplog.text + + def test_missing_env_var_defaults_to_empty_dict(self, monkeypatch): + monkeypatch.delenv("USER_STAC_COLLECTION_ID_REGISTRY", raising=False) + raw = os.environ.get("USER_STAC_COLLECTION_ID_REGISTRY", "{}") + assert _load_collection_id_registry(raw) == {} + + def test_handler_missing_s3_fields( mock_context, mock_sns_client, mock_get_stac_items, caplog ): @@ -529,3 +559,74 @@ def test_handler_missing_s3_fields( mock_sns_client.publish.assert_not_called() assert f"[{event['Records'][0]['messageId']}] Failed with error:" in caplog.text + + +def test_handler_registry_preserves_authorized_collection_id( + mock_context, mock_sns_client, monkeypatch +): + """Handler publishes the user-specified collection ID when the submitting user + is listed in the registry for that collection. + + This exercises the full path through get_stac_items rather than mocking it, + so it catches any regression in how the handler wires the registry into item + generation. + """ + s3_event_data = { + "bucket": {"name": "test-dps-bucket"}, + "object": {"key": "2023/01/15/10/30/45/123456/catalog.json"}, + } + event = create_sqs_event_with_s3_notification([s3_event_data]) + + monkeypatch.setattr( + item_gen_handler, + "COLLECTION_ID_REGISTRY", + {"my-custom-collection": ["superman"]}, + ) + + mock_catalog = MagicMock(spec=pystac.Catalog) + mock_catalog.make_all_asset_hrefs_absolute.return_value = None + pystac_item = MagicMock() + pystac_item.to_dict.return_value = { + "type": "Feature", + "stac_version": "1.0.0", + "id": "test-item", + "collection": "my-custom-collection", + "properties": {"datetime": "2023-01-01T00:00:00Z"}, + "geometry": { + "type": "Polygon", + "coordinates": [ + [[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]] + ], + }, + "bbox": [-180, -90, 180, 90], + "links": [], + "assets": {}, + "stac_extensions": [], + } + mock_catalog.get_all_items.return_value = [pystac_item] + + job_metadata = { + "algorithm_name": "awesome-algo", + "algorithm_version": "0.1", + "username": "superman", + "tag": "test", + } + + with ( + patch( + "dps_stac_item_generator.item.pystac.Catalog.from_file", + return_value=mock_catalog, + ), + patch( + "dps_stac_item_generator.item.load_met_json", + return_value=job_metadata, + ), + ): + result = item_gen_handler.handler(event, mock_context) + + assert result is None + mock_sns_client.publish.assert_called_once() + published_item = Item( + **json.loads(mock_sns_client.publish.call_args.kwargs["Message"]) + ) + assert published_item.collection == "my-custom-collection" diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock b/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock index b8dfd5b..2d03aa9 100644 --- a/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock +++ b/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock @@ -34,6 +34,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, ] +[[package]] +name = "boto3" +version = "1.42.89" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/0c/f7bccb22b245cabf392816baba20f9e95f78ace7dbc580fd40136e80e732/boto3-1.42.89.tar.gz", hash = "sha256:3e43aacc0801bba9bcd23a8c271c089af297a69565f783fcdd357ae0e330bf1e", size = 113165, upload-time = "2026-04-13T19:36:17.516Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/33/55103ba5ef9975ea54b8d39e69b76eb6e9fded3beae5f01065e26951a3a1/boto3-1.42.89-py3-none-any.whl", hash = "sha256:6204b189f4d0c655535f43d7eaa57ff4e8d965b8463c97e45952291211162932", size = 140556, upload-time = "2026-04-13T19:36:13.894Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.89" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/cc/e6be943efa9051bd15c2ee14077c2b10d6e27c9e9385fc43a03a5c4ed8b5/botocore-1.42.89.tar.gz", hash = "sha256:95ac52f472dad29942f3088b278ab493044516c16dbf9133c975af16527baa99", size = 15206290, upload-time = "2026-04-13T19:36:02.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/f1/90a7b8eda38b7c3a65ca7ee0075bdf310b6b471cb1b95fab6e8994323a50/botocore-1.42.89-py3-none-any.whl", hash = "sha256:d9b786c8d9db6473063b4cc5be0ba7e6a381082307bd6afb69d4216f9fa95f35", size = 14887287, upload-time = "2026-04-13T19:35:56.677Z" }, +] + [[package]] name = "certifi" version = "2025.10.5" @@ -78,6 +106,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "boto3" }, { name = "httpx" }, { name = "pytest" }, { name = "pytest-mock" }, @@ -94,6 +123,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "boto3", specifier = ">=1.42.89" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "pytest-mock", specifier = ">=3.14.0" }, @@ -166,6 +196,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -511,6 +550,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/7d/97119da51cb1dd3f2f3c0805f155a3aa4a95fa44fe7d78ae15e69edf4f34/rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772", size = 230097, upload-time = "2025-08-27T12:15:03.961Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -572,3 +623,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +]