diff --git a/README.md b/README.md index 3039485f..be3198d7 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,34 @@ Examples of filters | Clean only untagged manifests in all repos (with --untagged) | --filter `".*:^$"` | | Clean only untagged manifests in app repo (with --untagged) | --filter `"app:^$"` | +##### Literal vs. regex repository names + +When the repository portion of a `--filter` is a plain name with no regex metacharacters (e.g. `my-repo:.*`), the CLI treats it as a **literal** repository name and targets that repository directly — **without listing the full catalog**. The catalog API is only called when at least one filter contains a regex pattern in the repository portion (e.g. `.*:.*` or `.*/cache:.*`). + +This has two practical benefits: + +- **Large registries:** Registries with a very large number of repositories can avoid the slow or impractical catalog listing step entirely by using literal repository names in their filters. +- **Restricted permissions (ABAC):** In ABAC-enabled registries where a user only has access to specific repositories and lacks catalog listing permissions, literal filters allow `acr purge` to work without requiring the `Container Registry Repository Catalog Lister` role. + +For example, a user who only has access to `team-a/app` can run: + +```sh +acr purge \ + --registry \ + --filter "team-a/app:.*" \ + --ago 30d +``` + +No catalog listing permission is needed because `team-a/app` is a literal name. Multiple literal filters can be combined to target several specific repositories: + +```sh +acr purge \ + --registry \ + --filter "team-a/app:.*" \ + --filter "team-a/cache:.*" \ + --ago 30d +``` + #### Ago flag @@ -262,7 +290,7 @@ acr purge \ Registries with ABAC enabled use repository-scoped permissions instead of registry-wide roles. When using `acr purge` with an ABAC registry, keep the following in mind: **Required permissions:** -- **Catalog listing:** The user must have permission to list repositories (e.g., the `Container Registry Repository Catalog Lister` role or equivalent `registry:catalog:*` scope). +- **Catalog listing:** Required only when the `--filter` contains a regex pattern in the repository portion (e.g., `.*:.*`). If all filters use literal repository names (e.g., `my-repo:.*`), catalog listing is **not** required — see [Literal vs. regex repository names](#literal-vs-regex-repository-names) above. - **Repository access:** The user needs the `Container Registry Repository Contributor` role for deletes, which can be scoped to specific repositories using ABAC conditions. **Partial access behavior:** diff --git a/cmd/acr/purge_test.go b/cmd/acr/purge_test.go index 20fb3b05..4e16b9f4 100644 --- a/cmd/acr/purge_test.go +++ b/cmd/acr/purge_test.go @@ -495,7 +495,10 @@ func TestPurgeManifests(t *testing.T) { mockClient := &mocks.AcrCLIClientInterface{} mockClient.On("GetAcrManifests", mock.Anything, testRepo, "", "").Return(singleMultiArchOCIWithTagsResult, nil).Once() mockClient.On("GetManifest", mock.Anything, testRepo, "sha256:d88fb54ba4424dada7c928c6af332ed1c49065ad85eafefb6f26664695015119").Return(multiArchOCIBytes, nil).Once() - mockClient.On("GetManifest", mock.Anything, testRepo, "sha256:63532043b5af6247377a472ad075a42bde35689918de1cf7f807714997e0e683").Return(emptyManifestBytes, nil).Once() + // This call may or may not happen depending on goroutine timing: the + // tagged parent's goroutine may add digest1 to the ignoreList before + // the main loop reaches this untagged manifest. + mockClient.On("GetManifest", mock.Anything, testRepo, "sha256:63532043b5af6247377a472ad075a42bde35689918de1cf7f807714997e0e683").Return(emptyManifestBytes, nil).Maybe() mockClient.On("GetManifest", mock.Anything, testRepo, "sha256:6305e31b9b0081d2532397a1e08823f843f329a7af2ac98cb1d7f0355a3e3696").Return(emptyManifestBytes, nil).Once() mockClient.On("GetAcrManifests", mock.Anything, testRepo, "", "sha256:d88fb54ba4424dada7c928c6af332ed1c49065ad85eafefb6f26664695015119").Return(doubleOCIWithoutTagsResult, nil).Once() mockClient.On("GetAcrManifests", mock.Anything, testRepo, "", "sha256:6305e31b9b0081d2532397a1e08823f843f329a7af2ac98cb1d7f0355a3e3696").Return(EmptyListManifestsResult, nil).Once() @@ -598,8 +601,7 @@ func TestCollectTagFilters(t *testing.T) { t.Run("SingleRepo", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // "bar" is a literal repo name, so GetRepositories should not be called. filters, err := repository.CollectTagFilters(testCtx, []string{testRepo + ":.*"}, mockClient, 60, defaultRepoPageSize) assert.Equal(1, len(filters), "Number of found should be one") assert.Equal(".*", filters[testRepo], "Filter for test repo should be .*") @@ -623,10 +625,11 @@ func TestCollectTagFilters(t *testing.T) { t.Run("NoPartialMatch", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // "ba" is a literal repo name, so GetRepositories should not be called. + // The literal name "ba" is used directly without verifying against the registry. filters, err := repository.CollectTagFilters(testCtx, []string{"ba:.*"}, mockClient, 60, defaultRepoPageSize) - assert.Equal(0, len(filters), "Number of found repos should be zero") + assert.Equal(1, len(filters), "Literal repo name should be passed through") + assert.Equal(".*", filters["ba"], "Filter for ba repo should be .*") assert.Equal(nil, err, "Error should be nil") mockClient.AssertExpectations(t) }) @@ -634,8 +637,7 @@ func TestCollectTagFilters(t *testing.T) { t.Run("NameWithSlash", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // "foo/bar" is a literal repo name, so GetRepositories should not be called. filters, err := repository.CollectTagFilters(testCtx, []string{"foo/bar:.*"}, mockClient, 60, defaultRepoPageSize) assert.Equal(1, len(filters), "Number of found repos should be one") assert.Equal(nil, err, "Error should be nil") @@ -645,8 +647,7 @@ func TestCollectTagFilters(t *testing.T) { t.Run("NameWithSlashAndNonCaptureGroupInTag", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // "foo/bar" is a literal repo name, so GetRepositories should not be called. filters, err := repository.CollectTagFilters(testCtx, []string{"foo/bar:(?:.*)"}, mockClient, 60, defaultRepoPageSize) assert.Equal(1, len(filters), "Number of found repos should be one") assert.Equal(nil, err, "Error should be nil") @@ -700,9 +701,11 @@ func TestCollectTagFilters(t *testing.T) { t.Run("NoRepos", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(NoRepositoriesResult, nil).Once() + // "bar" is a literal repo name, so GetRepositories should not be called. + // The literal name is used directly without verifying against the registry. filters, err := repository.CollectTagFilters(testCtx, []string{testRepo + ":.*"}, mockClient, 60, defaultRepoPageSize) - assert.Equal(0, len(filters), "Number of found repos should be zero") + assert.Equal(1, len(filters), "Literal repo name should be passed through") + assert.Equal(".*", filters[testRepo], "Filter for test repo should be .*") assert.Equal(nil, err, "Error should be nil") mockClient.AssertExpectations(t) }) @@ -710,8 +713,7 @@ func TestCollectTagFilters(t *testing.T) { t.Run("EmptyRepoRegex", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // Parsing fails before any repo listing happens. _, err := repository.CollectTagFilters(testCtx, []string{":.*"}, mockClient, 60, defaultRepoPageSize) assert.NotEqual(nil, err, "Error should not be nil") mockClient.AssertExpectations(t) @@ -720,8 +722,7 @@ func TestCollectTagFilters(t *testing.T) { t.Run("EmptyTagRegex", func(t *testing.T) { assert := assert.New(t) mockClient := &mocks.BaseClientAPI{} - mockClient.On("GetRepositories", mock.Anything, "", mock.Anything).Return(ManyRepositoriesResult, nil).Once() - mockClient.On("GetRepositories", mock.Anything, mock.Anything, mock.Anything).Return(NoRepositoriesResult, nil).Once() + // Parsing fails before any repo listing happens. _, err := repository.CollectTagFilters(testCtx, []string{testRepo + ".*:"}, mockClient, 60, defaultRepoPageSize) assert.NotEqual(nil, err, "Error should not be nil") mockClient.AssertExpectations(t) diff --git a/cmd/repository/image_functions.go b/cmd/repository/image_functions.go index c2cde7f6..28a3c006 100644 --- a/cmd/repository/image_functions.go +++ b/cmd/repository/image_functions.go @@ -101,29 +101,61 @@ func GetRepositoryAndTagRegex(filter string) (string, string, error) { return repoAndRegex[0], repoAndRegex[1], nil } +// isLiteralRegex returns true if the pattern contains no regex metacharacters +// and therefore can only match itself as a literal string. +func isLiteralRegex(pattern string) bool { + return regexp2.Escape(pattern) == pattern +} + // CollectTagFilters collects all matching repos and collects the associated tag filters func CollectTagFilters(ctx context.Context, rawFilters []string, client acrapi.BaseClientAPI, regexMatchTimeout int64, repoPageSize int32) (map[string]string, error) { - allRepoNames, err := GetAllRepositoryNames(ctx, client, repoPageSize) - if err != nil { - return nil, err + type parsedFilter struct { + repoRegex string + tagRegex string + literal bool } - tagFilters := map[string]string{} + parsed := make([]parsedFilter, 0, len(rawFilters)) + needRepoList := false for _, filter := range rawFilters { repoRegex, tagRegex, err := GetRepositoryAndTagRegex(filter) if err != nil { return nil, err } - repoNames, err := GetMatchingRepos(allRepoNames, "^"+repoRegex+"$", regexMatchTimeout) + literal := isLiteralRegex(repoRegex) + if !literal { + needRepoList = true + } + parsed = append(parsed, parsedFilter{repoRegex: repoRegex, tagRegex: tagRegex, literal: literal}) + } + + var allRepoNames []string + if needRepoList { + var err error + allRepoNames, err = GetAllRepositoryNames(ctx, client, repoPageSize) if err != nil { return nil, err } + } + + tagFilters := map[string]string{} + for _, pf := range parsed { + var repoNames []string + if pf.literal { + repoNames = []string{pf.repoRegex} + } else { + var err error + repoNames, err = GetMatchingRepos(allRepoNames, "^"+pf.repoRegex+"$", regexMatchTimeout) + if err != nil { + return nil, err + } + } for _, repoName := range repoNames { if _, ok := tagFilters[repoName]; ok { // To only iterate through a repo once a big regex filter is made of all the filters of a particular repo. - tagFilters[repoName] = tagFilters[repoName] + "|" + tagRegex + tagFilters[repoName] = tagFilters[repoName] + "|" + pf.tagRegex } else { - tagFilters[repoName] = tagRegex + tagFilters[repoName] = pf.tagRegex } } } diff --git a/cmd/repository/image_functions_test.go b/cmd/repository/image_functions_test.go index e33db8b0..04bc6976 100644 --- a/cmd/repository/image_functions_test.go +++ b/cmd/repository/image_functions_test.go @@ -542,6 +542,78 @@ func TestGetUntaggedManifestsWithAgeCriteria(t *testing.T) { }) } +func TestIsLiteralRegex(t *testing.T) { + tests := []struct { + pattern string + expected bool + }{ + {"myrepo", true}, + {"my-repo/sub-repo", true}, + {"repo123", true}, + {"my.repo", false}, + {"repo*", false}, + {"repo+", false}, + {"repo?", false}, + {"(repo)", false}, + {"[repo]", false}, + {`repo\d`, false}, + {"repo|other", false}, + {"^repo", false}, + {"repo$", false}, + {"repo{1}", false}, + } + for _, tt := range tests { + t.Run(tt.pattern, func(t *testing.T) { + assert.Equal(t, tt.expected, isLiteralRegex(tt.pattern)) + }) + } +} + +func TestCollectTagFiltersLiteralSkipsRepoListing(t *testing.T) { + ctx := context.Background() + + t.Run("Literal repo name skips GetAllRepositoryNames", func(t *testing.T) { + mockClient := &mocks.BaseClientAPI{} + // No GetRepositories expectation set — if called, mock will fail. + filters := []string{"myrepo:.*"} + result, err := CollectTagFilters(ctx, filters, mockClient, 60, 100) + assert.NoError(t, err) + assert.Equal(t, map[string]string{"myrepo": ".*"}, result) + mockClient.AssertExpectations(t) + }) + + t.Run("Regex repo name calls GetAllRepositoryNames", func(t *testing.T) { + mockClient := &mocks.BaseClientAPI{} + pageSize := int32(100) + names := []string{"repo-a", "repo-b"} + mockClient.On("GetRepositories", ctx, "", &pageSize).Return(acr.Repositories{Names: &names}, nil).Once() + mockClient.On("GetRepositories", ctx, "repo-b", &pageSize).Return(acr.Repositories{}, nil).Once() + + filters := []string{"repo-.*:latest"} + result, err := CollectTagFilters(ctx, filters, mockClient, 60, pageSize) + assert.NoError(t, err) + assert.Equal(t, map[string]string{"repo-a": "latest", "repo-b": "latest"}, result) + mockClient.AssertExpectations(t) + }) + + t.Run("Mixed literal and regex filters", func(t *testing.T) { + mockClient := &mocks.BaseClientAPI{} + pageSize := int32(100) + names := []string{"alpha", "beta", "gamma"} + mockClient.On("GetRepositories", ctx, "", &pageSize).Return(acr.Repositories{Names: &names}, nil).Once() + mockClient.On("GetRepositories", ctx, "gamma", &pageSize).Return(acr.Repositories{}, nil).Once() + + filters := []string{"alpha:v1", "bet.*:v2"} + result, err := CollectTagFilters(ctx, filters, mockClient, 60, pageSize) + assert.NoError(t, err) + assert.Equal(t, "v1", result["alpha"]) + assert.Equal(t, "v2", result["beta"]) + _, hasGamma := result["gamma"] + assert.False(t, hasGamma) + mockClient.AssertExpectations(t) + }) +} + // Helper types and functions for testing type manifestTestData struct {