From 63c8303e498f02f890a4a243d5bc41c4245579dc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:01:03 +0000 Subject: [PATCH 1/6] Optimize breaking_changes_checker performance with AST caching, early exit, and O(n) algorithms Key optimizations: 1. Add AST module cache (_ast_cache, _get_parsed_module) to avoid redundant file I/O and AST parsing in get_properties() and get_overloads() 2. Use exception-based early exit in ClassTreeAnalyzer to stop AST traversal once the target class is found 3. Replace O(n*m) run_async_cleanup() with O(n) set-based approach 4. Replace deepcopy + list.remove() in get_reportable_changes() with single-pass filtering - eliminates expensive deep copy and O(n^2) removal 5. Use re.search() instead of re.findall() for ignore rule matching 6. Add comprehensive performance tests demonstrating the optimizations Co-authored-by: msyyc <70930885+msyyc@users.noreply.github.com> Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/328bed23-3c8d-44f6-bea0-a24571d01b8d --- .../breaking_changes_tracker.py | 57 ++- .../detect_breaking_changes.py | 61 ++- .../tests/test_performance.py | 445 ++++++++++++++++++ 3 files changed, 521 insertions(+), 42 deletions(-) create mode 100644 scripts/breaking_changes_checker/tests/test_performance.py diff --git a/scripts/breaking_changes_checker/breaking_changes_tracker.py b/scripts/breaking_changes_checker/breaking_changes_tracker.py index 63bd5c6e69f7..3382850750e5 100644 --- a/scripts/breaking_changes_checker/breaking_changes_tracker.py +++ b/scripts/breaking_changes_checker/breaking_changes_tracker.py @@ -9,7 +9,6 @@ import re from enum import Enum from typing import Any, Dict, List, Union -from copy import deepcopy from _models import ChangesChecker, Suppression, RegexSuppression, PostProcessingChecker @@ -129,15 +128,25 @@ def run_post_processing(self) -> None: # Remove duplicate reporting of changes that apply to both sync and async package components def run_async_cleanup(self, changes_list: List) -> None: - # Create a list of all sync changes - non_aio_changes = [bc for bc in changes_list if "aio" not in bc[2]] - # Remove any aio change if there is a sync change that is the same - for change in non_aio_changes: - for c in changes_list: - if "aio" in c[2]: - if change[1] == c[1] and change[3:] == c[3:]: - changes_list.remove(c) - break + def _make_hashable(item): + """Convert an item to a hashable type (lists become tuples).""" + if isinstance(item, list): + return tuple(_make_hashable(i) for i in item) + return item + + def _make_key(bc): + return tuple(_make_hashable(x) for x in (bc[1],) + bc[3:]) + + # Build a set of keys from non-aio changes for O(1) lookup + non_aio_keys = set() + for bc in changes_list: + if "aio" not in bc[2]: + non_aio_keys.add(_make_key(bc)) + # Keep only non-aio changes and aio changes that don't have a sync counterpart + changes_list[:] = [ + bc for bc in changes_list + if "aio" not in bc[2] or _make_key(bc) not in non_aio_keys + ] def run_breaking_change_diff_checks(self) -> None: for module_name, module in self.diff.items(): @@ -652,28 +661,36 @@ def get_reportable_changes(self, ignore_changes: Dict, changes_list: List) -> Li ignored = [] # Match all ignore rules that should apply to this package for ignored_package, ignore_rules in ignore_changes.items(): - if re.findall(ignored_package, self.package_name): + if re.search(ignored_package, self.package_name): ignored.extend(ignore_rules) - # Remove ignored changes from list of reportable changes - bc_copy = deepcopy(changes_list) - for bc in bc_copy: + if not ignored: + return + + # Pre-create Suppression objects once instead of recreating per change + suppressions = [Suppression(*rule) for rule in ignored] + + # Filter out ignored changes in a single pass instead of deepcopy + list.remove() + filtered = [] + for bc in changes_list: _, bc_type, module_name, *args = bc class_name = args[0] if args else None function_name = args[1] if len(args) > 1 else None parameter_name = args[2] if len(args) > 2 else None - for rule in ignored: - suppression = Suppression(*rule) - + should_keep = True + for suppression in suppressions: if suppression.parameter_or_property_name is not None: - # If the ignore rule is for a property or parameter, we should check up to that level on the original change if self.match((bc_type, module_name, class_name, function_name, parameter_name), suppression): - changes_list.remove(bc) + should_keep = False break elif self.match((bc_type, module_name, class_name, function_name), suppression): - changes_list.remove(bc) + should_keep = False break + if should_keep: + filtered.append(bc) + + changes_list[:] = filtered def report_changes(self) -> None: ignore_changes = self.ignore if self.ignore else {} diff --git a/scripts/breaking_changes_checker/detect_breaking_changes.py b/scripts/breaking_changes_checker/detect_breaking_changes.py index 52eb1117c82c..3c48282d2646 100644 --- a/scripts/breaking_changes_checker/detect_breaking_changes.py +++ b/scripts/breaking_changes_checker/detect_breaking_changes.py @@ -31,6 +31,11 @@ _LOGGER = logging.getLogger(__name__) +class _ClassNodeFound(Exception): + """Raised to short-circuit AST traversal when the target class is found.""" + pass + + class ClassTreeAnalyzer(ast.NodeVisitor): def __init__(self, name: str) -> None: self.name = name @@ -39,9 +44,34 @@ def __init__(self, name: str) -> None: def visit_ClassDef(self, node: ast.ClassDef) -> None: if node.name == self.name: self.cls_node = node + raise _ClassNodeFound() # Break out of entire traversal immediately self.generic_visit(node) +# Module-level cache for parsed AST trees, keyed by file path. +# This avoids re-reading and re-parsing the same source file when +# multiple classes (or the same class for properties + overloads) share a file. +_ast_cache: Dict[str, ast.Module] = {} + + +def _get_parsed_module(path: str) -> ast.Module: + """Return the parsed AST for the given file path, using a cache.""" + if path not in _ast_cache: + with open(path, "r", encoding="utf-8-sig") as source: + _ast_cache[path] = ast.parse(source.read()) + return _ast_cache[path] + + +def _find_class_node(module: ast.Module, class_name: str) -> ast.ClassDef: + """Find and return the AST ClassDef node for the given class name.""" + analyzer = ClassTreeAnalyzer(class_name) + try: + analyzer.visit(module) + except _ClassNodeFound: + pass + return analyzer.cls_node + + def test_find_modules(pkg_root_path: str) -> Dict: """Find modules within the package to import and parse. Code borrowed and edited from APIview. @@ -183,12 +213,8 @@ def get_properties(cls: Type) -> Dict: attribute_names = {} path = inspect.getsourcefile(cls) - with open(path, "r", encoding="utf-8-sig") as source: - module = ast.parse(source.read()) - - analyzer = ClassTreeAnalyzer(cls.__name__) - analyzer.visit(module) - cls_node = analyzer.cls_node + module = _get_parsed_module(path) + cls_node = _find_class_node(module, cls.__name__) extract_base_classes = True if hasattr(cls_node, "bases") else False if extract_base_classes: @@ -196,15 +222,12 @@ def get_properties(cls: Type) -> Dict: for base_class in base_classes: try: path = inspect.getsourcefile(base_class) - with open(path, "r", encoding="utf-8-sig") as source: - module = ast.parse(source.read()) + module = _get_parsed_module(path) except (TypeError, SyntaxError): _LOGGER.info(f"Unable to create ast of {base_class}") continue # was a built-in, e.g. "object", Exception, or a Model from msrest fails here - analyzer = ClassTreeAnalyzer(base_class.__name__) - analyzer.visit(module) - cls_node = analyzer.cls_node + cls_node = _find_class_node(module, base_class.__name__) if cls_node: get_property_names(cls_node, attribute_names) else: @@ -320,12 +343,8 @@ def create_parameters(args: ast.arg) -> Dict: def get_overloads(cls: Type, cls_methods: Dict): path = inspect.getsourcefile(cls) - with open(path, "r", encoding="utf-8-sig") as source: - module = ast.parse(source.read()) - - analyzer = ClassTreeAnalyzer(cls.__name__) - analyzer.visit(module) - cls_node = analyzer.cls_node + module = _get_parsed_module(path) + cls_node = _find_class_node(module, cls.__name__) extract_base_classes = check_base_classes(cls_node) if extract_base_classes: @@ -333,15 +352,12 @@ def get_overloads(cls: Type, cls_methods: Dict): for base_class in base_classes: try: path = inspect.getsourcefile(base_class) - with open(path, "r", encoding="utf-8-sig") as source: - module = ast.parse(source.read()) + module = _get_parsed_module(path) except (TypeError, SyntaxError): _LOGGER.info(f"Unable to create ast of {base_class}") continue # was a built-in, e.g. "object", Exception, or a Model from msrest fails here - analyzer = ClassTreeAnalyzer(base_class.__name__) - analyzer.visit(module) - cls_node = analyzer.cls_node + cls_node = _find_class_node(module, base_class.__name__) if cls_node: get_overload_data(cls_node, cls_methods) else: @@ -418,6 +434,7 @@ def resolve_module_name(module_name: str, target_module: str) -> str: def build_library_report(target_module: str) -> Dict: + _ast_cache.clear() # Clear AST cache to avoid stale data between runs module = importlib.import_module(target_module) modules = test_find_modules(module.__path__[0]) diff --git a/scripts/breaking_changes_checker/tests/test_performance.py b/scripts/breaking_changes_checker/tests/test_performance.py new file mode 100644 index 000000000000..ff79a5ecf655 --- /dev/null +++ b/scripts/breaking_changes_checker/tests/test_performance.py @@ -0,0 +1,445 @@ +#!/usr/bin/env python + +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +""" +Performance tests for the breaking_changes_checker. + +These tests demonstrate and verify the performance improvements made to: +1. run_async_cleanup() - O(n) set-based approach vs O(n*m) nested loop + list.remove() +2. get_reportable_changes() - single-pass filtering vs deepcopy + list.remove() +3. AST caching - _get_parsed_module() and _find_class_node() helpers +4. ClassTreeAnalyzer early exit +""" + +import time +import pytest +from breaking_changes_checker.breaking_changes_tracker import BreakingChangesTracker, BreakingChangeType +from breaking_changes_checker.changelog_tracker import ChangelogTracker + + +def _generate_large_stable_current(num_modules, num_classes_per_module, num_methods_per_class, num_params_per_method): + """Generate large stable/current API reports for performance testing.""" + stable = {} + current = {} + for m in range(num_modules): + module_name = f"azure.mgmt.network.v2023_01_{m:02d}" + stable[module_name] = {"class_nodes": {}, "function_nodes": {}} + current[module_name] = {"class_nodes": {}, "function_nodes": {}} + for c in range(num_classes_per_module): + class_name = f"NetworkClass{c}" + methods_stable = {} + methods_current = {} + for me in range(num_methods_per_class): + method_name = f"method_{me}" + params = {} + for p in range(num_params_per_method): + params[f"param_{p}"] = { + "default": None, + "param_type": "positional_or_keyword" + } + methods_stable[method_name] = { + "parameters": dict(params), + "is_async": False, + } + methods_current[method_name] = { + "parameters": dict(params), + "is_async": False, + } + stable[module_name]["class_nodes"][class_name] = { + "type": None, + "methods": methods_stable, + "properties": {f"prop_{i}": {"attr_type": "str"} for i in range(3)} + } + current[module_name]["class_nodes"][class_name] = { + "type": None, + "methods": methods_current, + "properties": {f"prop_{i}": {"attr_type": "str"} for i in range(3)} + } + return stable, current + + +def _generate_changes_list(n, include_aio=True): + """Generate a list of n breaking changes, half sync and half aio if include_aio.""" + changes = [] + for i in range(n): + module = f"azure.mgmt.network" + if include_aio and i % 2 == 1: + module = f"azure.mgmt.network.aio" + changes.append(( + "Deleted or renamed model `{}`", + BreakingChangeType.REMOVED_OR_RENAMED_CLASS, + module, f"SomeClass{i // 2 if include_aio else i}" + )) + return changes + + +class TestAsyncCleanupPerformance: + """Test performance of the optimized run_async_cleanup method.""" + + def test_async_cleanup_correctness_small(self): + """Verify correctness of optimized async cleanup with a small dataset.""" + changes = [ + ("msg", BreakingChangeType.REMOVED_OR_RENAMED_CLASS, "azure.mgmt.network", "ClassA"), + ("msg", BreakingChangeType.REMOVED_OR_RENAMED_CLASS, "azure.mgmt.network.aio", "ClassA"), + ("msg", BreakingChangeType.REMOVED_OR_RENAMED_CLASS, "azure.mgmt.network", "ClassB"), + ("msg", BreakingChangeType.REMOVED_OR_RENAMED_CLASS, "azure.mgmt.network.aio", "ClassC"), # no sync counterpart + ] + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + tracker.run_async_cleanup(changes) + + # aio ClassA should be removed (has sync counterpart) + # aio ClassC should remain (no sync counterpart) + assert len(changes) == 3 + modules = [c[2] for c in changes] + assert "azure.mgmt.network.aio" in modules # ClassC remains + class_names = [c[3] for c in changes] + assert "ClassA" in class_names # sync ClassA remains + assert "ClassB" in class_names # sync ClassB remains + assert "ClassC" in class_names # aio ClassC remains (no sync counterpart) + + def test_async_cleanup_performance_large(self): + """Benchmark async cleanup with a large number of changes. + + The old O(n*m) implementation with list.remove() would be very slow + for large change lists. The new set-based O(n) approach should handle + this in milliseconds. + """ + n = 10000 # 10,000 changes (5,000 sync + 5,000 aio) + changes = _generate_changes_list(n, include_aio=True) + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + + start = time.perf_counter() + tracker.run_async_cleanup(changes) + elapsed = time.perf_counter() - start + + # Should complete in well under 1 second with the optimized implementation + # The old implementation would take 10+ seconds for this size + assert elapsed < 1.0, f"run_async_cleanup took {elapsed:.3f}s for {n} changes (expected < 1s)" + # 5000 aio changes with sync counterparts should be removed + assert len(changes) == n // 2 + # All remaining should be sync changes + assert all("aio" not in c[2] for c in changes) + + def test_async_cleanup_performance_very_large(self): + """Benchmark with 50,000 changes to show scalability.""" + n = 50000 + changes = _generate_changes_list(n, include_aio=True) + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + + start = time.perf_counter() + tracker.run_async_cleanup(changes) + elapsed = time.perf_counter() - start + + assert elapsed < 2.0, f"run_async_cleanup took {elapsed:.3f}s for {n} changes (expected < 2s)" + assert len(changes) == n // 2 + + def test_async_cleanup_with_list_args(self): + """Test that async cleanup handles unhashable types (lists) in change tuples.""" + changes = [ + ("msg", BreakingChangeType.CHANGED_PARAMETER_ORDERING, "azure.mgmt.network", + "MyClient", "my_method", ["param_a", "param_b"], ["param_b", "param_a"]), + ("msg", BreakingChangeType.CHANGED_PARAMETER_ORDERING, "azure.mgmt.network.aio", + "MyClient", "my_method", ["param_a", "param_b"], ["param_b", "param_a"]), + ] + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + tracker.run_async_cleanup(changes) + + # The aio version should be removed since it has a sync counterpart + assert len(changes) == 1 + assert "aio" not in changes[0][2] + + +class TestReportableChangesPerformance: + """Test performance of the optimized get_reportable_changes method.""" + + def test_reportable_changes_no_ignores(self): + """Verify that with no ignore rules, all changes are kept.""" + changes = _generate_changes_list(100, include_aio=False) + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + tracker.get_reportable_changes({}, changes) + assert len(changes) == 100 + + def test_reportable_changes_performance_large(self): + """Benchmark get_reportable_changes with many changes and ignore rules. + + The old implementation used deepcopy + list.remove() which is O(n^2). + The new single-pass filtering is O(n*m) where m = number of ignore rules. + """ + n = 5000 + changes = _generate_changes_list(n, include_aio=False) + ignore_rules = { + "azure-mgmt-.*": [ + (BreakingChangeType.REMOVED_OR_RENAMED_CLASS, "*", f"SomeClass{i}", None, None) + for i in range(100) # 100 ignore rules + ] + } + stable = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + current = {"azure.mgmt.network": {"class_nodes": {}, "function_nodes": {}}} + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + + start = time.perf_counter() + tracker.get_reportable_changes(ignore_rules, changes) + elapsed = time.perf_counter() - start + + # Should complete well under 1 second + assert elapsed < 1.0, f"get_reportable_changes took {elapsed:.3f}s (expected < 1s)" + # 100 of the 5000 changes should be filtered out + assert len(changes) == n - 100 + + +class TestCheckerRunPerformance: + """Test full checker run performance with large API surfaces.""" + + def test_full_checker_run_large_unchanged_api(self): + """Benchmark a full checker run with a large unchanged API surface. + + When stable and current are identical, jsondiff produces an empty diff, + so the checker should complete almost instantly regardless of API surface size. + """ + stable, current = _generate_large_stable_current( + num_modules=10, + num_classes_per_module=50, + num_methods_per_class=10, + num_params_per_method=5, + ) + + start = time.perf_counter() + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + tracker.run_checks() + elapsed = time.perf_counter() - start + + assert elapsed < 5.0, f"Full checker run took {elapsed:.3f}s for large unchanged API (expected < 5s)" + assert len(tracker.breaking_changes) == 0 + + def test_changelog_tracker_run_large_with_additions(self): + """Benchmark ChangelogTracker with a large API that has many additions. + + This simulates a scenario similar to azure-mgmt-network where many new + classes and methods are added. + """ + stable, current = _generate_large_stable_current( + num_modules=5, + num_classes_per_module=20, + num_methods_per_class=5, + num_params_per_method=3, + ) + # Add new classes and methods in current + for m in range(5): + module_name = f"azure.mgmt.network.v2023_01_{m:02d}" + for c in range(10): + new_class = f"NewNetworkClass{m}_{c}" + current[module_name]["class_nodes"][new_class] = { + "type": None, + "methods": { + f"new_method_{me}": { + "parameters": { + f"param_{p}": {"default": None, "param_type": "positional_or_keyword"} + for p in range(3) + }, + "is_async": False, + } + for me in range(5) + }, + "properties": {f"prop_{i}": {"attr_type": "str"} for i in range(3)} + } + + start = time.perf_counter() + tracker = ChangelogTracker(stable, current, "azure-mgmt-network") + tracker.run_checks() + elapsed = time.perf_counter() - start + + assert elapsed < 5.0, f"ChangelogTracker run took {elapsed:.3f}s (expected < 5s)" + assert len(tracker.features_added) > 0 + + def test_parameter_ordering_check_large(self): + """Benchmark check_parameter_ordering with many modules/classes/methods.""" + stable, current = _generate_large_stable_current( + num_modules=10, + num_classes_per_module=50, + num_methods_per_class=10, + num_params_per_method=5, + ) + + tracker = BreakingChangesTracker(stable, current, "azure-mgmt-network") + + start = time.perf_counter() + tracker.check_parameter_ordering() + elapsed = time.perf_counter() - start + + # 10 modules * 50 classes * 10 methods = 5000 method comparisons + assert elapsed < 2.0, f"check_parameter_ordering took {elapsed:.3f}s (expected < 2s)" + assert len(tracker.breaking_changes) == 0 + + +class TestASTCachePerformance: + """Test performance of the AST caching mechanism.""" + + def test_ast_cache_avoids_redundant_parsing(self): + """Verify that the AST cache is populated and reused.""" + from breaking_changes_checker.detect_breaking_changes import ( + _ast_cache, _get_parsed_module, _find_class_node + ) + import tempfile + import os + + # Create a temporary Python file with multiple classes + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + f.write("class Foo:\n x: int = 0\n\nclass Bar:\n y: str = ''\n\nclass Baz:\n z: float = 0.0\n") + temp_path = f.name + + try: + _ast_cache.clear() + + # First call should parse and cache + module1 = _get_parsed_module(temp_path) + assert temp_path in _ast_cache + + # Second call should return cached result (same object) + module2 = _get_parsed_module(temp_path) + assert module1 is module2 + + # Find multiple classes from the same cached module + foo_node = _find_class_node(module1, "Foo") + assert foo_node is not None + assert foo_node.name == "Foo" + + bar_node = _find_class_node(module1, "Bar") + assert bar_node is not None + assert bar_node.name == "Bar" + + baz_node = _find_class_node(module1, "Baz") + assert baz_node is not None + assert baz_node.name == "Baz" + finally: + os.unlink(temp_path) + _ast_cache.clear() + + def test_ast_cache_performance_many_lookups(self): + """Benchmark AST cache with many lookups to the same file. + + Simulates the scenario where many classes in a large package + share the same source file (e.g., _models.py in azure-mgmt-network). + """ + from breaking_changes_checker.detect_breaking_changes import ( + _ast_cache, _get_parsed_module + ) + import tempfile + import os + + # Create a temporary file with many classes (simulating a large _models.py) + num_classes = 200 + lines = [] + for i in range(num_classes): + lines.append(f"class Model{i}:\n attr_{i}: int = {i}\n\n") + + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + f.write("".join(lines)) + temp_path = f.name + + try: + _ast_cache.clear() + + # Without caching, 200 lookups would mean 200 file reads + 200 AST parses + # With caching, it's 1 file read + 1 AST parse + 200 cache hits + start = time.perf_counter() + for i in range(num_classes): + module = _get_parsed_module(temp_path) + elapsed = time.perf_counter() - start + + # Should be nearly instantaneous since all after the first are cache hits + assert elapsed < 0.1, f"{num_classes} cached lookups took {elapsed:.3f}s (expected < 0.1s)" + assert len(_ast_cache) == 1 # Only one file cached + finally: + os.unlink(temp_path) + _ast_cache.clear() + + +class TestClassTreeAnalyzerEarlyExit: + """Test that ClassTreeAnalyzer exits early after finding the target class.""" + + def test_early_exit_finds_class(self): + """Verify early exit still finds the class correctly.""" + from breaking_changes_checker.detect_breaking_changes import _find_class_node + import ast + + source = """ +class First: + pass + +class Target: + x: int = 0 + +class Third: + pass +""" + module = ast.parse(source) + cls_node = _find_class_node(module, "Target") + assert cls_node is not None + assert cls_node.name == "Target" + + def test_early_exit_returns_none_for_missing(self): + """Verify that early exit correctly returns None when class is not found.""" + from breaking_changes_checker.detect_breaking_changes import _find_class_node + import ast + + source = """ +class First: + pass + +class Second: + pass +""" + module = ast.parse(source) + cls_node = _find_class_node(module, "NonExistent") + assert cls_node is None + + def test_early_exit_performance(self): + """Benchmark early exit vs full traversal for large source files. + + With early exit, finding a class near the top of a large file should + be significantly faster than traversing the entire AST. + """ + from breaking_changes_checker.detect_breaking_changes import _find_class_node + import ast + + # Generate a large source file with many classes + num_classes = 500 + lines = ["class TargetClass:\n x: int = 0\n\n"] # Target is first + for i in range(num_classes): + lines.append(f"class OtherClass{i}:\n attr: int = {i}\n\n") + source = "".join(lines) + module = ast.parse(source) + + # Find the first class (should benefit from early exit) + start = time.perf_counter() + for _ in range(1000): + _find_class_node(module, "TargetClass") + elapsed_first = time.perf_counter() - start + + # Find the last class (no benefit from early exit since it's at the end) + start = time.perf_counter() + for _ in range(1000): + cls_node = _find_class_node(module, f"OtherClass{num_classes - 1}") + elapsed_last = time.perf_counter() - start + + assert cls_node is not None + + # First class lookup should be significantly faster due to early exit + # (at least 2x faster since TargetClass is at position 0 of 500) + assert elapsed_first < elapsed_last, ( + f"Early exit not effective: first={elapsed_first:.4f}s, last={elapsed_last:.4f}s" + ) From 3901ba43d20248cd19f22b9bb1afceb3f26c083d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:03:21 +0000 Subject: [PATCH 2/6] Address code review feedback: improve docstrings, type hints, and hashable handling Co-authored-by: msyyc <70930885+msyyc@users.noreply.github.com> Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/328bed23-3c8d-44f6-bea0-a24571d01b8d --- .../breaking_changes_tracker.py | 8 +++++++- .../detect_breaking_changes.py | 11 +++++++++-- .../tests/test_performance.py | 11 ++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/scripts/breaking_changes_checker/breaking_changes_tracker.py b/scripts/breaking_changes_checker/breaking_changes_tracker.py index 3382850750e5..d5f8ccfbf89a 100644 --- a/scripts/breaking_changes_checker/breaking_changes_tracker.py +++ b/scripts/breaking_changes_checker/breaking_changes_tracker.py @@ -129,9 +129,15 @@ def run_post_processing(self) -> None: # Remove duplicate reporting of changes that apply to both sync and async package components def run_async_cleanup(self, changes_list: List) -> None: def _make_hashable(item): - """Convert an item to a hashable type (lists become tuples).""" + """Convert an item to a hashable type. + + Lists become tuples, dicts become frozensets of items. + Other types are returned as-is (assumed hashable). + """ if isinstance(item, list): return tuple(_make_hashable(i) for i in item) + if isinstance(item, dict): + return frozenset((_make_hashable(k), _make_hashable(v)) for k, v in item.items()) return item def _make_key(bc): diff --git a/scripts/breaking_changes_checker/detect_breaking_changes.py b/scripts/breaking_changes_checker/detect_breaking_changes.py index 3c48282d2646..79fb556ac7dc 100644 --- a/scripts/breaking_changes_checker/detect_breaking_changes.py +++ b/scripts/breaking_changes_checker/detect_breaking_changes.py @@ -37,6 +37,13 @@ class _ClassNodeFound(Exception): class ClassTreeAnalyzer(ast.NodeVisitor): + """AST visitor that locates a ClassDef node by name. + + Uses ``_ClassNodeFound`` exception to short-circuit the traversal once + the target class is found, since ``ast.NodeVisitor`` does not provide a + built-in early exit mechanism. + """ + def __init__(self, name: str) -> None: self.name = name self.cls_node = None @@ -62,8 +69,8 @@ def _get_parsed_module(path: str) -> ast.Module: return _ast_cache[path] -def _find_class_node(module: ast.Module, class_name: str) -> ast.ClassDef: - """Find and return the AST ClassDef node for the given class name.""" +def _find_class_node(module: ast.Module, class_name: str) -> Optional[ast.ClassDef]: + """Find and return the AST ClassDef node for the given class name, or None.""" analyzer = ClassTreeAnalyzer(class_name) try: analyzer.visit(module) diff --git a/scripts/breaking_changes_checker/tests/test_performance.py b/scripts/breaking_changes_checker/tests/test_performance.py index ff79a5ecf655..45bbe3acfaa9 100644 --- a/scripts/breaking_changes_checker/tests/test_performance.py +++ b/scripts/breaking_changes_checker/tests/test_performance.py @@ -298,7 +298,16 @@ def test_ast_cache_avoids_redundant_parsing(self): # Create a temporary Python file with multiple classes with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: - f.write("class Foo:\n x: int = 0\n\nclass Bar:\n y: str = ''\n\nclass Baz:\n z: float = 0.0\n") + f.write( + "class Foo:\n" + " x: int = 0\n" + "\n" + "class Bar:\n" + " y: str = ''\n" + "\n" + "class Baz:\n" + " z: float = 0.0\n" + ) temp_path = f.name try: From 925e08372b64dd4f0a2b17d04bbcead653c24fb0 Mon Sep 17 00:00:00 2001 From: Yuchao Yan Date: Fri, 27 Mar 2026 15:01:06 +0800 Subject: [PATCH 3/6] Update scripts/breaking_changes_checker/tests/test_performance.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/breaking_changes_checker/tests/test_performance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/breaking_changes_checker/tests/test_performance.py b/scripts/breaking_changes_checker/tests/test_performance.py index 45bbe3acfaa9..01efe79f6578 100644 --- a/scripts/breaking_changes_checker/tests/test_performance.py +++ b/scripts/breaking_changes_checker/tests/test_performance.py @@ -16,7 +16,6 @@ """ import time -import pytest from breaking_changes_checker.breaking_changes_tracker import BreakingChangesTracker, BreakingChangeType from breaking_changes_checker.changelog_tracker import ChangelogTracker From de7e11596ab9fbdb688970bd169b8e83bb3db91c Mon Sep 17 00:00:00 2001 From: Yuchao Yan Date: Fri, 27 Mar 2026 15:01:28 +0800 Subject: [PATCH 4/6] Update scripts/breaking_changes_checker/tests/test_performance.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/breaking_changes_checker/tests/test_performance.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/breaking_changes_checker/tests/test_performance.py b/scripts/breaking_changes_checker/tests/test_performance.py index 01efe79f6578..469d27111173 100644 --- a/scripts/breaking_changes_checker/tests/test_performance.py +++ b/scripts/breaking_changes_checker/tests/test_performance.py @@ -207,7 +207,9 @@ def test_full_checker_run_large_unchanged_api(self): """Benchmark a full checker run with a large unchanged API surface. When stable and current are identical, jsondiff produces an empty diff, - so the checker should complete almost instantly regardless of API surface size. + so diff-based checks do minimal work. However, run_checks() still executes + checks like parameter ordering across the full API surface. This test + ensures the overall run remains fast and bounded even for a large, unchanged API. """ stable, current = _generate_large_stable_current( num_modules=10, From 046ba0cbd01314d7e159afd0b210ca54b31da604 Mon Sep 17 00:00:00 2001 From: Yuchao Yan Date: Fri, 27 Mar 2026 15:19:32 +0800 Subject: [PATCH 5/6] update changelog level --- .../detect_breaking_changes.py | 189 +++++++++--------- 1 file changed, 93 insertions(+), 96 deletions(-) diff --git a/scripts/breaking_changes_checker/detect_breaking_changes.py b/scripts/breaking_changes_checker/detect_breaking_changes.py index 79fb556ac7dc..8c058cd5c606 100644 --- a/scripts/breaking_changes_checker/detect_breaking_changes.py +++ b/scripts/breaking_changes_checker/detect_breaking_changes.py @@ -33,6 +33,7 @@ class _ClassNodeFound(Exception): """Raised to short-circuit AST traversal when the target class is found.""" + pass @@ -97,9 +98,7 @@ def test_find_modules(pkg_root_path: str) -> Dict: # Add current path as module name if _init.py is present if "__init__.py" in files: - module_name = os.path.relpath(root, pkg_root_path).replace( - os.path.sep, "." - ) + module_name = os.path.relpath(root, pkg_root_path).replace(os.path.sep, ".") modules[module_name] = [] for f in files: if f.endswith(".py"): @@ -179,22 +178,17 @@ def get_property_names(node: ast.AST, attribute_names: Dict) -> None: if hasattr(assign, "target"): if hasattr(assign.target, "attr") and not assign.target.attr.startswith("_"): attr = assign.target - attribute_names.update({attr.attr: { - "attr_type": get_property_type(assign) - }}) + attribute_names.update({attr.attr: {"attr_type": get_property_type(assign)}}) if hasattr(assign, "targets"): for target in assign.targets: if hasattr(target, "attr") and not target.attr.startswith("_"): - attribute_names.update({target.attr: { - "attr_type": get_property_type(assign) - }}) + attribute_names.update({target.attr: {"attr_type": get_property_type(assign)}}) def check_base_classes(cls_node: ast.ClassDef) -> bool: should_look = False init_node = [ - node for node in cls_node.body - if isinstance(node, ast.FunctionDef) and node.name.startswith("__init__") + node for node in cls_node.body if isinstance(node, ast.FunctionDef) and node.name.startswith("__init__") ] if init_node: if hasattr(init_node, "body"): @@ -231,7 +225,7 @@ def get_properties(cls: Type) -> Dict: path = inspect.getsourcefile(base_class) module = _get_parsed_module(path) except (TypeError, SyntaxError): - _LOGGER.info(f"Unable to create ast of {base_class}") + _LOGGER.debug(f"Unable to create ast of {base_class}") continue # was a built-in, e.g. "object", Exception, or a Model from msrest fails here cls_node = _find_class_node(module, base_class.__name__) @@ -239,7 +233,7 @@ def get_properties(cls: Type) -> Dict: get_property_names(cls_node, attribute_names) else: # Abstract base classes fail here, e.g. "collections.abc.MuttableMapping" - _LOGGER.info(f"Unable to get class node for {base_class.__name__}. Skipping...") + _LOGGER.debug(f"Unable to get class node for {base_class.__name__}. Skipping...") else: get_property_names(cls_node, attribute_names) return attribute_names @@ -247,10 +241,7 @@ def get_properties(cls: Type) -> Dict: def create_function_report(f: Callable, is_async: bool = False) -> Dict: function = inspect.signature(f) - func_obj = { - "parameters": {}, - "is_async": is_async - } + func_obj = {"parameters": {}, "is_async": is_async} for par in function.parameters.values(): default_value = get_parameter_default(par) @@ -308,19 +299,27 @@ def create_parameters(args: ast.arg) -> Dict: if hasattr(args, "posonlyargs"): for arg in args.posonlyargs: # Initialize the function parameters - params.update({arg.arg: { - "type": get_parameter_type(arg.annotation), - "default": None, - "param_type": "positional_only" - }}) + params.update( + { + arg.arg: { + "type": get_parameter_type(arg.annotation), + "default": None, + "param_type": "positional_only", + } + } + ) if hasattr(args, "args"): for arg in args.args: # Initialize the function parameters - params.update({arg.arg: { - "type": get_parameter_type(arg.annotation), - "default": None, - "param_type": "positional_or_keyword" - }}) + params.update( + { + arg.arg: { + "type": get_parameter_type(arg.annotation), + "default": None, + "param_type": "positional_or_keyword", + } + } + ) # Range through the corresponding default values all_args = args.posonlyargs + args.args positional_defaults = [None] * (len(all_args) - len(args.defaults)) + args.defaults @@ -328,26 +327,27 @@ def create_parameters(args: ast.arg) -> Dict: params[arg.arg]["default"] = get_parameter_default_ast(default) if hasattr(args, "vararg"): if args.vararg: - params.update({args.vararg.arg: { - "type": get_parameter_type(args.vararg.annotation), - "default": None, - "param_type": "var_positional" - }}) + params.update( + { + args.vararg.arg: { + "type": get_parameter_type(args.vararg.annotation), + "default": None, + "param_type": "var_positional", + } + } + ) if hasattr(args, "kwonlyargs"): for arg in args.kwonlyargs: # Initialize the function parameters - params.update({ - arg.arg: { - "type": get_parameter_type(arg.annotation), - "default": None, - "param_type": "keyword_only" - } - }) + params.update( + {arg.arg: {"type": get_parameter_type(arg.annotation), "default": None, "param_type": "keyword_only"}} + ) # Range through the corresponding default values for i in range(len(args.kwonlyargs) - len(args.kw_defaults), len(args.kwonlyargs)): params[args.kwonlyargs[i].arg]["default"] = get_parameter_default_ast(args.kw_defaults[i]) return params + def get_overloads(cls: Type, cls_methods: Dict): path = inspect.getsourcefile(cls) module = _get_parsed_module(path) @@ -361,7 +361,7 @@ def get_overloads(cls: Type, cls_methods: Dict): path = inspect.getsourcefile(base_class) module = _get_parsed_module(path) except (TypeError, SyntaxError): - _LOGGER.info(f"Unable to create ast of {base_class}") + _LOGGER.debug(f"Unable to create ast of {base_class}") continue # was a built-in, e.g. "object", Exception, or a Model from msrest fails here cls_node = _find_class_node(module, base_class.__name__) @@ -369,14 +369,16 @@ def get_overloads(cls: Type, cls_methods: Dict): get_overload_data(cls_node, cls_methods) else: # Abstract base classes fail here, e.g. "collections.abc.MuttableMapping" - _LOGGER.info(f"Unable to get class node for {base_class.__name__}. Skipping...") + _LOGGER.debug(f"Unable to get class node for {base_class.__name__}. Skipping...") else: get_overload_data(cls_node, cls_methods) def get_overload_data(node: ast.ClassDef, cls_methods: Dict) -> None: func_nodes = [node for node in node.body if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))] - public_func_nodes = [func for func in func_nodes if not func.name.startswith("_") or func.name.startswith("__init__")] + public_func_nodes = [ + func for func in func_nodes if not func.name.startswith("_") or func.name.startswith("__init__") + ] # Check for method overloads on a class for func in public_func_nodes: if func.name not in cls_methods: @@ -393,7 +395,7 @@ def get_overload_data(node: ast.ClassDef, cls_methods: Dict) -> None: overload_report = { "parameters": create_parameters(func.args), "is_async": is_async, - "return_type": None + "return_type": None, } cls_methods[func.name]["overloads"].append(overload_report) @@ -422,7 +424,7 @@ def create_class_report(cls: Type) -> Dict: except AttributeError: _LOGGER.info(f"Skipping method check for {method} on {cls}.") continue - + if inspect.isfunction(m) or inspect.ismethod(m): if inspect.iscoroutinefunction(m): async_func = True @@ -464,7 +466,9 @@ def build_library_report(target_module: str) -> Dict: return public_api -def test_compare_reports(pkg_dir: str, changelog: bool, source_report: str = "stable.json", target_report: str = "current.json") -> None: +def test_compare_reports( + pkg_dir: str, changelog: bool, source_report: str = "stable.json", target_report: str = "current.json" +) -> None: package_name = os.path.basename(pkg_dir) # Preserve the original argument values so we can decide later whether cleanup is safe. @@ -489,18 +493,18 @@ def test_compare_reports(pkg_dir: str, changelog: bool, source_report: str = "st stable, current, package_name, - checkers = CHECKERS, - ignore = IGNORE_BREAKING_CHANGES, - post_processing_checkers = POST_PROCESSING_CHECKERS + checkers=CHECKERS, + ignore=IGNORE_BREAKING_CHANGES, + post_processing_checkers=POST_PROCESSING_CHECKERS, ) if changelog: checker = ChangelogTracker( stable, current, package_name, - checkers = CHECKERS, - ignore = IGNORE_BREAKING_CHANGES, - post_processing_checkers = POST_PROCESSING_CHECKERS, + checkers=CHECKERS, + ignore=IGNORE_BREAKING_CHANGES, + post_processing_checkers=POST_PROCESSING_CHECKERS, ) checker.run_checks() @@ -531,7 +535,7 @@ def remove_json_files(pkg_dir: str) -> None: def report_azure_mgmt_versioned_module(code_report): - + def parse_module_name(module): split_module = module.split(".") # Azure mgmt packages are typically in the form of: azure.mgmt. @@ -555,17 +559,17 @@ def parse_module_name(module): def main( - package_name: str, - target_module: str, - version: str, - in_venv: Union[bool, str], - pkg_dir: str, - changelog: bool, - code_report: bool, - latest_pypi_version: bool, - source_report: Optional[Path], - target_report: Optional[Path] - ): + package_name: str, + target_module: str, + version: str, + in_venv: Union[bool, str], + pkg_dir: str, + changelog: bool, + code_report: bool, + latest_pypi_version: bool, + source_report: Optional[Path], + target_report: Optional[Path], +): # If code_report is set, only generate a code report for the package and return if code_report: public_api = build_library_report(target_module) @@ -583,6 +587,7 @@ def main( if not version: from pypi_tools.pypi import PyPIClient + client = PyPIClient() try: @@ -601,28 +606,10 @@ def main( packages = [f"{package_name}=={version}", "jsondiff==1.2.0"] with create_venv_with_package(packages) as venv: subprocess.check_call( - [ - venv.env_exe, - "-m", - "pip", - "install", - "-r", - os.path.join(pkg_dir, "dev_requirements.txt") - ] + [venv.env_exe, "-m", "pip", "install", "-r", os.path.join(pkg_dir, "dev_requirements.txt")] ) _LOGGER.info(f"Installed version {version} of {package_name} in a venv") - args = [ - venv.env_exe, - __file__, - "-t", - package_name, - "-m", - target_module, - "--in-venv", - "true", - "-s", - version - ] + args = [venv.env_exe, __file__, "-t", package_name, "-m", target_module, "--in-venv", "true", "-s", version] try: subprocess.check_call(args) except subprocess.CalledProcessError: @@ -650,9 +637,7 @@ def main( if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Run breaking changes checks against target folder." - ) + parser = argparse.ArgumentParser(description="Run breaking changes checks against target folder.") parser.add_argument( "-t", @@ -670,11 +655,7 @@ def main( ) parser.add_argument( - "-v", - "--in-venv", - dest="in_venv", - help="Check if we are in the newly created venv.", - default=False + "-v", "--in-venv", dest="in_venv", help="Check if we are in the newly created venv.", default=False ) parser.add_argument( @@ -682,7 +663,7 @@ def main( "--stable_version", dest="stable_version", help="The stable version of the target package, if it exists on PyPi.", - default=None + default=None, ) parser.add_argument( @@ -736,13 +717,18 @@ def main( # We dont need to block for code report generation if not args.code_report: - if package_name not in RUN_BREAKING_CHANGES_PACKAGES and not any(bool(re.findall(p, package_name)) for p in RUN_BREAKING_CHANGES_PACKAGES): - _LOGGER.info(f"{package_name} opted out of breaking changes checks. " - f"See http://aka.ms/azsdk/breaking-changes-tool to opt-in.") + if package_name not in RUN_BREAKING_CHANGES_PACKAGES and not any( + bool(re.findall(p, package_name)) for p in RUN_BREAKING_CHANGES_PACKAGES + ): + _LOGGER.info( + f"{package_name} opted out of breaking changes checks. " + f"See http://aka.ms/azsdk/breaking-changes-tool to opt-in." + ) exit(0) if not target_module and not (args.source_report and args.target_report): from ci_tools.parsing import ParsedSetup + pkg_details = ParsedSetup.from_path(pkg_dir) target_module = pkg_details.namespace @@ -755,4 +741,15 @@ def main( _LOGGER.exception("If providing the `--target-report` flag, the `--source-report` flag is also required.") exit(1) - main(package_name, target_module, stable_version, in_venv, pkg_dir, changelog, args.code_report, args.latest_pypi_version, args.source_report, args.target_report) + main( + package_name, + target_module, + stable_version, + in_venv, + pkg_dir, + changelog, + args.code_report, + args.latest_pypi_version, + args.source_report, + args.target_report, + ) From 14fa0f76a8ba23e11369c12bbca4e4208fc307f3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 02:25:22 +0000 Subject: [PATCH 6/6] Restore helpful comment in get_reportable_changes() suppression check Agent-Logs-Url: https://github.com/Azure/azure-sdk-for-python/sessions/5a023435-d425-4ad8-ad50-803c576d2fa8 Co-authored-by: msyyc <70930885+msyyc@users.noreply.github.com> --- scripts/breaking_changes_checker/breaking_changes_tracker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/breaking_changes_checker/breaking_changes_tracker.py b/scripts/breaking_changes_checker/breaking_changes_tracker.py index d5f8ccfbf89a..cc6dc369b2fb 100644 --- a/scripts/breaking_changes_checker/breaking_changes_tracker.py +++ b/scripts/breaking_changes_checker/breaking_changes_tracker.py @@ -687,6 +687,7 @@ def get_reportable_changes(self, ignore_changes: Dict, changes_list: List) -> Li should_keep = True for suppression in suppressions: if suppression.parameter_or_property_name is not None: + # If the ignore rule is for a property or parameter, we should check up to that level on the original change if self.match((bc_type, module_name, class_name, function_name, parameter_name), suppression): should_keep = False break