diff --git a/lib/expressir/express/remark_attacher.rb b/lib/expressir/express/remark_attacher.rb index a5934192..4e62c321 100644 --- a/lib/expressir/express/remark_attacher.rb +++ b/lib/expressir/express/remark_attacher.rb @@ -11,6 +11,17 @@ module Express # 2. Proximity-based matching for simple tags # 3. NOT creating spurious schema-level items for ambiguous tags class RemarkAttacher + # Child collection attributes to walk when traversing model elements. + # These are the named collections on model elements (e.g., entity.attributes, + # schema.entities, function.parameters). Extracted to a constant to avoid + # duplication between calculate_children_end_line and collect_children. + CHILD_COLLECTION_ATTRIBUTES = %i[ + schemas types entities functions procedures rules constants + attributes derived_attributes inverse_attributes + where_rules unique_rules informal_propositions + parameters variables statements items remark_items + ].freeze + def initialize(source) @source = source @attached_spans = Set.new @@ -23,8 +34,14 @@ def initialize(source) def attach(model) @model = model remarks = extract_all_remarks - attach_tagged_remarks(model, remarks) - attach_untagged_remarks(model, remarks) + + # Build nodes_with_positions ONCE for both tagged and untagged remark passes. + # This avoids double tree walk (381K nodes × 2 = 762K visits) which was + # the largest memory overhead in remark attachment (~430MB for large files). + nodes_with_positions = build_sorted_nodes_with_positions(model) + + attach_tagged_remarks(model, remarks, nodes_with_positions) + attach_untagged_remarks(remarks, nodes_with_positions) # Free expensive data structures after attachment is complete. # These are only needed during the attach process. @@ -133,7 +150,7 @@ def get_line_number(position) alias get_line_number_from_offset get_line_number - def attach_tagged_remarks(model, remarks) + def attach_tagged_remarks(model, remarks, nodes_with_positions) tagged = remarks.select { |r| r[:tag] } return if tagged.empty? @@ -143,12 +160,6 @@ def attach_tagged_remarks(model, remarks) # This is the key optimization that makes scope lookup O(1) per remark @scope_map ||= build_scope_map - # Collect nodes with positions for position-based fallback - nodes_with_positions = [] - collect_nodes_with_positions(model, nodes_with_positions) - # Use stable sort to ensure deterministic ordering across Ruby versions - nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] } - tagged.sort_by { |r| r[:position] }.each do |remark| next if @attached_spans.include?(remark[:position]) @@ -917,15 +928,10 @@ def create_remark_item(parent, id) item end - def attach_untagged_remarks(model, remarks) + def attach_untagged_remarks(remarks, nodes_with_positions) untagged = remarks.reject { |r| r[:tag] } return unless untagged.any? - nodes_with_positions = [] - collect_nodes_with_positions(model, nodes_with_positions) - # Use stable sort to preserve original order for equal keys - nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] } - untagged.each do |remark| next if @attached_spans.include?(remark[:position]) @@ -1092,10 +1098,7 @@ def calculate_children_end_line(node) end # Check specific child collections - %i[schemas types entities functions procedures rules constants - attributes derived_attributes inverse_attributes - where_rules unique_rules informal_propositions - parameters variables statements items remark_items].each do |attr| + CHILD_COLLECTION_ATTRIBUTES.each do |attr| collection = safe_get_collection(node, attr) collection&.each do |child| if child.is_a?(Model::ModelElement) && child.source_offset && child.source @@ -1114,10 +1117,7 @@ def collect_children(node, result, visited) collect_nodes_with_positions(child, result, visited) end - %i[schemas types entities functions procedures rules constants - attributes derived_attributes inverse_attributes - where_rules unique_rules informal_propositions - parameters variables statements items remark_items].each do |attr| + CHILD_COLLECTION_ATTRIBUTES.each do |attr| collection = safe_get_collection(node, attr) collection&.each do |item| collect_nodes_with_positions(item, result, visited) @@ -1125,6 +1125,16 @@ def collect_children(node, result, visited) end end + # Build sorted nodes_with_positions ONCE for both tagged and untagged remark passes. + # This merges the two separate tree walks into one, cutting node visits in half. + def build_sorted_nodes_with_positions(model) + nodes_with_positions = [] + collect_nodes_with_positions(model, nodes_with_positions) + # Stable sort: nil positions last, ties broken by insertion order + nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] } + nodes_with_positions + end + def find_nearest_node(remark, nodes) remark_line = remark[:line]