Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 33 additions & 23 deletions lib/expressir/express/remark_attacher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ module Express
# 2. Proximity-based matching for simple tags
# 3. NOT creating spurious schema-level items for ambiguous tags
class RemarkAttacher
# Child collection attributes to walk when traversing model elements.
# These are the named collections on model elements (e.g., entity.attributes,
# schema.entities, function.parameters). Extracted to a constant to avoid
# duplication between calculate_children_end_line and collect_children.
CHILD_COLLECTION_ATTRIBUTES = %i[
schemas types entities functions procedures rules constants
attributes derived_attributes inverse_attributes
where_rules unique_rules informal_propositions
parameters variables statements items remark_items
].freeze

def initialize(source)
@source = source
@attached_spans = Set.new
Expand All @@ -23,8 +34,14 @@ def initialize(source)
def attach(model)
@model = model
remarks = extract_all_remarks
attach_tagged_remarks(model, remarks)
attach_untagged_remarks(model, remarks)

# Build nodes_with_positions ONCE for both tagged and untagged remark passes.
# This avoids double tree walk (381K nodes × 2 = 762K visits) which was
# the largest memory overhead in remark attachment (~430MB for large files).
nodes_with_positions = build_sorted_nodes_with_positions(model)

attach_tagged_remarks(model, remarks, nodes_with_positions)
attach_untagged_remarks(remarks, nodes_with_positions)

# Free expensive data structures after attachment is complete.
# These are only needed during the attach process.
Expand Down Expand Up @@ -133,7 +150,7 @@ def get_line_number(position)

alias get_line_number_from_offset get_line_number

def attach_tagged_remarks(model, remarks)
def attach_tagged_remarks(model, remarks, nodes_with_positions)
tagged = remarks.select { |r| r[:tag] }
return if tagged.empty?

Expand All @@ -143,12 +160,6 @@ def attach_tagged_remarks(model, remarks)
# This is the key optimization that makes scope lookup O(1) per remark
@scope_map ||= build_scope_map

# Collect nodes with positions for position-based fallback
nodes_with_positions = []
collect_nodes_with_positions(model, nodes_with_positions)
# Use stable sort to ensure deterministic ordering across Ruby versions
nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] }

tagged.sort_by { |r| r[:position] }.each do |remark|
next if @attached_spans.include?(remark[:position])

Expand Down Expand Up @@ -917,15 +928,10 @@ def create_remark_item(parent, id)
item
end

def attach_untagged_remarks(model, remarks)
def attach_untagged_remarks(remarks, nodes_with_positions)
untagged = remarks.reject { |r| r[:tag] }
return unless untagged.any?

nodes_with_positions = []
collect_nodes_with_positions(model, nodes_with_positions)
# Use stable sort to preserve original order for equal keys
nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] }

untagged.each do |remark|
next if @attached_spans.include?(remark[:position])

Expand Down Expand Up @@ -1092,10 +1098,7 @@ def calculate_children_end_line(node)
end

# Check specific child collections
%i[schemas types entities functions procedures rules constants
attributes derived_attributes inverse_attributes
where_rules unique_rules informal_propositions
parameters variables statements items remark_items].each do |attr|
CHILD_COLLECTION_ATTRIBUTES.each do |attr|
collection = safe_get_collection(node, attr)
collection&.each do |child|
if child.is_a?(Model::ModelElement) && child.source_offset && child.source
Expand All @@ -1114,17 +1117,24 @@ def collect_children(node, result, visited)
collect_nodes_with_positions(child, result, visited)
end

%i[schemas types entities functions procedures rules constants
attributes derived_attributes inverse_attributes
where_rules unique_rules informal_propositions
parameters variables statements items remark_items].each do |attr|
CHILD_COLLECTION_ATTRIBUTES.each do |attr|
collection = safe_get_collection(node, attr)
collection&.each do |item|
collect_nodes_with_positions(item, result, visited)
end
end
end

# Build sorted nodes_with_positions ONCE for both tagged and untagged remark passes.
# This merges the two separate tree walks into one, cutting node visits in half.
def build_sorted_nodes_with_positions(model)
nodes_with_positions = []
collect_nodes_with_positions(model, nodes_with_positions)
# Stable sort: nil positions last, ties broken by insertion order
nodes_with_positions.sort_by!.with_index { |n, i| [n[:position] || Float::INFINITY, i] }
nodes_with_positions
end

def find_nearest_node(remark, nodes)
remark_line = remark[:line]

Expand Down
Loading