rapx/analysis/core/alias_analysis/mfp/interproc.rs
1/// Interprocedural analysis utilities
2extern crate rustc_mir_dataflow;
3use rustc_hir::def_id::DefId;
4use rustc_middle::mir::{Body, TerminatorKind};
5use rustc_mir_dataflow::ResultsCursor;
6use std::collections::HashSet;
7
8use super::super::{AliasPair, FnAliasPairs};
9use super::intraproc::{FnAliasAnalyzer, PlaceId};
10
11/// Extract root local and field path from a PlaceId
12/// Returns (root_local, field_path)
13fn extract_fields(place: &PlaceId) -> (usize, Vec<usize>) {
14 let mut fields = Vec::new();
15 let mut current = place;
16
17 // Traverse from leaf to root, collecting field indices
18 loop {
19 match current {
20 PlaceId::Local(idx) => return (*idx, fields),
21 PlaceId::Field { base, field_idx } => {
22 fields.push(*field_idx);
23 current = base;
24 }
25 }
26 }
27}
28
29/// Extract only the field path from a PlaceId (excluding the root local)
30/// Returns field indices in order from root to leaf
31/// Example: _15.0.1 returns [0, 1]
32fn extract_field_path(place: &PlaceId) -> Vec<usize> {
33 let mut fields = Vec::new();
34 let mut current = place;
35
36 // Traverse from leaf to root
37 loop {
38 match current {
39 PlaceId::Local(_) => {
40 fields.reverse(); // Reverse to get root-to-leaf order
41 return fields;
42 }
43 PlaceId::Field { base, field_idx } => {
44 fields.push(*field_idx);
45 current = base;
46 }
47 }
48 }
49}
50
51/// Check if one field path is a proper prefix of another
52/// Returns true if `prefix` is a prefix of `full`
53///
54/// Examples:
55/// - is_field_prefix([], [0]) = true (parent-child)
56/// - is_field_prefix([0], [0, 1]) = true (parent-child)
57/// - is_field_prefix([0], [1]) = false (siblings, not prefix)
58/// - is_field_prefix([0, 1], [0, 2]) = false (different fields at same level)
59fn is_field_prefix(prefix: &[usize], full: &[usize]) -> bool {
60 if prefix.len() > full.len() {
61 return false;
62 }
63 prefix == &full[..prefix.len()]
64}
65
66/// Extract function summary from analysis results
67///
68/// This function uses transitive closure to identify all aliases related to
69/// function parameters and return values, including those connected through
70/// temporary variables.
71pub fn extract_summary<'tcx>(
72 results: &mut ResultsCursor<'_, 'tcx, FnAliasAnalyzer<'tcx>>,
73 body: &Body<'tcx>,
74 _def_id: DefId,
75) -> FnAliasPairs {
76 let arg_count = body.arg_count;
77 let mut summary = FnAliasPairs::new(arg_count);
78
79 // Find all Return terminators and extract aliases at those points
80 for (block_id, block_data) in body.basic_blocks.iter_enumerated() {
81 if let Some(terminator) = &block_data.terminator {
82 if matches!(terminator.kind, TerminatorKind::Return) {
83 // Seek to the end of this block (before the terminator)
84 results.seek_to_block_end(block_id);
85
86 let state = results.get();
87 let analyzer = results.analysis();
88 let place_info = analyzer.place_info();
89
90 // Step 1: Collect all alias pairs at this return point
91 // We need to examine all aliases, not just those directly involving args/return
92 let mut all_pairs = Vec::new();
93 for (idx_i, idx_j) in state.get_all_alias_pairs() {
94 if let (Some(place_i), Some(place_j)) =
95 (place_info.get_place(idx_i), place_info.get_place(idx_j))
96 {
97 all_pairs.push((idx_i, idx_j, place_i, place_j));
98 }
99 }
100
101 // Step 2: Initialize relevant_places with all places whose root is a parameter or return value
102 // Index 0 is return value, indices 1..=arg_count are arguments
103 let mut relevant_places = HashSet::new();
104 for idx in 0..place_info.num_places() {
105 if let Some(place) = place_info.get_place(idx) {
106 if place.root_local() <= arg_count {
107 relevant_places.insert(idx);
108 }
109 }
110 }
111
112 // Step 3: Expand relevant_places using transitive closure
113 // If a place aliases to a relevant place, it becomes relevant too
114 // This captures aliases that flow through temporary variables
115 // Example: _0 aliases _2, and _2 aliases _1.0, then _2 is relevant
116 const MAX_ITERATIONS: usize = 10;
117 for _iteration in 0..MAX_ITERATIONS {
118 let mut changed = false;
119 for &(idx_i, idx_j, _, _) in &all_pairs {
120 // If one place is relevant and the other isn't, make the other relevant
121 if relevant_places.contains(&idx_i) && !relevant_places.contains(&idx_j) {
122 relevant_places.insert(idx_j);
123 changed = true;
124 }
125 if relevant_places.contains(&idx_j) && !relevant_places.contains(&idx_i) {
126 relevant_places.insert(idx_i);
127 changed = true;
128 }
129 }
130 // Converged when no more places become relevant
131 if !changed {
132 break;
133 }
134 }
135
136 // Step 4: Collect candidate aliases from two sources
137 //
138 // We collect all potential aliases between parameters and return values into
139 // a candidate set, which will be filtered and compressed in Step 5.
140 //
141 // Two sources of candidates:
142 // 4.1: Aliases derived through bridge variables (indirect connections)
143 // 4.2: Aliases directly present in all_pairs (direct connections)
144 //
145 let mut candidate_aliases = std::collections::HashSet::new();
146
147 // Step 4.1: Derive aliases through bridge variables
148 //
149 // Problem: When analyzing complex nested structures (e.g., Vec::from_raw_parts_in),
150 // we may have aliases like:
151 // - _0.0.0.0 ≈ _15 (deeply nested field aliases with temporary variable)
152 // - _1 ≈ _15.0 (parameter aliases with field of temporary variable)
153 //
154 // Both sides are connected through the bridge variable _15, but:
155 // 1. _15 is a temporary (root > arg_count), so it won't appear in final summary
156 // 2. There's no direct alias between _0.x and _1 in all_pairs
157 // 3. Union-Find guarantees transitivity within all_pairs, but the connection
158 // happens at different structural levels (parent vs child fields)
159 //
160 // Solution: For alias pairs connected through the same bridge variable where one
161 // side references the bridge's parent and the other references a child field,
162 // we derive a direct alias between the parameter/return places, compressing
163 // deep field paths to maintain precision while avoiding temporary variables.
164 //
165 // Example derivation:
166 // Input: _0.0.0.0 ≈ _15 (place_i ≈ place_j)
167 // _1 ≈ _15.0 (place_k ≈ place_m)
168 // Check: place_j.root (15) == place_m.root (15) ✓ (same bridge)
169 // _15 is prefix of _15.0 ✓ (parent-child relation)
170 // Output: _0.0 ≈ _1 (compress _0's deep fields to first level)
171 //
172 for &(idx_i, idx_j, place_i, place_j) in &all_pairs {
173 if !relevant_places.contains(&idx_i) || !relevant_places.contains(&idx_j) {
174 continue;
175 }
176
177 for &(idx_k, idx_m, place_k, place_m) in &all_pairs {
178 if !relevant_places.contains(&idx_k) || !relevant_places.contains(&idx_m) {
179 continue;
180 }
181
182 // Check if both alias pairs share the same bridge variable (by root)
183 if place_j.root_local() != place_m.root_local() {
184 continue;
185 }
186
187 // Extract field paths for prefix checking
188 let j_fields = extract_field_path(place_j);
189 let m_fields = extract_field_path(place_m);
190
191 // Verify parent-child relationship (proper prefix, not sibling fields)
192 // E.g., [] is prefix of [0], but [0] is NOT prefix of [1]
193 if !is_field_prefix(&j_fields, &m_fields)
194 && !is_field_prefix(&m_fields, &j_fields)
195 {
196 continue;
197 }
198
199 // Extract roots and fields from both sides
200 let (root_i, mut fields_i) = extract_fields(place_i);
201 let (root_k, fields_k) = extract_fields(place_k);
202
203 // Only derive aliases between parameters and return value
204 if root_i > arg_count || root_k > arg_count {
205 continue;
206 }
207
208 // Compress deep field paths to first level only
209 // This balances precision (distinguishing struct fields) with
210 // generality (avoiding over-specification)
211 fields_i.reverse(); // extract_fields returns reversed order
212 if fields_i.len() > 1 {
213 fields_i = vec![fields_i[0]];
214 }
215
216 let mut fields_k_reversed = fields_k.clone();
217 fields_k_reversed.reverse();
218
219 // Create the derived alias and add to candidates
220 let mut alias = AliasPair::new(root_i, root_k);
221 alias.lhs_fields = fields_i;
222 alias.rhs_fields = fields_k_reversed;
223
224 candidate_aliases.insert(alias);
225 }
226 }
227
228 // Step 4.2: Direct aliases from all_pairs
229 //
230 // Collect aliases that are directly present between parameters and return values
231 // in the Union-Find analysis results. These may overlap with derived aliases from
232 // Step 4.1, but duplicates are automatically handled by the HashSet.
233 //
234 for (idx_i, idx_j, place_i, place_j) in all_pairs {
235 if relevant_places.contains(&idx_i) && relevant_places.contains(&idx_j) {
236 let (root_i, mut fields_i) = extract_fields(place_i);
237 let (root_j, mut fields_j) = extract_fields(place_j);
238
239 // Only include if both roots are parameters or return value
240 if root_i <= arg_count && root_j <= arg_count {
241 // Fields were collected from leaf to root, reverse them
242 fields_i.reverse();
243 fields_j.reverse();
244
245 // Create field-sensitive AliasPair and add to candidates
246 let mut alias = AliasPair::new(root_i, root_j);
247 alias.lhs_fields = fields_i;
248 alias.rhs_fields = fields_j;
249 candidate_aliases.insert(alias);
250 }
251 }
252 }
253
254 // Step 5: Normalize and filter redundant aliases
255 //
256 // Step 5.1: Normalize alias order for consistent comparison
257 //
258 // Alias relationships are symmetric: (A, B) ≡ (B, A)
259 // However, without normalization, (3, 0.0.0) and (0.0, 3) would be
260 // treated as having different local pairs and cannot be compared for
261 // subsumption relationships.
262 //
263 // We normalize by ensuring left_local <= right_local, which allows
264 // the filter to recognize that (0.0, 3) subsumes (0.0.0, 3) even if
265 // the latter was originally collected as (3, 0.0.0).
266 //
267 let normalized_aliases: std::collections::HashSet<_> = candidate_aliases
268 .iter()
269 .map(|alias| {
270 let mut normalized = alias.clone();
271 if normalized.left_local > normalized.right_local {
272 normalized.swap(); // Swap both locals and fields
273 }
274 normalized
275 })
276 .collect();
277
278 // Step 5.2: Filter redundant aliases
279 //
280 // The normalized candidate set may contain redundant aliases such as:
281 // - Self-aliases: (0, 0), (1, 1)
282 // - Prefix-subsumed aliases: (0.0, 1) subsumes (0.0.0, 1), (0.0.0.0, 1)
283 // - Reversed but equivalent: (3, 0.0.0) and (0.0, 3) after normalization
284 //
285 // We filter these to produce a minimal, canonical summary that retains
286 // precision while avoiding over-specification.
287 //
288 let filtered_aliases = filter_redundant_aliases(normalized_aliases);
289 for alias in filtered_aliases.clone() {
290 summary.add_alias(alias);
291 }
292 }
293 }
294 }
295
296 summary
297}
298
299/// Join two function summaries
300pub fn join_fn_summaries(summary1: &FnAliasPairs, summary2: &FnAliasPairs) -> FnAliasPairs {
301 // TODO: Implement summary join operation
302 let mut result = FnAliasPairs::new(summary1.arg_size());
303
304 // Add all aliases from both summaries
305 for alias in summary1.aliases() {
306 result.add_alias(alias.clone());
307 }
308
309 for alias in summary2.aliases() {
310 result.add_alias(alias.clone());
311 }
312
313 result
314}
315
316/// Filter redundant aliases from a candidate set
317///
318/// This function removes several types of redundant aliases to produce a minimal,
319/// canonical summary:
320///
321/// 1. **Self-aliases**: Aliases where both sides refer to the same place
322/// Example: (0, 0), (1, 1)
323///
324/// 2. **Prefix-subsumed aliases**: When one alias is strictly more general than another
325///
326/// Examples of redundancy patterns eliminated:
327///
328/// a) Single-side prefix (left more specific):
329/// - (1.0, 2) subsumes (1.0.0.0, 2) → keep (1.0, 2), remove (1.0.0.0, 2)
330/// - Rationale: If field 1.0 aliases with 2, then 1.0.0.0 (a sub-field) also aliases
331///
332/// b) Single-side prefix (right more specific):
333/// - (1.0, 2) subsumes (1.0, 2.0) → keep (1.0, 2), remove (1.0, 2.0)
334/// - Rationale: If 1.0 aliases with field 2, then 1.0 aliases with 2.0 (a sub-field)
335///
336/// c) Double-side prefix (synchronized fields):
337/// - (0, 1) subsumes (0.1, 1.1) → keep (0, 1), remove (0.1, 1.1)
338/// - Rationale: If 0 and 1 alias, their corresponding fields also alias
339/// - This handles cases like struct field synchronization
340///
341/// The filtering strategy:
342/// - For each pair of aliases with the same (left_local, right_local):
343/// - Check bidirectional subsumption (a subsumes b, or b subsumes a)
344/// - Keep the more general alias (shorter total field depth)
345/// - This ensures order-independence and catches all redundancy patterns
346///
347/// Returns: A filtered HashSet containing only non-redundant aliases
348fn filter_redundant_aliases(
349 aliases: std::collections::HashSet<AliasPair>,
350) -> std::collections::HashSet<AliasPair> {
351 use std::collections::HashSet;
352
353 let aliases_vec: Vec<_> = aliases.iter().cloned().collect();
354 let mut to_remove = HashSet::new();
355
356 for i in 0..aliases_vec.len() {
357 let alias_a = &aliases_vec[i];
358
359 // Skip if already marked for removal
360 if to_remove.contains(alias_a) {
361 continue;
362 }
363
364 // Rule 1: Remove self-aliases (same local)
365 // Example: (0, 0), (1, 1)
366 if alias_a.left_local == alias_a.right_local {
367 to_remove.insert(alias_a.clone());
368 continue;
369 }
370
371 // Rule 2: Check for prefix subsumption with other aliases
372 for j in 0..aliases_vec.len() {
373 if i == j {
374 continue;
375 }
376 let alias_b = &aliases_vec[j];
377
378 // Skip if already marked for removal
379 if to_remove.contains(alias_b) {
380 continue;
381 }
382
383 // Only compare aliases with the same locals
384 // Note: After normalization, this ensures left_local <= right_local for both,
385 // so (3, 0.0.0) normalized to (0.0.0, 3) can be compared with (0.0, 3)
386 if alias_a.left_local != alias_b.left_local
387 || alias_a.right_local != alias_b.right_local
388 {
389 continue;
390 }
391
392 // Check bidirectional subsumption relationships
393 //
394 // Subsumption means one alias is more general (has shorter field paths)
395 // than another. We check both directions:
396 // - Does a subsume b? (a is more general)
397 // - Does b subsume a? (b is more general)
398
399 // Check if a's fields subsume b's fields
400 let lhs_a_subsumes_b = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields)
401 || alias_a.lhs_fields == alias_b.lhs_fields;
402 let rhs_a_subsumes_b = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields)
403 || alias_a.rhs_fields == alias_b.rhs_fields;
404
405 // Check if b's fields subsume a's fields
406 let lhs_b_subsumes_a = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields)
407 || alias_b.lhs_fields == alias_a.lhs_fields;
408 let rhs_b_subsumes_a = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields)
409 || alias_b.rhs_fields == alias_a.rhs_fields;
410
411 // Determine if there's a subsumption relationship
412 let lhs_a_strict = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields);
413 let rhs_a_strict = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields);
414 let lhs_b_strict = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields);
415 let rhs_b_strict = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields);
416
417 // a subsumes b if both sides subsume and at least one is strict
418 let a_subsumes_b =
419 lhs_a_subsumes_b && rhs_a_subsumes_b && (lhs_a_strict || rhs_a_strict);
420 // b subsumes a if both sides subsume and at least one is strict
421 let b_subsumes_a =
422 lhs_b_subsumes_a && rhs_b_subsumes_a && (lhs_b_strict || rhs_b_strict);
423
424 // If there's a subsumption relationship, mark the more specific one for removal
425 if a_subsumes_b || b_subsumes_a {
426 // Compare specificity: more general alias has lower total field depth
427 let spec_a = alias_specificity(alias_a);
428 let spec_b = alias_specificity(alias_b);
429
430 if spec_a < spec_b {
431 // a is more general, mark b for removal
432 to_remove.insert(alias_b.clone());
433 } else if spec_b < spec_a {
434 // b is more general, mark a for removal
435 to_remove.insert(alias_a.clone());
436 break; // Stop comparing alias_a since it's marked for removal
437 }
438 // If equal specificity, keep both (shouldn't happen with strict prefix)
439 }
440 }
441 }
442
443 // Return the result with marked aliases removed
444 aliases.difference(&to_remove).cloned().collect()
445}
446
447/// Calculate the specificity of an alias based on total field depth
448///
449/// Specificity is measured as the sum of field depths on both sides.
450/// Lower values indicate more general (less specific) aliases.
451///
452/// Examples:
453/// - (0, 1): specificity = 0 + 0 = 0 (most general)
454/// - (0.1, 1): specificity = 1 + 0 = 1
455/// - (0.1, 1.1): specificity = 1 + 1 = 2
456/// - (1.0.0.0, 2): specificity = 3 + 0 = 3
457///
458/// When two aliases have a subsumption relationship, we keep the one
459/// with lower specificity (more general).
460fn alias_specificity(alias: &AliasPair) -> usize {
461 alias.lhs_fields.len() + alias.rhs_fields.len()
462}
463
464/// Check if `prefix` is a strict prefix of `full`
465///
466/// A strict prefix means:
467/// - `prefix` is shorter than `full`
468/// - All elements of `prefix` match the corresponding elements in `full`
469///
470/// Examples:
471/// - is_strict_prefix([0], [0, 1]) = true
472/// - is_strict_prefix([], [0]) = true
473/// - is_strict_prefix([0], [0]) = false (equal, not strict)
474/// - is_strict_prefix([0], [1]) = false (not a prefix)
475fn is_strict_prefix(prefix: &[usize], full: &[usize]) -> bool {
476 prefix.len() < full.len() && prefix == &full[..prefix.len()]
477}