rapx/analysis/core/alias_analysis/mfp/
interproc.rs

1/// Interprocedural analysis utilities
2extern crate rustc_mir_dataflow;
3use rustc_hir::def_id::DefId;
4use rustc_middle::mir::{Body, TerminatorKind};
5use rustc_mir_dataflow::ResultsCursor;
6use std::collections::HashSet;
7
8use super::super::{AliasPair, FnAliasPairs};
9use super::intraproc::{FnAliasAnalyzer, PlaceId};
10
11/// Extract root local and field path from a PlaceId
12/// Returns (root_local, field_path)
13fn extract_fields(place: &PlaceId) -> (usize, Vec<usize>) {
14    let mut fields = Vec::new();
15    let mut current = place;
16
17    // Traverse from leaf to root, collecting field indices
18    loop {
19        match current {
20            PlaceId::Local(idx) => return (*idx, fields),
21            PlaceId::Field { base, field_idx } => {
22                fields.push(*field_idx);
23                current = base;
24            }
25        }
26    }
27}
28
29/// Extract only the field path from a PlaceId (excluding the root local)
30/// Returns field indices in order from root to leaf
31/// Example: _15.0.1 returns [0, 1]
32fn extract_field_path(place: &PlaceId) -> Vec<usize> {
33    let mut fields = Vec::new();
34    let mut current = place;
35
36    // Traverse from leaf to root
37    loop {
38        match current {
39            PlaceId::Local(_) => {
40                fields.reverse(); // Reverse to get root-to-leaf order
41                return fields;
42            }
43            PlaceId::Field { base, field_idx } => {
44                fields.push(*field_idx);
45                current = base;
46            }
47        }
48    }
49}
50
51/// Check if one field path is a proper prefix of another
52/// Returns true if `prefix` is a prefix of `full`
53///
54/// Examples:
55///   - is_field_prefix([], [0]) = true  (parent-child)
56///   - is_field_prefix([0], [0, 1]) = true  (parent-child)
57///   - is_field_prefix([0], [1]) = false  (siblings, not prefix)
58///   - is_field_prefix([0, 1], [0, 2]) = false  (different fields at same level)
59fn is_field_prefix(prefix: &[usize], full: &[usize]) -> bool {
60    if prefix.len() > full.len() {
61        return false;
62    }
63    prefix == &full[..prefix.len()]
64}
65
66/// Extract function summary from analysis results
67///
68/// This function uses transitive closure to identify all aliases related to
69/// function parameters and return values, including those connected through
70/// temporary variables.
71pub fn extract_summary<'tcx>(
72    results: &mut ResultsCursor<'_, 'tcx, FnAliasAnalyzer<'tcx>>,
73    body: &Body<'tcx>,
74    _def_id: DefId,
75) -> FnAliasPairs {
76    let arg_count = body.arg_count;
77    let mut summary = FnAliasPairs::new(arg_count);
78
79    // Find all Return terminators and extract aliases at those points
80    for (block_id, block_data) in body.basic_blocks.iter_enumerated() {
81        if let Some(terminator) = &block_data.terminator {
82            if matches!(terminator.kind, TerminatorKind::Return) {
83                // Seek to the end of this block (before the terminator)
84                results.seek_to_block_end(block_id);
85
86                let state = results.get();
87                let analyzer = results.analysis();
88                let place_info = analyzer.place_info();
89
90                // Step 1: Collect all alias pairs at this return point
91                // We need to examine all aliases, not just those directly involving args/return
92                let mut all_pairs = Vec::new();
93                for (idx_i, idx_j) in state.get_all_alias_pairs() {
94                    if let (Some(place_i), Some(place_j)) =
95                        (place_info.get_place(idx_i), place_info.get_place(idx_j))
96                    {
97                        all_pairs.push((idx_i, idx_j, place_i, place_j));
98                    }
99                }
100
101                // Step 2: Initialize relevant_places with all places whose root is a parameter or return value
102                // Index 0 is return value, indices 1..=arg_count are arguments
103                let mut relevant_places = HashSet::new();
104                for idx in 0..place_info.num_places() {
105                    if let Some(place) = place_info.get_place(idx) {
106                        if place.root_local() <= arg_count {
107                            relevant_places.insert(idx);
108                        }
109                    }
110                }
111
112                // Step 3: Expand relevant_places using transitive closure
113                // If a place aliases to a relevant place, it becomes relevant too
114                // This captures aliases that flow through temporary variables
115                // Example: _0 aliases _2, and _2 aliases _1.0, then _2 is relevant
116                const MAX_ITERATIONS: usize = 10;
117                for _iteration in 0..MAX_ITERATIONS {
118                    let mut changed = false;
119                    for &(idx_i, idx_j, _, _) in &all_pairs {
120                        // If one place is relevant and the other isn't, make the other relevant
121                        if relevant_places.contains(&idx_i) && !relevant_places.contains(&idx_j) {
122                            relevant_places.insert(idx_j);
123                            changed = true;
124                        }
125                        if relevant_places.contains(&idx_j) && !relevant_places.contains(&idx_i) {
126                            relevant_places.insert(idx_i);
127                            changed = true;
128                        }
129                    }
130                    // Converged when no more places become relevant
131                    if !changed {
132                        break;
133                    }
134                }
135
136                // Step 4: Collect candidate aliases from two sources
137                //
138                // We collect all potential aliases between parameters and return values into
139                // a candidate set, which will be filtered and compressed in Step 5.
140                //
141                // Two sources of candidates:
142                //   4.1: Aliases derived through bridge variables (indirect connections)
143                //   4.2: Aliases directly present in all_pairs (direct connections)
144                //
145                let mut candidate_aliases = std::collections::HashSet::new();
146
147                // Step 4.1: Derive aliases through bridge variables
148                //
149                // Problem: When analyzing complex nested structures (e.g., Vec::from_raw_parts_in),
150                // we may have aliases like:
151                //   - _0.0.0.0 ≈ _15 (deeply nested field aliases with temporary variable)
152                //   - _1 ≈ _15.0 (parameter aliases with field of temporary variable)
153                //
154                // Both sides are connected through the bridge variable _15, but:
155                //   1. _15 is a temporary (root > arg_count), so it won't appear in final summary
156                //   2. There's no direct alias between _0.x and _1 in all_pairs
157                //   3. Union-Find guarantees transitivity within all_pairs, but the connection
158                //      happens at different structural levels (parent vs child fields)
159                //
160                // Solution: For alias pairs connected through the same bridge variable where one
161                // side references the bridge's parent and the other references a child field,
162                // we derive a direct alias between the parameter/return places, compressing
163                // deep field paths to maintain precision while avoiding temporary variables.
164                //
165                // Example derivation:
166                //   Input:  _0.0.0.0 ≈ _15 (place_i ≈ place_j)
167                //           _1 ≈ _15.0 (place_k ≈ place_m)
168                //   Check:  place_j.root (15) == place_m.root (15) ✓ (same bridge)
169                //           _15 is prefix of _15.0 ✓ (parent-child relation)
170                //   Output: _0.0 ≈ _1 (compress _0's deep fields to first level)
171                //
172                for &(idx_i, idx_j, place_i, place_j) in &all_pairs {
173                    if !relevant_places.contains(&idx_i) || !relevant_places.contains(&idx_j) {
174                        continue;
175                    }
176
177                    for &(idx_k, idx_m, place_k, place_m) in &all_pairs {
178                        if !relevant_places.contains(&idx_k) || !relevant_places.contains(&idx_m) {
179                            continue;
180                        }
181
182                        // Check if both alias pairs share the same bridge variable (by root)
183                        if place_j.root_local() != place_m.root_local() {
184                            continue;
185                        }
186
187                        // Extract field paths for prefix checking
188                        let j_fields = extract_field_path(place_j);
189                        let m_fields = extract_field_path(place_m);
190
191                        // Verify parent-child relationship (proper prefix, not sibling fields)
192                        // E.g., [] is prefix of [0], but [0] is NOT prefix of [1]
193                        if !is_field_prefix(&j_fields, &m_fields)
194                            && !is_field_prefix(&m_fields, &j_fields)
195                        {
196                            continue;
197                        }
198
199                        // Extract roots and fields from both sides
200                        let (root_i, mut fields_i) = extract_fields(place_i);
201                        let (root_k, fields_k) = extract_fields(place_k);
202
203                        // Only derive aliases between parameters and return value
204                        if root_i > arg_count || root_k > arg_count {
205                            continue;
206                        }
207
208                        // Compress deep field paths to first level only
209                        // This balances precision (distinguishing struct fields) with
210                        // generality (avoiding over-specification)
211                        fields_i.reverse(); // extract_fields returns reversed order
212                        if fields_i.len() > 1 {
213                            fields_i = vec![fields_i[0]];
214                        }
215
216                        let mut fields_k_reversed = fields_k.clone();
217                        fields_k_reversed.reverse();
218
219                        // Create the derived alias and add to candidates
220                        let mut alias = AliasPair::new(root_i, root_k);
221                        alias.lhs_fields = fields_i;
222                        alias.rhs_fields = fields_k_reversed;
223
224                        candidate_aliases.insert(alias);
225                    }
226                }
227
228                // Step 4.2: Direct aliases from all_pairs
229                //
230                // Collect aliases that are directly present between parameters and return values
231                // in the Union-Find analysis results. These may overlap with derived aliases from
232                // Step 4.1, but duplicates are automatically handled by the HashSet.
233                //
234                for (idx_i, idx_j, place_i, place_j) in all_pairs {
235                    if relevant_places.contains(&idx_i) && relevant_places.contains(&idx_j) {
236                        let (root_i, mut fields_i) = extract_fields(place_i);
237                        let (root_j, mut fields_j) = extract_fields(place_j);
238
239                        // Only include if both roots are parameters or return value
240                        if root_i <= arg_count && root_j <= arg_count {
241                            // Fields were collected from leaf to root, reverse them
242                            fields_i.reverse();
243                            fields_j.reverse();
244
245                            // Create field-sensitive AliasPair and add to candidates
246                            let mut alias = AliasPair::new(root_i, root_j);
247                            alias.lhs_fields = fields_i;
248                            alias.rhs_fields = fields_j;
249                            candidate_aliases.insert(alias);
250                        }
251                    }
252                }
253
254                // Step 5: Normalize and filter redundant aliases
255                //
256                // Step 5.1: Normalize alias order for consistent comparison
257                //
258                // Alias relationships are symmetric: (A, B) ≡ (B, A)
259                // However, without normalization, (3, 0.0.0) and (0.0, 3) would be
260                // treated as having different local pairs and cannot be compared for
261                // subsumption relationships.
262                //
263                // We normalize by ensuring left_local <= right_local, which allows
264                // the filter to recognize that (0.0, 3) subsumes (0.0.0, 3) even if
265                // the latter was originally collected as (3, 0.0.0).
266                //
267                let normalized_aliases: std::collections::HashSet<_> = candidate_aliases
268                    .iter()
269                    .map(|alias| {
270                        let mut normalized = alias.clone();
271                        if normalized.left_local > normalized.right_local {
272                            normalized.swap(); // Swap both locals and fields
273                        }
274                        normalized
275                    })
276                    .collect();
277
278                // Step 5.2: Filter redundant aliases
279                //
280                // The normalized candidate set may contain redundant aliases such as:
281                //   - Self-aliases: (0, 0), (1, 1)
282                //   - Prefix-subsumed aliases: (0.0, 1) subsumes (0.0.0, 1), (0.0.0.0, 1)
283                //   - Reversed but equivalent: (3, 0.0.0) and (0.0, 3) after normalization
284                //
285                // We filter these to produce a minimal, canonical summary that retains
286                // precision while avoiding over-specification.
287                //
288                let filtered_aliases = filter_redundant_aliases(normalized_aliases);
289                for alias in filtered_aliases.clone() {
290                    summary.add_alias(alias);
291                }
292            }
293        }
294    }
295
296    summary
297}
298
299/// Join two function summaries
300pub fn join_fn_summaries(summary1: &FnAliasPairs, summary2: &FnAliasPairs) -> FnAliasPairs {
301    // TODO: Implement summary join operation
302    let mut result = FnAliasPairs::new(summary1.arg_size());
303
304    // Add all aliases from both summaries
305    for alias in summary1.aliases() {
306        result.add_alias(alias.clone());
307    }
308
309    for alias in summary2.aliases() {
310        result.add_alias(alias.clone());
311    }
312
313    result
314}
315
316/// Filter redundant aliases from a candidate set
317///
318/// This function removes several types of redundant aliases to produce a minimal,
319/// canonical summary:
320///
321/// 1. **Self-aliases**: Aliases where both sides refer to the same place
322///    Example: (0, 0), (1, 1)
323///
324/// 2. **Prefix-subsumed aliases**: When one alias is strictly more general than another
325///
326///    Examples of redundancy patterns eliminated:
327///
328///    a) Single-side prefix (left more specific):
329///       - (1.0, 2) subsumes (1.0.0.0, 2) → keep (1.0, 2), remove (1.0.0.0, 2)
330///       - Rationale: If field 1.0 aliases with 2, then 1.0.0.0 (a sub-field) also aliases
331///
332///    b) Single-side prefix (right more specific):
333///       - (1.0, 2) subsumes (1.0, 2.0) → keep (1.0, 2), remove (1.0, 2.0)
334///       - Rationale: If 1.0 aliases with field 2, then 1.0 aliases with 2.0 (a sub-field)
335///
336///    c) Double-side prefix (synchronized fields):
337///       - (0, 1) subsumes (0.1, 1.1) → keep (0, 1), remove (0.1, 1.1)
338///       - Rationale: If 0 and 1 alias, their corresponding fields also alias
339///       - This handles cases like struct field synchronization
340///
341/// The filtering strategy:
342///   - For each pair of aliases with the same (left_local, right_local):
343///     - Check bidirectional subsumption (a subsumes b, or b subsumes a)
344///     - Keep the more general alias (shorter total field depth)
345///     - This ensures order-independence and catches all redundancy patterns
346///
347/// Returns: A filtered HashSet containing only non-redundant aliases
348fn filter_redundant_aliases(
349    aliases: std::collections::HashSet<AliasPair>,
350) -> std::collections::HashSet<AliasPair> {
351    use std::collections::HashSet;
352
353    let aliases_vec: Vec<_> = aliases.iter().cloned().collect();
354    let mut to_remove = HashSet::new();
355
356    for i in 0..aliases_vec.len() {
357        let alias_a = &aliases_vec[i];
358
359        // Skip if already marked for removal
360        if to_remove.contains(alias_a) {
361            continue;
362        }
363
364        // Rule 1: Remove self-aliases (same local)
365        // Example: (0, 0), (1, 1)
366        if alias_a.left_local == alias_a.right_local {
367            to_remove.insert(alias_a.clone());
368            continue;
369        }
370
371        // Rule 2: Check for prefix subsumption with other aliases
372        for j in 0..aliases_vec.len() {
373            if i == j {
374                continue;
375            }
376            let alias_b = &aliases_vec[j];
377
378            // Skip if already marked for removal
379            if to_remove.contains(alias_b) {
380                continue;
381            }
382
383            // Only compare aliases with the same locals
384            // Note: After normalization, this ensures left_local <= right_local for both,
385            // so (3, 0.0.0) normalized to (0.0.0, 3) can be compared with (0.0, 3)
386            if alias_a.left_local != alias_b.left_local
387                || alias_a.right_local != alias_b.right_local
388            {
389                continue;
390            }
391
392            // Check bidirectional subsumption relationships
393            //
394            // Subsumption means one alias is more general (has shorter field paths)
395            // than another. We check both directions:
396            //   - Does a subsume b? (a is more general)
397            //   - Does b subsume a? (b is more general)
398
399            // Check if a's fields subsume b's fields
400            let lhs_a_subsumes_b = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields)
401                || alias_a.lhs_fields == alias_b.lhs_fields;
402            let rhs_a_subsumes_b = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields)
403                || alias_a.rhs_fields == alias_b.rhs_fields;
404
405            // Check if b's fields subsume a's fields
406            let lhs_b_subsumes_a = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields)
407                || alias_b.lhs_fields == alias_a.lhs_fields;
408            let rhs_b_subsumes_a = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields)
409                || alias_b.rhs_fields == alias_a.rhs_fields;
410
411            // Determine if there's a subsumption relationship
412            let lhs_a_strict = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields);
413            let rhs_a_strict = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields);
414            let lhs_b_strict = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields);
415            let rhs_b_strict = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields);
416
417            // a subsumes b if both sides subsume and at least one is strict
418            let a_subsumes_b =
419                lhs_a_subsumes_b && rhs_a_subsumes_b && (lhs_a_strict || rhs_a_strict);
420            // b subsumes a if both sides subsume and at least one is strict
421            let b_subsumes_a =
422                lhs_b_subsumes_a && rhs_b_subsumes_a && (lhs_b_strict || rhs_b_strict);
423
424            // If there's a subsumption relationship, mark the more specific one for removal
425            if a_subsumes_b || b_subsumes_a {
426                // Compare specificity: more general alias has lower total field depth
427                let spec_a = alias_specificity(alias_a);
428                let spec_b = alias_specificity(alias_b);
429
430                if spec_a < spec_b {
431                    // a is more general, mark b for removal
432                    to_remove.insert(alias_b.clone());
433                } else if spec_b < spec_a {
434                    // b is more general, mark a for removal
435                    to_remove.insert(alias_a.clone());
436                    break; // Stop comparing alias_a since it's marked for removal
437                }
438                // If equal specificity, keep both (shouldn't happen with strict prefix)
439            }
440        }
441    }
442
443    // Return the result with marked aliases removed
444    aliases.difference(&to_remove).cloned().collect()
445}
446
447/// Calculate the specificity of an alias based on total field depth
448///
449/// Specificity is measured as the sum of field depths on both sides.
450/// Lower values indicate more general (less specific) aliases.
451///
452/// Examples:
453///   - (0, 1): specificity = 0 + 0 = 0 (most general)
454///   - (0.1, 1): specificity = 1 + 0 = 1
455///   - (0.1, 1.1): specificity = 1 + 1 = 2
456///   - (1.0.0.0, 2): specificity = 3 + 0 = 3
457///
458/// When two aliases have a subsumption relationship, we keep the one
459/// with lower specificity (more general).
460fn alias_specificity(alias: &AliasPair) -> usize {
461    alias.lhs_fields.len() + alias.rhs_fields.len()
462}
463
464/// Check if `prefix` is a strict prefix of `full`
465///
466/// A strict prefix means:
467///   - `prefix` is shorter than `full`
468///   - All elements of `prefix` match the corresponding elements in `full`
469///
470/// Examples:
471///   - is_strict_prefix([0], [0, 1]) = true
472///   - is_strict_prefix([], [0]) = true
473///   - is_strict_prefix([0], [0]) = false (equal, not strict)
474///   - is_strict_prefix([0], [1]) = false (not a prefix)
475fn is_strict_prefix(prefix: &[usize], full: &[usize]) -> bool {
476    prefix.len() < full.len() && prefix == &full[..prefix.len()]
477}