From d9d1caa8157e4e14941ef33c94a334d5925f274d Mon Sep 17 00:00:00 2001
From: ivan-aksamentov
Date: Tue, 28 Apr 2026 09:25:09 +0200
Subject: [PATCH 1/2] fix(tree): treat deletions as undetermined in nearest node distance

The nearest node search distance metric did not account for query
deletions. Node mutations at positions within a query deletion were
counted as mismatches, biasing placement toward the root for sequences
with large internal deletions. Pass deletion ranges as a separate
parameter and exclude deleted positions from the distance calculation.
---
 .../nextclade/src/run/nextclade_run_one.rs     |  3 ++-
 .../src/tree/tree_find_nearest_node.rs         | 18 +++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/packages/nextclade/src/run/nextclade_run_one.rs b/packages/nextclade/src/run/nextclade_run_one.rs
index 81b14d774..cdc647fc4 100644
--- a/packages/nextclade/src/run/nextclade_run_one.rs
+++ b/packages/nextclade/src/run/nextclade_run_one.rs
@@ -327,7 +327,8 @@ pub fn nextclade_run_one(
     nearest_node_name,
     nearest_nodes,
   } = if let Some(graph) = graph {
-    let nearest_node_candidates = graph_find_nearest_nodes(graph, &substitutions, &missing, &alignment_range)?;
+    let nearest_node_candidates =
+      graph_find_nearest_nodes(graph, &substitutions, &missing, &deletions, &alignment_range)?;
     let nearest_node_id = nearest_node_candidates[0].node_key;
     let nearest_node = graph.get_node(nearest_node_id)?.payload();
     let nearest_node_name = nearest_node.name.clone();
diff --git a/packages/nextclade/src/tree/tree_find_nearest_node.rs b/packages/nextclade/src/tree/tree_find_nearest_node.rs
index 5c7670501..67930ca4e 100644
--- a/packages/nextclade/src/tree/tree_find_nearest_node.rs
+++ b/packages/nextclade/src/tree/tree_find_nearest_node.rs
@@ -1,6 +1,7 @@
 use crate::alphabet::nuc::Nuc;
 use crate::analyze::is_sequenced::is_nuc_sequenced;
 use crate::analyze::letter_ranges::NucRange;
+use crate::analyze::nuc_del::NucDelRange;
 use crate::analyze::nuc_sub::NucSub;
 use crate::coord::range::NucRefGlobalRange;
 use crate::graph::node::GraphNodeKey;
@@ -21,6 +22,7 @@ pub fn graph_find_nearest_nodes(
   graph: &AuspiceGraph,
   qry_nuc_subs: &[NucSub],
   qry_missing: &[NucRange],
+  qry_deletions: &[NucDelRange],
   aln_range: &NucRefGlobalRange,
 ) -> Result<Vec<TreePlacementInfo>, Report> {
   let masked_ranges = graph.data.meta.placement_mask_ranges();
@@ -29,7 +31,14 @@ pub fn graph_find_nearest_nodes(
   let nodes_by_placement_score = DftPre::new(graph.get_exactly_one_root()?, |node| graph.iter_children_of(node))
     .map(|(_, node)| {
       let node_payload = node.payload();
-      let distance = tree_calculate_node_distance(node_payload, qry_nuc_subs, qry_missing, aln_range, masked_ranges);
+      let distance = tree_calculate_node_distance(
+        node_payload,
+        qry_nuc_subs,
+        qry_missing,
+        qry_deletions,
+        aln_range,
+        masked_ranges,
+      );
       let prior = get_prior(node_payload);
       TreePlacementInfo {
         node_key: node.key(),
@@ -69,6 +78,7 @@ fn tree_calculate_node_distance(
   node: &AuspiceGraphNodePayload,
   qry_nuc_subs: &[NucSub],
   qry_missing: &[NucRange],
+  qry_deletions: &[NucDelRange],
   aln_range: &NucRefGlobalRange,
   masked_ranges: &[NucRefGlobalRange],
 ) -> i64 {
@@ -105,11 +115,13 @@ fn tree_calculate_node_distance(
     }
   }
 
-  // determine the number of sites that are mutated in the node but missing in seq.
+  // determine the number of sites that are mutated in the node but missing or deleted in seq.
   // for these we can't tell whether the node agrees with seq
   let mut undetermined_sites = 0_i64;
   for pos in node.tmp.substitutions.keys() {
-    if !is_nuc_sequenced(*pos, &masked_qry_missing, aln_range) {
+    if !is_nuc_sequenced(*pos, &masked_qry_missing, aln_range)
+      || qry_deletions.iter().any(|del| del.range().contains(*pos))
+    {
       undetermined_sites += 1;
     }
   }

From 3004cda11b044866131731610246752533f9f646 Mon Sep 17 00:00:00 2001
From: ivan-aksamentov
Date: Tue, 28 Apr 2026 09:25:25 +0200
Subject: [PATCH 2/2] test(tree): add test for deletion-aware nearest node distance

Add test verifying that query deletions covering node mutation
positions reduce the distance (undetermined sites excluded). Update
existing tests for the new `qry_deletions` parameter.
---
 .../src/tree/tree_find_nearest_node.rs | 123 ++++++++++++++++--
 1 file changed, 114 insertions(+), 9 deletions(-)

diff --git a/packages/nextclade/src/tree/tree_find_nearest_node.rs b/packages/nextclade/src/tree/tree_find_nearest_node.rs
index 67930ca4e..6d87bbb37 100644
--- a/packages/nextclade/src/tree/tree_find_nearest_node.rs
+++ b/packages/nextclade/src/tree/tree_find_nearest_node.rs
@@ -246,7 +246,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 0);
 
@@ -261,7 +269,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 3);
 
@@ -276,7 +292,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 5);
 
@@ -291,7 +315,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 5);
 
@@ -306,7 +338,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 4);
 
@@ -321,7 +361,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 20);
     let masked_ranges = vec![];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 3);
 
@@ -336,7 +384,15 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 100);
     let masked_ranges = vec![NucRefGlobalRange::from_usize(0, 100)];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 0);
 
@@ -355,7 +411,15 @@ mod tests {
       NucRefGlobalRange::from_usize(30, 50),
     ];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 3);
 
@@ -370,10 +434,51 @@ mod tests {
     let aln_range = NucRefGlobalRange::from_usize(0, 30);
     let masked_ranges = vec![NucRefGlobalRange::from_usize(12, 13)];
 
-    let result = tree_calculate_node_distance(&node, &qry_nuc_subs, &qry_missing, &aln_range, &masked_ranges);
+    let qry_deletions: Vec<NucDelRange> = vec![];
+    let result = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
 
     assert_eq!(result, 3);
 
     Ok(())
   }
+
+  #[rstest]
+  fn deletion_covering_node_mutations_reduces_distance() -> Result<(), Report> {
+    let node = node_with_simple_nuc_subs();
+    let qry_nuc_subs: Vec<NucSub> = vec![];
+    let qry_missing: Vec<NucRange> = vec![];
+    let aln_range = NucRefGlobalRange::from_usize(0, 100);
+    let masked_ranges = vec![];
+
+    let no_deletions: Vec<NucDelRange> = vec![];
+    let distance_without_del = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &no_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
+    assert_eq!(distance_without_del, 5);
+
+    let qry_deletions = vec![NucDelRange::from_usize(10, 25)];
+    let distance_with_del = tree_calculate_node_distance(
+      &node,
+      &qry_nuc_subs,
+      &qry_missing,
+      &qry_deletions,
+      &aln_range,
+      &masked_ranges,
+    );
+    assert_eq!(distance_with_del, 2);
+
+    Ok(())
+  }
 }