From e7e223573966578c2381ebe2a1a8817f4ec530d4 Mon Sep 17 00:00:00 2001
From: Pascal
Date: Wed, 23 Nov 2022 16:58:18 +0100
Subject: [PATCH 1/3] DescendPeaks in C++
This is the C++ implementation of the descendPeaks function of MSnbase. It takes a slightly different approach to the original by first subsetting the maximum region to consider. Next for each left/right descend, it checks if the next signal is A) above the signal percentage threshold and B) not higher than the previous signal (rising). If the signal passes both tests, the left/right index is updated. After both descends, these indexes are used to define the region of which the weighted mean of the mass is taken.
---
src/descendPeak.cpp | 143 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 143 insertions(+)
create mode 100644 src/descendPeak.cpp
diff --git a/src/descendPeak.cpp b/src/descendPeak.cpp
new file mode 100644
index 00000000..02900be7
--- /dev/null
+++ b/src/descendPeak.cpp
@@ -0,0 +1,143 @@
+// Pascal Maas - p.maas@lacdr.leidenuniv.nl
+
+#include
+using namespace Rcpp;
+using namespace std;
+
+double weightedMean(NumericVector mzs, NumericVector ints) {
+ // Get the total intensity
+ double intensity = sum(ints);
+
+ // Calculate the weighted mass
+ int n = ints.length();
+ double total = 0;
+ for (int i = 0; i < n; i++) {
+ total += mzs[i] * ints[i] / intensity;
+ }
+ return total;
+}
+
+double descend(int centroid, NumericVector intensities,
+ NumericVector masses, double signalRatio) {
+
+ // Maximum deviation from the centroid index
+ int maxK = 30;
+
+ // Define a min and max index for the region where the region is not negative
+ // and does not exceed the size of the intensities vector, since we only
+ // have to consider centroid - maxK until centroid + maxK
+
+ // Ensures the index is at least 0
+ int minId = std::max(centroid - maxK, 0);
+
+ // Ensures the index is at most the size of the intensities vector - 1
+ int maxId = std::min(centroid + maxK, (int) intensities.size() - 1);
+
+ // Get indices of this region
+ Rcpp::Range idx = Rcpp::Range(minId, maxId);
+
+ // Subset intensities and masses with the defined region
+ NumericVector ints = intensities[idx];
+ NumericVector mass = masses[idx];
+
+
+ // Find the new location of the centroid, as the centroid can be shifted by
+ // subsetting the region. By substracting the minimum value, the original
+ // centroid will now refer to the new centroid location.
+ centroid = centroid - minId;
+ double centroidValue = ints[centroid];
+
+ // Calculate the allowed limits within the region
+ // Update the left and right limits with new idx, as long as the values are
+ // within the thresholds of percentage and are not rising.
+ int leftLimit = centroid;
+
+ // Start looping from centroid to the left (negative)
+ for (int i = centroid - 1; i >= 0; i--) {
+ // Get current and previous values
+ double previous = ints[i + 1];
+ double current = ints[i];
+
+ // Good signal should be above the centroid / signal ratio
+ bool goodSignal = current / centroidValue > signalRatio;
+
+ // Compare current to previous value to check for a rising signal
+ bool risingSignal = current >= previous;
+
+ // If either the signal is too low or the current signal is rising,
+ // Break the loop since we've reached the left limit
+ if (!goodSignal || risingSignal) {
+ break;
+ }
+ // All good, so update the leftLimit to the current index
+ leftLimit = i;
+ }
+
+ // Repeat this process for the right side of the centroid
+ int rightLimit = centroid;
+ int N = ints.size();
+
+ // Start loop from centroid to right (positive)
+ for (int i = centroid + 1; i < N; i++) {
+ // Get current and previous values
+ double current = ints[i];
+ double previous = ints[i - 1];
+
+ // Good signal should be above the centroid / signal ratio
+ bool goodSignal = current / centroidValue > signalRatio;
+
+ // Compare current to previous value to check for a rising signal
+ bool risingSignal = current >= previous;
+
+ // If either the signal is too low or the current signal is rising,
+ // Break the loop since we've reached the right limit
+ if (!goodSignal || risingSignal) {
+ break;
+ }
+
+ // All good, so update the rightLimit to the current index
+ rightLimit = i;
+ }
+
+ // Determine the region between the left and right limits
+ // to obtain the region of interest
+ Range region = Rcpp::Range(leftLimit, rightLimit);
+
+ // calculate the weighted mean of the region and return the value
+ return weightedMean(mass[region], ints[region]);
+}
+
+//' @name descendPeaksC
+//' @title descendPeak algorithm from MSnbase for centroid refinement
+//' @description This is the C++ version of `descendPeak` of MSnbase. It
+//' calculates the weighted mean of the centroid region by checking for the
+//' signal ratio and rising signals.
+//' @details DescendPeaks is a centroid-refinement algorithm that descends from
+//' the centroid until the signal rises again. It also considers the intensity
+//' of neighbouring signals to be at least a percentage of the centroid
+//' intensity.
+//' @param centroids Position of the centroids (local maximum) as C-index
+//' (R-index - 1).
+//' @param intensities Vector of intensities in a scan, preferably smoothed
+//' @param masses Vector of masses in a scan
+//' @param signalPercentage Percentage between 0-100 defining the minimum signal
+//' for a peak to be considered for the weighted intensity region.
+//' @return Weighted masses for the given centroids
+//' @export
+// [[Rcpp::export]]
+NumericVector descendPeaksC(NumericVector centroids, NumericVector intensities,
+ NumericVector masses, double signalPercentage) {
+
+ // Prepare variables
+ int n = centroids.length();
+ double signalRatio = signalPercentage / 100;
+ NumericVector weightedMZs(n);
+
+ // Loop through all centroid positions (should be C-indexed, not R-indexed!)
+ for (int i = 0; i < n; i++) {
+ // Calculate the weighted mass
+ weightedMZs[i] = descend(centroids[i], intensities,
+ masses, signalRatio);
+ }
+ return weightedMZs;
+}
From b033e86380334a5ffffdc30f132b22eed48e842e Mon Sep 17 00:00:00 2001
From: Pascal
Date: Sat, 26 Nov 2022 16:00:05 +0100
Subject: [PATCH 2/3] Remove the need for sum
Avoids looping over the intensity vector twice
Co-authored-by: Sebastian Gibb
---
src/descendPeak.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/src/descendPeak.cpp b/src/descendPeak.cpp
index 02900be7..48bde902 100644
--- a/src/descendPeak.cpp
+++ b/src/descendPeak.cpp
@@ -5,16 +5,14 @@ using namespace Rcpp;
using namespace std;
double weightedMean(NumericVector mzs, NumericVector ints) {
- // Get the total intensity
- double intensity = sum(ints);
-
- // Calculate the weighted mass
int n = ints.length();
double total = 0;
+ double intensity = 0;
for (int i = 0; i < n; i++) {
- total += mzs[i] * ints[i] / intensity;
+ total += mzs[i] * ints[i];
+ intensity += ints[i];
}
- return total;
+ return total / intensity;
}
double descend(int centroid, NumericVector intensities,
From acdb17b3cc6579a2c75946fb4181b0857e95dd11 Mon Sep 17 00:00:00 2001
From: Pascal
Date: Sat, 26 Nov 2022 16:03:28 +0100
Subject: [PATCH 3/3] Moved maxK to function arguments
Allows for setting a different maxK, which determines the maximum region the descendPeak algorithm is applied to
---
src/descendPeak.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/descendPeak.cpp b/src/descendPeak.cpp
index 48bde902..cf8a1f59 100644
--- a/src/descendPeak.cpp
+++ b/src/descendPeak.cpp
@@ -16,10 +16,8 @@ double weightedMean(NumericVector mzs, NumericVector ints) {
}
double descend(int centroid, NumericVector intensities,
- NumericVector masses, double signalRatio) {
+ NumericVector masses, double signalRatio, int maxK = 30) {
- // Maximum deviation from the centroid index
- int maxK = 30;
// Define a min and max index for the region where the region is not negative
// and does not exceed the size of the intensities vector, since we only
@@ -124,7 +122,7 @@ double descend(int centroid, NumericVector intensities,
//' @export
// [[Rcpp::export]]
NumericVector descendPeaksC(NumericVector centroids, NumericVector intensities,
- NumericVector masses, double signalPercentage) {
+ NumericVector masses, double signalPercentage, int maxK = 30) {
// Prepare variables
int n = centroids.length();
@@ -135,7 +133,7 @@ NumericVector descendPeaksC(NumericVector centroids, NumericVector intensities,
for (int i = 0; i < n; i++) {
// Calculate the weighted mass
weightedMZs[i] = descend(centroids[i], intensities,
- masses, signalRatio);
+ masses, signalRatio, maxK);
}
return weightedMZs;
}