Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 65 additions & 15 deletions crates/ruborist/src/service/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use super::fetch::{
};
use super::http::get_client;
use crate::model::manifest::{CoreVersionManifest, FullManifest};
use crate::resolver::version::resolve_target_version;

/// Parse a JSON buffer on rayon's CPU thread pool (native) or inline
/// (wasm32). The buffer is consumed because `simd_json` mutates it in-place.
Expand Down Expand Up @@ -49,32 +50,52 @@ where
pub(crate) async fn parse_full_manifest_off_runtime(
raw_bytes: Bytes,
) -> Result<FullManifest, anyhow::Error> {
parse_full_manifest_with_core_off_runtime(raw_bytes, None)
.await
.map(|(manifest, _)| manifest)
}

pub(crate) type FullManifestParseResult = (FullManifest, Option<(String, CoreVersionManifest)>);

fn parse_full_manifest_with_core_sync(
raw_bytes: Bytes,
spec: Option<String>,
) -> Result<FullManifestParseResult, anyhow::Error> {
// simd_json mutates the parse buffer; copy inside the worker so
// response-body bytes can stay immutable until parsing starts.
let mut parse_buf = raw_bytes.to_vec();
let mut manifest: FullManifest = simd_json::serde::from_slice::<FullManifest>(&mut parse_buf)
.map_err(|e| anyhow!("JSON parse error: {e}"))?;
manifest.raw = raw_bytes;

let speculative = spec.and_then(|spec| {
resolve_target_version((&manifest).into(), &spec)
.ok()
.and_then(|version| manifest.get_core_version(&version).map(|core| (spec, core)))
});

Ok((manifest, speculative))
}
Comment on lines +60 to +78
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This implementation performs two full JSON parses and an extra memory allocation when a spec is provided. manifest.get_core_version calls extract_version, which clones manifest.raw into a new Vec and re-parses it using simd_json::to_borrowed_value.

Since this PR is focused on performance, you can optimize the Some(spec) path by parsing the JSON once into a simd_json::BorrowedValue DOM. From this DOM, you can both deserialize the FullManifest and extract the specific CoreVersionManifest without re-parsing or re-allocating the raw bytes.

For the None case, keeping the direct simd_json::serde::from_slice is preferred as it is generally faster than building the intermediate DOM.

fn parse_full_manifest_with_core_sync(
    raw_bytes: Bytes,
    spec: Option<String>,
) -> Result<FullManifestParseResult, anyhow::Error> {
    use serde::Deserialize;
    // simd_json mutates the parse buffer; copy inside the worker so
    // response-body bytes can stay immutable until parsing starts.
    let mut parse_buf = raw_bytes.to_vec();

    let (mut manifest, speculative) = if let Some(spec) = spec {
        use simd_json::prelude::ValueObjectAccess;
        let value = simd_json::to_borrowed_value(&mut parse_buf)
            .map_err(|e| anyhow!("JSON parse error: {e}"))?;

        let manifest = FullManifest::deserialize(&value)
            .map_err(|e| anyhow!("JSON mapping error: {e}"))?;

        let speculative = resolve_target_version((&manifest).into(), &spec)
            .ok()
            .and_then(|version| {
                let version_obj = value.get("versions")?.get(version.as_str())?;
                let core = CoreVersionManifest::deserialize(version_obj).ok()?;
                Some((spec, core))
            });

        (manifest, speculative)
    } else {
        let manifest: FullManifest = simd_json::serde::from_slice::<FullManifest>(&mut parse_buf)
            .map_err(|e| anyhow!("JSON parse error: {e}"))?;
        (manifest, None)
    };

    manifest.raw = raw_bytes;
    Ok((manifest, speculative))
}


/// Parse a full wire-fetched manifest and optionally extract the current BFS
/// edge's version in the same off-runtime worker task.
pub(crate) async fn parse_full_manifest_with_core_off_runtime(
raw_bytes: Bytes,
spec: Option<String>,
) -> Result<FullManifestParseResult, anyhow::Error> {
#[cfg(not(target_arch = "wasm32"))]
{
let (tx, rx) = tokio::sync::oneshot::channel();
rayon::spawn(move || {
let result = (|| -> Result<FullManifest, anyhow::Error> {
// simd_json mutates the parse buffer; copy inside the worker so
// response-body bytes can stay immutable until parsing starts.
let mut parse_buf = raw_bytes.to_vec();
let mut manifest: FullManifest =
simd_json::serde::from_slice::<FullManifest>(&mut parse_buf)
.map_err(|e| anyhow!("JSON parse error: {e}"))?;
manifest.raw = raw_bytes;

Ok(manifest)
})();
let result = parse_full_manifest_with_core_sync(raw_bytes, spec);
let _ = tx.send(result);
});
rx.await
.map_err(|e| anyhow!("rayon parse channel closed: {e}"))?
}
#[cfg(target_arch = "wasm32")]
{
let mut manifest: FullManifest =
parse_json_vec_off_runtime(raw_bytes.clone().to_vec()).await?;
manifest.raw = raw_bytes;
Ok(manifest)
parse_full_manifest_with_core_sync(raw_bytes, spec)
}
}

Expand Down Expand Up @@ -337,4 +358,33 @@ mod tests {
}
);
}

#[tokio::test]
async fn parse_full_manifest_with_core_extracts_requested_spec() {
let raw = Bytes::from_static(
br#"{
"name":"demo",
"dist-tags":{"latest":"1.0.0"},
"versions":{
"1.0.0":{
"name":"demo",
"version":"1.0.0",
"dist":{"tarball":"https://registry.example/demo-1.0.0.tgz"}
}
}
}"#,
);

let (manifest, speculative) =
parse_full_manifest_with_core_off_runtime(raw.clone(), Some("latest".to_string()))
.await
.unwrap();

assert_eq!(manifest.name, "demo");
assert_eq!(manifest.raw, raw);
let (spec, core) = speculative.unwrap();
assert_eq!(spec, "latest");
assert_eq!(core.name, "demo");
assert_eq!(core.version, "1.0.0");
}
}
Loading