From 62617b550888604c040f7b5b413140ec1021fbbb Mon Sep 17 00:00:00 2001 From: Taku Kodma <79110363+risu729@users.noreply.github.com> Date: Sun, 31 May 2026 18:47:24 +1000 Subject: [PATCH] fix(github): strip OpenGrep platform suffixes --- src/backend/asset_matcher.rs | 68 +------------------------------ src/backend/mod.rs | 1 + src/backend/platform_tokens.rs | 74 ++++++++++++++++++++++++++++++++++ src/backend/static_helpers.rs | 45 ++++++++++++--------- 4 files changed, 102 insertions(+), 86 deletions(-) create mode 100644 src/backend/platform_tokens.rs diff --git a/src/backend/asset_matcher.rs b/src/backend/asset_matcher.rs index 9e20777cdd..dcd2d112ff 100644 --- a/src/backend/asset_matcher.rs +++ b/src/backend/asset_matcher.rs @@ -20,6 +20,7 @@ use regex::Regex; use std::sync::LazyLock; use super::platform_target::PlatformTarget; +use super::platform_tokens::is_platform_or_version_token; use super::static_helpers::get_filename_from_url; use crate::file::TarFormat; use crate::http::HTTP; @@ -484,73 +485,6 @@ fn asset_name_stem(asset: &str) -> String { name } -fn is_platform_or_version_token(token: &str) -> bool { - if token.is_empty() { - return true; - } - if token.starts_with("manylinux") || token.starts_with("musllinux") { - return true; - } - if token.starts_with('v') - && token[1..] - .chars() - .next() - .is_some_and(|c| c.is_ascii_digit()) - { - return true; - } - if token.chars().next().is_some_and(|c| c.is_ascii_digit()) { - return true; - } - - matches!( - token, - "linux" - | "ubuntu" - | "debian" - | "fedora" - | "centos" - | "rhel" - | "alpine" - | "arch" - | "darwin" - | "mac" - | "macos" - | "macosx" - | "osx" - | "windows" - | "win" - | "win32" - | "win64" - | "mingw" - | "mingw32" - | "mingw64" - | "w64" - | "x86" - | "64" - | "x64" - | "amd64" - | "aarch64" - | "arm64" - | "arm" - | "armv6" - | "armv7" - | "i386" - | "i686" - | "ppc64" - | "ppc64le" - | "riscv64" - | "s390x" - | "gnu" - | "glibc" - | "musl" - | "msvc" - | "pc" - | "apple" - | "unknown" - ) -} - /// Detects platform information from a URL pub fn detect_platform_from_url(url: &str) -> Option { let mut detected_os = None; diff --git a/src/backend/mod.rs b/src/backend/mod.rs index 03e2ab24ec..8251cdda60 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -69,6 +69,7 @@ pub mod npm; pub(crate) mod options; pub mod pipx; pub mod platform_target; +mod platform_tokens; pub mod s3; pub mod spm; pub mod static_helpers; diff --git a/src/backend/platform_tokens.rs b/src/backend/platform_tokens.rs new file mode 100644 index 0000000000..d56503e93b --- /dev/null +++ b/src/backend/platform_tokens.rs @@ -0,0 +1,74 @@ +pub const BINARY_OS_TOKENS: &[&str] = &[ + "linux", + "manylinux", + "musllinux", + "darwin", + "macos", + "osx", + "windows", + "win", + "freebsd", + "openbsd", + "netbsd", + "android", + "unknown", +]; + +pub const BINARY_ARCH_TOKENS: &[&str] = &[ + "x86_64", "aarch64", "ppc64le", "ppc64", "armv7", "armv6", "arm64", "amd64", "mipsel", + "riscv64", "s390x", "i686", "i386", "x64", "mips", "arm", "x86", +]; + +const PREFERRED_NAME_OS_TOKENS: &[&str] = &[ + "ubuntu", "debian", "fedora", "centos", "rhel", "alpine", "arch", "mac", "macosx", "win32", + "win64", "mingw", "mingw32", "mingw64", "w64", +]; +const PREFERRED_NAME_ARCH_TOKENS: &[&str] = &["64"]; +const QUALIFIER_TOKENS: &[&str] = &["gnu", "glibc", "musl", "msvc", "pc", "apple"]; + +pub fn is_platform_or_version_token(token: &str) -> bool { + if token.is_empty() { + return true; + } + if is_os_token(token) || is_arch_token(token) || QUALIFIER_TOKENS.contains(&token) { + return true; + } + if token + .strip_prefix('v') + .and_then(|token| token.chars().next()) + .is_some_and(|c| c.is_ascii_digit()) + { + return true; + } + + token.chars().next().is_some_and(|c| c.is_ascii_digit()) +} + +fn is_os_token(token: &str) -> bool { + token.starts_with("manylinux") + || token.starts_with("musllinux") + || BINARY_OS_TOKENS.contains(&token) + || PREFERRED_NAME_OS_TOKENS.contains(&token) +} + +fn is_arch_token(token: &str) -> bool { + BINARY_ARCH_TOKENS.contains(&token) || PREFERRED_NAME_ARCH_TOKENS.contains(&token) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_platform_or_version_tokens_include_linux_variants() { + for token in [ + "osx", + "manylinux", + "manylinux2014", + "musllinux", + "musllinux_1_2", + ] { + assert!(is_platform_or_version_token(token)); + } + } +} diff --git a/src/backend/static_helpers.rs b/src/backend/static_helpers.rs index 3eff63a56c..642c20052f 100644 --- a/src/backend/static_helpers.rs +++ b/src/backend/static_helpers.rs @@ -12,6 +12,8 @@ use std::path::Path; use std::path::PathBuf; use std::sync::LazyLock; +use super::platform_tokens::{BINARY_ARCH_TOKENS, BINARY_OS_TOKENS}; + /// Regex pattern for matching version suffixes like -v1.2.3, _1.2.3, etc. static VERSION_PATTERN: LazyLock = LazyLock::new(|| regex::Regex::new(r"[-_]v?\d+(\.\d+)*(-[a-zA-Z0-9]+(\.\d+)?)?$").unwrap()); @@ -67,19 +69,6 @@ pub async fn fetch_checksum_from_file(checksum_url: &str, algo: &str) -> Option< } } -// ========== Platform Patterns ========== - -// Shared OS/arch patterns used across helpers -const OS_PATTERNS: &[&str] = &[ - "linux", "darwin", "macos", "windows", "win", "freebsd", "openbsd", "netbsd", "android", - "unknown", -]; -// Longer arch patterns first to avoid partial matches -const ARCH_PATTERNS: &[&str] = &[ - "x86_64", "aarch64", "ppc64le", "ppc64", "armv7", "armv6", "arm64", "amd64", "mipsel", - "riscv64", "s390x", "i686", "i386", "x64", "mips", "arm", "x86", -]; - pub trait VerifiableError: Sized + Send + Sync + 'static { fn is_not_found(&self) -> bool; fn into_eyre(self) -> eyre::Report; @@ -330,8 +319,8 @@ pub fn list_available_platforms_with_key(opts: &ToolVersionOptions, key_type: &s } // Probe nested keys using shared patterns - for os in OS_PATTERNS { - for arch in ARCH_PATTERNS { + for os in BINARY_OS_TOKENS { + for arch in BINARY_ARCH_TOKENS { for prefix in ["platforms", "platform"] { let nested_key = format!("{prefix}.{os}-{arch}.{key_type}"); if opts.contains_key(&nested_key) { @@ -889,8 +878,14 @@ pub fn clean_binary_name(name: &str, tool_name: Option<&str>) -> String { let mut cleaned = name_without_ext.to_string(); // First try combined OS-arch patterns - for os in OS_PATTERNS { - for arch in ARCH_PATTERNS { + for os in BINARY_OS_TOKENS { + if !cleaned.contains(os) { + continue; + } + for arch in BINARY_ARCH_TOKENS { + if !cleaned.contains(arch) { + continue; + } // Try different separator combinations let patterns = [ format!("-{os}-{arch}"), @@ -913,7 +908,7 @@ pub fn clean_binary_name(name: &str, tool_name: Option<&str>) -> String { } // Try just OS suffix (sometimes arch is omitted) - for os in OS_PATTERNS { + for os in BINARY_OS_TOKENS { let patterns = [format!("-{os}"), format!("_{os}")]; for pattern in &patterns { if let Some(pos) = cleaned.rfind(pattern.as_str()) { @@ -934,7 +929,7 @@ pub fn clean_binary_name(name: &str, tool_name: Option<&str>) -> String { } // Try just arch suffix (sometimes OS is omitted) - for arch in ARCH_PATTERNS { + for arch in BINARY_ARCH_TOKENS { let patterns = [format!("-{arch}"), format!("_{arch}")]; for pattern in &patterns { if let Some(pos) = cleaned.rfind(pattern.as_str()) { @@ -1081,6 +1076,18 @@ mod tests { clean_binary_name("app-linux.AppImage", None), "app.AppImage" ); + assert_eq!( + clean_binary_name("opengrep_osx_arm64", Some("opengrep/opengrep")), + "opengrep" + ); + assert_eq!( + clean_binary_name("opengrep_manylinux_x86", Some("opengrep/opengrep")), + "opengrep" + ); + assert_eq!( + clean_binary_name("opengrep_musllinux_x86", Some("opengrep/opengrep")), + "opengrep" + ); // Test edge cases assert_eq!(clean_binary_name("linux", None), "linux"); // Just OS name