Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ endif()
add_library(
cuvs_c SHARED
src/core/c_api.cpp
src/cluster/gmm.cpp
src/cluster/kmeans.cpp
src/neighbors/brute_force.cpp
src/neighbors/ivf_flat.cpp
Expand Down
260 changes: 260 additions & 0 deletions c/include/cuvs/cluster/gmm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cuvs/core/c_api.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>

#include <cuvs/core/export.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
* @defgroup gmm_c_params Gaussian mixture hyperparameters
* @{
*/

/**
* @brief Covariance parameterization of the mixture components.
*/
typedef enum {
/** Each component has its own full covariance matrix. */
CUVS_GMM_COVARIANCE_FULL = 0,
/** All components share a single full covariance matrix. */
CUVS_GMM_COVARIANCE_TIED = 1,
/** Each component has its own diagonal covariance. */
CUVS_GMM_COVARIANCE_DIAG = 2,
/** Each component has a single variance. */
CUVS_GMM_COVARIANCE_SPHERICAL = 3
} cuvsGMMCovarianceType;

/**
* @brief Strategy used to initialize the responsibilities before EM.
*/
typedef enum {
/** Run k-means (itself seeded with k-means++) and use the hard labels. */
CUVS_GMM_INIT_KMEANS = 0,
/** Use the k-means++ seeding labels directly. */
CUVS_GMM_INIT_KMEANS_PLUS_PLUS = 1,
/** Random per-sample-normalized responsibilities. */
CUVS_GMM_INIT_RANDOM = 2,
/** Pick n_components samples at random as one-hot responsibilities. */
CUVS_GMM_INIT_RANDOM_FROM_DATA = 3
} cuvsGMMInitMethod;

/**
* @brief Hyper-parameters for the Gaussian mixture EM solver
*/
struct cuvsGMMParams {
/**
* The number of mixture components. Default: 1.
*/
int n_components;

/**
* Covariance parameterization of the mixture components. Default: FULL.
*/
cuvsGMMCovarianceType covariance_type;

/**
* Convergence threshold on the change of the per-sample average
* log-likelihood (lower bound). Default: 1e-3.
*/
double tol;

/**
* Non-negative regularization added to the diagonal of covariance.
* Default: 1e-6.
*/
double reg_covar;

/**
* Maximum number of EM iterations for a single run. Default: 100.
*/
int max_iter;

/**
* Number of initializations to perform; the best result is kept. Default: 1.
*/
int n_init;

/**
* Strategy used to initialize the responsibilities before EM.
* Default: KMEANS.
*/
cuvsGMMInitMethod init;

/**
* Seed to the random number generator. Default: 0.
*/
uint64_t seed;
};

typedef struct cuvsGMMParams* cuvsGMMParams_t;

/**
* @brief Allocate GMM params, and populate with default values
*
* @param[in] params cuvsGMMParams_t to allocate
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsGMMParamsCreate(cuvsGMMParams_t* params);

/**
* @brief De-allocate GMM params
*
* @param[in] params
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsGMMParamsDestroy(cuvsGMMParams_t params);

/**
* @}
*/

/**
* @defgroup gmm_c Gaussian mixture model APIs
* @{
*
* The covariance-shaped tensors (``covariances``, ``precisions_chol``,
* ``precisions``) depend on ``covariance_type``. With ``K = n_components``
* and ``d = n_features`` the expected shapes are (row-major):
*
* - ``CUVS_GMM_COVARIANCE_FULL``: (K, d, d)
* - ``CUVS_GMM_COVARIANCE_TIED``: (d, d)
* - ``CUVS_GMM_COVARIANCE_DIAG``: (K, d)
* - ``CUVS_GMM_COVARIANCE_SPHERICAL``: (K,)
*/

/**
* @brief Fit a Gaussian mixture with the EM algorithm.
*
* Runs ``params->n_init`` random restarts (unless ``warm_start`` is true) and
* keeps the parameters with the largest lower bound.
*
* All tensors must reside on device memory and be row-major. ``X``,
* ``weights``, ``means``, ``covariances``, ``precisions_chol`` and
* ``precisions`` must share one dtype (float32 or float64); ``labels`` is
* int32.
*
* @param[in] res opaque C handle
* @param[in] params Parameters for the GMM model.
* @param[in] X Training data. [dim = n_samples x n_features]
* @param[inout] weights Mixture weights. [len = n_components]
* @param[inout] means Component means.
* [dim = n_components x n_features]
* @param[inout] covariances Component covariances, flat. Length by
* covariance_type (K=n_components, d=n_features):
* FULL K*d*d, TIED d*d, DIAG K*d, SPHERICAL K.
* @param[out] precisions_chol Precision Cholesky factors, same flat layout as
* covariances (FULL/TIED: upper-triangular factor
* U with precision = U @ Uᵀ; DIAG/SPHERICAL:
* reciprocal standard deviations).
* @param[out] precisions Precision matrices, same flat layout as
* covariances.
* @param[out] labels Hard component assignment per sample.
* [len = n_samples]
* @param[out] lower_bound Per-sample average log-likelihood of the best
* fit.
* @param[out] n_iter Number of EM iterations of the best fit.
* @param[out] converged Whether the best fit converged within tol.
* @param[in] warm_start Use the incoming weights/means/covariances as
* the single initialization.
*/
CUVS_EXPORT cuvsError_t cuvsGMMFit(cuvsResources_t res,
cuvsGMMParams_t params,
DLManagedTensor* X,
DLManagedTensor* weights,
DLManagedTensor* means,
DLManagedTensor* covariances,
DLManagedTensor* precisions_chol,
DLManagedTensor* precisions,
DLManagedTensor* labels,
double* lower_bound,
int* n_iter,
bool* converged,
bool warm_start);

/**
* @brief Hard component labels (argmax responsibility) for new data.
*
* @param[in] res opaque C handle
* @param[in] params Parameters used to fit the GMM model.
* @param[in] X Data to assign. [dim = n_samples x n_features]
* @param[in] weights Fitted mixture weights. [len = n_components]
* @param[in] means Fitted component means.
* [dim = n_components x n_features]
* @param[in] precisions_chol Fitted precision Cholesky factors, flat. Length
* by covariance_type (K=n_components, d=n_features):
* FULL K*d*d, TIED d*d, DIAG K*d, SPHERICAL K.
* @param[out] labels Hard component assignment per sample (int32).
* [len = n_samples]
*/
CUVS_EXPORT cuvsError_t cuvsGMMPredict(cuvsResources_t res,
cuvsGMMParams_t params,
DLManagedTensor* X,
DLManagedTensor* weights,
DLManagedTensor* means,
DLManagedTensor* precisions_chol,
DLManagedTensor* labels);

/**
* @brief Posterior responsibilities for new data.
*
* @param[in] res opaque C handle
* @param[in] params Parameters used to fit the GMM model.
* @param[in] X Data to evaluate. [dim = n_samples x n_features]
* @param[in] weights Fitted mixture weights. [len = n_components]
* @param[in] means Fitted component means.
* [dim = n_components x n_features]
* @param[in] precisions_chol Fitted precision Cholesky factors, flat. Length
* by covariance_type (K=n_components, d=n_features):
* FULL K*d*d, TIED d*d, DIAG K*d, SPHERICAL K.
* @param[out] resp Posterior probability of each component for
* each sample. [dim = n_samples x n_components]
*/
CUVS_EXPORT cuvsError_t cuvsGMMPredictProba(cuvsResources_t res,
cuvsGMMParams_t params,
DLManagedTensor* X,
DLManagedTensor* weights,
DLManagedTensor* means,
DLManagedTensor* precisions_chol,
DLManagedTensor* resp);

/**
* @brief Per-sample log-likelihood log p(x_i) for new data.
*
* @param[in] res opaque C handle
* @param[in] params Parameters used to fit the GMM model.
* @param[in] X Data to evaluate. [dim = n_samples x n_features]
* @param[in] weights Fitted mixture weights. [len = n_components]
* @param[in] means Fitted component means.
* [dim = n_components x n_features]
* @param[in] precisions_chol Fitted precision Cholesky factors, flat. Length
* by covariance_type (K=n_components, d=n_features):
* FULL K*d*d, TIED d*d, DIAG K*d, SPHERICAL K.
* @param[out] log_prob_norm Log-likelihood of each sample under the model.
* [len = n_samples]
*/
CUVS_EXPORT cuvsError_t cuvsGMMScoreSamples(cuvsResources_t res,
cuvsGMMParams_t params,
DLManagedTensor* X,
DLManagedTensor* weights,
DLManagedTensor* means,
DLManagedTensor* precisions_chol,
DLManagedTensor* log_prob_norm);

/**
* @}
*/

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions c/include/cuvs/core/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <cuvs/core/export.h>
#include <cuvs/core/c_api.h>

#include <cuvs/cluster/gmm.h>
#include <cuvs/cluster/kmeans.h>

#include <cuvs/distance/distance.h>
Expand Down
Loading
Loading