BodyBalanceEvaluation/backend/venv/Lib/site-packages/sklearn/tree/_partitioner.pxd
2025-07-31 17:23:05 +08:00

179 lines
4.8 KiB
Cython

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
# See _partitioner.pyx for details.
from ..utils._typedefs cimport (
float32_t, float64_t, int8_t, int32_t, intp_t, uint8_t, uint32_t
)
from ._splitter cimport SplitRecord
# Mitigate precision differences between 32 bit and 64 bit
# NOTE(review): presumably the tolerance below which two float32 feature
# values are treated as equal by the splitters -- confirm against
# _partitioner.pyx / _splitter.pyx, where this constant is used.
cdef float32_t FEATURE_THRESHOLD = 1e-7
# We provide here the abstract interface for a Partitioner that would, in
# theory, be shared between the Dense and Sparse partitioners. However, we
# leave it commented out for now because it is not used in the current
# implementation: inheritance-based polymorphism incurs a performance hit
# from vtable lookups. It is kept here for future reference.
#
# Note: Instead, in `_splitter.pyx`, we define a fused type that can be used
# to represent both the dense and sparse partitioners.
#
# cdef class BasePartitioner:
#     cdef intp_t[::1] samples
#     cdef float32_t[::1] feature_values
#     cdef intp_t start
#     cdef intp_t end
#     cdef intp_t n_missing
#     cdef const uint8_t[::1] missing_values_in_feature_mask
#
#     cdef void sort_samples_and_feature_values(
#         self, intp_t current_feature
#     ) noexcept nogil
#     cdef void init_node_split(
#         self,
#         intp_t start,
#         intp_t end
#     ) noexcept nogil
#     cdef void find_min_max(
#         self,
#         intp_t current_feature,
#         float32_t* min_feature_value_out,
#         float32_t* max_feature_value_out,
#     ) noexcept nogil
#     cdef void next_p(
#         self,
#         intp_t* p_prev,
#         intp_t* p
#     ) noexcept nogil
#     cdef intp_t partition_samples(
#         self,
#         float64_t current_threshold
#     ) noexcept nogil
#     cdef void partition_samples_final(
#         self,
#         intp_t best_pos,
#         float64_t best_threshold,
#         intp_t best_feature,
#         intp_t n_missing,
#     ) noexcept nogil
cdef class DensePartitioner:
    """Partitioner for dense input data.

    The interface declared here does not depend on the splitting strategy
    (best vs. random). See _partitioner.pyx for the implementation.
    """
    cdef:
        # Read-only 2-D float32 view of the input.
        # NOTE(review): presumably shaped (n_samples, n_features) -- confirm
        # against _partitioner.pyx.
        const float32_t[:, :] X
        # C-contiguous work buffers.
        # NOTE(review): presumably sample indices and the matching values of
        # the feature currently being examined -- confirm in the .pyx.
        intp_t[::1] samples
        float32_t[::1] feature_values
        # NOTE(review): presumably the bounds of the current node's range
        # within `samples`, as passed to init_node_split -- confirm.
        intp_t start
        intp_t end
        # NOTE(review): presumably the number of missing values found for the
        # current feature -- confirm.
        intp_t n_missing
        # Read-only, C-contiguous uint8 mask.
        # NOTE(review): presumably one flag per feature -- confirm.
        const uint8_t[::1] missing_values_in_feature_mask

    # All methods below are C-level (`cdef`) and declared `noexcept nogil`:
    # they cannot propagate Python exceptions and may be called without the GIL.
    cdef void sort_samples_and_feature_values(
        self,
        intp_t current_feature
    ) noexcept nogil

    cdef void init_node_split(self, intp_t start, intp_t end) noexcept nogil

    # The two pointer arguments are output parameters (see their `_out` suffix).
    cdef void find_min_max(
        self,
        intp_t current_feature,
        float32_t* min_feature_value_out,
        float32_t* max_feature_value_out,
    ) noexcept nogil

    # `p_prev` and `p` are passed by pointer.
    # NOTE(review): presumably both are updated in place -- confirm.
    cdef void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil

    cdef intp_t partition_samples(self, float64_t current_threshold) noexcept nogil

    cdef void partition_samples_final(
        self,
        intp_t best_pos,
        float64_t best_threshold,
        intp_t best_feature,
        intp_t n_missing,
    ) noexcept nogil
cdef class SparsePartitioner:
    """Partitioner for sparse input data in CSC format.

    The interface declared here does not depend on the splitting strategy
    (best vs. random). See _partitioner.pyx for the implementation.
    """
    cdef:
        # Read-only, C-contiguous views.
        # NOTE(review): presumably the data / indices / indptr arrays of the
        # CSC matrix -- confirm against _partitioner.pyx.
        const float32_t[::1] X_data
        const int32_t[::1] X_indices
        const int32_t[::1] X_indptr
        intp_t n_total_samples
        # C-contiguous intp work buffers used by the sparse implementation.
        # NOTE(review): exact roles are defined in the .pyx -- confirm there.
        intp_t[::1] index_to_samples
        intp_t[::1] sorted_samples
        intp_t start_positive
        intp_t end_negative
        bint is_samples_sorted

        # The attributes below mirror those declared on DensePartitioner.
        intp_t[::1] samples
        float32_t[::1] feature_values
        # NOTE(review): presumably the bounds of the current node's range
        # within `samples`, as passed to init_node_split -- confirm.
        intp_t start
        intp_t end
        intp_t n_missing
        # Read-only, C-contiguous uint8 mask.
        # NOTE(review): presumably one flag per feature -- confirm.
        const uint8_t[::1] missing_values_in_feature_mask

    # All methods below are C-level (`cdef`) and declared `noexcept nogil`:
    # they cannot propagate Python exceptions and may be called without the GIL.
    # The first six have the same signatures as DensePartitioner's methods.
    cdef void sort_samples_and_feature_values(
        self,
        intp_t current_feature
    ) noexcept nogil

    cdef void init_node_split(self, intp_t start, intp_t end) noexcept nogil

    # The two pointer arguments are output parameters (see their `_out` suffix).
    cdef void find_min_max(
        self,
        intp_t current_feature,
        float32_t* min_feature_value_out,
        float32_t* max_feature_value_out,
    ) noexcept nogil

    # `p_prev` and `p` are passed by pointer.
    # NOTE(review): presumably both are updated in place -- confirm.
    cdef void next_p(self, intp_t* p_prev, intp_t* p) noexcept nogil

    cdef intp_t partition_samples(self, float64_t current_threshold) noexcept nogil

    cdef void partition_samples_final(
        self,
        intp_t best_pos,
        float64_t best_threshold,
        intp_t best_feature,
        intp_t n_missing,
    ) noexcept nogil

    # Sparse-only methods (no counterpart is declared on DensePartitioner).
    cdef void extract_nnz(self, intp_t feature) noexcept nogil

    cdef intp_t _partition(self, float64_t threshold, intp_t zero_pos) noexcept nogil
# Module-level C function shared with other modules through this .pxd.
# Takes a pointer to a SplitRecord, a contiguous intp buffer and an end
# position; it cannot raise and may be called without the GIL.
# NOTE(review): judging by the name, it moves missing-value samples to the
# left side of the split when `best` requires it -- confirm in _partitioner.pyx.
cdef void shift_missing_values_to_left_if_required(
    SplitRecord* best, intp_t[::1] samples, intp_t end
) noexcept nogil