feature_elimination
SHAP-based backward feature elimination with cross-validation. Supports LightGBM and XGBoost.
ShapImportance(
model: Any = None,
cv: int | Any = 5,
scoring: str = "roc_auc",
n_jobs: int = -1,
random_state: int | None = None,
importance_method: Literal[
"mean", "variance_penalized"
] = "mean",
variance_penalty_factor: float = 0.5,
)
Bases: BaseEstimator
Source code in datasci_toolkit/feature_elimination/importance.py
def __init__(
    self,
    model: Any = None,
    cv: int | Any = 5,
    scoring: str = "roc_auc",
    n_jobs: int = -1,
    random_state: int | None = None,
    importance_method: Literal["mean", "variance_penalized"] = "mean",
    variance_penalty_factor: float = 0.5,
) -> None:
    """Record the SHAP-importance configuration.

    Follows the scikit-learn estimator convention: ``__init__`` only
    stores the hyper-parameters verbatim (no validation, no work), so
    that ``get_params``/``set_params``/``clone`` behave correctly.

    Args:
        model: Fitted-or-fittable estimator to explain; ``None`` means
            a default is chosen elsewhere (presumably at fit time —
            TODO confirm against the fit implementation).
        cv: Number of folds, or a cross-validation splitter object.
        scoring: Scoring metric name passed to the CV machinery.
        n_jobs: Parallelism for cross-validation; ``-1`` uses all cores.
        random_state: Seed for reproducibility; ``None`` for nondeterminism.
        importance_method: How fold-wise SHAP values are aggregated —
            plain ``"mean"`` or a variance-penalized mean.
        variance_penalty_factor: Weight of the variance penalty when
            ``importance_method == "variance_penalized"``.
    """
    # Plain attribute assignment only — required by sklearn's BaseEstimator.
    self.model = model
    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.random_state = random_state
    self.importance_method = importance_method
    self.variance_penalty_factor = variance_penalty_factor
ShapRFE(
model: Any = None,
step: int | float = 1,
min_features_to_select: int = 1,
cv: int | Any = 5,
scoring: str = "roc_auc",
n_jobs: int = -1,
random_state: int | None = None,
importance_method: Literal[
"mean", "variance_penalized"
] = "mean",
variance_penalty_factor: float = 0.5,
columns_to_keep: list[str] | None = None,
)
Bases: BaseEstimator
Source code in datasci_toolkit/feature_elimination/elimination.py
def __init__(
    self,
    model: Any = None,
    step: int | float = 1,
    min_features_to_select: int = 1,
    cv: int | Any = 5,
    scoring: str = "roc_auc",
    n_jobs: int = -1,
    random_state: int | None = None,
    importance_method: Literal["mean", "variance_penalized"] = "mean",
    variance_penalty_factor: float = 0.5,
    columns_to_keep: list[str] | None = None,
) -> None:
    """Record the recursive-feature-elimination configuration.

    Scikit-learn convention: the constructor stores hyper-parameters
    verbatim and performs no validation or computation, keeping
    ``get_params``/``set_params``/``clone`` well-behaved.

    Args:
        model: Estimator to run elimination with; ``None`` defers to a
            default chosen elsewhere (TODO confirm where).
        step: Features removed per iteration — an absolute count (int)
            or a fraction of remaining features (float).
        min_features_to_select: Lower bound on the surviving feature set.
        cv: Number of folds, or a cross-validation splitter object.
        scoring: Scoring metric name passed to the CV machinery.
        n_jobs: Parallelism for cross-validation; ``-1`` uses all cores.
        random_state: Seed for reproducibility; ``None`` for nondeterminism.
        importance_method: Aggregation of fold-wise SHAP importances —
            ``"mean"`` or a variance-penalized mean.
        variance_penalty_factor: Weight of the variance penalty when
            ``importance_method == "variance_penalized"``.
        columns_to_keep: Feature names exempt from elimination, or
            ``None`` to allow removing any feature.
    """
    # Plain attribute assignment only — required by sklearn's BaseEstimator.
    self.model = model
    self.step = step
    self.min_features_to_select = min_features_to_select
    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.random_state = random_state
    self.importance_method = importance_method
    self.variance_penalty_factor = variance_penalty_factor
    self.columns_to_keep = columns_to_keep
plot_shap_elimination(
report: DataFrame, show: bool = True
) -> Figure
Source code in datasci_toolkit/feature_elimination/_plot.py
def plot_shap_elimination(report: pl.DataFrame, show: bool = True) -> Figure:
    """Plot train/validation score curves from an elimination report.

    Draws mean score versus number of remaining features for both the
    train and validation folds, with a +/- one-standard-deviation band
    around each curve. The x-axis is inverted so the plot reads
    left-to-right as features are eliminated.

    Args:
        report: Elimination report with columns ``n_features``,
            ``train_score_mean``, ``train_score_std``,
            ``val_score_mean`` and ``val_score_std``.
        show: If True, display the figure; otherwise close it (the
            returned Figure object can still be saved by the caller).

    Returns:
        The matplotlib Figure containing the plot.
    """
    feature_counts = report["n_features"].to_list()
    fig, ax = plt.subplots(figsize=(10, 6))
    # One curve + uncertainty band per split, drawn train-first so the
    # default color cycle and legend order match the original output.
    for label, mean_col, std_col in (
        ("Train", "train_score_mean", "train_score_std"),
        ("Validation", "val_score_mean", "val_score_std"),
    ):
        mean = report[mean_col].to_numpy()
        std = report[std_col].to_numpy()
        ax.plot(feature_counts, mean, label=label)
        ax.fill_between(feature_counts, mean - std, mean + std, alpha=0.2)
    ax.set_xlabel("Number of features")
    ax.set_ylabel("Score")
    ax.set_title("SHAP Backward Feature Elimination")
    ax.invert_xaxis()
    ax.legend()
    if show:
        plt.show()
    else:
        plt.close(fig)
    return fig