"""Dataset loading utilities."""
import numpy as np
from sklearn.datasets import load_breast_cancer, load_wine, make_classification
class DatasetLoader:
    """Load and prepare datasets for benchmarking.

    Every loader is a static method that returns a plain dictionary with the
    keys ``'name'``, ``'X'``, ``'y'``, ``'n_important'`` and
    ``'description'``.
    """

    @staticmethod
    def load_breast_cancer() -> dict:
        """
        Load breast cancer dataset.

        Returns:
            Dictionary with dataset information: ``'name'``, ``'X'``
            (``data.data``), ``'y'`` (``data.target``), ``'n_important'``
            and ``'description'``.
        """
        # The bare name below resolves to the module-level scikit-learn
        # import, not to this method: names bound in a class body are not
        # visible from inside its methods, so this call does not recurse.
        data = load_breast_cancer()
        return {
            'name': 'Breast Cancer',
            'X': data.data,
            'y': data.target,
            # NOTE(review): how this count was chosen is not visible here;
            # confirm against the benchmark code that consumes it.
            'n_important': 10,
            'description': 'Binary classification, 30 features',
        }

    @staticmethod
    def load_wine() -> dict:
        """
        Load wine dataset.

        Returns:
            Dictionary with dataset information: ``'name'``, ``'X'``
            (``data.data``), ``'y'`` (``data.target``), ``'n_important'``
            and ``'description'``.
        """
        # Resolves to the module-level scikit-learn import (class-body names
        # are not in scope inside method bodies), so this does not recurse.
        data = load_wine()
        return {
            'name': 'Wine',
            'X': data.data,
            'y': data.target,
            # NOTE(review): how this count was chosen is not visible here;
            # confirm against the benchmark code that consumes it.
            'n_important': 7,
            'description': '3-class classification, 13 features',
        }

    @staticmethod
    def create_synthetic_high_dim(
        *,
        n_samples: int = 600,
        n_features: int = 100,
        n_informative: int = 5,
        n_redundant: int = 10,
        random_state=42,
    ) -> dict:
        """
        Create synthetic high-dimensional dataset (MADELON-like).

        Called without arguments, this issues exactly the same
        ``make_classification`` call as before and returns the same
        ``'n_important'`` and ``'description'`` values. All parameters are
        keyword-only and are forwarded to ``make_classification`` unchanged.

        Args:
            n_samples: Number of samples to generate.
            n_features: Total number of features.
            n_informative: Number of informative features; also reported as
                ``'n_important'`` in the result.
            n_redundant: Number of redundant features.
            random_state: Seed passed to ``make_classification``.

        Returns:
            Dictionary with dataset information: ``'name'``, ``'X'``, ``'y'``,
            ``'n_important'`` and ``'description'``.
        """
        X, y = make_classification(
            n_samples=n_samples,
            n_features=n_features,
            n_informative=n_informative,
            n_redundant=n_redundant,
            n_repeated=0,
            n_classes=2,
            n_clusters_per_class=2,
            flip_y=0.03,
            class_sep=1.0,
            random_state=random_state,
        )
        return {
            'name': 'Synthetic-HighDim',
            'X': X,
            'y': y,
            # Derived from the generator arguments so the metadata cannot
            # drift away from the data that was actually generated.
            'n_important': n_informative,
            'description': (
                f'Binary classification, {n_features} features, '
                f'{n_informative} informative'
            ),
        }