Quick Start
This section shows a minimal end‑to‑end example using SToG after installing the package from PyPI (note the PyPI distribution name is lowercase `stog`, while the importable module is `SToG`):
pip install stog
Basic Example: STG on Breast Cancer
The example below reproduces the main steps from the demo notebook in a compact form.
import torch
import torch.nn as nn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from SToG import (
STGLayer,
FeatureSelectionTrainer,
create_classification_model,
)
# Fix the PyTorch RNG so repeated runs give the same result
torch.manual_seed(42)

# 1. Load and prepare data
dataset = load_breast_cancer()
features = StandardScaler().fit_transform(dataset.data)
labels = dataset.target

# Hold out 20% as the test set, then carve 20% of the remainder out
# as a validation set; both splits are stratified on the class label.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# Convert every split to PyTorch tensors: float inputs, integer class labels
X_train, X_val, X_test = (torch.FloatTensor(a) for a in (X_train, X_val, X_test))
y_train, y_val, y_test = (torch.LongTensor(a) for a in (y_train, y_val, y_test))
# 2. Create model and STG selector
n_features = X_train.shape[1]
n_classes = len(torch.unique(y_train))

# A classifier over all input features, plus a gating layer that learns
# which features to keep.
model = create_classification_model(input_dim=n_features, num_classes=n_classes)
selector = STGLayer(input_dim=n_features, sigma=0.5)

# 3. Create trainer
# lambda_reg is the regularization weight on the selector
# (presumably trades accuracy against sparsity — see the SToG docs).
trainer = FeatureSelectionTrainer(
    model=model,
    selector=selector,
    criterion=nn.CrossEntropyLoss(),
    lambda_reg=0.05,
    device="cpu",
)
# 4. Train with early stopping
# Training stops once the validation metric fails to improve for
# `patience` consecutive epochs, or after `epochs` epochs at most.
history = trainer.fit(
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    epochs=300,
    patience=50,
    verbose=True,
)

# 5. Evaluate on test set and report accuracy plus how many features survived
result = trainer.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {result['test_acc']:.2f}%")
print(f"Selected Features: {result['selected_count']} / {n_features}")
Quick Comparison of Methods
To mirror the demo, you can quickly compare several selection methods on the same train/val/test split.
from SToG import (
STGLayer,
STELayer,
GumbelLayer,
CorrelatedSTGLayer,
L1Layer,
FeatureSelectionTrainer,
create_classification_model,
)
# One selector instance per method under comparison
methods = {
    "STG": STGLayer(input_dim=n_features, sigma=0.5),
    "STE": STELayer(input_dim=n_features),
    "Gumbel": GumbelLayer(input_dim=n_features, temperature=1.0),
    "CorrelatedSTG": CorrelatedSTGLayer(input_dim=n_features, sigma=0.5),
    "L1": L1Layer(input_dim=n_features),
}

results = {}
for method_name, gate in methods.items():
    # Each method trains its own freshly initialised model so results
    # are not contaminated by a previous run's weights.
    model = create_classification_model(
        input_dim=n_features,
        num_classes=n_classes,
    )
    trainer = FeatureSelectionTrainer(
        model=model,
        selector=gate,
        criterion=nn.CrossEntropyLoss(),
        lambda_reg=0.05,
        device="cpu",
    )
    trainer.fit(
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        epochs=300,
        patience=50,
        verbose=False,
    )
    results[method_name] = trainer.evaluate(X_test, y_test)

# Print a fixed-width summary table: method, test accuracy, features kept
print(f"\n{'Method':<20} {'Accuracy':<12} {'Selected':<12}")
print("-" * 44)
for method_name, summary in results.items():
    print(
        f"{method_name:<20} {summary['test_acc']:>10.2f}% "
        f"{summary['selected_count']:>10} / {n_features}"
    )
Next Steps
For more advanced usage mirroring the full demo notebook, see:
Tutorial for a detailed synthetic example with plots
API Reference for the full API reference