
Running Experiments

This guide covers the experiment infrastructure for publication-quality research: multi-seed runs, statistical analysis, visualization, and export of results.

Optional Dependencies

Full experiment support requires the optional analysis dependencies:

pip install alberta-framework[analysis]
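
If you want to fail early when the extras are missing, a quick runtime check helps. This is a minimal sketch; it assumes the analysis extra provides joblib (used for parallel runs below), matplotlib (figures), and scipy (statistics), which may not match the exact dependency list in the package metadata.

import importlib.util

# Packages assumed to be provided by the [analysis] extra -- verify against
# the project's own dependency metadata
for pkg in ("joblib", "matplotlib", "scipy"):
    if importlib.util.find_spec(pkg) is None:
        raise ImportError(
            f"Missing optional dependency '{pkg}'. "
            "Install with: pip install alberta-framework[analysis]"
        )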

Multi-Seed Experiments

Reliable conclusions require running each configuration across multiple random seeds and reporting aggregate statistics rather than results from a single run.

Basic Setup

from alberta_framework import LinearLearner, LMS, IDBD
from alberta_framework.streams import RandomWalkTarget
from alberta_framework.utils import (
    ExperimentConfig,
    run_multi_seed_experiment,
)
import jax.random as jr

# Define experiment configurations
configs = [
    ExperimentConfig(
        name="LMS",
        learner_factory=lambda: LinearLearner(optimizer=LMS(step_size=0.01)),
        stream_factory=lambda key: RandomWalkTarget(
            feature_dim=10, key=key, walk_std=0.01
        ),
        num_steps=10000,
    ),
    ExperimentConfig(
        name="IDBD",
        learner_factory=lambda: LinearLearner(optimizer=IDBD(initial_step_size=0.01)),
        stream_factory=lambda key: RandomWalkTarget(
            feature_dim=10, key=key, walk_std=0.01
        ),
        num_steps=10000,
    ),
]

# Run across 30 seeds
results = run_multi_seed_experiment(
    configs=configs,
    seeds=30,
    parallel=True,  # Use joblib for parallelization
)

Accessing Results

from alberta_framework.utils import get_final_performance, get_metric_timeseries

# Get final performance for each method
for name, agg_result in results.items():
    perf = get_final_performance(agg_result, metric="squared_error", window=1000)
    print(f"{name}: {perf.mean:.4f} +/- {perf.std:.4f}")

# Get learning curves
lms_curves = get_metric_timeseries(results["LMS"], metric="squared_error")
# Shape: (num_seeds, num_steps)
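
If you need aggregate curves outside the built-in plotting helpers, you can reduce over the seed axis yourself. A minimal sketch, assuming get_metric_timeseries returns an array-like of shape (num_seeds, num_steps):

import numpy as np

curves = np.asarray(lms_curves)           # (num_seeds, num_steps)
mean_curve = curves.mean(axis=0)          # average over seeds
stderr = curves.std(axis=0, ddof=1) / np.sqrt(curves.shape[0])  # standard error

# e.g. a 95% normal-approximation band around the mean curve
lower, upper = mean_curve - 1.96 * stderr, mean_curve + 1.96 * stderr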

Statistical Analysis

Pairwise Comparisons

from alberta_framework.utils import pairwise_comparisons

# Compare all pairs of methods
comparisons = pairwise_comparisons(
    results,
    metric="squared_error",
    window=1000,
    test="welch",           # or "mann_whitney", "wilcoxon"
    correction="holm",      # Multiple comparison correction
)

for comp in comparisons:
    print(f"{comp.method_a} vs {comp.method_b}:")
    print(f"  p-value: {comp.p_value:.4f}")
    print(f"  Cohen's d: {comp.effect_size:.2f}")
    print(f"  Significant: {comp.significant}")

Individual Tests

from alberta_framework.utils import ttest_comparison, compute_statistics

# Two-sample t-test
result = ttest_comparison(
    results["LMS"],
    results["IDBD"],
    metric="squared_error",
)

# Summary statistics with confidence intervals
stats = compute_statistics(
    get_final_performance(results["IDBD"], "squared_error").values,
    confidence=0.95,
)
print(f"Mean: {stats.mean:.4f}")
print(f"95% CI: [{stats.ci_lower:.4f}, {stats.ci_upper:.4f}]")

Visualization

Learning Curves

from alberta_framework.utils import (
    set_publication_style,
    plot_learning_curves,
    save_figure,
)

set_publication_style()

fig, ax = plot_learning_curves(
    results,
    metric="squared_error",
    window=100,           # Smoothing window
    show_individual=False, # Show mean + CI only
    ci_alpha=0.2,         # Confidence band transparency
)

save_figure(fig, "learning_curves", formats=["pdf", "png"])

Performance Comparison

from alberta_framework.utils import plot_final_performance_bars

fig, ax = plot_final_performance_bars(
    results,
    metric="squared_error",
    window=1000,
    show_significance=True,  # Add significance markers
)

save_figure(fig, "performance_bars")

Multi-Panel Figures

from alberta_framework.utils import create_comparison_figure

fig = create_comparison_figure(
    results,
    metric="squared_error",
    window=1000,
    title="IDBD vs LMS Comparison",
)

save_figure(fig, "comparison", formats=["pdf"])

Export

Tables

from alberta_framework.utils import generate_latex_table, generate_markdown_table

# LaTeX table for papers
latex = generate_latex_table(
    results,
    metrics=["squared_error"],
    caption="Tracking Error Comparison",
    label="tab:results",
)

# Markdown for README
markdown = generate_markdown_table(results, metrics=["squared_error"])
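
Both helpers return plain strings (as the variable names above suggest), so writing them to disk is a one-liner:

from pathlib import Path

Path("results.tex").write_text(latex)
Path("results.md").write_text(markdown)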

Data Files

from alberta_framework.utils import export_to_csv, export_to_json

# CSV for external analysis
export_to_csv(results, "results.csv")

# JSON for archival
export_to_json(results, "results.json")
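
The exported CSV can then be re-loaded in any external tool. For example, with pandas (assuming a flat tabular layout, which may differ from the actual export schema):

import pandas as pd

df = pd.read_csv("results.csv")
print(df.head())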

Complete Report

from alberta_framework.utils import save_experiment_report

# Save all artifacts at once
save_experiment_report(
    results,
    output_dir="experiment_output",
    formats=["pdf", "png"],
    include_tables=True,
    include_data=True,
)

This creates:

experiment_output/
├── figures/
│   ├── learning_curves.pdf
│   ├── learning_curves.png
│   ├── performance_bars.pdf
│   └── performance_bars.png
├── tables/
│   ├── results.tex
│   └── results.md
└── data/
    ├── results.csv
    └── results.json

Hyperparameter Sweeps

from alberta_framework.utils import extract_hyperparameter_results

# Run experiments with different step-sizes
step_sizes = [0.001, 0.01, 0.1]
all_configs = []

for alpha in step_sizes:
    all_configs.append(
        ExperimentConfig(
            name=f"LMS_alpha={alpha}",
            # Bind the current alpha via a default argument so each lambda
            # captures its own value (avoids Python's late binding of closures)
            learner_factory=lambda a=alpha: LinearLearner(
                optimizer=LMS(step_size=a)
            ),
            stream_factory=lambda key: RandomWalkTarget(
                feature_dim=10, key=key
            ),
            num_steps=10000,
            metadata={"step_size": alpha},  # Store hyperparameters
        )
    )

results = run_multi_seed_experiment(all_configs, seeds=30)

# Extract best configuration
best = extract_hyperparameter_results(
    results,
    metric="squared_error",
    minimize=True,
)
print(f"Best step-size: {best.metadata['step_size']}")