Experiment

Bases: MetricSource

Main experiment tracking class that orchestrates all tracking functionality

Source code in tracelet/core/experiment.py
def __init__(
    self,
    name: str,
    config: Optional[ExperimentConfig] = None,
    backend: Optional[list[str]] = None,  # Names of backend plugins to activate
    tags: Optional[list[str]] = None,
    automagic: bool = False,  # Enable automagic instrumentation
    automagic_config: Optional["AutomagicConfig"] = None,  # Custom automagic configuration
):
    self.name = name
    self.id = str(uuid.uuid4())
    self.config = config or ExperimentConfig()
    self.created_at = datetime.now(timezone.utc)
    self.tags = tags or []
    self._current_iteration = 0
    self._active_collectors = []
    self._backends = backend if backend is not None else []
    self._framework = None

    # Automagic instrumentation
    self._automagic_enabled = automagic or self.config.enable_automagic
    self._automagic_config = automagic_config
    self._automagic_instrumentor = None

    # Initialize data flow orchestrator
    self._orchestrator = DataFlowOrchestrator(max_queue_size=10000, num_workers=4)

    # Initialize plugin manager
    self._plugin_manager = PluginManager()

    self._initialize()
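
Example Usage:

A minimal construction sketch based on the signature above; most workflows create experiments through tracelet.start_logging() rather than instantiating Experiment directly, and the backend name and flags shown here are illustrative.

from tracelet.core.experiment import Experiment, ExperimentConfig

# Construct an experiment with one backend and automagic capture enabled
exp = Experiment(
    name="direct_construction_demo",
    config=ExperimentConfig(),
    backend=["mlflow"],  # illustrative backend plugin name
    tags=["demo"],
    automagic=True,
)
exp.start()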

iteration: int property

Get current iteration

capture_dataset(dataset)

Capture dataset information using automagic instrumentation.

Source code in tracelet/core/experiment.py
def capture_dataset(self, dataset: Any) -> dict[str, Any]:
    """Capture dataset information using automagic instrumentation."""
    if not self._automagic_enabled or not self._automagic_instrumentor:
        return {}

    return self._automagic_instrumentor.capture_dataset_info(dataset, self)

capture_hyperparams()

Capture hyperparameters from calling context using automagic instrumentation.

Source code in tracelet/core/experiment.py
def capture_hyperparams(self) -> dict[str, Any]:
    """Capture hyperparameters from calling context using automagic instrumentation."""
    if not self._automagic_enabled or not self._automagic_instrumentor:
        return {}

    return self._automagic_instrumentor.capture_hyperparameters(self, frame_depth=2)

capture_model(model)

Capture model information using automagic instrumentation.

Source code in tracelet/core/experiment.py
def capture_model(self, model: Any) -> dict[str, Any]:
    """Capture model information using automagic instrumentation."""
    if not self._automagic_enabled or not self._automagic_instrumentor:
        return {}

    return self._automagic_instrumentor.capture_model_info(model, self)
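
Example Usage:

A short sketch covering the three automagic capture helpers above. It assumes the experiment was created with automagic=True; when automagic is disabled, each call returns an empty dict. The model and dataset objects are placeholders for whatever your framework provides.

# Capture hyperparameters from the calling scope
learning_rate = 0.001
batch_size = 32
hparams = exp.capture_hyperparams()

# Capture model and dataset metadata
model_info = exp.capture_model(model)  # e.g. a torch.nn.Module
dataset_info = exp.capture_dataset(train_dataset)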

emit_metric(metric)

Emit a metric to the orchestrator

Source code in tracelet/core/experiment.py
def emit_metric(self, metric: MetricData):
    """Emit a metric to the orchestrator"""
    self._orchestrator.emit_metric(metric)
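
Example Usage:

A minimal sketch of emitting a custom MetricData record directly, using only the fields visible in this page's source listings; the exact import path and the metadata payload are assumptions.

from tracelet.core.orchestrator import MetricData, MetricType  # import path is an assumption

metric = MetricData(
    name="gpu_memory_mb",
    value=1024.5,
    type=MetricType.SCALAR,
    iteration=10,
    source=exp.get_source_id(),
    metadata={"device": "cuda:0"},  # hypothetical metadata
)
exp.emit_metric(metric)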

end()

End the experiment and clean up resources (alias for stop).

Source code in tracelet/core/experiment.py
def end(self):
    """End the experiment and clean up resources (alias for stop)."""
    self.stop()

get_source_id()

Return unique identifier for this experiment source

Source code in tracelet/core/experiment.py
def get_source_id(self) -> str:
    """Return unique identifier for this experiment source"""
    return f"experiment_{self.id}"

log_artifact(local_path, artifact_path=None)

Log a local file as an artifact

Source code in tracelet/core/experiment.py
def log_artifact(self, local_path: str, artifact_path: Optional[str] = None):
    """Log a local file as an artifact"""
    metric = MetricData(
        name=artifact_path or local_path,
        value=local_path,
        type=MetricType.ARTIFACT,
        iteration=None,  # Artifacts don't have iterations
        source=self.get_source_id(),
        metadata={"artifact_path": artifact_path},
    )
    self.emit_metric(metric)

log_hyperparameter(name, value)

Log a hyperparameter (alias for compatibility).

Source code in tracelet/core/experiment.py
def log_hyperparameter(self, name: str, value: Any):
    """Log a hyperparameter (alias for compatibility)."""
    # Create metric data directly to avoid intermediate dictionary creation
    metric = MetricData(
        name=name,
        value=value,
        type=MetricType.PARAMETER,
        iteration=None,  # Parameters don't have iterations
        source=self.get_source_id(),
    )
    self.emit_metric(metric)
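
Example Usage:

Single-parameter logging, equivalent to a one-entry log_params call:

exp.log_hyperparameter("learning_rate", 0.001)
exp.log_hyperparameter("optimizer", "adam")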

log_metric(name, value, iteration=None)

Log a metric value

Source code in tracelet/core/experiment.py
def log_metric(self, name: str, value: Any, iteration: Optional[int] = None):
    """Log a metric value"""
    iteration = iteration or self._current_iteration

    # Create metric data
    metric = MetricData(
        name=name,
        value=value,
        type=MetricType.SCALAR if isinstance(value, (int, float)) else MetricType.CUSTOM,
        iteration=iteration,
        source=self.get_source_id(),
    )

    # Emit to orchestrator
    self.emit_metric(metric)

log_params(params)

Log experiment parameters

Source code in tracelet/core/experiment.py
def log_params(self, params: dict[str, Any]):
    """Log experiment parameters"""
    for name, value in params.items():
        metric = MetricData(
            name=name,
            value=value,
            type=MetricType.PARAMETER,
            iteration=None,  # Parameters don't have iterations
            source=self.get_source_id(),
        )
        self.emit_metric(metric)

set_iteration(iteration)

Set the current iteration

Source code in tracelet/core/experiment.py
def set_iteration(self, iteration: int):
    """Set the current iteration"""
    self._current_iteration = iteration

start()

Start the experiment tracking

Source code in tracelet/core/experiment.py
def start(self):
    """Start the experiment tracking"""
    # Start the orchestrator
    self._orchestrator.start()

    # Start backend plugins
    for backend_name in self._backends:
        self._plugin_manager.start_plugin(backend_name)

    # Start collector plugins
    for collector_info in self._plugin_manager.get_plugins_by_type(PluginType.COLLECTOR):
        if self._plugin_manager.initialize_plugin(collector_info.metadata.name):
            self._plugin_manager.start_plugin(collector_info.metadata.name)

stop()

Stop the experiment tracking and clean up resources

Source code in tracelet/core/experiment.py
def stop(self):
    """Stop the experiment tracking and clean up resources"""
    # Clean up automagic instrumentation first
    if self._automagic_enabled and self._automagic_instrumentor:
        self._automagic_instrumentor.detach_experiment(self.id)

    # Stop all active plugins
    for plugin_name, plugin_info in self._plugin_manager.plugins.items():
        if plugin_info.state == PluginState.ACTIVE:
            self._plugin_manager.stop_plugin(plugin_name)

    # Stop the orchestrator
    self._orchestrator.stop()


Core Methods

Metric Logging

log_metric

Log a metric value. Numeric values are typed as SCALAR and everything else as CUSTOM; when iteration is omitted, the experiment's current iteration is used.

Example Usage:

import tracelet

# Start experiment
exp = tracelet.start_logging(exp_name="metrics_demo", project="examples", backend="mlflow")

# Log scalar metrics
exp.log_metric("loss", 0.1, iteration=100)
exp.log_metric("accuracy", 0.95, iteration=100)
exp.log_metric("learning_rate", 0.001, iteration=100)

# Log metrics over time
for epoch in range(10):
    train_loss = 1.0 / (epoch + 1)  # Decreasing loss
    exp.log_metric("train_loss", train_loss, iteration=epoch)

    if epoch % 2 == 0:  # Log validation every 2 epochs
        val_loss = train_loss * 1.1
        exp.log_metric("val_loss", val_loss, iteration=epoch)

Parameter Logging

log_params

Log a dictionary of experiment parameters. Each key/value pair is emitted as a PARAMETER metric without an iteration.

Example Usage:

# Log hyperparameters
exp.log_params({
    "learning_rate": 0.001,
    "batch_size": 32,
    "epochs": 100,
    "optimizer": "adam",
    "model_type": "resnet50"
})

# Log model architecture details
exp.log_params({
    "num_layers": 18,
    "hidden_dim": 512,
    "dropout": 0.2,
    "activation": "relu"
})

# Log data preprocessing parameters
exp.log_params({
    "data_augmentation": True,
    "normalization": "imagenet",
    "train_split": 0.8,
    "random_seed": 42
})

Artifact Management

log_artifact

Log a local file as an artifact, optionally stored under a different artifact_path in the backend's artifact store.

Example Usage:

import torch
import matplotlib.pyplot as plt

# Save and log model checkpoint
torch.save(model.state_dict(), "model_checkpoint.pth")
exp.log_artifact("model_checkpoint.pth", "models/checkpoint_epoch_10.pth")

# Log training plots
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.legend()
plt.savefig("training_curves.png")
exp.log_artifact("training_curves.png", "plots/training_curves.png")

# Log configuration files
exp.log_artifact("config.yaml", "configs/experiment_config.yaml")

# Log processed datasets
exp.log_artifact("processed_data.csv", "data/processed/final_dataset.csv")

Experiment Control

set_iteration

Set the current iteration, which becomes the default for subsequent log_metric calls that omit an explicit iteration.

Example Usage:

# Manual iteration tracking
for epoch in range(100):
    exp.set_iteration(epoch)

    # All subsequent metrics logged without iteration will use current iteration
    exp.log_metric("loss", train_loss)  # Uses iteration=epoch
    exp.log_metric("accuracy", train_acc)  # Uses iteration=epoch

    # Override with specific iteration if needed
    exp.log_metric("val_loss", val_loss, iteration=epoch*2)  # Custom iteration

start and stop

start() begins tracking: it starts the data flow orchestrator, then the configured backend plugins and any registered collector plugins.

stop() ends tracking and cleans up: it detaches automagic instrumentation, stops all active plugins, and shuts down the orchestrator.

Example Usage:

# Manual experiment lifecycle management
exp = tracelet.start_logging(exp_name="manual_control", project="test", backend="mlflow")

# Start tracking (usually called automatically)
exp.start()

# Log metrics during experiment
exp.log_metric("initial_metric", 1.0)

# Stop tracking (usually called by tracelet.stop_logging())
exp.stop()

Configuration

ExperimentConfig

Configuration for experiment tracking


Example Usage:

from tracelet.core.experiment import ExperimentConfig

# Create custom configuration
config = ExperimentConfig(
    name="custom_experiment",
    project="research_project",
    backend_name="mlflow",
    tags={"team": "ml", "version": "v2.0"},
    tracking_uri="http://mlflow.company.com:5000"
)

# Use with experiment
exp = Experiment(name="test", config=config, backend=["mlflow"])  # backend takes a list of plugin names

Advanced Usage Patterns

Metric Batching

# Efficient metric logging for large datasets
metrics_batch = {}
for batch_idx, (data, target) in enumerate(dataloader):
    # ... training code ...

    # Collect metrics keyed by iteration
    metrics_batch[batch_idx] = loss.item()

    # Flush the buffer every 100 iterations
    if batch_idx % 100 == 0:
        for idx, batch_loss in metrics_batch.items():
            exp.log_metric("batch_loss", batch_loss, iteration=idx)
        metrics_batch.clear()

Hierarchical Parameter Organization

# Organize parameters hierarchically
exp.log_params({
    # Model parameters
    "model.architecture": "transformer",
    "model.num_layers": 12,
    "model.hidden_size": 768,

    # Training parameters
    "training.learning_rate": 0.0001,
    "training.batch_size": 16,
    "training.gradient_clip": 1.0,

    # Data parameters
    "data.max_seq_length": 512,
    "data.vocab_size": 30000,
})

Error Handling and Validation

try:
    exp.log_metric("accuracy", accuracy, iteration=epoch)
except Exception as e:
    print(f"Failed to log metric: {e}")
    # Continue training without failing

# Validate parameters before logging
params = {"lr": learning_rate, "batch_size": batch_size}
valid_params = {k: v for k, v in params.items() if v is not None}
exp.log_params(valid_params)
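
Lifecycle Safety

To guarantee cleanup even when training raises, the lifecycle methods can be wrapped in try/finally. A sketch, where num_epochs and train_one_epoch() are hypothetical placeholders:

exp = tracelet.start_logging(exp_name="robust_run", project="examples", backend="mlflow")
try:
    for epoch in range(num_epochs):
        exp.set_iteration(epoch)
        exp.log_metric("train_loss", train_one_epoch())  # hypothetical helper
finally:
    exp.end()  # alias for stop(): detaches automagic, stops plugins, halts the orchestrator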