# Source code for causalcompass.datasets.standardized

"""
Standardized data generation for VAR and Lorenz-96 systems.

This module generates normalized datasets using:
- Z-score normalization (zero mean, unit variance)
- Min-max normalization (scaled to [0, 1])

Both methods are applied to vanilla VAR and Lorenz-96 time series data.
"""

import numpy as np
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from .vanilla import simulate_var, simulate_lorenz_96


def normalize_data(X_np, method):
    """Fit and apply the scaler selected by *method* to the data.

    Parameters
    ----------
    X_np : np.ndarray
        Time-series array of shape (T, p); each column is scaled
        independently by the sklearn scaler.
    method : str
        Normalization method: 'zscore' (StandardScaler) or
        'minmax' (MinMaxScaler).

    Returns
    -------
    np.ndarray
        Normalized copy of ``X_np``.

    Raises
    ------
    ValueError
        If ``method`` is neither 'zscore' nor 'minmax'.
    """
    # Dispatch table maps method names to scaler classes.
    scaler_classes = {'zscore': StandardScaler, 'minmax': MinMaxScaler}
    if method not in scaler_classes:
        raise ValueError("Normalization method must be 'zscore' or 'minmax'")
    return scaler_classes[method]().fit_transform(X_np)


def generate_standardized_var(p, T, lag=3, sparsity=0.2, beta_value=1.0,
                              sd=0.1, method='zscore', burn_in=100, seed=0):
    """
    Generate VAR data with z-score or min-max normalization applied.

    Parameters
    ----------
    p : int
        Number of variables
    T : int
        Number of time points
    lag : int, default 3
        Number of lags in the VAR model
    sparsity : float, default 0.2
        Sparsity of the causal graph
    beta_value : float, default 1.0
        Coefficient value
    sd : float, default 0.1
        Noise standard deviation
    method : str, default 'zscore'
        Normalization method: 'zscore' or 'minmax'
    burn_in : int, default 100
        Burn-in period
    seed : int, default 0
        Random seed

    Returns
    -------
    tuple
        (data, beta, GC) — time series array of shape (T, p), coefficient
        matrix, and ground-truth causal graph of shape (p, p).
    """
    # Simulate the raw VAR series, then normalize it column-wise.
    X_np, beta, GC = simulate_var(p, T, lag=lag, sparsity=sparsity,
                                  beta_value=beta_value, sd=sd,
                                  burn_in=burn_in, seed=seed)
    X_scaled = normalize_data(X_np, method)
    # float32 keeps the output compact and ready for ML frameworks.
    return X_scaled.astype(np.float32), beta, GC
def generate_standardized_lorenz_96(p, T, F=10.0, delta_t=0.1, sd=0.1,
                                    method='zscore', burn_in=1000, seed=0):
    """
    Generate Lorenz-96 data with normalization applied.

    Parameters
    ----------
    p : int
        Number of variables
    T : int
        Number of time points
    F : float, default 10.0
        Forcing parameter
    delta_t : float, default 0.1
        Time step for ODE solver
    sd : float, default 0.1
        Noise standard deviation
    method : str, default 'zscore'
        Normalization method: 'zscore' or 'minmax'
    burn_in : int, default 1000
        Burn-in period
    seed : int, default 0
        Random seed

    Returns
    -------
    tuple
        (data, GC) — time series array of shape (T, p) and ground-truth
        causal graph of shape (p, p).
    """
    # Simulate the raw Lorenz-96 series, then normalize it column-wise.
    X_np, GC = simulate_lorenz_96(p, T, F=F, delta_t=delta_t, sd=sd,
                                  burn_in=burn_in, seed=seed)
    X_scaled = normalize_data(X_np, method)
    # float32 keeps the output compact and ready for ML frameworks.
    return X_scaled.astype(np.float32), GC