Source code for causalcompass.datasets.standardized

"""
Standardized data generation for VAR and Lorenz-96 systems.

This module generates normalized datasets using:
- Z-score normalization (zero mean, unit variance)
- Min-max normalization (scaled to [0, 1])

Both methods are applied to vanilla VAR and Lorenz-96 time series data.
"""

import numpy as np
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from .vanilla import simulate_var, simulate_lorenz_96


def normalize_data(X_np, method):
    """
    Rescale a data matrix with a freshly fitted scikit-learn scaler.

    The scaler is fitted on ``X_np`` itself and immediately applied to it,
    so the statistics used for scaling come from the data being returned.

    Parameters
    ----------
    X_np : array-like
        Data to normalize; passed unchanged to the scaler's ``fit_transform``.
    method : str
        ``'zscore'`` selects ``StandardScaler``; ``'minmax'`` selects
        ``MinMaxScaler``.

    Returns
    -------
    ndarray
        The transformed data as returned by ``fit_transform``.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'zscore'`` nor ``'minmax'``.
    """
    use_zscore = method == 'zscore'

    # Guard clause: reject anything that is not one of the two known methods.
    if not (use_zscore or method == 'minmax'):
        raise ValueError("Normalization method must be 'zscore' or 'minmax'")

    scaler_cls = StandardScaler if use_zscore else MinMaxScaler
    return scaler_cls().fit_transform(X_np)



[docs]
def generate_standardized_var(p, T, lag=3, sparsity=0.2, beta_value=1.0, sd=0.1, method='zscore', burn_in=100, seed=0):
    """
    Generate VAR data with z-score or min-max normalization applied.

    The series is simulated with ``simulate_var`` and then rescaled with
    ``normalize_data``. The normalization method is validated up front so
    that a mistyped ``method`` fails immediately instead of after the
    simulation has already been run.

    Parameters
    ----------
    p : int
        Number of variables
    T : int
        Number of time points
    lag : int, default 3
        Number of lags in the VAR model
    sparsity : float, default 0.2
        Sparsity of the causal graph
    beta_value : float, default 1.0
        Coefficient value
    sd : float, default 0.1
        Noise standard deviation
    method : str, default 'zscore'
        Normalization method: 'zscore' or 'minmax'
    burn_in : int, default 100
        Burn-in period
    seed : int, default 0
        Random seed

    Returns
    -------
    tuple
        (data, beta, GC) — normalized time series array of shape (T, p) cast
        to float32, coefficient matrix, and ground-truth causal graph of
        shape (p, p). ``beta`` and ``GC`` are returned exactly as produced by
        ``simulate_var``.

    Raises
    ------
    ValueError
        If ``method`` is neither 'zscore' nor 'minmax'.
    """
    # Fail fast: reject an unsupported method before paying for the simulation.
    # The message matches the one raised by normalize_data.
    if method not in ('zscore', 'minmax'):
        raise ValueError("Normalization method must be 'zscore' or 'minmax'")

    X_np, beta, GC = simulate_var(
        p,
        T,
        lag=lag,
        sparsity=sparsity,
        beta_value=beta_value,
        sd=sd,
        burn_in=burn_in,
        seed=seed,
    )
    X_scaled = normalize_data(X_np, method)
    # Only the data is cast; beta and GC keep whatever dtype simulate_var gave them.
    return X_scaled.astype(np.float32), beta, GC




[docs]
def generate_standardized_lorenz_96(p, T, F=10.0, delta_t=0.1, sd=0.1, method='zscore', burn_in=1000, seed=0):
    """
    Generate Lorenz-96 data with z-score or min-max normalization applied.

    The series is simulated with ``simulate_lorenz_96`` and then rescaled
    with ``normalize_data``. The normalization method is validated up front
    so that a mistyped ``method`` fails immediately instead of after the
    simulation (including its burn-in) has already been run.

    Parameters
    ----------
    p : int
        Number of variables
    T : int
        Number of time points
    F : float, default 10.0
        Forcing parameter
    delta_t : float, default 0.1
        Time step for ODE solver
    sd : float, default 0.1
        Noise standard deviation
    method : str, default 'zscore'
        Normalization method: 'zscore' or 'minmax'
    burn_in : int, default 1000
        Burn-in period
    seed : int, default 0
        Random seed

    Returns
    -------
    tuple
        (data, GC) — normalized time series array of shape (T, p) cast to
        float32, and ground-truth causal graph of shape (p, p). ``GC`` is
        returned exactly as produced by ``simulate_lorenz_96``.

    Raises
    ------
    ValueError
        If ``method`` is neither 'zscore' nor 'minmax'.
    """
    # Fail fast: reject an unsupported method before paying for the simulation.
    # The message matches the one raised by normalize_data.
    if method not in ('zscore', 'minmax'):
        raise ValueError("Normalization method must be 'zscore' or 'minmax'")

    X_np, GC = simulate_lorenz_96(
        p,
        T,
        F=F,
        delta_t=delta_t,
        sd=sd,
        burn_in=burn_in,
        seed=seed,
    )
    X_scaled = normalize_data(X_np, method)
    # Only the data is cast; GC keeps whatever dtype simulate_lorenz_96 gave it.
    return X_scaled.astype(np.float32), GC