Experiment: Fusing the ACRIM1 and HF Datasets

[1]:
import datetime
import os
from typing import Tuple

import gpflow as gpf
import numpy as np
import pandas as pd
import scipy.signal
import tensorflow as tf

from tsipy.fusion import LocalGPModel, SVGPModel
from tsipy.fusion.kernels import MultiWhiteKernel
from tsipy.fusion.utils import (
    build_and_concat_label_mask,
    build_and_concat_label_mask_output,
)
from tsipy.utils import (
    plot_signals,
    plot_signals_and_confidence,
    pprint,
    sort_inputs,
    transform_time_to_unit,
)

Parameters

[2]:
# Widths in years
pred_window_width = 1.0  # width of each LocalGPModel prediction window
fit_window_width = 3.0  # width of the (larger) per-window training range

# Preprocessing flags passed to LocalGPModel below
normalization = True
clipping = True

num_inducing_pts = 500  # inducing points for each local SVGP model
max_iter = 5000  # optimization steps per window in fit()

Helpers

[3]:
def load_dataset(dataset_name: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load a two-sensor dataset and its precomputed fusion result.

    Reads ``../data/{dataset_name}.txt`` (space-delimited: time, sensor a,
    sensor b) and ``../data/{dataset_name}_results.txt`` (comma-delimited
    SVGP fusion output).

    Args:
        dataset_name: Base name of the dataset files under ``../data``.

    Returns:
        A ``(data, result)`` tuple of DataFrames. ``data`` holds columns
        ``t`` (time converted to fractional years since 1980-01-01),
        ``t_org`` (original time axis), ``a`` and ``b``. ``result`` holds
        ``t_org``, ``t``, ``s_out_mean`` and ``s_out_std``.

    Note: the original annotation claimed a single ``pd.DataFrame`` return,
    but two frames are returned; the annotation is corrected here.
    """
    data_ = pd.read_csv(
        os.path.join("../data", f"{dataset_name}.txt"),
        delimiter=" ",
        header=None,
    )
    data_ = data_.rename(
        columns={
            0: "t",
            1: "a",
            2: "b",
        }
    )

    # Keep the raw time axis, then re-express time in years from 1980-01-01.
    data_["t_org"] = data_["t"].values.copy()
    data_["t"] = transform_time_to_unit(
        data_["t"] - data_["t"][0],
        t_label="year",
        start=datetime.datetime(1980, 1, 1),
    )

    result_ = pd.read_csv(
        os.path.join("../data", f"{dataset_name}_results.txt"),
        delimiter=",",
        header=None,
    )
    result_ = result_.rename(
        columns={
            0: "t_org",
            1: "t",
            2: "s_out_mean",
            3: "s_out_std",
        }
    )
    return data_, result_

Load dataset

[4]:
# Load the measurements and the precomputed SVGP reference result.
data, result = load_dataset("acrim1_hf")
t_a = t_b = data["t"].values  # both sensors share the same time axis
a = data["a"].values
b = data["b"].values

res_t = result["t"].values
res_s_mean = result["s_out_mean"].values
res_s_std = result["s_out_std"].values

pprint("Input Signal", level=0)
pprint("- t_a", t_a.shape, level=1)
pprint("- a", a.shape, level=1)

pprint("Input Signal", level=0)
pprint("- t_b", t_b.shape, level=1)
pprint("- b", b.shape, level=1)

pprint("Result Signal", level=0)
# Fix: report res_t's own shape (was a copy-paste of t_b.shape).
pprint("- res_t", res_t.shape, level=1)
pprint("- res_s_mean", res_s_mean.shape, level=1)
pprint("- res_s_std", res_s_std.shape, level=1)

_ = plot_signals(
    [
        (t_a, a, r"$a$", {}),
        (t_b, b, r"$b$", {}),
    ],
    x_ticker=1,
)

_ = plot_signals_and_confidence(
    [(res_t, res_s_mean, res_s_std, "SVGP")], x_ticker=1,
)
Input Signal
    - t_a                                         (3436,)
    - a                                           (3436,)
Input Signal
    - t_b                                         (3436,)
    - b                                           (3436,)
Result Signal
    - res_t                                       (3436,)
    - res_s_mean                                  (3436,)
    - res_s_std                                   (3436,)
../_images/notebooks_exp_acrim1_hf_7_1.png
../_images/notebooks_exp_acrim1_hf_7_2.png

Data Fusion

[5]:
# Append a sensor-identifying label column to each time axis, and build
# the (time, label) grid on which the fused signal will be predicted.
t_a = build_and_concat_label_mask(t_a, label=1)
t_b = build_and_concat_label_mask(t_b, label=2)
t_out = build_and_concat_label_mask_output(t_a)

# Merge both sensors into one training set, ordered by time.
t = np.concatenate((t_a, t_b), axis=0)
s = np.concatenate((a, b)).reshape(-1, 1)
t, s = sort_inputs(t, s, sort_axis=0)

pprint("Training data", level=0)
for label, arr in (("- t", t), ("- s", s)):
    pprint(label, arr.shape, level=1)
Training data
    - t                                           (6872, 2)
    - s                                           (6872, 1)

Kernel and Fusion Model

[6]:
# Kernel
# Kernel: a Matern-1/2 on the time dimension (dim 0) plus a per-sensor
# white-noise kernel on the label dimension (dim 1), so each of sensors
# 1 and 2 gets its own noise variance.
kernel_signal = gpf.kernels.Matern12(active_dims=[0])
kernel_noise = MultiWhiteKernel(labels=(1, 2), active_dims=[1])
kernel = kernel_signal + kernel_noise

# LocalGPModel fits an SVGP per sliding window and stitches predictions.
fusion_model = LocalGPModel(
    model=SVGPModel(
        kernel=kernel,
        num_inducing_pts=num_inducing_pts,
    ),
    pred_window_width=pred_window_width,
    fit_window_width=fit_window_width,
    normalization=normalization,
    clipping=clipping,
)

Training and Inference

[7]:
# Fit the per-window local GP models on the merged training data, then
# predict the fused signal mean and standard deviation on the output grid.
fusion_model.fit(t, s, max_iter=max_iter, verbose=True)
s_out_mean, s_out_std = fusion_model(t_out)

pprint("Output Signal", level=0)
for label, values in (
    ("- t_out", t_out),
    ("- s_out_mean", s_out_mean),
    ("- s_out_std", s_out_std),
):
    pprint(label, values.shape, level=1)
Window
    - Prediction:                                 -inf, 1981.000
    - Training:                                   1980.000, 1982.000
    - Data indices:                               0,     1461
    - x, y:                                       (1462, 2), (1462, 1)
    - n_ind_pts/time_unit                         167.000

    - Step      1/5000:                           -3235.966
    - Step   1000/5000:                           -1810.995
    - Step   2000/5000:                           -1325.180
    - Step   3000/5000:                           -1177.642
    - Step   4000/5000:                           -1300.299
    - Step   5000/5000:                           -1375.613
Window
    - Prediction:                                 1981.000, 1982.000
    - Training:                                   1980.000, 1983.000
    - Data indices:                               0,     2192
    - x, y:                                       (2193, 2), (2193, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5202.309
    - Step   1000/5000:                           -2859.642
    - Step   2000/5000:                           -2146.367
    - Step   3000/5000:                           -1777.347
    - Step   4000/5000:                           -2031.146
    - Step   5000/5000:                           -2071.007
Window
    - Prediction:                                 1982.000, 1983.000
    - Training:                                   1981.000, 1984.000
    - Data indices:                               730,     2923
    - x, y:                                       (2194, 2), (2194, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5204.170
    - Step   1000/5000:                           -2695.238
    - Step   2000/5000:                           -2427.323
    - Step   3000/5000:                           -2181.082
    - Step   4000/5000:                           -1850.362
    - Step   5000/5000:                           -2110.543
Window
    - Prediction:                                 1983.000, 1984.000
    - Training:                                   1982.000, 1985.000
    - Data indices:                               1461,     3652
    - x, y:                                       (2192, 2), (2192, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5491.939
    - Step   1000/5000:                           -2720.743
    - Step   2000/5000:                           -2743.488
    - Step   3000/5000:                           -2150.349
    - Step   4000/5000:                           -2325.357
    - Step   5000/5000:                           -2241.979
Window
    - Prediction:                                 1984.000, 1985.000
    - Training:                                   1983.000, 1986.000
    - Data indices:                               2192,     4383
    - x, y:                                       (2192, 2), (2192, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5395.265
    - Step   1000/5000:                           -2330.511
    - Step   2000/5000:                           -2149.807
    - Step   3000/5000:                           -2128.263
    - Step   4000/5000:                           -1971.500
    - Step   5000/5000:                           -1856.784
Window
    - Prediction:                                 1985.000, 1986.000
    - Training:                                   1984.000, 1987.000
    - Data indices:                               2922,     5115
    - x, y:                                       (2194, 2), (2194, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -4729.477
    - Step   1000/5000:                           -2353.377
    - Step   2000/5000:                           -2294.417
    - Step   3000/5000:                           -2106.992
    - Step   4000/5000:                           -2192.391
    - Step   5000/5000:                           -1851.747
Window
    - Prediction:                                 1986.000, 1987.000
    - Training:                                   1985.000, 1988.000
    - Data indices:                               3652,     5845
    - x, y:                                       (2194, 2), (2194, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5043.008
    - Step   1000/5000:                           -2702.926
    - Step   2000/5000:                           -2552.658
    - Step   3000/5000:                           -2304.788
    - Step   4000/5000:                           -2103.928
    - Step   5000/5000:                           -2128.942
Window
    - Prediction:                                 1987.000, 1988.000
    - Training:                                   1986.000, 1989.000
    - Data indices:                               4383,     6574
    - x, y:                                       (2192, 2), (2192, 1)
    - n_ind_pts/time_unit                         166.667

    - Step      1/5000:                           -5029.538
    - Step   1000/5000:                           -2202.022
    - Step   2000/5000:                           -1997.388
    - Step   3000/5000:                           -1715.234
    - Step   4000/5000:                           -1675.907
    - Step   5000/5000:                           -1684.823
Window
    - Prediction:                                 1988.000, 1989.000
    - Training:                                   1987.000, 1989.405
    - Data indices:                               5115,     6871
    - x, y:                                       (1757, 2), (1757, 1)
    - n_ind_pts/time_unit                         166.769

    - Step      1/5000:                           -3960.499
    - Step   1000/5000:                           -2140.274
    - Step   2000/5000:                           -1527.736
    - Step   3000/5000:                           -1245.341
    - Step   4000/5000:                           -943.639
    - Step   5000/5000:                           -1291.773
Window
    - Prediction:                                 1989.000,      inf
    - Training:                                   1988.000, 1989.405
    - Data indices:                               5844,     6871
    - x, y:                                       (1028, 2), (1028, 1)
    - n_ind_pts/time_unit                         167.317

    - Step      1/5000:                           -2288.746
    - Step   1000/5000:                           -1323.910
    - Step   2000/5000:                           -757.828
    - Step   3000/5000:                           -655.949
    - Step   4000/5000:                           -695.344
    - Step   5000/5000:                           -681.305
Output Signal
    - t_out                                       (3436, 3)
    - s_out_mean                                  (3436,)
    - s_out_std                                   (3436,)

Results

[8]:
# Fused signal with confidence band, overlaid with the raw sensor data.
fig, ax = plot_signals_and_confidence(
    [(t_out[:, 0], s_out_mean, s_out_std, "LocalGP")], x_ticker=1,
)
ax.scatter(
    t_a[:, 0],  # column 0 is time; column 1 is the sensor label
    a,
    label=r"$a$",
    s=3,
)
# Assign to _ so the bare PathCollection repr is not echoed as cell
# output, consistent with the `_ =` convention in the other plot cells.
_ = ax.scatter(
    t_b[:, 0],
    b,
    label=r"$b$",
    s=3,
)
[8]:
<matplotlib.collections.PathCollection at 0x22beeb1fb50>
../_images/notebooks_exp_acrim1_hf_15_1.png
[9]:
# Side-by-side comparison: the LocalGP fusion computed above vs. the
# precomputed SVGP reference loaded from disk.
_ = plot_signals_and_confidence(
    [(t_out[:, 0], s_out_mean, s_out_std, "LocalGP")], x_ticker=1,
)

_ = plot_signals_and_confidence(
    [(res_t, res_s_mean, res_s_std, "SVGP")], x_ticker=1,
)
../_images/notebooks_exp_acrim1_hf_16_0.png
../_images/notebooks_exp_acrim1_hf_16_1.png
[10]:
# Overlay both fusion results on a single axis for direct comparison.
_ = plot_signals_and_confidence(
    [
        (res_t, res_s_mean, res_s_std, "SVGP"),
        (t_out[:, 0], s_out_mean, s_out_std, "LocalGP"),
    ],
    legend="lower left",
    x_ticker=1,
)
../_images/notebooks_exp_acrim1_hf_17_0.png

Training Diagnostics

[11]:
# Plot each window's ELBO training curve to check optimizer convergence.
# (The index from enumerate was unused, so plain iteration suffices.)
for window in fusion_model.windows:
    elbo = window.model.iter_elbo
    plot_signals([(np.arange(elbo.size), elbo, r"ELBO", {})])
../_images/notebooks_exp_acrim1_hf_19_0.png
../_images/notebooks_exp_acrim1_hf_19_1.png
../_images/notebooks_exp_acrim1_hf_19_2.png
../_images/notebooks_exp_acrim1_hf_19_3.png
../_images/notebooks_exp_acrim1_hf_19_4.png
../_images/notebooks_exp_acrim1_hf_19_5.png
../_images/notebooks_exp_acrim1_hf_19_6.png
../_images/notebooks_exp_acrim1_hf_19_7.png
../_images/notebooks_exp_acrim1_hf_19_8.png
../_images/notebooks_exp_acrim1_hf_19_9.png

Frequency Analysis

[14]:
# Welch periodogram settings: 2048-sample segments with 15 % overlap.
nn = 2
nperseg = 1024 * nn
# scipy expects an integer sample count; np.floor alone returns a float.
noverlap = int(np.floor(nperseg * 0.15))
fs = 1.0  # Sampling frequency in days


def welch_psd(signal):
    """Return (frequencies, PSD) of a 1-D signal using the shared settings."""
    return scipy.signal.welch(signal, fs=fs, nperseg=nperseg, noverlap=noverlap)


# Use identical settings for raw sensors and fused estimates so the
# spectra are directly comparable.
freqs_a, psd_a = welch_psd(a)
freqs_b, psd_b = welch_psd(b)
freqs_out, psd_out = welch_psd(s_out_mean)
freqs_res, psd_res = welch_psd(res_s_mean)

plot_signals(
    [
        (freqs_a, psd_a, "$a$", {}),
        (freqs_b, psd_b, "$b$", {}),
        (freqs_out, psd_out, "$o$", {}),
        (freqs_res, psd_res, "$r$", {}),
    ],
    legend="lower left",
    log_scale_y=True,
    log_scale_x=True,
)

plot_signals(
    [
        (freqs_out, psd_out, "$o$", {}),
        (freqs_res, psd_res, "$r$", {}),
    ],
    legend="lower left",
    log_scale_y=True,
    log_scale_x=True,
)
[14]:
(<Figure size 864x432 with 1 Axes>, <AxesSubplot:>)
../_images/notebooks_exp_acrim1_hf_21_1.png
../_images/notebooks_exp_acrim1_hf_21_2.png
[ ]: