目錄

1140808 meeting

前言

本次實驗使用 darts 模組中之 TSMixerModel 與 RegressionEnsembleModel 預測已知地點未來 10 日之值,並再使用 autoFRK 填補缺失地點之值。各已知地點有 $24 \times 250$ 個資料點,這些資料點不會重新預測。此結果可與 1140731 meeting、1140806 meeting、1140807 meeting 之結果進行比較。

資料讀取

資料讀取之程式碼如下:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# import modules
import os
import numpy as np
import pandas as pd
import xarray as xr
from tqdm import tqdm
import matplotlib.pyplot as plt
import csv
import cartopy.crs as ccrs
import cartopy.feature as cfeature


# functions
def check_data_folder(folder):
    """
    Return True when `folder` names an existing directory, False otherwise.
    """
    # isdir() is already False for a missing path, so one call covers both checks.
    return os.path.isdir(folder)

def generate_date_range(start_date, end_date):
    """
    Build the list of calendar days from start_date to end_date (both included),
    each rendered as a 'YYYYMMDD' string.
    """
    days = pd.date_range(start=start_date, end=end_date, freq='D')
    return [day.strftime('%Y%m%d') for day in days]

def load_data(file_path):
    """
    Open the NetCDF file at file_path with xarray and return the dataset.

    Raises FileNotFoundError when file_path does not exist.
    """
    # Fail early with an explicit message before handing the path to xarray.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f'File not found: {file_path}')
    return xr.open_dataset(file_path)


# main program
## check data folder
# Assemble the relative path with os.path.join so the separator matches the
# platform the script runs on (the same string as before on Windows).
data_folder = os.path.join('..', '..', '..', 'surface_air_temperature', 'data2024')
if check_data_folder(data_folder):
    print(f'Data folder found: {data_folder}')
else:
    raise FileNotFoundError(f'Data folder not found: {data_folder}')

## load data
# One 'YYYYMMDD' entry per day; each entry selects one daily file below.
start_date = '2024-01-01'
end_date = '2024-12-31'
date_list = generate_date_range(start_date, end_date)

## get location and shape
# The file of the first date serves as the reference for the grid coordinates.
path = os.path.join(data_folder, f'M2T1NXFLX.5.12.4%3AMERRA2_400.tavg1_2d_flx_Nx.{date_list[0]}.nc4.dap.nc4')
# Open inside a `with` block so the file handle is released once the
# coordinate arrays and the shape have been copied out.
with load_data(path) as nc4_data:
    lat = nc4_data['lat'].values
    lon = nc4_data['lon'].values
    shape = nc4_data['TLML'].shape
# assumes 'TLML' is laid out as (time, lat, lon) — TODO confirm
total_locations = shape[1] * shape[2]

## combine data
# Probe the first daily file for the per-file array layout, then release it.
sample_path = os.path.join(data_folder, f'M2T1NXFLX.5.12.4%3AMERRA2_400.tavg1_2d_flx_Nx.{date_list[0]}.nc4.dap.nc4')
with load_data(sample_path) as sample_data:
    shape_per_file = sample_data['TLML'].shape
    time_per_file = len(sample_data['time'])
    time_dtype = sample_data['time'].dtype

# Pre-allocate the full-period buffers: every file contributes
# shape_per_file[0] consecutive rows along the first axis.
total_samples = len(date_list)
combined = np.empty((total_samples * shape_per_file[0], *shape_per_file[1:]), dtype=np.float32)
time_list = np.empty(total_samples * time_per_file, dtype=time_dtype)

for i, date in enumerate(tqdm(date_list, desc="Combining")):
    path = os.path.join(data_folder, f'M2T1NXFLX.5.12.4%3AMERRA2_400.tavg1_2d_flx_Nx.{date}.nc4.dap.nc4')

    start = i * shape_per_file[0]
    end = start + shape_per_file[0]

    # Close each daily dataset as soon as its values are copied; otherwise one
    # file handle per date stays open for the rest of the run.
    with load_data(path) as nc4_data:
        combined[start:end] = nc4_data['TLML'].values
        time_list[start:end] = nc4_data['time'].values

print(f'Combined data shape: {combined.shape}')

## reshape data
# One (lon, lat) row per grid cell.
# assumes the spatial axes of `combined` are ordered (lat, lon), so that the
# flattened columns line up with these rows — TODO confirm
locations = np.stack(np.meshgrid(lon, lat), axis=-1).reshape(-1, 2)
# 2-D view of the data: time steps as rows, locations as columns.
reshaped_data = combined.reshape(combined.shape[0], -1)
reshaped_df = pd.DataFrame(
    reshaped_data,
    columns=[f"({lon_value}, {lat_value})" for lon_value, lat_value in locations],
    index=list(time_list),
)
time_num = len(time_list)
locations_num = len(locations)

## reshape data to (24, day, location)  (24 hours)
reshaped_df.index = pd.to_datetime(reshaped_df.index)
groups = reshaped_df.groupby(reshaped_df.index.time)
# Order the groups by their time-of-day key only; the DataFrame payload is
# never part of the comparison.
hourly_groups = sorted(groups, key=lambda item: item[0])
stacked = np.stack([frame.to_numpy() for _, frame in hourly_groups])
print(stacked)
time_order = sorted(groups.groups.keys())
print(time_order)

## train set
np.random.seed(123)
# Column 0 of `locations` holds longitude and column 1 latitude; keep only the
# cells with longitude in [73, 104] and latitude in [36, 54].
lon_in_range = (locations[:, 0] >= 73) & (locations[:, 0] <= 104)
lat_in_range = (locations[:, 1] >= 36) & (locations[:, 1] <= 54)
valid_mask = lon_in_range & lat_in_range
valid_indices = np.where(valid_mask)[0]


day_num_train = 250
# 350 cells are held out as "unknown"; every other cell of the region is "known".
unknown_locations_num = 350
known_locations_num = valid_indices.shape[0] - unknown_locations_num
locations_num = known_locations_num + unknown_locations_num

# Draw all selected cells without replacement, then split the draw in two.
locations_index = np.random.choice(valid_indices, size=locations_num, replace=False)
known_locations_index = locations_index[:known_locations_num]
unknown_locations_index = locations_index[known_locations_num:locations_num]
# Both slices are views, so these in-place sorts also reorder `locations_index`.
known_locations_index.sort()
unknown_locations_index.sort()
known_locations_choose = locations[known_locations_index]
unknown_locations_choose = locations[unknown_locations_index]
stacked_train = stacked[:, :day_num_train, known_locations_index]

# Ground truth for the training days plus the forecast horizon.
future_days = 10
horizon_end = day_num_train + future_days
known_real_data = stacked[:, :horizon_end, known_locations_index]
unknown_real_data = stacked[:, :horizon_end, unknown_locations_index]


print('Load data complete!')

TSMixerModel + autoFRK

TSMixerModel 可於 GPU 上運行,其程式碼如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from darts.models import TSMixerModel
from darts import TimeSeries
from datetime import datetime
from tqdm import tqdm
real_data = known_real_data
location_choose = known_locations_choose
locations_index = known_locations_index
# Forecast buffer: one (future_days, location) slab per slice along axis 0.
n_slices, _, n_columns = real_data.shape
inference = np.zeros((n_slices, future_days, n_columns))
start_time = datetime.now()

# Labels and date indexes are the same for every slice, so build them once.
column_names = [f"loc_{k}" for k in range(known_locations_num)]
train_dates = pd.to_datetime(date_list[:day_num_train])
test_dates = pd.to_datetime(date_list[day_num_train:day_num_train + future_days])

for i in tqdm(range(n_slices)):
    slice_values = real_data[i]

    # First day_num_train days: training series for this slice.
    train_set = TimeSeries.from_dataframe(
        pd.DataFrame(slice_values[:day_num_train, :], index=train_dates, columns=column_names)
    )

    # Remaining days: held-out series (built here, not passed to the model).
    test_set = TimeSeries.from_dataframe(
        pd.DataFrame(slice_values[day_num_train:, :], index=test_dates, columns=column_names)
    )

    # A fresh model is created and fitted for every slice.
    model = TSMixerModel(
        input_chunk_length=30,
        output_chunk_length=10,
        n_epochs=3800,
        dropout=0.0005,
        batch_size=32,
        use_reversible_instance_norm=True,
        random_state=42,
    )
    model.fit(train_set)

    forecast = model.predict(future_days)
    inference[i] = forecast.values()

print(f'Inference complete! Time taken: {datetime.now() - start_time}')

# Compare the forecasts with the held-out days that follow the training window.
y_true = real_data[:, day_num_train:, :]
y_pred = inference

errors = y_pred - y_true
squared_errors = np.square(errors)
absolute_errors = np.abs(errors)

# Each metric gets its own assignment: a chained `mape = mspe = ...` would
# overwrite the squared-error value with the absolute-error one.
mspe = np.mean(squared_errors)
rmspe = np.sqrt(mspe)
mape = np.mean(absolute_errors)

# Relative variants divide each error term by the true value.
# NOTE(review): the squared error is divided by y_true, not by y_true squared —
# confirm this is the intended definition of "MSPE%".
mspe_pct = np.mean(squared_errors / y_true)
rmspe_pct = np.sqrt(mspe_pct)
mape_pct = np.mean(absolute_errors / y_true)

result_df = pd.DataFrame({
    "Method": ["MSPE", "RMSPE", "MAPE", "MSPE%", "RMSPE%", "MAPE%"],
    "Value": [mspe, rmspe, mape, mspe_pct, rmspe_pct, mape_pct]
})

print(result_df)


save_dir = 'TSMixerModel_frk'
os.makedirs(save_dir, exist_ok=True)

# Known locations: observed training days followed by the forecast days.
train_set = np.concatenate((real_data[:, :day_num_train, :], inference), axis=1)
# Ground truth for all selected locations: known columns first, then unknown.
real_data = np.concatenate((known_real_data, unknown_real_data), axis=2)

# File name -> array, written in this order.
arrays_to_save = [
    ('TSMixer_train_frk.npy', train_set),
    ('known_location.npy', known_locations_choose),
    ('unknown_location.npy', unknown_locations_choose),
    ('real_data.npy', real_data),
    ('all_locations.npy', locations),
    ('known_location_index.npy', known_locations_index),
    ('unknown_location_index.npy', unknown_locations_index),
]
for file_name, array in arrays_to_save:
    np.save(os.path.join(save_dir, file_name), array)

使用 TSMixerModel 預測 1500 地點未來 10 天的結果為

| Method | Value |
| --- | --- |
| MSPE | 7.549632 |
| RMSPE | 8.623995 |
| MAPE | 7.549632 |
| MSPE% | 0.026664 |
| RMSPE% | 0.513201 |
| MAPE% | 0.026664 |

用時 2:23:58.293144 (GPU)。

autoFRK 填補之程式碼如下:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# load data
library(reticulate)
np <- import("numpy")

# series for the known locations (observed days followed by forecast days)
data_path <- "../TSMixer_train_frk.npy"
data <- np$load(data_path)

# ground truth for every selected location
real_path <- "../real_data.npy"
real_data <- np$load(real_path)

# coordinates of the known locations
known_loc_path <- "../known_location.npy"
known_locs <- np$load(known_loc_path)

# coordinates of the locations to be filled in
unknown_loc_path <- "../unknown_location.npy"
unknown_locs <- np$load(unknown_loc_path)

# autoFRK (predict the missing locations for each time step)
library(autoFRK)
library(dplyr)

start_time <- Sys.time()

# initialisation: the output covers known and unknown locations together
n_all_locs <- dim(known_locs)[1] + dim(unknown_locs)[1]
unknown_inference_shape <- c(dim(data)[1:2], n_all_locs)
unknown_inference <- array(NA, dim = unknown_inference_shape)
mrts_basis <- NULL

locs <- rbind(known_locs, unknown_locs)
# set up the progress bar
total_iter <- 24 * dim(data)[2]
pb <- txtProgressBar(min = 0, max = total_iter, style = 3)
iter <- 0

for (time_ in 1:24) {
  hour_data <- data[time_, , ]

  for (day_ in 1:dim(hour_data)[1]) {
    iter <- iter + 1
    iter_start <- Sys.time()

    day_data <- hour_data[day_, ]

    # The first fit lets autoFRK build its own basis; model$G from that fit is
    # then passed back as G to every later fit.
    first_fit <- is.null(mrts_basis)
    model <- if (first_fit) {
      autoFRK(data = day_data, loc = known_locs)
    } else {
      autoFRK(data = day_data, loc = known_locs, G = mrts_basis)
    }
    if (first_fit) {
      mrts_basis <- model$G
    }

    # predict at every location (known rows first, then unknown)
    pred <- predict.FRK(object = model, newloc = locs)
    unknown_inference[time_, day_, ] <- pred$pred.value

    # update the progress bar and estimate the remaining time
    elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
    avg_time <- elapsed / iter
    remaining <- avg_time * (total_iter - iter)
    est_done <- format(Sys.time() + remaining, "%H:%M:%S")

    setTxtProgressBar(pb, iter)
    cat(sprintf(" | Est. done at: %s | Remaining: %ds\r", est_done, round(remaining)))
  }
}

close(pb)

end_time <- Sys.time()
cat("\n")
cat("Total time elapsed:", format(end_time - start_time))
cat("\n")

# result
y_inf <- unknown_inference
real <- real_data

# === index ranges ===
# The last `future_days` days are forecasts; everything before is observed.
future_days <- 10
n_days <- dim(y_inf)[2]
future_idx <- (n_days - future_days + 1):n_days
past_idx <- 1:(n_days - future_days)
# Known locations occupy the leading positions of the last axis, unknown ones follow.
n_known <- dim(known_locs)[1]
known_idx <- 1:n_known
unknown_idx <- (n_known + 1):(n_known + dim(unknown_locs)[1])

# This script never creates `known_data` / `unknown_data`, so the reference
# values for each location group are sliced from the ground truth instead.
train <- real[, , known_idx]
test <- real[, , unknown_idx]

# === shared metric functions ===
mspe <- function(pred, true) mean((pred - true)^2)
rmspe <- function(pred, true) sqrt(mspe(pred, true))
mape <- function(pred, true) mean(abs(pred - true))

# relative variants: each error term is divided by the true value
mspe_p <- function(pred, true) mean((pred - true)^2 / true)
rmspe_p <- function(pred, true) sqrt(mspe_p(pred, true))
mape_p <- function(pred, true) mean(abs(pred - true) / true)

# === single entry point returning all six metrics ===
compute_metrics <- function(pred, true) {
  c(
    MSPE = mspe(pred, true),
    RMSPE = rmspe(pred, true),
    `MSPE%` = mspe_p(pred, true),
    `RMSPE%` = rmspe_p(pred, true),
    MAPE = mape(pred, true),
    `MAPE%` = mape_p(pred, true)
  )
}

# === metrics for each location group and time range ===
result_table <- data.frame(
  row.names = c('MSPE', 'RMSPE', 'MSPE%', 'RMSPE%', 'MAPE', 'MAPE%'),

  `ALL Locs & All Time` = compute_metrics(y_inf, real),
  `Known Locs & All Time` = compute_metrics(y_inf[, , known_idx], train),
  `Unknown Locs & All Time` = compute_metrics(y_inf[, , unknown_idx], test),

  `ALL Locs & Future` = compute_metrics(y_inf[, future_idx, ], real[, future_idx, ]),
  `Known Locs & Future` = compute_metrics(y_inf[, future_idx, known_idx], train[, future_idx, ]),
  `Unknown Locs & Future` = compute_metrics(y_inf[, future_idx, unknown_idx], test[, future_idx, ]),

  `ALL Locs & Past` = compute_metrics(y_inf[, past_idx, ], real[, past_idx, ]),
  `Known Locs & Past` = compute_metrics(y_inf[, past_idx, known_idx], train[, past_idx, ]),
  `Unknown Locs & Past` = compute_metrics(y_inf[, past_idx, unknown_idx], test[, past_idx, ])
)

# === output ===
print(result_table)

# save to .npy
# combined data: predictions for every location
output_matrix <- unknown_inference
print(dim(output_matrix))

# convert to a Python object and write it with numpy
output_matrix <- r_to_py(output_matrix)
save_path <- "../plot_TSMixerModel_frk.npy"
np$save(save_path, output_matrix)

使用 autoFRK 填補結果如下,其中過往資料(Past)皆由真實資料生成。

| Method | ALL Locs & All Time | Known Locs & All Time | Unknown Locs & All Time | ALL Locs & Future | Known Locs & Future | Unknown Locs & Future | ALL Locs & Past | Known Locs & Past | Unknown Locs & Past |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| MSPE | 5.52756889 | 5.275436324 | 6.60813702 | 75.99501027 | 76.24826754 | 74.9096219 | 2.708871233 | 2.436523075 | 3.876077622 |
| RMSPE | 2.35107824 | 2.296831801 | 2.57062969 | 8.71751170 | 8.73202540 | 8.6550345 | 1.645864889 | 1.560936602 | 1.968775666 |
| MSPE% | 0.01961682 | 0.018721415 | 0.02345426 | 0.26929858 | 0.27033777 | 0.2648449 | 0.009629547 | 0.008656760 | 0.013798632 |
| RMSPE% | 0.14006005 | 0.136826221 | 0.15314783 | 0.51893986 | 0.51994016 | 0.5146308 | 0.098130255 | 0.093041713 | 0.117467580 |
| MAPE | 1.40984535 | 1.355912304 | 1.64098697 | 7.58070981 | 7.59905546 | 7.5020856 | 1.163010770 | 1.106186578 | 1.406543020 |
| MAPE% | 0.00501609 | 0.004825062 | 0.00583478 | 0.02678231 | 0.02686171 | 0.0264420 | 0.004145441 | 0.003943596 | 0.005010491 |

用時 4.901547 小時(CPU)。

gallery_made_with_nanogallery_TSMixerModel+autoFRK

RegressionEnsembleModel + autoFRK

RegressionEnsembleModel 僅可於 CPU 上運行,其程式碼如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from darts.models import RegressionEnsembleModel, NaiveSeasonal, LinearRegressionModel, NaiveDrift
from darts import TimeSeries
from datetime import datetime
from tqdm import tqdm
real_data = known_real_data
location_choose = known_locations_choose
locations_index = known_locations_index
# Forecast buffer: one (future_days, location) slab per slice along axis 0.
n_slices, _, n_columns = real_data.shape
inference = np.zeros((n_slices, future_days, n_columns))
start_time = datetime.now()

# Labels and date indexes are the same for every slice, so build them once.
column_names = [f"loc_{k}" for k in range(known_locations_num)]
train_dates = pd.to_datetime(date_list[:day_num_train])
test_dates = pd.to_datetime(date_list[day_num_train:day_num_train + future_days])

for i in tqdm(range(n_slices)):
    slice_values = real_data[i]

    # First day_num_train days: training series for this slice.
    train_set = TimeSeries.from_dataframe(
        pd.DataFrame(slice_values[:day_num_train, :], index=train_dates, columns=column_names)
    )

    # Remaining days: held-out series (built here, not passed to the model).
    test_set = TimeSeries.from_dataframe(
        pd.DataFrame(slice_values[day_num_train:, :], index=test_dates, columns=column_names)
    )

    # Fresh base models and a fresh ensemble are created for every slice.
    base_models = [
        NaiveSeasonal(K=7),
        LinearRegressionModel(lags=30),
        NaiveDrift(),
    ]
    model = RegressionEnsembleModel(
        forecasting_models=base_models,
        regression_train_n_points=30,
    )
    model.fit(train_set)

    forecast = model.predict(n=future_days)
    inference[i] = forecast.values()

print(f'Inference complete! Time taken: {datetime.now() - start_time}')

# Compare the forecasts with the held-out days that follow the training window.
y_true = real_data[:, day_num_train:, :]
y_pred = inference

errors = y_pred - y_true
squared_errors = np.square(errors)
absolute_errors = np.abs(errors)

# Each metric gets its own assignment: a chained `mape = mspe = ...` would
# overwrite the squared-error value with the absolute-error one.
mspe = np.mean(squared_errors)
rmspe = np.sqrt(mspe)
mape = np.mean(absolute_errors)

# Relative variants divide each error term by the true value.
# NOTE(review): the squared error is divided by y_true, not by y_true squared —
# confirm this is the intended definition of "MSPE%".
mspe_pct = np.mean(squared_errors / y_true)
rmspe_pct = np.sqrt(mspe_pct)
mape_pct = np.mean(absolute_errors / y_true)

result_df = pd.DataFrame({
    "Method": ["MSPE", "RMSPE", "MAPE", "MSPE%", "RMSPE%", "MAPE%"],
    "Value": [mspe, rmspe, mape, mspe_pct, rmspe_pct, mape_pct]
})

print(result_df)

save_dir = 'RegressionEnsembleModel_frk'
os.makedirs(save_dir, exist_ok=True)

# Known locations: observed training days followed by the forecast days.
train_set = np.concatenate((real_data[:, :day_num_train, :], inference), axis=1)
# Ground truth for all selected locations: known columns first, then unknown.
real_data = np.concatenate((known_real_data, unknown_real_data), axis=2)

# File name -> array, written in this order.
arrays_to_save = [
    ('reg_train_frk.npy', train_set),
    ('known_location.npy', known_locations_choose),
    ('unknown_location.npy', unknown_locations_choose),
    ('real_data.npy', real_data),
    ('all_locations.npy', locations),
    ('known_location_index.npy', known_locations_index),
    ('unknown_location_index.npy', unknown_locations_index),
]
for file_name, array in arrays_to_save:
    np.save(os.path.join(save_dir, file_name), array)

使用 RegressionEnsembleModel 預測 1500 地點未來 10 天的結果為

| Method | Value |
| --- | --- |
| MSPE | 4.457576 |
| RMSPE | 5.601874 |
| MAPE | 4.457576 |
| MSPE% | 0.015705 |
| RMSPE% | 0.332526 |
| MAPE% | 0.015705 |

用時 0:01:45.722061 (CPU)。

autoFRK 填補之程式碼如下:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# load data
library(reticulate)
np <- import("numpy")

# series for the known locations (observed days followed by forecast days)
data_path <- "../reg_train_frk.npy"
data <- np$load(data_path)

# ground truth for every selected location
real_path <- "../real_data.npy"
real_data <- np$load(real_path)

# coordinates of the known locations
known_loc_path <- "../known_location.npy"
known_locs <- np$load(known_loc_path)

# coordinates of the locations to be filled in
unknown_loc_path <- "../unknown_location.npy"
unknown_locs <- np$load(unknown_loc_path)

# autoFRK (predict the missing locations for each time step)
library(autoFRK)
library(dplyr)

start_time <- Sys.time()

# initialisation: the output covers known and unknown locations together
n_all_locs <- dim(known_locs)[1] + dim(unknown_locs)[1]
unknown_inference_shape <- c(dim(data)[1:2], n_all_locs)
unknown_inference <- array(NA, dim = unknown_inference_shape)
mrts_basis <- NULL

locs <- rbind(known_locs, unknown_locs)
# set up the progress bar
total_iter <- 24 * dim(data)[2]
pb <- txtProgressBar(min = 0, max = total_iter, style = 3)
iter <- 0

for (time_ in 1:24) {
  hour_data <- data[time_, , ]

  for (day_ in 1:dim(hour_data)[1]) {
    iter <- iter + 1
    iter_start <- Sys.time()

    day_data <- hour_data[day_, ]

    # The first fit lets autoFRK build its own basis; model$G from that fit is
    # then passed back as G to every later fit.
    first_fit <- is.null(mrts_basis)
    model <- if (first_fit) {
      autoFRK(data = day_data, loc = known_locs)
    } else {
      autoFRK(data = day_data, loc = known_locs, G = mrts_basis)
    }
    if (first_fit) {
      mrts_basis <- model$G
    }

    # predict at every location (known rows first, then unknown)
    pred <- predict.FRK(object = model, newloc = locs)
    unknown_inference[time_, day_, ] <- pred$pred.value

    # update the progress bar and estimate the remaining time
    elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
    avg_time <- elapsed / iter
    remaining <- avg_time * (total_iter - iter)
    est_done <- format(Sys.time() + remaining, "%H:%M:%S")

    setTxtProgressBar(pb, iter)
    cat(sprintf(" | Est. done at: %s | Remaining: %ds\r", est_done, round(remaining)))
  }
}

close(pb)

end_time <- Sys.time()
cat("\n")
cat("Total time elapsed:", format(end_time - start_time))
cat("\n")


# result
y_inf <- unknown_inference
real <- real_data

# === index ranges ===
# The last `future_days` days are forecasts; everything before is observed.
future_days <- 10
n_days <- dim(y_inf)[2]
future_idx <- (n_days - future_days + 1):n_days
past_idx <- 1:(n_days - future_days)
# Known locations occupy the leading positions of the last axis, unknown ones follow.
n_known <- dim(known_locs)[1]
known_idx <- 1:n_known
unknown_idx <- (n_known + 1):(n_known + dim(unknown_locs)[1])

# This script never creates `known_data` / `unknown_data`, so the reference
# values for each location group are sliced from the ground truth instead.
train <- real[, , known_idx]
test <- real[, , unknown_idx]

# === shared metric functions ===
mspe <- function(pred, true) mean((pred - true)^2)
rmspe <- function(pred, true) sqrt(mspe(pred, true))
mape <- function(pred, true) mean(abs(pred - true))

# relative variants: each error term is divided by the true value
mspe_p <- function(pred, true) mean((pred - true)^2 / true)
rmspe_p <- function(pred, true) sqrt(mspe_p(pred, true))
mape_p <- function(pred, true) mean(abs(pred - true) / true)

# === single entry point returning all six metrics ===
compute_metrics <- function(pred, true) {
  c(
    MSPE = mspe(pred, true),
    RMSPE = rmspe(pred, true),
    `MSPE%` = mspe_p(pred, true),
    `RMSPE%` = rmspe_p(pred, true),
    MAPE = mape(pred, true),
    `MAPE%` = mape_p(pred, true)
  )
}

# === metrics for each location group and time range ===
result_table <- data.frame(
  row.names = c('MSPE', 'RMSPE', 'MSPE%', 'RMSPE%', 'MAPE', 'MAPE%'),

  `ALL Locs & All Time` = compute_metrics(y_inf, real),
  `Known Locs & All Time` = compute_metrics(y_inf[, , known_idx], train),
  `Unknown Locs & All Time` = compute_metrics(y_inf[, , unknown_idx], test),

  `ALL Locs & Future` = compute_metrics(y_inf[, future_idx, ], real[, future_idx, ]),
  `Known Locs & Future` = compute_metrics(y_inf[, future_idx, known_idx], train[, future_idx, ]),
  `Unknown Locs & Future` = compute_metrics(y_inf[, future_idx, unknown_idx], test[, future_idx, ]),

  `ALL Locs & Past` = compute_metrics(y_inf[, past_idx, ], real[, past_idx, ]),
  `Known Locs & Past` = compute_metrics(y_inf[, past_idx, known_idx], train[, past_idx, ]),
  `Unknown Locs & Past` = compute_metrics(y_inf[, past_idx, unknown_idx], test[, past_idx, ])
)

# === output ===
print(result_table)


# save to .npy
# combined data: predictions for every location
output_matrix <- unknown_inference
print(dim(output_matrix))

# convert to a Python object and write it with numpy
output_matrix <- r_to_py(output_matrix)
save_path <- "../plot_RegressionEnsembleModel_frk.npy"
np$save(save_path, output_matrix)

使用 autoFRK 填補結果如下,其中過往資料(Past)皆由真實資料生成。

| Method | ALL Locs & All Time | Known Locs & All Time | Unknown Locs & All Time | ALL Locs & Future | Known Locs & Future | Unknown Locs & Future | ALL Locs & Past | Known Locs & Past | Unknown Locs & Past |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| MSPE | 3.889911137 | 3.620898999 | 5.042820302 | 33.41590875 | 33.23029709 | 34.21138730 | 2.708871233 | 2.436523075 | 3.876077622 |
| RMSPE | 1.972285765 | 1.902865996 | 2.245622475 | 5.78064951 | 5.76457259 | 5.84905012 | 1.645864889 | 1.560936602 | 1.968775666 |
| MSPE% | 0.013783246 | 0.012825399 | 0.017888304 | 0.11762572 | 0.11704136 | 0.12013010 | 0.009629547 | 0.008656760 | 0.013798632 |
| RMSPE% | 0.117402069 | 0.113249277 | 0.133747165 | 0.34296606 | 0.34211308 | 0.34659790 | 0.098130255 | 0.093041713 | 0.117467580 |
| MAPE | 1.295699917 | 1.240501219 | 1.532265762 | 4.61292859 | 4.59836726 | 4.67533431 | 1.163010770 | 1.106186578 | 1.406543020 |
| MAPE% | 0.004610617 | 0.004414918 | 0.005449328 | 0.01624002 | 0.01619796 | 0.01642026 | 0.004145441 | 0.003943596 | 0.005010491 |

用時 4.979858 小時(CPU)。

gallery_made_with_nanogallery_RegressionEnsembleModel+autoFRK

結語

https://raw.githubusercontent.com/Josh-test-lab/website-assets-repository/refs/heads/main/posts/1140808%20meeting/To%20be%20continued.jpg
To be continued!

運行環境

  • 本機作業系統:Windows 11 24H2
    • 程式語言:Python 3.12.9
  • 計算平臺:財團法人國家實驗研究院國家高速網路與計算中心臺灣 AI 雲
    • 作業系統:Ubuntu
    • Miniconda
    • GPU:NVIDIA Tesla V100 32GB GPU
    • CUDA 12.8 driver
    • 程式語言:Python 3.10.16 for Linux

延伸學習

參考資料