온도 예측
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import layers
# Stop immediately when TensorFlow reports no GPU device.
assert tf.config.list_physical_devices('GPU'), 'No GPU available'

# Read the Jena climate CSV; the 'Date Time' column is parsed with the
# explicit day.month.year format and becomes the index.
weather = pd.read_csv(
    '../data/jena_climate_2009_2016.csv',
    parse_dates=['Date Time'], index_col='Date Time',
    date_format=r'%d.%m.%Y %H:%M:%S')
# Remove duplicated rows.
# NOTE(review): drop_duplicates compares column values only (the index is
# ignored), so two different timestamps with identical readings would also
# be collapsed — confirm this is intended.
weather.drop_duplicates(inplace=True)
weather.info()
# Output (continues on the lines below):
# <class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 420222 entries, 2009-01-01 00:10:00 to 2017-01-01 00:00:00
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 p (mbar) 420222 non-null float64
1 T (degC) 420222 non-null float64
2 Tpot (K) 420222 non-null float64
3 Tdew (degC) 420222 non-null float64
4 rh (%) 420222 non-null float64
5 VPmax (mbar) 420222 non-null float64
6 VPact (mbar) 420222 non-null float64
7 VPdef (mbar) 420222 non-null float64
8 sh (g/kg) 420222 non-null float64
9 H2OC (mmol/mol) 420222 non-null float64
10 rho (g/m**3) 420222 non-null float64
11 wv (m/s) 420222 non-null float64
12 max. wv (m/s) 420222 non-null float64
13 wd (deg) 420222 non-null float64
dtypes: float64(14)
memory usage: 48.1 MB
# 1. Is the index sorted by date? Sort it when it is not.
if not weather.index.is_monotonic_increasing:
    weather = weather.sort_index()
assert weather.index.is_monotonic_increasing

# 2. Are there missing measurements?
date_diff = weather.index.to_series().diff().dropna()
# Measurements are taken every 10 minutes, so any spacing other than
# 10 minutes is counted as a gap.
결측치여부 = date_diff != pd.Timedelta(minutes=10)
결측치여부.sum()
# Output: 7

assert isinstance(weather.index, pd.DatetimeIndex)
# Resample to 1-hour intervals; downsampling takes the mean of each bin.
weather_hourly = weather.resample('h').mean()
# Drop rows that contain missing values after resampling.
weather_hourly.dropna(inplace=True)

from sklearn.model_selection import train_test_split
# Chronological split without shuffling: the first half is the training
# set, the remaining half is split again into validation and test.
train, test = train_test_split(
    weather_hourly, test_size=0.5, shuffle=False)
val, test = train_test_split(
    test, test_size=0.5, shuffle=False)
len(train), len(val), len(test)
# Output: (35020, 17510, 17511)

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# transformation to all three splits.
scaler = StandardScaler()
scaler.fit(train)
X_train = scaler.transform(train)
X_val = scaler.transform(val)
X_test = scaler.transform(test)
print(type(train), '->' ,type(X_train))
# Output: <class 'pandas.core.frame.DataFrame'> -> <class 'numpy.ndarray'>
from keras.utils import timeseries_dataset_from_array

특성차원 = X_train.shape[1]
시퀀스_길이 = 24 * 5  # 5 days of hourly rows
offset = 24  # forecast horizon: +24 hours
# Window i covers rows i .. i + 시퀀스_길이 - 1. The row `offset` steps after
# the window's LAST row is therefore i + 시퀀스_길이 - 1 + offset.
# (Using 시퀀스_길이 + offset here would predict 25 rows ahead instead of 24.)
delay = 시퀀스_길이 + offset - 1
batch_size = 256


def _make_windowed_dataset(features, frame, shuffle):
    """Build an unbatched dataset of (window, target temperature) pairs.

    Args:
        features: scaled feature array; its last `offset` rows are dropped
            because no target exists that far ahead.
        frame: unscaled DataFrame of the same split; its 'T (degC)' column
            supplies the targets in original units.
        shuffle: forwarded to `timeseries_dataset_from_array`.

    Returns:
        The dataset produced by `timeseries_dataset_from_array`, where
        targets[i] belongs to the window that starts at features[i].
    """
    return timeseries_dataset_from_array(
        data=features[:-offset],
        targets=frame['T (degC)'].to_numpy()[delay:],
        sequence_length=시퀀스_길이,
        batch_size=None, shuffle=shuffle)


# Only the training windows are shuffled; validation and test keep their
# order so that evaluation is deterministic.
train_dataset = _make_windowed_dataset(X_train, train, shuffle=True)
val_dataset = _make_windowed_dataset(X_val, val, shuffle=False)
test_dataset = _make_windowed_dataset(X_test, test, shuffle=False)

# Sanity-check the batch shape. With drop_remainder=True every batch has the
# same shape, so inspecting the first one is sufficient.
for X_batch, y_batch in train_dataset.batch(
        batch_size, drop_remainder=True).take(1):
    assert X_batch.shape == (batch_size, 시퀀스_길이, 특성차원), X_batch.shape

train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
# Keep the final partial batch for validation/test so that every sample
# contributes to the reported metrics.
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)

# Notebook section: 1 모형 (Model)
def baseline_model(dataset, temperature_column=1):
    """Naive baseline: predict the last temperature seen in each window.

    Assumes the temperature 24 hours later equals the temperature at the
    final time step of the input sequence.

    Args:
        dataset: iterable of (X_batch, y_batch) pairs. The last time step of
            X_batch is taken with ``X_batch[:, -1]`` and passed to the
            module-level ``scaler`` to undo the scaling.
        temperature_column: column index of the temperature inside an
            unscaled feature row. Defaults to 1.

    Returns:
        A ``(targets, predictions)`` tuple of NumPy arrays collected over
        all batches; both are empty when the dataset yields nothing.
    """
    targets, predictions = [], []
    for X_batch, y_batch in dataset:
        # Select the last data point of every sequence in the batch.
        last_step = np.asarray(X_batch[:, -1])
        # Restore the original scale.
        last_step = scaler.inverse_transform(last_step)
        # Accumulate whole batches instead of extending element by element.
        targets.append(np.asarray(y_batch))
        predictions.append(last_step[:, temperature_column])
    if not targets:
        return np.array([]), np.array([])
    return np.concatenate(targets), np.concatenate(predictions)


from sklearn.metrics import mean_absolute_error
# Mean absolute error of the naive baseline on the validation split.
y, y_pred = baseline_model(val_dataset)
print(f'MAE[검증]: {mean_absolute_error(y, y_pred):.3f}')
# Same measurement on the test split.
y, y_pred = baseline_model(test_dataset)
print(f'MAE[테스트]: {mean_absolute_error(y, y_pred):.3f}')
# Output:
# MAE[검증]: 2.532
# MAE[테스트]: 2.723
# Symbolic input: one window of 시퀀스_길이 time steps with 특성차원 features.
inputs = keras.Input(shape=(시퀀스_길이, 특성차원))


def MLP(inputs):
    """Build a small fully connected regression model.

    The input is flattened, passed through a 16-unit ReLU Dense layer and
    then a single-unit Dense output layer.

    Args:
        inputs: Keras input tensor the model is built on.

    Returns:
        A ``keras.Model`` mapping ``inputs`` to the single-unit output.
    """
    x = layers.Flatten()(inputs)
    x = layers.Dense(16, activation='relu')(x)
    outputs = layers.Dense(1)(x)
    return keras.Model(inputs, outputs)
# Reset Keras global state before building a new model.
keras.backend.clear_session()
model = MLP(inputs)
model.summary()
# Train on mean squared error and also report mean absolute error.
model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['mae'])
# outputs = model(X_batch)
# train_loss, *metrics = model.evaluate(train_dataset)
history = model.fit(
    train_dataset, epochs=10, validation_data=val_dataset)
# Output (continues on the lines below):
# Loading...
Loading...
Loading...
Loading...
Loading...
Epoch 1/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 19ms/step - loss: 43.7426 - mae: 5.1138 - val_loss: 14.0587 - val_mae: 2.9753
Epoch 2/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 13.9508 - mae: 2.9708 - val_loss: 16.2679 - val_mae: 3.1983
Epoch 3/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 18ms/step - loss: 12.8055 - mae: 2.8292 - val_loss: 11.7951 - val_mae: 2.6970
Epoch 4/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 11.8028 - mae: 2.7254 - val_loss: 13.0772 - val_mae: 2.8883
Epoch 5/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 19ms/step - loss: 11.3019 - mae: 2.6488 - val_loss: 11.4802 - val_mae: 2.6597
Epoch 6/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 10.9805 - mae: 2.6152 - val_loss: 15.9605 - val_mae: 3.2033
Epoch 7/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 10.5837 - mae: 2.5625 - val_loss: 11.8828 - val_mae: 2.7465
Epoch 8/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 16ms/step - loss: 10.0385 - mae: 2.4978 - val_loss: 10.6951 - val_mae: 2.5782
Epoch 9/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 19ms/step - loss: 9.9309 - mae: 2.4788 - val_loss: 10.7090 - val_mae: 2.5720
Epoch 10/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 9.7706 - mae: 2.4660 - val_loss: 11.4098 - val_mae: 2.6653
def RNN(inputs):
    """Build a recurrent regression model on top of ``inputs``.

    The input sequence goes through a 32-unit LSTM layer, whose output is
    fed to a single-unit Dense layer.

    Args:
        inputs: Keras input tensor the model is built on.

    Returns:
        A ``keras.Model`` mapping ``inputs`` to the single-unit output.
    """
    recurrent = layers.LSTM(32)
    head = layers.Dense(1)
    return keras.Model(inputs, head(recurrent(inputs)))
# Reset Keras global state before building a new model.
keras.backend.clear_session()
model = RNN(inputs)
model.summary()
# Train on mean squared error and also report mean absolute error.
model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['mae'])
# outputs = model(X_batch)
# train_loss, *metrics = model.evaluate(train_dataset)
history = model.fit(
    train_dataset, epochs=50, validation_data=val_dataset,
    callbacks=[
        # Write the model to 'rnn_1.keras', keeping only the best one.
        keras.callbacks.ModelCheckpoint(
            'rnn_1.keras', save_best_only=True),
        # Stop after 5 epochs without val_loss improvement and restore
        # the best weights.
        keras.callbacks.EarlyStopping(
            patience=5, restore_best_weights=True,
            monitor='val_loss')
    ]
)
# Output (continues on the lines below):
# Loading...
Loading...
Loading...
Loading...
Loading...
Epoch 1/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 91.8192 - mae: 7.6614 - val_loss: 30.4881 - val_mae: 4.1417
Epoch 2/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 27.9717 - mae: 3.9884 - val_loss: 16.8299 - val_mae: 3.1034
Epoch 3/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 16.4925 - mae: 3.1093 - val_loss: 11.6736 - val_mae: 2.6247
Epoch 4/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 12.3188 - mae: 2.7278 - val_loss: 10.8461 - val_mae: 2.5458
Epoch 5/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 11.0675 - mae: 2.6050 - val_loss: 10.2747 - val_mae: 2.4819
Epoch 6/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 10.4174 - mae: 2.5260 - val_loss: 10.4551 - val_mae: 2.5087
Epoch 7/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 10.1743 - mae: 2.4868 - val_loss: 10.2651 - val_mae: 2.4833
Epoch 8/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 9.8625 - mae: 2.4516 - val_loss: 9.8310 - val_mae: 2.4328
Epoch 9/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 9.6174 - mae: 2.4218 - val_loss: 10.0902 - val_mae: 2.4658
Epoch 10/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 9.3640 - mae: 2.3861 - val_loss: 9.8439 - val_mae: 2.4399
Epoch 11/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 8.9700 - mae: 2.3430 - val_loss: 10.4594 - val_mae: 2.5147
Epoch 12/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 8.7416 - mae: 2.3104 - val_loss: 10.1592 - val_mae: 2.4730
Epoch 13/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 8.6591 - mae: 2.2978 - val_loss: 10.1650 - val_mae: 2.4812
# Report loss and MAE of the trained model on the validation split.
print('validation')  # label typo fixed (was 'valiation')
model.evaluate(val_dataset)
# Report loss and MAE on the held-out test split.
print('test')
model.evaluate(test_dataset)
# Output recorded before the label typo was fixed (continues on the lines below):
# valiation
68/68 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 9.8602 - mae: 2.4332
test
68/68 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 11.8934 - mae: 2.7067
[11.920796394348145, 2.70729660987854]