Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

온도 예측

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import layers
# Stop immediately when TensorFlow reports no physical GPU device.
assert tf.config.list_physical_devices('GPU'), 'No GPU available'
# Load the Jena climate measurements, using the parsed 'Date Time' column
# as the index.
weather = pd.read_csv(
    '../data/jena_climate_2009_2016.csv',
    index_col='Date Time',
    parse_dates=['Date Time'],
    date_format=r'%d.%m.%Y %H:%M:%S',
)
# Remove duplicated rows.
# NOTE(review): drop_duplicates compares column values only and ignores the
# index -- confirm that this is the intended notion of "duplicate" here.
weather = weather.drop_duplicates()
weather.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 420222 entries, 2009-01-01 00:10:00 to 2017-01-01 00:00:00
Data columns (total 14 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   p (mbar)         420222 non-null  float64
 1   T (degC)         420222 non-null  float64
 2   Tpot (K)         420222 non-null  float64
 3   Tdew (degC)      420222 non-null  float64
 4   rh (%)           420222 non-null  float64
 5   VPmax (mbar)     420222 non-null  float64
 6   VPact (mbar)     420222 non-null  float64
 7   VPdef (mbar)     420222 non-null  float64
 8   sh (g/kg)        420222 non-null  float64
 9   H2OC (mmol/mol)  420222 non-null  float64
 10  rho (g/m**3)     420222 non-null  float64
 11  wv (m/s)         420222 non-null  float64
 12  max. wv (m/s)    420222 non-null  float64
 13  wd (deg)         420222 non-null  float64
dtypes: float64(14)
memory usage: 48.1 MB
# 1. Is the index in chronological order? Sort it only when it is not.
weather = (
    weather
    if weather.index.is_monotonic_increasing
    else weather.sort_index()
)
assert weather.index.is_monotonic_increasing
# 2. Are there missing measurements?
# Step between each pair of consecutive timestamps (the first row has no
# predecessor, so its NaT difference is dropped).
date_diff = weather.index.to_series().diff().dropna()
# Readings are taken every 10 minutes, so any step that is not exactly
# 10 minutes marks a gap in the record.
결측치여부 = date_diff.ne(pd.Timedelta(minutes=10))
결측치여부.sum()
7
assert isinstance(weather.index, pd.DatetimeIndex)
# Downsample to one row per hour (mean of the readings in that hour), then
# discard the hours that contain missing values.
weather_hourly = weather.resample('h').mean().dropna()
from sklearn.model_selection import train_test_split

# Chronological split without shuffling: the first half is for training,
# the remaining half is cut in two again for validation and test.
train, holdout = train_test_split(
    weather_hourly, shuffle=False, test_size=0.5)
val, test = train_test_split(holdout, shuffle=False, test_size=0.5)

len(train), len(val), len(test)
(35020, 17510, 17511)
from sklearn.preprocessing import StandardScaler

# Fit the standardisation on the training split only, then apply the same
# fitted transform to every split.
scaler = StandardScaler().fit(train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (train, val, test)
)
print(type(train), '->'  ,type(X_train))
<class 'pandas.core.frame.DataFrame'> -> <class 'numpy.ndarray'>
from keras.utils import timeseries_dataset_from_array

특성차원 = X_train.shape[1]
시퀀스_길이 = 24 * 5  # 5 days of hourly rows per input window
offset = 24  # forecast horizon: +24 hours
# timeseries_dataset_from_array pairs the window that starts at row i
# (rows i .. i + 시퀀스_길이 - 1) with targets[i].  For the target to lie
# exactly `offset` rows after the window's last row, the targets must start
# at row 시퀀스_길이 + offset - 1.  Starting at 시퀀스_길이 + offset would
# predict offset + 1 = 25 hours ahead instead.
delay = 시퀀스_길이 + offset - 1
batch_size = 256


def _windowed_dataset(features, frame, *, shuffle):
    """Return an unbatched dataset of (input window, target temperature).

    Args:
        features: Scaled feature array of one split (rows are time steps).
        frame: The unscaled DataFrame of the same split; its 'T (degC)'
            column supplies the targets in the original scale.
        shuffle: Whether the windows are yielded in shuffled order.
    """
    return timeseries_dataset_from_array(
        # The last `offset` rows can never end a window that still has a
        # target, so they are cut off; this leaves exactly as many windows
        # as there are targets.
        data=features[:-offset],
        targets=frame['T (degC)'].to_numpy()[delay:],
        sequence_length=시퀀스_길이,
        batch_size=None, shuffle=shuffle)


# Only the training windows are shuffled.  Validation and test keep their
# order so that repeated evaluations see the same samples.
train_dataset = _windowed_dataset(X_train, train, shuffle=True)
val_dataset = _windowed_dataset(X_val, val, shuffle=False)
test_dataset = _windowed_dataset(X_test, test, shuffle=False)

# Sanity check: every full training batch has the expected shape.
for X_batch, y_batch in train_dataset.batch(batch_size, drop_remainder=True):
    assert X_batch.shape == (batch_size, 시퀀스_길이, 특성차원), X_batch.shape

train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
# Keep the final partial batch for evaluation so no sample is dropped.
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)

1. 모형 (Model)

def baseline_model(dataset, *, temperature_column=1, feature_scaler=None):
    """Naive forecast: the future temperature equals the last observed one.

    For every window the prediction is the temperature of the window's
    final time step, converted back to the original scale.

    Args:
        dataset: Iterable of ``(X_batch, y_batch)`` pairs, where ``X_batch``
            is indexed as (sample, time step, feature) and holds scaled
            features, and ``y_batch`` holds the targets.
        temperature_column: Index of the temperature feature in the array
            returned by ``inverse_transform``. Defaults to 1.
        feature_scaler: Object with an ``inverse_transform`` method that
            undoes the feature scaling. Defaults to the module-level
            ``scaler``.

    Returns:
        Tuple ``(target, y_pred)`` of NumPy arrays gathered over all
        batches; both are empty when ``dataset`` yields nothing.
    """
    if feature_scaler is None:
        feature_scaler = scaler
    targets, predictions = [], []
    for X_batch, y_batch in dataset:
        # Final time step of every window in the batch.
        last_step = np.asarray(X_batch)[:, -1]
        # Back to the original units before reading the temperature.
        restored = feature_scaler.inverse_transform(last_step)
        predictions.append(restored[:, temperature_column])
        targets.append(np.asarray(y_batch))
    if not targets:
        # np.concatenate rejects an empty list; return empty arrays instead.
        return np.array([]), np.array([])
    return np.concatenate(targets), np.concatenate(predictions)
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the naive baseline on the validation split ...
y, y_pred = baseline_model(val_dataset)
val_mae = mean_absolute_error(y, y_pred)
print(f'MAE[검증]: {val_mae:.3f}')
# ... and on the test split.
y, y_pred = baseline_model(test_dataset)
test_mae = mean_absolute_error(y, y_pred)
print(f'MAE[테스트]: {test_mae:.3f}')
MAE[검증]: 2.532
MAE[테스트]: 2.723
# Symbolic model input: one window of 시퀀스_길이 time steps with 특성차원
# features each. It is passed to the model builders below.
inputs = keras.Input(shape=(시퀀스_길이, 특성차원))
def MLP(inputs):
    """Build a small fully connected regressor on top of ``inputs``.

    The input is flattened, passed through one 16-unit ReLU layer and
    mapped to a single linear output.
    """
    flattened = layers.Flatten()(inputs)
    hidden = layers.Dense(16, activation='relu')(flattened)
    prediction = layers.Dense(1)(hidden)
    return keras.Model(inputs=inputs, outputs=prediction)

# Reset Keras' global state before building a fresh model.
keras.backend.clear_session()
model = MLP(inputs)
model.summary()
# Train on mean squared error and report mean absolute error alongside it.
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])

# Optional quick checks (uncomment to run):
# outputs = model(X_batch)
# train_loss, *metrics = model.evaluate(train_dataset)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
)
Loading...
Loading...
Loading...
Loading...
Loading...
Epoch 1/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 19ms/step - loss: 43.7426 - mae: 5.1138 - val_loss: 14.0587 - val_mae: 2.9753
Epoch 2/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 13.9508 - mae: 2.9708 - val_loss: 16.2679 - val_mae: 3.1983
Epoch 3/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 18ms/step - loss: 12.8055 - mae: 2.8292 - val_loss: 11.7951 - val_mae: 2.6970
Epoch 4/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 11.8028 - mae: 2.7254 - val_loss: 13.0772 - val_mae: 2.8883
Epoch 5/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 19ms/step - loss: 11.3019 - mae: 2.6488 - val_loss: 11.4802 - val_mae: 2.6597
Epoch 6/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 10.9805 - mae: 2.6152 - val_loss: 15.9605 - val_mae: 3.2033
Epoch 7/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 10.5837 - mae: 2.5625 - val_loss: 11.8828 - val_mae: 2.7465
Epoch 8/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 16ms/step - loss: 10.0385 - mae: 2.4978 - val_loss: 10.6951 - val_mae: 2.5782
Epoch 9/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 19ms/step - loss: 9.9309 - mae: 2.4788 - val_loss: 10.7090 - val_mae: 2.5720
Epoch 10/10
135/135 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 9.7706 - mae: 2.4660 - val_loss: 11.4098 - val_mae: 2.6653
def RNN(inputs):
    """Build a recurrent regressor on top of ``inputs``.

    A single 32-unit LSTM layer reads the window and a linear layer maps
    its output to one value.
    """
    encoded = layers.LSTM(32)(inputs)
    prediction = layers.Dense(1)(encoded)
    return keras.Model(inputs=inputs, outputs=prediction)

# Reset Keras' global state before building a fresh model.
keras.backend.clear_session()
model = RNN(inputs)
model.summary()
# Train on mean squared error and report mean absolute error alongside it.
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])

# Optional quick checks (uncomment to run):
# outputs = model(X_batch)
# train_loss, *metrics = model.evaluate(train_dataset)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    callbacks=[
        # Save to 'rnn_1.keras' only when the monitored metric improves.
        keras.callbacks.ModelCheckpoint('rnn_1.keras', save_best_only=True),
        # Stop after 5 epochs without val_loss improvement and roll the
        # model back to its best weights.
        keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5, restore_best_weights=True),
    ],
)
Loading...
Loading...
Loading...
Loading...
Loading...
Epoch 1/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 5s 28ms/step - loss: 91.8192 - mae: 7.6614 - val_loss: 30.4881 - val_mae: 4.1417
Epoch 2/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 27.9717 - mae: 3.9884 - val_loss: 16.8299 - val_mae: 3.1034
Epoch 3/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 16.4925 - mae: 3.1093 - val_loss: 11.6736 - val_mae: 2.6247
Epoch 4/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 12.3188 - mae: 2.7278 - val_loss: 10.8461 - val_mae: 2.5458
Epoch 5/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 11.0675 - mae: 2.6050 - val_loss: 10.2747 - val_mae: 2.4819
Epoch 6/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 10.4174 - mae: 2.5260 - val_loss: 10.4551 - val_mae: 2.5087
Epoch 7/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 10.1743 - mae: 2.4868 - val_loss: 10.2651 - val_mae: 2.4833
Epoch 8/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 9.8625 - mae: 2.4516 - val_loss: 9.8310 - val_mae: 2.4328
Epoch 9/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 9.6174 - mae: 2.4218 - val_loss: 10.0902 - val_mae: 2.4658
Epoch 10/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 3s 25ms/step - loss: 9.3640 - mae: 2.3861 - val_loss: 9.8439 - val_mae: 2.4399
Epoch 11/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 27ms/step - loss: 8.9700 - mae: 2.3430 - val_loss: 10.4594 - val_mae: 2.5147
Epoch 12/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 8.7416 - mae: 2.3104 - val_loss: 10.1592 - val_mae: 2.4730
Epoch 13/50
135/135 ━━━━━━━━━━━━━━━━━━━━ 4s 26ms/step - loss: 8.6591 - mae: 2.2978 - val_loss: 10.1650 - val_mae: 2.4812
# Evaluate the trained model (loss and MAE, as configured in compile) on the
# validation split and then on the test split.
print('validation')
model.evaluate(val_dataset)
print('test')
model.evaluate(test_dataset)
valiation
68/68 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 9.8602 - mae: 2.4332
test
68/68 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 11.8934 - mae: 2.7067
[11.920796394348145, 2.70729660987854]