[TensorFlow Cert] Q5-Time Series Data (Conv1D, LSTM)

2021, Oct 27    


Sunspot Dataset

  • Lambda layer used / normalized version of the series
  • optimizer: SGD(learning_rate=1e-5, momentum=0.9)
  • loss: Huber
  • Check the window function: (w[:-1], w[1:]) (see the toy check right after this list)
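
A quick toy check of the (w[:-1], w[1:]) split, separate from the exam code below: each window of window_size + 1 values becomes an input of the first window_size steps and a label that is the same window shifted forward by one step. The numbers here are illustrative only.

import tensorflow as tf

toy = tf.data.Dataset.range(10)
toy = toy.window(4 + 1, shift=1, drop_remainder=True)   # window_size = 4
toy = toy.flat_map(lambda w: w.batch(4 + 1))
toy = toy.map(lambda w: (w[:-1], w[1:]))

for x, y in toy.take(2):
    print(x.numpy(), '->', y.numpy())
# [0 1 2 3] -> [1 2 3 4]
# [1 2 3 4] -> [2 3 4 5]

The full solution: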
import csv
import tensorflow as tf
import numpy as np
import urllib

from tensorflow.keras.layers import Dense, LSTM, Lambda, Conv1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import Huber

def normalization(series):
    # 1. Normalization: shift by the series minimum, then divide by the raw
    # maximum (the sunspot minimum is 0, so this maps the series into [0, 1]).
    # Locals renamed so they do not shadow the built-ins min/max.
    min_val = np.min(series)
    max_val = np.max(series)
    series -= min_val
    series /= max_val
    return series

def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    series = tf.expand_dims(series, axis=-1)               # (time,) -> (time, 1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda w: (w[:-1], w[1:]))                 # input / one-step-shifted label
    return ds.batch(batch_size).prefetch(1)

def solution_model():
    url = 'https://storage.googleapis.com/download.tensorflow.org/data/Sunspots.csv'
    urllib.request.urlretrieve(url, 'sunspots.csv')

    time_step = []
    sunspots = []

    with open('sunspots.csv') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader)
        for row in reader:
            sunspots.append(float(row[2]))
            time_step.append(int(row[0]))

    series = np.array(sunspots)
    time = np.array(time_step)

    series = normalization(series)          # normalized

    split_time = 3000

    time_train = time[:split_time]
    time_valid = time[split_time:]

    x_train = series[:split_time]
    x_valid = series[split_time:]

    window_size = 30
    batch_size = 32
    shuffle_size = 1000

    train_set = windowed_dataset(x_train,
                                 window_size=window_size,
                                 batch_size=batch_size,
                                 shuffle_buffer=shuffle_size)

    validation_set = windowed_dataset(x_valid,
                                      window_size=window_size,
                                      batch_size=batch_size,
                                      shuffle_buffer=shuffle_size)
    
    model = Sequential([
        Conv1D(70, kernel_size=5,
               padding='causal',
               activation='relu',
               input_shape=[None, 1]),
        LSTM(64, return_sequences=True),
        LSTM(64, return_sequences=True),
        Dense(30, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1),
        Lambda(lambda x: x * 400)       # 2. Lambda used to rescale the output
    ])

    optimizer = SGD(learning_rate=1e-5, momentum=0.9)
    loss = Huber()

    model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])

    checkpoint_path = 'my_checkpoint.ckpt'
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_mae',
                                 verbose=1)
    
    model.fit(train_set,
              validation_data=validation_set,
              epochs=100,
              callbacks=[checkpoint])
    
    model.load_weights(checkpoint_path)

    return model

if __name__ == '__main__':
    model = solution_model()
    model.save("model.h5")


Diesel Dataset

  • Layers: Conv1D, Bidirectional, LSTM
  • optimizer: Adam
  • loss: mae
  • Check the window function: (w[:n_past], w[n_past:]) (see the toy check right after this list)
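
A quick toy check of the (w[:n_past], w[n_past:]) split, again with illustrative numbers only (n_past=3, n_future=2): each window of length n_past + n_future is cut into a 3-step history and a 2-step horizon.

import tensorflow as tf

toy = tf.data.Dataset.range(8)
toy = toy.window(3 + 2, shift=1, drop_remainder=True)
toy = toy.flat_map(lambda w: w.batch(3 + 2))
toy = toy.map(lambda w: (w[:3], w[3:]))

for x, y in toy.take(2):
    print(x.numpy(), '->', y.numpy())
# [0 1 2] -> [3 4]
# [1 2 3] -> [4 5]

The full solution: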
import urllib
import os
import zipfile
import pandas as pd
import tensorflow as tf

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

def normalize_series(data, min_val, max_val):
    # Column-wise scaling; parameters renamed so they do not shadow min/max
    data = data - min_val
    data = data / max_val
    return data

def windowed_dataset(series, batch_size, n_past=10, n_future=10, shift=1):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(size=n_past + n_future, shift=shift, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(n_past + n_future))
    ds = ds.map(lambda w: (w[:n_past], w[n_past:]))   # n_past-step input, n_future-step label
    return ds.batch(batch_size).prefetch(1)


def solution_model():
    df = pd.read_csv('Weekly_U.S.Diesel_Retail_Prices.csv',
                     infer_datetime_format=True, index_col='Week of', header=0)

    N_FEATURES = len(df.columns) 
    data = df.values
    data = normalize_series(data, data.min(axis=0), data.max(axis=0))

    SPLIT_TIME = int(len(data) * 0.8) 
    x_train = data[:SPLIT_TIME]
    x_valid = data[SPLIT_TIME:]

    tf.keras.backend.clear_session()
    tf.random.set_seed(42)

    BATCH_SIZE = 32  
    N_PAST = 10  
    N_FUTURE = 10  
    SHIFT = 1 

    train_set = windowed_dataset(series=x_train, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)
    valid_set = windowed_dataset(series=x_valid, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)

    model = Sequential([
        Conv1D(filters=32, kernel_size=3, padding='causal',
               activation='relu',
               input_shape=[N_PAST, N_FEATURES]),   # N_FEATURES channels (1 for this dataset)
        Bidirectional(LSTM(32, return_sequences=True)),
        Bidirectional(LSTM(32, return_sequences=True)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(N_FEATURES)
    ])

    checkpoint_path = 'model/my_checkpoint.ckpt'
    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_loss',
                                 verbose=1)
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    model.fit(train_set,
              validation_data=valid_set,
              epochs=20,
              callbacks=[checkpoint])
    model.load_weights(checkpoint_path)

    return model

if __name__ == '__main__':
    model = solution_model()
    model.save("model.h5")