import tensorflow as tf
import keras
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

# Report the versions of the core libraries used in this notebook so the
# environment can be reproduced later.
for lib_name, lib in (("tf", tf), ("keras", keras), ("numpy", np),
                      ("matplotlib", matplotlib), ("pandas", pd)):
    print("{} version : {}".format(lib_name, lib.__version__))
tf version : 2.10.0 keras version : 2.10.0 numpy version : 1.21.5 matplotlib version : 3.5.1 pandas version : 1.4.2
## Train data set and test data set.
## train : input data used to fit the model.
## test  : new, unlabeled data used for prediction (evaluation/submission).
## parse_dates : loads the 'datetime' column as a datetime64 dtype
##               instead of a plain string.
train = pd.read_csv("./bike/bike_mod_tr.csv", parse_dates=['datetime'])
test = pd.read_csv("./bike/bike_mod_test.csv", parse_dates=['datetime'])
train.columns  # inspect the training set's column names
Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count', 'year', 'month', 'day', 'hour', 'minute', 'second', 'dayofweek'], dtype='object')
test.columns  # the test set lacks 'casual', 'registered' and 'count'
Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'year', 'month', 'day', 'dayofweek', 'hour', 'minute', 'second'], dtype='object')
## Schema overview of both frames (dtypes, non-null counts, memory usage).
## DataFrame.info() prints its report directly and returns None, so wrapping
## it in print() only adds a spurious "None" line — call it directly.
train.info()
print()
test.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10886 entries, 0 to 10885 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 datetime 10886 non-null datetime64[ns] 1 season 10886 non-null int64 2 holiday 10886 non-null int64 3 workingday 10886 non-null int64 4 weather 10886 non-null int64 5 temp 10886 non-null float64 6 atemp 10886 non-null float64 7 humidity 10886 non-null int64 8 windspeed 10886 non-null float64 9 casual 10886 non-null int64 10 registered 10886 non-null int64 11 count 10886 non-null int64 12 year 10886 non-null int64 13 month 10886 non-null int64 14 day 10886 non-null int64 15 hour 10886 non-null int64 16 minute 10886 non-null int64 17 second 10886 non-null int64 18 dayofweek 10886 non-null int64 dtypes: datetime64[ns](1), float64(3), int64(15) memory usage: 1.6 MB None <class 'pandas.core.frame.DataFrame'> RangeIndex: 6493 entries, 0 to 6492 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 datetime 6493 non-null datetime64[ns] 1 season 6493 non-null int64 2 holiday 6493 non-null int64 3 workingday 6493 non-null int64 4 weather 6493 non-null int64 5 temp 6493 non-null float64 6 atemp 6493 non-null float64 7 humidity 6493 non-null int64 8 windspeed 6493 non-null float64 9 year 6493 non-null int64 10 month 6493 non-null int64 11 day 6493 non-null int64 12 dayofweek 6493 non-null int64 13 hour 6493 non-null int64 14 minute 6493 non-null int64 15 second 6493 non-null int64 dtypes: datetime64[ns](1), float64(3), int64(12) memory usage: 811.8 KB None
X : weather, temp (날씨, 온도)
y : count - 자전거 시간대별 렌탈 대수
## Model inputs: weather and temperature. Target: hourly rental count.
input_col = ['weather', 'temp']
labeled_col = ['count']

X = train.loc[:, input_col]      # training features
y = train.loc[:, labeled_col]    # training target
X_val = test.loc[:, input_col]   # features of the unseen (submission) set

from sklearn.model_selection import train_test_split

# Hold out part of the labeled data (default 25%) for validation.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(X_train.shape)
print(X_test.shape)
(8164, 2) (2722, 2)
### Fix the random-number pattern (seed 0) for reproducibility.
# NOTE(review): only NumPy's RNG is seeded here; Keras/TensorFlow weight
# initialisation uses TensorFlow's own RNG, so training runs may still
# differ — consider calling tf.random.set_seed(seed) as well.
seed = 0
np.random.seed(seed)
from keras.models import Sequential
from keras.layers import Dense

# Small fully-connected regression network: 2 inputs -> 32 -> 16 -> 16 -> 1.
# The single linear output unit predicts the rental count.
model = Sequential([
    Dense(32, input_dim=2, activation='relu'),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1),
])
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 32) 96 dense_1 (Dense) (None, 16) 528 dense_2 (Dense) (None, 16) 272 dense_3 (Dense) (None, 1) 17 ================================================================= Total params: 913 Trainable params: 913 Non-trainable params: 0 _________________________________________________________________
## Early-stopping callback: halt training once validation loss stagnates.
from keras.callbacks import EarlyStopping

# Stop after 10 consecutive epochs with no improvement in the monitored
# metric (EarlyStopping monitors 'val_loss' by default).
# NOTE(review): without restore_best_weights=True the model keeps the
# weights of the *last* epoch, not the best one — consider enabling it.
early_stopping = EarlyStopping(patience=10)

# MSE loss matches the regression target; RMSprop optimizer.
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.fit(X_train, y_train,
          epochs=100,
          validation_data=(X_test, y_test),  # Keras documents a tuple here
          batch_size=16,
          callbacks=[early_stopping])
Epoch 1/100 511/511 [==============================] - 2s 2ms/step - loss: 37601.4141 - val_loss: 27562.2598 Epoch 2/100 511/511 [==============================] - 1s 2ms/step - loss: 27881.7148 - val_loss: 27502.8164 Epoch 3/100 511/511 [==============================] - 1s 1ms/step - loss: 27826.5254 - val_loss: 27491.9258 Epoch 4/100 511/511 [==============================] - 1s 2ms/step - loss: 27780.5469 - val_loss: 27411.8594 Epoch 5/100 511/511 [==============================] - 1s 2ms/step - loss: 27728.5938 - val_loss: 27380.1211 Epoch 6/100 511/511 [==============================] - 1s 2ms/step - loss: 27735.6094 - val_loss: 27422.2305 Epoch 7/100 511/511 [==============================] - 1s 2ms/step - loss: 27703.3672 - val_loss: 27364.6445 Epoch 8/100 511/511 [==============================] - 1s 2ms/step - loss: 27685.1992 - val_loss: 27308.6133 Epoch 9/100 511/511 [==============================] - 1s 2ms/step - loss: 27652.4551 - val_loss: 27366.2871 Epoch 10/100 511/511 [==============================] - 1s 2ms/step - loss: 27602.9727 - val_loss: 27460.8047 Epoch 11/100 511/511 [==============================] - 1s 2ms/step - loss: 27633.3027 - val_loss: 27365.1191 Epoch 12/100 511/511 [==============================] - 1s 2ms/step - loss: 27603.9375 - val_loss: 27255.8359 Epoch 13/100 511/511 [==============================] - 1s 2ms/step - loss: 27606.1914 - val_loss: 27251.7383 Epoch 14/100 511/511 [==============================] - 1s 2ms/step - loss: 27550.2070 - val_loss: 27491.2246 Epoch 15/100 511/511 [==============================] - 1s 2ms/step - loss: 27578.2344 - val_loss: 27260.3184 Epoch 16/100 511/511 [==============================] - 1s 2ms/step - loss: 27544.0645 - val_loss: 27239.5215 Epoch 17/100 511/511 [==============================] - 1s 2ms/step - loss: 27575.0234 - val_loss: 27217.3867 Epoch 18/100 511/511 [==============================] - 1s 2ms/step - loss: 27574.8535 - val_loss: 27281.2969 Epoch 19/100 511/511 
[==============================] - 1s 2ms/step - loss: 27558.8164 - val_loss: 27239.4922 Epoch 20/100 511/511 [==============================] - 1s 2ms/step - loss: 27535.5391 - val_loss: 27311.1953 Epoch 21/100 511/511 [==============================] - 1s 2ms/step - loss: 27522.7969 - val_loss: 27246.8750 Epoch 22/100 511/511 [==============================] - 1s 2ms/step - loss: 27529.7227 - val_loss: 27212.6465 Epoch 23/100 511/511 [==============================] - 1s 2ms/step - loss: 27528.5176 - val_loss: 27193.1016 Epoch 24/100 511/511 [==============================] - 1s 2ms/step - loss: 27491.9609 - val_loss: 27244.5938 Epoch 25/100 511/511 [==============================] - 1s 2ms/step - loss: 27527.7598 - val_loss: 27183.5215 Epoch 26/100 511/511 [==============================] - 1s 2ms/step - loss: 27524.2969 - val_loss: 27281.6699 Epoch 27/100 511/511 [==============================] - 1s 2ms/step - loss: 27519.2969 - val_loss: 27450.4453 Epoch 28/100 511/511 [==============================] - 1s 2ms/step - loss: 27488.3281 - val_loss: 27427.1309 Epoch 29/100 511/511 [==============================] - 1s 2ms/step - loss: 27507.6250 - val_loss: 27179.2422 Epoch 30/100 511/511 [==============================] - 1s 2ms/step - loss: 27521.1250 - val_loss: 27245.6895 Epoch 31/100 511/511 [==============================] - 1s 2ms/step - loss: 27491.3516 - val_loss: 27395.5469 Epoch 32/100 511/511 [==============================] - 1s 1ms/step - loss: 27480.8633 - val_loss: 27233.8496 Epoch 33/100 511/511 [==============================] - 1s 1ms/step - loss: 27501.6289 - val_loss: 27205.1660 Epoch 34/100 511/511 [==============================] - 1s 2ms/step - loss: 27468.9629 - val_loss: 27215.5332 Epoch 35/100 511/511 [==============================] - 1s 2ms/step - loss: 27501.2598 - val_loss: 27297.7227 Epoch 36/100 511/511 [==============================] - 1s 2ms/step - loss: 27499.4473 - val_loss: 27156.3555 Epoch 37/100 511/511 
[==============================] - 1s 2ms/step - loss: 27474.3281 - val_loss: 27172.6523 Epoch 38/100 511/511 [==============================] - 1s 2ms/step - loss: 27456.0898 - val_loss: 27155.2266 Epoch 39/100 511/511 [==============================] - 1s 2ms/step - loss: 27461.4941 - val_loss: 27185.2109 Epoch 40/100 511/511 [==============================] - 1s 2ms/step - loss: 27457.3691 - val_loss: 27184.4961 Epoch 41/100 511/511 [==============================] - 1s 2ms/step - loss: 27479.6660 - val_loss: 27223.9902 Epoch 42/100 511/511 [==============================] - 1s 1ms/step - loss: 27478.2988 - val_loss: 27148.6230 Epoch 43/100 511/511 [==============================] - 1s 1ms/step - loss: 27487.1133 - val_loss: 27147.9258 Epoch 44/100 511/511 [==============================] - 1s 2ms/step - loss: 27449.9316 - val_loss: 27201.1133 Epoch 45/100 511/511 [==============================] - 1s 2ms/step - loss: 27444.1543 - val_loss: 27395.9531 Epoch 46/100 511/511 [==============================] - 1s 2ms/step - loss: 27447.4043 - val_loss: 27156.2461 Epoch 47/100 511/511 [==============================] - 1s 2ms/step - loss: 27475.3496 - val_loss: 27145.1660 Epoch 48/100 511/511 [==============================] - 1s 2ms/step - loss: 27472.4238 - val_loss: 27167.4004 Epoch 49/100 511/511 [==============================] - 1s 2ms/step - loss: 27465.2285 - val_loss: 27200.9902 Epoch 50/100 511/511 [==============================] - 1s 2ms/step - loss: 27453.1914 - val_loss: 27164.5078 Epoch 51/100 511/511 [==============================] - 1s 2ms/step - loss: 27467.5449 - val_loss: 27296.9102 Epoch 52/100 511/511 [==============================] - 1s 2ms/step - loss: 27474.7148 - val_loss: 27143.4863 Epoch 53/100 511/511 [==============================] - 1s 2ms/step - loss: 27475.8105 - val_loss: 27217.1191 Epoch 54/100 511/511 [==============================] - 1s 2ms/step - loss: 27471.6152 - val_loss: 27293.3496 Epoch 55/100 511/511 
[==============================] - 1s 2ms/step - loss: 27458.7949 - val_loss: 27258.4531 Epoch 56/100 511/511 [==============================] - 1s 2ms/step - loss: 27432.8184 - val_loss: 27233.8965 Epoch 57/100 511/511 [==============================] - 1s 2ms/step - loss: 27426.7031 - val_loss: 27146.7656 Epoch 58/100 511/511 [==============================] - 1s 2ms/step - loss: 27437.7188 - val_loss: 27213.7520 Epoch 59/100 511/511 [==============================] - 1s 2ms/step - loss: 27446.7227 - val_loss: 27164.2207 Epoch 60/100 511/511 [==============================] - 1s 2ms/step - loss: 27422.4141 - val_loss: 27141.3633 Epoch 61/100 511/511 [==============================] - 1s 2ms/step - loss: 27455.0098 - val_loss: 27167.9434 Epoch 62/100 511/511 [==============================] - 1s 2ms/step - loss: 27456.4434 - val_loss: 27140.8730 Epoch 63/100 511/511 [==============================] - 1s 2ms/step - loss: 27464.4043 - val_loss: 27151.1680 Epoch 64/100 511/511 [==============================] - 1s 1ms/step - loss: 27437.4316 - val_loss: 27161.6602 Epoch 65/100 511/511 [==============================] - 1s 2ms/step - loss: 27451.5098 - val_loss: 27142.1309 Epoch 66/100 511/511 [==============================] - 1s 2ms/step - loss: 27458.4707 - val_loss: 27273.7949 Epoch 67/100 511/511 [==============================] - 1s 2ms/step - loss: 27450.6465 - val_loss: 27276.3184 Epoch 68/100 511/511 [==============================] - 1s 2ms/step - loss: 27440.6035 - val_loss: 27150.0918 Epoch 69/100 511/511 [==============================] - 1s 2ms/step - loss: 27411.6562 - val_loss: 27143.0215 Epoch 70/100 511/511 [==============================] - 1s 2ms/step - loss: 27439.0625 - val_loss: 27212.4844 Epoch 71/100 511/511 [==============================] - 1s 1ms/step - loss: 27466.7109 - val_loss: 27151.0273 Epoch 72/100 511/511 [==============================] - 1s 2ms/step - loss: 27453.4219 - val_loss: 27185.6895
<keras.callbacks.History at 0x1e292d79dc0>
# Final loss (MSE, the compiled training loss) on the held-out split.
model.evaluate(X_test, y_test)
86/86 [==============================] - 0s 1ms/step - loss: 27185.6953
27185.6953125
# Predict rental counts for the unseen test set
# (presumably shape (n_samples, 1) given the single output unit — confirm).
pred = model.predict(X_val)
203/203 [==============================] - 0s 1ms/step
## Build the Kaggle submission file from the sample template.
sub = pd.read_csv("./bike/sampleSubmission.csv")
sub['count'] = pred
# Rental counts cannot be negative; floor the predictions at zero.
sub['count'] = sub['count'].clip(lower=0)
sub.to_csv("./bike/nn_sub_2207.csv", index=False)
변수 추가를 통해 성능을 향상시켜 보자 (5-10분) - epoch 수도 증가시켜 보자