# Render the three Kaggle walkthrough images inline, in file order.
from IPython.display import display, Image

for _screenshot_path in (
    '../img/kaggle/kaggle01.png',
    '../img/kaggle/kaggle02.png',
    '../img/kaggle/kaggle03.png',
):
    display(Image(filename=_screenshot_path))
필드명 | 설명 |
---|---|
datetime | hourly date + timestamp |
season | 1 = spring(봄), 2 = summer(여름), 3 = fall(가을), 4 = winter(겨울) |
holiday | whether the day is considered a holiday(휴일인지 아닌지) |
workingday | whether the day is neither a weekend nor holiday(주말도 휴일도 아닌 날인지) |
weather | 1: Clear, Few clouds, Partly cloudy 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog |
temp | temperature in Celsius (온도) |
atemp | "feels like" temperature in Celsius (체감온도) |
humidity | relative humidity (습도) |
windspeed | wind speed (바람속도) |
casual | number of non-registered user rentals initiated (비가입자 사용유저) |
registered | number of registered user rentals initiated (가입자 사용유저) |
count | number of total rentals (전체 렌탈 대수) |
import pandas as pd

# Load the train/test CSVs; the 'datetime' column is parsed into timestamps
# at read time instead of being left as strings.
train = pd.read_csv("../bike/train.csv", parse_dates=['datetime'])
test = pd.read_csv("../bike/test.csv", parse_dates=['datetime'])

# Print each frame's (rows, columns) tuple on its own line.
print(train.shape, test.shape, sep="\n")
(10886, 12) (6493, 9)
# Preview the first rows of the training frame (head() with its default row count).
train.head()
datetime | season | holiday | workingday | weather | temp | atemp | humidity | windspeed | casual | registered | count | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2011-01-01 00:00:00 | 1 | 0 | 0 | 1 | 9.84 | 14.395 | 81 | 0.0 | 3 | 13 | 16 |
1 | 2011-01-01 01:00:00 | 1 | 0 | 0 | 1 | 9.02 | 13.635 | 80 | 0.0 | 8 | 32 | 40 |
2 | 2011-01-01 02:00:00 | 1 | 0 | 0 | 1 | 9.02 | 13.635 | 80 | 0.0 | 5 | 27 | 32 |
3 | 2011-01-01 03:00:00 | 1 | 0 | 0 | 1 | 9.84 | 14.395 | 75 | 0.0 | 3 | 10 | 13 |
4 | 2011-01-01 04:00:00 | 1 | 0 | 0 | 1 | 9.84 | 14.395 | 75 | 0.0 | 0 | 1 | 1 |
# Summarize the training frame: per-column non-null counts, dtypes, and memory usage.
train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10886 entries, 0 to 10885 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 datetime 10886 non-null datetime64[ns] 1 season 10886 non-null int64 2 holiday 10886 non-null int64 3 workingday 10886 non-null int64 4 weather 10886 non-null int64 5 temp 10886 non-null float64 6 atemp 10886 non-null float64 7 humidity 10886 non-null int64 8 windspeed 10886 non-null float64 9 casual 10886 non-null int64 10 registered 10886 non-null int64 11 count 10886 non-null int64 dtypes: datetime64[ns](1), float64(3), int64(8) memory usage: 1020.7 KB
from sklearn.linear_model import LinearRegression

# Predictor columns: the two temperature readings.
f_names = ['temp', 'atemp']
# Target column: the rental count in the training frame.
label_name = 'count'

# Select the same predictor columns from both frames, and the target from train.
X_train, X_test = train[f_names], test[f_names]
y_train = train[label_name]

# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Predict on the test features (shown as the cell's result).
model.predict(X_test)
array([101.95625474, 104.0156171 , 104.0156171 , ..., 103.33067499, 104.0156171 , 104.0156171 ])
# Show the fitted coefficients (one per feature in f_names), then the
# intercept, each on its own line.
print(model.coef_, model.intercept_, sep="\n")
[8.19865874 0.90720808] 4.248132645803736
## The model we fitted (coefficients rounded to two decimals)
## rental count = temp * 8.20 + atemp * 0.91 + 4.25
# Load the sample submission file; its 'count' column is filled in later
# with the model's predictions.
sub = pd.read_csv("../bike/sampleSubmission.csv")
# Preview its first rows.
sub.head()
datetime | count | |
---|---|---|
0 | 2011-01-20 00:00:00 | 0 |
1 | 2011-01-20 01:00:00 | 0 |
2 | 2011-01-20 02:00:00 | 0 |
3 | 2011-01-20 03:00:00 | 0 |
4 | 2011-01-20 04:00:00 | 0 |
# Predict on the test features and store the result in the submission frame.
pred = model.predict(X_test) # predictions for the test rows
sub['count'] = pred  # replaces the existing 'count' values with the predictions
# Display the updated submission frame.
sub
datetime | count | |
---|---|---|
0 | 2011-01-20 00:00:00 | 101.956255 |
1 | 2011-01-20 01:00:00 | 104.015617 |
2 | 2011-01-20 02:00:00 | 104.015617 |
3 | 2011-01-20 03:00:00 | 103.330675 |
4 | 2011-01-20 04:00:00 | 103.330675 |
... | ... | ... |
6488 | 2012-12-31 19:00:00 | 103.330675 |
6489 | 2012-12-31 20:00:00 | 103.330675 |
6490 | 2012-12-31 21:00:00 | 103.330675 |
6491 | 2012-12-31 22:00:00 | 104.015617 |
6492 | 2012-12-31 23:00:00 | 104.015617 |
6493 rows × 2 columns
# Write the first submission CSV; index=False leaves out the row-number column.
sub.to_csv("firstsubmission.csv", index=False)
# Show a screenshot inline (presumably the Kaggle submission upload step — confirm).
display(Image(filename='../img/kaggle/bike01.png'))
## After the upload finishes, select the button shown below.
# NOTE(review): this displays bike01.png again, the same file as above;
# possibly a different screenshot was intended here — confirm.
display(Image(filename='../img/kaggle/bike01.png'))