from IPython.display import display, Image
# Render the kaggle01..kaggle03 images one after another, in order.
for _image_path in (
    'img/kaggle/kaggle01.png',
    'img/kaggle/kaggle02.png',
    'img/kaggle/kaggle03.png',
):
    display(Image(filename=_image_path))
필드명 | 설명 |
---|---|
datetime | hourly date + timestamp |
season | 1 = spring, 2 = summer, 3 = fall, 4 = winter |
holiday | whether the day is considered a holiday |
workingday | whether the day is neither a weekend nor holiday |
weather | 1: Clear, Few clouds, Partly cloudy 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog |
temp | temperature in Celsius (온도) |
atemp | "feels like" temperature in Celsius (체감온도) |
humidity | relative humidity (습도) |
windspeed | wind speed (바람속도) |
casual | number of non-registered user rentals initiated (비가입자 사용유저) |
registered | number of registered user rentals initiated (가입자 사용유저) |
count | number of total rentals (전체 렌탈 대수) |
import pandas as pd

# Read the train and test CSV files, parsing the 'datetime' column
# into timestamps for both.
train, test = (
    pd.read_csv(csv_name, parse_dates=['datetime'])
    for csv_name in ("train.csv", "test.csv")
)
# Print the (rows, columns) shape of each frame, one per line.
print(train.shape, test.shape, sep="\n")
# Preview the first rows of the training data.
train.head()
# Print the column names, non-null counts and dtypes of the training data.
train.info()
# Name of the target column: rental count (the dependent variable).
label_name = 'count'
# Names of the columns used as model inputs.
f_names = ['temp', 'atemp']

# Pick the input columns from the training and test frames.
X_train, X_test = train[f_names], test[f_names]
# Pick the target values from the training frame.
y_train = train[label_name]
from sklearn.linear_model import LinearRegression
# Create a linear regression model and fit it on the training inputs/target.
model = LinearRegression()
model.fit(X_train, y_train)
# NOTE(review): this prediction is not stored; the same call is made again
# further down and assigned to `pred`.
model.predict(X_test) # predict on new (test) data
# Fitted coefficients, one per input column.
model.coef_
# Fitted intercept term.
model.intercept_
pred = model.predict(X_test) # predict on the test inputs
# Load the sample submission file to reuse its layout.
sub = pd.read_csv("sampleSubmission.csv")
# A linear model's output is unbounded below, but a rental count cannot be
# negative; clamp at zero so the submission never holds an invalid count.
sub['count'] = pred.clip(min=0)
# First submission CSV; index=False leaves out the row-number column.
sub.to_csv("firstsubmission.csv", index=False)
display(Image(filename='img/kaggle/bike01.png'))
## After the upload has finished, select the button shown below
# NOTE(review): this displays the same bike01.png as the call above —
# possibly a different image was intended here; confirm.
display(Image(filename='img/kaggle/bike01.png'))