Score = 38.34583 (leaderboard rank 430/461)
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk yields nothing when the directory is missing, so this is a no-op
# outside the Kaggle environment.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Any results you write to the current directory are saved as output.
# Load the training set, derive calendar features from "date", and split it
# into the feature frame `train` and the target series `y` ("sales").
train = pd.read_csv("/kaggle/input/demand-forecasting-kernels-only/train.csv")
# pd.to_datetime instead of astype("datetime64"): pandas 2.x rejects the
# unit-less "datetime64" dtype string.
train["date"] = pd.to_datetime(train["date"])
train.dtypes  # notebook display only
train["hour"] = train["date"].dt.hour
train["year"] = train["date"].dt.year
# NOTE(review): if "date" carries no time-of-day component, "hour" is a
# constant 0 and adds no signal -- confirm against the data; month or
# day-of-week may be more useful (the test features must change in step).
y = train["sales"]
# Keyword `columns=` instead of the positional axis argument, which was
# removed in pandas 2.0.
train = train.drop(columns=["date", "sales"])
train.head()  # notebook display only
# Load the test set and derive the same calendar features ("hour", "year")
# that the training frame uses, then drop the non-feature columns.
test = pd.read_csv("/kaggle/input/demand-forecasting-kernels-only/test.csv")
# pd.to_datetime instead of astype("datetime64"): pandas 2.x rejects the
# unit-less "datetime64" dtype string.
test["date"] = pd.to_datetime(test["date"])
test["hour"] = test["date"].dt.hour
test["year"] = test["date"].dt.year
# Keyword `columns=` instead of the positional axis argument, which was
# removed in pandas 2.0.
test = test.drop(columns=["id", "date"])
test.head()  # notebook display only
# Fit a random forest on the training features, predict sales for the test
# features, and write the predictions into the sample submission file.
sub = pd.read_csv("/kaggle/input/demand-forecasting-kernels-only/sample_submission.csv")
sub.head()  # notebook display only
from sklearn.ensemble import RandomForestRegressor
# random_state pins the forest so the submission is reproducible between runs;
# n_jobs=-1 trains the trees on all available cores.
rf = RandomForestRegressor(n_jobs=-1, random_state=42)
rf.fit(train, y)
p = rf.predict(test)
p  # notebook display only
# Assumes the sample submission rows are in the same order as the test rows
# -- TODO confirm.
sub["sales"] = p
sub.to_csv("imu.csv", index=False)
아직 아이디어를 어떤 식으로 내는 건지 안 배워서 성능이 좋지 않음. 지금 하위 20% 정도임. 전에 Bike는 그게 아이디어의 전부라 상위권이 가능했지만, 이거는 좀 그렇네.
'AI > Kaggle' 카테고리의 다른 글
kaggle 그림으로 데이터 분석 (0) | 2019.09.10 |
---|---|
Store Item Demand Forecasting Challenge 2회차 (0) | 2019.09.10 |
Bike Sharing Demand (2) 성능 개선 2회차 (0) | 2019.09.06 |
Bike Sharing Demand (0) | 2019.08.27 |
Kaggle 데이터셋 학습법 (0) | 2019.08.27 |