Store Item Demand Forecasting Challenge

AI/Kaggle

Store Item Demand Forecasting Challenge

이무쿤 2019. 8. 29. 15:23

Score = 38.34583 430/461

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file under the Kaggle input directory so the
# dataset paths used further down can be checked against what is mounted.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

# Load the training data and split it into a feature frame (`train`) and
# the regression target (`y`).
train = pd.read_csv("/kaggle/input/demand-forecasting-kernels-only/train.csv")
# pd.to_datetime rather than astype("datetime64"): pandas 2.x refuses to cast
# to the unit-less "datetime64" dtype and raises TypeError.
train["date"] = pd.to_datetime(train["date"])
train.dtypes

# Calendar features derived from the parsed date.
# NOTE(review): if "date" has no time-of-day component, "hour" is 0 on every
# row and carries no signal -- confirm, and consider month / day-of-week.
train["hour"] = train["date"].dt.hour
train["year"] = train["date"].dt.year

# Target the model is fitted against.
y = train["sales"]

# Remove the raw date and the target from the features. The `columns=`
# keyword is used because the positional axis argument was removed in
# pandas 2.0.
train = train.drop(columns=["date", "sales"])
train.head()

# Load the test data and build the same feature columns the model is
# trained on.
test = pd.read_csv("/kaggle/input/demand-forecasting-kernels-only/test.csv")

# pd.to_datetime rather than astype("datetime64"): pandas 2.x refuses to cast
# to the unit-less "datetime64" dtype and raises TypeError.
test["date"] = pd.to_datetime(test["date"])
# NOTE(review): if "date" has no time-of-day component, "hour" is 0 on every
# row and carries no signal -- confirm, and consider month / day-of-week.
test["hour"] = test["date"].dt.hour
test["year"] = test["date"].dt.year

# Remove the row id and the raw date so only feature columns remain. The
# `columns=` keyword is used because the positional axis argument was removed
# in pandas 2.0.
test = test.drop(columns=["id", "date"])

test.head()

# Read the sample submission; it is used later as the template for the
# output file.
sub = pd.read_csv(
    filepath_or_buffer="/kaggle/input/demand-forecasting-kernels-only/sample_submission.csv"
)
# Notebook-style preview of the first rows.
sub.head()

from sklearn.ensemble import RandomForestRegressor

# Fit a random forest on the training features and predict the test rows.
# random_state pins the bootstrap / feature sampling so repeated runs produce
# the same submission; n_jobs=-1 builds the trees on all available cores and
# does not change the fitted model.
rf = RandomForestRegressor(random_state=0, n_jobs=-1)

rf.fit(train, y)

# One prediction per row of `test`, in row order.
p = rf.predict(test)
p

# Put the model predictions into the submission's "sales" column.
sub = sub.assign(sales=p)

# Write the submission file without the index column.
output_path = "imu.csv"
sub.to_csv(output_path, index=False)

아직 아이디어를 어떤 식으로 내는지 배우지 않아서 성능이 좋지 않다. 지금은 하위 20% 정도다. 전에 했던 bike 대회는 이 정도 아이디어가 전부여서 상위권이 가능했지만, 이번 대회는 그렇지 않다.