-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01
More file actions
165 lines (141 loc) · 4.63 KB
/
01
File metadata and controls
165 lines (141 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
library(lubridate) #날짜 연산 package
library(psych)#산포도와 상관관계, 히스토그램 등을 모두 보여주는 함수
library(MASS)
library(e1071)
library(kernlab)
tr <- read.csv("train.csv")
tr <- tr[,-2]
tr$date <- as.Date(tr$date)
#nmae측정 함수
nm <- function(prediction, answer){
nmae <- abs((prediction-answer)/answer)
out <- c() #Inf인 값들의 index
for(i in 1:38){
if(nmae[i]==Inf) {
out <- c(out, i)
}
}
nmae <- nmae[-c(out)]
score <- rowMeans(nmae)
return(score)
}
######train데이터 준비######
#n=1~21-> 품목 개수 (1주뒤 예측) 모델 return
regression <- function(n){
#train할 날짜
date <- c(seq(as.Date('2017/09/28','%Y/%m/%d'),
as.Date('2017/12/01','%Y/%m/%d'),1),
seq(as.Date('2018/09/28','%Y/%m/%d'),
as.Date('2018/12/01','%Y/%m/%d'),1))
tr_date <- matrix(rep(0,len=4030),nrow=31,ncol=130) #날짜data포함
tr_date <- as.data.frame(tr_date)
colnames(tr_date) <- date
#train할 날짜데이터를 가지는 DF->tr_date
for(i in 1:130){
a <- c()
day <- c()
day <- date[i]
a <- date[i]+7 #예측할 label값(1주뒤)
a <- c(a,date[i])
a <- c(a,day-years(1)) #1년전 데이터
a <- c(a, day-seq(1,28,1))
tr_date[,i] <- a
}
rownames(tr_date) <- c('label','today','1year',1:28)
tr_date <- t(tr_date)
tr_date <- as.data.frame(tr_date)
tr_price <- matrix(rep(0,len=4030),nrow=130,ncol=31) #가격데이터를 담은 DF
tr_price <- as.data.frame(tr_price)
#train할 가격데이터를 가지는 DF->tr_price
for(i in 1:130){
for(j in 1:31){
tr_price[i,j] <- tr[tr_date[i,j]==tr$date,1+2*n]
}
}
subset <- subset(tr,tr[,1+2*n]!=0)
laplace <- mean(subset[,1+2*n])
for(i in 1:130){
for(j in 1:31){
#0을 포함하는 행을 제거하기 위해 행번호 수집
if(tr_price[i,j]==0){
tr_price[i,j] <- laplace
}
}
}
colnames(tr_price) <-c('label','today','1year',1:28)
rownames(tr_price) <- date
# tune.svm(label~., data=tr_price, gamma=2^(-1:1), cost=2^(2:4))
model <- svm(label~., data=tr_price, cost=2, kernel="linear")
# model <- ksvm(label~., data=tr_price, kernal="stringdot")
pred <- predict(model,tr_price[,2:31])
print(cor(pred,tr_price$label))
plot(tr_price$label)
points(pred, col="blue")
# model <- lm(label~., data=tr_price) #다중 선형 회귀 모델 생성
# m <- stepAIC(model)
return(model)
}
######test데이터 준비######
date2 <- seq(as.Date('2019/09/28','%Y/%m/%d'), #test할 데이터의 날짜
as.Date('2019/11/04','%Y/%m/%d'),1)
prediction <- function(n){
ts_date <- matrix(rep(0,len=1178),nrow=31,ncol=38) #날짜data포함
ts_date <- as.data.frame(ts_date)
colnames(ts_date) <- date2
#train할 날짜데이터를 가지는 DF
for(i in 1:38){
b <- c()
day <- c()
day <- date2[i]
b <- date2[i]+7 #예측할 label값(1주뒤)
b <- c(b,date2[i])
b <- c(b,day-years(1)) #1년 전 데이터
b <- c(b, day-seq(1,28,1))
ts_date[,i] <- b
}
rownames(ts_date) <- c('label','today','1year',1:28)
ts_date <- t(ts_date)
# ts_price <- matrix(rep(0,len=1178),nrow=38,ncol=31) #가격데이터를 담은 DF
# ts_price <- as.data.frame(ts_price)
for(i in 1:38){
for(j in 1:31){
ts_price[i,j] <- tr[ts_date[i,j]==tr$date,1+2*n]
}
}
subset <- subset(tr,tr[,1+2*n]!=0)
laplace <- mean(subset[,1+2*n])
for(i in 1:38){
for(j in 1:31){
#0을 포함하는 행을 제거하기 위해 행번호 수집
if(ts_price[i,j]==0){
ts_price[i,j] <- laplace
}
}
}
colnames(ts_price) <-c('label','today','1year',1:28)
rownames(ts_price) <- date2
ts_price
ml <- regression(n)#선형 회귀 모델
pred <- predict(ml,ts_price[2:31])
plot(ts_price$label)
points(pred, col="blue")
return(pred)
} #예측값을 반환해주는 함수
mx <- matrix(rep(0,len=798),ncol=38,nrow=21)
mx <- as.data.frame(mx)
for(i in 1:21){
mx[i,] <- prediction(i)
}
rownames(mx) <- c("배추","무","양파","건고추","마늘"
,"대파","얼갈이배추","양배추","깻잎"
,"시금치","미나리","당근","파프리카","새송이"
,"팽이버섯","토마토","청상추","백다다기"
,"애호박","캠벨얼리","샤인마스캇")
colnames(mx) <- date2+7
real_price <- subset(tr, date %in% seq(as.Date('2019/10/05','%Y/%m/%d'),
as.Date('2019/11/11','%Y/%m/%d'),1)) #기간내의 음식들의 실제 가격
NM <- c() #nmae값 담기
for(i in 1:21){
NM <- c(NM,nm(mx[i,],real_price[,2*i+1]))
}
mean(NM)