5.1 Statistik Uji ANOVA
Program 5.1
promo <- read.csv("promo.csv")
promo2 <- promo[,-c(1,2,4)]
promo2$X12 <- promo2$frekuensi.fashion*promo2$nilai.fashion
promo2$X13 <- promo2$frekuensi.footwear*promo2$nilai.footwear
promo2$X14 <- promo2$frekuensi.lainnya*promo2$nilai.lainnya
promo2$X15 <- promo2$total.nilai.tunai/(promo2$X12 + promo2$X13 + promo2$X14)
hasil = NULL
for(i in c(1:9, 11:14)){
y = promo2[,i]
x = promo2$promo
F = oneway.test(y ~ x)$statistic
hasil = rbind(hasil, c(i, F))
}
namavar = colnames(promo2)[-10]
hasil=data.frame(namavar, hasil)
Output 5.1
## namavar V1 F
## 11 X13 12 17.523
## 5 nilai.footwear 5 12.593
## 4 frekuensi.footwear 4 7.501
## 9 lama.member 9 6.086
## 1 usia 1 4.335
## 8 total.nilai.tunai 8 2.360
## 6 frekuensi.lainnya 6 1.270
## 3 nilai.fashion 3 0.753
## 2 frekuensi.fashion 2 0.517
## 10 X12 11 0.466
## 13 X15 14 0.252
## 12 X14 13 0.101
## 7 nilai.lainnya 7 0.000
5.2 Statistik Uji Chi-Square
Program 5.2
promo <- read.csv("promo.csv")
promo$X12 <- promo$frekuensi.fashion*promo$nilai.fashion
promo$X13 <- promo$frekuensi.footwear*promo$nilai.footwear
promo$X14 <- promo$frekuensi.lainnya*promo$nilai.lainnya
promo$X15 <- promo$total.nilai.tunai/(promo$X12 + promo$X13 + promo$X14)
hasil = NULL
for(i in c(2,4)){
x = promo[,i]
chi = chisq.test(x=x, y=promo$promo)$statistic
hasil = rbind(hasil, c(i, chi))
}
## Warning in chisq.test(x = x, y = promo$promo): Chi-squared approximation may be
## incorrect
library(classInt)
for(i in c(3, 5:12, 14:17)){
batas = classIntervals(promo[,i], style="quantile", n=3)$brks
batas = batas[!duplicated(batas)]
x = cut(promo[,i], breaks = batas)
chi = chisq.test(x=x, y=promo$promo)$statistic
hasil = rbind(hasil, c(i, chi))
}
namavar = colnames(promo)[hasil[,1]]
hasil = data.frame(namavar, hasil[,2])
hasil[order(hasil[,2], decreasing=TRUE),]
## namavar hasil...2.
## 13 X13 13.05116836
## 7 nilai.footwear 9.65383753
## 2 pendidikan 5.83806675
## 3 usia 5.22957670
## 11 lama.member 4.94429513
## 6 frekuensi.footwear 4.30378447
## 8 frekuensi.lainnya 3.55309707
## 14 X14 2.80560292
## 12 X12 1.90548398
## 10 total.nilai.tunai 1.72696472
## 4 frekuensi.fashion 1.47473733
## 5 nilai.fashion 1.07881176
## 15 X15 1.03711611
## 9 nilai.lainnya 0.20361673
## 1 jenis.kelamin 0.09438563
5.3 Performa Model Klasifikasi Sederhana
Program 5.3
promo <- read.csv("promo.csv")
promo$X12 <- promo$frekuensi.fashion*promo$nilai.fashion
promo$X13 <- promo$frekuensi.footwear*promo$nilai.footwear
promo$X14 <- promo$frekuensi.lainnya*promo$nilai.lainnya
promo$X15 <- promo$total.nilai.tunai/(promo$X12 + promo$X13 + promo$X14)
library(rpart)
library(pROC)
y = promo[,13]
hasil = NULL
for (i in c(2:12, 14:17)) {
x = promo[,i]
model.x = rpart(as.factor(y) ~ x, control=rpart.control(cp=-1, minsplit=15))
prediksi = predict(model.x, data.frame(x))[,2]
kurva.roc <- roc(y, prediksi)
hasil = rbind(hasil, c(i, auc(kurva.roc)))
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
namavar = colnames(promo)[hasil[,1]]
hasil = data.frame(namavar, hasil[,2])
hasil[order(hasil[,2], decreasing=TRUE),]
## namavar hasil...2.
## 13 X13 0.8322176
## 7 nilai.footwear 0.8321827
## 14 X14 0.8305265
## 9 nilai.lainnya 0.8065202
## 5 nilai.fashion 0.7940028
## 12 X12 0.7812936
## 11 lama.member 0.7152894
## 15 X15 0.7012204
## 10 total.nilai.tunai 0.6999128
## 2 usia 0.6625523
## 6 frekuensi.footwear 0.5932531
## 8 frekuensi.lainnya 0.5761158
## 4 frekuensi.fashion 0.5759589
## 3 pendidikan 0.5612796
## 1 jenis.kelamin 0.5116283
Program 5.4
promo <- read.csv("promo.csv")
promo$X12 <- promo$frekuensi.fashion*promo$nilai.fashion
promo$X13 <- promo$frekuensi.footwear*promo$nilai.footwear
promo$X14 <- promo$frekuensi.lainnya*promo$nilai.lainnya
promo$X15 <- promo$total.nilai.tunai/(promo$X12 + promo$X13 + promo$X14)
library(FSelector)
library(classInt)
promo.diskret = promo
for(i in c(3, 5:12, 14:17)){
batas = classIntervals(promo[,i], style="quantile", n=5)$brks
batas = batas[!duplicated(batas)]
promo.diskret[,i] = cut(promo[,i], breaks = batas)
}
promo.diskret = promo.diskret[,-1]
kinerja = (oneR(promo ~ ., promo.diskret))
kinerja = data.frame(namavar=rownames(kinerja), kinerja=kinerja[,1])
## Urutkan hasil
kinerja[order(kinerja[,2], decreasing=TRUE),]
## namavar kinerja
## 10 total.nilai.tunai 0.7994429
## 15 X15 0.7715877
## 14 X14 0.5292479
## 13 X13 0.5236769
## 8 frekuensi.lainnya 0.5208914
## 12 X12 0.5208914
## 6 frekuensi.footwear 0.5097493
## 4 frekuensi.fashion 0.4930362
## 7 nilai.footwear 0.4902507
## 5 nilai.fashion 0.4818942
## 2 usia 0.4791086
## 11 lama.member 0.4707521
## 9 nilai.lainnya 0.4596100
## 1 jenis.kelamin 0.4233983
## 3 pendidikan 0.4233983
5.4 Information Gain dan Gain Ratio
Program 5.5
promo <- read.csv("promo.csv")
promo$X12 <- promo$frekuensi.fashion*promo$nilai.fashion
promo$X13 <- promo$frekuensi.footwear*promo$nilai.footwear
promo$X14 <- promo$frekuensi.lainnya*promo$nilai.lainnya
promo$X15 <- promo$total.nilai.tunai/(promo$X12 + promo$X13 + promo$X14)
library(FSelector)
library(classInt)
promo.diskret = promo
for(i in c(3, 5:12, 14:17)){
batas = classIntervals(promo[,i], style="quantile", n=6)$brks
batas = batas[!duplicated(batas)]
promo.diskret[,i] = cut(promo[,i], breaks = batas)
}
promo.diskret = promo.diskret[,-1]
inf.gain <- information.gain(promo ~ ., promo.diskret, unit="log2")
inf.gain = data.frame(namavar=rownames(inf.gain), kinerja=inf.gain[,1])
## Urutkan hasil
inf.gain[order(inf.gain[,2], decreasing=TRUE),]
## namavar kinerja
## 13 X13 0.07177722
## 14 X14 0.06093536
## 7 nilai.footwear 0.06046003
## 11 lama.member 0.05865637
## 8 frekuensi.lainnya 0.04552890
## 5 nilai.fashion 0.04546746
## 4 frekuensi.fashion 0.04330867
## 6 frekuensi.footwear 0.04264099
## 10 total.nilai.tunai 0.04257620
## 2 usia 0.03935223
## 9 nilai.lainnya 0.03614214
## 12 X12 0.03303504
## 15 X15 0.01648100
## 1 jenis.kelamin 0.00000000
## 3 pendidikan 0.00000000
Program 5.6
gain.r <- gain.ratio(promo ~ ., promo.diskret, unit="log2")
gain.r = data.frame(namavar=rownames(gain.r), kinerja=gain.r[,1])
## Urutkan hasil
gain.r[order(gain.r[,2], decreasing=TRUE),]
## namavar kinerja
## 13 X13 0.026476710
## 7 nilai.footwear 0.023205901
## 14 X14 0.022230694
## 11 lama.member 0.022121073
## 10 total.nilai.tunai 0.019943318
## 6 frekuensi.footwear 0.018772561
## 8 frekuensi.lainnya 0.018458150
## 5 nilai.fashion 0.017451422
## 4 frekuensi.fashion 0.016819272
## 2 usia 0.015200164
## 9 nilai.lainnya 0.013872154
## 12 X12 0.012163445
## 15 X15 0.007719942
## 1 jenis.kelamin 0.000000000
## 3 pendidikan 0.000000000
5.5 Information Value
Program 5.7
library(woe)
hasil = NULL
for (i in c(1:11, 13:16)){
data = promo.diskret[,c(i,12)]
namavariable = colnames(data)[1]
n = nrow(data[,1])
iv = sum(woe(data, namavariable, FALSE, "promo", n, Bad=0, Good=1)[,9])
hasil = rbind(hasil, c(i, iv))
}
iv = data.frame(namavar=colnames(promo.diskret[,-12]), iv=hasil[,2])
iv[order(iv[,2], decreasing=TRUE),]
## namavar iv
## 13 X13 0.293
## 7 nilai.footwear 0.168
## 2 usia 0.111
## 11 lama.member 0.102
## 4 frekuensi.fashion 0.098
## 10 total.nilai.tunai 0.081
## 14 X14 0.079
## 6 frekuensi.footwear 0.074
## 3 pendidikan 0.069
## 15 X15 0.067
## 12 X12 0.051
## 8 frekuensi.lainnya 0.050
## 5 nilai.fashion 0.024
## 9 nilai.lainnya 0.009
## 1 jenis.kelamin 0.002
write.csv(iv, "hasil inf value.csv")