6.2 Best Subset
Program 6.1
# Best-subset search: for every model size keep the two best candidates
# (nbest = 2), scored by adjusted R-squared.
# leaps() comes from the leaps package; attach it here so the program also
# runs in a fresh session (harmless if the package is already attached).
library(leaps)

# Predictors are columns 2-7 of rumah, the response is column 8.
best <- leaps(
  x = rumah[, 2:7], y = rumah[, 8],
  method = "adjr2", nbest = 2
)

# One row per candidate model:
#   size   - number of predictors (best$size also counts the intercept,
#            hence the "- 1")
#   X1..   - TRUE when the corresponding predictor is in the model
#   adjr2  - adjusted R-squared expressed in percent
hasil <- data.frame(
  size = best$size - 1,
  best$which,
  adjr2 = best$adjr2 * 100
)

# Rank the candidates from highest to lowest adjusted R-squared. The column
# is addressed by name instead of by position (8), which was only correct
# with exactly six predictors.
hasil <- hasil[order(hasil$adjr2, decreasing = TRUE), ]

# Renumber the rows so the row label is the rank.
row.names(hasil) <- seq_len(nrow(hasil))
hasil
Output 6.1
## size X1 X2 X3 X4 X5 X6 adjr2
## 1 3 FALSE TRUE TRUE FALSE FALSE TRUE 84.56957
## 2 4 FALSE TRUE TRUE FALSE TRUE TRUE 84.37513
## 3 4 FALSE TRUE TRUE TRUE FALSE TRUE 84.31679
## 4 5 TRUE TRUE TRUE FALSE TRUE TRUE 84.12564
## 5 5 FALSE TRUE TRUE TRUE TRUE TRUE 84.10471
## 6 6 TRUE TRUE TRUE TRUE TRUE TRUE 83.86592
## 7 2 FALSE TRUE TRUE FALSE FALSE FALSE 83.82598
## 8 3 TRUE TRUE TRUE FALSE FALSE FALSE 83.57095
## 9 2 TRUE FALSE TRUE FALSE FALSE FALSE 81.51850
## 10 1 FALSE TRUE FALSE FALSE FALSE FALSE 68.58910
## 11 1 TRUE FALSE FALSE FALSE FALSE FALSE 66.86694
Program 6.2
# Exhaustive best-subset search with regsubsets(): the best model of each
# size from 1 up to nvmax = 6 predictors.
# regsubsets() comes from the leaps package; attach it here so the program
# also runs in a fresh session (harmless if the package is already attached).
library(leaps)

# rumah[, -1] drops the first column; harga is regressed on all the others.
models <- regsubsets(harga ~ ., data = rumah[, -1],
                     nvmax = 6, method = "exhaustive")

# Summarise once, keep the result, and print it (instead of calling
# summary(models) twice).
hasil <- summary(models)
hasil

# Index of the subset size with the highest adjusted R-squared, and the
# logical vector showing which terms that model contains.
terbaik <- which.max(hasil$adjr2)
hasil$which[terbaik, ]
Output 6.2
## Subset selection object
## Call: regsubsets.formula(harga ~ ., data = rumah[, -1], nvmax = 6,
## method = "exhaustive")
## 6 Variables (and intercept)
## Forced in Forced out
## luasbangunan FALSE FALSE
## luastanah FALSE FALSE
## umur FALSE FALSE
## kamartidur FALSE FALSE
## kamarmandi FALSE FALSE
## dekattol FALSE FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
## luasbangunan luastanah umur kamartidur kamarmandi dekattol
## 1 ( 1 ) " " "*" " " " " " " " "
## 2 ( 1 ) " " "*" "*" " " " " " "
## 3 ( 1 ) " " "*" "*" " " " " "*"
## 4 ( 1 ) " " "*" "*" " " "*" "*"
## 5 ( 1 ) "*" "*" "*" " " "*" "*"
## 6 ( 1 ) "*" "*" "*" "*" "*" "*"
Output 6.3
## (Intercept) luasbangunan luastanah umur kamartidur kamarmandi
## TRUE FALSE TRUE TRUE FALSE FALSE
## dekattol
## TRUE
6.3 Metode Sekuensial (Forward, Backward, Stepwise)
Program 6.4
library(MASS)

# Forward selection by AIC.
# lengkap: model with every available predictor; its formula is the upper
#          limit of the search.
# awal:    intercept-only model the search starts from.
lengkap <- lm(harga ~ ., data = rumah[, -1])
awal <- lm(harga ~ 1, data = rumah[, -1])

# A single formula given as scope is taken as the upper model; trace = 0
# silences the step-by-step log.
model.forward <- stepAIC(
  awal,
  scope = formula(lengkap),
  direction = "forward",
  trace = 0
)

# Path of terms added at each step, then the fit of the final model.
model.forward$anova
summary(model.forward)
Output 6.4
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## harga ~ 1
##
## Final Model:
## harga ~ luastanah + umur + dekattol
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 61 574824.60 568.3505
## 2 + luastanah 1 397226.960 60 177597.64 497.5288
## 3 + umur 1 87673.686 59 89923.95 457.3343
## 4 + dekattol 1 5588.255 58 84335.70 455.3564
##
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[,
## -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.807 -22.203 -5.158 22.221 119.653
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.20521 18.92788 10.419 6.64e-15 ***
## luastanah 1.19095 0.06891 17.282 < 2e-16 ***
## umur -3.82040 0.50982 -7.494 4.34e-10 ***
## dekattol 22.33619 11.39364 1.960 0.0548 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8457
## F-statistic: 112.4 on 3 and 58 DF, p-value: < 2.2e-16
Program 6.5
library(MASS)

# Backward elimination by AIC, starting from the model that contains every
# available predictor.
lengkap <- lm(harga ~ ., data = rumah[, -1])

# trace = 0 silences the step-by-step log.
model.backward <- stepAIC(
  lengkap,
  scope = formula(lengkap),
  direction = "backward",
  trace = 0
)

# Path of terms removed at each step, then the fit of the final model.
model.backward$anova
summary(model.backward)
Output 6.5
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## harga ~ luasbangunan + luastanah + umur + kamartidur + kamarmandi +
## dekattol
##
## Final Model:
## harga ~ luastanah + umur + dekattol
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 55 83620.45 460.8284
## 2 - kamartidur 1 149.8185 56 83770.27 458.9393
## 3 - luasbangunan 1 155.7576 57 83926.02 457.0545
## 4 - kamarmandi 1 409.6722 58 84335.70 455.3564
##
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[,
## -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.807 -22.203 -5.158 22.221 119.653
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.20521 18.92788 10.419 6.64e-15 ***
## luastanah 1.19095 0.06891 17.282 < 2e-16 ***
## umur -3.82040 0.50982 -7.494 4.34e-10 ***
## dekattol 22.33619 11.39364 1.960 0.0548 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8457
## F-statistic: 112.4 on 3 and 58 DF, p-value: < 2.2e-16
Program 6.6
library(MASS)

# Stepwise selection by AIC (direction = "both": terms may be added or
# removed at each step).
# lengkap: model with every available predictor; its formula is the upper
#          limit of the search.
# awal:    intercept-only model the search starts from.
lengkap <- lm(harga ~ ., data = rumah[, -1])
awal <- lm(harga ~ 1, data = rumah[, -1])

# trace = 0 silences the step-by-step log.
model.stepwise <- stepAIC(
  awal,
  scope = formula(lengkap),
  direction = "both",
  trace = 0
)

# Path of the steps taken, then the fit of the final model.
model.stepwise$anova
summary(model.stepwise)
Output 6.6
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## harga ~ 1
##
## Final Model:
## harga ~ luastanah + umur + dekattol
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 61 574824.60 568.3505
## 2 + luastanah 1 397226.960 60 177597.64 497.5288
## 3 + umur 1 87673.686 59 89923.95 457.3343
## 4 + dekattol 1 5588.255 58 84335.70 455.3564
##
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[,
## -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.807 -22.203 -5.158 22.221 119.653
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.20521 18.92788 10.419 6.64e-15 ***
## luastanah 1.19095 0.06891 17.282 < 2e-16 ***
## umur -3.82040 0.50982 -7.494 4.34e-10 ***
## dekattol 22.33619 11.39364 1.960 0.0548 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8457
## F-statistic: 112.4 on 3 and 58 DF, p-value: < 2.2e-16
6.4 Recursive Feature Elimination
Program 6.7
library(caret)
library(randomForest)

# Response (column 8) and candidate predictors (columns 2-7) of rumah.
y <- rumah[, 8]
X <- rumah[, 2:7]

# RFE set-up: random-forest helper functions (rfFuncs), evaluated with
# 10-fold cross-validation repeated 5 times.
control_rfe <- rfeControl(
  functions = rfFuncs,
  method = "repeatedcv",
  number = 10,
  repeats = 5
)

# Fix the random seed right before the resampling starts.
set.seed(50)

# Run recursive feature elimination over subset sizes 1 to 6.
result_rfe <- rfe(x = X, y = y, sizes = 1:6, rfeControl = control_rfe)

# Print the resampling performance per subset size and the chosen variables.
result_rfe
Output 6.7
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 69.65 0.5441 56.08 23.31 0.2346 19.93
## 2 53.47 0.7466 44.85 17.99 0.1668 15.18
## 3 48.64 0.7864 40.54 15.97 0.1358 13.41
## 4 47.56 0.7895 39.39 15.48 0.1346 12.45 *
## 5 49.19 0.7816 40.76 16.05 0.1326 13.58
## 6 48.66 0.7817 40.14 15.43 0.1329 12.91
##
## The top 4 variables (out of 4):
## luastanah, umur, luasbangunan, kamartidur
6.5 Boruta
Program 6.8
library(Boruta)

# Boruta judges each predictor against randomly permuted "shadow" copies
# using random-forest importance, so the result depends on the random
# number stream. Fix the seed (same value as the RFE program) so the
# decision can be reproduced from run to run.
set.seed(50)

# harga is modelled on every column of rumah except the first.
# pValue is the confidence level of the importance test; ntree is handed on
# to the importance function (see ?Boruta).
hasil.boruta <- Boruta(harga ~ ., data = rumah[, -1],
                       pValue = 0.05, ntree = 1000)

# Print which attributes were confirmed important or unimportant.
hasil.boruta
Output 6.8
## Boruta performed 14 iterations in 1.229682 secs.
## 5 attributes confirmed important: kamarmandi, kamartidur,
## luasbangunan, luastanah, umur;
## 1 attributes confirmed unimportant: dekattol;