BAB 6. Metode Seleksi secara Wrapper dalam Pemodelan

6.2 Best Subset

Program 6.1

# Best-subset search with leaps(): keep the two best models of every size
# (nbest = 2), scored by adjusted R-squared.
library(leaps)

# Columns 2:7 of `rumah` are used as predictors and column 8 as the response.
best <- leaps(x = rumah[, 2:7], y = rumah[, 8],
              method = "adjr2", nbest = 2)

# One row per candidate model: number of predictors, inclusion flags, and
# adjusted R-squared in percent. leaps() counts the intercept in `size`,
# hence the "- 1".
hasil <- data.frame(size = best$size - 1, best$which,
                    adjr2 = best$adjr2 * 100)

# Rank from highest to lowest adjusted R-squared; the score column is
# addressed by name so the ordering does not depend on the predictor count.
hasil <- hasil[order(hasil$adjr2, decreasing = TRUE), ]
row.names(hasil) <- seq_len(nrow(hasil))
hasil

Output 6.1

##    size    X1    X2    X3    X4    X5    X6    adjr2
## 1     3 FALSE  TRUE  TRUE FALSE FALSE  TRUE 84.56957
## 2     4 FALSE  TRUE  TRUE FALSE  TRUE  TRUE 84.37513
## 3     4 FALSE  TRUE  TRUE  TRUE FALSE  TRUE 84.31679
## 4     5  TRUE  TRUE  TRUE FALSE  TRUE  TRUE 84.12564
## 5     5 FALSE  TRUE  TRUE  TRUE  TRUE  TRUE 84.10471
## 6     6  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE 83.86592
## 7     2 FALSE  TRUE  TRUE FALSE FALSE FALSE 83.82598
## 8     3  TRUE  TRUE  TRUE FALSE FALSE FALSE 83.57095
## 9     2  TRUE FALSE  TRUE FALSE FALSE FALSE 81.51850
## 10    1 FALSE  TRUE FALSE FALSE FALSE FALSE 68.58910
## 11    1  TRUE FALSE FALSE FALSE FALSE FALSE 66.86694

Program 6.2

# Exhaustive best-subset search with regsubsets(), allowing up to 6 predictors.
library(leaps)

# The first column of `rumah` is dropped; `harga` is regressed on the rest.
models <- regsubsets(harga ~ ., data = rumah[, -1],
                     nvmax = 6, method = "exhaustive")

# Summarise once and reuse the object for both the printed table and the
# model lookup below.
hasil <- summary(models)
hasil

# Row of the inclusion matrix that belongs to the model with the highest
# adjusted R-squared.
terbaik <- which.max(hasil$adjr2)
hasil$which[terbaik, ]

Output 6.2

## Subset selection object
## Call: regsubsets.formula(harga ~ ., data = rumah[, -1], nvmax = 6, 
##     method = "exhaustive")
## 6 Variables  (and intercept)
##              Forced in Forced out
## luasbangunan     FALSE      FALSE
## luastanah        FALSE      FALSE
## umur             FALSE      FALSE
## kamartidur       FALSE      FALSE
## kamarmandi       FALSE      FALSE
## dekattol         FALSE      FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
##          luasbangunan luastanah umur kamartidur kamarmandi dekattol
## 1  ( 1 ) " "          "*"       " "  " "        " "        " "     
## 2  ( 1 ) " "          "*"       "*"  " "        " "        " "     
## 3  ( 1 ) " "          "*"       "*"  " "        " "        "*"     
## 4  ( 1 ) " "          "*"       "*"  " "        "*"        "*"     
## 5  ( 1 ) "*"          "*"       "*"  " "        "*"        "*"     
## 6  ( 1 ) "*"          "*"       "*"  "*"        "*"        "*"

Output 6.3

##  (Intercept) luasbangunan    luastanah         umur   kamartidur   kamarmandi 
##         TRUE        FALSE         TRUE         TRUE        FALSE        FALSE 
##     dekattol 
##         TRUE

6.3 Metode Sekuensial (Forward, Backward, Stepwise)

Program 6.4

library(MASS)

# Forward selection by AIC: begin with the intercept-only model and let
# stepAIC() add terms taken from the full model's formula.
lengkap <- lm(harga ~ ., data = rumah[, -1])
awal <- lm(harga ~ 1, data = rumah[, -1])
model.forward <- stepAIC(
  awal,
  scope = formula(lengkap),
  direction = "forward",
  trace = 0
)

# Sequence of added terms, followed by the fit of the selected model.
model.forward$anova
summary(model.forward)

Output 6.4

## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## harga ~ 1
## 
## Final Model:
## harga ~ luastanah + umur + dekattol
## 
## 
##          Step Df   Deviance Resid. Df Resid. Dev      AIC
## 1                                  61  574824.60 568.3505
## 2 + luastanah  1 397226.960        60  177597.64 497.5288
## 3      + umur  1  87673.686        59   89923.95 457.3343
## 4  + dekattol  1   5588.255        58   84335.70 455.3564
## 
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[, 
##     -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -102.807  -22.203   -5.158   22.221  119.653 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 197.20521   18.92788  10.419 6.64e-15 ***
## luastanah     1.19095    0.06891  17.282  < 2e-16 ***
## umur         -3.82040    0.50982  -7.494 4.34e-10 ***
## dekattol     22.33619   11.39364   1.960   0.0548 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared:  0.8533, Adjusted R-squared:  0.8457 
## F-statistic: 112.4 on 3 and 58 DF,  p-value: < 2.2e-16

Program 6.5

library(MASS)

# Backward elimination by AIC: begin with the model holding every predictor
# and let stepAIC() remove terms one at a time.
lengkap <- lm(harga ~ ., data = rumah[, -1])
model.backward <- stepAIC(
  lengkap,
  scope = formula(lengkap),
  direction = "backward",
  trace = 0
)

# Sequence of removed terms, followed by the fit of the selected model.
model.backward$anova
summary(model.backward)

Output 6.5

## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## harga ~ luasbangunan + luastanah + umur + kamartidur + kamarmandi + 
##     dekattol
## 
## Final Model:
## harga ~ luastanah + umur + dekattol
## 
## 
##             Step Df Deviance Resid. Df Resid. Dev      AIC
## 1                                   55   83620.45 460.8284
## 2   - kamartidur  1 149.8185        56   83770.27 458.9393
## 3 - luasbangunan  1 155.7576        57   83926.02 457.0545
## 4   - kamarmandi  1 409.6722        58   84335.70 455.3564
## 
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[, 
##     -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -102.807  -22.203   -5.158   22.221  119.653 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 197.20521   18.92788  10.419 6.64e-15 ***
## luastanah     1.19095    0.06891  17.282  < 2e-16 ***
## umur         -3.82040    0.50982  -7.494 4.34e-10 ***
## dekattol     22.33619   11.39364   1.960   0.0548 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared:  0.8533, Adjusted R-squared:  0.8457 
## F-statistic: 112.4 on 3 and 58 DF,  p-value: < 2.2e-16

Program 6.6

library(MASS)

# Stepwise selection by AIC: begin with the intercept-only model; at each
# step stepAIC() may either add or remove a term (direction = "both").
lengkap <- lm(harga ~ ., data = rumah[, -1])
awal <- lm(harga ~ 1, data = rumah[, -1])
model.stepwise <- stepAIC(
  awal,
  scope = formula(lengkap),
  direction = "both",
  trace = 0
)

# Sequence of steps taken, followed by the fit of the selected model.
model.stepwise$anova
summary(model.stepwise)

Output 6.6

## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## harga ~ 1
## 
## Final Model:
## harga ~ luastanah + umur + dekattol
## 
## 
##          Step Df   Deviance Resid. Df Resid. Dev      AIC
## 1                                  61  574824.60 568.3505
## 2 + luastanah  1 397226.960        60  177597.64 497.5288
## 3      + umur  1  87673.686        59   89923.95 457.3343
## 4  + dekattol  1   5588.255        58   84335.70 455.3564
## 
## Call:
## lm(formula = harga ~ luastanah + umur + dekattol, data = rumah[, 
##     -1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -102.807  -22.203   -5.158   22.221  119.653 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 197.20521   18.92788  10.419 6.64e-15 ***
## luastanah     1.19095    0.06891  17.282  < 2e-16 ***
## umur         -3.82040    0.50982  -7.494 4.34e-10 ***
## dekattol     22.33619   11.39364   1.960   0.0548 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.13 on 58 degrees of freedom
## Multiple R-squared:  0.8533, Adjusted R-squared:  0.8457 
## F-statistic: 112.4 on 3 and 58 DF,  p-value: < 2.2e-16

6.4 Recursive Feature Elimination

Program 6.7

library(caret)
library(randomForest)

# Predictors (columns 2:7) and response (column 8) of `rumah`.
X <- rumah[, 2:7]
y <- rumah[, 8]

# Random-forest helper functions, assessed with 10-fold cross-validation
# repeated 5 times.
control_rfe <- rfeControl(
  functions = rfFuncs,
  method = "repeatedcv",
  number = 10,
  repeats = 5
)

# Fix the RNG state right before the resampling starts.
set.seed(50)

# Run recursive feature elimination over subset sizes 1 through 6.
result_rfe <- rfe(x = X, y = y, sizes = 1:6, rfeControl = control_rfe)

# Show the resampling performance table and the selected variables.
result_rfe

Output 6.7

## 
## Recursive feature selection
## 
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times) 
## 
## Resampling performance over subset size:
## 
##  Variables  RMSE Rsquared   MAE RMSESD RsquaredSD MAESD Selected
##          1 69.65   0.5441 56.08  23.31     0.2346 19.93         
##          2 53.47   0.7466 44.85  17.99     0.1668 15.18         
##          3 48.64   0.7864 40.54  15.97     0.1358 13.41         
##          4 47.56   0.7895 39.39  15.48     0.1346 12.45        *
##          5 49.19   0.7816 40.76  16.05     0.1326 13.58         
##          6 48.66   0.7817 40.14  15.43     0.1329 12.91         
## 
## The top 4 variables (out of 4):
##    luastanah, umur, luasbangunan, kamartidur

6.5 Boruta

Program 6.8

library(Boruta)

# Boruta is built on random forests, so the confirmed/rejected attributes can
# change from run to run. Seed the RNG first so the result is reproducible.
# NOTE(review): any printed output captured from an unseeded run may differ
# from what this seeded call produces.
set.seed(50)

# Feature selection for `harga` on `rumah` without its first column.
# ntree = 1000 is presumably forwarded to the underlying forest routine;
# confirm against the Boruta documentation.
hasil.boruta <- Boruta(harga ~ ., data = rumah[, -1],
                       pValue = 0.05, ntree = 1000)
hasil.boruta

Output 6.8

## Boruta performed 14 iterations in 1.229682 secs.
##  5 attributes confirmed important: kamarmandi, kamartidur,
## luasbangunan, luastanah, umur;
##  1 attributes confirmed unimportant: dekattol;

Leave a Comment