BAB 8. Analisis Tingkat Kepentingan Variabel

8.1 Permutation Variable Importance

Program 8.1

# Load the housing data set.
rumah <- read.csv("rumah.csv")

# Fix the RNG state so the train/test split below is reproducible.
set.seed(50)

# Draw 40 row indices for training; every remaining row becomes test data.
ambil <- sample(seq_len(nrow(rumah)), size = 40)
datalatih <- rumah[ambil, ]
datauji <- rumah[-ambil, ]

# Full linear model of price on the four predictors, fitted on training rows.
modellengkap <- lm(
  harga ~ luasbangunan + umur + kamarmandi + dekattol,
  data = datalatih
)

# Baseline root mean squared error of prediction on the untouched test set.
RMSEPasli <- sqrt(mean((datauji$harga - predict(modellengkap, datauji))^2))

# Permutation variable importance on the test set.
#
# For each predictor, shuffle that single column of the test data, re-predict
# with the already fitted model, and compare the resulting RMSEP with the
# baseline RMSEP. A large increase means the model relies on that predictor.
#
# Relies on `datauji`, `modellengkap` and `RMSEPasli` created earlier in
# Program 8.1.

n.uji <- nrow(datauji)

# Root mean squared error of prediction of `model` on `data_prediksi`,
# measured against the (unshuffled) observed responses in `aktual`.
hitung_rmsep <- function(model, data_prediksi, aktual) {
  sqrt(mean((predict(model, data_prediksi) - aktual)^2))
}

# Return a copy of `data` in which only column `kolom` has been randomly
# reordered across rows; all other columns stay as they are.
acak_kolom <- function(data, kolom) {
  n <- nrow(data)
  data[[kolom]] <- data[[kolom]][sample(seq_len(n), n)]
  data
}

# Every permuted data set starts from the original test data. The earlier
# version drew the shuffled values for variables 2-4 from `permutasi1`, which
# only worked because those columns happened to be unshuffled there.
permutasi1 <- acak_kolom(datauji, "luasbangunan")
RMSEP1 <- hitung_rmsep(modellengkap, permutasi1, datauji$harga)

permutasi2 <- acak_kolom(datauji, "umur")
RMSEP2 <- hitung_rmsep(modellengkap, permutasi2, datauji$harga)

permutasi3 <- acak_kolom(datauji, "kamarmandi")
RMSEP3 <- hitung_rmsep(modellengkap, permutasi3, datauji$harga)

permutasi4 <- acak_kolom(datauji, "dekattol")
RMSEP4 <- hitung_rmsep(modellengkap, permutasi4, datauji$harga)

# Baseline RMSEP followed by the increase in RMSEP caused by shuffling each
# predictor; elements are labeled so the printed vector is self-explanatory.
c(
  RMSEPasli = RMSEPasli,
  luasbangunan = RMSEP1 - RMSEPasli,
  umur = RMSEP2 - RMSEPasli,
  kamarmandi = RMSEP3 - RMSEPasli,
  dekattol = RMSEP4 - RMSEPasli
)

Program 8.2

## parsnip model object
## 
## Ranger result
## 
## Call:
##  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3,      x), num.trees = ~100, min.node.size = min_rows(~1, x), ~seed(25),      num.threads = 1, verbose = FALSE, seed = sample.int(10^5,          1)) 
## 
## Type:                             Regression 
## Number of trees:                  100 
## Sample size:                      43 
## Number of independent variables:  6 
## Mtry:                             3 
## Target node size:                 1 
## Variable importance mode:         none 
## Splitrule:                        variance 
## OOB prediction error (MSE):       3574.816 
## R squared (OOB):                  0.6410432
## Preparation of a new explainer is initiated
##   -> model label       :  model_fit  (  default  )
##   -> data              :  19  rows  6  cols 
##   -> target variable   :  19  values 
##   -> predict function  :  yhat.model_fit  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package parsnip , ver. 1.3.3 , task regression (  default  ) 
##   -> predicted values  :  numerical, min =  248.93 , mean =  318.2562 , max =  532.89  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -90.14 , mean =  14.58594 , max =  88.3  
##   A new explainer has been created!

8.2 Shapley Value Analysis

Program 8.3

# Shapley value decomposition of R-squared for a linear model of house price.
rumah <- read.csv("rumah.csv")

library(ShapleyValue)

# Response: house price.
y <- rumah$harga

# Select predictors by name rather than by column position. Positional
# indices silently pick different columns when the CSV layout changes (for
# example with or without a leading row-id column) and can pull the response
# `harga` into the predictor set, which yields "essentially perfect fit"
# warnings and meaningless Shapley values.
# NOTE(review): these four predictors mirror the ones used in Program 8.1 and
# Program 8.4 -- confirm this is the intended set.
prediktor <- c("luasbangunan", "umur", "kamarmandi", "dekattol")
x <- rumah[, prediktor, drop = FALSE]

shapleyvalue(y, x)

Output 8.1

## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
##                            luastanah kamartidur dekattol  harga
## Shapley Value                 0.2468     0.2025   0.0056 0.5451
## Standardized Shapley Value    0.2468     0.2025   0.0056 0.5451

SHAP - SHapley Additive exPlanations

Program 8.4

# SHAP values for a random forest of house price, computed with treeshap.
library(treeshap)
library(ranger)

rumah <- read.csv("rumah.csv")

# Select modelling columns by name so the script does not depend on the
# column order of the CSV (positional indices break, or pick the wrong
# variables, if a column such as a row id is added or removed).
kolom_prediktor <- c("luasbangunan", "umur", "kamarmandi", "dekattol")
drumah <- rumah[, c(kolom_prediktor, "harga")]

# Random forests are randomized; pass a seed so repeated runs of this program
# give the same forest and therefore the same SHAP values.
model <- ranger(harga ~ ., data = drumah, seed = 50)

# Convert the fitted forest to treeshap's unified representation, using the
# training data as reference.
unified <- ranger.unify(model, drumah)

# Explain the predictor columns only (the response is dropped by name).
shap_treeshap <- treeshap(unified, drumah[, kolom_prediktor])

Output 8.2

## luasbangunan         umur   kamarmandi     dekattol 
##    48.723065    20.845532    16.771895     2.905601

Gambar 8.2

## luasbangunan         umur   kamarmandi     dekattol 
##    48.119051    21.548321    17.234670     2.857204

Leave a Comment