8.1 Permutation Variable Importance
Program 8.1
# Program 8.1: permutation variable importance for a linear model of `harga`.
# The model is fitted on a random training subset of 40 rows. For each
# predictor, that single column is shuffled in a copy of the test set and the
# resulting increase in test RMSEP over the unshuffled baseline is reported.
rumah <- read.csv("rumah.csv")
set.seed(50)
ambil <- sample(seq_len(nrow(rumah)), 40)
datalatih <- rumah[ambil, ]
datauji <- rumah[-ambil, ]
modellengkap <- lm(harga ~ luasbangunan + umur + kamarmandi + dekattol,
                   data = datalatih)

# Root mean squared error of prediction for `newdata`, always scored against
# the unshuffled test responses in `datauji$harga`.
rmsep <- function(model, newdata) {
  sqrt(mean((predict(model, newdata) - datauji$harga)^2))
}

RMSEPasli <- rmsep(modellengkap, datauji)
n.uji <- nrow(datauji)

# Every permuted copy starts from the untouched test set and takes its
# shuffled column from `datauji` as well, so the four measurements do not
# depend on each other or on the order in which they are computed.
permutasi1 <- datauji
permutasi1$luasbangunan <- datauji$luasbangunan[sample.int(n.uji)]
RMSEP1 <- rmsep(modellengkap, permutasi1)

permutasi2 <- datauji
permutasi2$umur <- datauji$umur[sample.int(n.uji)]
RMSEP2 <- rmsep(modellengkap, permutasi2)

permutasi3 <- datauji
permutasi3$kamarmandi <- datauji$kamarmandi[sample.int(n.uji)]
RMSEP3 <- rmsep(modellengkap, permutasi3)

permutasi4 <- datauji
permutasi4$dekattol <- datauji$dekattol[sample.int(n.uji)]
RMSEP4 <- rmsep(modellengkap, permutasi4)

# Baseline RMSEP followed by the increase caused by shuffling each predictor.
c(
  RMSEPasli,
  RMSEP1 - RMSEPasli,
  RMSEP2 - RMSEPasli,
  RMSEP3 - RMSEPasli,
  RMSEP4 - RMSEPasli
)
Program 8.2
## parsnip model object
##
## Ranger result
##
## Call:
## ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3, x), num.trees = ~100, min.node.size = min_rows(~1, x), ~seed(25), num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1))
##
## Type: Regression
## Number of trees: 100
## Sample size: 43
## Number of independent variables: 6
## Mtry: 3
## Target node size: 1
## Variable importance mode: none
## Splitrule: variance
## OOB prediction error (MSE): 3574.816
## R squared (OOB): 0.6410432
## Preparation of a new explainer is initiated
## -> model label : model_fit ( default )
## -> data : 19 rows 6 cols
## -> target variable : 19 values
## -> predict function : yhat.model_fit will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package parsnip , ver. 1.3.3 , task regression ( default )
## -> predicted values : numerical, min = 248.93 , mean = 318.2562 , max = 532.89
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -90.14 , mean = 14.58594 , max = 88.3
## A new explainer has been created!
8.2 Shapley Value Analysis
Program 8.3
# Program 8.3: Shapley value decomposition of R^2 for a regression of `harga`.
library(ShapleyValue)
rumah <- read.csv("rumah.csv")

# Predictors are selected by name rather than by position. Positional indices
# such as c(2, 4, 6, 7) point at different columns depending on whether the
# CSV carries a leading row-index column, and can silently place the response
# `harga` among the predictors.
prediktor <- c("luasbangunan", "umur", "kamarmandi", "dekattol")
y <- rumah$harga
x <- rumah[, prediktor, drop = FALSE]
shapleyvalue(y, x)
Output 8.1
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(lm2): essentially perfect fit: summary may be unreliable
## luastanah kamartidur dekattol harga
## Shapley Value 0.2468 0.2025 0.0056 0.5451
## Standardized Shapley Value 0.2468 0.2025 0.0056 0.5451
SHAP - SHapley Additive exPlanations
Program 8.4
# Program 8.4: SHAP values for a ranger random forest, computed with treeshap.
library(treeshap)
library(ranger)

rumah <- read.csv("rumah.csv")

# Columns are selected by name so the script does not depend on the column
# order of the CSV or on the presence of a leading row-index column.
prediktor <- c("luasbangunan", "umur", "kamarmandi", "dekattol")
drumah <- rumah[, c(prediktor, "harga")]

# NOTE(review): no seed is set in this block, so the fitted forest (and the
# SHAP values derived from it) may vary between runs unless a seed was set
# earlier in the session.
model <- ranger(harga ~ ., data = drumah)

# The unified model is built from the training data; SHAP values are then
# computed for the predictor columns only.
unified <- ranger.unify(model, drumah)
shap_treeshap <- treeshap(unified, drumah[, prediktor])
Output 8.2
## luasbangunan umur kamarmandi dekattol
## 48.723065 20.845532 16.771895 2.905601
Gambar 8.2
## luasbangunan umur kamarmandi dekattol
## 48.119051 21.548321 17.234670 2.857204