3.1. Regresi Logistik
Program 3.1
# Import the data.
dataEHR <- read.csv("data-EHR.csv", sep = ",")
# Convert the categorical variables "SEX" and "SOURCE" to factors.
# (AGE stays numeric: the model summary reports a single AGE coefficient.)
# The columns are selected by name instead of by position (10, 11), so the
# step keeps working if the column order of the CSV changes.
library(dplyr)
dataEHR <- dataEHR %>%
  mutate(across(all_of(c("SEX", "SOURCE")), factor))
# Split the data: 20% of the rows are drawn at random as test data and the
# remaining 80% are used for training. The seed makes the split repeatable.
set.seed(123)
n <- round(0.2 * nrow(dataEHR), 0)
contoh <- sample(nrow(dataEHR), n, replace = FALSE)
EHRlatih <- dataEHR[-contoh, ]
EHRuji <- dataEHR[contoh, ]
# Fit a logistic regression of SOURCE on all other variables.
logit <- glm(SOURCE ~ ., data = EHRlatih, family = binomial)
# Show the model summary.
summary(logit)
# Report Accuracy, Sensitivity and Specificity for a set of predictions.
#   pred : predicted classes (passed as a factor by the calls in this file)
#   data : data set whose SOURCE column holds the reference classes
# "in" is treated as the positive class.
perform <- function(pred, data) {
  cm <- caret::confusionMatrix(pred, data$SOURCE, positive = "in")
  # overall[1] is Accuracy; byClass[1:2] are Sensitivity and Specificity.
  c(cm$overall[1], cm$byClass[1:2])
}
# Predict on the test data. With a factor response glm() treats the first
# level as "failure", so type = "response" returns the probability of the
# other level, here "out".
pred.logit <- list()
pred.logit$pred <- predict(logit, newdata = EHRuji, type = "response")
# Classify with a 0.50 cut-off on that probability.
pred.logit$in_pred <- ifelse(pred.logit$pred > 0.50, "out", "in")
# Reuse the levels of the reference: as.factor() would drop a class that is
# never predicted, and confusionMatrix() needs both factors to share levels.
pred.logit$in_pred <- factor(pred.logit$in_pred,
                             levels = levels(EHRuji$SOURCE))
# Accuracy, Sensitivity and Specificity on the test data.
data.frame(nilai = perform(pred.logit$in_pred, EHRuji))
Output 3.1
##
## Call:
## glm(formula = SOURCE ~ ., family = binomial, data = EHRlatih)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.7040360 9.2818415 1.369 0.1711
## HAEMATOCRIT 0.0087554 0.0605721 0.145 0.8851
## HAEMOGLOBINS 0.0991677 0.2089742 0.475 0.6351
## ERYTHROCYTE 0.5399139 0.4188916 1.289 0.1974
## LEUCOCYTE -0.0850973 0.0094811 -8.976 < 2e-16 ***
## THROMBOCYTE 0.0071632 0.0004233 16.923 < 2e-16 ***
## MCH 0.8117858 0.3244528 2.502 0.0123 *
## MCHC -0.6368730 0.2695991 -2.362 0.0182 *
## MCV -0.2206734 0.1081518 -2.040 0.0413 *
## AGE -0.0048489 0.0020418 -2.375 0.0176 *
## SEXM -0.3694524 0.0828202 -4.461 8.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4761.0 on 3529 degrees of freedom
## Residual deviance: 4045.6 on 3519 degrees of freedom
## AIC: 4067.6
##
## Number of Fisher Scoring iterations: 4
## nilai
## Accuracy 0.6882086
## Sensitivity 0.4333333
## Specificity 0.8639847
3.2. Pohon Klasifikasi
Program 3.2
# Load the data.
dataEHR <- read.csv("data-EHR.csv", sep = ",")
# Convert the categorical variables "SEX" and "SOURCE" to factors.
# They are selected by name instead of by position (10, 11), so the step
# keeps working if the column order of the CSV changes.
library(dplyr)
dataEHR <- dataEHR %>%
  mutate(across(all_of(c("SEX", "SOURCE")), factor))
# Split the data into 80% training and 20% test rows (repeatable via the seed).
set.seed(123)
n <- round(0.2 * nrow(dataEHR), 0)
contoh <- sample(nrow(dataEHR), n, replace = FALSE)
EHRlatih <- dataEHR[-contoh, ]
EHRuji <- dataEHR[contoh, ]
# Fit the classification tree of SOURCE on all other variables.
# The control settings are passed to rpart.control(): minsplit = 50,
# cp = 0 and maxdepth = 4.
library(rpart)
pohon <- rpart(SOURCE ~ ., data = EHRlatih,
               method = "class",
               control = rpart.control(minsplit = 50, cp = 0,
                                       maxdepth = 4))
# Draw the classification tree.
library(rpart.plot)
rpart.plot(pohon, type = 2, extra = 101, under = TRUE, cex = 0.8)
Gambar 3.2
3.3. Support Vector Machine untuk Klasifikasi
Program 3.3
# Import the data.
dataEHR <- read.csv("data-EHR.csv", sep = ",")
# Convert the categorical variables "SEX" and "SOURCE" to factors.
# They are selected by name instead of by position (10, 11), so the step
# keeps working if the column order of the CSV changes.
library(dplyr)
dataEHR <- dataEHR %>%
  mutate(across(all_of(c("SEX", "SOURCE")), factor))
# Split the data into 80% training and 20% test rows (repeatable via the seed).
set.seed(123)
n <- round(0.2 * nrow(dataEHR), 0)
contoh <- sample(nrow(dataEHR), n, replace = FALSE)
EHRlatih <- dataEHR[-contoh, ]
EHRuji <- dataEHR[contoh, ]
# Fit svm() once per kernel on the training data and predict the test data.
library(e1071)
model.linear <- svm(SOURCE ~ ., data = EHRlatih, kernel = "linear")
pred.linear <- predict(model.linear, EHRuji)
model.polynomial <- svm(SOURCE ~ ., data = EHRlatih, kernel = "polynomial")
pred.polynomial <- predict(model.polynomial, EHRuji)
model.radial <- svm(SOURCE ~ ., data = EHRlatih, kernel = "radial")
pred.radial <- predict(model.radial, EHRuji)
model.sigmoid <- svm(SOURCE ~ ., data = EHRlatih, kernel = "sigmoid")
pred.sigmoid <- predict(model.sigmoid, EHRuji)
# Helper returning Accuracy, Sensitivity and Specificity.
#   pred : predicted classes to evaluate
#   data : data set whose SOURCE column holds the reference classes
# "in" is treated as the positive class.
perform <- function(pred, data) {
  cm <- caret::confusionMatrix(pred, data$SOURCE, positive = "in")
  # overall[1] is Accuracy; byClass[1:2] are Sensitivity and Specificity.
  c(cm$overall[1], cm$byClass[1:2])
}
# Print the metrics: one column per kernel, one row per metric.
do.call(
  data.frame,
  lapply(
    list(svm.linear = pred.linear,
         svm.polynomial = pred.polynomial,
         svm.radial = pred.radial,
         svm.sigmoid = pred.sigmoid),
    perform,
    data = EHRuji
  )
)
## svm.linear svm.polynomial svm.radial svm.sigmoid
## Accuracy 0.6916100 0.7142857 0.7346939 0.6099773
## Sensitivity 0.4250000 0.4416667 0.5138889 0.4666667
## Specificity 0.8754789 0.9022989 0.8869732 0.7088123
3.4. Neural Network untuk Klasifikasi
Program 3.4
# Import the data.
dataEHR <- read.csv("data-EHR.csv", sep = ",")
# Convert the categorical variables "SEX" and "SOURCE" to factors.
# They are selected by name instead of by position (10, 11), so the step
# keeps working if the column order of the CSV changes.
library(dplyr)
dataEHR <- dataEHR %>%
  mutate(across(all_of(c("SEX", "SOURCE")), factor))
# Split the data into 80% training and 20% test rows (repeatable via the seed).
set.seed(123)
n <- round(0.2 * nrow(dataEHR), 0)
contoh <- sample(nrow(dataEHR), n, replace = FALSE)
EHRlatih <- dataEHR[-contoh, ]
EHRuji <- dataEHR[contoh, ]
# Turn the categorical variables into dummy variables.
# model.matrix() replaces each data frame by a numeric matrix that holds an
# intercept column, the dummies SOURCEout and SEXM, and the three numeric
# variables named in the formula; all other columns are left out.
EHRlatih <- model.matrix(
  ~ SOURCE + SEX + ERYTHROCYTE + HAEMOGLOBINS + HAEMATOCRIT,
  data = EHRlatih)
head(EHRlatih)
# Same encoding for the test data.
EHRuji <- model.matrix(
  ~ SOURCE + SEX + ERYTHROCYTE + HAEMOGLOBINS + HAEMATOCRIT,
  data = EHRuji)
Output 3.3
## (Intercept) SOURCEout SEXM ERYTHROCYTE HAEMOGLOBINS HAEMATOCRIT
## 1 1 1 0 4.65 11.8 35.1
## 2 1 1 0 5.39 14.8 43.5
## 3 1 1 0 4.74 11.3 33.5
## 4 1 1 0 4.98 13.7 39.1
## 5 1 1 1 4.23 9.9 30.9
## 6 1 1 1 4.53 11.6 34.3
Program 3.5
# Fit the neural network.
library(caret)
library(neuralnet)
# neuralnet() starts from randomly drawn weights; fixing the seed right
# before the call makes the fitted weights repeatable between runs.
set.seed(123)
# One input (HAEMOGLOBINS), one hidden layer with 3 units, logistic
# activation on the hidden and (linear.output = FALSE) the output unit.
model.nn <- neuralnet(SOURCEout ~ HAEMOGLOBINS,
                      data = EHRlatih,
                      hidden = c(3),
                      linear.output = FALSE,
                      act.fct = "logistic")
# Error, number of steps and fitted weights.
model.nn$result.matrix
Output 3.4
## [,1]
## error 3.882273e+02
## reached.threshold 9.943177e-03
## steps 2.450100e+04
## Intercept.to.1layhid1 4.770737e+01
## HAEMOGLOBINS.to.1layhid1 -1.224816e+01
## Intercept.to.1layhid2 3.959587e+00
## HAEMOGLOBINS.to.1layhid2 -4.803760e-01
## Intercept.to.1layhid3 1.532838e+01
## HAEMOGLOBINS.to.1layhid3 -1.887416e+00
## Intercept.to.SOURCEout 1.280384e+00
## 1layhid1.to.SOURCEout 1.609495e+03
## 1layhid2.to.SOURCEout -6.941681e+00
## 1layhid3.to.SOURCEout 3.746391e+00
Program 3.6
# Draw the plot of the fitted neural network.
plot(model.nn)
Output 3.5
## [,1]
## error 3.879311e+02
## reached.threshold 8.991188e-03
## steps 3.578200e+04
## Intercept.to.1layhid1 -1.379596e+01
## HAEMOGLOBINS.to.1layhid1 1.597750e+00
## Intercept.to.1layhid2 1.059891e+01
## HAEMOGLOBINS.to.1layhid2 -6.412510e-01
## Intercept.to.1layhid3 -1.142406e+01
## HAEMOGLOBINS.to.1layhid3 1.157201e+00
## Intercept.to.SOURCEout 1.847248e+00
## 1layhid1.to.SOURCEout -3.371541e+00
## 1layhid2.to.SOURCEout -1.638484e+00
## 1layhid3.to.SOURCEout 3.716425e+00
Program 3.7
# Compute the network output for the test data.
# Only the covariate the network was trained on is passed in: EHRuji also
# holds the intercept, the response dummy SOURCEout and further predictors,
# none of which are inputs of model.nn. drop = FALSE keeps a one-column
# matrix (with its row names) instead of a plain vector.
pred.nn <- neuralnet::compute(model.nn,
                              EHRuji[, "HAEMOGLOBINS", drop = FALSE])
head(pred.nn$net.result)
Output 3.6
## [,1]
## error 3.882653e+02
## reached.threshold 9.420197e-03
## steps 8.025000e+03
## Intercept.to.1layhid1 1.733201e+01
## HAEMOGLOBINS.to.1layhid1 -2.112837e+00
## Intercept.to.1layhid2 -4.336221e+00
## HAEMOGLOBINS.to.1layhid2 4.963030e-01
## Intercept.to.1layhid3 2.633239e+01
## HAEMOGLOBINS.to.1layhid3 -7.189382e+00
## Intercept.to.SOURCEout -4.594180e+00
## 1layhid1.to.SOURCEout 3.169463e+00
## 1layhid2.to.SOURCEout 5.864738e+00
## 1layhid3.to.SOURCEout 4.695038e+02
## [,1]
## 2463 0.7120028
## 2511 0.7393380
## 2227 0.5650440
## 526 0.6776682
## 4291 0.6191826
## 2986 0.7046642
Program 3.8
# Turn the predicted probabilities into classes with a 0.5 cut-off.
# The network models the dummy SOURCEout, so "1" stands for "out" and "0"
# for "in".
yhat <- data.frame("yhat" = ifelse(pred.nn$net.result >= 0.5, "1", "0"))
# Build the confusion matrix.
# Both factors get the fixed levels 0/1 so the call still works when only
# one class is predicted, and the reference is picked by column name
# instead of by position.
# NOTE: the positive class here is "1" (= "out"), whereas perform() in the
# earlier sections uses "in"; Sensitivity and Specificity are therefore
# swapped relative to those tables.
confusionMatrix(data = factor(yhat$yhat, levels = c("0", "1")),
                reference = factor(EHRuji[, "SOURCEout"], levels = c(0, 1)),
                positive = "1")
Output 3.7
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 107 83
## 1 253 439
##
## Accuracy : 0.619
## 95% CI : (0.5861, 0.6512)
## No Information Rate : 0.5918
## P-Value [Acc > NIR] : 0.05331
##
## Kappa : 0.1491
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.8410
## Specificity : 0.2972
## Pos Pred Value : 0.6344
## Neg Pred Value : 0.5632
## Prevalence : 0.5918
## Detection Rate : 0.4977
## Detection Prevalence : 0.7846
## Balanced Accuracy : 0.5691
##
## 'Positive' Class : 1
##