BAB 2. Pemodelan Regresi

2.1 Regresi Linier

Program 2.1

# Program 2.1: ordinary least-squares fit of Ybbt_badan on all remaining
# columns. The first column of the CSV is discarded right after reading.
data <- read.csv(file = "data sapi.csv")[, -1]
regresi <- lm(formula = Ybbt_badan ~ ., data = data)

# Auto-print the fitted call and the coefficient estimates.
regresi

Output 2.1

## 
## Call:
## lm(formula = Ybbt_badan ~ ., data = data)
## 
## Coefficients:
##  (Intercept)   X1pjg_badan   X2ttg_badan   X3lkr_badan    X4pjg_ekor  
##     118.8305        6.1877        4.5001        7.4794        1.1279  
## X5pjg_tanduk  
##       0.1448

Program 2.2

# Program 2.2: hold-out validation of the linear model.
# 20 randomly chosen rows form the test set; the model is fitted on the rest
# and scored with the mean absolute percentage error (MAPE) on the test rows.
data <- read.csv("data sapi.csv")
data <- data[, -1]  # drop the first column

# Fix the RNG seed so the random split, and therefore the MAPE, is
# reproducible from run to run. The seed value itself is arbitrary; the MAPE
# printed in the text came from a different (unseeded) draw.
set.seed(123)

# Sample from the actual number of rows instead of assuming exactly 100.
ambil.uji <- sample(seq_len(nrow(data)), 20)
data.uji <- data[ambil.uji, ]
data.model <- data[-ambil.uji, ]

regresi <- lm(Ybbt_badan ~ ., data = data.model)
prediksi.uji <- predict(regresi, newdata = data.uji)

# MAPE in percent: mean of |actual - predicted| / actual * 100.
MAPE <- mean(abs(data.uji$Ybbt_badan - prediksi.uji) /
               data.uji$Ybbt_badan * 100)
MAPE
## [1] 1.462459

Program 2.3

# Program 2.3: same full-data regression as before, but reported through
# summary() to obtain standard errors, t tests, R-squared and the F test.
data <- read.csv("data sapi.csv")
data[[1]] <- NULL  # remove the first column in place

regresi <- lm(Ybbt_badan ~ ., data = data)
summary(regresi)

Output 2.2

## 
## Call:
## lm(formula = Ybbt_badan ~ ., data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -115.727  -28.822   -0.299   32.171  108.928 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  118.8305    91.9128   1.293    0.199    
## X1pjg_badan    6.1877     0.1552  39.857   <2e-16 ***
## X2ttg_badan    4.5001     0.2917  15.425   <2e-16 ***
## X3lkr_badan    7.4794     0.3046  24.557   <2e-16 ***
## X4pjg_ekor     1.1279     1.5674   0.720    0.474    
## X5pjg_tanduk   0.1448     1.1915   0.122    0.904    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.41 on 94 degrees of freedom
## Multiple R-squared:  0.966,  Adjusted R-squared:  0.9642 
## F-statistic: 533.5 on 5 and 94 DF,  p-value: < 2.2e-16

Program 2.4

# Program 2.4: regression of harga on four predictors, followed by the
# variance inflation factors of the fitted model.
library(car)  # provides vif()

rumah <- read.csv("rumah.csv")
modelrumah <- lm(
  harga ~ luasbangunan + luastanah + umur + dekattol,
  data = rumah
)
summary(modelrumah)

# One VIF per predictor.
vif(modelrumah)

Output 2.3

## 
## Call:
## lm(formula = harga ~ luasbangunan + luastanah + umur + dekattol, 
##     data = rumah)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -104.309  -22.753   -3.994   22.168  119.011 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  195.20528   21.03807   9.279 5.43e-13 ***
## luasbangunan  -0.09913    0.43881  -0.226  0.82208    
## luastanah      1.28826    0.43634   2.952  0.00457 ** 
## umur          -3.82505    0.51445  -7.435 5.99e-10 ***
## dekattol      22.24126   11.49569   1.935  0.05799 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.45 on 57 degrees of freedom
## Multiple R-squared:  0.8534, Adjusted R-squared:  0.8431 
## F-statistic: 82.96 on 4 and 57 DF,  p-value: < 2.2e-16
## luasbangunan    luastanah         umur     dekattol 
##    41.507084    41.509087     1.025362     1.061228

Output 2.4 (model tanpa peubah luastanah)

## 
## Call:
## lm(formula = harga ~ luasbangunan + umur + dekattol, data = rumah)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -86.602 -26.876  -2.347  22.319 125.661 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  227.02384   19.23260  11.804  < 2e-16 ***
## luasbangunan   1.17990    0.07438  15.863  < 2e-16 ***
## umur          -3.74643    0.54687  -6.851 5.22e-09 ***
## dekattol      22.38818   12.23647   1.830   0.0724 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.93 on 58 degrees of freedom
## Multiple R-squared:  0.831,  Adjusted R-squared:  0.8223 
## F-statistic: 95.06 on 3 and 58 DF,  p-value: < 2.2e-16
## luasbangunan         umur     dekattol 
##     1.052514     1.022615     1.061208

2.2 Pohon Regresi dan Ensemble-nya

Program 2.5

# Program 2.5: regression tree for harga using three predictors.
rumah <- read.csv("rumah.csv")
library(rpart)

# The complexity parameter is spelled out as `cp` instead of relying on
# partial matching of an abbreviated argument name. With cp = 0 no split is
# rejected for improving the fit too little; minsplit = 20 is the minimum
# number of observations a node needs before a split is attempted.
pohon <- rpart(harga ~ luasbangunan + umur + dekattol,
               data = rumah,
               control = rpart.control(minsplit = 20, cp = 0))

# Draw the fitted tree.
library(rpart.plot)
rpart.plot(pohon, extra = 1)
Gambar 2.1

Program 2.6

# Program 2.6: MAPE (in percent) of the regression tree, evaluated on the
# same data frame that is passed to predict().
prediksiharga <- predict(pohon, newdata = rumah)
MAPE <- with(rumah, mean(abs(harga - prediksiharga) / harga * 100))
MAPE
## [1] 13.83895
Gambar 2.2

Program 2.7

# Program 2.7: random forest of 500 trees for harga. mtry = 3 equals the
# number of predictors in the formula, so every predictor is a split
# candidate at each node.
library(randomForest)
modelRF <- randomForest(
  harga ~ luasbangunan + umur + dekattol,
  data = rumah,
  ntree = 500,
  mtry = 3
)

# Predict on the rows of `rumah` and report the MAPE in percent.
prediksiRF <- predict(modelRF, newdata = rumah)
MAPE <- mean(abs(rumah$harga - prediksiRF) / rumah$harga * 100)
MAPE
## [1] 6.359173

Program 2.8

# Program 2.8: gradient boosting for harga with 300 trees, learning rate 0.1
# and interaction depth 3.
library(gbm)
# No trailing comma after the last argument: a dangling comma passes an empty
# argument that is silently matched by position to the next unused formal.
# `distribution` is still left unspecified here, so gbm reports that it
# assumes gaussian (see the message below).
grad.boost <- gbm(
  formula = harga ~ luasbangunan + umur + dekattol,
  data = rumah,
  n.trees = 300,
  shrinkage = 0.1,
  interaction.depth = 3
)
## Distribution not specified, assuming gaussian ...
# n.trees is not given to predict(), so gbm chooses it and reports the choice.
prediksi.gb <- predict(grad.boost, rumah)
## Using 300 trees...
# MAPE in percent on the rows of `rumah`.
MAPE <- mean(abs(rumah$harga - prediksi.gb) /
               rumah$harga * 100)
MAPE
## [1] 9.828887

2.3 Support Vector Regression (SVR)

Program 2.9

# Program 2.9: support vector regression of mpg on horsepower, comparing a
# linear and a radial kernel on one scatter plot.
library(ISLR)
library(e1071)

# Keep only the rows whose horsepower exceeds 60.
Auto <- subset(Auto, horsepower > 60)

# Scatter of the observations: light filled dots with a darker outline on top.
plot(Auto$horsepower, Auto$mpg,
     pch = 19, col = "grey90", cex = 1.5,
     xlab = "horsepower", ylab = "miles per gallon")
points(Auto$horsepower, Auto$mpg, col = "grey", cex = 1.5)

# SVR with a linear kernel, fitted values drawn in red.
modelsvm <- svm(mpg ~ horsepower, data = Auto, kernel = "linear")
predYsvm <- predict(modelsvm, newdata = Auto)
points(Auto$horsepower, predYsvm, col = "red", pch = 19, cex = 0.5)

# SVR with a radial kernel (cost = 2), fitted values drawn in blue.
# modelsvm and predYsvm are overwritten by this second fit.
modelsvm <- svm(mpg ~ horsepower, data = Auto, kernel = "radial", cost = 2)
predYsvm <- predict(modelsvm, newdata = Auto)
points(Auto$horsepower, predYsvm, col = "blue", pch = 19, cex = 0.5)

Program 2.9

2.4 Neural Network untuk Pemodelan Regresi

Program 2.10

# Program 2.10: one-hidden-layer neural network (3 units) for mpg as a
# function of horsepower, trained on min-max scaled data.
library(ISLR)
library(neuralnet)

Auto <- Auto[which(Auto$horsepower > 60), ]
data <- Auto[, c("mpg", "horsepower")]

# Column-wise extremes, used to map each variable onto [0, 1].
maxs <- vapply(data, max, numeric(1))
mins <- vapply(data, min, numeric(1))
scaled <- as.data.frame(
  scale(data, center = mins, scale = maxs - mins)
)

# Logistic activation in the hidden layer; linear.output = TRUE leaves the
# output unit without an activation function.
nn <- neuralnet(
  mpg ~ horsepower,
  data = scaled,
  hidden = 3,
  act.fct = "logistic",
  linear.output = TRUE
)

# Error, convergence information and the fitted weights.
nn$result.matrix

Output 2.5

##                               [,1]
## error                    2.2007578
## reached.threshold        0.0074052
## steps                  164.0000000
## Intercept.to.1layhid1    0.1864301
## horsepower.to.1layhid1  -0.2859610
## Intercept.to.1layhid2    0.4618093
## horsepower.to.1layhid2  -3.8322613
## Intercept.to.1layhid3   -1.0018501
## horsepower.to.1layhid3  -7.0302166
## Intercept.to.mpg        -0.3951037
## 1layhid1.to.mpg          0.9829309
## 1layhid2.to.mpg          0.2932757
## 1layhid3.to.mpg          1.4532635
Gambar 2.6
## NULL

Program 2.11

# Program 2.11: overlay the neural-network predictions on the scatter of
# mpg against horsepower.
plot(Auto$horsepower, Auto$mpg, pch = 19, col = "grey90", cex = 1.5,
     xlab = "horsepower", ylab = "miles per gallon")
points(Auto$horsepower, Auto$mpg, col = "grey", cex = 1.5)

# Pass only the covariate the network was trained on. Handing compute() the
# whole scaled frame (response column included) depends on the installed
# neuralnet version selecting the right column; older releases multiply
# every supplied column by the weights and fail. drop = FALSE keeps a
# one-column data frame.
pr.nn <- compute(nn, scaled[, "horsepower", drop = FALSE])$net.result

# Undo the min-max scaling of mpg to get predictions on the original scale.
prediksi.nn <- pr.nn * (max(Auto$mpg) - min(Auto$mpg)) + min(Auto$mpg)

points(Auto$horsepower, prediksi.nn, col = "blue", pch = 19, cex = 0.5)
Gambar 2.7

Leave a Comment