See https://www.openintro.org/data/index.php?data=loan50 for more information.
# install.packages("openintro")
library(openintro)
## Warning: package 'openintro' was built under R version 4.4.1
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.4.1
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.4.1
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.4.1
##
## Attaching package: 'openintro'
## The following object is masked from 'package:car':
##
## densityPlot
# Column-by-column summary of the raw loan50 data; NA counts, if any,
# are reported per column.
summary(loan50)
## state emp_length term homeownership annual_income
## CA : 9 Min. : 0.000 Min. :36.00 rent :21 Min. : 28800
## TX : 5 1st Qu.: 2.000 1st Qu.:36.00 mortgage:26 1st Qu.: 55750
## IL : 4 Median : 5.000 Median :36.00 own : 3 Median : 74000
## FL : 3 Mean : 4.896 Mean :42.72 Mean : 86170
## MD : 3 3rd Qu.: 8.250 3rd Qu.:60.00 3rd Qu.: 99500
## NJ : 3 Max. :10.000 Max. :60.00 Max. :325000
## (Other):23 NA's :2
## verified_income debt_to_income total_credit_limit
## : 0 Min. :0.05472 Min. : 15980
## Not Verified :21 1st Qu.:0.26642 1st Qu.: 70526
## Source Verified:20 Median :0.54047 Median :147364
## Verified : 9 Mean :0.72264 Mean :208547
## 3rd Qu.:0.74122 3rd Qu.:299766
## Max. :5.33373 Max. :793009
##
## total_credit_utilized num_cc_carrying_balance loan_purpose
## Min. : 2872 Min. : 1.00 debt_consolidation:23
## 1st Qu.: 25694 1st Qu.: 3.00 credit_card :13
## Median : 48006 Median : 4.00 home_improvement : 5
## Mean : 61547 Mean : 5.06 other : 4
## 3rd Qu.: 76796 3rd Qu.: 6.00 car : 2
## Max. :373361 Max. :14.00 house : 1
## (Other) : 2
## loan_amount grade interest_rate public_record_bankrupt
## Min. : 3000 B :19 Min. : 5.31 Min. :0.00
## 1st Qu.: 7125 A :15 1st Qu.: 7.96 1st Qu.:0.00
## Median :15500 D : 8 Median : 9.93 Median :0.00
## Mean :17083 C : 6 Mean :11.57 Mean :0.08
## 3rd Qu.:24000 E : 2 3rd Qu.:13.71 3rd Qu.:0.00
## Max. :40000 : 0 Max. :26.30 Max. :1.00
## (Other): 0
## loan_status has_second_income total_income
## : 0 Mode :logical Min. : 28800
## Charged Off : 0 FALSE:42 1st Qu.: 60000
## Current :44 TRUE :8 Median : 78750
## Fully Paid : 6 Mean :105221
## In Grace Period : 0 3rd Qu.:119000
## Late (16-30 days) : 0 Max. :325000
## Late (31-120 days): 0
# Remove observations (rows) with missing data.
# complete.cases() marks the rows that have no NA in any column. Negative
# indexing with which() is avoided on purpose: when nothing is missing,
# which() returns integer(0) and x[-integer(0), ] selects zero rows instead
# of keeping all of them.
dat <- loan50[complete.cases(loan50), ]
summary(dat)
## state emp_length term homeownership annual_income
## CA : 9 Min. : 0.000 Min. :36 rent :21 Min. : 28800
## TX : 5 1st Qu.: 2.000 1st Qu.:36 mortgage:24 1st Qu.: 53750
## IL : 4 Median : 5.000 Median :36 own : 3 Median : 74000
## FL : 3 Mean : 4.896 Mean :43 Mean : 86844
## MD : 3 3rd Qu.: 8.250 3rd Qu.:60 3rd Qu.:100000
## NJ : 3 Max. :10.000 Max. :60 Max. :325000
## (Other):21
## verified_income debt_to_income total_credit_limit
## : 0 Min. :0.05472 Min. : 15980
## Not Verified :20 1st Qu.:0.26304 1st Qu.: 68025
## Source Verified:20 Median :0.51771 Median :140373
## Verified : 8 Mean :0.71508 Mean :207315
## 3rd Qu.:0.73271 3rd Qu.:301097
## Max. :5.33373 Max. :793009
##
## total_credit_utilized num_cc_carrying_balance loan_purpose
## Min. : 2872 Min. : 1.000 debt_consolidation:22
## 1st Qu.: 24777 1st Qu.: 3.000 credit_card :12
## Median : 45971 Median : 4.000 home_improvement : 5
## Mean : 61442 Mean : 4.896 other : 4
## 3rd Qu.: 73707 3rd Qu.: 6.000 car : 2
## Max. :373361 Max. :12.000 house : 1
## (Other) : 2
## loan_amount grade interest_rate public_record_bankrupt
## Min. : 3000 B :19 Min. : 5.31 Min. :0.00000
## 1st Qu.: 6875 A :14 1st Qu.: 7.96 1st Qu.:0.00000
## Median :15000 D : 8 Median : 9.93 Median :0.00000
## Mean :16753 C : 6 Mean :11.34 Mean :0.08333
## 3rd Qu.:22500 E : 1 3rd Qu.:12.98 3rd Qu.:0.00000
## Max. :40000 : 0 Max. :24.85 Max. :1.00000
## (Other): 0
## loan_status has_second_income total_income
## : 0 Mode :logical Min. : 28800
## Charged Off : 0 FALSE:40 1st Qu.: 59750
## Current :42 TRUE :8 Median : 80000
## Fully Paid : 6 Mean :106688
## In Grace Period : 0 3rd Qu.:126000
## Late (16-30 days) : 0 Max. :325000
## Late (31-120 days): 0
# Pairwise correlations for all numeric variables, rounded to 2 decimals.
# vapply() with a logical(1) template always yields a logical column mask,
# whereas sapply() can change its return type depending on the input.
round(cor(dat[vapply(dat, is.numeric, logical(1))]), 2)
## emp_length term annual_income debt_to_income
## emp_length 1.00 0.22 0.09 -0.01
## term 0.22 1.00 0.11 -0.14
## annual_income 0.09 0.11 1.00 -0.15
## debt_to_income -0.01 -0.14 -0.15 1.00
## total_credit_limit 0.22 0.23 0.69 0.01
## total_credit_utilized -0.01 -0.10 0.07 0.91
## num_cc_carrying_balance -0.01 0.31 0.05 -0.09
## loan_amount 0.11 0.42 0.41 -0.25
## interest_rate -0.04 0.31 -0.20 0.12
## public_record_bankrupt 0.12 0.14 -0.02 -0.03
## total_income 0.02 0.23 0.75 -0.22
## total_credit_limit total_credit_utilized
## emp_length 0.22 -0.01
## term 0.23 -0.10
## annual_income 0.69 0.07
## debt_to_income 0.01 0.91
## total_credit_limit 1.00 0.24
## total_credit_utilized 0.24 1.00
## num_cc_carrying_balance 0.16 -0.09
## loan_amount 0.47 -0.02
## interest_rate -0.12 0.09
## public_record_bankrupt 0.08 -0.07
## total_income 0.64 0.12
## num_cc_carrying_balance loan_amount interest_rate
## emp_length -0.01 0.11 -0.04
## term 0.31 0.42 0.31
## annual_income 0.05 0.41 -0.20
## debt_to_income -0.09 -0.25 0.12
## total_credit_limit 0.16 0.47 -0.12
## total_credit_utilized -0.09 -0.02 0.09
## num_cc_carrying_balance 1.00 0.18 0.18
## loan_amount 0.18 1.00 0.18
## interest_rate 0.18 0.18 1.00
## public_record_bankrupt -0.18 0.02 -0.07
## total_income 0.05 0.56 -0.16
## public_record_bankrupt total_income
## emp_length 0.12 0.02
## term 0.14 0.23
## annual_income -0.02 0.75
## debt_to_income -0.03 -0.22
## total_credit_limit 0.08 0.64
## total_credit_utilized -0.07 0.12
## num_cc_carrying_balance -0.18 0.05
## loan_amount 0.02 0.56
## interest_rate -0.07 -0.16
## public_record_bankrupt 1.00 0.02
## total_income 0.02 1.00
# Full model: interest_rate regressed on every other column of dat.
mod1 <- lm(interest_rate ~ ., data = dat)
# vif(mod1) # Here's something really weird and unusual that can happen!
# NOTE(review): presumably vif() stops on this model because some
# coefficients are aliased -- confirm by running it.
# alias() lists the coefficients that are exact linear combinations of the
# other model terms.
alias(mod1)
## Model :
## interest_rate ~ state + emp_length + term + homeownership + annual_income +
## verified_income + debt_to_income + total_credit_limit + total_credit_utilized +
## num_cc_carrying_balance + loan_purpose + loan_amount + grade +
## public_record_bankrupt + loan_status + has_second_income +
## total_income
##
## Complete :
## (Intercept) stateCA
## has_second_incomeTRUE 95199/7571 19840546/117297
## total_income 30306934278/23059 10191361068654/581459
## stateCT stateFL
## has_second_incomeTRUE 78971870/479479 688915/6328
## total_income 278678749081/16333 1266339675212/112273
## stateHI stateIL
## has_second_incomeTRUE 540787/5313 328917/4811
## total_income 283125777636698/26826789 915431201604373/129151565
## stateIN stateMA
## has_second_incomeTRUE 67537119/842767 2418737/21024
## total_income 15901012156/1911 700686230768/58553
## stateMD stateMI
## has_second_incomeTRUE 6178199/42342 3101137/22609
## total_income 24708086761999/1634225 12595264403531/887003
## stateMO stateMS
## has_second_incomeTRUE 1170633/12620 151065/1514
## total_income 37782863524/3935 210812198234/20395
## stateNE stateNH
## has_second_incomeTRUE 8546/4039 452614/2161
## total_income 33645513481/159906 2080213577895/95831
## stateNJ stateNV
## has_second_incomeTRUE 3000644807/16393881 14873407/265996
## total_income 5471475397001/288516 358046164915/62206
## stateNY stateOH
## has_second_incomeTRUE 129865983/1533145 7405609/100228
## total_income 234159311718/26659 4599416486184/599767
## stateRI stateTX
## has_second_incomeTRUE 456181/2460 8558275/63699
## total_income 26969157570973/1403168 1219625239836/87577
## stateVA stateWI
## has_second_incomeTRUE 1292109/6757 3933579/40567
## total_income 76101730572261/3836180 1778074084369/176563
## stateWV emp_length
## has_second_incomeTRUE 18721268/117389 -349619/49603
## total_income 134745000359/8149 -8886910082/12147
## term homeownershipmortgage
## has_second_incomeTRUE 25903/19529 38142/1103
## total_income 41576441701/303008 322421441511/89477
## homeownershipown annual_income
## has_second_incomeTRUE 466815/3367 0
## total_income 146688270868/10213 -36839815/806593
## verified_incomeSource Verified
## has_second_incomeTRUE -961613/121143
## total_income -88169346236320/107324169
## verified_incomeVerified debt_to_income
## has_second_incomeTRUE -829200/24059 1075/2974
## total_income -1054584178/295 350837458/7617
## total_credit_limit total_credit_utilized
## has_second_incomeTRUE 0 0
## total_income 957432/78563 -106771697/10753957
## num_cc_carrying_balance loan_purposecredit_card
## has_second_incomeTRUE -15818851/2859931 -267459683/2543358
## total_income -50077284268/87425 -35872494989/3292
## loan_purposedebt_consolidation
## has_second_incomeTRUE -1858838/11129
## total_income -9316325092308016/538206855
## loan_purposehome_improvement loan_purposehouse
## has_second_incomeTRUE -6303335/90896 -5195463/25834
## total_income -2668608061695/370796 -498767962367/23913
## loan_purposeother loan_purposerenewable_energy
## has_second_incomeTRUE -377114540/2697511 -2675637/26047
## total_income -3812398358074/263095 -27922459285769/2616756
## loan_purposesmall_business loan_amount
## has_second_incomeTRUE -110914108/1307757 78382/90272153
## total_income -30475131305/3474 5493103/60378
## gradeB gradeC
## has_second_incomeTRUE -9073/208 -36318363/648700
## total_income -7003840055/1546 -2223065707949/382467
## gradeD gradeE
## has_second_incomeTRUE 57813/992 -39972549/460543
## total_income 637732759459/105752 -41362592909/4593
## public_record_bankrupt loan_statusFully Paid
## has_second_incomeTRUE 732991/31187 62916/18787
## total_income 15805682146/6459 11611816486/35007
# Refit without state, then inspect the (generalised) variance inflation
# factors. vif() is presumably car::vif, so car must be attached.
mod2 <- lm(interest_rate ~ . - state, data = dat)
vif(mod2)
## GVIF Df GVIF^(1/(2*Df))
## emp_length 2.490139 1 1.578017
## term 3.336552 1 1.826623
## homeownership 7.513952 2 1.655645
## annual_income 79.476547 1 8.914962
## verified_income 7.314552 2 1.644549
## debt_to_income 43.789188 1 6.617340
## total_credit_limit 6.252324 1 2.500465
## total_credit_utilized 40.351130 1 6.352254
## num_cc_carrying_balance 2.285321 1 1.511728
## loan_purpose 369.144764 7 1.525356
## loan_amount 2.492539 1 1.578778
## grade 30.140509 4 1.530713
## public_record_bankrupt 2.172461 1 1.473927
## loan_status 2.129092 1 1.459141
## has_second_income 40.128226 1 6.334684
## total_income 118.194154 1 10.871713
# Also drop loan_purpose (largest GVIF in mod2) and re-check the VIFs.
mod3 <- lm(interest_rate ~ . - state - loan_purpose, data = dat)
vif(mod3)
## GVIF Df GVIF^(1/(2*Df))
## emp_length 1.522478 1 1.233887
## term 2.002711 1 1.415172
## homeownership 3.685284 2 1.385536
## annual_income 34.535748 1 5.876712
## verified_income 2.392214 2 1.243655
## debt_to_income 32.753669 1 5.723082
## total_credit_limit 4.454640 1 2.110602
## total_credit_utilized 31.138837 1 5.580218
## num_cc_carrying_balance 1.630005 1 1.276716
## loan_amount 2.198457 1 1.482719
## grade 5.264086 4 1.230737
## public_record_bankrupt 1.390159 1 1.179050
## loan_status 1.533236 1 1.238239
## has_second_income 20.298000 1 4.505330
## total_income 47.559665 1 6.896352
# Also drop total_income (largest GVIF in mod3) and re-check the VIFs.
mod4 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income,
  data = dat
)
vif(mod4)
## GVIF Df GVIF^(1/(2*Df))
## emp_length 1.389917 1 1.178948
## term 1.976419 1 1.405852
## homeownership 3.545343 2 1.372191
## annual_income 4.538011 1 2.130261
## verified_income 2.330610 2 1.235570
## debt_to_income 30.838790 1 5.553268
## total_credit_limit 4.406801 1 2.099238
## total_credit_utilized 29.476469 1 5.429224
## num_cc_carrying_balance 1.628448 1 1.276106
## loan_amount 2.196091 1 1.481921
## grade 4.896905 4 1.219664
## public_record_bankrupt 1.389875 1 1.178929
## loan_status 1.529116 1 1.236574
## has_second_income 3.494775 1 1.869432
# Also drop debt_to_income (largest GVIF in mod4) and re-check the VIFs.
mod5 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income,
  data = dat
)
vif(mod5)
## GVIF Df GVIF^(1/(2*Df))
## emp_length 1.389874 1 1.178929
## term 1.881800 1 1.371787
## homeownership 3.333080 2 1.351174
## annual_income 2.860078 1 1.691176
## verified_income 2.246937 2 1.224328
## total_credit_limit 4.401197 1 2.097903
## total_credit_utilized 1.476120 1 1.214957
## num_cc_carrying_balance 1.628388 1 1.276083
## loan_amount 2.112003 1 1.453273
## grade 4.554768 4 1.208672
## public_record_bankrupt 1.304843 1 1.142297
## loan_status 1.521741 1 1.233588
## has_second_income 1.687785 1 1.299148
# Lay plots out in a 2 x 2 grid, then draw the lm diagnostic plots for mod5.
# The par() setting is not restored here, so it stays in effect for later
# plots on the same device.
par(mfrow = c(2, 2))
plot(mod5)
## Warning: not plotting observations with leverage one:
## 15
# Observation 45, and possibly 15 (reported with leverage one), seem to be
# an issue in the mod5 fit. Look at row 15 first.
dat[15, ]
## # A tibble: 1 × 18
## state emp_length term homeownership annual_income verified_income
## <fct> <dbl> <dbl> <fct> <dbl> <fct>
## 1 TX 2 60 mortgage 98000 Verified
## # ℹ 12 more variables: debt_to_income <dbl>, total_credit_limit <int>,
## # total_credit_utilized <int>, num_cc_carrying_balance <int>,
## # loan_purpose <fct>, loan_amount <int>, grade <fct>, interest_rate <dbl>,
## # public_record_bankrupt <int>, loan_status <fct>, has_second_income <lgl>,
## # total_income <dbl>
# Look at row 45 of dat.
dat[45, ]
## # A tibble: 1 × 18
## state emp_length term homeownership annual_income verified_income
## <fct> <dbl> <dbl> <fct> <dbl> <fct>
## 1 NV 0 36 mortgage 58500 Verified
## # ℹ 12 more variables: debt_to_income <dbl>, total_credit_limit <int>,
## # total_credit_utilized <int>, num_cc_carrying_balance <int>,
## # loan_purpose <fct>, loan_amount <int>, grade <fct>, interest_rate <dbl>,
## # public_record_bankrupt <int>, loan_status <fct>, has_second_income <lgl>,
## # total_income <dbl>
# Drop rows 15 and 45, then refit the same specification as mod5 on the
# reduced data and review its diagnostics and coefficient table.
dat2 <- dat[-c(15, 45), ]
mod6 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income,
  data = dat2
)
plot(mod6)
summary(mod6)
##
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income -
## debt_to_income, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09459 -0.30934 -0.09556 0.35515 1.27983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.778e+00 6.139e-01 7.783 1.77e-08 ***
## emp_length 1.171e-01 4.038e-02 2.899 0.00719 **
## term 2.771e-02 1.350e-02 2.053 0.04952 *
## homeownershipmortgage -8.272e-01 3.583e-01 -2.309 0.02857 *
## homeownershipown -4.432e-01 5.356e-01 -0.827 0.41497
## annual_income -5.875e-07 3.121e-06 -0.188 0.85205
## verified_incomeSource Verified 4.362e-01 2.719e-01 1.604 0.11990
## verified_incomeVerified 4.034e-01 4.366e-01 0.924 0.36341
## total_credit_limit -1.754e-07 1.357e-06 -0.129 0.89814
## total_credit_utilized 1.730e-06 2.011e-06 0.861 0.39680
## num_cc_carrying_balance 1.204e-01 4.986e-02 2.414 0.02256 *
## loan_amount -1.766e-05 1.588e-05 -1.112 0.27569
## gradeB 3.759e+00 2.990e-01 12.571 4.93e-13 ***
## gradeC 8.042e+00 4.490e-01 17.912 < 2e-16 ***
## gradeD 1.069e+01 4.052e-01 26.394 < 2e-16 ***
## public_record_bankrupt -1.357e+00 4.404e-01 -3.080 0.00460 **
## loan_statusFully Paid 8.104e-01 4.185e-01 1.936 0.06298 .
## has_second_incomeTRUE -4.463e-01 3.963e-01 -1.126 0.26969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7377 on 28 degrees of freedom
## Multiple R-squared: 0.9789, Adjusted R-squared: 0.966
## F-statistic: 76.29 on 17 and 28 DF, p-value: < 2.2e-16
# Remove the two variables with p > 0.8 in summary(mod6):
# annual_income and total_credit_limit.
mod7 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit,
  data = dat2
)
# F-test comparing the reduced model (mod7) with mod6.
anova(mod7, mod6) # very high p-value
## Analysis of Variance Table
##
## Model 1: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income - annual_income - total_credit_limit
## Model 2: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 15.302
## 2 28 15.236 2 0.066073 0.0607 0.9412
# Coefficient table and fit statistics for the reduced model mod7.
summary(mod7)
##
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income -
## debt_to_income - annual_income - total_credit_limit, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.12772 -0.31439 -0.09806 0.37767 1.29015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.748e+00 5.569e-01 8.526 1.63e-09 ***
## emp_length 1.198e-01 3.834e-02 3.125 0.00393 **
## term 2.769e-02 1.302e-02 2.126 0.04182 *
## homeownershipmortgage -8.803e-01 2.571e-01 -3.424 0.00180 **
## homeownershipown -4.439e-01 5.176e-01 -0.858 0.39794
## verified_incomeSource Verified 4.317e-01 2.619e-01 1.648 0.10973
## verified_incomeVerified 3.630e-01 4.072e-01 0.892 0.37975
## total_credit_utilized 1.533e-06 1.725e-06 0.888 0.38143
## num_cc_carrying_balance 1.180e-01 4.613e-02 2.559 0.01579 *
## loan_amount -2.034e-05 1.339e-05 -1.519 0.13926
## gradeB 3.770e+00 2.829e-01 13.328 3.88e-14 ***
## gradeC 8.070e+00 4.273e-01 18.884 < 2e-16 ***
## gradeD 1.073e+01 3.797e-01 28.253 < 2e-16 ***
## public_record_bankrupt -1.366e+00 4.177e-01 -3.271 0.00270 **
## loan_statusFully Paid 8.565e-01 3.842e-01 2.229 0.03346 *
## has_second_incomeTRUE -3.839e-01 3.391e-01 -1.132 0.26666
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7142 on 30 degrees of freedom
## Multiple R-squared: 0.9788, Adjusted R-squared: 0.9682
## F-statistic: 92.23 on 15 and 30 DF, p-value: < 2.2e-16
# Let's try a looser cut-off (p > 0.2 in summary(mod7)): this drops
# total_credit_utilized and has_second_income.
mod8 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit -
    total_credit_utilized - has_second_income,
  data = dat2
)
# F-test comparing the reduced model (mod8) with mod7.
anova(mod8, mod7)
## Analysis of Variance Table
##
## Model 1: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income - annual_income - total_credit_limit -
## total_credit_utilized - has_second_income
## Model 2: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income - annual_income - total_credit_limit
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 32 16.300
## 2 30 15.302 2 0.99779 0.9781 0.3877
# Coefficient table and fit statistics for mod8.
summary(mod8)
##
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income -
## debt_to_income - annual_income - total_credit_limit - total_credit_utilized -
## has_second_income, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10304 -0.35759 -0.07164 0.44116 1.23566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.961e+00 5.054e-01 9.816 3.56e-11 ***
## emp_length 1.295e-01 3.655e-02 3.542 0.001242 **
## term 2.461e-02 1.281e-02 1.922 0.063600 .
## homeownershipmortgage -9.297e-01 2.544e-01 -3.654 0.000916 ***
## homeownershipown -4.249e-01 5.031e-01 -0.845 0.404642
## verified_incomeSource Verified 4.095e-01 2.591e-01 1.580 0.123896
## verified_incomeVerified 2.645e-01 3.951e-01 0.670 0.507972
## num_cc_carrying_balance 1.152e-01 4.597e-02 2.506 0.017487 *
## loan_amount -2.162e-05 1.318e-05 -1.641 0.110621
## gradeB 3.743e+00 2.818e-01 13.281 1.45e-14 ***
## gradeC 7.958e+00 4.099e-01 19.414 < 2e-16 ***
## gradeD 1.075e+01 3.726e-01 28.852 < 2e-16 ***
## public_record_bankrupt -1.361e+00 4.151e-01 -3.278 0.002522 **
## loan_statusFully Paid 8.448e-01 3.814e-01 2.215 0.033980 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7137 on 32 degrees of freedom
## Multiple R-squared: 0.9774, Adjusted R-squared: 0.9682
## F-statistic: 106.4 on 13 and 32 DF, p-value: < 2.2e-16
# Same specification as mod8, additionally dropping verified_income.
mod9 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit -
    total_credit_utilized - has_second_income -
    verified_income,
  data = dat2
)
# F-test comparing the reduced model (mod9) with mod8.
anova(mod9, mod8)
## Analysis of Variance Table
##
## Model 1: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income - annual_income - total_credit_limit -
## total_credit_utilized - has_second_income - verified_income
## Model 2: interest_rate ~ (state + emp_length + term + homeownership +
## annual_income + verified_income + debt_to_income + total_credit_limit +
## total_credit_utilized + num_cc_carrying_balance + loan_purpose +
## loan_amount + grade + public_record_bankrupt + loan_status +
## has_second_income + total_income) - state - loan_purpose -
## total_income - debt_to_income - annual_income - total_credit_limit -
## total_credit_utilized - has_second_income
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 34 17.579
## 2 32 16.300 2 1.2787 1.2552 0.2987
summary(mod9) # adjusted R-squared went down (0.9677 here vs 0.9682 for mod8)
##
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income -
## debt_to_income - annual_income - total_credit_limit - total_credit_utilized -
## has_second_income - verified_income, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.25937 -0.36741 -0.07513 0.53464 1.36094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.257e+00 4.674e-01 11.248 5.28e-13 ***
## emp_length 1.204e-01 3.483e-02 3.457 0.001485 **
## term 2.292e-02 1.284e-02 1.785 0.083161 .
## homeownershipmortgage -9.718e-01 2.540e-01 -3.826 0.000532 ***
## homeownershipown -6.153e-01 4.919e-01 -1.251 0.219499
## num_cc_carrying_balance 1.168e-01 4.600e-02 2.540 0.015833 *
## loan_amount -1.774e-05 1.272e-05 -1.394 0.172234
## gradeB 3.674e+00 2.793e-01 13.155 6.78e-15 ***
## gradeC 8.041e+00 4.082e-01 19.699 < 2e-16 ***
## gradeD 1.090e+01 3.629e-01 30.047 < 2e-16 ***
## public_record_bankrupt -1.325e+00 4.176e-01 -3.172 0.003205 **
## loan_statusFully Paid 7.281e-01 3.757e-01 1.938 0.060933 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.719 on 34 degrees of freedom
## Multiple R-squared: 0.9756, Adjusted R-squared: 0.9677
## F-statistic: 123.7 on 11 and 34 DF, p-value: < 2.2e-16
# Check the diagnostic plots one more time, this time for mod8.
plot(mod8)
# Interaction plot of interest_rate across grade, with one trace per
# homeownership level. droplevels() removes grade levels that have no rows
# in dat2.
interaction.plot(
  x.factor = droplevels(dat2$grade),
  trace.factor = dat2$homeownership,
  response = dat2$interest_rate
)