The data

See https://www.openintro.org/data/index.php?data=loan50 for more information.

# install.packages("openintro")
library(openintro)
## Warning: package 'openintro' was built under R version 4.4.1
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.4.1
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.4.1
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.4.1
## 
## Attaching package: 'openintro'
## The following object is masked from 'package:car':
## 
##     densityPlot
# Summarise every column of the raw loan50 data.
summary(loan50)
##      state      emp_length          term        homeownership annual_income   
##  CA     : 9   Min.   : 0.000   Min.   :36.00   rent    :21    Min.   : 28800  
##  TX     : 5   1st Qu.: 2.000   1st Qu.:36.00   mortgage:26    1st Qu.: 55750  
##  IL     : 4   Median : 5.000   Median :36.00   own     : 3    Median : 74000  
##  FL     : 3   Mean   : 4.896   Mean   :42.72                  Mean   : 86170  
##  MD     : 3   3rd Qu.: 8.250   3rd Qu.:60.00                  3rd Qu.: 99500  
##  NJ     : 3   Max.   :10.000   Max.   :60.00                  Max.   :325000  
##  (Other):23   NA's   :2                                                       
##         verified_income debt_to_income    total_credit_limit
##                 : 0     Min.   :0.05472   Min.   : 15980    
##  Not Verified   :21     1st Qu.:0.26642   1st Qu.: 70526    
##  Source Verified:20     Median :0.54047   Median :147364    
##  Verified       : 9     Mean   :0.72264   Mean   :208547    
##                         3rd Qu.:0.74122   3rd Qu.:299766    
##                         Max.   :5.33373   Max.   :793009    
##                                                             
##  total_credit_utilized num_cc_carrying_balance             loan_purpose
##  Min.   :  2872        Min.   : 1.00           debt_consolidation:23   
##  1st Qu.: 25694        1st Qu.: 3.00           credit_card       :13   
##  Median : 48006        Median : 4.00           home_improvement  : 5   
##  Mean   : 61547        Mean   : 5.06           other             : 4   
##  3rd Qu.: 76796        3rd Qu.: 6.00           car               : 2   
##  Max.   :373361        Max.   :14.00           house             : 1   
##                                                (Other)           : 2   
##   loan_amount        grade    interest_rate   public_record_bankrupt
##  Min.   : 3000   B      :19   Min.   : 5.31   Min.   :0.00          
##  1st Qu.: 7125   A      :15   1st Qu.: 7.96   1st Qu.:0.00          
##  Median :15500   D      : 8   Median : 9.93   Median :0.00          
##  Mean   :17083   C      : 6   Mean   :11.57   Mean   :0.08          
##  3rd Qu.:24000   E      : 2   3rd Qu.:13.71   3rd Qu.:0.00          
##  Max.   :40000          : 0   Max.   :26.30   Max.   :1.00          
##                  (Other): 0                                         
##              loan_status has_second_income  total_income   
##                    : 0   Mode :logical     Min.   : 28800  
##  Charged Off       : 0   FALSE:42          1st Qu.: 60000  
##  Current           :44   TRUE :8           Median : 78750  
##  Fully Paid        : 6                     Mean   :105221  
##  In Grace Period   : 0                     3rd Qu.:119000  
##  Late (16-30 days) : 0                     Max.   :325000  
##  Late (31-120 days): 0
# Remove observations with missing data.
# complete.cases() flags the rows that have no NA in any column. Unlike
# negative indexing with which(), it still keeps every row when nothing is
# missing (x[-integer(0), ] would select zero rows).
dat <- loan50[complete.cases(loan50), ]
summary(dat)
##      state      emp_length          term     homeownership annual_income   
##  CA     : 9   Min.   : 0.000   Min.   :36   rent    :21    Min.   : 28800  
##  TX     : 5   1st Qu.: 2.000   1st Qu.:36   mortgage:24    1st Qu.: 53750  
##  IL     : 4   Median : 5.000   Median :36   own     : 3    Median : 74000  
##  FL     : 3   Mean   : 4.896   Mean   :43                  Mean   : 86844  
##  MD     : 3   3rd Qu.: 8.250   3rd Qu.:60                  3rd Qu.:100000  
##  NJ     : 3   Max.   :10.000   Max.   :60                  Max.   :325000  
##  (Other):21                                                                
##         verified_income debt_to_income    total_credit_limit
##                 : 0     Min.   :0.05472   Min.   : 15980    
##  Not Verified   :20     1st Qu.:0.26304   1st Qu.: 68025    
##  Source Verified:20     Median :0.51771   Median :140373    
##  Verified       : 8     Mean   :0.71508   Mean   :207315    
##                         3rd Qu.:0.73271   3rd Qu.:301097    
##                         Max.   :5.33373   Max.   :793009    
##                                                             
##  total_credit_utilized num_cc_carrying_balance             loan_purpose
##  Min.   :  2872        Min.   : 1.000          debt_consolidation:22   
##  1st Qu.: 24777        1st Qu.: 3.000          credit_card       :12   
##  Median : 45971        Median : 4.000          home_improvement  : 5   
##  Mean   : 61442        Mean   : 4.896          other             : 4   
##  3rd Qu.: 73707        3rd Qu.: 6.000          car               : 2   
##  Max.   :373361        Max.   :12.000          house             : 1   
##                                                (Other)           : 2   
##   loan_amount        grade    interest_rate   public_record_bankrupt
##  Min.   : 3000   B      :19   Min.   : 5.31   Min.   :0.00000       
##  1st Qu.: 6875   A      :14   1st Qu.: 7.96   1st Qu.:0.00000       
##  Median :15000   D      : 8   Median : 9.93   Median :0.00000       
##  Mean   :16753   C      : 6   Mean   :11.34   Mean   :0.08333       
##  3rd Qu.:22500   E      : 1   3rd Qu.:12.98   3rd Qu.:0.00000       
##  Max.   :40000          : 0   Max.   :24.85   Max.   :1.00000       
##                  (Other): 0                                         
##              loan_status has_second_income  total_income   
##                    : 0   Mode :logical     Min.   : 28800  
##  Charged Off       : 0   FALSE:40          1st Qu.: 59750  
##  Current           :42   TRUE :8           Median : 80000  
##  Fully Paid        : 6                     Mean   :106688  
##  In Grace Period   : 0                     3rd Qu.:126000  
##  Late (16-30 days) : 0                     Max.   :325000  
##  Late (31-120 days): 0
# Correlations for all numeric variables, rounded to two decimals.
# vapply() always returns a logical vector, so the column mask is type-stable
# (sapply() may return a list, e.g. for a data frame with no columns).
numeric_cols <- vapply(dat, is.numeric, logical(1))
round(cor(dat[numeric_cols]), 2)
##                         emp_length  term annual_income debt_to_income
## emp_length                    1.00  0.22          0.09          -0.01
## term                          0.22  1.00          0.11          -0.14
## annual_income                 0.09  0.11          1.00          -0.15
## debt_to_income               -0.01 -0.14         -0.15           1.00
## total_credit_limit            0.22  0.23          0.69           0.01
## total_credit_utilized        -0.01 -0.10          0.07           0.91
## num_cc_carrying_balance      -0.01  0.31          0.05          -0.09
## loan_amount                   0.11  0.42          0.41          -0.25
## interest_rate                -0.04  0.31         -0.20           0.12
## public_record_bankrupt        0.12  0.14         -0.02          -0.03
## total_income                  0.02  0.23          0.75          -0.22
##                         total_credit_limit total_credit_utilized
## emp_length                            0.22                 -0.01
## term                                  0.23                 -0.10
## annual_income                         0.69                  0.07
## debt_to_income                        0.01                  0.91
## total_credit_limit                    1.00                  0.24
## total_credit_utilized                 0.24                  1.00
## num_cc_carrying_balance               0.16                 -0.09
## loan_amount                           0.47                 -0.02
## interest_rate                        -0.12                  0.09
## public_record_bankrupt                0.08                 -0.07
## total_income                          0.64                  0.12
##                         num_cc_carrying_balance loan_amount interest_rate
## emp_length                                -0.01        0.11         -0.04
## term                                       0.31        0.42          0.31
## annual_income                              0.05        0.41         -0.20
## debt_to_income                            -0.09       -0.25          0.12
## total_credit_limit                         0.16        0.47         -0.12
## total_credit_utilized                     -0.09       -0.02          0.09
## num_cc_carrying_balance                    1.00        0.18          0.18
## loan_amount                                0.18        1.00          0.18
## interest_rate                              0.18        0.18          1.00
## public_record_bankrupt                    -0.18        0.02         -0.07
## total_income                               0.05        0.56         -0.16
##                         public_record_bankrupt total_income
## emp_length                                0.12         0.02
## term                                      0.14         0.23
## annual_income                            -0.02         0.75
## debt_to_income                           -0.03        -0.22
## total_credit_limit                        0.08         0.64
## total_credit_utilized                    -0.07         0.12
## num_cc_carrying_balance                  -0.18         0.05
## loan_amount                               0.02         0.56
## interest_rate                            -0.07        -0.16
## public_record_bankrupt                    1.00         0.02
## total_income                              0.02         1.00

An initial model

Multicollinearity?

# Full model: regress interest_rate on every other column of dat.
mod1 <- lm(interest_rate ~ ., data = dat)
# vif(mod1)
# The call above is left disabled: the alias() output lists
# has_second_incomeTRUE and total_income as completely aliased (exact linear
# combinations of the other terms), and car's vif() stops with an error when
# a model has aliased coefficients.
alias(mod1)
## Model :
## interest_rate ~ state + emp_length + term + homeownership + annual_income + 
##     verified_income + debt_to_income + total_credit_limit + total_credit_utilized + 
##     num_cc_carrying_balance + loan_purpose + loan_amount + grade + 
##     public_record_bankrupt + loan_status + has_second_income + 
##     total_income
## 
## Complete :
##                       (Intercept)                 stateCA                    
## has_second_incomeTRUE                  95199/7571             19840546/117297
## total_income                    30306934278/23059       10191361068654/581459
##                       stateCT                     stateFL                    
## has_second_incomeTRUE             78971870/479479                 688915/6328
## total_income                   278678749081/16333        1266339675212/112273
##                       stateHI                     stateIL                    
## has_second_incomeTRUE                 540787/5313                 328917/4811
## total_income             283125777636698/26826789   915431201604373/129151565
##                       stateIN                     stateMA                    
## has_second_incomeTRUE             67537119/842767               2418737/21024
## total_income                     15901012156/1911          700686230768/58553
##                       stateMD                     stateMI                    
## has_second_incomeTRUE               6178199/42342               3101137/22609
## total_income               24708086761999/1634225       12595264403531/887003
##                       stateMO                     stateMS                    
## has_second_incomeTRUE               1170633/12620                 151065/1514
## total_income                     37782863524/3935          210812198234/20395
##                       stateNE                     stateNH                    
## has_second_incomeTRUE                   8546/4039                 452614/2161
## total_income                   33645513481/159906         2080213577895/95831
##                       stateNJ                     stateNV                    
## has_second_incomeTRUE         3000644807/16393881             14873407/265996
## total_income                 5471475397001/288516          358046164915/62206
##                       stateNY                     stateOH                    
## has_second_incomeTRUE           129865983/1533145              7405609/100228
## total_income                   234159311718/26659        4599416486184/599767
##                       stateRI                     stateTX                    
## has_second_incomeTRUE                 456181/2460               8558275/63699
## total_income               26969157570973/1403168         1219625239836/87577
##                       stateVA                     stateWI                    
## has_second_incomeTRUE                1292109/6757               3933579/40567
## total_income               76101730572261/3836180        1778074084369/176563
##                       stateWV                     emp_length                 
## has_second_incomeTRUE             18721268/117389               -349619/49603
## total_income                    134745000359/8149           -8886910082/12147
##                       term                        homeownershipmortgage      
## has_second_incomeTRUE                 25903/19529                  38142/1103
## total_income                   41576441701/303008          322421441511/89477
##                       homeownershipown            annual_income              
## has_second_incomeTRUE                 466815/3367                           0
## total_income                   146688270868/10213            -36839815/806593
##                       verified_incomeSource Verified
## has_second_incomeTRUE              -961613/121143   
## total_income            -88169346236320/107324169   
##                       verified_incomeVerified     debt_to_income             
## has_second_incomeTRUE               -829200/24059                   1075/2974
## total_income                      -1054584178/295              350837458/7617
##                       total_credit_limit          total_credit_utilized      
## has_second_incomeTRUE                           0                           0
## total_income                         957432/78563         -106771697/10753957
##                       num_cc_carrying_balance     loan_purposecredit_card    
## has_second_incomeTRUE           -15818851/2859931          -267459683/2543358
## total_income                   -50077284268/87425           -35872494989/3292
##                       loan_purposedebt_consolidation
## has_second_incomeTRUE              -1858838/11129   
## total_income          -9316325092308016/538206855   
##                       loan_purposehome_improvement loan_purposehouse          
## has_second_incomeTRUE              -6303335/90896               -5195463/25834
## total_income                -2668608061695/370796          -498767962367/23913
##                       loan_purposeother           loan_purposerenewable_energy
## has_second_incomeTRUE          -377114540/2697511              -2675637/26047 
## total_income                -3812398358074/263095     -27922459285769/2616756 
##                       loan_purposesmall_business  loan_amount                
## has_second_incomeTRUE          -110914108/1307757              78382/90272153
## total_income                    -30475131305/3474               5493103/60378
##                       gradeB                      gradeC                     
## has_second_incomeTRUE                   -9073/208            -36318363/648700
## total_income                     -7003840055/1546       -2223065707949/382467
##                       gradeD                      gradeE                     
## has_second_incomeTRUE                   57813/992            -39972549/460543
## total_income                  637732759459/105752           -41362592909/4593
##                       public_record_bankrupt      loan_statusFully Paid      
## has_second_incomeTRUE                732991/31187                 62916/18787
## total_income                     15805682146/6459           11611816486/35007
# Drop state from the full model and check the (generalised) variance
# inflation factors.
mod2 <- lm(interest_rate ~ . - state, data = dat)
vif(mod2)
##                               GVIF Df GVIF^(1/(2*Df))
## emp_length                2.490139  1        1.578017
## term                      3.336552  1        1.826623
## homeownership             7.513952  2        1.655645
## annual_income            79.476547  1        8.914962
## verified_income           7.314552  2        1.644549
## debt_to_income           43.789188  1        6.617340
## total_credit_limit        6.252324  1        2.500465
## total_credit_utilized    40.351130  1        6.352254
## num_cc_carrying_balance   2.285321  1        1.511728
## loan_purpose            369.144764  7        1.525356
## loan_amount               2.492539  1        1.578778
## grade                    30.140509  4        1.530713
## public_record_bankrupt    2.172461  1        1.473927
## loan_status               2.129092  1        1.459141
## has_second_income        40.128226  1        6.334684
## total_income            118.194154  1       10.871713
# Also drop loan_purpose, which had by far the largest GVIF in mod2
# (369.1 on 7 Df).
mod3 <- lm(interest_rate ~ . - state - loan_purpose, data = dat)
vif(mod3)
##                              GVIF Df GVIF^(1/(2*Df))
## emp_length               1.522478  1        1.233887
## term                     2.002711  1        1.415172
## homeownership            3.685284  2        1.385536
## annual_income           34.535748  1        5.876712
## verified_income          2.392214  2        1.243655
## debt_to_income          32.753669  1        5.723082
## total_credit_limit       4.454640  1        2.110602
## total_credit_utilized   31.138837  1        5.580218
## num_cc_carrying_balance  1.630005  1        1.276716
## loan_amount              2.198457  1        1.482719
## grade                    5.264086  4        1.230737
## public_record_bankrupt   1.390159  1        1.179050
## loan_status              1.533236  1        1.238239
## has_second_income       20.298000  1        4.505330
## total_income            47.559665  1        6.896352
# Also drop total_income, the term with the largest GVIF in mod3 (47.6).
mod4 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income,
  data = dat
)
vif(mod4)
##                              GVIF Df GVIF^(1/(2*Df))
## emp_length               1.389917  1        1.178948
## term                     1.976419  1        1.405852
## homeownership            3.545343  2        1.372191
## annual_income            4.538011  1        2.130261
## verified_income          2.330610  2        1.235570
## debt_to_income          30.838790  1        5.553268
## total_credit_limit       4.406801  1        2.099238
## total_credit_utilized   29.476469  1        5.429224
## num_cc_carrying_balance  1.628448  1        1.276106
## loan_amount              2.196091  1        1.481921
## grade                    4.896905  4        1.219664
## public_record_bankrupt   1.389875  1        1.178929
## loan_status              1.529116  1        1.236574
## has_second_income        3.494775  1        1.869432
# Also drop debt_to_income, the term with the largest GVIF in mod4 (30.8).
mod5 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income,
  data = dat
)
vif(mod5)
##                             GVIF Df GVIF^(1/(2*Df))
## emp_length              1.389874  1        1.178929
## term                    1.881800  1        1.371787
## homeownership           3.333080  2        1.351174
## annual_income           2.860078  1        1.691176
## verified_income         2.246937  2        1.224328
## total_credit_limit      4.401197  1        2.097903
## total_credit_utilized   1.476120  1        1.214957
## num_cc_carrying_balance 1.628388  1        1.276083
## loan_amount             2.112003  1        1.453273
## grade                   4.554768  4        1.208672
## public_record_bankrupt  1.304843  1        1.142297
## loan_status             1.521741  1        1.233588
## has_second_income       1.687785  1        1.299148

Diagnostic Plots

# Lay the lm diagnostic plots for mod5 out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(mod5)
## Warning: not plotting observations with leverage one:
##   15

# Observation 45, and possibly 15, appear to be problematic for the model fit
# (the plot warning reports leverage one for observation 15).
# Inspect observation 15 of dat.
dat[15,]
## # A tibble: 1 × 18
##   state emp_length  term homeownership annual_income verified_income
##   <fct>      <dbl> <dbl> <fct>                 <dbl> <fct>          
## 1 TX             2    60 mortgage              98000 Verified       
## # ℹ 12 more variables: debt_to_income <dbl>, total_credit_limit <int>,
## #   total_credit_utilized <int>, num_cc_carrying_balance <int>,
## #   loan_purpose <fct>, loan_amount <int>, grade <fct>, interest_rate <dbl>,
## #   public_record_bankrupt <int>, loan_status <fct>, has_second_income <lgl>,
## #   total_income <dbl>
# Inspect observation 45 of dat.
dat[45,]
## # A tibble: 1 × 18
##   state emp_length  term homeownership annual_income verified_income
##   <fct>      <dbl> <dbl> <fct>                 <dbl> <fct>          
## 1 NV             0    36 mortgage              58500 Verified       
## # ℹ 12 more variables: debt_to_income <dbl>, total_credit_limit <int>,
## #   total_credit_utilized <int>, num_cc_carrying_balance <int>,
## #   loan_purpose <fct>, loan_amount <int>, grade <fct>, interest_rate <dbl>,
## #   public_record_bankrupt <int>, loan_status <fct>, has_second_income <lgl>,
## #   total_income <dbl>
# Refit the mod5 specification without observations 15 and 45, then redraw
# the diagnostic plots.
dat2 <- dat[-c(15, 45), ]
mod6 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income,
  data = dat2
)
plot(mod6)

The model

# Coefficient table and fit statistics for mod6.
summary(mod6)
## 
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income - 
##     debt_to_income, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.09459 -0.30934 -0.09556  0.35515  1.27983 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.778e+00  6.139e-01   7.783 1.77e-08 ***
## emp_length                      1.171e-01  4.038e-02   2.899  0.00719 ** 
## term                            2.771e-02  1.350e-02   2.053  0.04952 *  
## homeownershipmortgage          -8.272e-01  3.583e-01  -2.309  0.02857 *  
## homeownershipown               -4.432e-01  5.356e-01  -0.827  0.41497    
## annual_income                  -5.875e-07  3.121e-06  -0.188  0.85205    
## verified_incomeSource Verified  4.362e-01  2.719e-01   1.604  0.11990    
## verified_incomeVerified         4.034e-01  4.366e-01   0.924  0.36341    
## total_credit_limit             -1.754e-07  1.357e-06  -0.129  0.89814    
## total_credit_utilized           1.730e-06  2.011e-06   0.861  0.39680    
## num_cc_carrying_balance         1.204e-01  4.986e-02   2.414  0.02256 *  
## loan_amount                    -1.766e-05  1.588e-05  -1.112  0.27569    
## gradeB                          3.759e+00  2.990e-01  12.571 4.93e-13 ***
## gradeC                          8.042e+00  4.490e-01  17.912  < 2e-16 ***
## gradeD                          1.069e+01  4.052e-01  26.394  < 2e-16 ***
## public_record_bankrupt         -1.357e+00  4.404e-01  -3.080  0.00460 ** 
## loan_statusFully Paid           8.104e-01  4.185e-01   1.936  0.06298 .  
## has_second_incomeTRUE          -4.463e-01  3.963e-01  -1.126  0.26969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7377 on 28 degrees of freedom
## Multiple R-squared:  0.9789, Adjusted R-squared:  0.966 
## F-statistic: 76.29 on 17 and 28 DF,  p-value: < 2.2e-16
# Drop the two terms with p > 0.8 in mod6: annual_income (p = 0.85) and
# total_credit_limit (p = 0.90).
mod7 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit,
  data = dat2
)
# Partial F-test of the reduced model against mod6: very high p-value.
anova(mod7, mod6)
## Analysis of Variance Table
## 
## Model 1: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income - annual_income - total_credit_limit
## Model 2: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     30 15.302                           
## 2     28 15.236  2  0.066073 0.0607 0.9412
# Coefficient table and fit statistics for mod7.
summary(mod7)
## 
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income - 
##     debt_to_income - annual_income - total_credit_limit, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.12772 -0.31439 -0.09806  0.37767  1.29015 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.748e+00  5.569e-01   8.526 1.63e-09 ***
## emp_length                      1.198e-01  3.834e-02   3.125  0.00393 ** 
## term                            2.769e-02  1.302e-02   2.126  0.04182 *  
## homeownershipmortgage          -8.803e-01  2.571e-01  -3.424  0.00180 ** 
## homeownershipown               -4.439e-01  5.176e-01  -0.858  0.39794    
## verified_incomeSource Verified  4.317e-01  2.619e-01   1.648  0.10973    
## verified_incomeVerified         3.630e-01  4.072e-01   0.892  0.37975    
## total_credit_utilized           1.533e-06  1.725e-06   0.888  0.38143    
## num_cc_carrying_balance         1.180e-01  4.613e-02   2.559  0.01579 *  
## loan_amount                    -2.034e-05  1.339e-05  -1.519  0.13926    
## gradeB                          3.770e+00  2.829e-01  13.328 3.88e-14 ***
## gradeC                          8.070e+00  4.273e-01  18.884  < 2e-16 ***
## gradeD                          1.073e+01  3.797e-01  28.253  < 2e-16 ***
## public_record_bankrupt         -1.366e+00  4.177e-01  -3.271  0.00270 ** 
## loan_statusFully Paid           8.565e-01  3.842e-01   2.229  0.03346 *  
## has_second_incomeTRUE          -3.839e-01  3.391e-01  -1.132  0.26666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7142 on 30 degrees of freedom
## Multiple R-squared:  0.9788, Adjusted R-squared:  0.9682 
## F-statistic: 92.23 on 15 and 30 DF,  p-value: < 2.2e-16
# Next, try removing the single-coefficient terms with p > 0.2 in mod7:
# total_credit_utilized (p = 0.38) and has_second_income (p = 0.27).
mod8 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit -
    total_credit_utilized - has_second_income,
  data = dat2
)
# Partial F-test of the reduced model against mod7.
anova(mod8, mod7)
## Analysis of Variance Table
## 
## Model 1: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income - annual_income - total_credit_limit - 
##     total_credit_utilized - has_second_income
## Model 2: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income - annual_income - total_credit_limit
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     32 16.300                           
## 2     30 15.302  2   0.99779 0.9781 0.3877
# Coefficient table and fit statistics for mod8.
summary(mod8)
## 
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income - 
##     debt_to_income - annual_income - total_credit_limit - total_credit_utilized - 
##     has_second_income, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.10304 -0.35759 -0.07164  0.44116  1.23566 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.961e+00  5.054e-01   9.816 3.56e-11 ***
## emp_length                      1.295e-01  3.655e-02   3.542 0.001242 ** 
## term                            2.461e-02  1.281e-02   1.922 0.063600 .  
## homeownershipmortgage          -9.297e-01  2.544e-01  -3.654 0.000916 ***
## homeownershipown               -4.249e-01  5.031e-01  -0.845 0.404642    
## verified_incomeSource Verified  4.095e-01  2.591e-01   1.580 0.123896    
## verified_incomeVerified         2.645e-01  3.951e-01   0.670 0.507972    
## num_cc_carrying_balance         1.152e-01  4.597e-02   2.506 0.017487 *  
## loan_amount                    -2.162e-05  1.318e-05  -1.641 0.110621    
## gradeB                          3.743e+00  2.818e-01  13.281 1.45e-14 ***
## gradeC                          7.958e+00  4.099e-01  19.414  < 2e-16 ***
## gradeD                          1.075e+01  3.726e-01  28.852  < 2e-16 ***
## public_record_bankrupt         -1.361e+00  4.151e-01  -3.278 0.002522 ** 
## loan_statusFully Paid           8.448e-01  3.814e-01   2.215 0.033980 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7137 on 32 degrees of freedom
## Multiple R-squared:  0.9774, Adjusted R-squared:  0.9682 
## F-statistic: 106.4 on 13 and 32 DF,  p-value: < 2.2e-16
# Also drop verified_income: neither of its levels is significant in mod8
# (p = 0.12 and p = 0.51).
mod9 <- lm(
  interest_rate ~ . - state - loan_purpose - total_income - debt_to_income -
    annual_income - total_credit_limit -
    total_credit_utilized - has_second_income -
    verified_income,
  data = dat2
)
# Partial F-test of the reduced model against mod8.
anova(mod9, mod8)
## Analysis of Variance Table
## 
## Model 1: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income - annual_income - total_credit_limit - 
##     total_credit_utilized - has_second_income - verified_income
## Model 2: interest_rate ~ (state + emp_length + term + homeownership + 
##     annual_income + verified_income + debt_to_income + total_credit_limit + 
##     total_credit_utilized + num_cc_carrying_balance + loan_purpose + 
##     loan_amount + grade + public_record_bankrupt + loan_status + 
##     has_second_income + total_income) - state - loan_purpose - 
##     total_income - debt_to_income - annual_income - total_credit_limit - 
##     total_credit_utilized - has_second_income
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     34 17.579                           
## 2     32 16.300  2    1.2787 1.2552 0.2987
# Adjusted R-squared went down relative to mod8 (0.9682 -> 0.9677).
summary(mod9)
## 
## Call:
## lm(formula = interest_rate ~ . - state - loan_purpose - total_income - 
##     debt_to_income - annual_income - total_credit_limit - total_credit_utilized - 
##     has_second_income - verified_income, data = dat2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25937 -0.36741 -0.07513  0.53464  1.36094 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.257e+00  4.674e-01  11.248 5.28e-13 ***
## emp_length               1.204e-01  3.483e-02   3.457 0.001485 ** 
## term                     2.292e-02  1.284e-02   1.785 0.083161 .  
## homeownershipmortgage   -9.718e-01  2.540e-01  -3.826 0.000532 ***
## homeownershipown        -6.153e-01  4.919e-01  -1.251 0.219499    
## num_cc_carrying_balance  1.168e-01  4.600e-02   2.540 0.015833 *  
## loan_amount             -1.774e-05  1.272e-05  -1.394 0.172234    
## gradeB                   3.674e+00  2.793e-01  13.155 6.78e-15 ***
## gradeC                   8.041e+00  4.082e-01  19.699  < 2e-16 ***
## gradeD                   1.090e+01  3.629e-01  30.047  < 2e-16 ***
## public_record_bankrupt  -1.325e+00  4.176e-01  -3.172 0.003205 ** 
## loan_statusFully Paid    7.281e-01  3.757e-01   1.938 0.060933 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.719 on 34 degrees of freedom
## Multiple R-squared:  0.9756, Adjusted R-squared:  0.9677 
## F-statistic: 123.7 on 11 and 34 DF,  p-value: < 2.2e-16
# Check the diagnostic plots one more time, now for mod8.
# NOTE(review): if this runs in its own knitr chunk, the earlier
# par(mfrow=c(2,2)) may no longer be in effect; confirm the plot layout.
plot(mod8)

Interactions?

# Mean interest_rate by grade, with one trace per homeownership category.
# droplevels() discards grade levels that have no observations in dat2 so
# they do not show up as empty positions on the x axis.
interaction.plot(
  x.factor = droplevels(dat2$grade),
  trace.factor = dat2$homeownership,
  response = dat2$interest_rate
)