SOCI832: Week 8: Linear Regression, Examples

0. Code to run to set up your computer.

# Update Packages
update.packages(
  ask = FALSE,
  repos = "https://cran.csiro.au/",
  dependencies = TRUE
)
# Install Packages
# Install each course package only if it is not already available.
# requireNamespace() just checks that the package can be loaded; unlike
# require(), it does not attach it, so attaching is left to the library()
# calls that follow this step.
for (pkg in c("dplyr", "sjlabelled", "sjmisc", "sjstats", "sjPlot", "lm.beta")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(
      pkg,
      repos = "https://cran.csiro.au/",
      dependencies = TRUE
    )
  }
}

# Attach each course package to the search path, one after another
invisible(lapply(
  c("dplyr", "sjlabelled", "sjmisc", "sjstats", "sjPlot", "lm.beta"),
  base::library,
  character.only = TRUE
))

# Prefer fixed over scientific notation and print 3 significant digits.
# scipen = 8 only biases printing towards fixed notation: very small values
# (e.g. the 6.3e-14 p-value in Example 1.6) are still shown in scientific form.
options(scipen = 8, digits = 3)

# Stop View from overloading memory with large datasets.
# Keep a reference to the original viewer only once: if this setup code is run
# a second time, `View` is already the wrapper defined below, and capturing it
# again would make the wrapper call itself forever.
if (!exists("RStudioView", mode = "function")) {
  RStudioView <- View
}

# View(x): open `x` in the data viewer. Data frames are cut down to (at most)
# their first 500 rows; any other object is passed through unchanged.
View <- function(x) {
  if (inherits(x, "data.frame")) {
    # head() never returns more rows than exist and always returns a data
    # frame; x[1:500, ] would pad a shorter data frame with all-NA rows and
    # collapse a one-column data frame to a plain vector.
    RStudioView(utils::head(x, 500))
  } else {
    RStudioView(x)
  }
}

# Datasets
# Every remote .RDS file is read through gzcon(url(...)): the readRDS() help
# page says url connections need to be wrapped in gzcon() to handle compressed
# files, and gzcon() (allowNonCompressed = TRUE by default) reads an
# uncompressed stream unchanged, so the wrapper is safe either way.
# Example 1: Crime Dataset
lga <- readRDS(gzcon(url(
  "https://methods101.com/data/nsw-lga-crime-clean.RDS"
)))

# Example 2: AuSSA Dataset
aus2012 <- readRDS(gzcon(url(
  "https://mqsociology.github.io/learn-r/soci832/aussa2012.RDS"
)))

# Example 3: Australian Electoral Survey
aes_full <- readRDS(gzcon(url(
  "https://mqsociology.github.io/learn-r/soci832/aes_full.rds"
)))

# Example 4: AES 2013, reduced
elect_2013 <- read.csv(url("https://methods101.com/data/elect_2013.csv"))

Example 1.1: Assault and Migrants

# Model 1.1: regress astnondomviol on pctimmig alone.
# `data = .` hands the piped `lga` object to lm(); lm.beta() then adds the
# values that summary() prints in the "Standardized" column.
lga %>%
  stats::lm(astnondomviol ~ pctimmig, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -374.6 -160.5  -38.3  111.8 1134.8 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept)  511.294        0.000     33.969    15.1   <2e-16 ***
## pctimmig      -4.513       -0.259      1.554    -2.9   0.0044 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 247 on 117 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.0672, Adjusted R-squared:  0.0593 
## F-statistic: 8.43 on 1 and 117 DF,  p-value: 0.0044

Example 1.2: Crime and Migrants and Median Income

# Model 1.2: regress astnondomviol on pctimmig and medinc.
# Same pipeline shape as Example 1.1: lm() fit -> lm.beta() standardized
# coefficients -> summary() table.
lga %>%
  stats::lm(astnondomviol ~ pctimmig
    + medinc, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + medinc, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -363.1 -140.4  -31.2  125.3 1128.8 
## 
## Coefficients:
##              Estimate Standardized Std. Error t value    Pr(>|t|)    
## (Intercept) 760.30074      0.00000  129.76942    5.86 0.000000045 ***
## pctimmig     -2.72124     -0.15635    1.78023   -1.53       0.129    
## medinc       -0.00625     -0.20318    0.00315   -1.99       0.049 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 244 on 116 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.0979, Adjusted R-squared:  0.0824 
## F-statistic:  6.3 on 2 and 116 DF,  p-value: 0.00254

Example 1.3: Crime and Migrants and Median Income and Unemployment

# Model 1.3: regress astnondomviol on pctimmig, medinc and unemploy.
# lm() fit -> lm.beta() standardized coefficients -> summary() table.
lga %>%
  stats::lm(astnondomviol ~ pctimmig
    + medinc
    + unemploy, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + medinc + unemploy, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -387.7 -131.7  -42.9  118.1 1140.9 
## 
## Coefficients:
##               Estimate Standardized Std. Error t value  Pr(>|t|)    
## (Intercept) -146.45064      0.00000  222.58664   -0.66   0.51189    
## pctimmig      -6.56554     -0.37724    1.81557   -3.62   0.00045 ***
## medinc         0.00564      0.18335    0.00380    1.49   0.13991    
## unemploy      78.11552      0.51304   16.21148    4.82 0.0000045 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 224 on 115 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.249,  Adjusted R-squared:  0.23 
## F-statistic: 12.7 on 3 and 115 DF,  p-value: 0.000000301

Example 1.4: Crime and Migrants and Median Income and Unemployment and Gini Coefficient

# Model 1.4: regress astnondomviol on pctimmig, medinc, unemploy and giniinc.
# NOTE: the output for this model reports 11 rows dropped for missingness
# (10 in Examples 1.1-1.3), so it is fitted on one fewer observation than the
# earlier models; keep that in mind when comparing R-squared across them.
lga %>%
  stats::lm(astnondomviol ~ pctimmig
    + medinc
    + unemploy
    + giniinc, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + medinc + unemploy + 
##     giniinc, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -370.6 -134.1  -33.2  122.4 1125.2 
## 
## Coefficients:
##               Estimate Standardized Std. Error t value  Pr(>|t|)    
## (Intercept) -312.52032      0.00000  267.09512   -1.17   0.24443    
## pctimmig      -6.53146     -0.40571    1.69106   -3.86   0.00019 ***
## medinc         0.00625      0.21782    0.00356    1.76   0.08192 .  
## unemploy      74.87961      0.52766   15.16781    4.94 0.0000028 ***
## giniinc      308.32309      0.06943  370.52935    0.83   0.40710    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 208 on 113 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.251,  Adjusted R-squared:  0.224 
## F-statistic: 9.45 on 4 and 113 DF,  p-value: 0.00000126

Example 1.5: Crime and Lots of Variables

# Model 1.5: regress astnondomviol on nine predictors at once (pctimmig,
# medinc, unemploy, giniinc, pctchris, medage, pctrent, bachelor, graduate).
# lm() fit -> lm.beta() standardized coefficients -> summary() table.
lga %>%
  stats::lm(astnondomviol ~ pctimmig
    + medinc
    + unemploy
    + giniinc
    + pctchris
    + medage
    + pctrent
    + bachelor
    + graduate, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + medinc + unemploy + 
##     giniinc + pctchris + medage + pctrent + bachelor + graduate, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -274.8  -93.0  -19.4   71.4  634.9 
## 
## Coefficients:
##               Estimate Standardized Std. Error t value   Pr(>|t|)    
## (Intercept)  918.28546      0.00000  470.84692    1.95    0.05373 .  
## pctimmig     -14.25937     -0.88573    2.74190   -5.20 0.00000095 ***
## medinc        -0.00574     -0.19998    0.00378   -1.52    0.13206    
## unemploy      39.55066      0.27870   13.12671    3.01    0.00322 ** 
## giniinc     1203.77051      0.27105  447.38805    2.69    0.00826 ** 
## pctchris      -5.11587     -0.21449    3.03195   -1.69    0.09443 .  
## medage       -21.08372     -0.45765    5.58640   -3.77    0.00026 ***
## pctrent       17.17607      0.54868    3.54335    4.85 0.00000422 ***
## bachelor      -4.25946     -0.12543   11.18176   -0.38    0.70400    
## graduate      -8.97714     -0.02543   61.38036   -0.15    0.88399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 152 on 108 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.617,  Adjusted R-squared:  0.585 
## F-statistic: 19.3 on 9 and 108 DF,  p-value: <2e-16

Example 1.6: Crime and Significant Variables (p < 0.1) from 1.5

# Model 1.6: the Example 1.5 model without medinc, bachelor and graduate,
# the three predictors whose p-values were above 0.1 in the Example 1.5 output.
# pctchris (p = 0.094 in Example 1.5) is kept.
lga %>%
  stats::lm(astnondomviol ~ pctimmig
    + unemploy
    + giniinc
    + pctchris
    + medage
    + pctrent, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + unemploy + giniinc + 
##     pctchris + medage + pctrent, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -252.2  -98.1  -21.2   69.8  691.8 
## 
## Coefficients:
##             Estimate Standardized Std. Error t value Pr(>|t|)    
## (Intercept)  419.046        0.000    364.147    1.15  0.25230    
## pctimmig     -15.578       -0.968      1.815   -8.58  6.3e-14 ***
## unemploy      60.666        0.428     10.455    5.80  6.3e-08 ***
## giniinc      855.918        0.193    316.649    2.70  0.00795 ** 
## pctchris      -2.912       -0.122      2.559   -1.14  0.25753    
## medage       -17.697       -0.384      5.162   -3.43  0.00085 ***
## pctrent       16.437        0.525      3.444    4.77  5.6e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 155 on 111 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.592,  Adjusted R-squared:  0.57 
## F-statistic: 26.8 on 6 and 111 DF,  p-value: <2e-16

Example 2.1: Domestic Violence and Lots of Variables

# Model 2.1: same nine predictors as Example 1.5, but the outcome is
# astdomviol instead of astnondomviol (still fitted on the `lga` data).
# lm() fit -> lm.beta() standardized coefficients -> summary() table.
lga %>%
  stats::lm(astdomviol ~ pctimmig
    + medinc
    + unemploy
    + giniinc
    + pctchris
    + medage
    + pctrent
    + bachelor
    + graduate, data = .) %>%
  lm.beta::lm.beta() %>%
  base::summary()
## 
## Call:
## stats::lm(formula = astdomviol ~ pctimmig + medinc + unemploy + 
##     giniinc + pctchris + medage + pctrent + bachelor + graduate, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -393.6  -93.2   -9.4   64.6  676.7 
## 
## Coefficients:
##               Estimate Standardized Std. Error t value  Pr(>|t|)    
## (Intercept)  166.89747      0.00000  486.98571    0.34    0.7325    
## pctimmig      -9.20970     -0.56064    2.83588   -3.25    0.0016 ** 
## medinc         0.00155      0.05295    0.00391    0.40    0.6926    
## unemploy      66.81686      0.46144   13.57664    4.92 0.0000031 ***
## giniinc     1449.02748      0.31976  462.72276    3.13    0.0022 ** 
## pctchris       3.02929      0.12447    3.13587    0.97    0.3362    
## medage       -23.61753     -0.50241    5.77788   -4.09 0.0000841 ***
## pctrent        8.13371      0.25464    3.66481    2.22    0.0285 *  
## bachelor     -13.36172     -0.38560   11.56503   -1.16    0.2505    
## graduate      -4.99691     -0.01387   63.48424   -0.08    0.9374    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 158 on 108 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.606,  Adjusted R-squared:  0.573 
## F-statistic: 18.5 on 9 and 108 DF,  p-value: <2e-16
Last updated on 30 September, 2019 by Dr Nicholas Harrigan (nicholas.harrigan@mq.edu.au)