0. Code to run to set up your computer.
# Update and install packages
# Wrapped in local() so the helper variables below do not linger in the
# user's workspace after setup has run.
local({
  cran_mirror <- "https://cran.csiro.au/"
  needed <- c("dplyr", "sjlabelled", "sjmisc", "sjstats", "sjPlot", "lm.beta")

  # Bring already-installed packages up to date without prompting.
  update.packages(ask = FALSE, repos = cran_mirror, dependencies = TRUE)

  # Install only what is missing. requireNamespace() tests for availability
  # without attaching the package; attaching is left to the library() calls
  # that follow this block.
  for (pkg in needed) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, repos = cran_mirror, dependencies = TRUE)
    }
  }
})
# Attach every package used in the examples, in a fixed order.
# library() stops with an error if a package is not installed.
invisible(lapply(
  c("dplyr", "sjlabelled", "sjmisc", "sjstats", "sjPlot", "lm.beta"),
  base::library,
  character.only = TRUE
))
# Printing defaults for the examples below:
#   digits - show about 3 significant digits
#   scipen - penalty that makes R prefer fixed over scientific notation
#            (very small p-values may still print in scientific form)
options(
  digits = 3,
  scipen = 8
)
# Stop View from overloading memory with large datasets.
#
# Keep a reference to the viewer that was active before this script ran.
# The exists() guard matters when the setup code is run more than once:
# without it RStudioView would be re-bound to the wrapper defined below and
# every call to View() would recurse forever.
if (!exists("RStudioView")) {
  RStudioView <- View
}

# View(x, ...): show `x` in the data viewer, limiting data frames to their
# first 500 rows. Any further arguments (e.g. `title`) are forwarded as-is.
View <- function(x, ...) {
  if (inherits(x, "data.frame")) {
    # head() returns at most 500 rows, never pads a shorter data frame with
    # NA rows, and keeps a one-column data frame as a data frame.
    RStudioView(utils::head(x, 500L), ...)
  } else {
    RStudioView(x, ...)
  }
}
# Datasets

# Read an .RDS file from a web address.
# The url() connection is wrapped in gzcon() so compressed files are
# decompressed while reading (the same treatment for all three RDS files),
# and the connection is closed explicitly on exit so none is left behind for
# the garbage collector to clean up with a warning.
read_rds_url <- function(address) {
  con <- gzcon(url(address))
  on.exit(close(con), add = TRUE)
  readRDS(con)
}

# Example 1: Crime Dataset
lga <- read_rds_url("https://methods101.com/data/nsw-lga-crime-clean.RDS")
# Example 2: AuSSA Dataset
aus2012 <- read_rds_url("https://mqsociology.github.io/learn-r/soci832/aussa2012.RDS")
# Example 3: Australian Electoral Survey
aes_full <- read_rds_url("https://mqsociology.github.io/learn-r/soci832/aes_full.rds")
# Example 4: AES 2013, reduced
elect_2013 <- read.csv(url("https://methods101.com/data/elect_2013.csv"))
Example 1.1: Assault and Migrants
# Bivariate OLS: regress astnondomviol on pctimmig using the lga data,
# add standardized coefficients, then print the model summary.
# `data = .` is kept so the printed Call matches the output shown below.
lga %>%
  stats::lm(
    formula = astnondomviol ~ pctimmig,
    data = .
  ) %>%
  lm.beta::lm.beta() %>%
  base::summary()
##
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -374.6 -160.5 -38.3 111.8 1134.8
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 511.294 0.000 33.969 15.1 <2e-16 ***
## pctimmig -4.513 -0.259 1.554 -2.9 0.0044 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 247 on 117 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.0672, Adjusted R-squared: 0.0593
## F-statistic: 8.43 on 1 and 117 DF, p-value: 0.0044
Example 1.5: Crime and Lots of Variables
# Multiple OLS: regress astnondomviol on nine predictors from the lga data,
# add standardized coefficients, then print the model summary.
# Predictor order is kept because it fixes the row order of the
# coefficient table.
lga %>%
  stats::lm(
    formula = astnondomviol ~
      pctimmig + medinc + unemploy +
      giniinc + pctchris + medage +
      pctrent + bachelor + graduate,
    data = .
  ) %>%
  lm.beta::lm.beta() %>%
  base::summary()
##
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + medinc + unemploy +
## giniinc + pctchris + medage + pctrent + bachelor + graduate,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -274.8 -93.0 -19.4 71.4 634.9
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 918.28546 0.00000 470.84692 1.95 0.05373 .
## pctimmig -14.25937 -0.88573 2.74190 -5.20 0.00000095 ***
## medinc -0.00574 -0.19998 0.00378 -1.52 0.13206
## unemploy 39.55066 0.27870 13.12671 3.01 0.00322 **
## giniinc 1203.77051 0.27105 447.38805 2.69 0.00826 **
## pctchris -5.11587 -0.21449 3.03195 -1.69 0.09443 .
## medage -21.08372 -0.45765 5.58640 -3.77 0.00026 ***
## pctrent 17.17607 0.54868 3.54335 4.85 0.00000422 ***
## bachelor -4.25946 -0.12543 11.18176 -0.38 0.70400
## graduate -8.97714 -0.02543 61.38036 -0.15 0.88399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 152 on 108 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.617, Adjusted R-squared: 0.585
## F-statistic: 19.3 on 9 and 108 DF, p-value: <2e-16
Example 1.6: Crime and Significant Variables from 1.5
# Reduced model: same outcome as Example 1.5 with medinc, bachelor and
# graduate left out. Standardized coefficients are added before the
# summary is printed.
lga %>%
  stats::lm(
    formula = astnondomviol ~
      pctimmig + unemploy + giniinc +
      pctchris + medage + pctrent,
    data = .
  ) %>%
  lm.beta::lm.beta() %>%
  base::summary()
##
## Call:
## stats::lm(formula = astnondomviol ~ pctimmig + unemploy + giniinc +
## pctchris + medage + pctrent, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -252.2 -98.1 -21.2 69.8 691.8
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 419.046 0.000 364.147 1.15 0.25230
## pctimmig -15.578 -0.968 1.815 -8.58 6.3e-14 ***
## unemploy 60.666 0.428 10.455 5.80 6.3e-08 ***
## giniinc 855.918 0.193 316.649 2.70 0.00795 **
## pctchris -2.912 -0.122 2.559 -1.14 0.25753
## medage -17.697 -0.384 5.162 -3.43 0.00085 ***
## pctrent 16.437 0.525 3.444 4.77 5.6e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155 on 111 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.592, Adjusted R-squared: 0.57
## F-statistic: 26.8 on 6 and 111 DF, p-value: <2e-16
Example 2.1: Domestic Violence and Lots of Variables
# Multiple OLS with astdomviol as the outcome and the same nine predictors
# as Example 1.5; standardized coefficients are added before the summary
# is printed.
lga %>%
  stats::lm(
    formula = astdomviol ~
      pctimmig + medinc + unemploy +
      giniinc + pctchris + medage +
      pctrent + bachelor + graduate,
    data = .
  ) %>%
  lm.beta::lm.beta() %>%
  base::summary()
##
## Call:
## stats::lm(formula = astdomviol ~ pctimmig + medinc + unemploy +
## giniinc + pctchris + medage + pctrent + bachelor + graduate,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -393.6 -93.2 -9.4 64.6 676.7
##
## Coefficients:
## Estimate Standardized Std. Error t value Pr(>|t|)
## (Intercept) 166.89747 0.00000 486.98571 0.34 0.7325
## pctimmig -9.20970 -0.56064 2.83588 -3.25 0.0016 **
## medinc 0.00155 0.05295 0.00391 0.40 0.6926
## unemploy 66.81686 0.46144 13.57664 4.92 0.0000031 ***
## giniinc 1449.02748 0.31976 462.72276 3.13 0.0022 **
## pctchris 3.02929 0.12447 3.13587 0.97 0.3362
## medage -23.61753 -0.50241 5.77788 -4.09 0.0000841 ***
## pctrent 8.13371 0.25464 3.66481 2.22 0.0285 *
## bachelor -13.36172 -0.38560 11.56503 -1.16 0.2505
## graduate -4.99691 -0.01387 63.48424 -0.08 0.9374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158 on 108 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.606, Adjusted R-squared: 0.573
## F-statistic: 18.5 on 9 and 108 DF, p-value: <2e-16
Last updated on 30 September, 2019 by Dr Nicholas Harrigan (nicholas.harrigan@mq.edu.au)