# Work from the project folder and load the survey file
# (semicolon-separated fields, comma as the decimal mark).
setwd("/Users/zbigniewkarpinski/Dropbox/Prace pisemne/Trust and prudence/")
p <- read.table(file = "pgss08.csv", header = TRUE, sep = ";", dec = ",")
# Items g5a..g5k: keep only answers coded 1-7, turn every other code into NA.
likert_items <- names(subset(p, select = g5a:g5k))
p[likert_items] <- lapply(
  p[likert_items],
  function(answers) ifelse(answers %in% 1:7, answers, NA)
)
# Trust items
trust <- p[c("g5a", "g5b", "g5d", "g5g", "g5h", "g5k")]
# Prudence items
prudence <- p[c("g5c", "g5e", "g5i", "g5j")]
# Indices of trust and prudence: the row-wise mean of the respective items.
# With na.rm = FALSE (the default) a respondent with any missing item gets NA.
p$s.trust <- rowMeans(trust, na.rm = FALSE)
p$s.prud <- rowMeans(prudence, na.rm = FALSE)
# Distribution of the trust index
hist(
  p$s.trust,
  main = "The histogram of trust",
  xlab = "The trust index",
  ylab = "Frequency",
  col = "darkgreen",
  border = "white",
  font.lab = 2
)
# Distribution of the index of prudence
hist(
  p$s.prud,
  main = "The histogram of prudence",
  xlab = "The index of prudence",
  ylab = "Frequency",
  col = "darkgreen",
  border = "white",
  font.lab = 2
)
# Basic model: trust regressed on prudence
null <- lm(s.trust ~ s.prud, data = p)
# Full summary, point estimates and their confidence intervals
summary(null)
coef(null)
confint(null)
# The model frame holds only the rows actually used in the fit;
# compare its size with the size of the full data set.
head(model.frame(null))
nrow(model.frame(null))
nrow(p)
# Predicted vs. observed values of the trust index.
# lm() drops rows with a missing index, so predict(null) has one value per row
# of the model frame, not per row of p. Pairing it with p$s.trust fails with
# "variable lengths differ" as soon as any row was dropped, so the observed
# values are taken from the model frame instead.
y <- predict(null)                    # fitted trust, one value per row used in the fit
x <- model.frame(null)[, "s.trust"]   # observed trust for exactly those rows
# Default (open) plotting symbol
plot(
  y ~ x,
  col = "darkgreen",
  main = "Comparison of the predicted with the observed\nvalues of the trust index",
  xlab = "Observation",
  ylab = "Prediction"
)
# Same comparison with filled symbols
plot(
  y ~ x,
  pch = 16,
  col = "darkgreen",
  main = "Comparison of the predicted with the observed\nvalues of the trust index",
  xlab = "Observation",
  ylab = "Prediction"
)
# Predicted values of the trust index vs. observed values of the index of prudence
z <- model.frame(null)[, "s.prud"]   # observed prudence for the rows used in the fit
# type = "o" overplots points and connecting segments; the predictions are a
# linear function of z, so everything falls on one straight line.
plot(
  y ~ z,
  type = "o",
  pch = 16,
  col = "darkgreen",
  # Title fixed: the original repeated the word "values".
  main = "Comparison of the predicted values of trust\nwith the observed values of prudence",
  xlab = "Observed prudence",
  ylab = "Predicted trust"
)
# Scatterplot of trust by prudence with the fitted line drawn through the
# predicted values
plot(
  z, x,
  pch = 0,
  col = "darkgreen",
  main = "Regression line",
  xlab = "Prudence",
  ylab = "Trust"
)
lines(z, y, lwd = 2, col = "orangered1")
# Alternatively: the same line drawn from the intercept and slope of the model
plot(
  z, x,
  pch = 0,
  col = "darkgreen",
  main = "Regression line",
  xlab = "Prudence",
  ylab = "Trust"
)
abline(coef = coef(null), lwd = 2, col = "orangered1")
# Share of the variance of trust explained by the basic model
summary(null)$r.squared
# Specification of Model 1: the basic model plus age (q9age)
mod1 <- lm( s.trust ~ s.prud + q9age, data = p )
# Equivalent specification: extend the basic model with update().
# (The original updated mod1 itself, adding a term it already contained,
# which only refitted the same model.)
mod1 <- update( null, . ~ . + q9age )
summary(mod1)
# Comparison of Model 1 with the basic model (F test for nested models).
# NOTE(review): anova() requires both models to be fitted to the same rows;
# if q9age has missing values that the indices do not, refit `null` on
# model.frame(mod1) before comparing -- confirm against the data.
anova( null, mod1 )
# Categorical independent variable: education (q131a1) collapsed into three
# ordered groups and stored as a labelled factor.
# NOTE(review): every code above 7 lands in "above secondary" -- confirm that
# q131a1 carries no special missing-value codes in that range.
p$degree <- local({
  raw <- p$q131a1
  grouped <- raw
  grouped[which(raw < 5)] <- 1        # below secondary
  grouped[which(raw %in% 5:7)] <- 2   # secondary
  grouped[which(raw > 7)] <- 3        # above secondary
  factor(
    grouped,
    levels = 1:3,
    labels = c("below secondary", "secondary", "above secondary")
  )
})
# Model 2: trust on prudence and education (first level is the reference)
mod2 <- lm(s.trust ~ s.prud + degree, data = p)
summary(mod2)
# Changing the reference category: treatment contrasts with the second level
# of degree as the baseline, then Model 2 refitted under the new coding.
treatment_coding <- contr.treatment(levels(p$degree), base = 2)
contrasts(p$degree) <- treatment_coding
mod2 <- lm(formula = s.trust ~ s.prud + degree, data = p)
summary(mod2)
coef(mod2)
# Regression lines implied by Model 2: one line per education group.
# All three lines share the s.prud slope of Model 2 and differ only in their
# intercepts. (The original drew them with the slope of the basic model,
# coef(null)[2], which does not belong to these intercepts.)
mf2 <- model.frame(mod2)
b2 <- coef(mod2)   # order: intercept, s.prud, then the two degree contrasts
plot(
  mf2[, "s.trust"] ~ mf2[, "s.prud"],
  pch = 0,
  col = "darkgreen",
  main = "Regression lines",
  xlab = "Prudence",
  ylab = "Trust"
)
# Reference category (solid line; labelled "secondary" in the legend)
abline( a = b2[1], b = b2[2], lwd = 2, col = "orangered1" )
# Intercept shifted by the first degree contrast (dashed; "below secondary")
abline( a = b2[1] + b2[3], b = b2[2], lwd = 2, lty = 2, col = "orangered1" )
# Intercept shifted by the second degree contrast (dotted; "above secondary")
abline( a = b2[1] + b2[4], b = b2[2], lwd = 2, lty = 3, col = "orangered1" )
legend( "topleft", legend = c("below secondary", "secondary", "above secondary"), lty = c(2, 1, 3), col = "orangered1", lwd = 2 )
# In-class work
# 1. The variable q8 in the dataset is coded 1 for men and 2 for women. Transform the variable into a factor and use it in a regression model that has s.trust as the dependent variable, and s.prud, degree, and q8 as independent variables.
# 2. Summarise the results. Who belongs to the reference category? Are women different from men in terms of the average level of trust?
# 3. Compare your model with model 2. Interpret the output.
# 4. Using the model frame of your model create a scatterplot of s.trust vs. s.prud. Add two regression lines: one for men with the lowest degree of education and another for women with the highest degree of education.