tidycensus
, load in variables on median household income and median housing value from the acs5
2013-2017.library(tidycensus)
library(tidyverse)
library(viridis)
library(plyr)
library(gtools)
census_api_key("Your Key here")
Var<-c("B19013_001","B25077_001")
## c(Median household Income, Median Housing Value)
CenDF <- get_acs(geography = "county",
variables = Var,
year = 2017,
survey = "acs5",
geometry = TRUE,
shift_geo = TRUE)
CenDF<-CenDF %>%
mutate(variable=case_when(
variable=="B19013_001" ~ "HHIncome",
variable=="B25077_001" ~ "HouseValue")) %>%
select(-moe) %>%
spread(variable, estimate) %>% #Spread moves rows into columns
mutate(HHInc_HousePrice_Ratio=round(HouseValue/HHIncome,2))
scale_fill_viridis
to create natural breaks for color sequencing and to mapHouse-Price-to-Income RatioPlot1<- ggplot(CenDF) +
geom_sf(aes(fill = HHInc_HousePrice_Ratio), color=NA) +
coord_sf(datum=NA) +
labs(title = "House-Price-to-Income Ratio \n R Built-in Color Cut",
caption = "Source: ACS 5-year, 2013-2017",
fill = "Price-Income Ratio") +
scale_fill_viridis(direction=-1)
Plot1
R has a variety of functions that allow the user to create color scales easily. These functions generally require you to specify a color on each end of the spectrum and they will interpolate the values between based upon how many levels you desire.
We select colors to communicate information about our data. If we are using a continuous variable the most basic decision is whether we want to represent it as positive and negative deviations from the average (a divergent scale), or as a continuum of low to high values (a sequential scale). If we have categorical data, it is generally visualized through different colors representing each group (a qualitative scale).
plot( 1:7, rep(1,7), ylim=c(-0.5,3.5), xlim=c(0,12), yaxt="n", xaxt="n", bty="n", xlab="", ylab="" )
color.function <- colorRampPalette( c("gray80","darkred") )
col.ramp <- color.function( 7 ) # number of groups you desire
points( 1:7, rep(3,7), pch=15, cex=8, col=col.ramp )
color.function <- colorRampPalette( c("darkred","gray80","steelblue") )
col.ramp <- color.function( 7 ) # number of groups you desire
points( 1:7, rep(2,7), pch=15, cex=8, col=col.ramp )
color.function <- colorRampPalette( c("gray80","black") )
col.ramp <- color.function( 7 ) # number of groups you desire
points( 1:7, rep(1,7), pch=15, cex=8, col=col.ramp )
text( 8, 3, "Sequential", pos=4 )
text( 8, 2, "Divergent", pos=4 )
text( 8, 1, "Grayscale", pos=4 )
HHInc_HousePrice_Ratio
from continuous variable to factor variable with 2 levels## Convert Integer into factor variable
CenDF$fill_factor <- quantcut(CenDF$HHInc_HousePrice_Ratio, q = seq(0, 1, by = 0.5))
CenDF$fill_factor = mapvalues(CenDF$fill_factor, from = levels(CenDF$fill_factor),
to = c("low","high"))
## Assign 2 colors
col.ramp <- viridis(n = 2) # number of groups you desire
Plot2<-ggplot(CenDF) +
geom_sf(aes(fill = fill_factor), color=NA) +
coord_sf(datum=NA) +
labs(title = "House-Price-to-Income Ratio",
caption = "Source: ACS 5-year, 2013-2017",
fill = "Price-Income Ratio") +
scale_fill_manual("Price-Income Ratio",values = col.ramp)
Plot2
HHInc_HousePrice_Ratio
from continuous variable to factor variable with 3 levels## Convert Integer into factor variable
CenDF$fill_factor <- quantcut(CenDF$HHInc_HousePrice_Ratio, q = seq(0, 1, by = .33))
CenDF$fill_factor = mapvalues(CenDF$fill_factor, from = levels(CenDF$fill_factor),
to = c("low","med", "high"))
## Assign 3 colors
col.ramp <- viridis(n = 3) # number of groups you desire
Plot3<-ggplot(CenDF) +
geom_sf(aes(fill = fill_factor), color=NA) +
coord_sf(datum=NA) +
labs(title = "House-Price-to-Income Ratio",
caption = "Source: ACS 5-year, 2013-2017",
fill = "Price-Income Ratio") +
scale_fill_manual("Price-Income Ratio \n (Terciles)",values = col.ramp)
Plot3
HHInc_HousePrice_Ratio
from continuous variable to factor variable with 5 levels## Convert Integer into factor variable
CenDF$fill_factor <- quantcut(CenDF$HHInc_HousePrice_Ratio, q = seq(0, 1, by = .2))
CenDF$fill_factor = mapvalues(CenDF$fill_factor, from = levels(CenDF$fill_factor),
to = c("1","2","3", "4","5"))
## Assign 5 colors
col.ramp <- viridis(n = 5) # number of groups you desire
Plot4<-ggplot(CenDF) +
geom_sf(aes(fill = fill_factor), color=NA) +
coord_sf(datum=NA) +
labs(title = "House-Price-to-Income Ratio",
caption = "Source: ACS 5-year, 2013-2017",
fill = "Price-Income Ratio") +
scale_fill_manual("Price-Income Ratio \n (Quintiles)",values = col.ramp)
Plot4
HHInc_HousePrice_Ratio
from continuous variable to factor variable with 6 levels## Convert Integer into factor variable
CenDF$fill_factor <- quantcut(CenDF$HHInc_HousePrice_Ratio, q = c(0,.1,.25,.5,.75,.9,1))
#CenDF$fill_factor = mapvalues(CenDF$fill_factor, from = levels(CenDF$fill_factor),
# to = c("< .1",".1-.25",".25-.5", ".5-.75",".75-.9","> .9"))
## Assign 6 colors
col.ramp <- viridis(n = 6) # number of groups you desire
Plot5<-ggplot(CenDF) +
geom_sf(aes(fill = fill_factor), color=NA) +
coord_sf(datum=NA) +
labs(title = "House-Price-to-Income Ratio",
caption = "Source: ACS 5-year, 2013-2017",
fill = "Price-Income Ratio") +
scale_fill_manual("Price-Income Ratio",values = col.ramp)
Plot5
library(gridExtra)
grid.arrange(Plot1,Plot3,Plot4,Plot5, nrow=2)
In get_acs
function, change year from 2017 to 2012, and call new DF as CenDF2012
.
Then create the ratio of household price divided by median household income for the 2008-2012. Call the variable name HHInc_HousePrice_Ratio2012
library(tidycensus)
library(tidyverse)
library(viridis)
library(plyr)
library(gtools)
census_api_key("8eab9b16f44cb26460ecbde164482194b7052772")
Var<-c("B19013_001","B25077_001")
# Download 2008-2012 df
CenDF2012 <- get_acs(geography = "county",
variables = Var,
year = 2012,
survey = "acs5",
geometry = FALSE)
#Create new variable for the housing price to income ratio.
CenDF2012<-CenDF2012 %>%
mutate(variable=case_when(
variable=="B19013_001" ~ "HHIncome2012",
variable=="B25077_001" ~ "HouseValue2012")) %>%
select(-moe,-NAME) %>%
spread(variable, estimate) %>% #Spread moves rows into columns
mutate(HHInc_HousePrice_Ratio2012=round(HouseValue2012/HHIncome2012,2))
Hint: you can not merge to sf
DFs. Go back and turn Geometry=FALSE
for one of your 2012 DF
CenDF<-merge(CenDF,CenDF2012,by.all="GEOID", all.x=TRUE) # all.x=TRUE makes sure that the merged dataframe keeps all counties in CenDF even if missing in CenDF2012
Look at summary statistics and plot histograms for the 2008-2012 housing price to income ratio versus the 2013-2017 housing price to income ratio.
Hist1<-ggplot(CenDF, aes(HHInc_HousePrice_Ratio)) +
geom_histogram(fill = "firebrick2",
color = "white", bins = 60) +
xlab("house price to income ratio by county, 2017") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_vline(xintercept = mean(CenDF$HHInc_HousePrice_Ratio,na.rm=TRUE), lty = "dashed")
Hist2<-ggplot(CenDF, aes(HHInc_HousePrice_Ratio2012)) +
geom_histogram(fill = "firebrick2",
color = "white", bins = 60) +
xlab("house price to income ratio by county, 2012") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_vline(xintercept = mean(CenDF$HHInc_HousePrice_Ratio2012,na.rm=TRUE), lty = "dashed")
library(gridExtra)
grid.arrange(Hist1,Hist2, nrow=2)
summary(CenDF$HHInc_HousePrice_Ratio2012)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.900 2.090 2.570 2.801 3.215 12.440 3
summary(CenDF$HHInc_HousePrice_Ratio)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.670 2.130 2.535 2.758 3.112 11.820 2
Hint: To calculate percentage change of a variable use the following equation: pct_change = 100 * (Present - Past) / Past. Use mutate
and replace present
and past
with the 2017 and 2012 house price to income ratios, respectively.
## Change Variable
CenDF<-CenDF %>%
mutate(pct_change = 100 * (`HHInc_HousePrice_Ratio` - `HHInc_HousePrice_Ratio2012`) / `HHInc_HousePrice_Ratio2012`)
Hint: You will make minor changes to the code you used to create Plot 1 above. e.g. change the fill
variable in ggplot
to the name of the change variable you created in the previous step.
upper_limit <- round(max(CenDF$pct_change,na.rm=TRUE) + 10, -1)
lower_limit <- round(min(CenDF$pct_change,na.rm=TRUE) - 10, -1)
Plot6<- ggplot(CenDF,aes(fill = pct_change)) +
geom_sf(size = 0) +
#geom_sf(data = major_roads_geo, color = "white", size = 0.8, fill = NA) +
#geom_sf(data = minor_roads_geo, color = "white", size = 0.4, fill = NA) +
scale_fill_viridis(option = "viridis", name = "% Change", limits = c(lower_limit, upper_limit), breaks = seq(lower_limit, upper_limit, 20)) +
labs(title="Changes in House Price to Income Ratio",
subtitle = "2017 5-Year Estimates vs. 2012 5-Year Estimates for Census Tracts",
caption = paste0(
"Data sources:",
"\n U.S. Census Bureau, 2012 and 2017 American Community Survey 5-Year Estimates"
)
) +
theme(plot.caption = element_text(hjust = 0, margin = margin(t = 15))) +
theme(axis.ticks = element_blank(), axis.text = element_blank()) +
theme(panel.background = element_blank())