# Exploratory graphics for cancer incidence and mortality counts.
#
# Reads cancer.csv and states.csv from the working directory and draws a
# sequence of quick plots: time series for Iowa, aggregated rates by site,
# race and state, choropleth maps, and incidence-vs-mortality scatterplots.

library(ggplot2)
# melt() and cast() are provided by reshape; load it explicitly instead of
# relying on some other package to attach it.
library(reshape)

# Read strings as factors explicitly: R >= 4.0.0 defaults to character
# columns, while the code below treats the id columns as factors (e.g. the
# factor() call on common$site at the end of this section).
cancer <- read.csv("cancer.csv", stringsAsFactors = TRUE)
iowa <- subset(cancer, state == "Iowa")

# Helpers --------------------------------------------------------------------

# Add incidence and mortality rates per 100,000 population.
#
# df: a data frame with numeric columns `incidence`, `mortality` and
#     `population`.
# Returns `df` with two extra columns, `irate` and `mrate`.
rates <- function(df) {
  transform(
    df,
    irate = incidence / population * 100000,
    mrate = mortality / population * 100000
  )
}

# Time series ----------------------------------------------------------------

qplot(year, mortality, data = iowa)
qplot(year, mortality / population, data = iowa)

iowa <- rates(iowa)

qplot(year, mrate, data = iowa)
qplot(year, mrate, data = iowa, geom = "line")

# One line per site/race/sex combination. interaction() works for both
# factor and character columns, unlike the `site:race:sex` shorthand, which
# only works on factors.
qplot(year, mrate, data = iowa,
  group = interaction(site, race, sex), geom = "line")
qplot(year, mrate, data = iowa,
  group = interaction(site, race, sex), geom = "line",
  colour = sex, facets = . ~ race)
qplot(year, population, data = iowa,
  group = interaction(site, race, sex), geom = "line",
  colour = sex, facets = . ~ race)

# Reshaping ------------------------------------------------------------------

# Long form: the first five columns identify a row, the remaining columns
# become (variable, value) pairs that cast() can aggregate.
cancerm <- melt(cancer, id.vars = 1:5)
cast(cancerm, race ~ variable, sum)

# NOTE(review): cast(..., sum) also sums `population` over every collapsed
# row. If population is repeated for each site in cancer.csv, totals that
# collapse over site count it several times -- confirm against the data.
site_rates <- rates(cast(cancerm, site ~ variable, sum))
qplot(irate, site, data = site_rates, xlim = c(0, NA))
qplot(irate, reorder(site, irate), data = site_rates, xlim = c(0, NA))

site <- rates(cast(cancerm, site + year ~ variable, sum))

# Your turn:
#  * investigate the distribution of rates by state
#  * investigate the distribution of rates by race
#  * investigate the distribution of rates by year

race_rates <- rates(cast(cancerm, race ~ variable, sum))
qplot(irate, race, data = race_rates, xlim = c(0, NA))

state_rates <- rates(cast(cancerm, state ~ variable, sum))
qplot(irate, state, data = state_rates, xlim = c(0, NA))
qplot(irate, reorder(state, irate), data = state_rates, xlim = c(0, NA))

# Break down by multiple variables
site_sex_rates <- rates(cast(cancerm, site + sex ~ variable, sum))
qplot(irate, reorder(site, irate), data = site_sex_rates,
  xlim = c(0, NA), colour = sex)

# Your turn

# Maps -----------------------------------------------------------------------

# states.csv supplies outline coordinates (x, y) keyed by `state`.
states <- read.csv("states.csv", stringsAsFactors = TRUE)
qplot(x, y, data = states, geom = "path", group = state)
qplot(x, y, data = states, geom = "polygon", group = state)

map_rates <- merge(states, state_rates, by = "state")
qplot(x, y, data = map_rates, group = state, fill = irate,
  geom = "polygon")
qplot(x, y, data = map_rates, group = state, fill = mrate / irate,
  geom = "polygon")

# Can you find a cancer with a clear geographic trend?
# Hint: Use cast to produce a summary by state and site
# Hint: You might want to subset to just select one year

site_state <- rates(cast(cancerm, site + state ~ variable, sum))
map_rates <- merge(states, site_state, by = "state")

qplot(x, y, data = subset(map_rates, site == "Ovary"),
  group = state, fill = mrate, geom = "polygon", facets = . ~ site)
qplot(x, y, data = subset(map_rates, site == "Larynx"),
  group = state, fill = mrate, geom = "polygon", facets = . ~ site)

# 2d time series -------------------------------------------------------------

qplot(incidence, mortality, data = iowa)
qplot(irate, mrate, data = iowa)
qplot(irate, mrate, data = iowa, colour = sex)
qplot(irate, mrate, data = iowa, colour = race)
qplot(irate, mrate, data = iowa, colour = race, size = population)
qplot(irate, mrate, data = iowa, colour = site)
qplot(irate, mrate, data = iowa, facets = . ~ site, colour = sex)
qplot(irate, mrate, data = iowa,
  group = interaction(site, race, sex), geom = "line")

# Keep only rows where incidence exceeds 1 case per 10,000 population, then
# rebuild the factor so that sites no longer present lose their levels.
common <- subset(cancer, incidence / population > 1 / 10000)
common$site <- factor(common$site)