library(ggplot2)


# Read the full cancer data set and pull out the rows for Iowa.
cancer <- read.csv("cancer.csv")
iowa <- subset(cancer, state == "Iowa")

# Time series ----------------------------------------------------------------
# Mortality over time: raw counts first, then relative to population.
qplot(year, mortality, data = iowa)
qplot(year, mortality / population, data = iowa)

# Add incidence and mortality rates per 100,000 population as new columns.
iowa <- transform(
  iowa,
  irate = incidence / population * 100000,
  mrate = mortality / population * 100000
)

# Mortality rate by year: once with no geom specified, once as a line.
qplot(year, mrate, data = iowa)
qplot(year, mrate, data = iowa, geom = "line")

# One line per site/race/sex combination. interaction() builds the crossed
# grouping for factor and character columns alike, whereas `site:race:sex`
# only works when all three columns are factors (read.csv() no longer creates
# factors by default as of R 4.0.0).
qplot(year, mrate, data = iowa, geom = "line",
  group = interaction(site, race, sex))

# Same lines, coloured by sex and split into one panel per race.
qplot(year, mrate, data = iowa, geom = "line",
  group = interaction(site, race, sex), colour = sex, facets = . ~ race)

# Population instead of mortality rate, with the same grouping and layout.
qplot(year, population, data = iowa, geom = "line",
  group = interaction(site, race, sex), colour = sex, facets = . ~ race)

# Reshaping ------------------------------------------------------------------

# NOTE(review): melt() and cast() are not base R and are not obviously
# provided by the library(ggplot2) line above -- presumably they come from the
# reshape package; confirm that it is attached before this point.

# Melt to long form, treating the first five columns as identifiers.
cancerm <- melt(cancer, id = 1:5)

# Sum each measured variable within every race.
cast(cancerm, race ~ variable, fun.aggregate = sum)

# Add incidence and mortality rates per 100,000 population to a data frame.
#
# df: a data frame with `incidence`, `mortality` and `population` columns.
# Returns `df` with two extra columns, `irate` and `mrate`.
rates <- function(df) {
  # Count per 100,000 of the population.
  per_100k <- function(count, pop) {
    count / pop * 100000
  }

  transform(
    df,
    irate = per_100k(incidence, population),
    mrate = per_100k(mortality, population)
  )
}

# Totals per cancer site, converted to rates; plot incidence rate against site.
site_rates <- rates(cast(cancerm, site ~ variable, fun.aggregate = sum))
qplot(irate, site, data = site_rates, xlim = c(0, NA))
# Same plot with the sites ordered by their incidence rate.
qplot(irate, reorder(site, irate), data = site_rates, xlim = c(0, NA))

# Rates for every site and year combination.
site <- rates(cast(cancerm, site + year ~ variable, fun.aggregate = sum))


# Your turn:
#   * investigate the distribution of rates by state
#   * investigate the distribution of rates by race
#   * investigate the distribution of rates by year

# Rates by race.
race_rates <- rates(cast(cancerm, race ~ variable, fun.aggregate = sum))
qplot(irate, race, data = race_rates, xlim = c(0, NA))

# Rates by state, plotted as-is and then ordered by incidence rate.
state_rates <- rates(cast(cancerm, state ~ variable, fun.aggregate = sum))
qplot(irate, state, data = state_rates, xlim = c(0, NA))
qplot(irate, reorder(state, irate), data = state_rates, xlim = c(0, NA))

# Break down by multiple variables
site_sex_rates <- rates(
  cast(cancerm, site + sex ~ variable, fun.aggregate = sum)
)
qplot(
  irate, reorder(site, irate),
  data = site_sex_rates,
  xlim = c(0, NA),
  colour = sex
)

# Your turn


# Maps -----------------------------------------------------------------------

# State outline coordinates, drawn as paths and then as polygons, grouped by
# state.
states <- read.csv("states.csv")
qplot(x, y, data = states, group = state, geom = "path")
qplot(x, y, data = states, group = state, geom = "polygon")

# Join the per-state rates onto the outline coordinates and use them as fill.
map_rates <- merge(states, state_rates, by = "state")
qplot(x, y, data = map_rates, group = state, geom = "polygon", fill = irate)
qplot(
  x, y,
  data = map_rates,
  group = state,
  geom = "polygon",
  fill = mrate / irate
)

# Can you find a cancer with a clear geographic trend?
#  Hint: Use cast to produce a summary by state and site
#  Hint: You might want to subset to just select one year

# Rates for every site and state combination, joined onto the state outlines.
# This replaces the earlier map_rates.
site_state <- rates(
  cast(cancerm, site + state ~ variable, fun.aggregate = sum)
)
map_rates <- merge(states, site_state, by = "state")

# Mortality-rate maps for two individual sites.
qplot(
  x, y,
  data = subset(map_rates, site == "Ovary"),
  group = state,
  fill = mrate,
  geom = "polygon",
  facets = . ~ site
)
qplot(
  x, y,
  data = subset(map_rates, site == "Larynx"),
  group = state,
  fill = mrate,
  geom = "polygon",
  facets = . ~ site
)

# 2d time series -------------------------------------------------------------
# Incidence against mortality for Iowa: raw counts first, then rates, then the
# rate plot coloured, sized or facetted by the other variables.
qplot(incidence, mortality, data = iowa)
qplot(irate, mrate, data = iowa)
qplot(irate, mrate, data = iowa, colour = sex)
qplot(irate, mrate, data = iowa, colour = race)
qplot(irate, mrate, data = iowa, colour = race, size = population)
qplot(irate, mrate, data = iowa, colour = site)
qplot(irate, mrate, data = iowa, facets = . ~ site, colour = sex)

# One line per site/race/sex combination. interaction() is used instead of
# `site:race:sex` because `:` only crosses factors, and read.csv() no longer
# creates factors by default as of R 4.0.0.
qplot(irate, mrate, data = iowa, geom = "line",
  group = interaction(site, race, sex))


# Keep the rows where incidence exceeds 1 per 10,000 population, then rebuild
# `site` as a factor from the values that remain.
common <- subset(cancer, incidence / population > 1 / 10000)
common[["site"]] <- factor(common[["site"]])



