# Code - Dr Michael Tam # m.tam@unsw.edu.au # Projections using NLR to Gompertz and Richards' growth curve of Australian (VIC) COVID-19 # outbreak in August 2021. # The data excludes cases known to have been acquired from overseas (for instance, detected # in hotel quarantine) # # I've released this code into the public domain. If you make use of this code, attribution # is welcome but not necessary. # Establish the date and time the code is run. # This sets up some of the date dependent variables in the analysis, and also provides the # date and time labels in the charts. Please update this daily so that the analyses run # correctly and so that the charts are date and timestamped. todaydate = as.Date("2021-9-16") time = 1000 # Install libraries # These are the libraries used in this code that are not part of base R. You'll need to # install this the first time you run the code. # To do this, remove the hash symbol to take it off the comments, and copy-paste into the R # console to run. Once these packages are installed, you won't need to do it again. # install.packages("drc") # install.packages("ggplot2") # install.packages("tidyquant") # install.packages("gridExtra") # install.packages("readxl") # install.packages("scales") # install.packages("remotes") # install.packages("DescTools") # remotes::install_github("OnofriAndreaPG/aomisc") # Load libraries library(drc) library(aomisc) library(ggplot2) library(tidyquant) library(gridExtra) library(readxl) library(scales) library(DescTools) # Loading the data # I'm assuming that the data file for today is an Excel spreadsheet named # "au_covid_vic.xlsx". # If it isn't, rename it before uploading it into RStudio. The spreadsheet basically # contains only five columns - Date, Day number (1 = 4 August 2021), Number of new local # cases of COVID-19 announced by Vic Health, and the Cumulative number of # local cases of COVID-19 in the Vic outbreak starting from 4 August 2021. The final # column contains potential comments and is not used in the analysis. au_covid <- read_excel("au_covid_vic.xlsx", col_types = c("date", "numeric", "numeric", "numeric", "text")) View(au_covid) # Setting up some variables. # The day number starts at "1", and this is 4 August 2021. This was the date prior to the # reported local case in the Melbourne August 2021 outbreak. # all_days is simply a list of integers from 1 to 150. # Note: Day 150 is 31 December 2021. today = as.numeric(todaydate - as.Date("2021-8-4")) + 1 all_days = 1:150 # This analysis critically uses the following packages: # "drc": Ritz, C., Baty, F., Streibig, J. C., Gerhard, D. (2015) Dose-Response # Analysis Using R PLOS ONE, 10(12), e0146021 # "aomisc": https://rdrr.io/github/OnofriAndreaPG/aomisc/ # This first section is for model evaluation as the model changes over time with each new # daily data point. It generates the model for each day from 14 August to "today". The # parameters of the Gompertz and Richards' models are saved into a data frame. The 7 and # 14 days estimates and 95% CIs for each of the model are also saved, which allows # comparison to subsequent reality. model.history.g.d <- vector() model.history.g.7fit <- vector() model.history.g.7lwr <- vector() model.history.g.7upr <- vector() model.history.g.14fit <- vector() model.history.g.14lwr <- vector() model.history.g.14upr <- vector() model.history.g.7fit.error <- vector() model.history.g.7lwr.error <- vector() model.history.g.7upr.error <- vector() model.history.g.14fit.error <- vector() model.history.g.14lwr.error <- vector() model.history.g.14upr.error <- vector() model.history.g.d[1:150] <- NA model.history.g.7fit[1:150] <- NA model.history.g.7lwr[1:150] <- NA model.history.g.7upr[1:150] <- NA model.history.g.14fit[1:150] <- NA model.history.g.14lwr[1:150] <- NA model.history.g.14upr[1:150] <- NA model.history.g.7fit.error[1:150] <- NA model.history.g.7lwr.error[1:150] <- NA model.history.g.7upr.error[1:150] <- NA model.history.g.14fit.error[1:150] <- NA model.history.g.14lwr.error[1:150] <- NA model.history.g.14upr.error[1:150] <- NA model.history.r.d <- vector() model.history.r.7fit <- vector() model.history.r.7lwr <- vector() model.history.r.7upr <- vector() model.history.r.14fit <- vector() model.history.r.14lwr <- vector() model.history.r.14upr <- vector() model.history.r.7fit.error <- vector() model.history.r.7lwr.error <- vector() model.history.r.7upr.error <- vector() model.history.r.14fit.error <- vector() model.history.r.14lwr.error <- vector() model.history.r.14upr.error <- vector() model.history.r.d[1:150] <- NA model.history.r.7fit[1:150] <- NA model.history.r.7lwr[1:150] <- NA model.history.r.7upr[1:150] <- NA model.history.r.14fit[1:150] <- NA model.history.r.14lwr[1:150] <- NA model.history.r.14upr[1:150] <- NA model.history.r.7fit.error[1:150] <- NA model.history.r.7lwr.error[1:150] <- NA model.history.r.7upr.error[1:150] <- NA model.history.r.14fit.error[1:150] <- NA model.history.r.14lwr.error[1:150] <- NA model.history.r.14upr.error[1:150] <- NA real.7 <- vector() real.14 <- vector() real.7[1:150] <- NA real.14[1:150] <- NA for(i in 10:today) { model.cases <- au_covid$tot[1:i] model.day <- au_covid$day[1:i] model.r <- drm(model.cases ~ model.day, fct = L.5()) model.g <- drm(model.cases ~ model.day, fct = G.4()) model.summary.g <- summary(model.g) model.summary.r <- summary(model.r) model.history.g.d[i] <- model.summary.g$coefficients[3] model.history.r.d[i] <- model.summary.r$coefficients[3] prediction_days <- data.frame(day = all_days) model.g.pred <- data.frame(predict(model.g, newdata = prediction_days, interval = 'confidence')) model.history.g.7fit[i] <- model.g.pred$Prediction[i+7] model.history.g.7lwr[i] <- model.g.pred$Lower[i+7] model.history.g.7upr[i] <- model.g.pred$Upper[i+7] model.history.g.14fit[i] <- model.g.pred$Prediction[i+14] model.history.g.14lwr[i] <- model.g.pred$Lower[i+14] model.history.g.14upr[i] <- model.g.pred$Upper[i+14] model.r.pred <- data.frame(predict(model.r, newdata = prediction_days, interval = 'confidence')) model.history.r.7fit[i] <- model.r.pred$Prediction[i+7] model.history.r.7lwr[i] <- model.r.pred$Lower[i+7] model.history.r.7upr[i] <- model.r.pred$Upper[i+7] model.history.r.14fit[i] <- model.r.pred$Prediction[i+14] model.history.r.14lwr[i] <- model.r.pred$Lower[i+14] model.history.r.14upr[i] <- model.r.pred$Upper[i+14] real.7[i] <- au_covid$tot[i+7] real.14[i] <- au_covid$tot[i+14] model.history.g.7fit.error[i] <- (model.g.pred$Prediction[i+7] - real.7[i]) / real.7[i] model.history.g.7lwr.error[i] <- (model.g.pred$Lower[i+7] - real.7[i]) / real.7[i] model.history.g.7upr.error[i] <- (model.g.pred$Upper[i+7] - real.7[i]) / real.7[i] model.history.g.14fit.error[i] <- (model.g.pred$Prediction[i+14] - real.14[i]) / real.14[i] model.history.g.14lwr.error[i] <- (model.g.pred$Lower[i+14] - real.14[i]) / real.14[i] model.history.g.14upr.error[i] <- (model.g.pred$Upper[i+14] - real.14[i]) / real.14[i] model.history.r.7fit.error[i] <- (model.r.pred$Prediction[i+7] - real.7[i]) / real.7[i] model.history.r.7lwr.error[i] <- (model.r.pred$Lower[i+7] - real.7[i]) / real.7[i] model.history.r.7upr.error[i] <- (model.r.pred$Upper[i+7] - real.7[i]) / real.7[i] model.history.r.14fit.error[i] <- (model.r.pred$Prediction[i+14] - real.14[i]) / real.14[i] model.history.r.14lwr.error[i] <- (model.r.pred$Lower[i+14] - real.14[i]) / real.14[i] model.history.r.14upr.error[i] <- (model.r.pred$Upper[i+14] - real.14[i]) / real.14[i] } model.history <- data.frame( date = au_covid$date, day = all_days, g.d = model.history.g.d, g.7fit = model.history.g.7fit, g.7lwr = model.history.g.7lwr, g.7upr = model.history.g.7upr, g.14fit = model.history.g.14fit, g.14lwr = model.history.g.14lwr, g.14upr = model.history.g.14upr, r.d = model.history.r.d, r.7fit = model.history.r.7fit, r.7lwr = model.history.r.7lwr, r.7upr = model.history.r.7upr, r.14fit = model.history.r.14fit, r.14lwr = model.history.r.14lwr, r.14upr = model.history.r.14upr) model.history.g.error <- data.frame( date = au_covid$date, day = all_days, g.7fit = model.history.g.7fit.error, g.7lwr = model.history.g.7lwr.error, g.7upr = model.history.g.7upr.error, g.14fit = model.history.g.14fit.error, g.14lwr = model.history.g.14lwr.error, g.14upr = model.history.g.14upr.error) model.history.r.error <- data.frame( date = au_covid$date, day = all_days, r.7fit = model.history.r.7fit.error, r.7lwr = model.history.r.7lwr.error, r.7upr = model.history.r.7upr.error, r.14fit = model.history.r.14fit.error, r.14lwr = model.history.r.14lwr.error, r.14upr = model.history.r.14upr.error) # Projections of CUMULATIVE case counts based on Gompertz/Richards' model # The code here is a little ugly, but there is a reason for this. For some reason, when # using the predict() function with a gompertz model, asking it to include the standard # error of the estimate in the output, se=TRUE, results in zero values being returned for # the upper and lower bounds. I have no idea why. So basically, I use the predict() # function twice, once to get the estimate and confidence interval, and once to get the # standard error. I then create a new data frame to clean this up and get rid of the # duplicated estimate column, and the useless zeroed columns for the CI bounds. # # I also replace any model estimates of the fit and lower bound of the CI of the estimate # that are less than zero, with zero. This can sometimes occur with the first few days # in the model. model.au.gompertz.pred <- data.frame(predict(model.g, newdata = prediction_days, interval = 'confidence'), predict(model.g, newdata = prediction_days, interval = 'confidence', se=TRUE)) model.gompertz <- data.frame( day = all_days, fit = model.au.gompertz.pred$Prediction, lwr = model.au.gompertz.pred$Lower, upr = model.au.gompertz.pred$Upper, se = model.au.gompertz.pred$SE) # Replace fit and lower CI estimates that are less than zero with zero. model.gompertz$fit[model.gompertz$fit < 0] <- 0 model.gompertz$lwr[model.gompertz$lwr < 0] <- 0 model.au.pred <- data.frame(predict(model.r, newdata = prediction_days, interval = 'confidence'), predict(model.r, newdata = prediction_days, interval = 'confidence', se=TRUE)) model.au <- data.frame( day = all_days, fit = model.au.pred$Prediction, lwr = model.au.pred$Lower, upr = model.au.pred$Upper, se = model.au.pred$SE) model.au$fit[model.au$fit < 0] <- 0 model.au$lwr[model.au$lwr < 0] <- 0 ### Plots ## Plot cumulative cases - Richards' growth curve model # Scaling factor for fixed elements on chart # Note: the value of 1800 as equal to 100% of the height of the chart is an arbitrary # choice made originally when I was hardcoding a chart in 2020. There is no special # significance to this value other than it became too tedious to recode all the # charts. sf = model.au$upr[today+10] * 1.05 / 1800 plot_au <- ggplot(data=au_covid, aes(x=as.Date(date), y=tot)) + xlab("") + ylab("Cumulative Local COVID-19 cases (Vic) starting August 4") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=todaydate, y=50*sf, label=au_covid$date[today], size=2.5) + geom_vline(xintercept=todaydate + 7, color = "grey") + annotate("label", x=todaydate+7, y=50*sf, label=au_covid$date[today+7], size=2.5) + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Projection of total COVID-19 cases", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1700*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1625*sf, label="Assumption: fitted to Richards' growth curve", hjust=0) + geom_point(colour="grey20", size = 1) + geom_line(aes(y=model.au$fit), colour="grey20", alpha=0.5, size = 1, linetype=2) + geom_ribbon(aes(ymin=model.au$lwr, ymax=model.au$upr), colour=NA, fill="dark green", alpha=0.1) + annotate("label", x=todaydate, y=au_covid$tot[today], label=au_covid$tot[today], size=3) + annotate("label", x=todaydate + 7, y=model.au$fit[today + 7], label=round(model.au$fit[today + 7]), size=3) + annotate("text", x=todaydate + 7, y=model.au$upr[today + 7] * 1.04, label=round(model.au$upr[today + 7]), size=2) + annotate("text", x=todaydate + 7, y=model.au$lwr[today + 7] * 0.96, label=round(model.au$lwr[today + 7]), size=2) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"), todaydate + 10) ) ## Plot cumulative cases - Gompertz # scaling factor for fixed elements on chart sf = model.gompertz$upr[today+10] * 1.05 / 1800 plot_au_gompertz <- ggplot(data=au_covid, aes(x=as.Date(date), y=tot)) + xlab("") + ylab("Cumulative Local COVID-19 cases (Vic) starting August 4") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=todaydate, y=50*sf, label=au_covid$date[today], size=2.5) + geom_vline(xintercept=todaydate + 7, color = "grey") + annotate("label", x=todaydate+7, y=50*sf, label=au_covid$date[today+7], size=2.5) + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Projection of total COVID-19 cases", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1700*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1625*sf, label="Assumption: fitted to Gompertz growth curve", hjust=0) + geom_point(colour="grey20", size = 1) + geom_line(aes(y=model.gompertz$fit), colour="grey20", alpha=0.5, size = 1, linetype=2) + geom_ribbon(aes(ymin=model.gompertz$lwr, ymax=model.gompertz$upr), colour=NA, fill="blue", alpha=0.1) + annotate("label", x=todaydate, y=au_covid$tot[today], label=au_covid$tot[today], size=3) + annotate("label", x=todaydate + 7, y=model.gompertz$fit[today + 7], label=round(model.gompertz$fit[today + 7]), size=3) + annotate("text", x=todaydate + 7, y=model.gompertz$upr[today + 7] * 1.04, label=round(model.gompertz$upr[today + 7]), size=2) + annotate("text", x=todaydate + 7, y=model.gompertz$lwr[today + 7] * 0.94, label=round(model.gompertz$lwr[today + 7]), size=2) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"), todaydate + 10) ) ### Plots - new cases # Projections of NEW daily case counts based on Gompertz/Richards' model # Note: This is effectively the first derivative of the model. # # I've changed the way I compute the 95% confidence intervals on several occasions. # Initially, I couldn't think of how to easily compute this, so I resorted to a hack. # I basically used the confidence interval of the estimate from the model of the # cumulative cases as the estimate for the CI of the derivative. This would obviously # be an overestimate. # # On 7 August 2021, I changed the method to be more statistically defensible. # For each day's TOTAL case number projection in the fitted Gompertz/Richards' model, I use # the standard error of that fitted value to create a randomised normal distribution of # values. I compute a list of 1,000,000 values for each day. I then subtract a list of # these values from the list of the prior day, effectively creating a list of differences # between the days. This distribution of values will be overly conservative as in reality # it is not possible for the total number of cases to ever be less than a previous day's. # # On 9 September 2021, I changed the method once again, which I think is finally # acceptable. Both the Gompertz and Richards' models give standard errors for each of the # parameters. I use this to create a randomised normal distribution of these parameters, # and then use these parameters to simulate new curves in a boostrapping process (a run of # 10,000 times). From each of these simulated growth curves, I extract the daily growth in # new cases. For each day, the median, or middle value of this list of differences is the # NEW case estimate as directly computed from the model. Specific percentiles, which are # the proportion of values of the distribution can be interpreted as the confidence # interval. For instance, the 95% confidence interval is the interval between the 2.5% and # 97.5% centiles. ## # Initial set up of the vectors model.gompertz.fit.deriv <- vector() model.gompertz.lwr.deriv <- vector() model.gompertz.upr.deriv <- vector() model.gompertz.lwr.deriv.05 <- vector() model.gompertz.upr.deriv.05 <- vector() model.gompertz.lwr.deriv.15 <- vector() model.gompertz.upr.deriv.15 <- vector() model.gompertz.lwr.deriv.25 <- vector() model.gompertz.upr.deriv.25 <- vector() model.gompertz.lwr.deriv.35 <- vector() model.gompertz.upr.deriv.35 <- vector() model.gompertz.lwr.deriv.45 <- vector() model.gompertz.upr.deriv.45 <- vector() model.gompertz.lwr.deriv.55 <- vector() model.gompertz.upr.deriv.55 <- vector() model.gompertz.lwr.deriv.65 <- vector() model.gompertz.upr.deriv.65 <- vector() model.gompertz.lwr.deriv.75 <- vector() model.gompertz.upr.deriv.75 <- vector() model.gompertz.lwr.deriv.85 <- vector() model.gompertz.upr.deriv.85 <- vector() model.gompertz.lwr.deriv.99 <- vector() model.gompertz.upr.deriv.99 <- vector() model.gompertz.fit.deriv[1:150] <- NA model.gompertz.lwr.deriv[1:150] <- NA model.gompertz.upr.deriv[1:150] <- NA model.gompertz.lwr.deriv.05[1:150] <- NA model.gompertz.upr.deriv.05[1:150] <- NA model.gompertz.lwr.deriv.15[1:150] <- NA model.gompertz.upr.deriv.15[1:150] <- NA model.gompertz.lwr.deriv.25[1:150] <- NA model.gompertz.upr.deriv.25[1:150] <- NA model.gompertz.lwr.deriv.35[1:150] <- NA model.gompertz.upr.deriv.35[1:150] <- NA model.gompertz.lwr.deriv.45[1:150] <- NA model.gompertz.upr.deriv.45[1:150] <- NA model.gompertz.lwr.deriv.55[1:150] <- NA model.gompertz.upr.deriv.55[1:150] <- NA model.gompertz.lwr.deriv.65[1:150] <- NA model.gompertz.upr.deriv.65[1:150] <- NA model.gompertz.lwr.deriv.75[1:150] <- NA model.gompertz.upr.deriv.75[1:150] <- NA model.gompertz.lwr.deriv.85[1:150] <- NA model.gompertz.upr.deriv.85[1:150] <- NA model.gompertz.lwr.deriv.99[1:150] <- NA model.gompertz.upr.deriv.99[1:150] <- NA model.au.fit.deriv <- vector() model.au.lwr.deriv <- vector() model.au.upr.deriv <- vector() model.au.lwr.deriv.05 <- vector() model.au.upr.deriv.05 <- vector() model.au.lwr.deriv.15 <- vector() model.au.upr.deriv.15 <- vector() model.au.lwr.deriv.25 <- vector() model.au.upr.deriv.25 <- vector() model.au.lwr.deriv.35 <- vector() model.au.upr.deriv.35 <- vector() model.au.lwr.deriv.45 <- vector() model.au.upr.deriv.45 <- vector() model.au.lwr.deriv.55 <- vector() model.au.upr.deriv.55 <- vector() model.au.lwr.deriv.65 <- vector() model.au.upr.deriv.65 <- vector() model.au.lwr.deriv.75 <- vector() model.au.upr.deriv.75 <- vector() model.au.lwr.deriv.85 <- vector() model.au.upr.deriv.85 <- vector() model.au.lwr.deriv.99 <- vector() model.au.upr.deriv.99 <- vector() model.au.fit.deriv[1:150] <- NA model.au.lwr.deriv[1:150] <- NA model.au.upr.deriv[1:150] <- NA model.au.lwr.deriv.05[1:150] <- NA model.au.upr.deriv.05[1:150] <- NA model.au.lwr.deriv.15[1:150] <- NA model.au.upr.deriv.15[1:150] <- NA model.au.lwr.deriv.25[1:150] <- NA model.au.upr.deriv.25[1:150] <- NA model.au.lwr.deriv.35[1:150] <- NA model.au.upr.deriv.35[1:150] <- NA model.au.lwr.deriv.45[1:150] <- NA model.au.upr.deriv.45[1:150] <- NA model.au.lwr.deriv.55[1:150] <- NA model.au.upr.deriv.55[1:150] <- NA model.au.lwr.deriv.65[1:150] <- NA model.au.upr.deriv.65[1:150] <- NA model.au.lwr.deriv.75[1:150] <- NA model.au.upr.deriv.75[1:150] <- NA model.au.lwr.deriv.85[1:150] <- NA model.au.upr.deriv.85[1:150] <- NA model.au.lwr.deriv.99[1:150] <- NA model.au.upr.deriv.99[1:150] <- NA # For the Gompertz model # We calculate the estimate and the 95% CI # Further 5%, 15%, 25%, 35%, 45%, 55%, 65%, 75%, 85%, 99% CIs are calculated. # These are used to create the coloured confidence bands in the charts. # This will be the data frame where the derivative data from each simulation will be # placed. # Create the initial empty data frame. model.g.deriv <- data.frame(day = double(), deriv = double()) # Create the matrix of the variations of the parameters from the Gompertz model that # we'll use for bootstrapping. Effectively, we use the standard error for each of the # coefficients in the model to create a normal distribution of each parameter. model.g.boostrapcoef <- data.frame( b = rnorm(10000, mean=model.g$coefficients[1], sd=summary(model.g)$coefficients["b:(Intercept)","Std. Error"]), c = rnorm(10000, mean=model.g$coefficients[2], sd=summary(model.g)$coefficients["c:(Intercept)","Std. Error"]), d = rnorm(10000, mean=model.g$coefficients[3], sd=summary(model.g)$coefficients["d:(Intercept)","Std. Error"]), e = rnorm(10000, mean=model.g$coefficients[4], sd=summary(model.g)$coefficients["e:(Intercept)","Std. Error"])) # Start of the bootstrapping. # 10,000 simulations are run. for(i in 1:10000) { # Set up the model parameters for this instance of the simulation b = model.g.boostrapcoef$b[i] c = model.g.boostrapcoef$c[i] d = model.g.boostrapcoef$d[i] e = model.g.boostrapcoef$e[i] model.g.bootstrap.deriv <- vector() model.g.bootstrap.deriv[1:150] <- NA for(j in 2:150) { # The Gompertz equation is in the following form: # f(x) = c + (d-c) * ( exp( -exp( b * (x - e)))) # # Where x = day number # b, c, d, e are the parameters of the model # The calculation below uses the model to calculate the cumulative cases for the # current day, and subtracts it from that from the previous day. model.g.bootstrap.deriv[j] <- ( c + (d-c) * ( exp( -exp( b * (j - e)))) ) - ( c + (d-c) * ( exp( -exp( b * ((j-1) - e)))) ) } # Append the derivative data from this run of the simulation to those of all the previous # runs. bootstrap.deriv.temp <- data.frame(day = all_days, deriv = model.g.bootstrap.deriv) model.g.deriv <- data.frame( days = c(model.g.deriv$days, bootstrap.deriv.temp$day), deriv = c(model.g.deriv$deriv, bootstrap.deriv.temp$deriv)) } # Compute the confidence bands from the derivative data. for(i in 2:150) { day_diff <- model.g.deriv$deriv[model.g.deriv$days == i] model.gompertz.fit.deriv[i] <- median(day_diff) model.gompertz.lwr.deriv[i] <- quantile(day_diff, 0.025) model.gompertz.upr.deriv[i] <- quantile(day_diff, 0.975) model.gompertz.lwr.deriv.05[i] <- quantile(day_diff, 0.475) model.gompertz.upr.deriv.05[i] <- quantile(day_diff, 0.525) model.gompertz.lwr.deriv.15[i] <- quantile(day_diff, 0.425) model.gompertz.upr.deriv.15[i] <- quantile(day_diff, 0.575) model.gompertz.lwr.deriv.25[i] <- quantile(day_diff, 0.375) model.gompertz.upr.deriv.25[i] <- quantile(day_diff, 0.625) model.gompertz.lwr.deriv.35[i] <- quantile(day_diff, 0.325) model.gompertz.upr.deriv.35[i] <- quantile(day_diff, 0.675) model.gompertz.lwr.deriv.45[i] <- quantile(day_diff, 0.275) model.gompertz.upr.deriv.45[i] <- quantile(day_diff, 0.725) model.gompertz.lwr.deriv.55[i] <- quantile(day_diff, 0.225) model.gompertz.upr.deriv.55[i] <- quantile(day_diff, 0.775) model.gompertz.lwr.deriv.65[i] <- quantile(day_diff, 0.175) model.gompertz.upr.deriv.65[i] <- quantile(day_diff, 0.825) model.gompertz.lwr.deriv.75[i] <- quantile(day_diff, 0.125) model.gompertz.upr.deriv.75[i] <- quantile(day_diff, 0.875) model.gompertz.lwr.deriv.85[i] <- quantile(day_diff, 0.075) model.gompertz.upr.deriv.85[i] <- quantile(day_diff, 0.925) model.gompertz.lwr.deriv.99[i] <- quantile(day_diff, 0.005) model.gompertz.upr.deriv.99[i] <- quantile(day_diff, 0.995) } # For the Richards' growth model # This will be the data frame where the derivative data from each simulation will be # placed. # Create the initial empty data frame. model.r.deriv <- data.frame(day = double(), deriv = double()) # Create the matrix of the variations of the parameters from the Richards' model that # we'll use for bootstrapping. Effectively, we use the standard error for each of the # coefficients in the model to create a normal distribution of each parameter. model.r.boostrapcoef <- data.frame( b = rnorm(10000, mean=model.r$coefficients[1], sd=summary(model.r)$coefficients["b:(Intercept)","Std. Error"]), c = rnorm(10000, mean=model.r$coefficients[2], sd=summary(model.r)$coefficients["c:(Intercept)","Std. Error"]), d = rnorm(10000, mean=model.r$coefficients[3], sd=summary(model.r)$coefficients["d:(Intercept)","Std. Error"]), e = rnorm(10000, mean=model.r$coefficients[4], sd=summary(model.r)$coefficients["e:(Intercept)","Std. Error"]), f = rnorm(10000, mean=model.r$coefficients[5], sd=summary(model.r)$coefficients["f:(Intercept)","Std. Error"])) # Start of the bootstrapping. # 10,000 simulations are run. for(i in 1:10000) { # Set up the model parameters for this instance of the simulation b = model.r.boostrapcoef$b[i] c = model.r.boostrapcoef$c[i] d = model.r.boostrapcoef$d[i] e = model.r.boostrapcoef$e[i] f = model.r.boostrapcoef$f[i] model.r.bootstrap.deriv <- vector() model.r.bootstrap.deriv[1:150] <- NA for(j in 2:150) { # The Richards' growth curve is in the following form: # f(x) = c + ( (d-c) / (1 + exp( b * (x - e)))^f) # # Where x = day number # b, c, d, e, f are the parameters of the model # The calculation below uses the model to calculate the cumulative cases for the # current day, and subtracts it from that from the previous day. model.r.bootstrap.deriv[j] <- ( c + ( (d-c) / (1 + exp( b * (j - e)))^f) ) - (c + ( (d-c) / (1 + exp( b * ((j-1) - e)))^f) ) } # Append the derivative data from this run of the simulation to those of all the previous # runs. bootstrap.deriv.temp <- data.frame(day = all_days, deriv = model.r.bootstrap.deriv) model.r.deriv <- data.frame( days = c(model.r.deriv$days, bootstrap.deriv.temp$day), deriv = c(model.r.deriv$deriv, bootstrap.deriv.temp$deriv)) } # Compute the confidence bands from the derivative data. for(i in 2:150) { day_diff <- model.r.deriv$deriv[model.r.deriv$days == i] model.au.fit.deriv[i] <- median(day_diff) model.au.lwr.deriv[i] <- quantile(day_diff, 0.025) model.au.upr.deriv[i] <- quantile(day_diff, 0.975) model.au.lwr.deriv.05[i] <- quantile(day_diff, 0.475) model.au.upr.deriv.05[i] <- quantile(day_diff, 0.525) model.au.lwr.deriv.15[i] <- quantile(day_diff, 0.425) model.au.upr.deriv.15[i] <- quantile(day_diff, 0.575) model.au.lwr.deriv.25[i] <- quantile(day_diff, 0.375) model.au.upr.deriv.25[i] <- quantile(day_diff, 0.625) model.au.lwr.deriv.35[i] <- quantile(day_diff, 0.325) model.au.upr.deriv.35[i] <- quantile(day_diff, 0.675) model.au.lwr.deriv.45[i] <- quantile(day_diff, 0.275) model.au.upr.deriv.45[i] <- quantile(day_diff, 0.725) model.au.lwr.deriv.55[i] <- quantile(day_diff, 0.225) model.au.upr.deriv.55[i] <- quantile(day_diff, 0.775) model.au.lwr.deriv.65[i] <- quantile(day_diff, 0.175) model.au.upr.deriv.65[i] <- quantile(day_diff, 0.825) model.au.lwr.deriv.75[i] <- quantile(day_diff, 0.125) model.au.upr.deriv.75[i] <- quantile(day_diff, 0.875) model.au.lwr.deriv.85[i] <- quantile(day_diff, 0.075) model.au.upr.deriv.85[i] <- quantile(day_diff, 0.925) model.au.lwr.deriv.99[i] <- quantile(day_diff, 0.005) model.au.upr.deriv.99[i] <- quantile(day_diff, 0.995) } # scaling factor for fixed elements on chart sf = max(model.au.upr.deriv[1:today+7]) / 1800 alphaPI = 0.08 plot_au_new <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("Daily Local COVID-19 cases (Vic)") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + geom_vline(xintercept=todaydate, color = "grey") + geom_vline(xintercept=todaydate + 7, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Projection of new daily cases of COVID-19", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1700*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1625*sf, label="Assumption: fitted to Richards' growth curve", hjust=0) + geom_ribbon(aes(ymin=model.au.lwr.deriv, ymax=model.au.upr.deriv), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.05, ymax=model.au.upr.deriv.05), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.15, ymax=model.au.upr.deriv.15), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.25, ymax=model.au.upr.deriv.25), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.35, ymax=model.au.upr.deriv.35), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.45, ymax=model.au.upr.deriv.45), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.55, ymax=model.au.upr.deriv.55), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.65, ymax=model.au.upr.deriv.65), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.75, ymax=model.au.upr.deriv.75), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.85, ymax=model.au.upr.deriv.85), fill="dark green", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.au.lwr.deriv.99, ymax=model.au.upr.deriv.99), fill="dark green", colour=NA, alpha=0.03) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_line(aes(y=model.au.fit.deriv), colour="grey20", alpha=0.3, size = 1, linetype=2) + geom_line(aes(y=model.au.upr.deriv), colour="grey20", alpha=0.3, linetype=2) + geom_line(aes(y=model.au.lwr.deriv), colour="grey20", alpha=0.3, linetype=2) + annotate("label", x=(todaydate + 7), y=model.au.fit.deriv[today + 7], label=round(model.au.fit.deriv[today + 7]), size=3) + annotate("text", x=(todaydate + 7), y=model.au.upr.deriv[today + 7], label=round(model.au.upr.deriv[today + 7]), size=3) + annotate("text", x=(todaydate + 7), y=model.au.lwr.deriv[today + 7], label=round(model.au.lwr.deriv[today + 7]), size=3) + annotate("label", x=(todaydate), y=1800*sf, label=au_covid$date[today], size=3) + annotate("label", x=(todaydate + 7), y=85*sf, label=au_covid$date[today+7], size=2.5) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) # scaling factor for fixed elements on chart sf = max(model.gompertz.upr.deriv[1:today+7]) / 1800 alphaPI = 0.08 plot_au_new_gompertz <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("Daily Local COVID-19 cases (Vic)") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + geom_vline(xintercept=todaydate, color = "grey") + geom_vline(xintercept=todaydate + 7, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Projection of new daily cases of COVID-19", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1700*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1625*sf, label="Assumption: fitted to Gompertz curve", hjust=0) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv, ymax=model.gompertz.upr.deriv), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.05, ymax=model.gompertz.upr.deriv.05), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.15, ymax=model.gompertz.upr.deriv.15), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.25, ymax=model.gompertz.upr.deriv.25), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.35, ymax=model.gompertz.upr.deriv.35), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.45, ymax=model.gompertz.upr.deriv.45), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.55, ymax=model.gompertz.upr.deriv.55), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.65, ymax=model.gompertz.upr.deriv.65), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.75, ymax=model.gompertz.upr.deriv.75), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.85, ymax=model.gompertz.upr.deriv.85), fill="blue", colour=NA, alpha=alphaPI) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv.99, ymax=model.gompertz.upr.deriv.99), fill="blue", colour=NA, alpha=0.03) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_line(aes(y=model.gompertz.fit.deriv), colour="grey20", alpha=0.3, size = 1, linetype=2) + geom_line(aes(y=model.gompertz.upr.deriv), colour="grey20", alpha=0.3, linetype=2) + geom_line(aes(y=model.gompertz.lwr.deriv), colour="grey20", alpha=0.3, linetype=2) + annotate("label", x=(todaydate + 7), y=model.gompertz.fit.deriv[today + 7], label=round(model.gompertz.fit.deriv[today + 7]), size=3) + annotate("text", x=(todaydate + 7), y=model.gompertz.upr.deriv[today + 7], label=round(model.gompertz.upr.deriv[today + 7]), size=3) + annotate("text", x=(todaydate + 7), y=model.gompertz.lwr.deriv[today + 7], label=round(model.gompertz.lwr.deriv[today + 7]), size=3) + annotate("label", x=(todaydate), y=1800*sf, label=au_covid$date[today], size=3) + annotate("label", x=(todaydate + 7), y=85*sf, label=au_covid$date[today+7], size=2.5) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) # Comparison of data trends (moving averages and generalised additive model), the Gompertz # model, and the Richards' growth curve model # scaling factor for fixed elements on chart sf = 600 / 1800 p1 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("Confirmed local COVID-19 cases (Vic)") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Moving Avg (7 days)", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-6"), y=1600*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_ma(n=7, colour="orange", linetype=1, size = 1) + annotate("label", x=(todaydate), y=0*sf, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) p2 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Generalised additive model", size=5.5, hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_smooth(colour="red3", fill="red", alpha=0.1, method="gam") + annotate("label", x=(todaydate), y=0*sf, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) p3 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("Confirmed local COVID-19 cases (Vic)") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Gompertz model projections", size=5.5, hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_ribbon(aes(ymin=model.gompertz.lwr.deriv, ymax=model.gompertz.upr.deriv), fill="blue", colour=NA, alpha=0.1) + geom_line(aes(y=model.gompertz.fit.deriv), colour="blue", size = 1, linetype=1) + annotate("label", x=(todaydate), y=0*sf, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) p4 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("") + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-6"), y=1800*sf, label="Richards' curve projections", size=5.5, hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_ribbon(aes(ymin=model.au.lwr.deriv, ymax=model.au.upr.deriv), fill="dark green", colour=NA, alpha=0.1) + geom_line(aes(y=model.au.fit.deriv), colour="dark green", size = 1, linetype=1) + annotate("label", x=(todaydate), y=0*sf, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-4"),todaydate+10) ) # grid.arrange(p1, p2, p3, p4, nrow = 2) # Evaluation of the model estimates with reality. sf = 0.8 / 1800 e1 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("Error (%) from reality") + geom_vline(xintercept=todaydate, color = "grey") + geom_hline(yintercept=0, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Gompertz - 7 day projection", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-15"), y=1500*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + geom_ribbon(aes(ymin=model.history.g.error$g.7lwr, ymax=model.history.g.error$g.7upr), fill="blue", colour=NA, alpha=0.1) + geom_line(aes(y=model.history.g.error$g.7fit), colour="blue", size = 1, linetype=1) + annotate("label", x=(todaydate), y=900*sf, label=au_covid$date[today], size=3) + scale_y_continuous(labels = scales::percent) + coord_cartesian(ylim = c(-0.8,0.8), xlim = c(as.Date("2021-8-14"),todaydate+4) ) e2 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("") + ylab("") + geom_vline(xintercept=todaydate, color = "grey") + geom_hline(yintercept=0, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Gompertz - 14 day projection", size=5.5, hjust=0) + geom_ribbon(aes(ymin=model.history.g.error$g.14lwr, ymax=model.history.g.error$g.14upr), fill="blue", colour=NA, alpha=0.1) + geom_line(aes(y=model.history.g.error$g.14fit), colour="blue", size = 1, linetype=1) + annotate("label", x=(todaydate), y=900*sf, label=au_covid$date[today], size=3) + scale_y_continuous(labels = scales::percent) + coord_cartesian(ylim = c(-0.8,0.8), xlim = c(as.Date("2021-8-14"),todaydate+4) ) e3 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("Date") + ylab("Error (%) from reality") + geom_vline(xintercept=todaydate, color = "grey") + geom_hline(yintercept=0, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Richards' - 7 day projection", size=5.5, hjust=0) + geom_ribbon(aes(ymin=model.history.r.error$r.7lwr, ymax=model.history.r.error$r.7upr), fill="dark green", colour=NA, alpha=0.1) + geom_line(aes(y=model.history.r.error$r.7fit), colour="dark green", size = 1, linetype=1) + annotate("label", x=(todaydate), y=900*sf, label=au_covid$date[today], size=3) + scale_y_continuous(labels = scales::percent) + coord_cartesian(ylim = c(-0.8,0.8), xlim = c(as.Date("2021-8-14"),todaydate+4) ) e4 <- ggplot(data=au_covid, aes(x=as.Date(date), y=new)) + xlab("Date") + ylab("") + geom_vline(xintercept=todaydate, color = "grey") + geom_hline(yintercept=0, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Richards' - 14 day projection", size=5.5, hjust=0) + geom_ribbon(aes(ymin=model.history.r.error$r.14lwr, ymax=model.history.r.error$r.14upr), fill="dark green", colour=NA, alpha=0.1) + geom_line(aes(y=model.history.r.error$r.14fit), colour="dark green", size = 1, linetype=1) + annotate("label", x=(todaydate), y=900*sf, label=au_covid$date[today], size=3) + scale_y_continuous(labels = scales::percent) + coord_cartesian(ylim = c(-0.8,0.8), xlim = c(as.Date("2021-8-14"),todaydate+4) ) # grid.arrange(e1, e2, e3, e4, nrow = 2) # Gompertz projection asymptote (projected final size of outbreak) # How the model changed with time. This is parameter "d" in the growth models. # Scaling factor for fixed chart elements sf = max(model.history$g.d[15:today])*1.05 / 1800 f1 <- ggplot(data=model.history, aes(x=as.Date(date), y=g.d)) + xlab("Date") + ylab("Total number of COVID-19 cases") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + scale_y_continuous(labels = comma) + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Projection of FINAL SIZE of the outbreak", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-15"), y=1700*sf, label="Assumption: fitted to Gompertz model", hjust=0) + annotate("text", x=as.Date("2021-8-15"), y=1625*sf, label= paste(au_covid$date[today], "@", time, "GMT+10"), hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_smooth(method="gam", colour="blue", fill="blue", size=1, linetype=1, alpha=0.1) + annotate("label", x=(todaydate), y=500, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-14"),todaydate+4) ) f2 <- ggplot(data=model.history, aes(x=as.Date(date), y=r.d)) + xlab("Date") + ylab("") + scale_x_date(date_breaks = "1 weeks", date_labels = "%b %d") + scale_y_continuous(labels = comma) + geom_vline(xintercept=todaydate, color = "grey") + annotate("label", x=as.Date("2021-8-15"), y=1800*sf, label="Projection of FINAL SIZE of the outbreak", size=5.5, hjust=0) + annotate("text", x=as.Date("2021-8-15"), y=1700*sf, label="Assumption: fitted to Richards' growth model", hjust=0) + geom_point(colour="white", size = 3.2) + geom_point(colour="grey20", size = 2.8) + geom_smooth(method="gam", colour="dark green", fill="dark green", size=1, linetype=1, alpha=0.1) + annotate("label", x=(todaydate), y=500, label=au_covid$date[today], size=3) + coord_cartesian(ylim = c(0, 1800*sf), xlim = c(as.Date("2021-8-14"),todaydate+4) ) # grid.arrange(f1, f2, nrow = 1) # Model outputs plot_au plot_au_gompertz plot_au_new plot_au_new_gompertz grid.arrange(p1, p2, p3, p4, nrow = 2) grid.arrange(e1, e2, e3, e4, nrow = 2) grid.arrange(f1, f2, nrow = 1) au_covid$date[today] today summary(model.r) model.au model.au.fit.deriv summary(model.g) model.gompertz model.gompertz.fit.deriv model.history.g.7fit.error model.history.g.14fit.error model.history.r.7fit.error model.history.r.14fit.error