diff --git a/README.md b/README.md
index d025daa..891ef63 100644
--- a/README.md
+++ b/README.md
@@ -19,12 +19,32 @@ The parametric Waiting Time Distribution has been developed to provide a data-dr
 This implementation of estimation procedures mimics that found in the corresponding Stata package (wtdttt). The model is fit using maximum likelihood estimation.
 
+## Example
+
+```R
+library(haven)
+df <- read_dta(system.file("extdata", "ranwtddat_discdates.dta", package="Rwtdttt"))
+
+fit_r <- ranwtdttt(data = df,
+                   rxdate ~ dlnorm(logitp, mu, lnsigma),
+                   id = "pid",
+                   start = as.Date('2014-01-01'),
+                   end = as.Date('2014-12-31'),
+                   reverse = T, robust = F
+)
+
+summary(fit_r)
+```
+Please see [examples.R](sandpit/examples.R) for more.
+
 ## More
 
 ### Known bugs
 
 Robust variance calculation, used by default in the ranwtdttt() function, is very slow. We are working on improving this.
 
+Other planned improvements are listed in [TODO](sandpit/TODO).
+
 ### Bug reports, feature requests etc.
 
 This is a young project with some rough edges. Please submit any bug reports, feature requests, and comments as a GitHub issue.
 
@@ -40,6 +60,8 @@ The package was developed by:
 * Malcolm Gillies
 * Olga Paoletti
 
+Thank you to Jesper Hallas for kindly allowing us to reproduce the synthetic dispensing data in the `drugpakud.dta` dataset.
+
 ### License
 
 Rwtdttt is licensed under the GNU General Public License, Version 3.
diff --git a/inst/extdata/score_ex.xlsx b/inst/extdata/score_ex.xlsx
new file mode 100644
index 0000000..9190808
Binary files /dev/null and b/inst/extdata/score_ex.xlsx differ
diff --git a/sandpit/d1functions.R b/sandpit/d1functions.R
deleted file mode 100644
index 5defc42..0000000
--- a/sandpit/d1functions.R
+++ /dev/null
@@ -1,60 +0,0 @@
-#' Derivative of WTD log-likelihood wrt omega (=logitp)
-
-d1domega <- function(dfun, x, logitp, param1, param2 = 0, delta = 1) {
-  prob <- exp(logitp) / (1 + exp(logitp))
-
-
-  if (dfun == dlnorm) {
-    mu <- param1
-    sigma <- exp(param2)
-    mean <- exp(mu + exp(2 * lnsigma)/2)
-
-    gfun <- pnorm(log(x), mu, sigma, lower.tail = FALSE) / mean
-  }
-
-  if (dfun == dweib) {
-    beta <- exp(param1)
-    alpha <- exp(param2)
-
-    gfun <- exp(-((x*exp(lnbeta))^exp(lnalpha)) -
-                  lgamma(1+1/exp(lnalpha))) * exp(lnbeta)
-  }
-
-  if (dfun == dexp) {
-    beta <- exp(param1)
-    gfun <- exp(-((x*exp(lnbeta))) ) * exp(lnbeta)
-  }
-
-
-  d1 <- prob * (delta * gfun - 1) / (delta * gfun * exp(logitp) + 1)
-
-  return(d1lomega = d1)
-}
-
-#' Derivative of WTD log-likelihood wrt param1 (and param2)
-
-d1dtheta <- function(dfun, logitp, param1, param2 = 0, delta = 1) {
-  prob <- exp(logitp) / (1 + exp(logitp))
-
-  if (dfun == dlnorm) {
-    mu <- param1
-    # s is also known as lnsigma
-    s <- param2
-    sigma <- exp(param2)
-    mean <- exp(mu + exp(2 * lnsigma)/2)
-
-    gfun <- pnorm(log(x), mu, sigma, lower.tail = FALSE) / mean
-
-
-    y <- exp(-s) * (log(x) - mu)
-
-    d1FRDdmu <-(exp(-s) * dnorm(-y) - pnorm(-y)) / mean
-    d1FRDds <- (exp(-s) * y * dnorm(-y) - exp(2 * s) * pnorm(-y)) / mean
-
-    d1ldmu <- d1FRDmu / (gfun + exp(-logitp) + 1)
-    d1lds < d1FRDs / (gfun + exp(-logitp) + 1)
-    return(d1lmu = d1ldmu, d1lnsigma = d1lds)
-  }
-
-
-}
diff --git a/sandpit/try2.R b/sandpit/examples.R
similarity index 99%
rename from sandpit/try2.R
rename to sandpit/examples.R
index 758d822..b78aeab 100644
--- a/sandpit/try2.R
+++ b/sandpit/examples.R
@@ -1,7 +1,7 @@
 # Quick test
 
 library(bbmle)
-library(Rwtdttt)
+#library(Rwtdttt)
 library(haven)
 library(data.table)
 library(tidyverse)
@@ -71,7 +71,6 @@ summary(fit1)
 df <-
   haven::read_dta(system.file("extdata", "wtddat_covar.dta", package="Rwtdttt"))
 ### fit waiting time distribution (exp)
-# XXXX need to change wtdttt(... subset= ...) to use non-standard evaluation
 fit3 <- wtdttt(data = df,
                last_rxtime ~ dexp(logitp, lnbeta),
                start = 0, end = 1, reverse = T, subset = packsize==200
diff --git a/sandpit/good_practises.R b/sandpit/good_practises.R
deleted file mode 100644
index 3211826..0000000
--- a/sandpit/good_practises.R
+++ /dev/null
@@ -1,31 +0,0 @@
-#######################################################
-#                                                     #
-# Implementing good practises for creating a package  #
-#                                                     #
-#######################################################
-
-# why we moved all .R files out of the R folder?
-
-# open a new .R file within the R folder, where the new function will be stored
-use_r("dlnorm")
-# makes functions available, although it does not exist in the global environment
-load_all()
-# check if function exists in the Global Environment
-exists("dlnorm", where = globalenv(), inherits = FALSE)
-
-# If you see TRUE instead of FALSE, that indicates you’re still using a script-oriented workflow and sourcing your functions. Here’s how to get back on track:
-
-# 1) Clean out the global environment and restart R.
-# 2) Re-attach devtools with library(devtools) and re-load regexcite with load_all().
-# 3) Redefine the test input x and call strsplit1(x, split = ",") again. This should work!
-# 4) Run exists("strsplit1", where = globalenv(), inherits = FALSE) again and you should see FALSE
-
-# gold standard for checking that an R package is in full working order
-check()
-
-# converting dlnorm()’s special comment into man/dlnorm.Rd
-# updates the NAMESPACE file, based on @export tags found in roxygen comments
-document()
-
-# preview of the help file
-help(dlnorm)
diff --git a/sandpit/mlwtdttt_lnorm.R b/sandpit/mlwtdttt_lnorm.R
deleted file mode 100644
index f5e0d3d..0000000
--- a/sandpit/mlwtdttt_lnorm.R
+++ /dev/null
@@ -1,6 +0,0 @@
-mlwtdttt_lnorm <- function(logitp, mu, lnsigma) {
-
-  a <- -sum(log(plogis(logitp) * pnorm(-(log(obstime)-mu)/exp(lnsigma))/exp(mu + exp(2*lnsigma)/2)+(plogis(-logitp))/delta))
-  return(a)
-
-}
diff --git a/sandpit/sandwich_estimation_code_malcolm.R b/sandpit/sandwich_estimation_code_malcolm.R
deleted file mode 100644
index c78d645..0000000
--- a/sandpit/sandwich_estimation_code_malcolm.R
+++ /dev/null
@@ -1,80 +0,0 @@
-# proof of concept sandwich estimation
-
-library(haven)
-library(bbmle)
-library(numDeriv)
-
-
-tmp <- read_dta("inst/extdata/score_ex.dta")
-
-tmp$packcat <- factor(tmp$packsize)
-
-delta <- 1
-
-dlnorm2 <- function(x, logitp, mu, lnsigma, log = TRUE) {
-
-  prob <- exp(logitp) / (1 + exp(logitp))
-  sigma <- exp(lnsigma)
-  mean <- exp(mu + exp(2 * lnsigma)/2)
-
-  if (log == FALSE) {
-
-    prob * pnorm(log(x), mu, sigma, lower.tail = FALSE) / mean + (1 - prob) / delta
-
-  } else {
-
-    log(prob * pnorm(log(x), mu, sigma, lower.tail = FALSE) / mean + (1 - prob) / delta)
-
-  }
-
-}
-
-
-fit1 <- mle2(rxtime ~ dlnorm2(logitp, mu, lnsigma),
-             parameters = list(logitp ~ packcat, mu ~ packcat, lnsigma ~ packcat),
-             start = list(logitp = 0, mu = log(delta/5), lnsigma = 0),
-             data = tmp)
-
-summary(fit1)
-
-score_mat <- t(sapply(1:nrow(tmp),
-
-  function(i) {
-
-    logl <- function(x) {
-
-      mm <- model.matrix(~packcat, data=tmp)[i,]
-
-      dlnorm2(as.numeric(tmp[i, "rxtime"]),
-
-        logitp=t(x[1:2]) %*% mm,
-
-        mu=t(x[3:4]) %*% mm,
-
-        lnsigma=t(x[5:6]) %*% mm,
-
-        log=T)
-
-    }
-
-    grad(logl, fit1@coef)
-
-  }
-
-))
-
-colnames(score_mat) <- names(fit1@coef)
-
-bread <- fit1@vcov
-
-applyTapplySum <- function(X,index) apply(X, 2, function(col) tapply(col, index, sum))
-
-psi <- applyTapplySum(as.matrix(score_mat), tmp$pid) # sum by cluster
-
-meat <- crossprod(as.matrix(psi))
-
-rownames(meat) <- colnames(meat) <- colnames(psi)
-
-sand <- (NROW(unique(tmp$pid))/(NROW(unique(tmp$pid))-1)) * (bread %*% meat %*% bread)
-
-sand
diff --git a/sandpit/try.R b/sandpit/try.R
deleted file mode 100644
index 12b5149..0000000
--- a/sandpit/try.R
+++ /dev/null
@@ -1,97 +0,0 @@
-rm(list = ls())
-
-library(data.table)
-library(tidyverse)
-library(haven)
-library(bbmle)
-
-# load data
-df <- haven::read_dta("data/wtddat_dates.dta")
-df <- as.data.table(df)
-
-# create time variable
-df_2 <- create_time(data = df, event.date.colname = rx1time, start = as.Date('2014-01-01'))
-
-
-# fit waiting time distribution
-fit1 <- wtdttt(data = df_2,
-               obstime ~ dlnorm(logitp, mu, lnsigma),
-               event.date.colname = rx1time,
-               event.time.colname = obstime,
-               start = as.Date('2014-01-01'), end = as.Date('2014-12-31'),
-               init = list(mu = 4.23, lnsigma = -1.26),
-               )
-
-###### ERROR
-# if the user provides the init argument, the following error is thrown:
-# Error in minuslogl(logitp = 1.65, logitp = 1.61280168302535, mu = 4.23, :
-#   formal argument "logitp" matched by multiple actual arguments
-#
-# SOLVED removing lpinit as element of the list argument 'init'
-#
-# N.B. fit1@fullcoef includes the estimates of three parameters (logitp, mu, lnsigma) if init values are NOT provided by the user
-# fit2@fullcoef includes the estimates of two parameters (mu, lnsigma) if init values ARE provided by the user
-#
-# commented Henrik rows in wtdttt on redefinition of density functions
-# run again external dlnorm function in dfunctions.R (with delta=365 in the definition of the function)
-
-#############
-
-summary(fit1)
-
-predict(fit1, family = "lnorm")
-predict(fit1, family = "lnorm", type = "prob", distrx=c(2,100))
-
-plot(fit1)
-
-########################### RANDOM INEDX DATE
-
-# by hand
-
-# create sampling window and define a random index date for each individual
-set.seed(84)
-
-df_r <- df %>%
-  mutate(r_index_date = sample(as.Date(as.Date("2014-01-01"):as.Date("2014-12-31")), nrow(df), replace=T),
-         obstime = as.numeric(rx1time - r_index_date))
-
-# remove dispensations not within the observation window
-df_sel <- df_r %>%
-  filter(obstime>=0)
-
-########## ERROR
-# i) if init values provided:
-# Error in minuslogl(logitp = 1.76, logitp = 1.76026080216868, mu = 4.23, :
-#   formal argument "logitp" matched by multiple actual arguments
-#
-# --> i) SOLVED: revoming lpinit from the init values provided
-#
-# ii) if not provided:
-# Error in optim(par = c(logitp = 1.76026080216868, mu = -Inf, lnsigma = NaN :
-#   valore non finito passato da optim
-###########
-
-# calling the created function
-df_sel <- create_time_random(data = df, event.date.colname = rx1time)
-
-###########
-
-# fit waiting time distribution
-fit2 <- wtdttt(data = df_sel,
-               obstime ~ dlnorm(logitp, mu, lnsigma),
-               event.date.colname = rx1time,
-               event.time.colname = obstime,
-               start = as.Date('2014-01-01'), end = as.Date('2014-12-31'),
-               init = list(mu = 4.23, lnsigma = -1.26)
-)
-
-summary(fit2)
-
-predict(fit2, family = "lnorm")
-predict(fit2, family = "lnorm", type = "prob", distrx=c(2,100))
-
-plot(fit2)
-
-################
-
-
diff --git a/sandpit/try_sandwich.R b/sandpit/try_sandwich.R
deleted file mode 100644
index 19f9767..0000000
--- a/sandpit/try_sandwich.R
+++ /dev/null
@@ -1,43 +0,0 @@
-# proof of concept sandwich estimation
-
-library(bbmle)
-library(Rwtdttt)
-library(haven)
-library(data.table)
-library(readxl)
-
-# load data
-# test data for sandwich from Henrik
-tmp <- haven::read_dta(system.file("extdata", "score_ex.dta", package="Rwtdttt"))
-
-tmp$packcat <- factor(tmp$packsize)
-
-cl <- tmp[order(tmp$pid),]
-
-fit1 <- wtdttt(data = cl,
-               rxtime ~ dlnorm(logitp, mu, lnsigma),
-               parameters = list(logitp ~ packcat, mu ~ packcat, lnsigma ~ packcat),
-               #id = "pid", # do this to stop wtdttt() from filtering the data
-               start = 0, end = 1
-)
-
-summary(fit1)
-
-fit1@idvar <- "pid" # XXXX nasty hack to add the ids back in again for the sandwich
-
-Rwtdttt:::sand_vcov(fit1)
-
-# Equivalent Stata output
-
-# . matrix list Dcl
-#
-# symmetric Dcl[6,6]
-#                   logitp:     logitp:         mu:         mu:    lnsigma:    lnsigma:
-#                    ipack       _cons       ipack       _cons       ipack       _cons
-#  logitp:ipack   .61015454
-#  logitp:_cons  -.2340662    .2340662
-#      mu:ipack   .01835992  -.00759435   .04215239
-#      mu:_cons  -.00759435   .00759435  -.032557     .032557
-# lnsigma:ipack   .0696015   -.045143    -.03004152   .02833014   .12762532
-# lnsigma:_cons  -.045143     .045143     .02833014  -.02833014  -.07785109   .07785109
-
diff --git a/sandpit/wtd_bbmle_try.R b/sandpit/wtd_bbmle_try.R
deleted file mode 100644
index 617857c..0000000
--- a/sandpit/wtd_bbmle_try.R
+++ /dev/null
@@ -1,62 +0,0 @@
-library(tidyverse)
-library(bbmle)
-library(haven)
-
-
-wtddat <- read_dta("data/wtddat_covar.dta")
-
-wtddat <- mutate(wtddat,
-                 wtdtimes = 1 - last_rxtime,
-                 packcat = factor(packsize))
-
-delta <- 1
-
-dwtdttt <- function(x, logitp, par1, par2, log = FALSE, family = "lnorm") {
-
-  if (family=="lnorm") {
-    prob <- exp(logitp) / (1 + exp(logitp))
-    sigma <- exp(par2)
-    mean <- exp(par1 + exp(2 * par2)/2)
-
-    if (log == FALSE) {
-      prob * pnorm(log(x), par1, sigma, lower.tail = FALSE) / mean + (1 - prob) / delta
-    } else {
-      log(prob * pnorm(log(x), par1, sigma, lower.tail = FALSE) / mean + (1 - prob) / delta)
-    }
-
-  } else if (family=="weibull") {
-    prob <- exp(logitp) / (1 + exp(logitp))
-    alpha <- exp(par1)
-    beta <- exp(par2)
-
-    if (log == FALSE) {
-      prob * exp(-((x*exp(par2))^exp(par1)) - lgamma(1+1/exp(par1))) * exp(par2) + (1-prob)/delta
-    } else {
-      log(prob * exp(-((x*exp(par2))^exp(par1)) - lgamma(1+1/exp(par1))) * exp(par2) + (1-prob)/delta)
-    }
-  } else if (family=="exponential") {
-    prob <- exp(logitp) / (1 + exp(logitp))
-    alpha <- par1^0
-    beta <- exp(par2)
-
-    if (log == FALSE) {
-      prob * exp(-((x*exp(par2))^alpha) - lgamma(1+1/alpha)) * exp(par2) + (1-prob)/delta
-    } else {
-      log(prob * exp(-((x*exp(par2))^alpha) - lgamma(1+1/alpha)) * exp(par2) + (1-prob)/delta)
-    }
-  }
-}
-
-## Testing the density function - does it make sense?
-wtddat$wtddens <- dwtdttt(wtddat$wtdtimes,
-                          logitp = 1, par1 = log(.15), par2 = -0.4, log = FALSE, family = "exponential")
-ggplot(data = wtddat) +
-  geom_point(mapping = aes(x=wtdtimes, y=wtddens))
-
-
-## Covariates can now be included (SIC!!!)
-fit1 <- mle2(wtdtimes ~ dwtdttt(logitp, par1, par2, family = "exponential"),
-             parameters = list(logitp ~ packcat, par1 ~ packcat, par2 ~ 1),
-             start = list(logitp = 0, par1 = log(delta/5), par2 = 0),
-             data = wtddat)
-summary(fit1)
diff --git a/sandpit/wtd_class_test.R b/sandpit/wtd_class_test.R
deleted file mode 100644
index 0312f69..0000000
--- a/sandpit/wtd_class_test.R
+++ /dev/null
@@ -1,45 +0,0 @@
-# Rwtdttt
-# Combine Henrik, Malcolm & Sabrina's progress so far
-# Create a "wtd" model class and use it to set up a 'predict' method
-
-# Test it
-
-library(tidyverse)
-library(bbmle)
-library(haven)
-
-x <- read_dta("data/wtddat_dates.dta")
-
-# XXXX a bunch of preprocessing that should actually be
-# implemented in wtdttt()
-
-start <- as.Date('2014-01-01'); end<-as.Date('2014-12-31')
-x$obstime <- 0.5 + as.double(x$rx1time - start, units="days")
-
-delta <- as.double(end - start, units="days") + 1
-ntot <- nrow(x)
-nonprevend <- sum(x$obstime > (delta * 2/3))
-prp <- 1 - 3 * nonprevend / ntot
-lpinit <- qlogis(prp)
-
-muinit <- mean(log(x$obstime[x$obstime < 0.5 * delta]))
-lnsinit <- log(sd(log(x$obstime[x$obstime < 0.5 * delta])))
-
-# ###############
-
-x.w <- wtdttt(obstime ~ dlnorm(logitp, mu, lnsigma), data=x,
-              start=as.Date('2014-01-01'), end=as.Date('2014-12-31'), reverse=F,
-              init=list(logitp=lpinit, mu = muinit, lnsigma = lnsinit))
-
-summary(x.w)
-
-# Predict duration at default quantile=0.8
-predict(x.w)
-
-# Predict probability
-predict(x.w, type="prob", distrx=c(10,100))
-
-# Diagnostic plot
-plot(x.w)
-
-
diff --git a/sandpit/wtdttt.R b/sandpit/wtdttt.R
deleted file mode 100644
index 53a677f..0000000
--- a/sandpit/wtdttt.R
+++ /dev/null
@@ -1,50 +0,0 @@
-if (!require("zoo")) install.packages("zoo")
-library(zoo)
-if (!require("haven")) install.packages("haven")
-library(haven)
-if (!require("tidyverse")) install.packages("tidyverse")
-library(tidyverse)
-
-wtdttt <- function(time, start, end) {
-
-  # continuity correction for discrete data
-  tstart <- as.numeric(start) - 0.5
-  tend <- as.numeric(end) + 0.5
-  delta <- tend - tstart
-
-  # if(reverse==TRUE) {
-  #   obstime <- as.numeric(as.Date(tend)-as.Date(time)) }
-  # else {
-  #   obstime <- as.numeric(as.Date(time)-as.Date(tstart))
-  # }
-
-  time <- as.numeric(time)
-  obstime <- as.numeric(as.Date(time)-as.Date(tstart))
-
-  # initial values
-  ntot <- length(time)
-  nonprevend <- sum(obstime > (delta*2/3))
-  prp <- 1 - 3*nonprevend/ntot
-  lpinit <- log((prp)/(1-prp)) # logit
-
-  # df <- df %>%
-  #   mutate(logtime = case_when(
-  #     obstime
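
Note on the robust variance machinery prototyped in the deleted scripts above: `sandwich_estimation_code_malcolm.R` assembles the cluster-robust estimate in the standard way. Per-observation score vectors (numerical gradients of each observation's log-likelihood at the MLE) are summed within clusters, the cross-product of those cluster sums forms the "meat", and the model-based variance-covariance matrix forms the "bread". A minimal base-R sketch of that final assembly follows; the function name `sandwich_vcov` and its arguments are illustrative placeholders, not Rwtdttt API.

```R
# Minimal sketch of cluster-robust ("sandwich") variance assembly, mirroring
# the deleted proof-of-concept code. Assumed inputs (placeholders, not API):
#   score_mat: n x p matrix of per-observation score contributions
#   vc:        p x p model-based variance-covariance matrix (the "bread")
#   id:        length-n vector of cluster identifiers (e.g. patient ids)
sandwich_vcov <- function(score_mat, vc, id) {
  psi  <- rowsum(score_mat, group = id)  # sum score rows within each cluster
  meat <- crossprod(psi)                 # "meat": cross-product of cluster sums
  G    <- nrow(psi)                      # number of clusters
  (G / (G - 1)) * (vc %*% meat %*% vc)   # small-sample-corrected sandwich
}
```

The slowness flagged under Known bugs most likely comes from building the score matrix (one numerical gradient per observation, via numDeriv in the deleted script) rather than from this final matrix algebra, which is cheap.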