Title: | Analysis of Event Data with Two Time Scales |
---|---|
Description: | Tools for the analysis of time to event data with two time scales. The main goal of the analysis is to estimate a smooth hazard that varies over two time scales and also, if covariates are available, to estimate a proportional hazards model with such a two-dimensional baseline hazard. Functions are provided to prepare the raw data for estimation, to estimate and to plot the two-dimensional smooth hazard. Additionally, the cumulative incidence functions over two time scales can be estimated in a competing risks model. For details about the method please refer to Carollo et al. (2024) <doi:10.1002/sim.10297>. |
Authors: | Angela Carollo [aut, cre, cph] , Paul H.C. Eilers [aut], Jutta Gampe [aut] |
Maintainer: | Angela Carollo <[email protected]> |
License: | GPL-3 |
Version: | 1.0.0 |
Built: | 2024-12-24 09:32:32 UTC |
Source: | https://github.com/angelacar/twotimescales |
covariates_plot()
produces a plot of the covariates' effects ()
with confidence intervals, or of the Hazard Ratios (
) with confidence intervals.
covariates_plot( fitted_model, confidence_lev = 0.95, plot_options = list(), ... )
covariates_plot( fitted_model, confidence_lev = 0.95, plot_options = list(), ... )
fitted_model |
A list returned by the function |
confidence_lev |
The level of confidence for the CIs. Default is 0.95 ( |
plot_options |
A list of options for the plot:
|
... |
further arguments passed to plot() |
A plot of the covariates' effects. The different covariates are plotted on the x-axis, and on the y-axis the effects on the coefficient- or on the HR-scale are plotted. The main estimate is represented by a point and the CIs are added as vertical bars.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) x1 <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0) fakedata <- as.data.frame(cbind(id, u, s, ev, x1)) covs <- subset(fakedata, select = c("x1")) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5, individual = TRUE, covs = covs) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Covariates plot with default options covariates_plot(fakemod) # Plot the hazard ratios instead covariates_plot(fakemod, plot_options = list( HR = TRUE)) # Change confidence level covariates_plot(fakemod, confidence_lev = .99)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) x1 <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0) fakedata <- as.data.frame(cbind(id, u, s, ev, x1)) covs <- subset(fakedata, select = c("x1")) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5, individual = TRUE, covs = covs) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Covariates plot with default options covariates_plot(fakemod) # Plot the hazard ratios instead covariates_plot(fakemod, plot_options = list( HR = TRUE)) # Change confidence level covariates_plot(fakemod, confidence_lev = .99)
Computes the cumulative hazard surface over two time scales
from a fitted model. The function is also called internally from plot()
if the user wants to plot the cumulative hazard from a fitted model.
cumhaz2ts( fitted_model, plot_grid = NULL, cause = NULL, midpoints = FALSE, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
cumhaz2ts( fitted_model, plot_grid = NULL, cause = NULL, midpoints = FALSE, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
fitted_model |
(optional) The output of the function |
plot_grid |
(optional) A list containing the parameters to build a new
finer grid of intervals over
|
cause |
a character string with a short name for the cause (optional). |
midpoints |
A Boolean. Default is |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. This option is included mostly for the plotting function.
When using |
direction |
If cross-sectional one-dimensional curves are plotted, this
indicates whether the cutting points are located on the |
tmax |
The maximum value of |
A list with the following elements:
* Haz
a list of estimated hazard and associated SEs
(obtained from the function get_hazard_2d
);
* CumHaz
the cumulated hazard estimate over u
and s
;
* cause
(if provided) the short name for the cause.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod)
Cumulative incidence surface over two time scales
cuminc2ts(haz = list(), ds, cause = NULL)
cuminc2ts(haz = list(), ds, cause = NULL)
haz |
a list of cause-specific hazards |
ds |
the distance between two consecutive intervals over the |
cause |
is an optional vector of short names for the causes. It should be of the same length as the number of cause-specific cumulated hazards provided. |
a list with one cumulative incidence matrix for each cause-specific
hazard (named if a vector of short names is passed to cause
).
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod) # Fake cumulative incidence function 2ts fakecif2ts <- cuminc2ts(haz = list(fakecumhaz2ts$Haz$hazard), ds = .5)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod) # Fake cumulative incidence function 2ts fakecif2ts <- cuminc2ts(haz = list(fakecumhaz2ts$Haz$hazard), ds = .5)
exposure_events_1d()
computes aggregated measures of exposure
times and event counts starting from individual records of time at entry,
time at exit and event's indicator, over one time scale (s
).
exposures_events_1d(s_in = NULL, s_out, ev, bins)
exposures_events_1d(s_in = NULL, s_out, ev, bins)
s_in |
A vector of (possibly left truncated) times at entry. If this is not provided by the user, the function will consider a value of 0 for all observations. |
s_out |
A vector of times at event or censoring. |
ev |
A vector of events' indicators (possible values 0/1). |
bins |
A vector of interval breaks for discretization (see also |
The time scale s
is divided into bins of equal size, which are
provided as input to the function. Then, the time-at-risk for each
individual is split according to these bins, and an event indicator is
placed in the bin where the exit time is located. Finally, the individual
contributions are summed in each bin to provide a vector of total exposure
time and total event counts. See also prepare_data()
to
conveniently prepare individual data for the analysis with one, or two time
scales.
A list with the following elements:
R
A matrix of dimension by
containing the exposure times for each
individual separately.
r
A vector of exposure times.
Y
A matrix of dimension by
containing the event counts for each
individual separately
y
A vector of event counts.
If the length of the input vectors do not match, an error message is returned.
Angela Carollo [email protected] and Paul Eilers [email protected]
# ---- Bin colon cancer data by time since recurrence ---- # First create vector of bins bins1ts <- make_bins(s_in = reccolon2ts$entrys, s_out = reccolon2ts$timesr, ds = 30) bindata <- exposures_events_1d(s_in = reccolon2ts$entrys, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins1ts$bins_s)
# ---- Bin colon cancer data by time since recurrence ---- # First create vector of bins bins1ts <- make_bins(s_in = reccolon2ts$entrys, s_out = reccolon2ts$timesr, ds = 30) bindata <- exposures_events_1d(s_in = reccolon2ts$entrys, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins1ts$bins_s)
exposures_events_2d()
computes individual or aggregated
matrices of exposure times and event counts starting from individual
records of time at entry in the process (measured over the first time
scale), duration at entry in the process (measured over the second time
scale), duration at exit from the process (measured over the second time
scale), and event's indicator.
exposures_events_2d(u, s_in = NULL, s_out, ev, bins_list, individual = FALSE)
exposures_events_2d(u, s_in = NULL, s_out, ev, bins_list, individual = FALSE)
u |
A vector of fixed times at entry in the process, measured over the first time scale. |
s_in |
A vector of (possibly left truncated) times at entry. If this is not provided by the user, the function will consider a value of 0 for all observations. |
s_out |
A vector of times at event or censoring. |
ev |
A vector of events' indicators (possible values 0/1). |
bins_list |
is a list with the following (necessary) elements
(usually prepared by
|
individual |
A Boolean. Default is |
The fixed-time variable u
and the second time scale s
are divided into and
intervals, respectively. The extremes of these
intervals are provided as input to the function. First, the fixed-time at
entry is located in one of the nu bins that cover the whole range of
u
. Then, the time-at-risk for each individual is split according to
the bins that span the whole range of values for
s
, and an event
indicator is placed in the bin where the exit time is located. This is done
by calling the function exposure_events_1d
. If individual matrices of
exposure and events are required, then the function returns two arrays of
dimension by
by
. If aggregated results are preferred, the
individual contributions are summed in each bin to provide a matrix of
total exposure time and a matrix of total event counts, both of dimensions
by
. See also
prepare_data()
to conveniently prepare individual data
for the analysis with one, or two time scales.
A list with the following elements:
R
an array of exposure times: if individual == TRUE
,
then R
is an array of dimension by
by
,
otherwise is an array of dimension
by
Y
an array of event counts: if individual == TRUE
,
then Y
is an array of
dimension by
by
, otherwise is an array of
dimension
by
Angela Carollo [email protected]
# ---- Bin colon cancer data by time at randomization and time since recurrence ---- # First create vectors of bins (using function `make_bins()`) bins <- make_bins(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, du = 30, ds = 30) # Now bin data (note: the s_in argument is omitted because data are not left truncated) bindata2d <- exposures_events_2d(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins)
# ---- Bin colon cancer data by time at randomization and time since recurrence ---- # First create vectors of bins (using function `make_bins()`) bins <- make_bins(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, du = 30, ds = 30) # Now bin data (note: the s_in argument is omitted because data are not left truncated) bindata2d <- exposures_events_2d(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins)
exposures_events_Lexis()
computes aggregated matrices of
exposure times and event counts over two time
scales, on the Lexis diagram.
The time scales are t
and s
. This function uses functions from
the package popEpi
and from the package Epi
, and code shared by Bendix Carstensen
on the website bendixcarstensen.com. See also prepare_data()
to conveniently prepare individual data for the analysis with one,
or two time scales.
exposures_events_Lexis(t_in = NULL, t_out, s_in = NULL, s_out, ev, bins_list)
exposures_events_Lexis(t_in = NULL, t_out, s_in = NULL, s_out, ev, bins_list)
t_in |
(optional) A vector of entry times on the time scale |
t_out |
(optional) A vector of exit times on the time scale |
s_in |
(optional) A vector of entry times on the time scale |
s_out |
A vector of exit times on the time scale |
ev |
A vector of event indicators (possible values 0/1). |
bins_list |
A list with the following (necessary) elements:
|
A list with the following elements:
R
an array of exposure times of dimension by
Y
an array of event counts of dimension by
Angela Carollo [email protected]
Carstensen B, Plummer M, Laara E, Hills M (2022). Epi: A Package for Statistical Analysis in Epidemiology. R package version 2.47.1, https://CRAN.R-project.org/package=Epi.
Miettinen J, Rantanen M, Seppa K (2023). popEpi: Functions for Epidemiological Analysis using Population Data. R package version 0.4.11, https://cran.r-project.org/package=popEpi.
# ---- Bin colon cancer data by time since randomization and time since recurrence ---- # First create vectors of bins (using function `make_bins()`) bins <- make_bins(t_out = reccolon2ts$timedc, s_out = reccolon2ts$timesr, dt = 90, ds = 90) # Now bin data (note: the t_in and s_in arguments are omitted because data are not left truncated) bindata2d <- exposures_events_Lexis(t_out = reccolon2ts$timedc, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins)
# ---- Bin colon cancer data by time since randomization and time since recurrence ---- # First create vectors of bins (using function `make_bins()`) bins <- make_bins(t_out = reccolon2ts$timedc, s_out = reccolon2ts$timesr, dt = 90, ds = 90) # Now bin data (note: the t_in and s_in arguments are omitted because data are not left truncated) bindata2d <- exposures_events_Lexis(t_out = reccolon2ts$timedc, s_out = reccolon2ts$timesr, ev = reccolon2ts$status, bins = bins)
fit1ts()
fits a smooth hazard model with one time scale.
Three methods are implemented for the search of the optimal smoothing
parameter (and therefore optimal model): a numerical optimization of the
AIC or BIC of the model, a search for the minimum AIC or BIC of the
model over a grid of values for the smoothing parameter and the
estimation using a sparse mixed model representation of P-splines.
Construction of the B-splines basis and of the penalty matrix is
incorporated within the function. If a matrix of covariates is provided,
the function will estimate a model with covariates.
fit1ts( data1ts = NULL, y = NULL, r = NULL, Z = NULL, bins = NULL, Bbases_spec = list(), Wprior = NULL, pord = 2, optim_method = c("ucminf", "grid_search", "LMMsolver"), optim_criterion = c("aic", "bic"), lrho = 0, ridge = 0, control_algorithm = list(), par_gridsearch = list() )
fit1ts( data1ts = NULL, y = NULL, r = NULL, Z = NULL, bins = NULL, Bbases_spec = list(), Wprior = NULL, pord = 2, optim_method = c("ucminf", "grid_search", "LMMsolver"), optim_criterion = c("aic", "bic"), lrho = 0, ridge = 0, control_algorithm = list(), par_gridsearch = list() )
data1ts |
(optional) an object created by the function
|
y |
A vector of event counts of length ns, or an array of dimension ns by n. |
r |
A vector of exposure times of length ns, or an array of dimension ns by n. |
Z |
(optional) A regression matrix of covariates of dimension n by p. |
bins |
a list with the specification for the bins. This is created by
the function |
Bbases_spec |
A list with the specification for the B-splines basis with the following elements:
|
Wprior |
An optional vector of a-priori weights. |
pord |
The order of the penalty. Default is 2. |
optim_method |
The method to be used for optimization:
|
optim_criterion |
The criterion to be used for optimization:
|
lrho |
A number if |
ridge |
A ridge penalty parameter: default is 0. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
par_gridsearch |
A list of parameters for the grid_search:
|
Some functions from the R-package LMMsolver
are used here.
We refer the interested readers to https://biometris.github.io/LMMsolver/
for more detail on LMMsolver
and its usage.
An object of class haz1ts
, or of class haz1tsLMM
.
For objects of class haz1ts
this is
optimal_model
A list with:
alpha
The vector of estimated P-splines coefficients of length .
SE_alpha
The vector of estimated Standard Errors for the alpha
coefficients,
of length .
beta
The vector of estimated covariate coefficients of length
(if model with covariates).
se_beta
The vector of estimated Standard Errors for the
beta
coefficients of length (if model with covariates).
eta
or eta0
. The vector of values of the (baseline) linear predictor
(log-hazard) of length .
H
The hat-matrix.
Cov
The full variance-covariance matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline basis Bs
(this is a list for
compatibility with functions in 2d).
optimal_logrho
The optimal value of log10(rho_s)
.
P_optimal
The optimal penalty matrix P.
AIC
(if par_gridsearch$return_aic == TRUE
) The vector of AIC values.
BIC
(if par_gridsearch$return_bic == TRUE
) The vector of BIC values.
Objects of class haz1tsLMM
have a slight different structure. They are
a list with:
optimal_model
an object of class LMMsolve
AIC_BIC
a list with, among other things, the AIC and BIC values and the
ED of the model
n_events
the number of events
ns
the number of bins over the s-axis
cs
the number of B-splines over the s-axis
covariates
an indicator for PH model
Boer, Martin P. 2023. “Tensor Product P-Splines Using a Sparse Mixed Model Formulation.” Statistical Modelling 23 (5-6): 465–79. https://doi.org/10.1177/1471082X231178591.#'
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options, that is ucminf optimization mod1 <- fit1ts(dt1ts) ## fitting with LMMsolver mod2 <- fit1ts(dt1ts, optim_method = "LMMsolver") ## preparing the data - covariates dt1ts_cov <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180, individual = TRUE, covs = c("rx", "node4", "sex")) ## fitting the model with fit1ts() - grid search over only two log_10(rho_s) values mod3 <- fit1ts(dt1ts_cov, optim_method = "grid_search", lrho = c(1, 1.5))
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options, that is ucminf optimization mod1 <- fit1ts(dt1ts) ## fitting with LMMsolver mod2 <- fit1ts(dt1ts, optim_method = "LMMsolver") ## preparing the data - covariates dt1ts_cov <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180, individual = TRUE, covs = c("rx", "node4", "sex")) ## fitting the model with fit1ts() - grid search over only two log_10(rho_s) values mod3 <- fit1ts(dt1ts_cov, optim_method = "grid_search", lrho = c(1, 1.5))
fit1tsmodel_ucminf()
performs a numerical optimization of the
AIC or BIC of the one time scale model.
It finds the optimal values of and returns the estimated
optimal model.
See also
ucminf::ucminf()
.
fit1tsmodel_ucminf( r, y, Z = NULL, lrho = 0, Bs, Ds, Wprior = NULL, optim_criterion = c("aic", "bic"), control_algorithm = list() )
fit1tsmodel_ucminf( r, y, Z = NULL, lrho = 0, Bs, Ds, Wprior = NULL, optim_criterion = c("aic", "bic"), control_algorithm = list() )
r |
A vector of exposure times of length ns, or an array of dimension ns by n. |
y |
A vector of event counts of length ns, or an array of dimension ns by n. |
Z |
(optional) A regression matrix of covariates of dimension n by p. |
lrho |
A starting value for |
Bs |
A matrix of B-splines for the time scale |
Ds |
The difference matrix of the penalty. |
Wprior |
An optional vector of a-priori weights. |
optim_criterion |
The criterion to be used for optimization:
|
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
An object of class haz1ts
with the following elements:
optimal_model
A list containing the results of the optimal model.
optimal_logrho
The optimal value of log10(rho_s)
.
P_optimal
The optimal penalty matrix P.
Nielsen H, Mortensen S (2024). ucminf: General-Purpose Unconstrained Non-Linear Optimization. R package version 1.2.2, https://CRAN.R-project.org/package=ucminf
fit2ts()
fits a smooth hazard model with two time scales.
Three methods are implemented for the search of the optimal smoothing
parameters (and therefore optimal model): a numerical optimization of the
AIC or BIC of the model, a search for the minimum AIC or BIC of the
model over a grid of log_10
values for the smoothing parameters, and a
solution that uses a sparse mixed model representation of the P-spline model to
estimate the smoothing parameters.
Construction of the B-splines bases and of the penalty matrix is
incorporated within the function. If a matrix of covariates is provided,
the function will estimate a model with covariates.
fit2ts( data2ts = NULL, Y = NULL, R = NULL, Z = NULL, bins = NULL, Bbases_spec = list(), pord = 2, optim_method = c("ucminf", "grid_search", "LMMsolver"), optim_criterion = c("aic", "bic"), lrho = c(0, 0), Wprior = NULL, ridge = 0, control_algorithm = list(), par_gridsearch = list() )
fit2ts( data2ts = NULL, Y = NULL, R = NULL, Z = NULL, bins = NULL, Bbases_spec = list(), pord = 2, optim_method = c("ucminf", "grid_search", "LMMsolver"), optim_criterion = c("aic", "bic"), lrho = c(0, 0), Wprior = NULL, ridge = 0, control_algorithm = list(), par_gridsearch = list() )
data2ts |
(optional) an object of class created by the function
|
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
bins |
a list with the specification for the bins. This is created by
the function |
Bbases_spec |
A list with the specification for the B-splines basis with the following elements:
|
pord |
The order of the penalty. Default is 2. |
optim_method |
The method to be used for optimization:
|
optim_criterion |
The criterion to be used for optimization:
|
lrho |
A vector of two elements if |
Wprior |
An optional matrix of a-priori weights. |
ridge |
A ridge penalty parameter: default is 0. This is useful when, in
some cases the algorithm shows convergence problems. In this case, set to a small
number, for example |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
par_gridsearch |
A list of parameters for the grid_search:
|
Some functions from the R-package LMMsolver
are used here.
We refer the interested readers to https://biometris.github.io/LMMsolver/
for more details on LMMsolver
and its usage.
An object of class haz2ts
, or of class haz2tsLMM
.
For objects of class haz2ts
this is
optimal_model
A list with :
Alpha
The matrix of estimated P-splines coefficients of dimension
by
.
Cov_alpha
The variance-covariance matrix of the Alpha
coefficients,
of dimension by
.
beta
The vector of length p of estimated covariates coefficients
(if model with covariates).
Cov_beta
The variance-covariance matrix of the beta
coefficients,
of dimension by
(if model with covariates).
SE_beta
The vector of length of estimated Standard Errors for the
beta
coefficients (if model with covariates)..
Eta
or Eta0
The matrix of values of the (baseline) linear predictor
(log-hazard) of dimension by
.
H
The hat-matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline bases Bu
and Bs
optimal_logrho
A vector with the optimal values of log10(rho_u)
and
log10(rho_s)
.
P_optimal
The optimal penalty matrix P.
AIC
(if par_gridsearch$return_aic == TRUE
) The matrix of AIC values.
BIC
(if par_gridsearch$return_bic == TRUE
) The matrix of BIC values.
Objects of class haz2tsLMM
have a slight different structure. They are
a list with:
optimal_model
an object of class LMMsolve
AIC_BIC
a list with, among other things, the AIC and BIC values and the
ED of the model
n_events
the number of events
nu
the number of bins over the u-axis
ns
the number of bins over the s-axis
cu
the number of B-splines over the u-axis
cs
the number of B-splines over the s-axis
covariates
an indicator for PH model
Boer, Martin P. 2023. “Tensor Product P-Splines Using a Sparse Mixed Model Formulation.” Statistical Modelling 23 (5-6): 465–79. https://doi.org/10.1177/1471082X231178591. Carollo, Angela, Paul H. C. Eilers, Hein Putter, and Jutta Gampe. 2023. “Smooth Hazards with Multiple Time Scales.” arXiv Preprint: https://arxiv.org/abs/http://arxiv.org/abs/2305.09342v1
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1, 1.5, .5), seq(1, 1.5, .5))) # For more examples please check the vignettes!!! Running more complicated examples # here would imply longer running times...
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1, 1.5, .5), seq(1, 1.5, .5))) # For more examples please check the vignettes!!! Running more complicated examples # here would imply longer running times...
fit2tsmodel_ucminf()
performs a numerical optimization of the
AIC or BIC of the two time scales model.
It finds the optimal values of log_10(rho_u)
and log_10(rho_s)
and returns the estimated optimal model.
See also ucminf::ucminf()
.
fit2tsmodel_ucminf( Y, R, Z = NULL, optim_criterion = c("aic", "bic"), lrho = c(0, 0), Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list() )
fit2tsmodel_ucminf( Y, R, Z = NULL, optim_criterion = c("aic", "bic"), lrho = c(0, 0), Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list() )
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
optim_criterion |
The criterion to be used for optimization:
|
lrho |
A vector of two elements, the initial values for |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Iu |
An identity matrix of dimension nbu by nbu. |
Is |
An identity matrix of dimension nbs by nbs. |
Du |
The difference matrix over |
Ds |
The difference matrix over |
Wprior |
An optional matrix of a-priori weights. |
ridge |
A ridge penalty parameter: default is 0. This is useful when, in
some cases the algorithm shows convergence problems. In this case, set to a small
number, for example |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
An object of class haz2ts
with the following elements:
optimal_model
A list containing the results of the optimal model.
optimal_logrho
A vector with the optimal values of log10(rho_u)
and
log10(rho_s)
.
P_optimal
The optimal penalty matrix P.
Nielsen H, Mortensen S (2024). ucminf: General-Purpose Unconstrained Non-Linear Optimization. R package version 1.2.2, https://CRAN.R-project.org/package=ucminf
get_aic_fit_1d()
fits the 1ts model with or without individual
level covariates and it returns the AIC of the model.
See also fit1tsmodel_ucminf()
and fit1ts()
.
get_aic_fit_1d( lrho, r, y, Z = NULL, Bs, Ds, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
get_aic_fit_1d( lrho, r, y, Z = NULL, Bs, Ds, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
lrho |
A starting value for |
r |
A vector of exposure times of length ns, or an array of dimension ns by n. |
y |
A vector of event counts of length ns, or an array of dimension ns by n. |
Z |
(optional) A regression matrix of covariates of dimension n by p. |
Bs |
A matrix of B-splines for the time scale |
Ds |
The difference matrix of the penalty. |
Wprior |
An optional vector of a-priori weights. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
The aic
value of the fitted model.
get_aic_fit_2d()
fits the 2ts model with or without individual
level covariates and it returns the AIC of the model.
See also fit2tsmodel_ucminf()
and fit2ts()
.
get_aic_fit_2d( lrho, R, Y, Z = NULL, Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
get_aic_fit_2d( lrho, R, Y, Z = NULL, Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
lrho |
A vector of two elements, the initial values for |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Iu |
An identity matrix of dimension nbu by nbu. |
Is |
An identity matrix of dimension nbs by nbs. |
Du |
The difference matrix over |
Ds |
The difference matrix over |
Wprior |
An optional matrix of a-priori weights. |
ridge |
A ridge penalty parameter: default is 0. This is useful when, in
some cases the algorithm shows convergence problems. In this case, set to a small
number, for example |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
The aic
value of the fitted model.
get_bic_fit_1d()
fits the 1ts model with or without individual
level covariates and it returns the BIC of the model.
See also fit1tsmodel_ucminf()
and fit1ts()
.
get_bic_fit_1d( lrho, r, y, Z = NULL, Bs, Ds, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
get_bic_fit_1d( lrho, r, y, Z = NULL, Bs, Ds, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
lrho |
A starting value for |
r |
A vector of exposure times of length ns, or an array of dimension ns by n. |
y |
A vector of event counts of length ns, or an array of dimension ns by n. |
Z |
(optional) A regression matrix of covariates of dimension n by p. |
Bs |
A matrix of B-splines for the time scale |
Ds |
The difference matrix of the penalty. |
Wprior |
An optional vector of a-priori weights. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
the bic
value of the fitted model.
get_bic_fit_2d()
fits the 2ts model with or without individual
level covariates and it returns the BIC of the model.
See also fit2tsmodel_ucminf()
and fit2ts()
.
get_bic_fit_2d( lrho, R, Y, Z = NULL, Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
get_bic_fit_2d( lrho, R, Y, Z = NULL, Bu, Bs, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE) )
lrho |
A vector of two elements, the initial values for |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Iu |
An identity matrix of dimension nbu by nbu. |
Is |
An identity matrix of dimension nbs by nbs. |
Du |
The difference matrix over |
Ds |
The difference matrix over |
Wprior |
An optional matrix of a-priori weights. |
ridge |
A ridge penalty parameter: default is 0. This is useful when, in
some cases the algorithm shows convergence problems. In this case, set to a small
number, for example |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
The bic
value of the fitted model.
get_hazard_1d()
takes as input the results of a model
estimated by fit1ts
and it returns the estimated values of the smooth log-hazard
and the smooth hazard together with their standard errors.
If the model includes covariates, then only the baseline (log-)hazard is returned. It is possible to provide values that define a new grid for evaluation of the estimated hazard. If not specified, the hazard is evaluated on the same grid used for the binning of the data, and therefore the estimation of the model. The function will check if the parameters for the new grid provided by the user are compatible with those originally used to construct the B-splines for estimating the model. If not, the grid will be adjusted accordingly and a warning will be returned.
get_hazard_1d(fitted_model, plot_grid = NULL)
get_hazard_1d(fitted_model, plot_grid = NULL)
fitted_model |
is an object of class |
plot_grid |
(optional) A named vector containing the parameters to build a new
grid of intervals over |
A list with the following elements:
new_plot_grid
A list of parameters that specify the new grid, of the form
list("ints", "smin", "smax", "ds")
hazard
A vector containing the estimated hazard values.
loghazard
A vector containing the estimated log-hazard values.
log10hazard
A vector containing the estimated log10-hazard values.
SE_hazard
A vector containing the estimated SEs for the hazard.
SE_loghazard
A vector containing the estimated SEs for the log-hazard.
SE_log10hazard
A vector containing the estimated SEs for the log10-hazard.
## preparing data - no covariates dt1ts <- prepare_data( data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180 ) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts) # Obtain 1d hazard get_hazard_1d(mod1) # Change grid get_hazard_1d(mod1, plot_grid = c(smin = 0, smax = 2730, ds = 30) )
## preparing data - no covariates dt1ts <- prepare_data( data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180 ) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts) # Obtain 1d hazard get_hazard_1d(mod1) # Change grid get_hazard_1d(mod1, plot_grid = c(smin = 0, smax = 2730, ds = 30) )
get_hazard_1d_LMM()
takes as input the results of a model
estimated by fit1ts
and it returns the estimated values of the smooth log-hazard
and the smooth hazard together with their standard errors.
If the model includes covariates, then only the baseline (log-)hazard is returned. It is possible to provide values that define a new grid for evaluation of the estimated hazard. If not specified, the hazard is evaluated on the same grid used for the binning of the data, and therefore the estimation of the model. The function will check if the parameters for the new grid provided by the user are compatible with those originally used to construct the B-splines for estimating the model. If not, the grid will be adjusted accordingly and a warning will be returned.
get_hazard_1d_LMM(fitted_model, plot_grid = NULL)
get_hazard_1d_LMM(fitted_model, plot_grid = NULL)
fitted_model |
is an object of class |
plot_grid |
(optional) A named vector containing the parameters to build a new
grid of intervals over |
A list with the following elements:
new_plot_grid
A list of parameters that specify the new grid, of the form
list("ints", "smin", "smax", "ds")
hazard
A vector containing the estimated hazard values.
loghazard
A vector containing the estimated log-hazard values.
log10hazard
A vector containing the estimated log10-hazard values.
SE_hazard
A vector containing the estimated SEs for the hazard.
SE_loghazard
A vector containing the estimated SEs for the log-hazard.
SE_log10hazard
A vector containing the estimated SEs for the log10-hazard.
## preparing data - no covariates dt1ts <- prepare_data( data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180 ) ## fitting the model with fit1ts() mod1 <- fit1ts(dt1ts, optim_method = "LMMsolver" ) # Obtain 1d hazard get_hazard_1d_LMM(mod1) # Change grid get_hazard_1d_LMM(mod1, plot_grid = c(smin = 0, smax = 2730, ds = 30) )
## preparing data - no covariates dt1ts <- prepare_data( data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180 ) ## fitting the model with fit1ts() mod1 <- fit1ts(dt1ts, optim_method = "LMMsolver" ) # Obtain 1d hazard get_hazard_1d_LMM(mod1) # Change grid get_hazard_1d_LMM(mod1, plot_grid = c(smin = 0, smax = 2730, ds = 30) )
get_hazard_2d()
takes as input the results of a model
estimated by fit2ts
and it returns the estimated smooth log-hazard
and the smooth hazard together with their standard errors.
It is possible to provide values that define a new grid for evaluation of the estimated hazard. If not specified, the hazard is evaluated on the same grid used for the binning of the data, and therefore the estimation of the model. The function will check if the parameters for the new grid provided by the user are compatible with those originally used to construct the B-splines for estimating the model. If not, the grid will be adjusted accordingly and a warning will be returned.
get_hazard_2d( fitted_model, plot_grid = NULL, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL, midpoints = FALSE )
get_hazard_2d( fitted_model, plot_grid = NULL, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL, midpoints = FALSE )
fitted_model |
is an object of class |
plot_grid |
(optional) A list containing the parameters to build a new
finer grid of intervals over u and s for plotting. This must be of the
form: |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. If |
direction |
If |
tmax |
The maximum value of |
midpoints |
A Boolean. Default is |
A list with the following elements:
new_plot_grid
A list of parameters that specify the new grid, of the form
list("intu", "umin", "umax", "du", "ints", "smin", "smax", "ds")
nBu
The B-spline basis for u
, evaluated over the new grid.
nBs
The B-spline basis for s
, evaluated over the new grid.
hazard
A matrix containing the estimated hazard values.
loghazard
A matrix containing the estimated log-hazard values.
log10hazard
A matrix containing the estimated log10-hazard values.
SE_hazard
A matrix containing the estimated SEs for the hazard.
SE_loghazard
A matrix containing the estimated SEs for the log-hazard.
SE_log10haz
A matrix containing the estimated SEs for the log10-hazard.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain 2d hazard get_hazard_2d(fakemod) get_hazard_2d(fakemod, plot_grid = list(c(umin = 3, umax = 8.5, du = .1), c(smin = 0, smax = 7.1, ds = .1)))
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # Obtain 2d hazard get_hazard_2d(fakemod) get_hazard_2d(fakemod, plot_grid = list(c(umin = 3, umax = 8.5, du = .1), c(smin = 0, smax = 7.1, ds = .1)))
get_hazard_2d_LMM()
takes as input an object of class 'haz2tsLMM'
and it returns the estimated smooth log-hazard, the log10-hazard and the
hazard surface together with their standard errors.
It is possible to provide values that define a new grid for evaluation of the estimated hazard. If not specified, the hazard is evaluated on the same grid used for the binning of the data, and therefore the estimation of the model. The function will check if the parameters for the new grid provided by the user are compatible with those originally used to construct the B-splines for estimating the model. If not, the grid will be adjusted accordingly and a warning will be returned.
get_hazard_2d_LMM( fitted_model, plot_grid = NULL, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
get_hazard_2d_LMM( fitted_model, plot_grid = NULL, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
fitted_model |
is an object of class |
plot_grid |
A list containing the parameters to build a new
finer grid of intervals over |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. If |
direction |
If |
tmax |
The maximum value of |
A list with the following elements:
new_plot_grid
A list of parameters that specify the new grid, of the form
list("intu", "umin", "umax", "du", "ints", "smin", "smax", "ds")
hazard
A matrix containing the estimated hazard values.
loghazard
A matrix containing the estimated log-hazard values.
log10hazard
A matrix containing the estimated log10-hazard values.
SE_hazard
A matrix containing the estimated SEs for the hazard.
SE_loghazard
A matrix containing the estimated SEs for the log-hazard.
SE_log10haz
A matrix containing the estimated SEs for the log10-hazard.
# Create some fake data - the bare minimum id <- 1:20 u <- c( 5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96 ) s <- c( 0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00 ) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) #' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver" ) # Get hazard get_hazard_2d_LMM(fakemod) # Use a finer grid of points get_hazard_2d_LMM(fakemod, plot_grid = list(c(umin = 3, umax = 8.5, du = .1), c(smin = 0, smax = 7.1, ds = .1)))
# Create some fake data - the bare minimum id <- 1:20 u <- c( 5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96 ) s <- c( 0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00 ) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) #' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver" ) # Get hazard get_hazard_2d_LMM(fakemod) # Use a finer grid of points get_hazard_2d_LMM(fakemod, plot_grid = list(c(umin = 3, umax = 8.5, du = .1), c(smin = 0, smax = 7.1, ds = .1)))
get_hr()
takes as input the results of a model with covariates
estimated by fit2ts
or fit1ts
and returns the estimated hazard ratios
together with their standard errors.
get_hr(fitted_model)
get_hr(fitted_model)
fitted_model |
A list returned by the function |
A list with the following elements:
HR
A vector of hazard ratios (calculated as ).
SE_HR
A vector of Standard Errors for the hazard ratios calculated
via the delta method.
beta
A vector of the estimated coefficients.
SE_beta
A vector of the Standard Errors for the beta coefficients.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) x1 <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0) fakedata <- as.data.frame(cbind(id, u, s, ev, x1)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5, individual = TRUE, covs = "x1") # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1, 1.5, .5), seq(1, 1.5, .5))) get_hr(fakemod)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) x1 <- c(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0) fakedata <- as.data.frame(cbind(id, u, s, ev, x1)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5, individual = TRUE, covs = "x1") # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1, 1.5, .5), seq(1, 1.5, .5))) get_hr(fakemod)
getAIC_BIC_LMM
is an utility function that takes an object of class
'LMMsolve'
fitted via fit1ts()
or fit2ts()
and calculates
AIC, BIC and ED.
getAIC_BIC_LMM(fit, offset)
getAIC_BIC_LMM(fit, offset)
fit |
An object of class |
offset |
The vector of exposure times from dataLMM |
A list with:
* ED
effective dimension of the full model;
* EDbase
effective dimension of the baseline hazard only;
* Dev
deviance;
* AIC
the aic;
* BIC
the bic;
* n_beta
the number of estimated covariate parameters (if PH model).
GLAM_1d_covariates()
fits a GLAM for the hazard with one time
scale, with covariates.
GLAM_1d_covariates( R, Y, Bs, Z = Z, Wprior = NULL, P, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
GLAM_1d_covariates( R, Y, Bs, Z = Z, Wprior = NULL, P, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
R |
A 2d-array of dimensions ns by n containing exposure times. |
Y |
A 2d-array of dimensions ns by n containing event indicators. |
Bs |
A matrix of B-splines for the |
Z |
A regression matrix of covariates values of dimensions n by p. |
Wprior |
An optional vector of length ns of a-priori weights. |
P |
The penalty matrix of dimension cs by cs. |
control_algorithm |
A list with optional values for the parameters of
iterative processes:
* |
A list with the following elements:
alpha
The vector of estimated P-splines coefficients of length cs.
SE_alpha
The vector of estimated Standard Errors for the alpha
coefficients,
of length cs.
beta
The vector of length p of estimated covariates coefficients.
se_beta
The vector of length p of estimated Standard Errors for the beta
coefficients.
eta0
The vector of values of the baseline linear predictor (log-hazard).
H
The hat-matrix.
Cov
The full variance-covariance matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline basis Bs
(this is a list for
compatibility with functions in 2d).
GLAM_2d_covariates()
fits a GLAM for the hazard with two time
scales, with covariates.
GLAM_2d_covariates( R, Y, Bu, Bs, Z, Wprior = NULL, P, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
GLAM_2d_covariates( R, Y, Bu, Bs, Z, Wprior = NULL, P, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
R |
A 3d-array of dimensions nu by ns by n containing exposure times. |
Y |
A 3d-array of dimensions nu by ns by n containing event indicators. |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
Wprior |
An optional matrix of a-priori weights. |
P |
The penalty matrix of dimension cucs by cucs. |
ridge |
A ridge penalty parameter: default is 0. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
A list with the following elements:
Alpha
The matrix of estimated P-splines coefficients of dimension
cu by cs.
Cov_alpha
The variance-covariance matrix of the Alpha
coefficients,
of dimension cucs by cucs.
beta
The vector of length p of estimated covariates coefficients.
Cov_beta
The variance-covariance matrix of the beta
coefficients,
of dimension p by p.
SE_beta
The vector of length p of estimated Standard Errors for the beta
coefficients.
Eta0
The matrix of values of the baseline linear predictor (log-hazard)
of dimension nu by ns.
H
The hat-matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline bases Bu
and Bs
.
GLAM_2d_no_covariates()
fits a GLAM for the hazard with two time
scales, without covariates.
GLAM_2d_no_covariates( R, Y, Bu, Bs, Wprior = NULL, P, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
GLAM_2d_no_covariates( R, Y, Bu, Bs, Wprior = NULL, P, ridge = 0, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
R |
A matrix of exposure times of dimension nu by ns. |
Y |
A matrix of event counts of dimension nu by ns. |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Wprior |
An optional matrix of a-priori weights. |
P |
The penalty matrix of dimension cucs by cucs. |
ridge |
A ridge penalty parameter: default is 0. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
A list with the following elements:
Alpha
The matrix of estimated P-splines coefficients of dimension
cu by cs.
Cov_alpha
The variance-covariance matrix of the Alpha
coefficients,
of dimension cucs by cucs.
Eta0
The matrix of values of the baseline linear predictor (log-hazard)
of dimension nu by ns.
H
The hat-matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline bases Bu
and Bs
.
grid_search_1d()
performs a grid search for the minimum
AIC or BIC of the one time scale model.
It finds the optimal values of log_10(rho_s)
and returns the estimated
optimal model.
grid_search_1d( r, y, Z = NULL, lrho, Bs, Ds, Wprior = NULL, optim_criterion = c("aic", "bic"), control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE), par_gridsearch = list(plot_aic = FALSE, plot_bic = FALSE, return_aic = TRUE, return_bic = TRUE, mark_optimal = TRUE, main_aic = "AIC grid", main_bic = "BIC grid") )
grid_search_1d( r, y, Z = NULL, lrho, Bs, Ds, Wprior = NULL, optim_criterion = c("aic", "bic"), control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE), par_gridsearch = list(plot_aic = FALSE, plot_bic = FALSE, return_aic = TRUE, return_bic = TRUE, mark_optimal = TRUE, main_aic = "AIC grid", main_bic = "BIC grid") )
r |
A vector of exposure times of length ns, or an array of dimension ns by n. |
y |
A vector of event counts of length ns, or an array of dimension ns by n. |
Z |
(optional) A regression matrix of covariates of dimension n by p. |
lrho |
A vector of |
Bs |
A matrix of B-splines for the time scale |
Ds |
The difference matrix of the penalty. |
Wprior |
An optional vector of a-priori weights. |
optim_criterion |
The criterion to be used for optimization:
|
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
par_gridsearch |
A list of parameters for the grid_search:
|
An object of class h1tsfit
with the following elements:
optimal_model
A list containing the results of the optimal model.
optimal_logrho
The optimal value of log10(rho_s)
.
P_optimal
The optimal penalty matrix P.
AIC
(if par_gridsearch$return_aic == TRUE
) The vector of AIC values.
BIC
(if par_gridsearch$return_bic == TRUE
) The vector of BIC values.
Camarda, C. G. (2012). "MortalitySmooth: An R Package for Smoothing Poisson Counts with P-Splines." Journal of Statistical Software, 50(1), 1–24. https://doi.org/10.18637/jss.v050.i01
grid_search_2d()
performs a grid search for the minimum
AIC or BIC of the two time scales model.
It finds the optimal values of log_10(rho_u)
and log_10(rho_s)
and
returns the estimated optimal model.
grid_search_2d( lru, lrs, R, Y, Bu, Bs, Z = NULL, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, optim_criterion = c("aic", "bic"), control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE), par_gridsearch = list(plot_aic = FALSE, plot_bic = FALSE, return_aic = TRUE, return_bic = TRUE, col = grey.colors(n = 10), plot_contour = FALSE, mark_optimal = TRUE, main_aic = "AIC grid", main_bic = "BIC grid") )
grid_search_2d( lru, lrs, R, Y, Bu, Bs, Z = NULL, Iu, Is, Du, Ds, Wprior = NULL, ridge = 0, optim_criterion = c("aic", "bic"), control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE, monitor_ev = FALSE), par_gridsearch = list(plot_aic = FALSE, plot_bic = FALSE, return_aic = TRUE, return_bic = TRUE, col = grey.colors(n = 10), plot_contour = FALSE, mark_optimal = TRUE, main_aic = "AIC grid", main_bic = "BIC grid") )
lru |
A vector of |
lrs |
A vector of |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
Bu |
A matrix of B-splines for the |
Bs |
A matrix of B-splines for the |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
Iu |
An identity matrix of dimension nbu by nbu. |
Is |
An identity matrix of dimension nbs by nbs. |
Du |
The difference matrix over |
Ds |
The difference matrix over |
Wprior |
An optional matrix of a-priori weights. |
ridge |
A ridge penalty parameter: default is 0. This is useful when, in
some cases the algorithm shows convergence problems. In this case, set to a small
number, for example |
optim_criterion |
The criterion to be used for optimization:
|
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
par_gridsearch |
A list of parameters for the grid_search:
|
An object of class h2tsfit
with the following elements:
optimal_model
A list containing the results of the optimal model.
optimal_logrho
The optimal couple of log_10(rho_u)
and log_10(rho_s)
values.
P_optimal
The optimal penalty matrix P.
AIC
(if par_gridsearch$return_aic == TRUE
) The vector of AIC values.
BIC
(if par_gridsearch$return_bic == TRUE
) The vector of BIC values.
Camarda, C. G. (2012). "MortalitySmooth: An R Package for Smoothing Poisson Counts with P-Splines." Journal of Statistical Software, 50(1), 1–24. https://doi.org/10.18637/jss.v050.i01
Summary function for object of class 'haz2ts'
haz2ts_summary(x, ...)
haz2ts_summary(x, ...)
x |
an object of class 'haz2ts' returned by the function |
... |
further arguments |
a printed summary of the fitted model
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) summary(fakemod)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) summary(fakemod)
Summary function for object of class 'haz2tsLMM'
haz2tsLMM_summary(x, ...)
haz2tsLMM_summary(x, ...)
x |
an object of class 'haz2tsLMM' returned by the function |
... |
further arguments |
a printed summary of the fitted model, including optimal smoothing paramters, the effective dimension ED and the AIC/BIC. For model with covariates, a regression table is also returned.
# Create some fake data - the bare minimum id <- 1:20 u <- c( 5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96 ) s <- c( 0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00 ) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) #' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver" ) summary(fakemod)
# Create some fake data - the bare minimum id <- 1:20 u <- c( 5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96 ) s <- c( 0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00 ) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) #' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(data = fakedata, u = "u", s_out = "s", ev = "ev", ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver" ) summary(fakemod)
imageplot_2ts()
plots an image of the two time scales hazard (or survival or
cumulative hazard) with contour lines.
This is the default call implemented in plot.haz2ts.
imageplot_2ts(x, y, z, plot_options = list(), ...)
imageplot_2ts(x, y, z, plot_options = list(), ...)
x |
The coordinates for the x-axis. This is a vector of intervals
over the |
y |
The coordinates for the y-axis. This is a vector of intervals
over the |
z |
The values of the surface to plot, organized in a matrix with
dimensions compatible with those of |
plot_options |
A list of options for the plot:
|
... |
Further arguments to image.plot or image |
An image plot of an estimated surface.
imageplot_SE()
plots an image of the SEs of the two time scales hazard with
contour lines.
imageplot_SE(x, y, z, plot_options = list(), ...)
imageplot_SE(x, y, z, plot_options = list(), ...)
x |
The coordinates for the x-axis. This is a vector of intervals
over the |
y |
The coordinates for the y-axis. This is a vector of intervals
over the |
z |
The values of the surface to plot, organized in a matrix with
dimensions compatible with those of |
plot_options |
A list of options for the plot:
|
... |
Further arguments to image.plot or image |
An image plot of the SEs for the (log-) hazard surface.
iwls_1d()
fits the 1ts model with IWLS algorithm.
iwls_1d( r, y, Bs, P, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
iwls_1d( r, y, Bs, P, Wprior = NULL, control_algorithm = list(maxiter = 20, conv_crit = 1e-05, verbose = FALSE) )
r |
A vector of exposure times of length ns. |
y |
A vector of event counts of length ns. |
Bs |
A matrix of B-splines for the |
P |
The penalty matrix of dimension cs by cs. |
Wprior |
An optional vector of length ns of a-priori weights. |
control_algorithm |
A list with optional values for the parameters of the iterative processes:
|
A list with the following elements:
alpha
The vector of estimated P-splines coefficients of length cs.
SE_alpha
The vector of estimated Standard Errors for the alpha
coefficients,
of length cs.
H
The hat-matrix.
Cov
The full variance-covariance matrix.
deviance
The deviance.
ed
The effective dimension of the model.
aic
The value of the AIC.
bic
The value of the BIC.
Bbases
a list with the B-spline basis Bs
(this is a list for
compatibility with functions in 2d).
make_bins()
constructs the bins over the time axes and saves the extremes
of the bins in a vector.
make_bins( t_in = NULL, t_out = NULL, u = NULL, s_in = NULL, s_out, min_t = NULL, max_t = NULL, min_u = NULL, max_u = NULL, min_s = NULL, max_s = NULL, dt = NULL, du = NULL, ds )
make_bins( t_in = NULL, t_out = NULL, u = NULL, s_in = NULL, s_out, min_t = NULL, max_t = NULL, min_u = NULL, max_u = NULL, min_s = NULL, max_s = NULL, dt = NULL, du = NULL, ds )
t_in |
(optional) A vector of entry times on the time scale |
t_out |
(optional) A vector of exit times on the time scale |
u |
(optional) A vector of fixed-times at entry in the process. |
s_in |
(optional) A vector of entry times on the time scale |
s_out |
A vector of exit times on the time scale |
min_t |
(optional) A minimum value for the bins over |
max_t |
(optional) A maximum value for the bins over |
min_u |
(optional) A minimum value for the bins over |
max_u |
(optional) A maximum value for the bins over |
min_s |
(optional) A minimum value for the bins over |
max_s |
(optional) A maximum value for the bins over |
dt |
(optional) A scalar giving the length of the intervals on the |
du |
(optional) A scalar giving the length of the intervals on the |
ds |
A scalar giving the length of the intervals on the |
It allows construction of bins over the time scales t
and
s
and/or over the fixed-time axis u
. The time scale
s
is always required. See also prepare_data()
to conveniently
prepare individual data for the analysis with one, or two time scales.
A few words about constructing the grid of bins. There is no 'golden rule' or
optimal strategy for setting the number of bins over each time axis, or deciding
on the bins' width. It very much depends on the data structure, however, we
try to give some directions here. First, in most cases, more bins is better
than less bins. A good number is about 30 bins.
However, if data are scarce, the user might want to find a compromise between
having a larger number of bins, and having many bins empty.
Second, the chosen width of the bins (that is du
and ds
) does depend on
the time unit over which the time scales are measured. For example, if the time
is recorded in days, as in the example below, and several years of follow-up
are available, the user can split the data in bins of width 30 (corresponding
to about one month), 60 (about two months), 90 (about three months), etc.
If the time scale is measured in years, then appropriate width could be 0.25
(corresponding to a quarter of a year), or 0.5 (that is half year). However,
in some cases, time might be measure in completed years, as is often the case
for age. In this scenario, an appropriate bin width is 1.
Finally, it is always a good idea to plot your data first, and explore the range
of values over which the time scale(s) are recorded. This will give insight
about reasonable values for the arguments min_s
, min_u
, max_s
and max_u
(that in any case are optional).
A list with the following elements:
bins_t
if t_out
is provided, this is a vector of bins extremes for the time scale t
midt
if t_out
is provided, this is a vector with the midpoints of the bins over t
nt
if t_out
is provided, this is the number of bins over t
bins_u
if u
is provided, this is a vector of bins extremes for u
axis
midu
if u
is provided, this is a vector with the midpoints of the bins over u
nu
if u
is provided, this is the number of bins over u
bins_s
is a vector of bins extremes for the time scale s
mids
is a vector with the midpoints of the bins over s
ns
is the number of bins over s
# Make bins for colon cancer data by time at randomization and time since recurrence bins <- make_bins(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, du = 30, ds = 30) # Make bins for colon cancer data only over time since recurrence bins <- make_bins(s_out = reccolon2ts$timesr, ds = 60)
# Make bins for colon cancer data by time at randomization and time since recurrence bins <- make_bins(u = reccolon2ts$timer, s_out = reccolon2ts$timesr, du = 30, ds = 30) # Make bins for colon cancer data only over time since recurrence bins <- make_bins(s_out = reccolon2ts$timesr, ds = 60)
plot_slices()
plots slices of the (log-)hazard with two time
scales, at selected values of one of the two time dimensions.
plot_slices(x, y, direction, plot_options = list())
plot_slices(x, y, direction, plot_options = list())
x |
A vector of values for the x-axis. This is a vector of values over the axis opposite to the one where the sliced are cut. |
y |
A matrix of (log-)hazard values. |
direction |
Either |
plot_options |
A list of options for the plot:
|
A plot of the slices of the hazard cut at selected points.
plot.haz1ts()
is a plot method for objects of class haz1ts
.
## S3 method for class 'haz1ts' plot( x, which_plot = c("hazard", "covariates"), plot_grid = NULL, plot_options = list(), ... )
## S3 method for class 'haz1ts' plot( x, which_plot = c("hazard", "covariates"), plot_grid = NULL, plot_options = list(), ... )
x |
The output of the function |
which_plot |
The type of plot required. Can be one of |
plot_grid |
(optional) A named vector containing the parameters to build a new
grid of intervals over |
plot_options |
A list with all possible options for any of the plots:
|
... |
Further arguments to plot. |
A plot of the type required.
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts) plot(mod1)
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts) plot(mod1)
plot.haz1tsLMM()
is a plot method for objects of class haz1tsLMM
.
## S3 method for class 'haz1tsLMM' plot( x, which_plot = c("hazard", "covariates"), plot_grid = NULL, plot_options = list(), ... )
## S3 method for class 'haz1tsLMM' plot( x, which_plot = c("hazard", "covariates"), plot_grid = NULL, plot_options = list(), ... )
x |
The output of the function |
which_plot |
The type of plot required. Can be one of |
plot_grid |
(optional) A named vector containing the parameters to build a new
grid of intervals over |
plot_options |
A list with all possible options for any of the plots:
|
... |
Further arguments to plot. |
The function obtainSmoothTrend
from the R-package LMMsolver
is
used here. We refer the interested readers to https://biometris.github.io/LMMsolver/
for more details on LMMsolver
and its usage.
A plot of the type required.
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts, optim_method = "LMMsolver") plot(mod1)
## preparing data - no covariates dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) ## fitting the model with fit1ts() - default options mod1 <- fit1ts(dt1ts, optim_method = "LMMsolver") plot(mod1)
plot.haz2ts()
is the plot method for objects of class haz2ts
.
It produces several kinds of plots of the fitted model with two
time scales (see fit2ts()
), either in the original (t,s) plane, while respecting the
constraint imposed by the relation of the two time scales, or in the
transformed (u,s) plane.
## S3 method for class 'haz2ts' plot( x, plot_grid = NULL, which_plot = c("hazard", "covariates", "SE", "slices", "survival", "cumhaz"), where_slices = NULL, direction = c(NULL, "u", "s"), plot_options = list(), ... )
## S3 method for class 'haz2ts' plot( x, plot_grid = NULL, which_plot = c("hazard", "covariates", "SE", "slices", "survival", "cumhaz"), where_slices = NULL, direction = c(NULL, "u", "s"), plot_options = list(), ... )
x |
The output of the function |
plot_grid |
(optional) A list containing the parameters to build a new
finer grid of intervals over u and s for plotting. This must be of the
form: |
which_plot |
The type of plot required. Can be one of |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. If |
direction |
If |
plot_options |
A list with all possible options for any of the plots:
|
... |
Further arguments to image.plot or image |
The vignette "visualization" presents and discusses all the different
plotting options for the fitted model over two time scales.
In most of the cases, the user will want to visualize the hazard surface over
the two time scales. This can be plotted on the hazard scale, the log-hazard
scale or the log10-hazard scale, by switching to TRUE
the corresponding
argument in plot_options
.
The survival and cumulative hazard functions can be plotted as two-dimensional
surfaces over u
and s
or t
and s
. However, it is also very informative
to plot them as one-dimensional curves over s
(cross-sections or slices).
This is done by selecting which_plot = "survival"
and surv_slices = TRUE
in plot_options
. Additionally, a vector of values for the cutting points
over the u
-axis should be passed to the argument where_slices
, together
with setting direction = u
.
Similar plot is obtained for the cumulative hazard by selecting which_plot = "cumhaz"
,
cumhaz_slices = TRUE
, see examples section.
Please, notice that for the survival function and the cumulative hazard, only
cross-sections of the surface for selected values of u
(over the s
time)
can be plotted.
A plot of the fitted model.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # plot the hazard surface plot(fakemod) # plot the survival function as one-dimension curves over `s` plot(fakemod, which_plot = "survival", direction = "u", where_slices = c(4, 6, 8), plot_options = list( surv_slices = TRUE )) # Create a color pallete function from a RColorBrewer palette, using the function # colorRampPalette from grDevices. ## Not run: mypal <- function(n) { colorRampPalette(RColorBrewer::brewer.pal(9, "YlGnBu"))(n) } # if mod_haz is a fitted model of class `haz2ts`, the following code will # produce a cross-sections plot of the hazard over `s` for selected values # of `u`, with the palette specified above plot(mod_haz, which_plot = "slices", where_slices = c(30, 60, 90, 180, 365, 1000, 2000), direction = "u", plot_options = list( col_palette = mypal, main = "Cross-sections of the hazard", xlab = "Time since recurrence", ylab = "Hazard" ) ) ## End(Not run)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 ,1.5 ,.5), seq(1 ,1.5 ,.5))) # plot the hazard surface plot(fakemod) # plot the survival function as one-dimension curves over `s` plot(fakemod, which_plot = "survival", direction = "u", where_slices = c(4, 6, 8), plot_options = list( surv_slices = TRUE )) # Create a color pallete function from a RColorBrewer palette, using the function # colorRampPalette from grDevices. ## Not run: mypal <- function(n) { colorRampPalette(RColorBrewer::brewer.pal(9, "YlGnBu"))(n) } # if mod_haz is a fitted model of class `haz2ts`, the following code will # produce a cross-sections plot of the hazard over `s` for selected values # of `u`, with the palette specified above plot(mod_haz, which_plot = "slices", where_slices = c(30, 60, 90, 180, 365, 1000, 2000), direction = "u", plot_options = list( col_palette = mypal, main = "Cross-sections of the hazard", xlab = "Time since recurrence", ylab = "Hazard" ) ) ## End(Not run)
plot.haz2tsLMM()
is the plot method for objects of class haz2tsLMM
.
It produces plots of the fitted model with two time scales (see fit2ts()
),
fitted via LMMsolver. The two-dimensional plots are limited to the transformed
plane, that is only plots over u
and s
axes are produced.
## S3 method for class 'haz2tsLMM' plot( x, plot_grid = NULL, which_plot = c("hazard", "covariates", "SE", "slices", "survival", "cumhaz"), where_slices = NULL, direction = c(NULL, "u", "s"), plot_options = list(), ... )
## S3 method for class 'haz2tsLMM' plot( x, plot_grid = NULL, which_plot = c("hazard", "covariates", "SE", "slices", "survival", "cumhaz"), where_slices = NULL, direction = c(NULL, "u", "s"), plot_options = list(), ... )
x |
The output of the function |
plot_grid |
A list containing the parameters to build a new
finer grid of intervals over |
which_plot |
The type of plot required. Can be one of |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. If |
direction |
If |
plot_options |
A list with all possible options for any of the plots:
|
... |
Further arguments to image.plot or image |
The vignette "visualization" presents and discusses all the different
plotting options for the fitted model over two time scales.
In most of the cases, the user will want to visualize the hazard surface over
the two time scales. This can be plotted on the hazard scale, the log-hazard
scale or the log10-hazard scale, by switching to TRUE
the corresponding
argument in plot_options
.
The survival and cumulative hazard functions can be plotted as two-dimensional
surfaces over u
and s
or t
and s
. However, it is also very informative
to plot them as one-dimensional curves over s
(cross-sections or slices).
This is done by selecting which_plot = "survival"
and surv_slices = TRUE
in plot_options
. Additionally, a vector of values for the cutting points
over the u
-axis should be passed to the argument where_slices
, together
with setting direction = u
.
Similar plot is obtained for the cumulative hazard by selecting which_plot = "cumhaz"
,
cumhaz_slices = TRUE
, see examples section.
Please, notice that for the survival function and the cumulative hazard, only
cross-sections of the surface for selected values of u
(over the s
time)
can be plotted.
The function obtainSmoothTrend
from the R-package LMMsolver
is
used here. We refer the interested readers to https://biometris.github.io/LMMsolver/
for more details on LMMsolver
and its usage.
A plot of the fitted model.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver") # plot the hazard surface plot(fakemod) # plot the survival function as one-dimension curves over `s` plot(fakemod, which_plot = "survival", direction = "u", where_slices = c(4, 6, 8), plot_options = list( surv_slices = TRUE )) # Create a color pallete function from a RColorBrewer palette, using the function # colorRampPalette from grDevices. ## Not run: mypal <- function(n) { colorRampPalette(RColorBrewer::brewer.pal(9, "YlGnBu"))(n) } # if mod_haz is a fitted model of class `haz2ts`, the following code will # produce a cross-sections plot of the hazard over `s` for selected values # of `u`, with the palette specified above plot(mod_haz, which_plot = "slices", where_slices = c(30, 60, 90, 180, 365, 1000, 2000), direction = "u", plot_options = list( col_palette = mypal, main = "Cross-sections of the hazard", xlab = "Time since recurrence", ylab = "Hazard" ) ) ## End(Not run)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1)#' fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "LMMsolver") # plot the hazard surface plot(fakemod) # plot the survival function as one-dimension curves over `s` plot(fakemod, which_plot = "survival", direction = "u", where_slices = c(4, 6, 8), plot_options = list( surv_slices = TRUE )) # Create a color pallete function from a RColorBrewer palette, using the function # colorRampPalette from grDevices. ## Not run: mypal <- function(n) { colorRampPalette(RColorBrewer::brewer.pal(9, "YlGnBu"))(n) } # if mod_haz is a fitted model of class `haz2ts`, the following code will # produce a cross-sections plot of the hazard over `s` for selected values # of `u`, with the palette specified above plot(mod_haz, which_plot = "slices", where_slices = c(30, 60, 90, 180, 365, 1000, 2000), direction = "u", plot_options = list( col_palette = mypal, main = "Cross-sections of the hazard", xlab = "Time since recurrence", ylab = "Hazard" ) ) ## End(Not run)
prepare_data()
prepares the raw individual time-to-event data
for hazard estimation in 1d or 2d.
Given the raw data, this function first constructs the bins over one or two time axes and then computes the aggregated (or individual) vectors or matrices of exposure times and events indicators. A data.frame with covariates values can be provided by the user.
prepare_data( data = NULL, t_in = NULL, t_out = NULL, u = NULL, s_in = NULL, s_out, events, min_t = NULL, max_t = NULL, min_u = NULL, max_u = NULL, min_s = NULL, max_s = NULL, dt = NULL, du = NULL, ds, individual = FALSE, covs = NULL )
prepare_data( data = NULL, t_in = NULL, t_out = NULL, u = NULL, s_in = NULL, s_out, events, min_t = NULL, max_t = NULL, min_u = NULL, max_u = NULL, min_s = NULL, max_s = NULL, dt = NULL, du = NULL, ds, individual = FALSE, covs = NULL )
data |
A data frame. |
t_in |
(optional) A vector of entry times on the time scale |
t_out |
(optional) A vector of exit times on the time scale |
u |
(optional) A vector of fixed-times at entry in the process. |
s_in |
(optional) A vector of entry times on the time scale |
s_out |
A vector of exit times on the time scale |
events |
A vector of event's indicators (possible values 0/1). |
min_t |
(optional) A minimum value for the bins over |
max_t |
(optional) A maximum value for the bins over |
min_u |
(optional) A minimum value for the bins over |
max_u |
(optional) A maximum value for the bins over |
min_s |
(optional) A minimum value for the bins over |
max_s |
(optional) A maximum value for the bins over |
dt |
(optional) A scalar giving the length of the intervals on the |
du |
(optional) A scalar giving the length of the intervals on the |
ds |
A scalar giving the length of the intervals on the |
individual |
A Boolean. Default is |
covs |
A data.frame with the variables to be used as covariates.
The function will create dummy variables for any factor variable passed as argument in |
A few words about constructing the grid of bins.
Bins are containers for the individual data. There is no 'golden rule' or
optimal strategy for setting the number of bins over each time axis, or deciding
on the bins' width. It very much depends on the data structure, however, we
try to give some directions here. First, in most cases, more bins is better
than less bins. A good number is about 30 bins.
However, if data are scarce, the user might want to find a compromise between
having a larger number of bins, and having many bins empty.
Second, the chosen width of the bins (that is du
and ds
) does depend on
the time unit over which the time scales are measured. For example, if the time
is recorded in days, as in the example below, and several years of follow-up
are available, the user can split the data in bins of width 30 (corresponding
to about one month), 60 (about two months), 90 (about three months), etc.
If the time scale is measured in years, then appropriate width could be 0.25
(corresponding to a quarter of a year), or 0.5 (that is half year). However,
in some cases, time might be measure in completed years, as is often the case
for age. In this scenario, an appropriate bin width is 1.
Finally, it is always a good idea to plot the data first, and explore the range
of values over which the time scale(s) are recorded. This will give insight
about reasonable values for the arguments min_s
, min_u
, max_s
and max_u
(that in any case are optional).
Regarding names of covariates or levels of categorical covariates/factors: When using "LMMsolver" to fit a model with covariates that which have names (or factor labels) including a symbol such as "+", "-", "<" or ">" will result in an error. To avoid this, the responsible names (labels) will be rewritten without mathematical symbols. For example: "Lev+5FU" (in the colon cancer data) is replaced by "Lev&5FU".
A list with the following elements:
bins
a list:
bins_t
if t_out
is provided, this is a vector of bins extremes for the time scale t
.
mid_t
if t_out
is provided, this is a vector with the midpoints of the bins over t
.
nt
if t_out
is provided, this is the number of bins over t
.
bins_u
if u
is provided, this is a vector of bins extremes for u
axis.
midu
if u
is provided, this is a vector with the midpoints of the bins over u
.
nu
if u
is provided, this is the number of bins over u
.
bins_s
is a vector of bins extremes for the time scale s
.
mids
is a vector with the midpoints of the bins over s
.
ns
is the number of bins over s
.
bindata
:
r
or R
an array of exposure times: if binning the data over
one time scale only this is a vector.
If binning the data over two time scales and if individual == TRUE
then R
is an array of dimension nu by ns by n, otherwise it is an
array of dimension nu by ns
y
or Y
an array of event counts: if binning the data over one time
scale only this is a vector.
If binning the data over two time scales and if individual == TRUE
then Y
is an array of dimension nu by ns by n, otherwise it is an
array of dimension nu by ns
Z
A matrix of covariates' values to be used in the model,
of dimension n by p
Angela Carollo [email protected]
# Bin data over s = time since recurrence only, with intervals of length 30 days # aggregated data (no covariates) # The following example provide the vectors of data directly from the dataset binned_data <- prepare_data(s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30) # Visualize vector of event counts print(binned_data$bindata$y) # Visualize midpoints of the bins print(binned_data$bins$mids) # Visualize number of bins print(binned_data$bins$ns) # Now, the same thing is done by providing a dataset and the name of all relevant variables binned_data <- prepare_data(data = reccolon2ts, s_out = "timesr", events = "status", ds = 30) # Visualize vector of event counts print(binned_data$bindata$y) # Now using ds = .3 and the same variable measured in years binned_data <- prepare_data(s_out = reccolon2ts$timesr_y, events = reccolon2ts$status, ds = .3) # Visualize vector of exposure timess print(binned_data$bindata$r) # Bin data over u = time at recurrence and s = time since recurrence, measured in days # aggregated data (no covariates) # Note that if we do not provide du this is taken to be equal to ds binned_data <- prepare_data( u = reccolon2ts$timer, s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30 ) # Visualize matrix of event counts print(binned_data$bindata$Y) # Visualize midpoints of bins over u print(binned_data$bins$midu) # Bin data over u = time at recurrence and s = time since recurrence, measured in day # individual-level data required # we provide two covariates: nodes (numerical) and rx (factor) covs <- subset(reccolon2ts, select = c("nodes", "rx")) binned_data <- prepare_data( u = reccolon2ts$timer, s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30, individual = TRUE, covs = covs ) # Visualize structure of binned data print(str(binned_data$bindata)) # Alternatevely: binned_data <- prepare_data( data = reccolon2ts, u = "timer", s_out = "timesr", events = "status", ds = 30, individual = TRUE, covs = c("nodes", "rx") )
# Bin data over s = time since recurrence only, with intervals of length 30 days # aggregated data (no covariates) # The following example provide the vectors of data directly from the dataset binned_data <- prepare_data(s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30) # Visualize vector of event counts print(binned_data$bindata$y) # Visualize midpoints of the bins print(binned_data$bins$mids) # Visualize number of bins print(binned_data$bins$ns) # Now, the same thing is done by providing a dataset and the name of all relevant variables binned_data <- prepare_data(data = reccolon2ts, s_out = "timesr", events = "status", ds = 30) # Visualize vector of event counts print(binned_data$bindata$y) # Now using ds = .3 and the same variable measured in years binned_data <- prepare_data(s_out = reccolon2ts$timesr_y, events = reccolon2ts$status, ds = .3) # Visualize vector of exposure timess print(binned_data$bindata$r) # Bin data over u = time at recurrence and s = time since recurrence, measured in days # aggregated data (no covariates) # Note that if we do not provide du this is taken to be equal to ds binned_data <- prepare_data( u = reccolon2ts$timer, s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30 ) # Visualize matrix of event counts print(binned_data$bindata$Y) # Visualize midpoints of bins over u print(binned_data$bins$midu) # Bin data over u = time at recurrence and s = time since recurrence, measured in day # individual-level data required # we provide two covariates: nodes (numerical) and rx (factor) covs <- subset(reccolon2ts, select = c("nodes", "rx")) binned_data <- prepare_data( u = reccolon2ts$timer, s_out = reccolon2ts$timesr, events = reccolon2ts$status, ds = 30, individual = TRUE, covs = covs ) # Visualize structure of binned data print(str(binned_data$bindata)) # Alternatevely: binned_data <- prepare_data( data = reccolon2ts, u = "timer", s_out = "timesr", events = "status", ds = 30, individual = TRUE, covs = c("nodes", "rx") )
Process data to fit model with LMMsolver
prepare_data_LMMsolver(Y = Y, R = R, Z = NULL, bins = bins)
prepare_data_LMMsolver(Y = Y, R = R, Z = NULL, bins = bins)
Y |
A matrix (or 3d-array) of event counts of dimension nu by ns (or nu by ns by n). |
R |
A matrix (or 3d-array) of exposure times of dimension nu by ns (or nu by ns by n). |
Z |
(optional) A regression matrix of covariates values of dimensions n by p. |
bins |
a list with the specification for the bins. This is created by
the function |
A dataset in long form to fit the model with LMMsolver
data2ts
objectPrint method for an object of class data2ts
## S3 method for class 'data2ts' print(x, ...)
## S3 method for class 'data2ts' print(x, ...)
x |
of class |
... |
Further arguments to print |
No return value
Angela Carollo [email protected]
# Bin the colon cancer data over s (time since recurrence) dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) print(dt1ts) # Bin the colon cancer data over u (time at recurrence) and s (time since recurrence) dt2ts <- prepare_data(data = reccolon2ts, u = "timer", s_in = "entrys", s_out = "timesr", events = "status", ds = 180) print(dt2ts)
# Bin the colon cancer data over s (time since recurrence) dt1ts <- prepare_data(data = reccolon2ts, s_in = "entrys", s_out = "timesr", events = "status", ds = 180) print(dt1ts) # Bin the colon cancer data over u (time at recurrence) and s (time since recurrence) dt2ts <- prepare_data(data = reccolon2ts, u = "timer", s_in = "entrys", s_out = "timesr", events = "status", ds = 180) print(dt2ts)
This dataset is a reduced version of the dataset colon from the package survival (Therneau, T.M. et al., 2022). Each observation is a transition from recurrence of colon cancer to death or censoring. The time scales are time from randomization to recurrence, time from randomization to death or censoring and time from recurrence of the cancer to death or censoring. Only observations about individuals with a recurrence of the cancer are selected. Additionally, 7 individuals with exit times from the risk set equal to entry times in the recurrence state (0 exposure time) were dropped from the sample. In the original dataset, all times of recurrence are known precisely, so that after recurrence all individuals are followed right from entry in the state, without left truncation. To be able to illustrate how to include left truncated times in the model, artificial left truncated entry in the 'recurrence' state are introduced for 40 individuals.
data(reccolon2ts)
data(reccolon2ts)
patient's id
1 for all patients
Treatment - Obs(ervation), Lev(amisole), Lev(amisole)+5-FU
1=male, 0=female
Age at transplant in years
obstruction of colon by tumor: 1=yes
perforation of colon: 1=yes
adherence to nearby organs: 1=yes
number of lymph nodes with detectable cancer
censoring status: 0=censoring, 1=event
differentiation of tumour: 1=well, 2=moderate, 3=poor
extent of local spread: 1=submucosa, 2=muscle, 3=serosa, 4=contigous structures
time from surgery to registration: 0=short, 1=long
more than 4 positive lymph nodes
2 for all patients (2=death)
time in days from randomization to death or censoring
time in days from randomization to recurrence
time in days from recurrence to death or censoring
artificial entry time on the time since recurrence scale. For most of the individual this is 0 (no left truncation). For 40 individuals a random number between 1 and the exit time on the time since recurrence scale (timesr) is simulated.
time in days from randomization to observation in the recurrence state. If the individual is observed from entry in the recurrence state this is equal to the time at recurrence. If the entry in the recurrence state is not observed from the beginning, left truncation is observed. This is not present in the original data, but has been here introduced artificially for 40 individuals. This is done by first increasing the time at recurrence by a random number between 1 and the exit time on the time since recurrence scale. Then, the time at recurrence is added to the artificial entry time.
time in years from randomization to death or censoring
time in years from randomization to recurrence
left truncated entry in the recurrence state measured in years since recurrence
left truncated entry in the recurrence state measured in years since randomization
time in years from recurrence to death or censoring
Therneau, T. (2023). A Package for Survival Analysis in R. R package version 3.5-3, https://CRAN.R-project.org/package=survival
Moertel, C.G, et al. (1995). Fluorouracil plus Levamisole as Effective Adjuvant Theraphy after Resection of Stage III Colon Carcinoma: A Final Report. Annals of Internal Medicine, 122:321-326
Moerel, C.G., et al. (1990). Levamisole and Fluorouracil for Adjvant Theraphy of Resected Colon Carcinoma. The New England Journal of Medicine, 322:352-8
data(reccolon2ts)
data(reccolon2ts)
Computes the survival matrix, that contains the probability of not
experiencing an event (of any cause) by time s
and fixed entry time u
.
The survival function can be obtained from one fitted model with only one
event type, or combining information from several cause-specific hazard
in a competing risks model. In the first case, a fitted object of class 'haz2ts'
or 'haz2tsLMM'
can be passed directly as argument to the function. In the
competing risks framework, the user should provide a list of cause-specific
cumulative hazard matrices. The function is also called internally from plot()
if the user wants to plot the cumulative hazard from a fitted model.
surv2ts( cumhaz = NULL, fitted_model = NULL, plot_grid = NULL, cause = NULL, midpoints = FALSE, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
surv2ts( cumhaz = NULL, fitted_model = NULL, plot_grid = NULL, cause = NULL, midpoints = FALSE, where_slices = NULL, direction = c("u", "s", NULL), tmax = NULL )
cumhaz |
(optional) a list with all the cause-specific cumulated hazard matrices (minimum one element needs to be supplied). If more than one cause-specific cumulated hazard is provided, then they should all be matrices of the same dimension. |
fitted_model |
(optional) The output of the function |
plot_grid |
(optional) A list containing the parameters to build a new
finer grid of intervals over
|
cause |
a character string with a short name for the cause (optional). |
midpoints |
A Boolean. Default is |
where_slices |
A vector of values for the cutting points of the desired
slices of the surface. This option is included mostly for the plotting function.
When using |
direction |
If cross-sectional one-dimensional curves are plotted, this
indicates whether the cutting points are located on the |
tmax |
The maximum value of |
a matrix containing the values of the survival function over s
and u
.
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 , 1.5, .5), seq(1 , 1.5, .5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod) # Fake survival curve fakesurv2ts <- surv2ts(fitted_model = fakemod)
# Create some fake data - the bare minimum id <- 1:20 u <- c(5.43, 3.25, 8.15, 5.53, 7.28, 6.61, 5.91, 4.94, 4.25, 3.86, 4.05, 6.86, 4.94, 4.46, 2.14, 7.56, 5.55, 7.60, 6.46, 4.96) s <- c(0.44, 4.89, 0.92, 1.81, 2.02, 1.55, 3.16, 6.36, 0.66, 2.02, 1.22, 3.96, 7.07, 2.91, 3.38, 2.36, 1.74, 0.06, 5.76, 3.00) ev <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1) fakedata <- as.data.frame(cbind(id, u, s, ev)) fakedata2ts <- prepare_data(u = fakedata$u, s_out = fakedata$s, ev = fakedata$ev, ds = .5) # Fit a fake model - not optimal smoothing fakemod <- fit2ts(fakedata2ts, optim_method = "grid_search", lrho = list(seq(1 , 1.5, .5), seq(1 , 1.5, .5))) # Obtain the fake cumulated hazard fakecumhaz2ts <- cumhaz2ts(fakemod) # Fake survival curve fakesurv2ts <- surv2ts(fitted_model = fakemod)