####################################################################
# d009newr compute the NPMLE of distribution function from doubly #
# censored data. #
# copyright The software can be freely used and freely distributed #
# for non-commercial purposes. Please send comments, #
# bug report etc. to mai@ms.uky.edu or #
# #
# Mai Zhou #
# Department of Statistics #
# University of Kentucky #
# Lexington, KY 40506-0027 #
####################################################################
d009newr <- function(z, d, identical = rep(1, length(z)),
maxiter = 49, error = 0.00001)
{
# Written by Mai Zhou (mai@ms.uky.edu) and Li Lee (the C part). version 1.50.
# Last revision Aug. 17,1994. Gnu type copyright reserved. (free non-commercial
# use granted). There is a part of this function written in C. You should
# cut the C part to a separate file, compile and load (nrnew.o) into Splus.
#
# This function computes the NPMLE of CDF from doubly censored data
# via EM algorithm starting from an initial estimator that
# have jumps at censoring pattern of (0,2), (see below for definition)
# as well as uncensored points. When there are ties, the left (right)
# censored points are treated as happened before (after), to break tie.
# Also the last right censored observation and first left censored
# observations are changed to uncensored, in order to obtain
# a proper distribution as estimator. (though this can be modified
# easily as they are written in S language)
#
# Ref: Chang, M. N. and Yang, G. L. (1987). Strong consistency
# of a nonparametric estimator of the survival
# function with doubly censored data. Ann. Statist. 15, 1536-1547.
# See also Turnbull (1976) The empirical distribution function with
# arbitrarily gruped, censored and truncated data. JRSS B 290-295.
#
# Inputs:
# z -- a vector of length n denoting observed times, (ties permitted)
# d -- a vector of length n that contains censoring indicator:
# d= 2 or 1 or 0, (according to z being left, no, right censoring)
# identical, maxiter and error are optional input.
#
# maxiter and error are used to control the EM iterations. The iteration
# will stop whenever the difference between 2 consecutive iteration is
# less then error, or number of iteration exceeds maxiter.
#
# identical is a vector that has values either 0 or 1.
# identical[i]=1 means even if (z[i],d[i]) is identical with
# (z[j],d[j]), some j!=i, they still stay as 2 observations,
# (not 1 obs. with weight 2, which only happen if identical[i]=0
# and identical[j] =0). One reason for this may be because they have
# different covariates not shown here so that it has more flexibility
# for regression applications.
#
# Before calling the function, make sure z is a vector contain only
# real numbers and d is a vector contain only 2's or 1's or 0's.
# Finally, z and d must be of same length. I did not write built-in
# checkings, to keep things simple and save some cpu time.
#
# Output: time (of z according only to d=0,1, -1; where a -1 means those
# added z(i) for (0,2) pattern, in that case, I took the
# mid point as the time)
# status (the indicator, = 0, 1, or -1.)
# surv, jump, (these are self explaining)
# conv (the # of iteration and diff between last 2 iterations)
#
# Bug: only tested with Splus 3.1 and 3.2 on HP700 (both cc and gcc works)
# and SPARC (use cc, gcc do not work. only limited test).
#
# Example: if z=(1,2,3,4,5); d=(1,0,2,2,1) then d009newr(z,d) produce:
# $time:
# [1] 1.0 2.0 2.5 5.0 (notice the times, (3,4), coresponding
# to d=2 are removed, and time 2.5 added
# $status: since there is a (0,2) pattern at
# [1] 1 0 -1 1 times 2, 3. The status indicator of -1
# show that is is an added time )
# $surv:
# [1] 0.5000351 0.5000351 0.3333177 0.0000000
#
# $jump:
# [1] 0.4999649 0.0000000 0.1667174 0.3333177
#
# $conv:
# [1] 3.300000e+01 8.788214e-06 (32 iterations done)
# The true MLE of surv is (1/2, 1/2, 1/3, 0) at times (1,2,2.5,5).
#
niceorder <- order(z, - d) # order them as z increase. When z
sortz <- z[niceorder] # has ties, use d=2, 1, 0 to order
sortd <- d[niceorder] # the tied z values.
dupsortz <- duplicated(sortz) #see if there is dup in z. But even
argdiff <- c(1, diff(sortd)) #z's tie, if d is diff, it's not a tie
dupsortz[argdiff != 0] <- F #seems I need not dupsortz ==T &
dupsortz[identical != 0] <- F #also, do not collaps if identical !=0
sortz <- sortz[dupsortz != T] # get the unique values of sortz and
sortd <- sortd[dupsortz != T] # sortd. the weight or duplicatility
# w[i] will need to be computed later in C function
# change the last and first obs to uncensored as nesessory.
# This is tedious but must be done in order to get an estimator that is a
# proper distribution. I did not move this part into C-code since I feel
# there may be other ways of redefining the last right censored
# observations and first left censored observation. (flexibility!)
m<-length(sortd)
d01 <- sortd[sortd < 1.5]
last <- length(d01)
if(d01[last] == 0) {
i <- m
z01 <- sortz[sortd < 1.5]
while(sortd[i] != 1 && i > 0) {
#
# one more condition in while: sortz[i] >= z01[last] ?
#
if(sortd[i] == 0 && sortz[i] == z01[last])
sortd[i] <- 1
i <- i - 1
}
}
d12 <- sortd[sortd > 0.5]
if(d12[1] == 2) {
i <- 1
z12<-sortz[sortd > 0.5]
while(sortd[i] != 1 && i <= m ) {
if(sortd[i] == 2 && sortz[i] == z12[1])
sortd[i] <- 1
i <- i + 1
}
}
sur <- rep(9, length(sortz))
jum <- sur
tes <- .C("urnew",
as.double(sortz),
as.integer(sortd),
as.character(dupsortz),
as.double(sur),
as.double(jum),
as.integer(maxiter),
as.double(error),
as.integer(length(dupsortz)),
as.integer(length(sortd)),
as.integer(length(sortd[sortd>1.5])))
list(time = tes[[1]][1:tes[[9]]],
status = tes[[2]][1:tes[[9]]],
surv = tes[[4]][1:tes[[9]]],
jump = tes[[5]][1:tes[[9]]],
conv = c(tes[[6]], tes[[7]]))
}
============cut here and put the rest in a file called urnew.c=======
/*************************************************************************
Program Name : urnew.c
Programmer : Li Lee and Mai Zhou (mai@ms.uky.edu) Gnu type copyright.
Description : This program is used in Splus function d009newr, replacing
(old) d009. (On HP700 computers, compile by
cc -c -O urnew.c and then inside Splus use
> dyn.load2("urnew.o") to load it. Producing a smaller code
under HP-UX 9.01 than 9.05 (strange?))
Also, gcc seems to work OK under HP but not SPARC.
Date 1st ver. : June 9, 1993
Last revision : Aug. 17, 1994. This C program should be used with version
1.50 of the Splus program d009newr.
**************************************************************************/
/* #include */
#include
void urnew(z, d, dup, sur, jum, max, err, r, s, rs)
long *max, *r, *s, *rs;
long d[];
double *err;
double z[], sur[], jum[];
char *dup[]; /* I used char *dup and char dup[], they do not work! */
/* z, d, max and err are z012, d012, we012, maxiter and error in Splus.
dup, sur, jum are as in Splus.
Everything should ordered according to (z,-d) before calling this
function.
r is the length of dup which >= length of z or d.
s is the length of d. (and z)
rs is length of d[d=2] */
{ long i, j, h, mm, nn, en= *r, n= *s, el= *rs, num, *k, *dadd, m= *max;
long *d01;
double u, *o, *w, *wadd, *zadd, *w01, *z01, *w2, a= *err;
double ma();
void wur();
k=(void *)malloc((el+1)*sizeof(long));
o=(void *)malloc((n+1)*sizeof(double));
w=(void *)malloc((n+1)*sizeof(double));
wadd=(void *)malloc((n+el+1)*sizeof(double));
zadd=(void *)malloc((n+el+1)*sizeof(double));
dadd=(void *)malloc((n+el+1)*sizeof(long));
for(i=0;ia)
a=b;
return a;
}
/* The following function wur is to replace the Splus
function wkm. z, d, wt, sur and jum are as in wkm,
n is the length of z.
Although z did not appear, the data, d, wt, etc. are
sorted according to z value.
It computes the weighted Kaplan-Meier estimator, did not
remove the censored observation (i.e. leave the observations
as is. which is z, d, and wt. where wt do not have to sum to one.
The censoring indicator, d[i], do not have to be 0/1 value,
in fact, the only thing matters is wheather d[i]=0 or not.
The results are in sur, jum. (which is the survival prob and jump at zi)
n is included for convinience of C programing */
void wur(d, wt, sur, jum, n)
double wt[], sur[], jum[]; /* *wt, *sur, *jum[]? */
long d[], n; /* should that be *n ? long *n ? d[] ? */
/* since this is not directly interface with S */
/* may be we do not need them to be pointers */
{ long i;
double a=0.0;
for(i=0; i