#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# README
# dumpdata.safe.predict
# helpfile.shar
# safe.predict.message1
# This archive created: Fri Mar 4 15:25:41 1994
export PATH; PATH=/bin:$PATH
if test -f 'README'
then
echo shar: over-writing existing file "'README'"
fi
cat << \SHAR_EOF > 'README'
To install the safe.predict software:
------------------------------------
mkdir .Data
mkdir .Data/.Help
cd .Data/.Help
sh ../../helpfile.shar
cd ../../
# pick one of the following
S < dumpdata.safe.predict
#Splus < dumpdata.safe.predict
Have a look at safe.predict.message1
--------------------------
Please send bug reports to:
Trevor Hastie trevor@research.att.com
1-908-582-5647 (FAX) 1-908-582-3340
rm 2C-261 AT&T Bell Labs, Murray Hill, NJ 07974
SHAR_EOF
if test -f 'dumpdata.safe.predict'
then
echo shar: over-writing existing file "'dumpdata.safe.predict'"
fi
cat << \SHAR_EOF > 'dumpdata.safe.predict'
"bs"<-
function(x, df = NULL, knots = NULL, degree = 3, intercept = F, Boundary.knots
= range(x))
{
nx <- names(x)
x <- as.vector(x)
nax <- is.na(x)
if(nas <- any(nax))
x <- x[!nax]
if(!missing(Boundary.knots)) {
Boundary.knots <- sort(Boundary.knots)
outside <- (ol <- x < Boundary.knots[1]) | (or <- x >
Boundary.knots[2])
}
else outside <- rep(F, length = length(x))
if(!missing(df) && missing(knots)) {
nIknots <- df - (degree + 1) + (1 - intercept)
if(nIknots < 0) {
nIknots <- 0
warning(paste("df was too small; have used ", (degree +
1) - (1 - intercept)))
}
if(nIknots > 0) {
knots <- seq(from = 0, to = 1, length = nIknots + 2)[ -
c(1, nIknots + 2)]
knots <- quantile(x[!outside], knots)
}
else knots <- NULL
}
Aknots <- sort(c(rep(Boundary.knots, degree + 1), knots))
if(any(outside)) {
warning("Some x values beyond boundary knots may cause ill-conditioned bases"
)
derivs <- seq(from = 0, to = degree)
# tricky way to do factorial
scalef <- c(1, exp(cumsum(log(seq(degree)))))
basis <- array(0, c(length(x), length(Aknots) - degree - 1))
if(any(ol)) {
k.pivot <- Boundary.knots[1]
xl <- cbind(1, outer(x[ol] - k.pivot, 1:degree, "^"))
tt <- spline.des(Aknots, rep(k.pivot, degree + 1),
degree + 1, derivs)$design
basis[ol, ] <- xl %*% (tt/scalef)
}
if(any(or)) {
k.pivot <- Boundary.knots[2]
xr <- cbind(1, outer(x[or] - k.pivot, 1:degree, "^"))
tt <- spline.des(Aknots, rep(k.pivot, degree + 1),
degree + 1, derivs)$design
basis[or, ] <- xr %*% (tt/scalef)
}
if(any(inside <- !outside))
basis[inside, ] <- spline.des(Aknots, x[inside],
degree + 1)$design
}
else basis <- spline.des(Aknots, x, degree + 1)$design
if(!intercept)
basis <- basis[, -1]
n.col <- ncol(basis)
if(nas) {
nmat <- matrix(NA, length(nax), n.col)
nmat[!nax, ] <- basis
basis <- nmat
}
dimnames(basis) <- list(nx, 1:n.col)
a <- list(degree = degree, knots = knots, Boundary.knots =
Boundary.knots, intercept = intercept, class = c("bs", "basis")
)
attributes(basis) <- c(attributes(basis), a)
basis
}
"ns"<-
function(x, df = NULL, knots = NULL, intercept = F, Boundary.knots = range(x))
{
nx <- names(x)
x <- as.vector(x)
nax <- is.na(x)
if(nas <- any(nax))
x <- x[!nax]
if(!missing(Boundary.knots)) {
Boundary.knots <- sort(Boundary.knots)
outside <- (ol <- x < Boundary.knots[1]) | (or <- x >
Boundary.knots[2])
}
else outside <- rep(F, length = length(x))
if(!missing(df) && missing(knots)) {
# df = number(interior knots) + 1 + intercept
nIknots <- df - 1 - intercept
if(nIknots < 0) {
nIknots <- 0
warning(paste("df was too small; have used ", 1 +
intercept))
}
if(nIknots > 0) {
knots <- seq(from = 0, to = 1, length = nIknots + 2)[ -
c(1, nIknots + 2)]
knots <- quantile(x[!outside], knots)
}
else knots <- NULL
}
Aknots <- sort(c(rep(Boundary.knots, 4), knots))
if(any(outside)) {
basis <- array(0, c(length(x), nIknots + 4))
if(any(ol)) {
k.pivot <- Boundary.knots[1]
xl <- cbind(1, x[ol] - k.pivot)
tt <- spline.des(Aknots, rep(k.pivot, 2), 4, c(0, 1))$
design
basis[ol, ] <- xl %*% tt
}
if(any(or)) {
k.pivot <- Boundary.knots[2]
xr <- cbind(1, x[or] - k.pivot)
tt <- spline.des(Aknots, rep(k.pivot, 2), 4, c(0, 1))$
design
basis[or, ] <- xr %*% tt
}
if(any(inside <- !outside))
basis[inside, ] <- spline.des(Aknots, x[inside], 4)$
design
}
else basis <- spline.des(Aknots, x, 4)$design
const <- spline.des(Aknots, Boundary.knots, 4, c(2, 2))$design
if(!intercept) {
const <- const[, -1]
basis <- basis[, -1]
}
qr.const <- qr(t(const))
basis <- as.matrix((t(qr.qty(qr.const, t(basis))))[, - (1:2)])
n.col <- ncol(basis)
if(nas) {
nmat <- matrix(NA, length(nax), n.col)
nmat[!nax, ] <- basis
basis <- nmat
}
dimnames(basis) <- list(nx, 1:n.col)
a <- list(degree = 4, knots = knots, Boundary.knots = Boundary.knots,
intercept = intercept, class = c("ns", "basis"))
attributes(basis) <- c(attributes(basis), a)
basis
}
"predict.bs"<-
function(object, newx, ...)
{
if(missing(newx))
return(object)
a <- c(list(x = newx), attributes(object)[c("degree", "knots",
"Boundary.knots", "intercept")])
do.call("bs", a)
}
"predict.ns"<-
function(object, newx, ...)
{
if(missing(newx))
return(object)
a <- c(list(x = newx), attributes(object)[c("degree", "knots",
"Boundary.knots", "intercept")])
do.call("ns", a)
}
SHAR_EOF
if test -f 'helpfile.shar'
then
echo shar: over-writing existing file "'helpfile.shar'"
fi
cat << \SHAR_EOF > 'helpfile.shar'
#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# .Data/.Help/bs
# .Data/.Help/ns
# .Data/.Help/predict.bs
# .Data/.Help/predict.ns
# This archive created: Fri Mar 4 15:25:41 1994
export PATH; PATH=/bin:$PATH
if test -f 'bs'
then
echo shar: over-writing existing file "'bs'"
fi
cat << \EEEEE > 'bs'
.BG
.FN bs
.TL
Generate a Basis for Polynomial Splines
.CS
bs(x, df, knots, degree=3, intercept=FALSE, Boundary.knots)
.AG x
the predictor variable.
.AG df
degrees of freedom; one can specify `df' rather than `knots'; `bs()' then
chooses `df-degree-1' knots at suitable quantiles of `x'.
.AG knots
the
.I internal
breakpoints that define the spline. The default is `NULL', which results in a
basis for ordinary polynomial regression. Typical values
are the mean or median for one knot, quantiles for more knots. See also
`Boundary.knots'.
.AG degree
degree of the piecewise polynomial---default is 3 for cubic splines.
.AG intercept
if `TRUE', an intercept is included in the basis; default is `FALSE'.
.AG Boundary.knots
boundary points at which to anchor the B-spline basis (default the range of the data). If both `knots' and `Boundary.knots' are supplied, the basis parameters
do not depend on `x'. Data can extend beyond `Boundary.knots'
.RT
a matrix of dimension `length(x) * df', where either `df' was supplied or
if `knots' were supplied, `df = length(knots) + 3 + intercept'.
Attributes are returned that correspond to the arguments to `bs', and
explicitly give the `knots', `Boundary.knots' etc for use by `predict.bs()'.
.PP
`bs()' is based on the function `spline.des()' written
by Douglas Bates. It generates a basis matrix for representing the family of piecewise polynomials with the specified interior knots and degree, evaluated at the values of `x'. A primary use is in modeling formulas to directly specify a piecewise polynomial term in a model.
.PP
Beware of making predictions with new `x' values when `df' is used as an argument. Either use `safe.predict.gam()', or else specify `knots' and `Boundary.knots'.
.SA
`ns', `poly', `lo', `s', `smooth.spline', `predict.bs'.
.EX
lm(y ~ bs(age, 4) + bs(income, 4)) # an additive model
fit <- lm(y ~ bs(age, knots=c(20,30), B=c(0,100))
predict(fit, new.age) #safe predictions because explicit knot sequence was supplied.
.KW regression basis, splines
.WR
EEEEE
if test -f 'ns'
then
echo shar: over-writing existing file "'ns'"
fi
cat << \EEEEE > 'ns'
.BG
.FN ns
.TL
Generate a Basis Matrix for Natural Cubic Splines
.CS
ns(x, df, knots, intercept=F, Boundary.knots)
.AG x
the predictor variable.
.AG df
degrees of freedom. One can supply `df' rather than knots; `ns()' then
chooses `df-1-intercept' knots at suitably chosen quantiles of `x'.
.AG knots
breakpoints that define the spline. The default is no knots; together with the natural boundary conditions this results in a
basis for linear regression on `x'.
Typical values are the mean or median for one knot, quantiles for
more knots. See also `Boundary.knots'.
.AG intercept
if `TRUE', an intercept is included in the basis; default is `FALSE'.
.AG Boundary.knots
boundary points at which to impose the natural boundary conditions and
anchor the B-spline basis (default the range of the data).
If both `knots' and `Boundary.knots' are supplied, the basis parameters
do not depend on `x'. Data can extend beyond `Boundary.knots'
.RT
a matrix of dimension `length(x) * df' where either `df' was supplied or
if `knots' were supplied, `df = length(knots) + 1 + intercept'.
Attributes are returned that correspond to the arguments to `ns', and
explicitly give the `knots', `Boundary.knots' etc for use by `predict.ns()'.
.PP
`ns()' is based on the function `spline.des()' written
by Douglas Bates. It generates a basis matrix for representing the family of piecewise-cubic splines with the specified sequence of interior knots, and the natural boundary conditions.
These enforce the constraint that the function is linear beyond the boundary knots, which can either be supplied, else default to the extremes of the data.
A primary use is in modeling formula to directly specify a natural spline term in a model.
.PP
Beware of making predictions with new `x' values when `df' is used as an argument. Either use `safe.predict.gam()', or else specify `knots' and `Boundary.knots'.SA
`bs', `poly', `lo', `s', `predict.ns'
.EX
lsfit(ns(x,5),y)
lm(y ~ ns(age, 4) + ns(income, 4)) # an additive model
fit <- lm(y ~ ns(age, knots=c(20,30), B=c(10,60))
predict(fit, new.age) #safe predictions because explicit knot sequence was supplied.
.KW regression basis, splines
.WR
EEEEE
if test -f 'predict.bs'
then
echo shar: over-writing existing file "'predict.bs'"
fi
cat << \EEEEE > 'predict.bs'
.BG
.FN predict.bs
.FN predict.ns
.TL
Evaluate a predefined spline basis at new values of x
.CS
predict.bs(basis, newx)
predict.ns(basis, newx)
.AG basis
the result of a call to `bs()' or `ns()', in particular, having attributes
describing `knots', `degree' etc.
.AG newx
the `x' values at which evaluations are required.
.RT
an object just like `basis', except evaluated at the new values of `x'
.PP
These are methods for the function `predict()' for objects inheriting from classes `bs' or `ns'.
See `predict' for the general behavior of this function.
.SA
`bs', `ns', `poly', `lo', `s'
.EX
basis <- ns(x, knots=c(3,6), B=c(0,5)) #generate a basis
basis.eval <- predict(basis, newx)
.KW regression basis, splines
.WR
EEEEE
if test -f 'predict.ns'
then
echo shar: over-writing existing file "'predict.ns'"
fi
cat << \EEEEE > 'predict.ns'
.BG
.FN predict.bs
.FN predict.ns
.TL
Evaluate a predefined spline basis at new values of x
.CS
predict.bs(basis, newx)
predict.ns(basis, newx)
.AG basis
the result of a call to `bs()' or `ns()', in particular, having attributes
describing `knots', `degree' etc.
.AG newx
the `x' values at which evaluations are required.
.RT
an object just like `basis', except evaluated at the new values of `x'
.PP
These are methods for the function `predict()' for objects inheriting from classes `bs' or `ns'.
See `predict' for the general behavior of this function.
.SA
`bs', `ns', `poly', `lo', `s'
.EX
basis <- ns(x, knots=c(3,6), B=c(0,5)) #generate a basis
basis.eval <- predict(basis, newx)
.KW regression basis, splines
.WR
EEEEE
# End of shell archive
exit 0
SHAR_EOF
if test -f 'safe.predict.message1'
then
echo shar: over-writing existing file "'safe.predict.message1'"
fi
cat << \SHAR_EOF > 'safe.predict.message1'
I have posted version 1 of an improved and augmented set of functions
for working with regression splines. The functions posted are revisions
of bs() and ns(), and predict() methods for each. The entry is called
"safe.predict" for reasons which will become apparant.
Each of ns() and bs() have an additional argument: Boundary.knots.
These give the "anchor points" for the B-splines constructed by
spline.des(), a primative used by both ns() and bs(). Boundary.knots
defaults to the range of the supplied x argument. If supplied, they
do not need to cover the range of the data (i.e. data can lie outside
the boundary knots). If data lie far outside the boundary knots,
numerical problems typical of polynomials can occur.
For ns() the Boundary.knots simultaneously anchor the B-splines, and also
define the points at which the natural boundary conditions are imposed: the
basis is linear beyond them.
Why these added arguments? Now bs() and ns() can be used by lm(),
glm() etc, and produce accurate predictions IF both the "knots" and
"Boundary.knots" arguments are supplied in the original call.
E.G.
fit <- glm(Kyphosis ~ ns(Age, kn=c(5,12), B=c(0,50)) + Start, family=binomial)
eval.fit <- predict(fit,list(Age=seq(0,100,by=1)), type="response")
>>note the arguments can be abbreviated <<
predict.bs() and predict.ns() are simple functions that allow one to evaluate
the basis returned by bs() or ns() at new locations. The revised bs() and
ns() carry as attributes the explicit arguments needed to define the basis.
E.G.
basis <- ns(x, df=6) # create a basis, with data dependent parameters
attached as attributes
basis.aux < predict(basis, newx) # the same basis, evaluated elsewhere.
Why version 1, and way in an entry called safe.predict? There will be
a version 2 of experimental functions that allow the modelling functions
to remember the "parameters" of special smart functions like bs, ns and poly.
These parameters will be saved on the object, much like contrasts are now,
and will be retrieved when predictions are made from the model. More to
come a bit later.
SHAR_EOF
# End of shell archive
exit 0