%\VignetteIndexEntry{North Carolina SIDS data set}
%\VignetteDepends{}
%\VignetteKeywords{spatial}
%\VignettePackage{spdep}
\documentclass[a4paper,10pt]{article} 
\usepackage{Sweave}
\usepackage{times}
\usepackage{mathptm}
\usepackage{hyperref}
\usepackage{natbib}

\setkeys{Gin}{width=0.95\textwidth}
\newcommand{\strong}[1]{{\normalfont\fontseries{b}\selectfont #1}}
\let\pkg=\strong
\RequirePackage{alltt}
\newenvironment{example}{\begin{alltt}}{\end{alltt}}
\newenvironment{smallexample}{\begin{alltt}\small}{\end{alltt}}
\newcommand{\code}[1]{\texttt{\small #1}}
\def\RR{\textsf{R}\/}
\def\SP{\texttt{S-PLUS}\/}
\def\SS{\texttt{S}\/}
\SweaveOpts{keep.source=FALSE}

\title{Introduction to the North Carolina SIDS data set (revised)} 
\author{Roger Bivand} 

\begin{document} 

\maketitle 

<<echo=FALSE>>= 
owidth <- getOption("width")
options("width"=70)
ow <- getOption("warn")
options("warn"=-1)
.PngNo <- 0
@

<<label=afig,echo=FALSE,eval=FALSE>>= 
.PngNo <- .PngNo + 1; file <- paste("Fig-bitmap-", .PngNo, ".pdf", sep="")
pdf(file=file, width = 6.5, height = 3.5, pointsize = 12, bg = "white")
opar <- par(mar=c(3,3,1,1)+0.1)
@
<<label=afigl,echo=FALSE,eval=FALSE>>= 
.PngNo <- .PngNo + 1; file <- paste("Fig-bitmap-", .PngNo, ".pdf", sep="")
pdf(file=file, width = 6.5, height = 3.5, pointsize = 12, bg = "white")
@
<<label=bfigl,echo=FALSE,eval=FALSE>>= 
.PngNo <- .PngNo + 1; file <- paste("Fig-bitmap-", .PngNo, ".pdf", sep="")
pdf(file=file, width = 6.5, height = 5, pointsize = 12, bg = "white")
@

<<label=zfig,echo=FALSE,eval=FALSE>>=
par(opar)
dev.null <- dev.off()
cat("\\includegraphics[width=0.95\\textwidth]{", file, "}\n\n", sep="")
@
<<label=zfigl,echo=FALSE,eval=FALSE>>=
dev.null <- dev.off()
cat("\\includegraphics[width=0.95\\textwidth]{", file, "}\n\n", sep="")
@

\section{Introduction}

This data set was presented first in \citet{symonsetal:1983}, analysed with
reference to the spatial nature of the data in \citet{cressie+read:1985},
expanded in \citet{cressie+chan:1989}, and used in detail in \citet{cressie:1991}. It is for
the 100 counties of North Carolina, and includes counts of numbers of
live births (also non-white live births) and numbers of sudden infant
deaths, for the July 1, 1974 to June 30, 1978 and July 1, 1979 to June 30, 1984 periods. In \citet{cressie+read:1985}, a listing of county neighbours based on shared boundaries
(contiguity) is given, and in \citet{cressie+chan:1989}, and in \citet[][pp. 386--389]{cressie:1991}, a different listing based on the criterion of
distance between county seats, with a cutoff at 30 miles. The county seat
location coordinates are given in miles in a local (unknown) coordinate
reference system. The data are also used to exemplify a range of functions
in the \SP~spatial statistics module user's manual \citep{kaluznyetal:1996}.

<<echo=FALSE,eval=TRUE,results=hide>>= 
library(spdep)
@

\section{Getting the data into \RR}

We will be using the \pkg{spdep} package, here version:
\Sexpr{spdep()[1]}, the \pkg{sp} package and the \pkg{maptools}
package. The data from the sources refered to above is documented in
the help page for the \code{nc.sids} data set in \pkg{spdep}. The
actual data, included in a shapefile of the county boundaries
for North Carolina has been made available in the \pkg{maptools}
package\footnote{These data are taken with permission from:
\url{http://sal.agecon.uiuc.edu/datasets/sids.zip}.}. These data are known
to be geographical coordinates (longitude-latitude in decimal degrees)
and are assumed to use the NAD27 datum.

\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>= 
library(sp)
library(maptools)
library(spdep)
nc_file <- system.file("etc/shapes/sids.shp", package="spdep")[1]
llCRS <- CRS("+proj=longlat +datum=NAD27")
nc <- readShapeSpatial(nc_file, ID="FIPSNO", proj4string=llCRS)
@
\end{footnotesize}

The shapefile format presupposes that you have three files with extensions
\code{*.shp}, \code{*.shx}, and \code{*.dbf}, where the first contains the
geometry data, the second the spatial index, and the third the attribute
data. They are required to have the same name apart from the extension,
and are read here using \code{readShapeSpatial()} into the 
\code{SpatialPolygonsDataFrame} object \code{nc}; the class is defined 
in \pkg{sp}. The centroids of the largest polygon in each county are available using the \code{coordinates} method from \pkg{sp} as a two-column matrix, and can be used to place labels:


\begin{footnotesize}
<<label=plotNC1,fig=FALSE,echo=TRUE,eval=FALSE>>= 
plot(nc, axes=TRUE)
text(coordinates(nc), label=nc$FIPSNO, cex=0.5)
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afig>>
<<plotNC1>>
<<zfig>>
@ 
\end{center}
\caption{County boundaries and polygon centroids, North Carolina}
\label{plotNC1}
\end{figure}

We can examine the names of the columns of the data frame to see what it
contains --- in fact some of the same columns that we will be examining
below, and some others which will be useful in cleaning the data
set. 

\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>= 
names(nc)
summary(nc)
@
\end{footnotesize}

We will now examine the data set reproduced from Cressie and collaborators,
included in \pkg{spdep}, and add the neighbour relationships used in
\citet{cressie+chan:1989} to the background map as a graph shown in Figure \ref{plot-CC89.nb}:


\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>= 
gal_file <- system.file("etc/weights/ncCR85.gal", package="spdep")[1]
ncCR85 <- read.gal(gal_file, region.id=nc$FIPSNO)
ncCR85
gal_file <- system.file("etc/weights/ncCC89.gal", package="spdep")[1]
ncCC89 <- read.gal(gal_file, region.id=nc$FIPSNO)
ncCC89
@
\end{footnotesize}

\begin{footnotesize}
<<label=plot-CC89.nb,fig=FALSE,echo=TRUE,eval=FALSE>>= 
plot(nc, border="grey")
plot(ncCC89, coordinates(nc), add=TRUE, col="blue")
@ 
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afig>>
<<plot-CC89.nb>>
<<zfig>>
@ 
\end{center}
\caption{Overplotting county boundaries with 30 mile neighbour relations as a graph.}
\label{plot-CC89.nb}
\end{figure}

Printing the neighbour object shows that it is a neighbour list object, with a
very sparse structure --- if displayed as a matrix, only 3.94\% of cells
would be filled. Objects of class \code{nb} contain a list as long as
the number of counties; each component of the list is a vector with the
index numbers of the neighbours of the county in question, so that the
neighbours of the county with \code{region.id} of \code{"37001"} 
can be retreived
by matching against the indices. More information can be obtained by
using \code{summary()} on an \code{nb} object. Finally, we associate a
vector of names with the neighbour list, through the \code{row.names}
argument. The names should be unique, as with data frame row names.

\begin{footnotesize}
<<echo=TRUE>>= 
ncCC89
r.id <- attr(ncCC89, "region.id")
ncCC89[[match("37001", r.id)]]
r.id[ncCC89[[match("37001", r.id)]]]
@ 
\end{footnotesize}
The neighbour list object records neighbours by their order in
relation to the list itself, so the neighbours list for the county with
\code{region.id} "37001" are the seventeenth, nineteenth, thirty-second, 
forty-first and sixty-eighth in the list. We can retreive their codes by 
looking them up in the \code{region.id} attribute.

\begin{footnotesize}
<<echo=TRUE>>= 
as.character(nc$NAME)[card(ncCC89) == 0]
@ 
\end{footnotesize}
We should also note that this neighbour criterion generates two counties
with no neighbours, Dare and Hyde, whose county seats were more than
30 miles from their nearest neighbours. The \code{card()} function
returns the cardinality of the neighbour set. We need to return to
methods for handling no-neighbour objects later on. We will also show
how new neighbours lists may be constructed in \RR, and compare these
with those from the literature.


\subsection{Probability mapping}


Rather than review functions for measuring and modelling spatial
dependence in the \pkg{spdep} package, we will focus on probability
mapping for disease rates data. Typically, we have counts of the incidence
of some disease by spatial unit, associated with counts of populations
at risk. The task is then to try to establish whether any spatial units
seem to be characterised by higher or lower counts of cases than might
have been expected in general terms \citep{bailey+gatrell:1995}.

An early approach by \citet{choynowski:1959}, described by
\citet{cressie+read:1985} and \citet{bailey+gatrell:1995}, assumes,
given that the true rate for the spatial units is small, that as the
population at risk increases to infinity, the spatial unit case counts
are Poisson with mean value equal to the population at risk times the
rate for the study area as a whole. Choynowski's approach folds the two
tails of the measured probabilities together, so that small values, for
a chosen $\alpha$, occur for spatial units with either unusually high
or low rates. For this reason, the high and low counties are plotted
separately in Figure \ref{choymap}.

\begin{footnotesize}
<<echo=TRUE>>= 
ch <- choynowski(nc$SID74, nc$BIR74)
nc$ch_pmap_low <- ifelse(ch$type, ch$pmap, NA)
nc$ch_pmap_high <- ifelse(!ch$type, ch$pmap, NA)
prbs <- c(0,.001,.01,.05,.1,1)
nc$high = cut(nc$ch_pmap_high, prbs)
nc$low = cut(nc$ch_pmap_low,prbs )
@ 
<<echo=TRUE,eval=FALSE>>= 
spplot(nc, c("low", "high"), col.regions=grey.colors(5))
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afigl>>
print(spplot(nc, c("low", "high"), col.regions=grey.colors(5)))
<<zfigl>>
@ 
\end{center}
\caption{Probability map of North Carolina counties, SIDS cases 1974--78, $\alpha = 0.05$, reproducing \citet{cressie+read:1985}, Figure 1.}
\label{choymap}
\end{figure}


For more complicated thematic maps, it may be helpful to use ColorBrewer
(\url{http://colorbrewer.org}) colour palettes. Here we will only use
the grey sequential palette, available in \RR~in the \pkg{RColorBrewer}
package (the colours are copied here to avoid loading the package).

While the \code{choynowski()} function only provides the probability
map values required, the \code{probmap()} function returns raw (crude)
rates, expected counts (assuming a constant rate across the study area),
relative risks, and Poisson probability map values calculated using the
standard cumulative distribution function \code{ppois()}. This does not
fold the tails together, so that counties with lower observed counts
than expected, based on population size, have values in the lower tail,
and those with higher observed counts than expected have values in the
upper tail, as Figure \ref{poismap} shows.

\begin{footnotesize}
<<echo=TRUE>>= 
pmap <- probmap(nc$SID74, nc$BIR74)
nc$pmap <- pmap$pmap
brks <- c(0,0.001,0.01,0.025,0.05,0.95,0.975,0.99,0.999,1)
library(RColorBrewer)
@ 
<<echo=TRUE,eval=FALSE>>= 
spplot(nc, "pmap", at=brks, col.regions=rev(brewer.pal(9, "RdBu")))
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afigl>>
print(spplot(nc, "pmap", at=brks, col.regions=rev(brewer.pal(9, "RdBu"))))
<<zfigl>>
@ 
\end{center}
\caption{Probability map of North Carolina counties, SIDS cases 1974--78, reproducing \citet{kaluznyetal:1996}, p. 57, Figure 3.28.}
\label{poismap}
\end{figure}

Marilia Carvalho (personal communication) and Virgilio G\'{o}mez Rubio
\citep{gomez-rubio+ferrandiz+lopez:2003} have pointed to the unusual
shape of the distribution of the Poisson probability values (Figure
\ref{poishist}), repeating the doubts about probability mapping voiced
by \citet[][p. 392]{cressie:1991}: ``an extreme value $\ldots$ may be
more due to its lack of fit to the Poisson model than to its deviation
from the constant rate assumption''. There are many more high values
than one would have expected, suggesting perhaps overdispersion, that
is that the ratio of the variance and mean is larger than unity.


\begin{footnotesize}
<<label=poishist,fig=FALSE,echo=TRUE,eval=FALSE>>= 
hist(nc$pmap, main="")
@
\end{footnotesize}


\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afig>>
<<poishist>>
<<zfig>>
@ 
\end{center}
\caption{Histogram of Poisson probability values.}
\label{poishist}
\end{figure}


One ad-hoc way to assess the impact of the possible failure of our
assumption that the counts follow the Poisson distribution is to estimate
the dispersion by fitting a generalized linear model of the observed counts
including only the intercept (null model) and offset by the observed
population at risk (suggested by Marilia Carvalho and associates):

\begin{footnotesize}
<<echo=TRUE>>= 
res <- glm(SID74 ~ offset(log(BIR74)), data=nc, family="quasipoisson")
nc$stdres <- rstandard(res)
brks <- c(-4, -3, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 3, 4)
@
<<echo=TRUE,eval=FALSE>>= 
spplot(nc, "stdres", at=brks, col.regions=rev(brewer.pal(11, "RdBu")))
@
\end{footnotesize}

The dispersion is equal to \Sexpr{round(summary(res)$dispersion, digits=4)}, 
much greater than unity; we calculate the corrected
probability map values by taking the standardised residuals of the
model, taking the size of the dispersion into account; the results
are shown in Figure \ref{poismap2}. Many fewer counties appear
now to have unexpectedly large or small numbers of cases. This is
an ad-hoc adjustment made because \RR~provides access to a wide
range of model-fitting functions that can be used to help check our
assumptions. \citet{gomez-rubio+ferrandiz+lopez:2003} chose rather to
construct a probability map under the hypothesis that data are drawn
from a Negative Binomial distribution.

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afigl>>
print(spplot(nc, "stdres", at=brks, col.regions=rev(brewer.pal(11, "RdBu"))))
<<zfigl>>
@ 
\end{center}
\caption{Standardised residual values from the fit of a quasi-Poisson fit of the null model for SIDS rates 1974-78, North Carolina counties.}
\label{poismap2}
\end{figure}
@ 


So far, none of the maps presented have made use of the spatial
dependence possibly present in the data. A further elementary step that
can be taken is to map Empirical Bayes estimates of the rates, which
are smoothed in relation to the raw rates. The underlying question
here is linked to the larger variance associated with rate estimates
for counties with small populations at risk compared with counties with
large populations at risk. Empirical Bayes estimates place more credence
on the raw rates of counties with large populations at risk, and modify
them much less than they modify rates for small counties. In the case
of small populations at risk, more confidence is placed in either the
global rate for the study area as a whole, or for local Empirical Bayes
estimates, in rates for a larger moving window including the neighbours of
the county being estimated. The function used for this in \pkg{spdep} is
\code{EBlocal()}, initially contributed by Marilia Carvalho. It parallels
a similar function in GeoDa, but uses the \citet{bailey+gatrell:1995}
interpretation of \citet{marshall:1991}, rather than that in GeoDa
\citep{anselin+syabri+smirnov:2002}.

\begin{footnotesize}
<<echo=TRUE>>= 
global_rate <- sum(nc$SID74)/sum(nc$BIR74)
nc$Expected <- global_rate * nc$BIR74
res <- EBlocal(nc$SID74, nc$Expected, ncCC89, zero.policy=TRUE)
nc$EB_loc <- res$est
brks <- c(0, 0.25, 0.5, 0.75, 1, 2, 3, 4, 5)
spl <- list("sp.text", loc=coordinates(nc)[card(ncCC89) == 0,], txt=rep("*", 2), cex=1.2)
@
<<echo=TRUE,eval=FALSE>>= 
spplot(nc, "EB_loc", at=brks, col.regions=rev(brewer.pal(8, "RdBu")), sp.layout=spl)
@
\end{footnotesize}

The results are shown in Figure \ref{EBlocal}. Like other relevant
functions in \pkg{spdep}, \code{EBlocal()} takes a \code{zero.policy}
argument to allow missing values to be passed through. In this case,
no local estimate is available for the two counties with no neighbours,
marked by stars.

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afigl>>
print(spplot(nc, "EB_loc", at=brks, col.regions=rev(brewer.pal(8, "RdBu")), sp.layout=spl))
<<zfigl>>
@ 
\end{center}
\caption{Local Empirical Bayes estimates for SIDS rates per 1000 using the 30 mile county seat neighbours list.}
\label{EBlocal}
\end{figure}
@ 

In addition to Empirical Bayes smoothing globally, used both for disease mapping and the Assun\,{c}\~{a}o and Reis correction to Moran's $I$ for rates data (to shrink towards the global rate when the population at risk is small, here as a Monte Carlo test), lists of local neighbours can be used to shrink towards a local rate.


\begin{scriptsize}
<<echo=TRUE>>= 
set.seed(1)
EBImoran.mc(nc$SID74, nc$BIR74, nb2listw(ncCC89, style="B", zero.policy=TRUE), nsim=999, zero.policy=TRUE)
@
\end{scriptsize}


\section{Exploration and modelling of the data}

One of the first steps taken by \citet{cressie+read:1985} is to try to
bring out spatial trends by dividing North Carolina up into $4\times4$
rough rectangles. Just to see how this works, let us map these rough
rectangles before proceeding further (see Figure \ref{LMmap}).


\begin{footnotesize}
<<echo=TRUE>>= 
nc$both <- factor(paste(nc$L_id, nc$M_id, sep=":"))
nboth <- length(table(unclass(nc$both)))
@ 
<<echo=TRUE,eval=FALSE>>= 
spplot(nc, "both", col.regions=sample(rainbow(nboth)))
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<results=tex,echo=FALSE>>= 
<<afigl>>
print(spplot(nc, "both", col.regions=sample(rainbow(nboth))))
<<zfigl>>
@ 
\end{center}
\caption{Rough rectangles used by \citet{cressie+read:1985} to bring out spatial trends.}
\label{LMmap}
\end{figure}

Cressie constructs a transformed SIDS rates variable, 1974--78, for his analyses (with co-workers). We can replicate his stem-and-leaf figure on p. 396 in the book, taken from \citet{cressie+read:1989}:

\begin{footnotesize}
<<echo=TRUE>>= 
nc$ft.SID74 <- sqrt(1000)*(sqrt(nc$SID74/nc$BIR74) + sqrt((nc$SID74+1)/nc$BIR74))
stem(round(nc$ft.SID74, 1), scale=2)
@
\end{footnotesize}

\subsection{Median polish smoothing}
\label{medpol}

\citet[][pp. 46--48, 393--400]{cressie:1991} discusses in some detail
how smoothing may be used to partition the variation in the data into
smooth and rough. In order to try it out on the North Carolina
SIDS data set, we will use a coarse gridding into four columns and four
rows given by \citet[][pp. 553--554]{cressie:1991}, where four grid cells
are empty; these are given by variables \code{L\_id} and \code{M\_id}
in object \code{nc}. Next we aggregate the number of live births
and the number of SIDS cases 1974--1978 for the grid cells:

\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>=
mBIR74 <- tapply(nc$BIR74, nc$both, sum)
mSID74 <- tapply(nc$SID74, nc$both, sum)
@
\end{footnotesize}

Using the same Freeman-Tukey transformation as is used for the county
data, we coerce the data into a correctly configured matrix, some of
the cells of which are empty. The \code{medpolish} function is applied
to the matrix, being told to remove empty cells; the function iterates
over the rows and columns of the matrix using \code{median} to extract
an overall effect, row and column effects, and residuals:

\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>=
mFT <- sqrt(1000)*(sqrt(mSID74/mBIR74) + sqrt((mSID74+1)/mBIR74))
mFT1 <- t(matrix(mFT, 4, 4, byrow=TRUE))
med <- medpolish(mFT1, na.rm=TRUE, trace.iter=FALSE)
med
@
\end{footnotesize}

Returning to the factors linking rows and columns to counties, and
generating matrices of dummy variables using \code{model.matrix}, we
can calculate fitted values of the Freeman-Tukey adjusted rate for each
county, and residuals by subtracting the fitted value from the observed
rate. Naturally, the fitted value will be the same for counties in the
same grid cell:

\begin{footnotesize}
<<echo=TRUE,eval=TRUE>>=
mL_id <- model.matrix(~ as.factor(nc$L_id) -1)
mM_id <- model.matrix(~ as.factor(nc$M_id) -1)
nc$pred <- c(med$overall + mL_id %*% med$row + mM_id %*% med$col)
nc$mp_resid <- nc$ft.SID74 - nc$pred
@
<<label=medpolfig,fig=FALSE,echo=TRUE,eval=FALSE>>= 
cI_ft <- pretty(nc$ft.SID74, n=9)
pal_ft <- colorRampPalette(brewer.pal(6, "YlOrBr"))(length(cI_ft)-1)
p1 <- spplot(nc, c("ft.SID74"), col.regions=pal_ft, at=cI_ft, col="grey30", main="FT transformed SIDS rate")
p2 <- spplot(nc, c("pred"), col.regions=pal_ft, at=cI_ft, col="grey30", main="Median-polish fit")
atn <- pretty(nc$mp_resid[nc$mp_resid < 0])
atp <- pretty(nc$mp_resid[nc$mp_resid >= 0])
pal <- c(rev(brewer.pal(length(atn-1), "YlOrRd")), brewer.pal(length(atp[-1]), "YlGnBu")[-1])
p3 <- spplot(nc, "mp_resid", at=c(atn, atp[-1]), col.regions=pal, col="grey30", main="Median-polish residuals")
plot(p1, split=c(1,1,1,3), more=TRUE)
plot(p2, split=c(1,2,1,3), more=TRUE)
plot(p3, split=c(1,3,1,3), more=FALSE)
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<echo=FALSE,results=tex>>= 
<<bfigl>>
<<medpolfig>>
<<zfigl>>
@
\caption{Freeman-Tukey transformed SIDS rates,  fitted smoothed values, residuals, and Tukey additivity plot.}
\label{fig:med_pol}
\end{center}
\end{figure}

Figure \ref{fig:med_pol} shows the median polish smoothing results
as three maps, the observed Freeman-Tukey transformed SIDS rates,
the fitted smoothed values, and the residuals. In addition, a plot for
the median polish object is also shown, plotting the smooth residuals
against the outer product of the row and column effects divided by the
overall effect, which would indicate a lack of additivity between row
and column if this was the case --- this is more relevant for analysis
of tables of covariates rather than geographical grids. 


\subsection{CAR model fitting}


We will now try to replicate three of the four models fitted by \citep{cressie+chan:1989} to the transformed rates variable. The first thing to do is to try to replicate their 30 mile distance between county seats neighbours, which almost works. From there we try to reconstruct three of the four models they fit, concluding that we can get quite close, but that a number of questions are raised along the way.


Building the weights is much more complicated, because they use a combination of distance-metric and population-at-risk based weights, but we can get quite close \citep[see also][]{kaluznyetal:1996}:

\begin{footnotesize}
<<echo=TRUE>>= 
sids.nhbr30.dist <- nbdists(ncCC89, cbind(nc$east, nc$north))
sids.nhbr <- listw2sn(nb2listw(ncCC89, glist=sids.nhbr30.dist, style="B", zero.policy=TRUE))
dij <- sids.nhbr[,3]
n <- nc$BIR74
el1 <- min(dij)/dij
el2 <- sqrt(n[sids.nhbr$to]/n[sids.nhbr$from])
sids.nhbr$weights <- el1*el2
sids.nhbr.listw <- sn2listw(sids.nhbr)
@
\end{footnotesize}

The first model (I) is a null model with just an intercept, the second (II) includes all the  12 parcels of contiguous counties in 4 east-west and 4 north-south bands, while the fourth (IV) includes the transformed non-white birth-rate:

\begin{footnotesize}
<<echo=TRUE>>= 
nc$ft.NWBIR74 <- sqrt(1000)*(sqrt(nc$NWBIR74/nc$BIR74) + sqrt((nc$NWBIR74+1)/nc$BIR74))
@
\end{footnotesize}

Cressie identifies Anson county as an outlier, and drops it from further analysis. Because the weights are constructed in a complicated way, they will be subsetted by dropping the row and column of the weights matrix:

\begin{footnotesize}
<<echo=TRUE>>= 
lm_nc <- lm(ft.SID74 ~ 1, data=nc)
outl <- which.max(rstandard(lm_nc))
as.character(nc$names[outl])
W <- listw2mat(sids.nhbr.listw)
W.4 <- W[-outl, -outl]
sids.nhbr.listw.4 <- mat2listw(W.4)
nc2 <- nc[!(1:length(nc$CNTY_ID) %in% outl),]
@ 
\end{footnotesize}

It appears that both numerical issues (convergence in particular) and uncertainties about the exact spatial weights matrix used make it difficult to reproduce the results of \citet{cressie+chan:1989}, also given in \citet{cressie:1991}. We now try to replicate them for the null weighted CAR model (Cressie has intercept 2.838, $\hat{\theta}$ 0.833, for k=1):

\begin{footnotesize}
<<echo=TRUE>>= 
ecarIaw <- spautolm(ft.SID74 ~ 1, data=nc2, listw=sids.nhbr.listw.4, weights=BIR74, family="CAR")
summary(ecarIaw)
@
\end{footnotesize}

The spatial parcels model also seems to work, with Cressie's $\hat{\theta}$ 0.710, and the other coefficients agreeing more or less by rank:

\begin{footnotesize}
<<echo=TRUE>>= 
ecarIIaw <- spautolm(ft.SID74 ~ both - 1, data=nc2, listw=sids.nhbr.listw.4, weights=BIR74, family="CAR")
summary(ecarIIaw)
@
\end{footnotesize}

Finally, the non-white model repeats Cressie's finding that much of the variance of the transformed SIDS rate for 1974--8 can be accounted for by the transformed non-white birth variable (Cressie intercept 1.644, $\hat{b}$ 0.0346, $\hat{\theta}$ 0.640 --- not significant):

\begin{footnotesize}
<<echo=TRUE>>= 
ecarIVaw <- spautolm(ft.SID74 ~ ft.NWBIR74, data=nc2, listw=sids.nhbr.listw.4, weights=BIR74, family="CAR")
summary(ecarIVaw)
@
\end{footnotesize}

\begin{footnotesize}
<<echo=TRUE>>= 
nc2$fitIV <- fitted.values(ecarIVaw)
@
<<echo=TRUE,eval=FALSE>>= 
spplot(nc2, "fitIV", cuts=12, col.regions=grey.colors(13, 0.9, 0.3))
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<echo=FALSE,eval=TRUE,results=tex>>= 
<<afigl>>
print(spplot(nc2, "fitIV", cuts=12, col.regions=grey.colors(13, 0.9, 0.3)))
<<zfigl>>
@
\caption{Fitted values for model IV, including covariate.}
\label{fig:fitIV}
\end{center}
\end{figure}

\begin{footnotesize}
<<echo=TRUE>>= 
ecarIawll <- spautolm(ft.SID74 ~ 1, data=nc2, listw=sids.nhbr.listw.4, weights=BIR74, family="CAR", llprof=seq(-0.1, 0.9020532358, length.out=100))
@
<<label=ll_prof,fig=FALSE,echo=TRUE,eval=FALSE>>= 
plot(ll ~ lambda, ecarIawll$llprof, type="l")
@
\end{footnotesize}

\begin{figure}[htbp]
\begin{center} 
<<echo=FALSE,results=tex>>= 
<<afig>>
<<ll_prof>>
<<zfig>>
@
\caption{Plot of log likelihood function values by coefficient values, model I.}
\label{fig:ll_prof}
\end{center}
\end{figure}


<<echo=FALSE>>=
options("width"=owidth)
options("warn"=ow)
@

\section*{References}

\begin{description}

\bibitem[\protect\citeauthoryear{Anselin, Syabri and Smirnov}{Anselin, Syabri and Smirnov}{2002}]{anselin+syabri+smirnov:2002}
Anselin, L., Syabri, I., Smirnov, O., 2002.
\newblock {Visualizing Multivariate Spatial Correlation with Dynamically Linked Windows}.
\newblock {In Anselin, L., Rey, S. (Eds.), Proceedings, CSISS Workshop on New Tools for Spatial Data Analysis, Santa Barbara, CA, May 10-11, 2002. Center for Spatially Integrated Social Science},
\newblock 20 pp., {\url{http://sal.agecon.uiuc.edu/csiss/pdf/multilisa.pdf}}.

\bibitem[\protect\citeauthoryear{Bailey and Gatrell}{Bailey and Gatrell}{1995}]{bailey+gatrell:1995}
Bailey, T.~C., Gatrell, A.~C., 1995.
\newblock {Interactive Spatial Data Analysis}.
\newblock {Harlow: Longman},
\newblock 413 pp.

\bibitem[\protect\citeauthoryear{Choynowski}{Choynowski}{1959}]{choynowski:1959}
Choynowski, M., 1959
\newblock {Maps based on probabilities}.
\newblock {Journal of the American Statistical Association},
\newblock 54 (286), {385--388}.

\bibitem[\protect\citeauthoryear{Cressie}{Cressie}{1991}]{cressie:1991}
Cressie, N., 1991.
\newblock {Statistics for spatial data}.
\newblock {New York: Wiley},
\newblock 900 pp.

\bibitem[\protect\citeauthoryear{Cressie and Chan}{Cressie and Chan}{1989}]{cressie+chan:1989}
Cressie, N., Chan N.~H., 1989.
\newblock {Spatial modelling of regional variables}.
\newblock {Journal of the American Statistical Association},
\newblock 84 (406), {393--401}.

\bibitem[\protect\citeauthoryear{Cressie and Read}{Cressie and Read}{1985}]{cressie+read:1985}
Cressie, N., Read, T.~R.~C., 1985.
\newblock {Do sudden infant deaths come in clusters?}.
\newblock {Statistics and Decisions},
\newblock {Supplement Issue 2, 333--349}.

\bibitem[\protect\citeauthoryear{Cressie and Read}{Cressie and Read}{1989}]{cressie+read:1989}
Cressie, N., Read, T.~R.~C., 1989.
\newblock {Spatial data-analysis of regional counts}.
\newblock {Biometrical Journal},
\newblock 31 (6), {699--719}.

\bibitem[\protect\citeauthoryear{G\'{o}mez Rubio, Ferr\'{a}ndiz and L\'{o}pez}{G\'{o}mez Rubio, Ferr\'{a}ndiz and L\'{o}pez}{2003}]{gomez-rubio+ferrandiz+lopez:2003}
G\'{o}mez Rubio, V., Ferr\'{a}ndiz, J., L\'{o}pez, A., 2003
\newblock {Detecting Disease Clusters with \RR}.
\newblock {In: Hornik, K., Leisch, F., Zeilis, A. (Eds), Proceedings of the 3rd International Workshop on Distributed Statistical Computing, Vienna, Austria},
\newblock 15 pp., {(\url{http://www.ci.tuwien.ac.at/Conferences/DSC-2003/Proceedings/GomezRubioEtAl.pdf})}.

\bibitem[\protect\citeauthoryear{Kaluzny et al.}{Kaluzny et al.}{1996}]{kaluznyetal:1996}
Kaluzny, S.~P., Vega, S.~C., Cardoso, T.~P., Shelly, A.~A., 1996.
\newblock {\SP~SPATIALSTATS user's manual version 1.0}.
\newblock {Seattle: MathSoft Inc.},
\newblock 226 pp.

\bibitem[\protect\citeauthoryear{Marshall}{Marshall}{1991}]{marshall:1991}
Marshall, R.~M., 1991.
\newblock {Mapping disease and mortality rates using Empirical Bayes Estimators}.
\newblock {Applied Statistics},
\newblock 40 (2), {283--294}.

\bibitem[\protect\citeauthoryear{Symons et al.}{Symons et al.}{1983}]{symonsetal:1983}
Symons, M.~J., Grimson, R.~C., Yuan, Y.~C., 1983.
\newblock {Clustering of rare events}.
\newblock {Biometrics},
\newblock 39 (1), {193--205}.

\end{description}

\end{document}