# Generalized linear mixed-effects models

## Binomial GLMM

### The model

$Y_{ij} \sim Bernoulli(p_{ij})$, $i=1, 2, \ldots, n$ clusters, $j=1, 2, \ldots, m$ observetions within clusters, $logit(p_{ij}) = \beta_{0} + \beta_{1} X_{ij} + \alpha_{i}$, $\alpha_{i} \sim Normal(0, \sigma^2)$.

### The problem

In these Neyman-Scott type problems, there are two types of asymptotics:

• increase the number of clusters or
• increase the number of observations in a cluster (keeping the number of clusters constant).

Of course, one can have both clusters and cluster size going to infinity but that is quite simple, unrealistic.

• The first asymptotics leads to MLE for $\sigma^2$ that is consistent.
• The second type of asymptotics does not lead to consistent MLE of $\sigma^2$. That was precisely the point in Kiefer and Wolfowitz (PDF from Project Euclid).

The second type of asymptotics where observations within a cluster increases but not the number of clusters, there are only fixed effects corresponding to $\alpha_{i}$. In this asymptotics, the information about $\sigma^2$ is not increasing and hence one cannot obtain consistent estimator.

### Data generation

set.seed(150467)
n <- 100
m <- 2
beta <- c(1.5, -1)
sigma_sq <- 1
x <- runif(n * m, min = 0, max = 1)
g <- rep(1:n, each = 2)
alpha <- rnorm(n, mean = 0, sd = sqrt(sigma_sq))[g]
mu <- alpha + beta[1] + beta[2] * x
p <- exp(mu) / (1 + exp(mu))
Y <- rbinom(n * m, 1, p)
dim(Y) <- c(n, m)
summary(Y)


### Cloning clusters

Cloning goes with n.

library(dclone)
model <- custommodel("model {
for (i in 1:n) {
for (j in 1:m) {
Y[i,j] ~ dbern(p[i,j])
logit(p[i,j]) <- alpha[i] + beta[1] + beta[2] * X[i,j]
}
alpha[i] ~ dnorm(0, 1 / sigma_sq)
}
beta[1] ~ dnorm(0, 0.01)
beta[2] ~ dnorm(0, 0.01)
sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = Y,
X = matrix(x, n, m),
n = n, m = m)
dcfit1 <- dc.fit(data = dat,
params = c("beta", "sigma_sq"),
model = model, n.iter = 1000,
n.clones = c(1, 2, 4, 8),
unchanged = "m", multiply = "n")


Alternative data cloning approach:

model <- custommodel("model {
for (k in 1:K) {
for (i in 1:n) {
for (j in 1:m) {
Y[i,j,k] ~ dbern(p[i,j,k])
logit(p[i,j,k]) <- alpha[i,k] + beta[1] + beta[2] * X[i,j]
}
alpha[i,k] ~ dnorm(0, 1 / sigma_sq)
}
}
beta[1] ~ dnorm(0, 0.01)
beta[2] ~ dnorm(0, 0.01)
sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = dcdim(array(Y, c(n, m, 1))),
X = matrix(x, n, m),
n = n, m = m, K = 1)
dcfit2 <- dc.fit(data = dat,
params = c("beta", "sigma_sq"),
model = model, n.iter = 1000,
n.clones = c(1, 2, 4, 8),
unchanged = c("n", "m", "X"), multiply = "K")


### Cloning replicates within clusters

Cloning goes with m, matrices transposed.

model <- custommodel("model {
for (i in 1:n) {
for (j in 1:m) {
Y[j,i] ~ dbern(p[j,i])
logit(p[j,i]) <- alpha[i] + beta[1] + beta[2] * X[j,i]
}
alpha[i] ~ dnorm(0, 1 / sigma_sq)
}
beta[1] ~ dnorm(0, 0.01)
beta[2] ~ dnorm(0, 0.01)
sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = t(Y),
X = t(matrix(x, n, m)),
n = n, m = m)
dcfit3 <- dc.fit(data = dat,
params = c("beta", "sigma_sq"),
model = model, n.iter = 1000,
n.clones = c(1, 2, 4, 8),
unchanged = "n", multiply = "m")


### Compare disgnostic results

dcdiag(dcfit1)
dcdiag(dcfit2)
dcdiag(dcfit3)


## Poisson GLMM

The model: $(Y_{i} \mid \lambda_i) \sim Poisson(\lambda_{i})$, $i = 1, 2, \ldots, n$, $log(\lambda_i) = \alpha_i + \mathbf{X}^{\top}_{i} \mathbf{\beta}$, and $\alpha_i \sim Normal(0, \sigma^2)$.

### Data generation

set.seed(1234)
n <- 20
beta <- c(1, -1)
sigma_sq <- 0.5
x <- rnorm(n)
X <- model.matrix(~x)
alpha <- rnorm(n, mean = 0, sd = sqrt(sigma_sq))
lambda <- exp(alpha + X %*% beta)
Y <- rpois(n, lambda = lambda)
table(Y)


### Bayesian analysis

model <- custommodel("model {
for (i in 1:n) {
Y[i] ~ dpois(lambda[i])
log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
alpha[i] ~ dnorm(0, 1 / sigma_sq)
}
for (j in 1:px) {
beta[j] ~ dnorm(0, 0.001)
}
log_sigma ~ dnorm(0, 0.001)
sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X))
fit <- jags.fit(data = dat,
params = c("beta", "sigma_sq"),
model = model)


### Data cloning

dcfit <- dc.fit(data = dat,
params = c("beta", "sigma_sq"),
model = model, n.iter = 1000,
n.clones = c(1, 2, 4, 8),
unchanged = "px", multiply = "n")
dcdiag(dcfit)
coef(dcfit)


### Prediction

model <- custommodel("model {
for (i in 1:n) {
Y[i] ~ dpois(lambda[i])
log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
alpha[i] ~ dnorm(0, 1 / sigma_sq)
}
for (j in 1:px) {
beta[j] ~ dnorm(0, 0.1)
}
log_sigma ~ dnorm(10, 1)
sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X))
pred1 <- jags.fit(data = dat,
params = "alpha",
model = model)
model <- custommodel("model {
for (i in 1:n) {
Y[i] ~ dpois(lambda[i])
log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
alpha[i] ~ dnorm(0, 1 / sigma_sq)
}
param[1:(px + 1)] ~ dmnorm(cf[], V[,])
for (j in 1:px) {
beta[j] <- param[j]
}
log_sigma <- param[px + 1]
sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X),
cf = coef(dcfit), V = solve(vcov(dcfit)))
pred2 <- jags.fit(data = dat,
params = "alpha",
model = model)

alpha_b <- cbind(est=coef(pred1),
t(quantile(pred1, probs = c(0.025, 0.975))))
alpha_dc <- cbind(est=coef(pred2),
t(quantile(pred2, probs = c(0.025, 0.975))))

plot(1:n, alpha[order(alpha)], type = "l",
ylim = range(alpha, alpha_b, alpha_dc))
points(1:n - 0.2, alpha_b[order(alpha),1],
col = 2, pch = 19, cex = 0.5)
segments(x0 = 1:n - 0.2, x1 = 1:n - 0.2,
y0 = alpha_b[order(alpha),2],
y1 = alpha_b[order(alpha),3], col = 2)
points(1:n + 0.2, alpha_dc[order(alpha),1],
col=4, pch = 19, cex = 0.5)
segments(x0 = 1:n + 0.2, x1 = 1:n + 0.2,
y0 = alpha_dc[order(alpha),2],
y1 = alpha_dc[order(alpha),3], col = 4)

table(rowSums(sign(alpha - alpha_b[,-1]))) / n
table(rowSums(sign(alpha - alpha_dc[,-1]))) / n


Use different prior distributions on the parameters and see how they affect the coverage.

## What have we learnt?

• Hierarchical models: Linear mixed models, measurement error.
• Latent variables versus parameters.
• Estimation and inference for the parameters.
• Prediction and inference (coverage) for the latent variables.