# Abundance models with detection error

We can easily generalize this to model abundance surveys. The N-mixture model is the simplest (though unrealistic in practice).

### Assumptions

• Replicate surveys,
• independence,
• closed population.

### Specification of the hierarchical model

• True abundance model: $N_i \sim Poisson(\lambda)$ for locations $i=1,2, \ldots, n$.
• Observation model: $(Y_{i,t} \mid N_i) \sim Binomial(N_i, p)$ for visits $t=1,2, \ldots, T$.
set.seed(1234)
n <- 200
T <- 1
p <- 0.6
lambda <- 4.2
N <- rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))

library(dclone)
library(rjags)
model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda)
for (t in 1:T) {
Y[i,t] ~ dbin(p, N[i])
}
}
p ~ dunif(0.001, 0.999)
lambda ~ dlnorm(0, 0.001)
}")
dat <- list(Y = Y, n = n, T = T)
ini <- list(N = apply(Y, 1, max) + 1)
fit <- jags.fit(data = dat, params = c("p", "lambda"),
n.update = 10000,
model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)

ifun <- function(model, n.clones) {
dclone(list(N = apply(Y, 1, max) + 1), n.clones)
}
dcfit <- dc.fit(data = dat,
params = c("p", "lambda"), model = model,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
unchanged = "T", multiply = "n")
summary(dcfit)
plot(dcfit)
dctable(dcfit)
plot(dctable(dcfit))
dcdiag(dcfit)
plot(dcdiag(dcfit))
pairs(dcfit)


As before it is easy to include covariates in the models. There are various extensions and modifications proposed to this basic model. See Lele et al., Solymos et al, Solymos and Lele, Dail and Madsen. (Here we can advertise our work on single survey method and the poster.)

Single visit abundance model with covariates:

set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rbinom(n = n, size = N, prob = p)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
coef(m)

library(detect)
md <- svabu(Y ~ x | z, zeroinfl = FALSE)
coef(md)

model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
Y[i] ~ dbin(p[i], N[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000,
model = model, inits = ini)
## DC
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
dcfit <- dc.fit(data = dat,
params = c("beta", "theta"), model = model,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)


## Learning with DC

model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
Y[i] ~ dbin(p[i], N[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}

cf[1:(px + pz)] ~ dmnorm(pr[,1], pr[,2:(px + pz + 1)])
beta <- cf[1:px]
theta <- cf[(px + 1):(px + pz)]
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z),
pr = unname(cbind(c(coef(m), rep(0, ncol(Z))),
diag(0.01, ncol(X) + ncol(Z)))))
ini <- list(N = Y + 1)
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
## function to update prior
## defined as Multivariate Normal distribution
ufun <- function(model, n.clones) {
cbind(coef(model), solve(vcov(model)))
}
dcfit <- dc.fit(data = dat,
params = c("beta", "theta"), model = model,
inits = ini, initsfun = ifun,
update = "pr", updatefun = ufun,
n.clones = c(1, 2, 4, 8),
n.update = 5000,
unchanged = c("px", "pz", "pr"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)


### Zero-inflated Poisson latent process

This really becomes an issue when $T = 1$. With $T > 1$ it is much easier to distinguish non occupied ($O_i = 0$ or $N_i = 0 | O_i = 1$) locations when all the detection history is 0, and non-detections when some of the detection history is >0 if $p$ is not too small.

set.seed(1234)
n <- 100
T <- 2
p <- 0.6
lambda <- 3.5
q <- 0.25
O <- rbinom(n, size = 1, prob = q)
N <- O * rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))

library(dclone)
model <- custommodel("model {
for (i in 1:n) {
O[i] ~ dbern(q)
N[i] ~ dpois(lambda * O[i])
for (t in 1:T) {
Y[i,t] ~ dbin(p, N[i])
}
}
p ~ dunif(0.001, 0.999)
lambda ~ dlnorm(0, 0.001)
q ~ dunif(0.001, 0.999)
}")
dat <- list(Y = Y, n = n, T = T)
## initial values are trickier
ini <- list(N = ifelse(rowSums(Y) > 0, 1, 0) * (apply(Y, 1, max) + 1),
O = ifelse(rowSums(Y) > 0, 1, 0))
fit <- jags.fit(data = dat, params = c("p", "lambda", "q"),
n.update = 10000,
model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)


Data cloning for zero inflated data: issues might arise with parent values, that is why we do conditional likelihood estimation. It is also possible to use data vloning and JAGS for conditional likelihood estimation as explained in Solymos et al. 2012 (PDF).

## Poisson-Poisson mixture

This modification can be suitable in cases when there is e.g. double counting of individuals, or false positives.

set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rpois(n = n, lambda = p * N)
table(N = N, Y = Y)
m <- glm(Y ~ x, family = poisson("log"))

## N is of interest for e.g. prediction
model1 <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
#Y[i] ~ dbin(p[i], N[i])
Y[i] ~ dpois(p[i] * N[i]) # this is the change
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
## more efficient when N is not of interest
model2 <- custommodel("model {
for (i in 1:n) {
Y[i] ~ dpois(p[i] * lambda[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit1 <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000, model = model1, inits = ini)
fit2 <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000, model = model2)
coef(fit1)
coef(fit2)
## DC
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
dcfit1 <- dc.fit(data = dat,
params = c("beta", "theta"), model = model1,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
dcfit2 <- dc.fit(data = dat,
params = c("beta", "theta"), model = model2,
n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
coef(dcfit1)
coef(dcfit2)