We can easily generalize this to model abundance surveys. The N-mixture model is the simplest (though unrealistic in practice).
set.seed(1234)
n <- 200
T <- 1
p <- 0.6
lambda <- 4.2
N <- rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))
library(dclone)
library(rjags)
model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda)
for (t in 1:T) {
Y[i,t] ~ dbin(p, N[i])
}
}
p ~ dunif(0.001, 0.999)
lambda ~ dlnorm(0, 0.001)
}")
dat <- list(Y = Y, n = n, T = T)
ini <- list(N = apply(Y, 1, max) + 1)
fit <- jags.fit(data = dat, params = c("p", "lambda"),
n.update = 10000,
model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)
ifun <- function(model, n.clones) {
dclone(list(N = apply(Y, 1, max) + 1), n.clones)
}
dcfit <- dc.fit(data = dat,
params = c("p", "lambda"), model = model,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
unchanged = "T", multiply = "n")
summary(dcfit)
plot(dcfit)
dctable(dcfit)
plot(dctable(dcfit))
dcdiag(dcfit)
plot(dcdiag(dcfit))
pairs(dcfit)
As before it is easy to include covariates in the models. There are various extensions and modifications proposed to this basic model. See Lele et al., Solymos et al, Solymos and Lele, Dail and Madsen. (Here we can advertise our work on single survey method and the poster.)
Single visit abundance model with covariates:
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rbinom(n = n, size = N, prob = p)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
coef(m)
library(detect)
md <- svabu(Y ~ x | z, zeroinfl = FALSE)
coef(md)
model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
Y[i] ~ dbin(p[i], N[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000,
model = model, inits = ini)
## DC
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
dcfit <- dc.fit(data = dat,
params = c("beta", "theta"), model = model,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
# n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
model <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
Y[i] ~ dbin(p[i], N[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
cf[1:(px + pz)] ~ dmnorm(pr[,1], pr[,2:(px + pz + 1)])
beta <- cf[1:px]
theta <- cf[(px + 1):(px + pz)]
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z),
pr = unname(cbind(c(coef(m), rep(0, ncol(Z))),
diag(0.01, ncol(X) + ncol(Z)))))
ini <- list(N = Y + 1)
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
## function to update prior
## defined as Multivariate Normal distribution
ufun <- function(model, n.clones) {
cbind(coef(model), solve(vcov(model)))
}
dcfit <- dc.fit(data = dat,
params = c("beta", "theta"), model = model,
inits = ini, initsfun = ifun,
update = "pr", updatefun = ufun,
n.clones = c(1, 2, 4, 8),
n.update = 5000,
unchanged = c("px", "pz", "pr"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
This really becomes an issue when $T = 1$. With $T > 1$ it is much easier to distinguish non occupied ($O_i = 0$ or $N_i = 0 | O_i = 1$) locations when all the detection history is 0, and non-detections when some of the detection history is >0 if $p$ is not too small.
set.seed(1234)
n <- 100
T <- 2
p <- 0.6
lambda <- 3.5
q <- 0.25
O <- rbinom(n, size = 1, prob = q)
N <- O * rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))
library(dclone)
model <- custommodel("model {
for (i in 1:n) {
O[i] ~ dbern(q)
N[i] ~ dpois(lambda * O[i])
for (t in 1:T) {
Y[i,t] ~ dbin(p, N[i])
}
}
p ~ dunif(0.001, 0.999)
lambda ~ dlnorm(0, 0.001)
q ~ dunif(0.001, 0.999)
}")
dat <- list(Y = Y, n = n, T = T)
## initial values are trickier
ini <- list(N = ifelse(rowSums(Y) > 0, 1, 0) * (apply(Y, 1, max) + 1),
O = ifelse(rowSums(Y) > 0, 1, 0))
fit <- jags.fit(data = dat, params = c("p", "lambda", "q"),
n.update = 10000,
model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)
Data cloning for zero inflated data: issues might arise with parent values, that is why we do conditional likelihood estimation. It is also possible to use data vloning and JAGS for conditional likelihood estimation as explained in Solymos et al. 2012 (PDF).
This modification can be suitable in cases when there is e.g. double counting of individuals, or false positives.
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rpois(n = n, lambda = p * N)
table(N = N, Y = Y)
m <- glm(Y ~ x, family = poisson("log"))
## N is of interest for e.g. prediction
model1 <- custommodel("model {
for (i in 1:n) {
N[i] ~ dpois(lambda[i])
#Y[i] ~ dbin(p[i], N[i])
Y[i] ~ dpois(p[i] * N[i]) # this is the change
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
## more efficient when N is not of interest
model2 <- custommodel("model {
for (i in 1:n) {
Y[i] ~ dpois(p[i] * lambda[i])
log(lambda[i]) <- inprod(X[i,], beta)
logit(p[i]) <- inprod(Z[i,], theta)
}
for (j in 1:px) {
beta[j] ~ dnorm(naive[j], 0.1)
}
for (j in 1:pz) {
theta[j] ~ dnorm(0, 0.01)
}
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit1 <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000, model = model1, inits = ini)
fit2 <- jags.fit(data = dat, params = c("beta", "theta"),
n.update = 5000, model = model2)
coef(fit1)
coef(fit2)
## DC
ifun <- function(model, n.clones) {
dclone(list(N = Y + 1), n.clones)
}
dcfit1 <- dc.fit(data = dat,
params = c("beta", "theta"), model = model1,
inits = ini, initsfun = ifun,
n.clones = c(1, 2, 4, 8),
# n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
dcfit2 <- dc.fit(data = dat,
params = c("beta", "theta"), model = model2,
n.clones = c(1, 2, 4, 8),
# n.update = 5000,
unchanged = c("px", "pz", "naive"), multiply = "n")
coef(dcfit1)
coef(dcfit2)