# Copyright (c) 2015 Mathy Vanhoef
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
library("Rmpfr")
#' Perform goodness-of-fit or independence test using the M-test of Fuchs and Kenett.
#'
#' @examples
#' # Two-way contingency table of two one bit-values. On its own each bit is uniformly
#' # distributed, but there is a clear correlation between both bit values.
#' pair <- matrix(data=c(30,70,70,30), nrow=2, ncol=2)
#' util.mtest(pair)
#'
#' bytevalues <- c(rmultinom(1, 2^20, rep(2^-8, 2^8))) # simulate uniform distributed byte
#' util.mtest(bytevalues) # calculate p-value with as null hypothesis uniform distribution
#'
#' @param n A vector of counts of successes, or a matrix representing a two-way
#' contingency table.
#' @param p0 [optional] Expected probabilities under the null hypothesis. If not given,
#' and n is a matrix, these are set to the marginal probabilities
#' of the two-way contingency table. Otherwise set to uniform.
#' @param N [optional] Number of trails performed, set to sum(n) if not given.
#'
#' @references Fuchs, Camil and Kenett, Ron. A test for Detecting Outlying Cells in
#' the Multinomial Distribution and Two-Way Contingency Tables.
#' @author Mathy Vanhoef
util.mtest <- function(n, p0 = NULL, N = NULL)
{
if (is.null(N)) N <- sum(n)
k <- length(n)
# Process (default) arguments
if (class(n) == "matrix") {
# For two-way contingency tables we calculate the expected (marginal) probabilities p0.
# See section 4: "for two-sided test ... using the methods of section 3 with k = IJ."
pi <- apply(n, c(1), sum) / N
pj <- apply(n, c(2), sum) / N
p0 <- sapply(pj, function(pr) pr * pi)
} else if (is.null(p0)) {
# Assume uniform distribution
p0 <- rep(1/k, k)
}
# formula (2.1): calculate adjusted residuals Z_i
z <- (n - N*p0) / sqrt(N * p0 * (1 - p0))
maxz <- max(abs(z))
# Calculate (an upper bound of) the p-value for a two-sided test. Based on formula 3.5,
# the definitions in Lemma 1, and the remark for two-sided tests in the last paragraph
# of section 3. We need high precision `mpfr` otherwise `pnorm` returns zero too quickly.
#pval <- as.numeric(2 * k * (1 - pnorm(mpfr(maxz, 2048))))
# Improved upper bound for the two-sided test by Sidak (1968). See formula (3.8)
# at the end of section 3. Doesn't offer that much advantage though.
pval <- as.numeric(1 - (2 * pnorm(mpfr(maxz, 2048))- 1)^k)
if (pval > 1) pval <- 1
return (pval)
}