I'm essentially trying to reproduce the dhyper function in R using
!n ~ gamma(n+1). Why does this approach not work for large values of the population?
# (n choose k)
fastChoose <- function(parm_n, parm_k) {
p1 = lgamma(parm_n+1)
p2 = lgamma(parm_n - parm_k +1)
p3 = lgamma(parm_k + 1)
result = p1 - p2 - p3
return (exp(result))
}
# P(X=k) = (K choose k) * (N-K choose K-k) / (N choose n)
myDHyper <- function(parm_k, parm_K, parm_N, parm_n){
return (fastChoose(parm_K,parm_k) * fastChoose(parm_N-parm_K,parm_K - parm_k) / fastChoose(parm_N,parm_n))
}
BIGN<-15000
whiteBallsInPop <- 50
iSampled<- 200
allPointMass<-c()
allPointMassMine <- c()
for(i in seq(0,whiteBallsInPop)){
fromDHyper <- dhyper(i, whiteBallsInPop, BIGN-whiteBallsInPop, iSampled, log = FALSE)
allPointMass <- c(allPointMass, fromDHyper)
fromMyDHyper <- myDHyper(i,whiteBallsInPop,BIGN,iSampled)
allPointMassMine <- c(allPointMassMine,fromMyDHyper )
}
sum(allPointMass)
sum(allPointMassMine)
Keeping everything on log scale and taking log(p) doesn't seem to work either:
#log( P(X=x))
myDHyperOverflowFixed <- function(k, K, N, n){
# P(X=k) = (K choose k) * (N-K choose K-k) / (N choose n)
# = K!/k!(K-k)! * (N-K)!/(K-k)!(N-K-K+k)! / N!/n!(N-n)!
log_p <- lgamma(K + 1) +
lgamma(N - K + 1) +
lgamma(n + 1) +
lgamma(N - n + 1) -
lgamma(k + 1) -
lgamma(K - k + 1) -
lgamma(K - k + 1) -
lgamma(N - K - K + k + 1) -
lgamma(N + 1)
return (log_p)
}
log.choosefunction aslog.choose <- function(n,k) lgamma(n+1)-lgamma(k+1)-lgamma(n-k+1)and then invoke it thrice:myDHyperOverflowFixed <- function(k,K,N,n) log.choose(K,k)+log.choose(N-K,K-k)-log.choose(N,n). That kind of programming is more readable, far easier to test and debug, and limits the effects of sly little typographical errors that can plague mathematical code like this. – whuber Jul 10 '14 at 18:05