I am comparing different methods of fitting Pareto-distributed random numbers. What seems very strange to me is that fitting a straight line on a log-log scale should be the worst numerical method, and indeed it is; yet when I superimpose that straight line on the log-log histogram, the two agree very well, while the lines from the other fitting methods agree very poorly. How is this paradox possible? Am I making a mistake?
Here is my R code, so you can check:
#' Maximum-likelihood estimate of the Pareto shape parameter
#'
#' Uses the sample minimum as the scale `m` and returns
#' `n / sum(log(X) - log(m))`. As a side effect, the line
#' `-(a + 1) * x + log(a) + a * log(m)` (the Pareto log-density as a function
#' of `x = log(value)`) is added in black to the currently open plot, so a
#' plot must already exist when this function is called.
#'
#' @param X Numeric vector (or matrix) of observations; assumed strictly
#'   positive, since logarithms are taken.
#' @return The estimated shape parameter, a single numeric value.
pareto.MLE <- function(X) {
  n <- length(X)
  m <- min(X)
  a <- n / sum(log(X) - log(m))
  # `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned,
  # in which case the curve would replace the plot instead of overlaying it.
  curve(-(a + 1) * x + log(a) + a * log(m), add = TRUE, col = "black")
  a
}
#' Mean-based (method-of-moments) estimate of the Pareto shape parameter
#'
#' Uses the sample minimum as the scale `m` and solves the Pareto mean
#' relation `mu = a * m / (a - 1)` for `a`, giving `a = mu / (mu - m)`.
#' That relation only holds when the true shape exceeds 1 (otherwise the
#' mean is infinite), so the estimate is not meaningful for shape <= 1.
#' As a side effect, the line `-(a + 1) * x + log(a) + a * log(m)` is added
#' in green to the currently open plot, so a plot must already exist.
#'
#' @param X Numeric vector (or matrix) of observations; assumed strictly
#'   positive, since `log(m)` is taken.
#' @return The estimated shape parameter, a single numeric value.
pareto.Alpha <- function(X) {
  m <- min(X)
  mu <- mean(X)
  a <- mu / (mu - m)
  # `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned,
  # in which case the curve would replace the plot instead of overlaying it.
  curve(-(a + 1) * x + log(a) + a * log(m), add = TRUE, col = "green")
  a
}
#' Median-based estimate of the Pareto shape parameter
#'
#' Uses the sample minimum as the scale `m` and solves the Pareto median
#' relation `med = m * 2^(1 / a)` for `a`, giving
#' `a = log(2) / log(med / m)`. As a side effect, the line
#' `-(a + 1) * x + log(a) + a * log(m)` is added in blue to the currently
#' open plot, so a plot must already exist.
#'
#' @param X Numeric vector (or matrix) of observations; assumed strictly
#'   positive, since logarithms are taken.
#' @return The estimated shape parameter, a single numeric value.
pareto.median <- function(X) {
  m <- min(X)
  med <- median(X)
  a <- log(2) / log(med / m)
  # `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned,
  # in which case the curve would replace the plot instead of overlaying it.
  curve(-(a + 1) * x + log(a) + a * log(m), add = TRUE, col = "blue")
  a
}
#' Shape estimate from a straight-line fit to the log-log histogram
#'
#' Bins `X` with `hist()` (default, equal-width breaks), regresses
#' `log(density)` on `log(bin midpoint)` with `lm()`, and converts the fitted
#' slope `s` into a shape estimate `-(s + 1)`. As a side effect, the fitted
#' line is added in red to the currently open plot, so a plot must already
#' exist.
#'
#' Note that the regression is fitted to the histogram points themselves, so
#' the red line follows them by construction; that says nothing about how
#' well the histogram approximates the true density.
#'
#' @param X Numeric vector (or matrix) of observations; assumed strictly
#'   positive, since the logarithm of the bin midpoints is taken.
#' @return The estimated shape parameter, a single unnamed numeric value.
pareto.fit <- function(X) {
  bins <- hist(X, plot = FALSE)
  dens <- bins$density
  last <- length(dens)
  # Empty bins would give log(0) = -Inf. Walking from the right, each empty
  # bin borrows the (already filled) density of its right-hand neighbour.
  # Emptiness is tested on the integer counts rather than with a floating
  # point `==` on the density, and the `i < last` guard avoids indexing past
  # the end of the vector.
  for (i in rev(seq_along(dens))) {
    if (bins$counts[i] == 0 && i < last) {
      dens[i] <- dens[i + 1]
    }
  }
  fit <- lm(log(dens) ~ log(bins$mids))
  # Drop the coefficient names so the result is a bare number, like the
  # values returned by the other estimators in this file.
  beta <- unname(coefficients(fit))
  curve(x * beta[2] + beta[1], add = TRUE, col = "red")
  -(beta[2] + 1)
}
library(PtProcess)
# Driver: simulate a Pareto sample, draw its log-log histogram, overlay the
# line implied by each estimator and print the four estimates.

# Shape parameter used for the simulation.
alpha <- 1
# 100 draws from rpareto(), stored as a one-column matrix.
# NOTE(review): presumably the 2nd argument is the shape and the 3rd the
# minimum (0.1) -- confirm against the PtProcess documentation.
hh1 <- matrix(rpareto(100, alpha, 0.1), ncol = 1)

# Histogram on default (equal-width) bins; only its log-log points are drawn.
# Empty bins have density 0, so their log is -Inf and they are not plotted.
pp <- hist(hh1, plot = FALSE)
plot(log(pp$mids), (log(pp$density)))

# Each estimator adds its line to the plot above, so these calls must come
# after plot().
parameter.1 <- pareto.MLE(hh1)
parameter.a <- pareto.Alpha(hh1)
parameter.m <- pareto.median(hh1)
parameter.f <- pareto.fit(hh1)

cat("Alpha: ", alpha, "\n")
cat("MLE: ", parameter.1, "\n")
cat("Mean: ", parameter.a, "\n")
cat("Median: ", parameter.m, "\n")
cat("Fit: ", parameter.f, "\n")