library(MASS)

# data reading
#data_2018school <- read.csv("2018data.csv",header=F)
data_2018school <- c(4.3,1.0,5.8,691,448,58.6,931,1.0,1.0,1.0,1.0,62.6,1.0,619,93.4,37.5,146,1.0,1.0) #NDs were set to 1.0

data_Aug_2019school <- c(1,1,1,1,1,1,1,400,60,100,180,2750,1,70,1,70,1,1,1,970,1,1,1,1,1,1,1,1,1,1,1,1,1,90,220,840,390,190,1,1,460,1,620,70,430,1,940,1,1,550,180,1,1,1,1,1,1,1,120,1,1,1,1,1,60,1,1,50,1,1,190,1,1,1,1,1,190,1,1,60,1180,1,110,1,300,320,1,1,1,1,420,890,1,1,1,1,1,1,100,1050,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1220,900,110,1,680,1) #NDs were set to 1.0

data_Sep_2019school <- c(1,1,1,3010,1,1,1,1,1,100,230,1,110,1,10434.78261,1,1,1,1,1,100,1,390,1,90) #NDs were set to 1.0

data_Oct_2019school <- c(310,320,140,1,210,190,1,1,1,320,1,170,1,60,1,1,1,1,1,1,1,1,110,2920,3970,1,2230,170,1,1,1880,4310,1,1,1310,1780,1,1,1,470,150,1970,1,1,1,1340,1,1,1,150,1,1,1,4400,1,1,1,1,110,260,6650,160,3730,970,260,1,1,1,1,1,1,1,1,1,1,1,1,1,1,290) #NDs were set to 1.0

# number of "over 500 µSv"
length(data_Aug_2019school[500 < data_Aug_2019school])
# sample size
length(data_Aug_2019school)


length(data_2018school[500 < data_2018school])
length(data_2018school)

length(data_Sep_2019school[500 < data_Sep_2019school])
length(data_Sep_2019school)

length(data_Oct_2019school[500 < data_Oct_2019school])
length(data_Oct_2019school)


# ratio of "over 500 µSv"
# data_2018school vs data_Aug_2019school

 x <- matrix(c(125-12, 12, 19-3, 3), ncol=2, byrow=T) 
 fisher.test(x)
 
 	Fisher's Exact Test for Count Data

data:  x
p-value = 0.4203
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.287615 7.566582
sample estimates:
odds ratio 
  1.757434 


# ratio of "over 500 µSv"
# data_2018school vs (data_Aug_2019school+data_Sep_2019school+data_Oct_2019school)
 
 x <- matrix(c(125+25+80-12-2-13, 12+2+13, 19, 19-3), ncol=2, byrow=T) 
 fisher.test(x)
 
	Fisher's Exact Test for Count Data

data:  x
p-value = 7.307e-06
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  2.67498 14.68258
sample estimates:
odds ratio 
  6.265345 


#log-normal
ln.meanlog <- fitdistr(data_2018school,densfun="log-normal")$estimate[[1]]
ln.sdlog <- fitdistr(data_2018school,densfun="log-normal")$estimate[[2]]

#gamma
g.shape <- fitdistr(data_2018school,densfun="gamma")$estimate[[1]]
g.rate <- fitdistr(data_2018school,densfun="gamma")$estimate[[2]]


#weibull
w.shape <- fitdistr(data_2018school,densfun="weibull")$estimate[[1]]
w.rate <- fitdistr(data_2018school,densfun="weibull")$estimate[[2]]


#chart
par(ps = 20)
par (mex = 1.5) 

hist(data_2018school,freq=FALSE,breaks = 20, xlab="H'(0.07) at 1m [µSv/10 minutes]")

x <- seq(1, 1000, 10.0)

curve(dlnorm(x, ln.meanlog, ln.sdlog), type="l",add=T,col="red")
curve(dgamma(x, shape=g.shape, rate=g.rate), type="l",add=T,col="blue")
curve(dweibull(x, w.shape, w.rate), type="l",add=T,col="green")


# AIC check
aic_gamma <- fitdistr(data_2018school, densfun = "gamma")
aic_log_normal <- fitdistr(log(data_2018school), densfun = "normal")
aic_weibull <-fitdistr(data_2018school,densfun="weibull")


AIC(aic_gamma)
AIC(aic_log_normal)
AIC(aic_weibull)

# percentile
qgamma(0.90, shape=g.shape, rate=g.rate, log = FALSE)
qlnorm(0.90, ln.meanlog, ln.sdlog, log = FALSE)
qweibull(0.90, w.shape, w.rate, log = FALSE)


data_2019school <- c(data_Aug_2019school,data_Sep_2019school,data_Oct_2019school)

 
#log-normal
ln.meanlog <- fitdistr(data_2019school,densfun="log-normal")$estimate[[1]]
ln.sdlog <- fitdistr(data_2019school,densfun="log-normal")$estimate[[2]]


#gamma
g.shape <- fitdistr(data_2019school,densfun="gamma")$estimate[[1]]
g.rate <- fitdistr(data_2019school,densfun="gamma")$estimate[[2]]


#weibull
w.shape <- fitdistr(data_2019school,densfun="weibull")$estimate[[1]]
w.rate <- fitdistr(data_2019school,densfun="weibull")$estimate[[2]]


#chart
par(ps = 20)
par (mex = 1.5) 

hist(data_2019school,freq=FALSE,breaks = 20, xlab="H'(0.07) at 1m [µSv/10 minutes]")

x <- seq(1, 1000, 10.0)

curve(dlnorm(x, ln.meanlog, ln.sdlog), type="l",add=T,col="red")
curve(dgamma(x, shape=g.shape, rate=g.rate), type="l",add=T,col="blue")
curve(dweibull(x, w.shape, w.rate), type="l",add=T,col="green")


# AIC check
aic_gamma <- fitdistr(data_2019school, densfun = "gamma")
aic_log_normal <- fitdistr(log(data_2019school), densfun = "normal")
aic_weibull <-fitdistr(data_2019school,densfun="weibull")


AIC(aic_gamma)
AIC(aic_log_normal)
AIC(aic_weibull)

# percentile
qgamma(0.90, shape=g.shape, rate=g.rate, log = FALSE)
qlnorm(0.90, ln.meanlog, ln.sdlog, log = FALSE)
qweibull(0.90, w.shape, w.rate, log = FALSE)

hist(log10(data_2019school),freq=FALSE,breaks = 20, xlab="log(H'(0.07)) at 1m [µSv/10 minutes]")