# Raw run-time measurements, one data frame per sequence length.
# The files are read without a header row, so the columns get R's default
# names (V1, V2, ...). `FALSE` is spelled out because `F` is an ordinary
# variable that can be reassigned.
runtimes_200 <- read.csv("~/workspace_genecloud/bloomEncryption/runtimes_200.csv", header=FALSE)
runtimes_400 <- read.csv("~/workspace_genecloud/bloomEncryption/runtimes_400.csv", header=FALSE)
runtimes_800 <- read.csv("~/workspace_genecloud/bloomEncryption/runtimes_800.csv", header=FALSE)
runtimes_1500 <- read.csv("~/workspace_genecloud/bloomEncryption/runtimes_1500.csv", header=FALSE)
#-------------------------------------------------------------------------------------------------

# Exploratory, top-level version of the run-time histogram for the
# 200-character data set (plot_runtime() wraps the same steps in a function).
#
# The four CSV columns are combined with the weights 3600000, 60000, 1000
# and 1, and the total is divided by 1000 for the axis labelled in seconds.
# NOTE(review): that makes the total milliseconds even though the variables
# carry a `_us` suffix; the names are kept in case later code reads them.
sequence_length <- 200  # sequence length of runtimes_200.csv, shown in the title

runtimes <- read.csv("~/workspace_genecloud/bloomEncryption/runtimes_200.csv", header=FALSE)
runtimes_us <- unlist(
  runtimes[4] + runtimes[3]*1000 + runtimes[2]*1000*60 + runtimes[1]*1000*60*60
)
runtimes_us.mean <- mean(runtimes_us)
runtimes_us.median <- median(runtimes_us)
runtimes_us.var <- var(runtimes_us)
runtimes_us.min <- min(runtimes_us)
runtimes_us.max <- max(runtimes_us)
runtimes_s <- (runtimes_us/1000)

breaks <- 90
# The title previously pasted `seq_len`, which at top level is the base
# function and cannot be coerced to a string.
histo <- hist(
  runtimes_s,
  breaks=breaks,
  xlab="Runtime in sec.",
  ylab="Number of runs",
  main=paste("Runtimes for sequence length", sequence_length, sep=" "),
  col="darkred"
)
# Place the annotation 5% of the tallest bar below its top, at the left edge.
max.y <- max(histo$counts)
dist.y <- max.y * 0.05
top.y <- max.y - dist.y
min.x <- min(runtimes_s)

# One run per CSV row, so count the flattened vector; length() of the data
# frame itself would return the number of columns.
text.num <- paste("Total number of runs: ", length(runtimes_us), sep="")
text.min <- paste("Min runtime: ", runtimes_us.min, sep="")
text.mean <- paste("Mean runtime: ", runtimes_us.mean, sep="")
text.median <- paste("Median runtime: ", runtimes_us.median, sep="")
text.max <- paste("Max runtime: ", runtimes_us.max, sep="")
text.var <- paste("Variance in runtimes: ", runtimes_us.var, sep="")

# Only the run count is drawn here; the other labels are prepared but unused.
text(x=min.x, y=top.y, labels=text.num)

#-------------------------------------------------------------------------------------------------

# Draw a histogram of run times read from a headerless CSV file and annotate
# it with summary statistics.
#
# The first four columns are combined as
#   col4 + col3 * 1000 + col2 * 60000 + col1 * 3600000
# and the total is divided by 1000 before being plotted on an axis labelled
# in seconds (so the columns are presumably hours, minutes, seconds and
# milliseconds -- confirm against the tool that writes the file).
#
# Arguments:
#   csv_file  path of the CSV file; must have at least four columns.
#   seq_len   sequence length shown in the plot title.
#   breaks    forwarded to hist(); defaults to 90, the value that used to be
#             hard-coded.
#
# Returns NULL invisibly; called for its drawing side effect on the active
# graphics device. Stops with an error if the file has fewer than 4 columns.
plot_runtime <- function(csv_file, seq_len, breaks = 90) {
  runtimes <- read.csv(csv_file, header = FALSE)
  if (ncol(runtimes) < 4) {
    stop(
      "'", csv_file, "' must have at least 4 columns, found ", ncol(runtimes),
      call. = FALSE
    )
  }

  runtimes_ms <- unlist(
    runtimes[4] + runtimes[3] * 1000 + runtimes[2] * 1000 * 60 +
      runtimes[1] * 1000 * 60 * 60
  )
  runtimes_s <- runtimes_ms / 1000

  histo <- hist(
    runtimes_s,
    breaks = breaks,
    xlab = "Runtime in sec.",
    ylab = "Number of runs",
    main = paste("Runtimes for sequence length", seq_len, sep = " "),
    col = "darkred"
  )

  annotations <- c(
    paste0("Total number of runs: ", length(runtimes_s)),
    paste0("Min runtime: ", round(min(runtimes_s), digits = 2), "s"),
    paste0("Mean runtime: ", round(mean(runtimes_s), digits = 2), "s"),
    paste0("Median runtime: ", round(median(runtimes_s), digits = 2), "s"),
    paste0("Max runtime: ", round(max(runtimes_s), digits = 2), "s"),
    paste0("Variance in runtimes: ", round(var(runtimes_s), digits = 2))
  )

  # Stack the labels downwards from just below the tallest bar, one line every
  # 5% of that bar's height, left-aligned at the smallest observed run time.
  tallest <- max(histo$counts)
  line_height <- tallest * 0.05
  top_y <- tallest - line_height
  text(
    x = min(runtimes_s),
    y = top_y - line_height * (seq_along(annotations) - 1),
    labels = annotations,
    adj = c(0, 0)
  )
  invisible(NULL)
}

# Draw a histogram of transferred data volumes read from a headerless CSV
# file and annotate it with summary statistics.
#
# All values in the file are flattened into one vector and divided by 1024
# for an axis labelled in kB (so the raw values are presumably bytes --
# confirm against the tool that writes the file).
#
# Arguments:
#   csv_file          path of the CSV file; all values must be numeric.
#   seq_len           sequence length shown in the plot title.
#   clientServerText  direction text inserted into the plot title.
#   breaks            forwarded to hist(); defaults to 10, the value that
#                     used to be hard-coded.
#
# Returns NULL invisibly; called for its drawing side effect on the active
# graphics device. Stops with an error if the file holds non-numeric data.
plot_bandwidth <- function(csv_file, seq_len, clientServerText, breaks = 10) {
  bandwidth <- unlist(read.csv(csv_file, header = FALSE))
  if (!is.numeric(bandwidth)) {
    stop("'", csv_file, "' must contain only numeric values", call. = FALSE)
  }
  bandwidth_kb <- bandwidth / 1024

  # 100 MBit/s, taken here as 100 * 1024 * 1024 bit/s. Dividing by 8 and by
  # 1000 gives the amount moved per millisecond, so the quotient below is the
  # time in ms needed to transfer the mean raw value.
  speed <- (100 * 1024 * 1024)
  transfer_ms <- mean(bandwidth) / (speed / 8 / 1000)

  histo <- hist(
    bandwidth_kb,
    breaks = breaks,
    xlab = "Bandwidth in kB",
    ylab = "Number of runs",
    main = paste(
      "Bandwidth from", clientServerText,
      "for sequence length", seq_len, "chars",
      sep = " "
    ),
    col = "darkred"
  )

  annotations <- c(
    paste0("Total number of runs: ", length(bandwidth)),
    paste0("Min bandwidth: ", round(min(bandwidth_kb), digits = 2), "kB"),
    paste0("Mean bandwidth: ", round(mean(bandwidth_kb), digits = 2), "kB"),
    paste0("Median bandwidth: ", round(median(bandwidth_kb), digits = 2), "kB"),
    paste0("Max bandwidth: ", round(max(bandwidth_kb), digits = 2), "kB"),
    paste0("Variance in bandwidth: ", round(var(bandwidth_kb), digits = 2)),
    paste0("Transfertime (100 MBit/s): ", round(transfer_ms, digits = 2), "ms")
  )

  # Stack the labels downwards from just below the tallest bar, one line every
  # 5% of that bar's height, left-aligned at the smallest observed value.
  tallest <- max(histo$counts)
  line_height <- tallest * 0.05
  top_y <- tallest - line_height
  text(
    x = min(bandwidth_kb),
    y = top_y - line_height * (seq_along(annotations) - 1),
    labels = annotations,
    adj = c(0, 0)
  )
  invisible(NULL)
}

# Render every histogram in the original order: client run times, server run
# times, client-to-server bandwidth, server-to-client bandwidth -- each for
# the sequence lengths 200 through 10000.
# local() keeps the helper variables out of the global environment.
local({
  data_dir <- "~/workspace_genecloud/bloomEncryption/"
  seq_lengths <- c(200, 400, 800, 1500, 3000, 10000)

  for (prefix in c("runtimes_", "runtimes_server_")) {
    for (len in seq_lengths) {
      # %d keeps 10000 from ever being rendered in scientific notation.
      plot_runtime(sprintf("%s%s%d.csv", data_dir, prefix, len), len)
    }
  }

  directions <- c(cs = "client to server", sc = "server to client")
  for (tag in names(directions)) {
    for (len in seq_lengths) {
      plot_bandwidth(
        sprintf("%sbandwidth_%s_%d.csv", data_dir, tag, len),
        len,
        directions[[tag]]
      )
    }
  }
})

#---------------------------------------------------------------------------------------------------------

# Scratch calculations relating the false-positive probability p to the
# length l when n1 elements are inserted (presumably Bloom filter sizing,
# going by the project path -- confirm). Statements run top to bottom and
# several variables are overwritten on purpose, so the order matters.
p <- 0.01 #target probability of a false positive
q <- 23 #q-gram length
o <- 11 #offset
k <- 1 # NOTE(review): presumably the number of hash functions -- confirm

n <- 1000+(2*(o-1)) #string length
n1 <- (2*o+1)*(n-q+1) #number of set elements

# Two candidate values for l; the first is immediately overwritten.
# First candidate: l = -n1 * ln(p) / ln(2)^2.
l <- -1*((n1*log(p))/(log(2)^2))
# Second candidate: l = -1 / ((1 - p^(1/k))^(1/(k*n1)) - 1), the closed form
# that the step-by-step rearrangement below arrives at.
l <- -1/ ((-1*p ^ (1/k)+1)^(1/(k*n1)) -1)

# Fix l at 25000 (discarding both candidates) and compute the probability
# that this length yields: p1 = (1 - (1 - 1/l)^(k*n1))^k.
l <- 25000
p1 <- (1- ( 1-1/l )^(k*n1))^k

# Step-by-step rearrangement of that equation. formula.l and formula.r hold
# the left and right side after each step and are overwritten every time.
# Step 0: the equation as written above.
formula.l <- p1
formula.r <- (1- ( 1-1/l )^(k*n1))^k

# Step 1: take the k-th root of both sides.
formula.l <- p1 ^ (1/k)
formula.r <- (1- ( 1-1/l )^(k*n1))

# Step 2: subtract both sides from 1.
formula.l <- (-p1 ^ (1/k)+1)
formula.r <- (( 1-1/l ))^(k*n1)

# Step 3: take the (k*n1)-th root of both sides.
formula.l <- (-p1 ^ (1/k)+1)^(1/(k*n1))
formula.r <- (( 1-1/l ))

# Step 4: solve for l. Note that this line uses the target p, not p1.
formula.l <- 1/(-1*(-p ^ (1/k)+1)^(1/(k*n1)) +1)
formula.r <- ((l ))

# The same value written as -1 / (x - 1) instead of 1 / (1 - x).
formula.l <- -1/((-1*p ^ (1/k)+1)^(1/(k*n1)) -1)

# Variant with the exponents 1/k and k dropped; equal to the line above only
# when k == 1.
formula.l <- -1/((-1*p+1)^(1/(n1)) -1)

# Exact repeat of the previous statement.
formula.l <- -1/((-1*p+1)^(1/(n1)) -1)

# Variant without the "+1" and "-1" terms. The base -p^(1/k) is negative and
# the exponent is not an integer for the values set above, so R returns NaN
# and formula.l is NaN from here on.
# NOTE(review): looks like an abandoned attempt -- confirm before relying on
# formula.l.
formula.l <- -1/((-p ^ (1/k))^(1/(k*n1)))

#formula.r <- ((l ))
# Bare expression: its value (-(p^(1/k))) is printed when this line is
# evaluated at top level.
-1*p ^ (1/k)

# Length solved from the target p, using the same expression as step 4.
l1 <- 1/(-1*(-1*p ^ (1/k)+1)^(1/(k*n1)) +1)

# Print the fixed length, the probability it yields, and the solved length.
l
p1
l1

#------------------------------------------------------------------------------------------------------

# Compare the approximated distance with the true Levenshtein distance.
#
# Three result files are loaded; the analysis below runs on the one selected
# into `resultData`. The values are expected in blocks of `runs` consecutive
# entries, one block per Levenshtein distance 1..turns.
# NOTE(review): unlist() flattens a data frame column by column, so this
# layout assumes one value per line in the file -- confirm.
resultFileP01 <- "~/workspace_genecloud/bloomEncryption/realDistanceED1-100.txt"
resultFileP001 <- "~/workspace_genecloud/bloomEncryption/realDistanceED1-100P001.txt"
resultFileP001Q11S5 <- "~/workspace_genecloud/bloomEncryption/realDistanceED1-100P001Q11S5.txt"
resultDataP01Q23S11 <- unlist(read.csv(resultFileP01, header=FALSE))
resultDataP001Q23S11 <- unlist(read.csv(resultFileP001, header=FALSE))
resultDataP001Q11S5 <- unlist(read.csv(resultFileP001Q11S5, header=FALSE))

resultData <- resultDataP001Q23S11
turns <- 25
runs <- 1000

# Fail loudly instead of silently padding the matrix with NA when the file
# holds fewer values than the analysis needs.
if (length(resultData) < turns * runs) {
  stop(
    "expected at least ", turns * runs, " distance values, found ",
    length(resultData)
  )
}

# Row i holds the `runs` approximated distances measured at Levenshtein
# distance i; `levenshtein` has the same shape with every entry of row i
# equal to i.
distance <- matrix(
  as.numeric(resultData[seq_len(turns * runs)]),
  nrow = turns, ncol = runs, byrow = TRUE
)
levenshtein <- matrix(as.numeric(seq_len(turns)), nrow = turns, ncol = runs)

# Per-distance variance and mean of the approximated distance.
# NOTE(review): these two vectors shadow base::mean and stats::var as data
# objects. The names are kept because code outside this section may read
# them; calls such as mean(x) still find the functions, since R skips
# non-function bindings when resolving a call.
var <- apply(distance, 1, stats::var)
mean <- apply(distance, 1, base::mean)

# boxplot() draws one box per column, hence the transpose.
boxplot(t(distance), notch=TRUE, xlab="Levenshtein distance", ylab="Approximated distance range", main="Approximation with q=23, o=11, p=0.01"
        ,ylim=c(0,500))
boxplot(t(distance), notch=TRUE, xlab="Levenshtein distance", ylab="Approximated distance range", ylim=c(0,500))
plot(levenshtein,distance)

# Correlation between the Levenshtein distance and the mean approximation,
# followed by the per-distance variances.
cor(seq_len(turns), mean, method="spearman")
cor(mean, seq_len(turns), method="kendall")
cor(mean, seq_len(turns), method="pearson")
var

#boxplot(distance.ed1, distance.ed2, distance.ed3, distance.ed4, distance.ed5, distance.ed6, distance.ed7, distance.ed8, distance.ed9, distance.ed10, distance.ed11, distance.ed12, distance.ed13, distance.ed14, distance.ed15, distance.ed16, distance.ed17, distance.ed18, distance.ed19, distance.ed20, notch=TRUE, xlab="Edit Distance", ylab="Distance Value")

#boxplot(t(distance), notch=TRUE, xlab="Levenshtein distance", ylab="Approximated distance range", main="Relation between Levenshtein distance and our distance", ylim=c(0,500))

# Number of set elements, (2*o + 1) * (n - q + 1), for a string of length
# 1000; the bare expression prints its value when evaluated at top level.
q <- 23
n <- 1000
o <- 11
(n - q + 1) * (2 * o + 1)