# This script splits a .csv file into several smaller files, which makes
# searching them much more efficient.
# Tao Huan, 20150722
# Copyright @ University of Alberta

#####################################################################
# This is the setting part
data.path <- "F:/Data_Analysis/20140305MCID2manuscript/script/data/"
file.name <- "run3_opt.csv"
# Number of "###FS:" sections written to each output file.
Split.size <- 100
####################################################################

# The input is read from, and the split files are written to, data.path.
setwd(data.path)
data <- read.csv(file.name)

stopifnot(is.numeric(Split.size), length(Split.size) == 1, Split.size >= 1)

# A section starts at every row whose first column contains "###FS:".
# All markers are located in one vectorised call instead of a row-by-row scan.
marker.rows <- grep("###FS:", as.character(data[[1]]), fixed = TRUE)
if (length(marker.rows) == 0) {
  # Without a marker there is nothing to split on; fail with a clear message
  # (this also covers an empty input file).
  stop("No row containing '###FS:' was found in the first column of ",
       file.name, call. = FALSE)
}

# Rows before the first marker form the header that is repeated at the top of
# every output file. seq_len() selects no rows when the file starts with a
# marker, and drop = FALSE keeps a data frame even for single-column input.
header.rows <- data[seq_len(marker.rows[1] - 1), , drop = FALSE]

# Last row of each section: the row just before the next marker, or the final
# row of the file for the last section (so the last data row is kept).
section.end <- c(marker.rows[-1] - 1, nrow(data))

section.count <- length(marker.rows)
file.count <- ceiling(section.count / Split.size)

for (file.number in seq_len(file.count)) {
  # Sections (file.number - 1) * Split.size + 1 ... file.number * Split.size
  # go into this file; the last file takes whatever sections remain.
  first.section <- (file.number - 1) * Split.size + 1
  last.section <- min(file.number * Split.size, section.count)

  # Both endpoints are computed before `:` is applied. Note that in R
  # `a:b-2` means `(a:b) - 2`, which would shift the whole range by two rows.
  chunk.rows <- marker.rows[first.section]:section.end[last.section]

  new.data <- rbind(header.rows, data[chunk.rows, , drop = FALSE])
  print(paste("Splitting the file", file.number))
  # Every output file uses the same name pattern, including the last one.
  write.csv(new.data,
            file = paste0("run3_splitted file ", file.number, ".csv"),
            row.names = FALSE)
}