# This script splits a .csv file into several smaller files, which makes
# searching them much more efficient.
# Tao Huan, 20150722
# Copyright @ University of Alberta
#####################################################################
# Settings
data.path <- "F:/Data_Analysis/20140305MCID2manuscript/script/data/"
file.name <- "run3_opt.csv"
# Number of "###FS:" sections written to each output file.
Split.size <- 100
# Output files are named <output.prefix><file number>.csv
output.prefix <- "run3_splitted file "
####################################################################

stopifnot(is.numeric(Split.size), length(Split.size) == 1, Split.size >= 1)

# Input is read from, and output written to, data.path.
setwd(data.path)
data <- read.csv(file.name)

# A section starts at every row whose first column contains "###FS:".
marker.rows <- grep("###FS:", as.character(data[[1]]))
if (length(marker.rows) == 0) {
  stop("No '###FS:' marker found in the first column of ", file.name,
       call. = FALSE)
}

# Every row before the first marker is the header block that is repeated at
# the top of each output file. seq_len() yields an empty selection when the
# first marker is on row 1 (1:0 would wrongly select row 1).
header.rows <- data[seq_len(marker.rows[1] - 1), , drop = FALSE]

# Each output file starts on the marker row of every Split.size-th section
# and ends on the row just before the next file's first marker; the last
# file runs through the final row of the data. Computing explicit start/end
# rows avoids index arithmetic such as `a:b-2`, which R parses as `(a:b)-2`.
chunk.starts <- marker.rows[seq(1, length(marker.rows), by = Split.size)]
chunk.ends <- c(chunk.starts[-1] - 1, nrow(data))

for (file.number in seq_along(chunk.starts)) {
  print(paste("Splitting the file", file.number))
  chunk.rows <- chunk.starts[file.number]:chunk.ends[file.number]
  new.data <- rbind(header.rows, data[chunk.rows, , drop = FALSE])
  write.csv(new.data,
            file = paste0(output.prefix, file.number, ".csv"),
            row.names = FALSE)
}