options(stringsAsFactors=FALSE) library(WriteXLS) source("read_fasta.R") bad_s <- "YP_009725307.1" cod <- read.table("cod_to_AA.txt") #fin1 <- "gisaid_delta_05_11.out" #fin2 <- "gisaid_delta_05_11.fasta" #fout <- "gisaid_delta_05_11_syn_mis.xls" fin1 <- "delta_05_18.out" fin2 <- "delta_05_18.fasta" fout <- "delta_05_18_syn_mis.xls" x <- read.table(fin1) colnames(x) <- unlist(strsplit("qseqid qlen sseqid slen qstart qend sstart send evalue bitscore score length pident nident gapopen gaps sstrand"," ")) x <- x[x$sstart==1,] x <- x[x$gaps == 0,] junk <- paste(x$qseqid,x$sseqid) ind <- tapply(1:nrow(x),junk,function(i) i[which.max(x$score[i])]) x <- x[ind,] ref <- read_fasta("ref_dnaa.fa") gis <- read_fasta(fin2) ss <- unique(x$sseqid) qq <- unique(x$qseqid) res <- list() for (s in ss) { ref_seq <- unlist(strsplit(ref[s],"")) if (s == bad_s) ref_seq <- c(ref_seq[1:27],ref_seq[-(1:26)]) sp <- matrix(ref_seq,nrow=3) sp <- apply(sp,2, function(x) paste0(x,collapse="")) mm <- match(sp,cod[,1]) ref_prot <- cod[mm,2] y <- x[x$sseqid == s,] mat <- matrix(0,nrow=length(sp),ncol=2) colnames(mat) <- c("syn","mis") for (i in 1:nrow(y)) { if (y$pident[i] == 100 & y$length[i] == y$slen[i]) next seq <- unlist(strsplit(gis[y$qseqid[i]],"")) seq <- seq[y$qstart[i]:y$qend[i]] if (s == bad_s) seq <- c(seq[1:27],seq[-(1:26)]) if (length(seq) > 3*length(ref_prot)) seq <- seq[1:(3*length(ref_prot))] k <- 3*floor(length(seq)/3) seq <- seq[1:k] qp <- matrix(seq,nrow=3) qp <- apply(qp,2, function(x) paste0(x,collapse="")) mm <- match(qp,cod[,1]) prot <- cod[mm,2] bad <- which(is.na(prot)) prot[bad] <- "-" bad <- which(prot=="STOP") prot[bad] <- "-" d <- which(qp != sp[1:length(qp)] & prot != "-") mat[d[ref_prot[d] == prot[d]],"syn"] <- mat[d[ref_prot[d] == prot[d]],"syn"] + 1 mat[d[ref_prot[d] != prot[d]],"mis"] <- mat[d[ref_prot[d] != prot[d]],"mis"] + 1 } res[[s]] <- mat print(paste(s,colSums(mat))) } for (nam in names(res)) assign(nam,as.data.frame(res[[nam]])) WriteXLS(names(res),ExcelFileName=fout,SheetNames=names(res),col.names=TRUE,row.names=FALSE)