# To plot a cnv Partition for the PennCNV to a PDF file, the NsamplePerPage=70 can be modified.
# Bowang Chen
# DKFZ
# July 02, 2012

# 

args=(commandArgs(TRUE))

if(length(args)==0) {
 	print("No arguments supplied.")
}else{
	for(i in 1:length(args)){
		print(args[[i]])
		eval(parse(text=args[i]))
	}
}

if (!exists("filename")){
	filename="Y:/Myeloma/PennCNV/MM384.adjusted.longcnv" # windows
	# filename="/net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/pennCNV_affy_10_neu.adjusted.rawcnv" #linux
	cat("no filename in agruments, use ",filename," as input name.","\n") 
}

if (!exists("PDFname")){
	PDFname="Y:/Myeloma/PennCNV/MM384.adjusted.raw.longcnv.PDF" #windows
	# PDFname<-"/net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/test.PDF" # linux
	cat("no PDFname in agruments, use ",PDFname," as output name.","\n") 
}
if (!exists("Chrs")){
	Chrs=seq(22)
	cat("no PDFname in agruments, use ",Chrs," as output Chrs.","\n") 
}

plotCNV_data<-function(z,Chrs=NULL,Colors = NULL,...){
	
	NsamplePerPage=70
	if (is.null(Chrs)) {
		cat(paste("Wrong chromosome set: ",Chrs, "\nSet Chrs=c(1)\n",sep=""))
		Chrs=c(1)
	}
	
	for (i in Chrs){
		# cat("plotting chr:",i,", ")
		if (i>22||i<1)  {
			cat("this is a illegal chr\n")
		}
		else {
			CH=z[z$Chr==i,]
			if (nrow(CH)>0){
	
				Samples=as.data.frame(unique(CH$SampleID))
				colnames(Samples)[1]="SampleID"
				Samples$index=seq(1,nrow(Samples))
				CH<-merge(CH,Samples,by.x="SampleID",by.y="SampleID")

				for (k in seq(1,nrow(Samples),NsamplePerPage)){
					StartSample=k
					EndSample=min(k+NsamplePerPage-1,nrow(Samples))
					cat("plotting Chr ",i,",",nrow(Samples),"samples, sample",StartSample,"to sample", EndSample, " ... ")
					ch=CH[CH$index>=StartSample & CH$index <= EndSample,]

					# ch<-merge(ch,Col,by.x="CN",by.y="CN")
					for (j in 1:nrow(ch)) {
						if (ch$CN[j]==0)		ch$Col[j]=Colors[1]
						else if (ch$CN[j]==1)	ch$Col[j]=Colors[2]
						else if (ch$CN[j]==3)	ch$Col[j]=Colors[3]
						else if (ch$CN[j]==4)	ch$Col[j]=Colors[4]
						else 					ch$Col[j]="black"
					}
					
					par(xaxt = "s", yaxt = "n",las=0, cex.axis=0.8, mar=c(5,8,4,2) + 0.1)
					plot(c(ch$StartPosition,ch$EndPosition),c(ch$index,ch$index), type = "n", xlab = "",ylab="")

					axis(3,las=1,cex.axis=0.8) # lable on upper X axis for high plot
					rect(ch$StartPosition,ch$index-0.3,ch$EndPosition,ch$index+0.3,border=ch$Col)
					par(las=2,yaxt="s",cex.axis=0.5)
					samples=unique(data.frame(ch$SampleID,ch$index))			
					axis(2, at = samples$ch.index, labels =  samples$ch.SampleID) #Y label for sample ID
					minX=min(axTicks(1))
					maxX=max(axTicks(1))
					dx=(maxX-minX)/7
					mtext(c("Color legend:","CN=0","CN=1","CN=3","CN=4"), side=3, line=2,col=c("black",Colors),at=seq(minX+dx*1,minX+dx*5,dx),las=0) # legend of CNV with colors
					mtext("Sample", 2, line =5.5, las = 0)
					mtext(paste("Chromosome ", i,sep=""),1, line = 2.5, las = 0)
					box()
					cat("\n")
				}
			}
			else 
				cat("Chr ",i," nothing to plot.\n")
		}
	}
	return(ch)
}


# A raw CNV from PennCNV looks like:
# chr1:150822151-150852862      numsnp=32     length=30,712      state2,cn=1 /net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/LRRBAF_new/101_neu startsnp=CN_452248 endsnp=CN_453512
# chr2:89865175-89885829        numsnp=28     length=20,655      state2,cn=1 /net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/LRRBAF_new/101_neu startsnp=CN_867332 endsnp=CN_867355
# chr3:46777921-46788076        numsnp=15     length=10,156      state2,cn=1 /net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/LRRBAF_new/101_neu startsnp=CN_1030794 endsnp=CN_1030808
# chr4:9832514-9843364          numsnp=30     length=10,851      state2,cn=1 /net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/LRRBAF_new/101_neu startsnp=CN_1088266 endsnp=CN_1090406
# chr4:69378740-69408681        numsnp=3      length=29,942      state2,cn=1 /net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/LRRBAF_new/101_neu startsnp=CN_1111686 endsnp=CN_1111690

splitChrIndex<-function(Col=NULL,Sep="=",Index=1){
	# to a split raw PennCNV variable into volumns for plot
	a=strsplit(as.character(Col),Sep)
	b<-do.call(rbind,a)
	return=b[,Index]
}

plotRawPennCNV<-function(filename,PDFname=NULL,Colors = NULL, ...){
		
	z<-read.table(filename,header=F)
	z$Chr=splitChrIndex(z$V1,":",1)
	z$Chr=splitChrIndex(z$Chr,"r",2)
	z$Position=splitChrIndex(z$V1,":",2)
	z$StartPosition=splitChrIndex(z$Position,"-",1)
	z$EndPosition=splitChrIndex(z$Position,"-",2)
	z$NumSNP=splitChrIndex(z$V2,"=",2)
	z$Length=splitChrIndex(z$V3,"=",2)
	z$State=splitChrIndex(z$V4,",",1)
	z$State=splitChrIndex(z$State,"e",2)
	z$CN=splitChrIndex(z$V4,",",2)
	z$CN=splitChrIndex(z$CN,"=",2)
	z$SampleID=basename(as.character(z$V5))
	z$StartSNP=splitChrIndex(z$V6,"=",2)
	z$EndSNP=splitChrIndex(z$V7,"=",2)
	if (is.null(Colors)) Colors=c("orange3","red","darkblue","green")
	if(!is.null(PDFname)) pdf(file=PDFname,paper="A4",title=filename,width=8.67,height=11.69)
		ch=plotCNV_data(z,PDFname=PDFname,Colors=Colors,...)
	if(!is.null(PDFname)) dev.off()
	return(z)
}

#Windows path format
# filename<-"Y:/Stroke_pennCNV/pennCNV_affy_10_neu.adjusted.rawcnv"
# PDFname<-"Y:/Stroke_pennCNV/test.pdf"


#linux path format
# filename="/net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/pennCNV_affy_10_neu.adjusted.rawcnv"
# PDFname<-"/net/dkfzfsg/gpfs/m/daten/C080-big/Stroke_pennCNV/test.PDF"
# R CMD BATCH --no-save --no-restore "--args filename=\"$filename\" PDFname=\"$PDFname\"" $path_R/plot_raw_PennCNV2PDF.R plot_rawPennCNV2PDF.log

plotRawPennCNV(filename,PDFname=PDFname,Chrs=seq(1,22))