#These functions take array data (for the process_* functions, together with the array layout/configuration table)
#and return a data frame suitable for processing with the DNAcopy package, with columns: Position, Intensity, Chr (replicon label) and Scaled.
#Scaled holds the Intensity values converted to z-scores (number of standard deviations from the mean) so that data can be compared between arrays.

# Merge a GPL4664/GPL4661 data table with its layout table and reduce it to
# one mean intensity per probe start position on each replicon.
#
# Arguments:
#   data_file   - data frame of measurements; joined on its ID_REF column.
#   layout_file - data frame of spot annotations; joined on its ID column.
#   After the join the table must provide the columns flag, Y_INT, ref and
#   chromosome_start_position.
#
# Returns a data frame with the columns
#   Position  - numeric probe start position
#   Intensity - mean Y_INT of all replicate probes at that position
#   Chr       - replicon label: 'Chr', 'pNRC100' or 'pNRC200'
#   Scaled    - Intensity centred and scaled with scale() over all rows
#               (a one-column matrix, as returned by scale())
# Rows are grouped by replicon in the order Chr, pNRC100, pNRC200 and sorted
# by position inside each group (the ordering produced by aggregate()).
#
# A replicon without any usable probe is skipped; an error is raised only
# when none of the three replicons has usable probes.
process_gpl4664_gpl4661 <- function(data_file, layout_file) {
  # Left join on the spot ID so that every measured spot is kept.
  merged <- merge(data_file, layout_file,
                  by.x = 'ID_REF', by.y = "ID", all.x = TRUE)

  # Keep only spots flagged PRESENT. which() drops rows whose flag is NA
  # instead of turning them into all-NA rows.
  present <- merged[which(merged$flag == 'PRESENT'), ]

  # Positions may arrive as factor/character; go through character so that
  # factor levels are not mistaken for positions.
  present$chromosome_start_position <-
    as.numeric(as.character(present$chromosome_start_position))

  # Average replicate probes of one replicon; NULL when it has no probes.
  average_replicon <- function(label) {
    probes <- present[grep(label, present$ref), ]
    # aggregate() would discard NA positions anyway; removing them first
    # lets the emptiness check below see the rows that really count.
    probes <- probes[!is.na(probes$chromosome_start_position), ]
    if (nrow(probes) == 0L) {
      return(NULL)
    }
    averaged <- aggregate(probes$Y_INT,
                          by = list(probes$chromosome_start_position),
                          FUN = "mean")
    averaged$Chr <- label
    colnames(averaged) <- c('Position', 'Intensity', 'Chr')
    averaged
  }

  pieces <- lapply(c('Chr', 'pNRC100', 'pNRC200'), average_replicon)
  pieces <- pieces[!vapply(pieces, is.null, logical(1))]
  if (length(pieces) == 0L) {
    stop("no probes flagged 'PRESENT' matched 'Chr', 'pNRC100' or 'pNRC200'",
         call. = FALSE)
  }

  # Recombine and add the mean/variance normalised values.
  ld_proc <- do.call(rbind, pieces)
  ld_proc$Scaled <- scale(ld_proc$Intensity)
  ld_proc
}
	

# Merge a GPL4662 data table with its layout table and reduce it to one mean
# intensity per probe start position on each replicon.
#
# Arguments:
#   data_file   - data frame of measurements; joined on its ID_REF column.
#   layout_file - data frame of spot annotations; joined on its ID column.
#   After the join the table must provide the columns FLAG, Y_INT, ref and
#   chromosome_start_position.
#
# Returns a data frame with the columns
#   Position  - numeric probe start position
#   Intensity - mean Y_INT of all replicate probes at that position
#   Chr       - replicon label: 'Chr', 'pNRC100' or 'pNRC200'
#   Scaled    - Intensity centred and scaled with scale() over all rows
#               (a one-column matrix, as returned by scale())
# Rows are grouped by replicon in the order Chr, pNRC100, pNRC200 and sorted
# by position inside each group (the ordering produced by aggregate()).
#
# A replicon without any usable probe is skipped; an error is raised only
# when none of the three replicons has usable probes.
process_gpl4662 <- function(data_file, layout_file) {
  # Left join on the spot ID so that every measured spot is kept.
  merged <- merge(data_file, layout_file,
                  by.x = 'ID_REF', by.y = "ID", all.x = TRUE)

  # Keep only spots whose FLAG is 'I'. which() drops rows whose FLAG is NA
  # instead of turning them into all-NA rows.
  kept <- merged[which(merged$FLAG == 'I'), ]

  # Positions may arrive as factor/character; go through character so that
  # factor levels are not mistaken for positions.
  kept$chromosome_start_position <-
    as.numeric(as.character(kept$chromosome_start_position))

  # Average replicate probes of one replicon; NULL when it has no probes.
  average_replicon <- function(label) {
    probes <- kept[grep(label, kept$ref), ]
    # aggregate() would discard NA positions anyway; removing them first
    # lets the emptiness check below see the rows that really count.
    probes <- probes[!is.na(probes$chromosome_start_position), ]
    if (nrow(probes) == 0L) {
      return(NULL)
    }
    averaged <- aggregate(probes$Y_INT,
                          by = list(probes$chromosome_start_position),
                          FUN = "mean")
    averaged$Chr <- label
    colnames(averaged) <- c('Position', 'Intensity', 'Chr')
    averaged
  }

  pieces <- lapply(c('Chr', 'pNRC100', 'pNRC200'), average_replicon)
  pieces <- pieces[!vapply(pieces, is.null, logical(1))]
  if (length(pieces) == 0L) {
    stop("no probes with FLAG 'I' matched 'Chr', 'pNRC100' or 'pNRC200'",
         call. = FALSE)
  }

  # Recombine and add the mean/variance normalised values.
  ld_proc <- do.call(rbind, pieces)
  ld_proc$Scaled <- scale(ld_proc$Intensity)
  ld_proc
}
	

#Note, this automatically uses the low intensity scan values due to better dynamic range
#Channel 1 will always be your alexa 546, channel 2 will be alexa 647.  
#The "channel" input to this function should either be '1' or '2'
# Reduce a raw GPL4664 scan to one mean spot intensity per probe start
# position on each replicon, using the Alexa546L / Alexa647L columns.
#
# Arguments:
#   data_file   - data frame of raw measurements.
#   layout_file - data frame of spot annotations.
#                 The two tables are joined with cbind(), i.e. row i of one
#                 is paired with row i of the other; no ID matching is done.
#                 Together they must provide the columns flag, sequence_name,
#                 ref, chromosome_start_position and the intensity column of
#                 the chosen channel.
#   channel     - 1 (or '1') selects Spot.Mean.Intensity..Alexa546L,
#                 2 (or '2') selects Spot.Mean.Intensity..Alexa647L.
#                 Any other value is an error.
#
# Returns a data frame with the columns
#   Position  - numeric probe start position
#   Intensity - mean spot intensity of all replicate probes at that position
#   Chr       - replicon label: 'Chr', 'pNRC100' or 'pNRC200'
#   Scaled    - Intensity centred and scaled with scale() over all rows
#               (a one-column matrix, as returned by scale())
# A replicon without any usable probe is skipped; an error is raised only
# when none of the three replicons has usable probes.
process_raw_gpl4664_low <- function(data_file, layout_file, channel) {
  if (channel == 1) {
    intensity_col <- 'Spot.Mean.Intensity..Alexa546L'
  } else if (channel == 2) {
    intensity_col <- 'Spot.Mean.Intensity..Alexa647L'
  } else {
    stop("channel must be 1 or 2", call. = FALSE)
  }

  # Instead of merging by ID the two tables are simply bound side by side.
  spots <- cbind(data_file, layout_file)

  # Drop error spots and spots without a sequence. which() discards rows
  # where the comparison is NA instead of turning them into all-NA rows.
  spots <- spots[which(spots$flag == 'PRESENT'), ]
  spots <- spots[which(spots$sequence_name != 'None'), ]

  # Convert the positions of the rows that are actually kept. Taking them
  # from the table before the sequence_name filter gives a vector of the
  # wrong length as soon as one 'None' spot has been removed.
  spots$chromosome_start_position <-
    as.numeric(as.character(spots$chromosome_start_position))

  # Average replicate probes of one replicon; NULL when it has no probes.
  average_replicon <- function(label) {
    probes <- spots[grep(label, spots$ref), ]
    # aggregate() would discard NA positions anyway; removing them first
    # lets the emptiness check below see the rows that really count.
    probes <- probes[!is.na(probes$chromosome_start_position), ]
    if (nrow(probes) == 0L) {
      return(NULL)
    }
    averaged <- aggregate(probes[[intensity_col]],
                          by = list(probes$chromosome_start_position),
                          FUN = "mean")
    averaged$Chr <- label
    colnames(averaged) <- c('Position', 'Intensity', 'Chr')
    averaged
  }

  pieces <- lapply(c('Chr', 'pNRC100', 'pNRC200'), average_replicon)
  pieces <- pieces[!vapply(pieces, is.null, logical(1))]
  if (length(pieces) == 0L) {
    stop("no usable probes matched 'Chr', 'pNRC100' or 'pNRC200'",
         call. = FALSE)
  }

  # Recombine and add the mean/variance normalised values.
  ld_proc <- do.call(rbind, pieces)
  ld_proc$Scaled <- scale(ld_proc$Intensity)
  ld_proc
}
	
	
#this version does background subtraction
# Reduce a raw GPL4664 scan to one mean background-subtracted intensity per
# probe start position on each replicon.
#
# Arguments:
#   data_file   - data frame of raw measurements.
#   layout_file - data frame of spot annotations.
#                 The two tables are joined with cbind(), i.e. row i of one
#                 is paired with row i of the other; no ID matching is done.
#                 Together they must provide the columns flag, sequence_name,
#                 ref, chromosome_start_position and the spot and background
#                 intensity columns of the chosen channel.
#   channel     - 1 (or '1') uses Spot.Mean.Intensity..Alexa546L minus
#                 Background.Mean.Intensity..Alexa546L,
#                 2 (or '2') uses Spot.Mean.Intensity..Alexa647L minus
#                 Background.Mean.Intensity..Alexa647L.
#                 Any other value is an error.
#
# Returns a data frame with the columns
#   Position  - numeric probe start position
#   Intensity - mean of (spot - background) over all replicate probes at
#               that position
#   Chr       - replicon label: 'Chr', 'pNRC100' or 'pNRC200'
#   Scaled    - Intensity centred and scaled with scale() over all rows
#               (a one-column matrix, as returned by scale())
# A replicon without any usable probe is skipped; an error is raised only
# when none of the three replicons has usable probes.
process_raw_gpl4664.2 <- function(data_file, layout_file, channel) {
  if (channel == 1) {
    spot_col <- 'Spot.Mean.Intensity..Alexa546L'
    background_col <- 'Background.Mean.Intensity..Alexa546L'
  } else if (channel == 2) {
    spot_col <- 'Spot.Mean.Intensity..Alexa647L'
    background_col <- 'Background.Mean.Intensity..Alexa647L'
  } else {
    stop("channel must be 1 or 2", call. = FALSE)
  }

  # Instead of merging by ID the two tables are simply bound side by side.
  spots <- cbind(data_file, layout_file)

  # Drop error spots and spots without a sequence. which() discards rows
  # where the comparison is NA instead of turning them into all-NA rows.
  spots <- spots[which(spots$flag == 'PRESENT'), ]
  spots <- spots[which(spots$sequence_name != 'None'), ]

  # Convert the positions of the rows that are actually kept. Taking them
  # from the table before the sequence_name filter gives a vector of the
  # wrong length as soon as one 'None' spot has been removed.
  spots$chromosome_start_position <-
    as.numeric(as.character(spots$chromosome_start_position))

  # Background-subtract and average replicate probes of one replicon;
  # NULL when the replicon has no probes.
  average_replicon <- function(label) {
    probes <- spots[grep(label, spots$ref), ]
    # aggregate() would discard NA positions anyway; removing them first
    # lets the emptiness check below see the rows that really count.
    probes <- probes[!is.na(probes$chromosome_start_position), ]
    if (nrow(probes) == 0L) {
      return(NULL)
    }
    probes$subtract <- probes[[spot_col]] - probes[[background_col]]
    averaged <- aggregate(probes$subtract,
                          by = list(probes$chromosome_start_position),
                          FUN = "mean")
    averaged$Chr <- label
    colnames(averaged) <- c('Position', 'Intensity', 'Chr')
    averaged
  }

  pieces <- lapply(c('Chr', 'pNRC100', 'pNRC200'), average_replicon)
  pieces <- pieces[!vapply(pieces, is.null, logical(1))]
  if (length(pieces) == 0L) {
    stop("no usable probes matched 'Chr', 'pNRC100' or 'pNRC200'",
         call. = FALSE)
  }

  # Recombine and add the mean/variance normalised values.
  ld_proc <- do.call(rbind, pieces)
  ld_proc$Scaled <- scale(ld_proc$Intensity)
  ld_proc
}

#parser for clone files generated from GPL4662-4664 array formats
# Parse a clone file into a Position / Intensity / Chr / Scaled table
# covering the chromosome and both plasmids.
#
# Arguments:
#   clone_file - data frame providing the columns flag,
#                chromosome_start_position, primer_forward_name and Y0.
#
# Returns a data frame with the columns
#   Position  - numeric chromosome_start_position
#   Intensity - the Y0 value of the probe
#   Chr       - replicon label: 'Chr', 'pNRC100' or 'pNRC200', assigned by a
#               case-insensitive match against primer_forward_name
#   Scaled    - Intensity centred and scaled with scale() over all rows
#               (a one-column matrix, as returned by scale())
# Chromosome probes come first, ordered by position, followed by the
# pNRC100 and then the pNRC200 probes in their input order. A replicon
# without probes simply contributes no rows.
clone_parse <- function(clone_file) {
  # Keep PRESENT spots only (removes absent or marginal ones). which()
  # drops rows whose flag is NA instead of turning them into all-NA rows.
  good <- clone_file[which(clone_file$flag == 'PRESENT'), ]
  good$chromosome_start_position <-
    as.numeric(as.character(good$chromosome_start_position))

  # Select the probes of one replicon and label them. rep() keeps the
  # assignment valid when the replicon has no probes at all.
  pick_replicon <- function(label) {
    hits <- good[grep(label, good$primer_forward_name, ignore.case = TRUE), ]
    hits$Chr <- rep(label, nrow(hits))
    hits
  }

  # Reorder the chromosome probes by location; the ordered table is the one
  # that must go into the combined result.
  chr <- pick_replicon('Chr')
  chr <- chr[order(chr$chromosome_start_position), ]

  combined <- rbind(chr, pick_replicon('pNRC100'), pick_replicon('pNRC200'))

  # Keep only the columns that are needed downstream.
  parsed <- data.frame(combined$chromosome_start_position,
                       combined$Y0,
                       combined$Chr)
  colnames(parsed) <- c('Position', 'Intensity', 'Chr')

  # Scale the intensity values.
  parsed$Scaled <- scale(parsed$Intensity)
  parsed
}
	
#this version only looks at the chromosome
#I am looking to see if this shifts the chromosomal mean closer to zero
# Chromosome-only variant of the clone parser.
#
# Arguments:
#   clone_file - data frame providing the columns flag,
#                chromosome_start_position, primer_forward_name and Y0.
#
# Returns a data frame with the columns Position (numeric start position),
# Intensity (Y0), Chr (always 'Chr') and Scaled (Intensity passed through
# scale(), computed over the chromosome probes only).
clone_chr_parse <- function(clone_file) {
  # Discard absent or marginal spots.
  is_present <- clone_file$flag == 'PRESENT'
  present <- clone_file[is_present, ]

  # Genome positions become numeric values.
  positions_text <- as.character(present$chromosome_start_position)
  present$chromosome_start_position <- as.numeric(positions_text)

  # Only probes whose forward primer name mentions the chromosome are kept.
  chr_rows <- grep('Chr', present$primer_forward_name, ignore.case = TRUE)
  chromosome <- present[chr_rows, ]
  chromosome$Chr <- 'Chr'

  # Reduce to the three columns of interest, then rename them.
  parsed <- data.frame(chromosome$chromosome_start_position,
                       chromosome$Y0,
                       chromosome$Chr)
  names(parsed) <- c('Position', 'Intensity', 'Chr')

  # z-score of the intensities.
  parsed$Scaled <- scale(parsed$Intensity)
  parsed
}