# These functions read in an array data table along with its configuration
# (layout) table and return a data frame suitable for processing with the
# DNAcopy package. Returned columns:
#   Position  - probe start position (numeric)
#   Intensity - probe intensity (mean over replicate probes where aggregated)
#   Chr       - replicon label: "Chr", "pNRC100" or "pNRC200"
#   Scaled    - Intensity as a z-score (number of standard deviations from the
#               mean of all returned rows) so that data can be compared
#               between arrays. This is the one-column matrix returned by
#               scale().
#
# Columns are read with `$` on purpose so that base R's partial column-name
# matching keeps working exactly as it did before.

# Private helper: average replicate probes per replicon and z-score the result.
#
# position  - raw start positions (character, factor or numeric)
# reference - replicon reference string for each probe
# intensity - numeric intensity for each probe
# keep      - logical mask of usable probes; NA entries are treated as unusable
#
# Returns a data frame with columns Position, Intensity, Chr and Scaled, with
# the replicons stacked in the order Chr, pNRC100, pNRC200.
.summarise_by_replicon <- function(position, reference, intensity, keep) {
  # which() drops NA flags instead of producing all-NA rows.
  rows <- which(keep)
  # Positions may arrive as factors/strings, so go through character first.
  position <- as.numeric(as.character(position[rows]))
  reference <- reference[rows]
  intensity <- intensity[rows]

  per_replicon <- lapply(c("Chr", "pNRC100", "pNRC200"), function(replicon) {
    hits <- grep(replicon, reference)
    if (length(hits) == 0L) {
      stop("no usable probes matched replicon '", replicon, "'", call. = FALSE)
    }
    # Combine replicate probes: one mean intensity per start position.
    averaged <- aggregate(intensity[hits], by = list(position[hits]),
                          FUN = "mean")
    averaged$Chr <- replicon
    colnames(averaged) <- c("Position", "Intensity", "Chr")
    averaged
  })

  combined <- do.call(rbind, per_replicon)
  # Mean/variance normalised values (number of sd from the mean).
  combined$Scaled <- scale(combined$Intensity)
  combined
}

# Private helper: join a data table to its layout table by spot ID, keeping
# every row of the data table.
.merge_by_spot_id <- function(data_file, layout_file) {
  merge(data_file, layout_file, by.x = "ID_REF", by.y = "ID", all.x = TRUE)
}

# Private helper: pick the low-intensity-scan signal for one dye channel.
#
# channel 1 reads the Alexa546L columns, channel 2 the Alexa647L columns.
# When subtract_background is TRUE the matching background column is
# subtracted from the spot column. Any other channel value is an error.
.raw_channel_signal <- function(spots, channel, subtract_background) {
  if (isTRUE(channel == 1)) {
    spot <- spots$Spot.Mean.Intensity..Alexa546L
    background <- spots$Background.Mean.Intensity..Alexa546L
  } else if (isTRUE(channel == 2)) {
    spot <- spots$Spot.Mean.Intensity..Alexa647L
    background <- spots$Background.Mean.Intensity..Alexa647L
  } else {
    stop("'channel' must be 1 (Alexa 546) or 2 (Alexa 647)", call. = FALSE)
  }
  if (subtract_background) spot - background else spot
}

# Private helper shared by the two raw GPL4664 processors.
.process_raw_gpl4664 <- function(data_file, layout_file, channel,
                                 subtract_background) {
  # Instead of merging by ID, the two tables are bound side by side.
  # NOTE(review): this assumes both tables list the spots in the same row
  # order - confirm against the files being loaded.
  spots <- cbind(data_file, layout_file)
  signal <- .raw_channel_signal(spots, channel, subtract_background)
  # Drop error spots and spots without a sequence. The position conversion is
  # applied to these same filtered rows; the earlier code converted the
  # positions of the unfiltered rows, which failed whenever a 'None' spot had
  # been removed.
  usable <- spots$flag == "PRESENT" & spots$sequence_name != "None"
  .summarise_by_replicon(
    position = spots$chromosome_start_position,
    reference = spots$ref,
    intensity = signal,
    keep = usable
  )
}

# Private helper shared by the clone parsers.
#
# Keeps spots flagged 'PRESENT', assigns each one to every replicon whose name
# occurs (case-insensitively) in primer_forward_name, and returns Position,
# Intensity (column Y0), Chr and Scaled. Rows stay in input order within each
# replicon; replicate probes are not averaged.
.parse_clone_replicons <- function(clone_file, replicons) {
  present <- which(clone_file$flag == "PRESENT")  # removes absent or marginal
  position <- as.numeric(
    as.character(clone_file$chromosome_start_position[present])
  )
  primer <- clone_file$primer_forward_name[present]
  intensity <- clone_file$Y0[present]

  hits <- lapply(replicons, function(replicon) {
    found <- grep(replicon, primer, ignore.case = TRUE)
    if (length(found) == 0L) {
      stop("no usable probes matched replicon '", replicon, "'", call. = FALSE)
    }
    found
  })
  rows <- unlist(hits)

  parsed <- data.frame(
    Position = position[rows],
    Intensity = intensity[rows],
    Chr = rep(replicons, times = vapply(hits, length, integer(1)))
  )
  # Scales intensity values.
  parsed$Scaled <- scale(parsed$Intensity)
  parsed
}

# Process a GPL4664 / GPL4661 array.
#
# data_file   - data frame with columns ID_REF, flag and Y_INT
# layout_file - data frame with columns ID, ref and chromosome_start_position
#
# Spots whose flag is not 'PRESENT' are removed, replicate probes are averaged
# per start position within each replicon, and a z-scored column is added.
process_gpl4664_gpl4661 <- function(data_file, layout_file) {
  spots <- .merge_by_spot_id(data_file, layout_file)
  .summarise_by_replicon(
    position = spots$chromosome_start_position,
    reference = spots$ref,
    intensity = spots$Y_INT,
    keep = spots$flag == "PRESENT"
  )
}

# Process a GPL4662 array.
#
# Same as process_gpl4664_gpl4661(), except that usable spots are the ones
# whose FLAG column equals 'I'.
process_gpl4662 <- function(data_file, layout_file) {
  spots <- .merge_by_spot_id(data_file, layout_file)
  .summarise_by_replicon(
    position = spots$chromosome_start_position,
    reference = spots$ref,
    intensity = spots$Y_INT,
    keep = spots$FLAG == "I"
  )
}

# Process a raw GPL4664 array without background subtraction.
#
# Note: this automatically uses the low intensity scan values due to better
# dynamic range. Channel 1 will always be your Alexa 546, channel 2 will be
# Alexa 647.
#
# data_file   - raw data frame with flag and Spot.Mean.Intensity..Alexa546L /
#               Spot.Mean.Intensity..Alexa647L columns
# layout_file - layout data frame with sequence_name, ref and
#               chromosome_start_position columns
# channel     - 1 or 2 (the strings '1' and '2' are accepted as well)
process_raw_gpl4664_low <- function(data_file, layout_file, channel) {
  .process_raw_gpl4664(data_file, layout_file, channel,
                       subtract_background = FALSE)
}

# Process a raw GPL4664 array; this version does background subtraction.
#
# Takes the same inputs as process_raw_gpl4664_low(), and additionally reads
# the Background.Mean.Intensity..Alexa546L / ..Alexa647L column of the chosen
# channel, which is subtracted from the spot intensity before replicate
# probes are averaged.
process_raw_gpl4664.2 <- function(data_file, layout_file, channel) {
  .process_raw_gpl4664(data_file, layout_file, channel,
                       subtract_background = TRUE)
}

# Parser for clone files generated from GPL4662-4664 array formats.
#
# clone_file - data frame with columns flag, chromosome_start_position,
#              primer_forward_name and Y0
#
# Returns one row per usable probe for the chromosome and both plasmids.
# Probes are not re-sorted by position: an earlier revision computed a sorted
# copy of the chromosome rows but never used it, so the established output
# order is kept.
clone_parse <- function(clone_file) {
  .parse_clone_replicons(clone_file, c("Chr", "pNRC100", "pNRC200"))
}

# This version only looks at the chromosome.
# I am looking to see if this shifts the chromosomal mean closer to zero.
clone_chr_parse <- function(clone_file) {
  .parse_clone_replicons(clone_file, "Chr")
}