####Code for "Maturational changes in song sparrow song" ###Data song_sparrow <- read.csv("song_sparrow.csv") as.factor(song_sparrow$age) ###Libraries library(lme4) library(dplyr) library(readxl) library(readxl) library(ggplot2) library(dplyr) library(stats) ###Linear Models: Used to Construct Table 1 ##MCV prop mcv_lmer <- lmer(song_sparrow$MCV~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(mcv_lmer) mcv_lmer2 <- lmer(song_sparrow$MCV~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(mcv_lmer2) #1-2: .000169 #1-3: 1.01e-6 #2-3: .086989 ##Number notes notes_lmer <- lmer(song_sparrow$avg~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(notes_lmer) notes_lmer2 <- lmer(song_sparrow$avg~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(notes_lmer2) #1-2: .000542 #1-3: .000104 #2-3: .564132 ##Unique Notes unique_lmer <- lmer(song_sparrow$avg_uniq~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(unique_lmer) unique_lmer2 <- lmer(song_sparrow$avg_uniq~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(unique_lmer2) #1-2: .000915 #1-3: 1.48e-5 #2-3: .157340 ##Time Interval (within) song_rate_wb_lmer_sqrt<- lmer(sqrt(song_sparrow$`ave.sec.bw.songs.within.bout`)~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(song_rate_wb_lmer_sqrt) song_rate_wb_lmer2_sqrt<- lmer(sqrt(song_sparrow$`ave.sec.bw.songs.within.bout`)~ factor(song_sparrow$age, levels = c("2", "3", "1"))+ (1|song_sparrow$bird)) summary(song_rate_wb_lmer2_sqrt) #1-2: .0407 #1-3: .1214 #2-3: .5917 ##Time interval (between) song_rate_bb_lmer_sqrt<- lmer(sqrt(song_sparrow$`ave.sec.between.bout`)~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(song_rate_bb_lmer_sqrt) song_rate_bb_lmer2_sqrt<- lmer(sqrt(song_sparrow$`ave.sec.between.bout`)~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(song_rate_bb_lmer2_sqrt) #1-2: 1.58e-6 #1-3: .000269 #2-3: .0849 ##Song rate rate_lmer <- lmer(song_sparrow$rate~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(rate_lmer) rate_lmer2 <- lmer(song_sparrow$rate~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(rate_lmer2) #1-2: 3.61e-5 #1-3: .000143 #2-3: .63532 ##Within-song stereotypy stereotypy_lmer_sqrt <- lmer(sqrt(song_sparrow$w_avg)~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(stereotypy_lmer_sqrt) stereotypy_lmer2_sqrt <- lmer(sqrt(song_sparrow$w_avg)~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(stereotypy_lmer2_sqrt) #1-2: .900 #1-3: .876 #2-3: .778 ##Between song consistency consistency_lmer_sqrt <- lmer(sqrt(song_sparrow$b_avg)~ factor(song_sparrow$age)+(1|song_sparrow$bird)) summary(consistency_lmer_sqrt) consistency_lmer2_sqrt <- lmer(sqrt(song_sparrow$b_avg)~ factor(song_sparrow$age, levels = c("2", "3", "1"))+(1|song_sparrow$bird)) summary(consistency_lmer2_sqrt) #1-2: .0338 #1-3: .0387 #2-3: .9517 ###Figure 1 par(mfrow=c(2,4)) boxplot(song_sparrow$MCV~song_sparrow$age, main = "MCV Proportion", xlab = "Age", ylab = "Prop. Identified as MCV") segments(x0=1, y0=.82, x1=3, y1=.82) segments(x0=1, y0=.82, x1=1, y1=.81) segments(x0=3, y0=.82, x1=3, y1=.81) segments(x0=1.2, y0=.76, x1=1.8, y1=.76) segments(x0=1.2, y0=.76, x1=1.2, y1=.75) segments(x0=1.8, y0=.76, x1=1.8, y1=.75) text(labels="***", x=1.5, y= .78, cex = 2) text(labels="***", x=2, y= .84, cex = 2) boxplot(song_sparrow$avg~song_sparrow$age, main= "Number of Notes", xlab= "Age", ylab = "Average Number of Notes") segments(x0=1, y0=26, x1=3, y1=26) segments(x0=1, y0=26, x1=1, y1=25.7) segments(x0=3, y0=26, x1=3, y1=25.7) segments(x0=1.2, y0=24.5, x1=1.8, y1=24.5) segments(x0=1.2, y0=24.5, x1=1.2, y1=24.2) segments(x0=1.8, y0=24.5, x1=1.8, y1=24.2) text(labels="***", x=1.5, y = 25, cex = 2) text(labels="***", x=2, y = 26.5, cex = 2) boxplot(song_sparrow$avg_uniq~song_sparrow$age, main = "Unique Notes", xlab = "Age", ylab = "Average Number of Unique Notes", ylim = c(5,17)) segments(x0=1, y0=16.5, x1=3, y1=16.5) segments(x0=1, y0=16.5, x1=1, y1=16.3) segments(x0=3, y0=16.5, x1=3, y1=16.3) segments(x0=1.2, y0=15.5, x1=1.8, y1=15.5) segments(x0=1.2, y0=15.5, x1=1.2, y1=15.3) segments(x0=1.8, y0=15.5, x1=1.8, y1=15.3) text(labels="***", x=1.5, y = 16, cex = 2) text(labels="***", x=2, y = 17, cex = 2) boxplot(sqrt(song_sparrow$`ave.sec.bw.songs.within.bout`)~song_sparrow$age, main= "Within-Bout Time Interval", xlab= "Age", ylab= "Average Time Interval (Sqrt)", ylim = c(3.5,10)) segments(x0=1.2, y0=9.8, x1=1.8, y1=9.8) segments(x0=1.2, y0=9.8, x1=1.2, y1=9.7) segments(x0=1.8, y0=9.8, x1=1.8, y1=9.7) text(labels="*", x=1.5, y = 10, cex = 2) boxplot(sqrt(song_sparrow$`ave.sec.between.bout`)~song_sparrow$age, main= "Between-Bout Time Interval", xlab= "Age", ylab= "Average Time Interval (Sqrt)", ylim = c(4,31)) segments(x0=1, y0=30, x1=3, y1=30) segments(x0=1.2, y0=27, x1=1.8, y1=27) segments(x0=1, y0=30, x1=1, y1=29.5) segments(x0=3, y0=30, x1=3, y1=29.5) segments(x0=1.2, y0=27, x1=1.2, y1=26.5) segments(x0=1.8, y0=27, x1=1.8, y1=26.5) text(labels= "***", x = 1.5, y = 27.7, cex = 2) text(labels= "***", x = 2, y = 30.7, cex = 2) boxplot(song_sparrow$`rate`~song_sparrow$`age`, main= "Singing Rate", xlab= "Age", ylab= "Songs Sung per Hour", ylim = c(0,250)) segments(x0=1, y0=245, x1=3, y1=245) segments(x0=1.2, y0=220, x1=1.8, y1=220) segments(x0=1, y0=245, x1=1, y1=240) segments(x0=3, y0=245, x1=3, y1=240) segments(x0=1.2, y0=220, x1=1.2, y1=216) segments(x0=1.8, y0=220, x1=1.8, y1=216) text(labels= "***", x = 2, y = 253, cex = 2) text(labels= "***", x = 1.5, y = 228, cex = 2) boxplot(sqrt(song_sparrow$w_avg)~song_sparrow$age, main = "Within-Song Stereotypy", xlab = "Age", ylab = "Correlation Coefficient (Sqrt)") boxplot(sqrt(xc_sum$b_avg)~xc_sum$age, main = "Between-Song Consistency", xlab = "Age", ylab = "Correlation Coefficient (Sqrt)", ylim = c(.85,1.01)) segments(x0=1, y0=1.008, x1=3, y1=1.008) segments(x0=1, y0=1.008, x1=1, y1=1.005) segments(x0=3, y0=1.008, x1=3, y1=1.005) segments(x0=1.2, y0=1.00, x1=1.8, y1=1.00) segments(x0=1.2, y0=1.00, x1=1.2, y1= .998) segments(x0=1.8, y0=1.00, x1=1.8, y1=.998) text(labels= "*", x = 2, y = 1.012, cex = 2) text(labels= "*", x = 1.5, y = 1.004, cex = 2) ###LDA Analysis ##LDA all 3 ages set.seed(234234) song_sparrow$age <- as.factor(song_sparrow$age) training.samples.two <- song_sparrow$age %>% createDataPartition(p = 0.8, list = FALSE) train.data.two <- song_sparrow[training.samples.two, ] test.data.two <- song_sparrow[-training.samples.two, ] ##Normalize Data as.character(train.data.two$age) preproc.param.two <- preProcess(train.data.two, method = c("center", "scale")) train.transformed.two <- preproc.param.two %>% predict(train.data.two) test.transformed.two <- preproc.param.two %>% predict(test.data.two) ##LDA lda.model.two <- lda(as.factor(age)~`ave.sec.bw.songs.all` + avg + w_avg + b_avg + avg_uniq + `MCV` + `rate`, data = train.transformed.two, na.action = "na.omit") lda.model.two #LD1 biggest contributors: MCV (+), rate (-), avg unique notes (+) #LD2 biggest contributors: avg number notes (-), avg unique notes (+), between song consistancy predictions <- lda.model.two %>% predict(test.transformed.two) mean(predictions$class==test.transformed.two$age) #correctly classifed 55.6% of observations (41.67% when based 50/50 training/test; 48.7% when 20/80 training/test) lda.cm <- table(test.transformed.two$age, predictions$class) lda.cm #2 out of 3 for 1 year-olds when 80/20 training/test #7 out of 9 when 50/50 training/test #8 of 14 when 20/80 training/test ##LDA 1 vs all else song_sparrow_bin <- song_sparrow %>% mutate(age_bin = ifelse(age == "1", "1 year olds", "older")) set.seed(199009) training.samples.bin <- song_sparrow_bin$age_bin %>% createDataPartition(p = 0.8, list = FALSE) train.data.bin <- song_sparrow_bin[training.samples.bin, ] test.data.bin <- song_sparrow_bin[-training.samples.bin, ] ##Normalize Data preproc.param.bin <- train.data.bin %>% preProcess(method = c("center", "scale")) train.transformed.bin <- preproc.param.bin %>% predict(train.data.bin) test.transformed.bin <- preproc.param.bin %>% predict(test.data.bin) #LDA lda.model.bin <- lda(age_bin~`ave.sec.bw.songs.all` + avg + w_avg + b_avg + avg_uniq + `MCV` + `rate`, data = train.transformed.bin) lda.model.bin #LD1 biggest contributors: MCV (-), avg unique notes (-), song rate (+) predictions_bin <- lda.model.bin %>% predict(test.transformed.bin) mean(predictions_bin$class==test.transformed.bin$age_bin) #correctly classifed 88.9% of observations lda.cm.bin <- table(test.transformed.bin$age, predictions_bin$class) lda.cm.bin #80/20 training/test: 2/2 1 year olds; 1 incorrectly identified as 1 year old ###Figure 2 lda.data.two <- cbind(train.transformed.two, predict(lda.model.two)$x) lda.data.bin <- cbind(train.transformed.bin, predict(lda.model.bin)$x) ggarrange(ggplot(lda.data.two) + geom_density(aes(x = LD1, fill = age), alpha = .3) + geom_rug(aes(x = LD1, color = age)) + theme_classic() + scale_fill_manual(values = c("red", "#33CC00", "#3300CC")) + scale_color_manual(values = c("red", "#33CC00", "#3300CC")) + theme(legend.position="bottom") + scale_y_continuous(breaks = seq(0,.6,.1),limits = c(0,0.55)), ggplot(lda.data.bin) + geom_density(aes(LD1, fill = age_bin), alpha = .3) + geom_rug(aes(LD1, color = age_bin)) + theme_classic() + scale_fill_manual(name = 'age group', values = c("red", "light blue")) + scale_color_manual(name = 'age group', values = c("red", "light blue")) + theme(legend.position="bottom") + scale_y_continuous(breaks = seq(0,.6,.1),limits = c(0,0.55)), labels = c("A", "B"))