--- title: Mapping age of high TE subjects to those with low TE proportions using quantigene expression profile author: "Bonnie LaFleur" output: html_document: df_print: paged --- ```{r setup, include=TRUE, echo=FALSE} knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE) ``` # Introduction Based on these Tables 1 and 2 on page 3 and 4, it is clear that Age itself might be a reasonably good discriminator of proportions of thymic epithelium cells in total thymus. Low TE subjects are almost all adults and high TE are mostly young children. ## Read in subject characteristics data ```{r data, echo=TRUE, include=TRUE, warning=FALSE} require(dplyr) require(plyr) require(rms) require(readr) raw.data <- read_csv("Quantgene2.csv") ## data formats final.data <- as.data.frame(raw.data) %>% mutate(age.ch=as.factor(ifelse(`Age of Thymus Tissue Donor (in Years)`<=17, "Young","Adult"))) %>% # categorizing age to two groups in age.ch variable mutate(age.ch3=as.factor(ifelse(`Age of Thymus Tissue Donor (in Years)`<=1, "Postnatal", ifelse(`Age of Thymus Tissue Donor (in Years)`>1 & `Age of Thymus Tissue Donor (in Years)` <=17,"Child-Adolescent","Adult")))) %>% # categorizing age to two or three groups in age.ch and age.ch3 variable rename(replace = c("Total area of relevant tissue on slide:" = "relevant.tissue")) %>% rename(replace = c("Total area of lymphoid tissue on slide:" = "lymphoid.tissue")) %>% rename(replace = c("Total area of thymic epithelium on slide:" = "thymic.epithelium")) %>% rename(replace = c("Total active cortical area (thymopoiesis):" = "cortical")) %>% rename(replace=c("DHVI Thymus Tissue ID Number (T#)"="id")) %>% rename(replace =c("Age of Thymus Tissue Donor (in Years)"="Age")) names(final.data) <- gsub(" \\(human\\)","",names(final.data)) names(final.data) <- gsub(" ","",names(final.data)) #create \% thymic epithelial and \% cortical area.to.total <- final.data %>% mutate(lymphoid.tissue.p=100*lymphoid.tissue/relevant.tissue, thymic.epithelium.p=100*thymic.epithelium/relevant.tissue, cortical.p=100*cortical/relevant.tissue) %>% select(id, Age,age.ch,age.ch3,Gender, lymphoid.tissue.p, thymic.epithelium.p, cortical.p, everything(), -relevant.tissue, -lymphoid.tissue,-thymic.epithelium, -cortical) area.to.total$age.ch3 <- factor(area.to.total$age.ch3, levels=c("Postnatal", "Child-Adolescent", "Adult")) ``` ## Read in gene expression data ```{r readinnewdata, echo=TRUE, include=TRUE, warning=FALSE} new.expression <-read.csv("new.expression.csv", stringsAsFactors=F) ### P10 = P1, 9086= 908 missing 244 -- can I assume 244 was a typo in the original clinical ##### sort(new.expression$id) sort(area.to.total$id) names(area.to.total) dim(area.to.total) area.to.total2 <-subset(area.to.total, select = 1:8) updated.data <-merge(area.to.total2, new.expression, by="id") updated.data$agenew <-ifelse(updated.data$Age >= 60,"60 +", updated.data$age.ch) knitr::kable(table(updated.data$agenew)) knitr::kable(table(updated.data$Gender)) ``` ## GAPDH changes with age, demonstrating that it is not a good standardization gene. Also, examines univariate association with other genes. ```{r adjbyKRT14, echo=TRUE, include=TRUE, warning=FALSE} ### Show that GAPDH expression changes with age GAPDH <- lm(log(GAPDH) ~ age.ch, data=updated.data) summary(GAPDH) GAPDH.mean <- lm(log(GAPDH) ~ -1 + age.ch, data=updated.data) summary(GAPDH.mean) ### Does KRT14 show similar difference in age? KRT14 <- lm(log(KRT14) ~ age.ch, data=updated.data) summary(KRT14) KRT14means <-lm(log(KRT14) ~ -1 + age.ch, data=updated.data) summary(KRT14means) ##### Does KRT8 show similar difference in age? KRT8<- lm(log(KRT8) ~ age.ch, data=updated.data) summary(KRT8) KRT8means <-lm(log(KRT8) ~ -1 + age.ch, data=updated.data) summary(KRT8means) ######################## KRT8 <- lm(log(KRT8/KRT14) ~ age.ch, data=updated.data) anova(KRT8) #################### CD1a <- lm(log(CD1a/KRT14) ~ age.ch, data=updated.data) anova(CD1a) #################### CCL21 <- lm(log(CCL.21/ KRT14) ~ age.ch, data=updated.data) anova(CCL21) #################### CD3E <- lm(log(CD3.E/KRT14) ~ age.ch, data=updated.data) anova(CD3E) #################### CXCL12 <- lm(log(CXCL.12/KRT14) ~ age.ch, data=updated.data) anova(CXCL12) ``` # Analysis for the manuscript, genes standardized by KRT8. ```{r adjbyKRT8,echo=TRUE, include=TRUE, warning=FALSE} ######################## KRT14 <- lm(log(KRT14/KRT8) ~ age.ch, data=updated.data) anova(KRT14) KRT14.means <- lm(KRT14/KRT8 ~ -1 + age.ch, data=updated.data) summary(KRT14.means) #################### CD1a.krt8 <- lm(log(CD1a/KRT8) ~ age.ch, data=updated.data) anova(CD1a.krt8) CD1a.krt8.means <- lm(CD1a/KRT8 ~ -1 + age.ch, data=updated.data) summary(CD1a.krt8.means) #################### CCL21.krt8 <- lm(log(CCL.21/ KRT8) ~ age.ch, data=updated.data) anova(CCL21.krt8) CCL21.krt8.means <- lm(CCL.21/ KRT8 ~ -1 + age.ch, data=updated.data) summary(CCL21.krt8.means) #################### CD3E.krt8 <- lm(log(CD3.E/KRT8) ~ age.ch, data=updated.data) anova(CD3E.krt8) CD3E.krt8.means <- lm(CD3.E/KRT8 ~ -1 + age.ch, data=updated.data) summary(CD3E.krt8.means) #################### CXCL12.krt8 <- lm(log(CXCL.12/KRT8) ~ age.ch, data=updated.data) anova(CXCL12.krt8) CXCL12.krt8.means <- lm(CXCL.12/KRT8 ~ -1 + age.ch, data=updated.data) summary(CXCL12.krt8.means) ``` ## plots that correspond to the analysis of variance. ```{r plots, echo=TRUE, include=TRUE, warning=FALSE} library(beanplot) library(tidyr) updated.data.nogapdh <-subset(updated.data, select=-c(GAPDH)) updated.data.long <- updated.data.nogapdh %>% gather(Gene, Expression, c(KRT14, CD1a:CXCL.12)) #png("beanplot expression.png") beanplot(log(Expression/KRT8)~age.ch*Gene, data=updated.data.long, ll = 0.04, side = "both", xlab="", ylab="Log Ratio to KRT8", cex.axis=0.75, col = list("grey56", c("grey92", "black")), axes=T) #dev.off() library(dplyr) library(ggplot2) library(ggpubr) theme_set(theme_pubclean()) updated.data.long$Expression.adjusted <-log(updated.data.long$Expression/updated.data.long$KRT8) #png("boxplot expression.png") ggplot(updated.data.long, aes(Gene, Expression.adjusted)) + geom_boxplot(aes(color = age.ch))+ labs(y= "Log Expression Standardized to KRT8") + scale_color_manual(values = c("black", "grey41")) + stat_compare_means(aes(group = age.ch), label = "p.signif") #dev.off() ``` ```{r agebyTEP} area.to.total$age.zip <-ifelse(area.to.total$Age < 1, 0, area.to.total$Age) table(area.to.total$age.zip) library(rms) sex <- lm(thymic.epithelium.p ~ Gender, data=updated.data) summary(sex) baby.vs.none <- lm(thymic.epithelium.p ~ age.zip, data=area.to.total) summary(baby.vs.none) non.zero <-filter(area.to.total, age.zip != 0) #plot(density(non.zero$Age)) #summary(non.zero$Age) non.baby <-lm(thymic.epithelium.p ~ Age, data=non.zero) summary(non.baby) with.baby <-lm(thymic.epithelium.p ~ Age, data=area.to.total) summary(with.baby) png("regressions.older.png") ggplot(non.zero, aes(x=Age, y=thymic.epithelium.p)) + geom_point()+ geom_smooth(method=lm) dev.off() png("regressions.all.png") ggplot(updated.data, aes(x=Age, y=thymic.epithelium.p)) + geom_point()+ geom_smooth(method=lm) dev.off() ```