#path to forward ls -f ~/microbiome_analysis_ku/data/*_R1_001.fastq.gz | sort -V > r1path #path to reverse ls -f ~/microbiome_analysis_ku/data/*_R2_001.fastq.gz | sort -V > r2path
#checking if we have all pairs of forward and reverse reads diff -s <(cat r1path | cut -d_ -f4) <(cat r2path | cut -d_ -f4) #if the outcome is "identical", then we good :)
#Cutting sample IDs from the path file (from either is good) cut -d_ -f4 r1path > SampleID
#Binning ids and forward reverse columns together with tab-delimited seperation sed -i $'1 i\\\nsampleid \t forward-absolute-filepath \t reverse-absolute-filepath' manifest
一些数据的推理调查
## Exploring the reads zcat ./data/MAC2023_iSeq001_S1_L001_R2_001.fastq.gz
source activate qiime2.X qiime tools import \ --type"SampleData[PairedEndSequencesWithQuality]" \ --input-format PairedEndFastqManifestPhred33V2 \ --input-path ~/MAC2023/manifest.tsv \ # link to the folder in which my manifest file is located --output-path ~/MAC2023/demuxed_MAC23.qza # link to the path where I want my demultiplexed data to be exported in
qiime dada2 denoise-paired \ --i-demultiplexed-seqs ~/demuxed_mac23.qza \ #the input file for denoising is our demultiplexed pairedEnd reads that was generated in the previous step. --p-trim-left-f 17 \ #length of my forward primer (17 nt) --p-trim-left-r 21 \ #length of my reverse primer (21 nt) --p-trunc-len-f 260 \ #truncation length for forward reads. I.e. we truncate reads over 260 base since their quality started dropping from this point onwards. --p-trunc-len-r 220 \ #truncation length for reverse reads. I.e. we truncate reads over 220 base since their quality started dropping from this point onwards. --o-table ~/count_table.qza \ #the ASV count table --o-representative-sequences ~/repseqs.qza \ #the representative sequences for in each read --o-denoising-stats ~/denoising_stats.qza \ #the status of the denoising process in a full catalogue --p-n-threads 10
4.1. 可视化计数表
qiime feature-table summarize --i-table ~/count_table.qza \ #The ASV table as the input --m-sample-metadata-file ~/metadataA.tsv \ #The metadata as the input --o-visualization ./table.qzv #The qzv format of our ASV table
qiime feature-classifier fit-classifier-naive-bayes \ # here you can train your classifier based on a Naive-Bayes method --i-reference-reads ~/derepseqs-uniq-341f-805r.qza \ # Dereplicated sequences based on the primer set as input --i-reference-taxonomy ~/dereptaxa-uniq-341f-805r.qza\ # Dereplicated taxonomic annotations based on the primer set as input --o-classifier ~/silva-classifier-primered4.qza
# Running the classifier qiime feature-classifier classify-sklearn \ # Sklearn package for classification --i-reads ~/repseqs.qza \ # Representative sequences as the (input) --i-classifier ~/classifier/silva138-classifier-341f-805r.qza \ # Our costumized SILVA 138 classifier (input) --o-classification ~/taxonomy.qza \ # The taxonomic annotation linked to the repseqs (output) --p-n-jobs 10 # The number of cores/jobs
#making phyloseq objects from qiime files ps <- qiime2R::qza_to_phyloseq(features ="~/data/ccd/table-ccd.qza", taxonomy ="~/data/ccd/taxonomy-ccd.qza", tree ="./tree-ccd.qza") repseqs <- qiime2R::read_qza("~/data/ccd/repseqa.qza")$data #we need to merge the refseqs like this since in above fun, there is no argument for that ps= merge_phyloseq(ps, repseqs)
ft = read_tsv('./MAC2023.github.io/data/illumina/Results_MAC23_MAC96_S96/OTU-tables/zOTU_table_GG.txt')%>% data.frame() ft <- column_to_rownames(ft,"X.OTU.ID")
tx = ft %>% as.data.frame()%>% select(81)
ft[,dim(ft)[2]]<-NULL
tx = tidyr::separate(tx, col = taxonomy, into =c("Kingdom","Phylum", "Class","Order","Family", "Genus","Species"), sep =';')%>% as.matrix()
#monitoring the number of the samples in which the prevalence of a taxon is at least one prevdf <- apply(otu_table(pst),ifelse(taxa_are_rows(pst),1,2),function(x){sum(x>0)})
#Find out the phyla that are of mostly low-prevalence features by computing the total and average prev of features in Phylum plyr::ddply(weight_df,"Phylum",function(x){cbind(means =round(mean(x$ASVprev),2), sums =round(sum(x$ASVprev),2))})%>% mutate(sanity = ifelse(means == sums,"TRUE","FALSE"))
# This means that each ASV should have appeared at least in n samples to be kept. asv.filter =function(asvtab, n.samples =1){ filter.threshold <- n.samples/ncol(asvtab)*100# In how many samples out of total samples an ASV should have occured table_count <- apply(otu_table(asvtab),2,function(x) ifelse(x>0,1,0))%>% as.data.frame() suspected_ASV = table_count[which((rowSums(table_count)/ncol(table_count))*100< filter.threshold),]%>% rownames() return(suspected_ASV) }
print(glue("{ asv.filter(asvtab = otu_table(pst), n.samples = 2) } occured only in two samples"))
sus_ASV = asv.filter(asvtab = otu_table(pst), n.samples =2) ## Will you remove them? #pst = subset_taxa(pst, !taxa_names(pst) %in% sus_ASV)
#A function to find singletones. You need to be careful about this step! out.ASV =function(phyloseq, threshold =1, binwidth =0.01){ #Loading necessary pkgs pacman::p_load(glue, tidyverse, reshape2, ggrepel, S4Vectors)# nolint #This function requires phyloseq, tidyverse and glue packages to be loaded. if(sum(colSums(otu_table(phyloseq)))/ncol(otu_table(phyloseq))==100){#making the relative abundance table rel_abund = as(t(otu_table(phyloseq)),"matrix") }elseif(sum(colSums(otu_table(phyloseq)))/ncol(otu_table(phyloseq))==1){ rel_abund = as(t(otu_table(phyloseq)),"matrix") }else{ rel_abund = as(t(apply(otu_table(phyloseq), ifelse(taxa_are_rows(phyloseq),1,2), function(x) x/sum(x))),"matrix") } names.single = apply(rel_abund,1,function(x){ifelse(x == threshold,TRUE, ifelse(x ==sum(x),TRUE,FALSE))})%>% reshape2::melt()%>% filter(value ==TRUE)%>% dplyr::select(2)%>% pull%>% as.vector() if(length(names.single)==0){ print(glue("WOW! {length(names.single)} singletones detected in this dataset")) qplot.noSing = qplot(rel_abund, geom ="histogram", binwidth = binwidth, show.legend =F, main ="Frequency count of relative abundance, no singletones detected")+ xlab ("Relative abundance in samples")+ ylab("Frequency")+ theme_bw() return(structure(list(qplot.noSing))) }else{ single.ASV = rel_abund[rownames(rel_abund)%in% names.single,] single.ASV[single.ASV ==0]<-NA# A separate dataset for annotation of singletones on the barplot qplot.withSing = qplot(rel_abund, geom ="histogram", binwidth = binwidth, main ="Frequency count of relative abundance with singletones")+ geom_bar(aes(single.ASV), fill ="red", color =NA, width = binwidth)+ xlab ("Relative abundance in samples")+ ylab("Frequency")+ geom_label_repel(aes(x =1, y =length(rel_abund)/5), label.padding = unit(0.55,"lines"), label = glue("{length(names.single)}\n Singletones"), color ="black")+ theme_bw() qplot.rmSing = qplot(rel_abund[!rownames(rel_abund)%in% names.single,], geom ="histogram", binwidth = binwidth, main ="Frequency count of relative abundance without singletones")+ xlab ("Relative abundance in samples")+ ylab("Frequency")+ theme_bw() print(glue('Oh no..! {length(names.single)} singletones detected in the dataset')) return(structure(list(qplot.withSing, qplot.rmSing, unlist(names.single)))) } } single.test = out.ASV(phyloseq = pst, threshold =2, binwidth =0.01) #singletones = single.test[[3]] #here you can extract the names of the singletones
single.test[[1]]#to show the plot with singletones single.test[[2]]#to show the plot without singletones
#Now you can remove the singletones from your pst file as follows: #pst = subset_taxa(pst, !taxa_names(ps)%in% singletones)
rm(single.test)
4.阿尔法多样性
4.1. 稀疏
#library(MicrobiotaProcess)
#This takes a bit of time ps_rar_curve <- MicrobiotaProcess::ggrarecurve(obj = pst, indexNames =c("Observe","Shannon"), chunks=400, theme(legend.spacing.y = unit(0.02,"cm"), legend.text = element_text(size =6)), show.legend=F)
ps_rar_curve + theme_bw()+ geom_vline(xintercept =30000, lty =2, color = alpha("red",0.5))+ ggtitle("Rarefaction curves based on Richnessh and evenness","Each line is a sample")
#rarefying the table with minimum depth of 10000 reads per sample ps_rar = rarefy_even_depth(pst, sample.size =30000, replace =FALSE)#in this depth we have lost one sapmle (F.29) and no ASVs.
# Taxonomic filtering based on abundance for rarefied data: supervsed ## Abumdance: ASV > 0.0001% overall abundance across all samples total.depth <-sum(otu_table(ps_rar)) totAbuThreshold <- 1e-4* total.depth ps_rar <- prune_taxa(taxa_sums(ps_rar)>totAbuThreshold, ps_rar)
#Faith Phylogenetic Diversity: PD = Σ(branch lengths leading to selected species) library(picante, verbose =FALSE) FaithPD = picante::pd(samp = t(otu_table(ps_rar)), tree = phy_tree(ps_rar))$PD
#adding the indexes to the metadatas sample_data(ps_rar)<- data.frame(sample_data(ps_rar), Chao1=Chao1[rownames(Chao1)%in% rownames(sample_data(ps_rar)),][[1]], Shannon = Shannon$Shannon, FaithPD = FaithPD)#note that we have removed that sample which has been removed by rarefaction.
p11 = msd$gg + ggtitle("Mean vs. STD for raw counts of taxa")+ theme_bw()+ xlab("Mean of normalized raw counts")+ ylab("Standard deviation of counts")+ guides(fill ="none")
p12 = msd2$gg + ggtitle("Mean vs. STD for VST counts of taxa")+ theme_bw()+ xlab("Mean of normalized VST counts")+ ylab("Standard deviation of VST counts")
## Run 0 stress 0.04758852 ## Run 1 stress 0.05257593 ## Run 2 stress 0.04758856 ## ... Procrustes: rmse 3.155827e-05 max resid 8.339069e-05 ## ... Similar to previous best ## Run 3 stress 0.04758858 ## ... Procrustes: rmse 3.471801e-05 max resid 8.939346e-05 ## ... Similar to previous best ## Run 4 stress 0.0475886 ## ... Procrustes: rmse 1.71134e-05 max resid 5.518678e-05 ## ... Similar to previous best ## Run 5 stress 0.04758853 ## ... Procrustes: rmse 1.978362e-05 max resid 5.187433e-05 ## ... Similar to previous best ## Run 6 stress 0.07429898 ## Run 7 stress 0.05257595 ## Run 8 stress 0.07429908 ## Run 9 stress 0.04758852 ## ... Procrustes: rmse 1.726039e-05 max resid 4.740674e-05 ## ... Similar to previous best ## Run 10 stress 0.04758851 ## ... New best solution ## ... Procrustes: rmse 1.241057e-05 max resid 2.7403e-05 ## ... Similar to previous best ## Run 11 stress 0.04758852 ## ... Procrustes: rmse 4.215754e-06 max resid 1.148943e-05 ## ... Similar to previous best ## Run 12 stress 0.07429939 ## Run 13 stress 0.04758853 ## ... Procrustes: rmse 8.518784e-06 max resid 2.341394e-05 ## ... Similar to previous best ## Run 14 stress 0.04758852 ## ... Procrustes: rmse 7.077508e-06 max resid 1.774362e-05 ## ... Similar to previous best ## Run 15 stress 0.04758853 ## ... Procrustes: rmse 1.264987e-05 max resid 3.6171e-05 ## ... Similar to previous best ## Run 16 stress 0.04758853 ## ... Procrustes: rmse 3.895438e-05 max resid 0.0001050886 ## ... Similar to previous best ## Run 17 stress 0.04758853 ## ... Procrustes: rmse 4.289572e-05 max resid 0.0001150774 ## ... Similar to previous best ## Run 18 stress 0.05257595 ## Run 19 stress 0.04758852 ## ... Procrustes: rmse 1.454469e-06 max resid 7.042092e-06 ## ... Similar to previous best ## Run 20 stress 0.04758853 ## ... Procrustes: rmse 6.52229e-06 max resid 2.010471e-05 ## ... Similar to previous best ## *** Best solution repeated 9 times
bray_p + wunifrac_p + plot_layout( heights =8, widths =15, ncol =2)+ plot_annotation("Ordination plots of compostional analysis","Bray-Curtis dissimilarity and\n Weighted Unifrac phylogenetic distance","You might know this as Beta diversity")
Gloomer:一个包装“tax_glom()”函数并向分类单元添加简洁唯一名称的函数
# Gloomer
#A function to create unique names for each ASV. If species is set as the taxa level, it removes any NA in Order level then attempts to use the name of one level higher taxa for those who have similar names, e.g. uncultured_bacterium
#====================Sometimes in genus level, we might have multiple uncultured organisms, which if we want to make unique out of them for the species level it won't work==== #since adding uncultured to uncultered is sill duplication. therefore if the taxa_level is set to species we first make a unique genus and then we go further to the speices===#
#Removing unculured Family ps = subset_taxa(ps,!Family %in%c("uncultured","NA","uncategorized","unassigend",""," ")) if(taxa_level =="Species"){
ps = subset_taxa(ps,!Genus %in%NA)#we remove genus tagged NA tax_table(ps)[, taxa_level]<- ifelse(is.na(tax_table(ps)[, taxa_level]), paste0("unknown"), paste(tax_table(ps)[, taxa_level]))#convert NA in species into unknown physeq = tax_glom(physeq = ps, taxrank = taxa_level, NArm = NArm) taxdat = tax_table(physeq)[,seq_along(rank.names[1:which(rank.names == taxa_level)])]
taxdat = taxdat[complete.cases(taxdat),]%>% as.data.frame otudat = otu_table(physeq) #first take care of the uncultured genus taxdat[,6]= ifelse(taxdat[,6]%in%c("uncategorized",NA,"uncultured","unassigend",""," "), paste0("[", taxdat[,length(rank.names[1:which(rank.names=="Genus")])-1],"]","_", taxdat[,6]), taxdat[,6]) spec1 = taxdat[, taxa_level]%>% as.vector spec2 = taxdat[, taxa_level]%>% as.vector
rownames(uni)<-gen1 colnames(uni)<- gen2 uni[upper.tri(uni, diag =TRUE)]=0#get rid of diagonals and upper triangle
duplis = uni %>% reshape2::melt()%>% filter(value =="TRUE")
if(dim(duplis)[[1]]>0){#if there is not duplications, we can simply use the taxa names as the row name duplis = uni %>% reshape2::melt()%>% filter(value =="TRUE")%>% dplyr::select(1)%>% unique()%>% unlist %>% as.vector taxdat = taxdat %>% mutate( uni= ifelse(taxdat[, taxa_level]%in% duplis, paste0("[", taxdat[,length(rank.names[1:which(rank.names==taxa_level)])-1],"]","_", taxdat[,taxa_level]), taxdat[,taxa_level])) taxdat[, taxa_level]= taxdat[,"uni"] taxdat[,"uni"]<-NULL
#graph permutational test set.seed(2023) graph.test <- graph_perm_test(ps_total, sampletype ="group", max.dist =0.35, grouping ="sampleID", distance ="bray", type ="mst", nperm =1000) print(glue("Test statistic for the graph-based analysis indicates that\n with p-value = {graph.test$pval}, \n we REJECT the Null hypothesis that the distribution of taxa in samples from different groiups are similar!"))
## Test statistic for the graph-based analysis indicates that ## with p-value = 0.000999000999000999, ## we REJECT the Null hypothesis that the distribution of taxa in samples from different groiups are similar!
plotPerm1=plot_permutations(graph.test, bins =40)+ geom_text(aes(label = glue("{ifelse(round(graph.test$pval, 2) == 0, 'P < 0.01', round(graph.test$pval, 2))}"), x =65, y =40), color ="red")+ theme_bw()+ ggtitle("Permutation test for pure edges","Bray-Curtis")
grid.arrange(ncol =2, plotNet1, plotPerm1)+ geom_col(inherit.aes =F, color ="red")
#test for the disperssion of the variance around the centroids library(vegan) library(permute, verbose =FALSE) #set the age of the animal as the random variable set.seed(1990) h <- with(data = data.frame(sample_data(ps_log)), how(blocks = group, nperm =999))
##total data
#Now we do a Homogeneity of dispersion test set.seed(10) bray.disp <- vegan::betadisper(bray_log, group = sample_data(ps_log)$group, type ="centroid")#if the p-value is significant, it means that there is a significant difference in variance for any of the tested levels. perm.test = permutest(bray.disp, permutation =h, pairwise =T)
#jpeg( "./Beta/dispersion of variance_bray_total_sample.type.jpeg", quality = 100)
plot(bray.disp, col = cols, bty ="n", las =1, main ="Dispersion of variance around the centroids, \n bray | LogT dataset", sub=NULL, xlab = sprintf("PCo1 [%s%%]",round(eig.vals/sum(eig.vals)*100,1)[1]), ylab = sprintf("PCo2 [%s%%]", round(eig.vals/sum(eig.vals)*100,1)[2])); text(glue("P = {p.val.perm}"), x =-0.1, y =-0.29, cex =1.5, col ="red")
bray.dbrda = dbrda(t(otu_table(ps_log))~ group, dist ="bray", permutations=h, data = sample_data(ps_log)%>%data.frame) #Sex not sig, so reduced the model
permutest(x = bray.dbrda, by ="terms", permutations = h)
## ## Permutation test for dbrda under reduced model ## ## Blocks: group ## Permutation: free ## Number of permutations: 999 ## ## Model: dbrda(formula = t(otu_table(ps_log)) ~ group, data = ## sample_data(ps_log) %>% data.frame, distance = "bray", permutations = ## h) ## Permutation test for all constrained eigenvalues ## Df Inertia F Pr(>F) ## group 8 8.3821 15.781 1 ## Residual 78 5.1788
5.3.3. 绘制模型提取图
# Make the plot out of the model, which is the variation explained only by the terms
eig.vals = bray.dbrda$CCA$eig inertia.total = bray.dbrda$tot.chi #total variation (inertia) explained. #this number should be used as the denominator for measuring the amount of variance out of totoal variance wxplained by each dbrda
#Digesta score.site %>% ggplot(aes(dbRDA1, dbRDA2))+ geom_point(aes(fill = sample_data(ps_log)$group), color ="black", pch =21, alpha =0.5, size =6)+ geom_hline(yintercept =0, lty =2, alpha =0.5)+ geom_vline(xintercept =0, lty =2, alpha =0.5)+ scale_fill_manual(values = cols)+ theme_bw()+ scale_y_continuous(na.value =c(-2,3), n.breaks =10)+ scale_x_continuous(na.value =c(-1,1), n.breaks =10)+ labs(fill ="Groups")+ xlab(label = paste("dbRDA1 [",round(eig.vals[[1]]/sum(eig.vals)*100,1), "% of fitted and", round(eig.vals[[1]]/inertia.total*100,1), "% of total variation]"))+ ylab(label = paste("dbRDA2 [",round(eig.vals[[2]]/sum(eig.vals)*100,1), "% of fitted and",round(eig.vals[[2]]/inertia.total*100,1), "of total variation]"))+ theme(axis.title = element_text(size =10), text = element_text(size =13, face ="bold"), axis.text.x =element_text(size =10, face ="bold"), axis.text.y =element_text(size =10, face ="bold"))+ ggtitle(label ="dbRDA plot of Bray","LogT data")
# Phylum order x = tapply(sigtabspec$log2FoldChange, sigtabspec$Order,function(x)max(x)) x = sort(x,TRUE) sigtabspec$Order = factor(as.character(sigtabspec$Order), levels=names(x))
#Species reorder x = tapply(sigtabspec$log2FoldChange, sigtabspec$Species,function(x)max(x)) x = sort(x,TRUE) sigtabspec$Species = factor(as.character(sigtabspec$Species), levels=names(x))
cor_main = Hmisc::rcorr(countdf, biodf, type ="spearman") cors = cor_main$r cors = cors[rownames(cors)%in% colnames(countdf), colnames(cors)%in% colnames(biodf)]#rows as taxa, cols as chemical data
#calculating the qvalues for the correlations cor.pval = cor_main$P[rownames(cor_main$P)%in% rownames(cors), colnames(cor_main$P)%in% colnames(cors)] cor.qval = p.adjust(cor.pval, method ="BH")
q.vals = matrix(cor.qval, ncol = ncol(cor.pval), nrow = nrow(cor.pval),dimnames=list(rownames(cor.pval), colnames(cor.pval))) #Adding significance signes to the qval matrix to be used in the heatmap later on q.vals[cor.qval <0.05]="*" q.vals[cor.qval >=0.05]=""
library(Biobase, verbose =FALSE) asvdat = as(cors,"matrix")#in the aassay dataset, we add our correlation matrix instead of the abundance matrix
taxadat = Biobase::AnnotatedDataFrame(data.frame(cors))#taxa table pdata = cors %>% data.frame
x = ExpressionSet(assayData = asvdat, featureData = taxadat )
#Adding phenotype data pData(x)<- pdata
# Filtering based on row standard deviation and choosing the most variable 50 taxa library(matrixStats) sds <- rowSds(Biobase::exprs(x)) o <- order(sds, decreasing =TRUE)[1:50] h_1 <- hclust(dist(Biobase::exprs(x)[o,]), method ="ward.D2") h_2 <- hclust(dist(t(Biobase::exprs(x)[o,])), method ="ward.D2")
#making a phylum annotation and it only accepts one column dataframe
在原假设下,p 值是具有均匀分布的随机值。因此,我们必须执行调整后的 p 值来校正族错误率 (FWER)。FWER 是指同时进行多个假设检验时至少出现一个 I 类错误(误报)的概率。
# Set the parameters num_simulations <- 10000 # Number of simulated datasets sample_size <- 50 # Sample size for each group
# Initialize a vector to store p-values simulated_p_values <- numeric(num_simulations)
# Simulate data and compute p-values for(i in1:num_simulations){ # Simulate data under the null hypothesis for two groups group1 <- rnorm(sample_size) group2 <- rnorm(sample_size) # Perform a two-sample t-test on the simulated data test_result <- t.test(group1, group2) # Store the p-value simulated_p_values[i]<- test_result$p.value }
# Plot a histogram of simulated p-values hist(simulated_p_values, breaks =30, col ="lightblue", main ="Distribution of Simulated P-values under Null hypothesis")
print(glue("{sum(simulated_p_values<=0.05)} of the tests out of 10000 test were significant. \n This indicates that under the Null hypothesis alpha % of times your test will be rendered false positive (type-I error)."))
## 515 of the tests out of 10000 test were significant. ## This indicates that under the Null hypothesis alpha % of times your test will be rendered false positive (type-I error).
ps_mock = phyloseq::subset_samples(spec, group =="Mock") phyls = tax_table(ps_mock)[rowSums(ps_mock@otu_table)>0,2]%>% data.frame()%>% distinct()%>% pull()#chosoing phylum that that are in Mock samples
node.df <- mrca.wrap(highlight = phyls, taxa.df = tax_table(spec), tree = phy_tree(spec), tax_level ="Phylum", type ="node")
#drwing the tree tree_p = ggtree(spec, aes(color = Phylum), branch.length ="none", layout ="circular", show.legend =TRUE, open.angle =5, size =1.5)+ ggtitle("Phylogenetic tree of different Classes.", "Mock comunity phyla are highligted!")+ geom_tiplab(aes(label=Class, color = Phylum), check.overlap =FALSE, face ="bold", size =3, offset =0.3 )+ scale_color_manual(values = sample(color.index, length(unique(taxadf$Phylum)),FALSE), breaks = unique(taxadf$Phylum))+#to remove the NA from the legend geom_highlight(data = node.df, lwd =0.25, lty =3, alpha =0.1, aes(node = node.id, fill = highlights), extend =0.05, to.bottom =TRUE, align ="right", show.legend =TRUE)+ geom_nodepoint( pch =21, color ="black", size =2, fill ="white")+ geom_nodepoint(aes(subset = node %in% node.df$node.id), pch =21, size =3, color ="black", fill =c("#00ffff","#ff7700") )+ geom_tippoint(size =1, pch =21, color = alpha(colour ="white", alpha =0.2))+ geom_label(aes(x = branch, label =round(branch.length,2)), label.padding = unit(0.05,"line"), size =3, inherit.aes =TRUE)+ scale_fill_manual(values =c("#16f476","#fc00fc"))