Click here for setup information.

knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
set.seed(119)
#library(conflicted)
#pacman::p_depends(microbiomeMarker, local = TRUE)  
#pacman::p_depends_reverse(microbiomeMarker, local = TRUE)  
library(phyloseq); packageVersion("phyloseq")
library(Biostrings); packageVersion("Biostrings")
pacman::p_load(tidyverse, patchwork, ampvis2, 
               agricolae, labdsv, naniar, ape,
               pairwiseAdonis, microbiome, seqRFLP, microbiomeMarker,
               microbiomeMarker, reactable, downloadthis, captioner,
               install = FALSE, update = FALSE)

options(scipen=999)
knitr::opts_current$get(c(
  "cache",
  "cache.path",
  "cache.rebuild",
  "dependson",
  "autodep"
))

Synopsis

This workflow contains ASV differential abundance analysis for the high temperature data sets. In order to run the workflow, you either need to first run the Data Preparation workflow and the Filtering workflow or or download the files linked below. See the Data Availability page for complete details.

Workflow Input

Files needed to run this workflow can be downloaded from figshare.

In this workflow, we use the labdsv package (Roberts and Roberts 2016) to run Dufrene-Legendre Indicator Species Analysis and the microbiomeMarker package (Cao 2020) to run linear discriminant analysis (LDA) effect size (LEfSe) (Segata et al. 2011).

16s rRNA

Indicator Analysis

Indicator Analysis calculates the indicator value (fidelity and relative abundance) of species in clusters or types.

Choose a data set(s) & set the p-value cutoff.

Code

samp_ps <- c("ssu18_ps_work", "ssu18_ps_filt", 
             "ssu18_ps_perfect", "ssu18_ps_pime")
indval_pval <- 0.05

Format Data Frame

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_df <- data.frame(otu_table(tmp_get))
     tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)]
     tmp_row_names <- row.names(tmp_df)
     tmp_row_names <- tmp_row_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Run Indicator Analysis

Code

set.seed(1191)
for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
     tmp_iva <- indval(tmp_get[,-1], tmp_get[,1])
     tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval]
     tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval]
     tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval]
     tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval]
     tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv,
                                  pval = tmp_pv, freq = tmp_fr)
     tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),]

     tmp_tax_df <- data.frame(tax_table(get(i)))
     tmp_tax_df$ASV_ID <- NULL
     tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE)
     tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),]
     class(tmp_sum_tax$group) <- "character"
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", "C0")
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", "W3")
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", "W8")
     tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names")
     tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),]
     tmp_sum_tax$ASV_ID <-  as.character(tmp_sum_tax$ASV_ID)
     tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni")
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary"))
     assign(tmp_res_name, tmp_sum_tax)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Now we can save a few files and display the data.

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
     tmp_get[,1] <- NULL
     tmp_df <- as.data.frame(t(tmp_get))
     tmp_col_names <- colnames(tmp_df)
     tmp_col_names <- tmp_col_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     colnames(tmp_df) <- tmp_col_names
     #tmp_df$freq_all <- apply(tmp_df > 0, 1, sum)
     tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum)
     tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum)
     tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum)
     tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))])
     tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")])
     tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")])
     tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")])
     tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")

     tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
     tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE)
     tmp_merge_df <- tmp_merge_df[,c(1,9:12,2:8,13:19)]
     tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
     tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final"))
     assign(tmp_merge_name, tmp_merge_df)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_indval_summary")

Save a new phyloseq object

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
     tmp_list <- tmp_tab[,1]
     tmp_ps <- prune_taxa(tmp_list, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind"))
     assign(tmp_name, tmp_ps)
     tmp_ps@phy_tree <- NULL
     tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
     tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
                            tip.label = taxa_names(tmp_ps))
     tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
     assign(tmp_name, tmp_ps)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Indicator Summary

(16S rRNA) Table 1 | Significant results of Indicator Analysis for FULL data set.

(16S rRNA) Table 2 | Significant results of Indicator Analysis for Arbitrary filtered ASV data set.

(16S rRNA) Table 3 | Significant results of Indicator Analysis for PERfect filtered data set.

(16S rRNA) Table 4 | Significant results of Indicator Analysis for PIME data set.

LEfSe Analysis

Note

From (Segata et al. 2011).

Linear discriminant analysis (LDA) effect size (LEfSe) is a method to support high-dimensional class comparisons with a particular focus on microbial community analyses. LEfSe determines the features (organisms, clades, operational taxonomic units, genes, or functions) most likely to explain differences between classes by coupling standard tests for statistical significance with additional tests encoding biological consistency and effect relevance. Class comparison methods typically predict biomarkers consisting of features that violate a null hypothesis of no difference between classes. The effect size provides an estimation of the magnitude of the observed phenomenon due to each characterizing feature and it is thus a valuable tool for ranking the relevance of different biological aspects and for addressing further investigations and analyses.

First, a little data tidying and subsetting only ASV data from the taxonomy table.

Code

for (i in samp_ps) {
  tmp_get <- get(i)
  tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
  tax_table(tmp_get) <- cbind(tax_table(tmp_get),
                           rownames(tax_table(tmp_get)))
  colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order", 
                                    "Family", "Genus", "Species")
  tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all"))
  assign(tmp_name, tmp_get)
  tax_table(tmp_get) <- tax_table(tmp_get)[,c(7)]
  tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv"))
  assign(tmp_name_asv, tmp_get)
  rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_rf")

Choose a data set(s) & set the LEfSe cutoff values.

samp_ps <- c("ssu18_ps_work", "ssu18_ps_filt", "ssu18_ps_perfect", "ssu18_ps_pime")
lefse_lda <- 2
lefse_kw <- 0.05
lefse_wc <- 0.05

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv")))
     tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", 
                        lda_cutoff = lefse_lda, 
                        kw_cutoff = lefse_kw, 
                        wilcoxon_cutoff = lefse_wc, 
                        bootstrap_n = 30, bootstrap_fraction = 2/3, 
                        sample_min = 10, multicls_strat = FALSE, curv = FALSE)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv"))
     assign(tmp_name, tmp_lefse)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv")

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all")))
     tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", 
                        lda_cutoff = lefse_lda, 
                        kw_cutoff = lefse_kw, 
                        wilcoxon_cutoff = lefse_wc, 
                        bootstrap_n = 30, bootstrap_fraction = 2/3, 
                        sample_min = 10, multicls_strat = FALSE, curv = FALSE)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all"))
     assign(tmp_name, tmp_lefse)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv")
objects(pattern = "_lefse_all")

Code

for (i in samp_ps) {
    tmp_o_tax <- data.frame(tax_table(get(i)))
    tmp_o_tax$ASV_ID <- NULL 
    tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID")
    
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
    tmp_mt <- data.frame(marker_table(tmp_get))
    tmp_mt$feature <- gsub('s__', '', tmp_mt$feature)
    tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>%
          tibble::remove_rownames()
    tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^0$", "C0")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^8$", "W8")

    tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),]
    
    tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_summary"))
    assign(tmp_res_name, tmp_sum)
    rm(list = ls(pattern = "tmp_"))
}

Now we can save a few files and display the data.

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_df <- data.frame(t(otu_table(tmp_get)))
     #tmp_get[,1] <- NULL
     #tmp_df <- as.data.frame(t(tmp_otu))
     tmp_col_names <- colnames(tmp_df)
     tmp_col_names <- tmp_col_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     colnames(tmp_df) <- tmp_col_names
     tmp_df$freq <- apply(tmp_df > 0, 1, sum)
     tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum)
     tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum)
     tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum)
     tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))])
     tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")])
     tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")])
     tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")])
     tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")

     tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary")))
     tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE)

     tmp_merge_df <- tmp_merge_df[,c(1,10:12,2:9,14:20)]
     tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
     tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_final"))
     assign(tmp_merge_name, tmp_merge_df)
     rm(list = ls(pattern = "tmp_"))
}

Save a new phyloseq object

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary")))
     tmp_list <- tmp_tab[,1]
     tmp_ps <- prune_taxa(tmp_list, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse"))
     assign(tmp_name, tmp_ps)
     tmp_ps@phy_tree <- NULL
     tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
     tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
                            tip.label = taxa_names(tmp_ps))
     tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
     assign(tmp_name, tmp_ps)
     rm(list = ls(pattern = "tmp_"))
}
objects()

LEfSe Summaries (ASVs & Markers)

ASV summary data

(16S rRNA) Table 5 | Significant results of LEfSe Analysis for FULL data set.

(16S rRNA) Table 6 | Significant results of LEfSe Analysis for Arbitrary filtered ASV data set.

(16S rRNA) Table 7 | Significant results of LEfSe Analysis for PERfect filtered data set.

(16S rRNA) Table 8 | Significant results of LEfSe Analysis for PIME data set.

ASV Visualizations

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
     tmp_mt <- data.frame(marker_table(tmp_get))
     tmp_mt  <-  tmp_mt %>% filter(ef_lda >= 3)
     tmp_mt <- marker_table(tmp_mt)
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_viz_trim"))
     assign(tmp_res_name, tmp_mt)
     rm(list = ls(pattern = "tmp_"))
}

objects(pattern = "_lefse_asv_viz_trim")

(16S rRNA) Figure 1 | Differentially abundant ASVs from LEfSe analysis for the FULL data set, trimmed to show those with LDA score ≥ 3.0.

(16S rRNA) Figure 2 | Differentially abundant ASVs from LEfSe analysis for the Arbitrary filtered data set, trimmed to show those with LDA score ≥ 3.0.

(16S rRNA) Figure 3 | Differentially abundant ASVs from LEfSe analysis for the PERfect filtered data set, trimmed to show those with LDA score ≥ 3.0.

(16S rRNA) Figure 4 | Differentially abundant ASVs from LEfSe analysis for the PIME filtered data set, trimmed to show those with LDA score ≥ 3.0.

Marker summary data

Code

for (i in samp_ps) {
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all")))
    tmp_mt <- data.frame(marker_table(tmp_get))
    tmp_mt <- tmp_mt[, c(2:4,1)]
    tmp_mt <- tmp_mt %>% dplyr::rename(c("group" = 1, "pval" = 3))
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^0$", "C0")
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^3$", "W3")
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^8$", "W8")
    tmp_mt <- tmp_mt %>% tibble::rownames_to_column("marker")
    tmp_mt <- tmp_mt %>% tidyr::separate(col = "feature", 
                                         into = c("Kingdom", "Phylum", "Class", 
                                                  "Order", "Family", "Genus", "ASV_ID"), 
                                         sep = "\\|\\w__")
    
    tmp_mt$Kingdom <- gsub('k__', '', tmp_mt$Kingdom)
    tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_marker_final"))
    assign(tmp_res_name, tmp_mt)
    rm(list = ls(pattern = "tmp_"))
}
objects(pattern =  "_lefse_marker_final")

(16S rRNA) Table 9 | Significant results of LEfSe MARKER Analysis for FULL data set.

(16S rRNA) Table 10 | Significant results of LEfSe MARKER Analysis for Arbitrary filtered ASV data set.

(16S rRNA) Table 11 | Significant results of LEfSe MARKER Analysis for PERfect filtered data set.

(16S rRNA) Table 12 | Significant results of LEfSe MARKER Analysis for PIME data set.

Marker visualizations

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all")))
     tmp_mt <- data.frame(marker_table(tmp_get))
     tmp_mt  <-  tmp_mt %>% filter(ef_lda >= 4)
     tmp_mt <- marker_table(tmp_mt)
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all_viz_trim"))
     assign(tmp_res_name, tmp_mt)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_all_viz_trim")

(16S rRNA) Figure 5 | Differentially abundant MARKER from LEfSe analysis for the FULL data set, trimmed to show those with LDA score ≥ 4.0.

(16S rRNA) Figure 6 | Differentially abundant MARKER from LEfSe analysis for the Arbitrary filtered data set, trimmed to show those with LDA score ≥ 4.0.

(16S rRNA) Figure 7 | Differentially abundant MARKER from LEfSe analysis for the PERfect filtered data set, trimmed to show those with LDA score ≥ 4.0.

(16S rRNA) Figure 8 | Differentially abundant MARKER from LEfSe analysis for the PIME filtered data set, trimmed to show those with LDA score ≥ 4.0.

Response of select taxa

In this section of the workflow we use the microbiomeMarker package to assess the response of taxonomic lineages to soil warming. In the first step we need to fix the selected data set to make it compatible with the various functions. For this analysis we use the PERfect filtered data set.

Code

## FIX ps object
ssu_ps <- ssu18_ps_perfect_rf_all
tmp_tax1 <- data.frame(tax_table(ssu_ps))
#tmp_tax1$ASV_SEQ <- NULL
tmp_rn <- row.names(tmp_tax1)
tmp_tax <- data.frame(lapply(tmp_tax1, function(x) {gsub("\\(|)", "", x)}))
row.names(tmp_tax) <- tmp_rn
identical(row.names(tmp_tax), row.names(tmp_tax1))
#dplyr::filter(tmp_tax, Phylum == "Acidobacteriota")
ps_tax_new <- as.matrix(tmp_tax)
tmp_ps <- phyloseq(otu_table(ssu_ps),
                     phy_tree(ssu_ps),
                     tax_table(ps_tax_new),
                     sample_data(ssu_ps))
ssu_ps <- tmp_ps
phyloseq::rank_names(ssu_ps)
data.frame(tax_table(ssu_ps))

Next we run a statistical test for multiple groups using the run_test_multiple_groups function.

Code

ssu_group_anova <-  run_test_multiple_groups(ssu_ps, group = "TEMP", 
                                             taxa_rank = "all", 
                                             method = "anova")
ssu_group_anova@marker_table
marker_table(ssu_group_anova)

And then conduct post hoc pairwise comparisons for multiple groups test using the run_posthoc_test function.

Code

ssu_default_pht <- run_posthoc_test(ssu_ps, group = "TEMP", 
                                    method = "tukey", transform = "log10")

We can filter out a select taxa and plot the results.

filter(data.frame(ssu_default_pht@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales")

        group                                                       group_name
3-0.140   141 k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales
8-0.140   141 k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales
8-3.140   141 k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales
        comparions   diff_mean      pvalue      ci_lower   ci_upper
3-0.140        3-0 0.007492377 0.645933374 -0.0145296109 0.02951437
8-0.140        8-0 0.030358682 0.008215796  0.0083366941 0.05238067
8-3.140        8-3 0.022866305 0.041743700  0.0008443168 0.04488829

plot_postHocTest(ssu_default_pht, feature = "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") & theme_bw()

But what we really want to do is get all of the markers that are significant from the analysis, excluding any significant ASVs so we can look at high taxa ranks.

Code

ssu_pht <- ssu_default_pht
ssu18_pht_filt <- filter(data.frame(ssu_pht@result), pvalue <= "0.05")[!grepl("ASV", filter(data.frame(ssu_pht@result), pvalue <= "0.05")$group_name),]
ssu18_pht_filt <- ssu18_pht_filt[!grepl("[a-z]__$", ssu18_pht_filt$group_name),]
ssu18_pht_filt <- distinct(ssu18_pht_filt, group_name, .keep_all = TRUE)
nrow(ssu18_pht_filt)
save.image("page_build/mmarker_wf.rdata")

There are 47 significantly different lineage markers.

Click here to see a list of significantly different lineage markers.

Now we select a subset of the 47 markers to plot and visualize the results.

Code

ssu_select <- c(
"k__Bacteria|p__Acidobacteriota|c__Acidobacteriae|o__Subgroup_2", 
"k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Chitinophagales|f__Saprospiraceae", 
"k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Cytophagales", 
"k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Flavobacteriales", 
"k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Sphingobacteriales", 
"k__Bacteria|p__Myxococcota|c__Polyangia|o__mle1-27", 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Burkholderiales|f__Comamonadaceae|g__Rubrivivax", 
"k__Bacteria|p__Actinobacteriota|c__Acidimicrobiia|o__Microtrichales", 
"k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales", 
"k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales", 
"k__Bacteria|p__Myxococcota|c__Myxococcia|o__Myxococcales|f__Myxococcaceae|g__Corallococcus", 
"k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Burkholderiales|f__Burkholderiaceae|g__Ralstonia" 
)

(16S rRNA) Figure 9 | The response of select taxa to two years of warming by +3°C and +8°C. Differences assessed for multiple-group pair-wise comparisons using ANOVA followed by Tukey HSD post hoc tests. PERfect filtered read count data was log10 transformed and normalized using total sum scaling (TSS). The centre line of each box plot represents the median, the lower and upper hinges represent the first and third quartiles and whiskers represent + 1.5 the interquartile range. Significant differences denoted by asterisks (* p ≤ 0.05, ** p ≤ 0.01, *** p ≤ 0.001, **** p ≤ 0.0001; ns = not significant).

ITS

Indicator Analysis

Choose a data set(s) & set the p-value cutoff.

samp_ps <- c("its18_ps_work", "its18_ps_filt", "its18_ps_perfect", "its18_ps_pime") 
indval_pval <- 0.05

Format Data Frame

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_df <- data.frame(otu_table(tmp_get))
     tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)]
     tmp_row_names <- row.names(tmp_df)
     tmp_row_names <- tmp_row_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Run Indicator Analysis

Code

set.seed(1191)
for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
     tmp_iva <- indval(tmp_get[,-1], tmp_get[,1])
     tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval]
     tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval]
     tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval]
     tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval]
     tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv,
                                  pval = tmp_pv, freq = tmp_fr)
     tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),]

     tmp_tax_df <- data.frame(tax_table(get(i)))
     tmp_tax_df$ASV_ID <- NULL
     tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE)
     tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),]
     class(tmp_sum_tax$group) <- "character"
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", "C0")
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", "W3")
     tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", "W8")
     tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names")
     tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),]
     tmp_sum_tax$ASV_ID <-  as.character(tmp_sum_tax$ASV_ID)
     tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni")
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary"))
     assign(tmp_res_name, tmp_sum_tax)
     rm(list = ls(pattern = "tmp_"))
}

Now we can save a few files and display the data.

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
     tmp_get[,1] <- NULL
     tmp_df <- as.data.frame(t(tmp_get))
     tmp_col_names <- colnames(tmp_df)
     tmp_col_names <- tmp_col_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     colnames(tmp_df) <- tmp_col_names
     #tmp_df$freq_all <- apply(tmp_df > 0, 1, sum)
     tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum)
     tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum)
     tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum)
     tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", 
                                                                         "W3", 
                                                                         "W8"))])
     tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")])
     tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")])
     tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")])
     tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")

     tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
     tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE)
     tmp_merge_df <- tmp_merge_df[,c(1,9:12,2:8,13:19)]
     tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
     tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final"))
     assign(tmp_merge_name, tmp_merge_df)
     rm(list = ls(pattern = "tmp_"))
}

Save a new phyloseq object

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
     tmp_list <- tmp_tab[,1]
     tmp_ps <- prune_taxa(tmp_list, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind"))
     assign(tmp_name, tmp_ps)
     tmp_ps@phy_tree <- NULL
     tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
     tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
                            tip.label = taxa_names(tmp_ps))
     tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
     assign(tmp_name, tmp_ps)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Indicator Summary

(ITS) Table 1 | Significant results of Indicator Analysis for FULL data set.

(ITS) Table 2 | Significant results of Indicator Analysis for Arbitrary filtered ASV data set.

(ITS) Table 3 | Significant results of Indicator Analysis for PERfect filtered data set.

(ITS) Table 4 | Significant results of Indicator Analysis for PIME data set.

LEfSe Analysis

Details about LEFSE

microbiomeMarker

Remove ASV seq, subset only ASVs

Code

for (i in samp_ps) {
  tmp_get <- get(i)
  tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
  tax_table(tmp_get) <- cbind(tax_table(tmp_get),
                           rownames(tax_table(tmp_get)))
  colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order", 
                                    "Family", "Genus", "Species")
  tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all"))
  assign(tmp_name, tmp_get)
  tax_table(tmp_get) <- tax_table(tmp_get)[,c(7)]
  tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv"))
  assign(tmp_name_asv, tmp_get)
  rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_rf")

Choose a data set(s) & set the LEfSe cutoff values.

samp_ps <- c("its18_ps_work", "its18_ps_filt", "its18_ps_perfect", "its18_ps_pime") 
lefse_lda <- 2  
lefse_kw <- 0.05    
lefse_wc <- 0.05

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv")))
     tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", 
                        lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, 
                        bootstrap_n = 30, bootstrap_fraction = 2/3, 
                        sample_min = 10, multicls_strat = FALSE, curv = FALSE)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv"))
     assign(tmp_name, tmp_lefse)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv")

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all")))
     tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", 
                        lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, 
                        bootstrap_n = 30, bootstrap_fraction = 2/3, 
                        sample_min = 10, multicls_strat = FALSE, curv = FALSE)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all"))
     assign(tmp_name, tmp_lefse)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv")
objects(pattern = "_lefse_all")

Code

for (i in samp_ps) {
    tmp_o_tax <- data.frame(tax_table(get(i)))
    tmp_o_tax$ASV_ID <- NULL 
    tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID")
    
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
    tmp_mt <- data.frame(marker_table(tmp_get))
    tmp_mt$feature <- gsub('s__', '', tmp_mt$feature)
    tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>%
          tibble::remove_rownames()
    tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^0$", "C0")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3")
    tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^8$", "W8")

    tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),]
    
    tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_summary"))
    assign(tmp_res_name, tmp_sum)
    rm(list = ls(pattern = "tmp_"))
}

Now we can save a few files and display the data.

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_df <- data.frame(t(otu_table(tmp_get)))
     #tmp_get[,1] <- NULL
     #tmp_df <- as.data.frame(t(tmp_otu))
     tmp_col_names <- colnames(tmp_df)
     tmp_col_names <- tmp_col_names %>%
              stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>%
              stringr::str_replace("[A-Z]$", "")
     colnames(tmp_df) <- tmp_col_names
     tmp_df$freq <- apply(tmp_df > 0, 1, sum)
     tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum)
     tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum)
     tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum)
     tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))])
     tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")])
     tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")])
     tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")])
     tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")

     tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary")))
     tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE)

     tmp_merge_df <- tmp_merge_df[,c(1,10:12,2:9,14:20)]
     tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
     tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_final"))
     assign(tmp_merge_name, tmp_merge_df)
     rm(list = ls(pattern = "tmp_"))
}

Save a new phyloseq object

Code

for (i in samp_ps) {
     tmp_get <- get(i)
     tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary")))
     tmp_list <- tmp_tab[,1]
     tmp_ps <- prune_taxa(tmp_list, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse"))
     assign(tmp_name, tmp_ps)
     tmp_ps@phy_tree <- NULL
     tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
     tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
                            tip.label = taxa_names(tmp_ps))
     tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
     assign(tmp_name, tmp_ps)
     rm(list = ls(pattern = "tmp_"))
}
objects()

LEfSe Summaries (ASVs & Markers)

ASV summary data

(ITS) Table 5 | Significant results of LEfSe Analysis for FULL data set.

(ITS) Table 6 | Significant results of LEfSe Analysis for Arbitrary filtered ASV data set.

(ITS) Table 7 | Significant results of LEfSe Analysis for PERfect filtered data set.

(ITS) Table 8 | Significant results of LEfSe Analysis for PIME data set.

ASV Visualizations

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
     tmp_mt <- data.frame(marker_table(tmp_get))
     tmp_mt  <-  tmp_mt %>% filter(ef_lda >= 3)
     tmp_mt <- marker_table(tmp_mt)
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_viz_trim"))
     assign(tmp_res_name, tmp_mt)
     rm(list = ls(pattern = "tmp_"))
}

objects(pattern = "_lefse_asv_viz_trim")

(ITS) Figure 1 | Differentially abundant ASVs from LEfSe analysis for the FULL data set, trimmed to show those with LDA score ≥ 3.0.

(ITS) Figure 2 | Differentially abundant ASVs from LEfSe analysis for the Arbitrary filtered data set, trimmed to show those with LDA score ≥ 3.0.

(ITS) Figure 3 | Differentially abundant ASVs from LEfSe analysis for the PERfect filtered data set, trimmed to show those with LDA score ≥ 3.0.

(ITS) Figure 4 | Differentially abundant ASVs from LEfSe analysis for the PIME filtered data set, trimmed to show those with LDA score ≥ 3.0.

Marker summary data

Code

for (i in samp_ps) {
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all")))
    tmp_mt <- data.frame(marker_table(tmp_get))
    tmp_mt <- tmp_mt[, c(2:4,1)]
    tmp_mt <- tmp_mt %>% dplyr::rename(c("group" = 1, "pval" = 3))
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^0$", "C0")
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^3$", "W3")
    tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^8$", "W8")
    tmp_mt <- tmp_mt %>% tibble::rownames_to_column("marker")
    tmp_mt <- tmp_mt %>% tidyr::separate(col = "feature", 
                                         into = c("Kingdom", "Phylum", "Class", 
                                                  "Order", "Family", "Genus", "ASV_ID"), 
                                         sep = "\\|\\w__")
    
    tmp_mt$Kingdom <- gsub('k__', '', tmp_mt$Kingdom)
    tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_marker_final"))
    assign(tmp_res_name, tmp_mt)
    rm(list = ls(pattern = "tmp_"))
}

(ITS) Table 9 | Significant results of LEfSe MARKER Analysis for FULL data set.

(ITS) Table 10 | Significant results of LEfSe MARKER Analysis for Arbitrary filtered ASV data set.

(ITS) Table 11 | Significant results of LEfSe MARKER Analysis for PERfect filtered data set.

(ITS) Table 12 | Significant results of LEfSe MARKER Analysis for PIME data set.

Marker visualizations

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all")))
     tmp_mt <- data.frame(marker_table(tmp_get))
     tmp_mt  <-  tmp_mt %>% filter(ef_lda >= 3.5)
     tmp_mt <- marker_table(tmp_mt)
     tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all_viz_trim"))
     assign(tmp_res_name, tmp_mt)
     rm(list = ls(pattern = "tmp_"))
}

objects(pattern = "_lefse_all_viz_trim")

(ITS) Figure 5 | Differentially abundant MARKER from LEfSe analysis for the FULL data set, trimmed to show those with LDA score ≥ 4.0.

(ITS) Figure 6 | Differentially abundant MARKER from LEfSe analysis for the Arbitrary filtered data set, trimmed to show those with LDA score ≥ 4.0.

(ITS) Figure 7 | Differentially abundant MARKER from LEfSe analysis for the PERfect filtered data set, trimmed to show those with LDA score ≥ 4.0.

(ITS) Figure 8 | Differentially abundant MARKER from LEfSe analysis for the PIME filtered data set, trimmed to show those with LDA score ≥ 4.0.

Response of select taxa

The workflow here is basically a carbon copy of the workflow describe above. Presented here for posterity.

Click here to see the ITS version of this workflow.

## FIX ps object
ps <- its18_ps_perfect_rf_all
tmp_tax1 <- data.frame(tax_table(ps))
tmp_tax1$ASV_SEQ <- NULL
tmp_rn <- row.names(tmp_tax1)
tmp_tax <- data.frame(lapply(tmp_tax1, function(x) {gsub("\\(|)", "", x)}))
row.names(tmp_tax) <- tmp_rn
identical(row.names(tmp_tax), row.names(tmp_tax1))
#dplyr::filter(tmp_tax, Phylum == "Acidobacteriota")
ps_tax_new <- as.matrix(tmp_tax)
tmp_ps <- phyloseq(otu_table(ps),
                     tax_table(ps_tax_new),
                     sample_data(ps))
its_ps <- tmp_ps
phyloseq::rank_names(its_ps)
data.frame(tax_table(its_ps))

its_group_anova <-  run_test_multiple_groups(its_ps, group = "TEMP", 
                                             taxa_rank = "all", method = "anova")
its_group_anova@marker_table
marker_table(its_group_anova)

its_default_pht <- run_posthoc_test(its_ps, 
                                    group = "TEMP", method = "tukey", 
                                    transform = "log10")

its_pht <- its_default_pht
its18_pht_filt <- filter(data.frame(its_pht@result), pvalue <= "0.05")[!grepl("ASV", filter(data.frame(its_pht@result), pvalue <= "0.05")$group_name),]
its18_pht_filt <- its18_pht_filt[!grepl("[a-z]__$", its18_pht_filt$group_name),]
its18_pht_filt <- unique(its18_pht_filt$group_name)
length(its18_pht_filt)

There are 32 significantly different lineage markers.

Click here to see a list of significantly different lineage markers.

 [1] "k__Fungi"                                                                                               
 [2] "k__Fungi|p__Basidiomycota"                                                                              
 [3] "k__Fungi|p__Chytridiomycota"                                                                            
 [4] "k__Fungi|p__Glomeromycota"                                                                              
 [5] "k__Fungi|p__Mortierellomycota"                                                                          
 [6] "k__Fungi|p__Basidiomycota|c__Agaricomycetes"                                                            
 [7] "k__Fungi|p__Basidiomycota|c__Microbotryomycetes"                                                        
 [8] "k__Fungi|p__Glomeromycota|c__Glomeromycetes"                                                            
 [9] "k__Fungi|p__Mortierellomycota|c__Mortierellomycetes"                                                    
[10] "k__Fungi|p__Rozellomycota|c__Rozellomycotina_cls_Incertae_sedis"                                        
[11] "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales"                                              
[12] "k__Fungi|p__Basidiomycota|c__Microbotryomycetes|o__Sporidiobolales"                                     
[13] "k__Fungi|p__Glomeromycota|c__Glomeromycetes|o__Glomerales"                                              
[14] "k__Fungi|p__Mortierellomycota|c__Mortierellomycetes|o__Mortierellales"                                  
[15] "k__Fungi|p__Rozellomycota|c__Rozellomycotina_cls_Incertae_sedis|o__GS11"                                
[16] "k__Fungi|p__Ascomycota|c__Eurotiomycetes|o__Eurotiales|f__Trichocomaceae"                               
[17] "k__Fungi|p__Ascomycota|c__Pezizomycetes|o__Pezizales|f__Pyronemataceae"                                 
[18] "k__Fungi|p__Ascomycota|c__Saccharomycetes|o__Saccharomycetales|f__Metschnikowiaceae"                    
[19] "k__Fungi|p__Ascomycota|c__Sordariomycetes|o__Xylariales|f__Microdochiaceae"                             
[20] "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales|f__Clavariaceae"                              
[21] "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales|f__Entolomataceae"                            
[22] "k__Fungi|p__Basidiomycota|c__Microbotryomycetes|o__Sporidiobolales|f__Sporidiobolaceae"                 
[23] "k__Fungi|p__Glomeromycota|c__Glomeromycetes|o__Glomerales|f__Glomeraceae"                               
[24] "k__Fungi|p__Mortierellomycota|c__Mortierellomycetes|o__Mortierellales|f__Mortierellaceae"               
[25] "k__Fungi|p__Ascomycota|c__Eurotiomycetes|o__Eurotiales|f__Trichocomaceae|g__Talaromyces"                
[26] "k__Fungi|p__Ascomycota|c__Pezizomycetes|o__Pezizales|f__Pyronemataceae|g__Scutellinia"                  
[27] "k__Fungi|p__Ascomycota|c__Saccharomycetes|o__Saccharomycetales|f__Metschnikowiaceae|g__Metschnikowia"   
[28] "k__Fungi|p__Ascomycota|c__Sordariomycetes|o__Hypocreales|f__Nectriaceae|g__Fusarium"                    
[29] "k__Fungi|p__Ascomycota|c__Sordariomycetes|o__Xylariales|f__Microdochiaceae|g__Idriella"                 
[30] "k__Fungi|p__Basidiomycota|c__Microbotryomycetes|o__Sporidiobolales|f__Sporidiobolaceae|g__Rhodotorula"  
[31] "k__Fungi|p__Glomeromycota|c__Glomeromycetes|o__Glomerales|f__Glomeraceae|g__Kamienskia"                 
[32] "k__Fungi|p__Mortierellomycota|c__Mortierellomycetes|o__Mortierellales|f__Mortierellaceae|g__Mortierella"

(ITS) Figure 9 | The response of select taxa to two years of warming by +3°C and +8°C. Differences assessed for multiple-group pair-wise comparisons using ANOVA followed by Tukey HSD post hoc tests. PERfect filtered read count data was log10 transformed and normalized using total sum scaling (TSS). The centre line of each box plot represents the median, the lower and upper hinges represent the first and third quartiles and whiskers represent + 1.5 the interquartile range. Significant differences denoted by asterisks (* p ≤ 0.05, ** p ≤ 0.01, *** p ≤ 0.001, **** p ≤ 0.0001; ns = not significant).

Visualizing DA ASVs in Anvi’o

Next, we will combine the results of the ISA and LEfSe analyses with the distribution of ASVs across each sample. We are going to do the analysis in anvi’o—an advanced analysis and visualization platform for ’omics data (Eren et al. 2015)—using the anvi-interactive command. Anvi’o likes databases but it also understands that sometimes you do not have a database. So it offers a manual mode. If you type this command you can have a look at the relevant pieces we need for the visualization, specifically those under the headings MANUAL INPUTS and ADDITIONAL STUFF.

anvi-interactive -h

MANUAL INPUTS:
  Mandatory input parameters to start the interactive interface without
  anvi'o databases.

--manual-mode           We need this flag to run anvi'o in an ad hoc
                        manner, i.e., no database.
-f FASTA, --fasta-file FASTA
                        A FASTA-formatted input file. This is sort of
                        optional
-d VIEW_DATA, --view-data VIEW_DATA
                        A TAB-delimited file for view data. This is the ASV
                        by sample matrix. We need this
-t NEWICK, --tree NEWICK
                        NEWICK formatted tree structure. How the ASVs are
                        ordered in our case.
ADDITIONAL STUFF:
  Parameters to provide additional layers, views, or layer data.

-V ADDITIONAL_VIEW, --additional-view ADDITIONAL_VIEW
                        A TAB-delimited file for an additional view to be used
                        in the interface. This file should contain all split
                        names, and values for each of them in all samples.
                        Each column in this file must correspond to a sample
                        name. Content of this file will be called 'user_view',
                        which will be available as a new item in the 'views'
                        combo box in the interface
-A ADDITIONAL_LAYERS, --additional-layers ADDITIONAL_LAYERS
                        A TAB-delimited file for additional layer info. In
                        our case this is info about each ASV. The first column
                        of the file must be the ASV names, and
                        the remaining columns should be unique attributes.

There are also a few files we generate that cannot be loaded directly. So, in addition to the files that can be loaded when running the interactive, we also have files that must be added to the database created by anvi’o.

Here is a nice tutorial on Working with anvi’o additional data tables. A lot of what we need is covered in this tutorial. To get the most out the visualization we need to create a few files to give anvi’o when we fire up the interactive interface.

View data: in our case, a sample by ASV abundance matrix.
Additional info about each ASV.
Additional info about each sample.
Taxa abundance data for each sample at some rank.
Dendrograms ordering the ASVs and samples (based on view data).
Fasta file of all ASVs in the analysis.

Main Steps

1. View data

Let’s start with the -d or --view-data file. This file needs to be an ASV by sample matrix of read counts. To simplify the visualization, we will use all ASVs represented by 100 or more total reads, including those identified as differentially abundant by the ISA and/or LEfSe.

Code

samp_ps_all <- c("ssu18_ps_filt", "ssu18_ps_pime", "ssu18_ps_perfect")
samp_ps <- c("ssu18_ps_filt", "ssu18_ps_pime", "ssu18_ps_perfect")
samp_ps_org <- c("ssu18_ps_work")
samp_ps_pime <- c("ssu18_ps_pime")
samp_ps_other <- c("ssu18_ps_filt", "ssu18_ps_perfect")

trim_val <- 100
for (i in samp_ps_pime) {
     tmp_get <- get(i)
     tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}

trim_val <- 300
for (i in samp_ps_other) {
     tmp_get <- get(i)
     tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_trim")

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_df <- as.data.frame(t(otu_table(tmp_get)))
     tmp_df <- tmp_df %>% rownames_to_column("Group")
     tmp_path <- file.path("files/anvio/ssu/")
     write.table(tmp_df, paste(tmp_path, i, "/", "data.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}

Or export a table of transformed data.

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_trans <- transform_sample_counts(tmp_get, function(x) 1e5 * {x/sum(x)})
     tmp_df <- as.data.frame(t(otu_table(tmp_trans)))
     tmp_df <- tmp_df %>% rownames_to_column("Group")
     #tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim_tab"))
     #assign(tmp_name, tmp_df)
     tmp_path <- file.path("files/anvio/ssu/")
     write.table(tmp_df, paste(tmp_path, i, "/", "data_trans.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}

2. Additional Layers for ASVs

Next, we need some additional data about the ASVs to overlay on the visual. This can be anything however what I specifically want are the details of the ISA analysis, total reads, and lineage info. I warn you; this code will get ugly and I urge you to find a better way.

Start with an ASV + lineage table for the ASVs in the new phyloseq object.

Code

for (i in samp_ps) {
     tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_final")))
     tmp_get_indval <- tmp_get_indval %>% dplyr::rename("Group" = "ASV_ID") %>%
                                          dplyr::rename("enrich_indval" = "group") %>%
                                          dplyr::rename("test_indval" = "indval") %>%
                                          dplyr::rename("pval_indval" = "pval")
     tmp_get_indval <- tmp_get_indval[,1:5]

     tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_final")))
     tmp_get_lefse <- tmp_get_lefse %>% dplyr::rename("Group" = "ASV_ID") %>%
                                          dplyr::rename("enrich_lefse" = "group") %>%
                                          dplyr::rename("test_lefse" = "lda") %>%
                                          dplyr::rename("pval_lefse" = "pval")
     tmp_get_lefse <- tmp_get_lefse[,1:4]
     
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_otu_df <- as.data.frame(t(otu_table(tmp_get)))
     tmp_total <- cbind(tmp_otu_df, total_reads = rowSums(tmp_otu_df))
     tmp_total <- rev(tmp_total)[1]
     tmp_total <- tmp_total %>% tibble::rownames_to_column("Group")
     tmp_tax_df <- as.data.frame(tax_table(tmp_get))
     tmp_tax_df$ASV_SEQ <- NULL
     tmp_tax_df$ASV_ID <- NULL
     
     tmp_tax_df <- tmp_tax_df %>% tibble::rownames_to_column("Group")
     tmp_add_lay <- dplyr::left_join(tmp_tax_df, tmp_total, by = "Group") %>%
                   dplyr::left_join(., tmp_get_indval, by = "Group") %>%
                   dplyr::left_join(., tmp_get_lefse, by = "Group")
     tmp_add_lay$ASV_ID <- tmp_add_lay$Group
     tmp_add_lay <- tmp_add_lay[, c(1,16,8,12,9:11,13:15,2:7)]
     tmp_path <- file.path("files/anvio/ssu/")
     write.table(tmp_add_lay, paste(tmp_path, i, "/", "additional_layers.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE, na = "")
     rm(list = ls(pattern = "tmp_"))
}

3. Additional Views for Samples

Now we want some general data about the samples to overlay on the visual. Again, this can be anything. How about a table of alpha diversity metrics? We actually have such a table that was generated way back up the road. Just need to fix the column names.

Note

Added to profile.db with anvi-import-misc-data command & --target-data-table layers flag.

Code

metadata_tab <- read.table("files/metadata/tables/metadata.txt",
                           header = TRUE)

tmp_x <- readRDS("files/alpha/rdata/ssu18_ps_pime.rds")
data.frame(sample_data(tmp_x))
metadata_tab[,c(2:5)] <- list(NULL)
for (i in samp_ps_all) {
     tmp_get <- get(i)
     tmp_df <- data.frame(sample_data(tmp_get))
     tmp_df <- tmp_df[,c(2:9)]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("id")
     tmp_df <- tmp_df %>% dplyr::rename("no_asvs" = "Observed")
     tmp_rc <- data.frame(readcount(tmp_get))
     tmp_rc <- tmp_rc %>% tibble::rownames_to_column("id")
     tmp_rc <- tmp_rc %>% dplyr::rename("no_reads" = 2)
     #identical(tmp_df$id, tmp_rc$id)
     tmp_merge <- dplyr::left_join(tmp_df, tmp_rc)
     tmp_merge <- tmp_merge[, c(1:6,10,7:9)]
     tmp_final <- dplyr::left_join(tmp_merge, metadata_tab)
     tmp_path <- file.path("files/anvio/ssu/")
     write.table(tmp_final, paste(tmp_path, i, "/", "additional_views.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}
rm(metadata_tab)

4. Taxon rank abundance by sample

Turned out this was a little tricky to figure out, but thanks to a little nifty block of code written by guoyanzhao on the phyloseq Issues forum, it was a piece of cake. The code can be altered to take any rank. See the post for an explanation.

Anyway, the goal is to sum each taxon at some rank and present that as a bar chart for each sample in the visualization. Anvi’o has a specific format it needs where each row is a sample and each column is a taxon. Taxa names need the prefix t_<RANK>!. For example, t_class! should be added for Class rank.

Note

Added to profile.db with anvi-import-misc-data command & --target-data-table layers flag.

Code

pick_rank <- "Phylum"
pick_rank_l <- "phylum"
for (i in samp_ps_all) {
# Make the table
    tmp_get <- get(i)
    tmp_glom <- tax_glom(tmp_get, taxrank = pick_rank)
    tmp_melt <- psmelt(tmp_glom)
    tmp_melt[[pick_rank]] <- as.character(tmp_melt[[pick_rank]])
    tmp_abund <- aggregate(Abundance ~ Sample + tmp_melt[[pick_rank]], tmp_melt, FUN = sum)
    colnames(tmp_abund)[2] <- "tax_rank"
    library(reshape2)
    tmp_abund <- as.data.frame(reshape::cast(tmp_abund, Sample ~ tax_rank))
    tmp_abund <- tibble::remove_rownames(tmp_abund)
    tmp_abund <- tibble::column_to_rownames(tmp_abund, "Sample")
# Reorder table column by sum
    tmp_layers <- tmp_abund[,names(sort(colSums(tmp_abund), decreasing = TRUE))]
# Add the prefix
    tmp_layers <- tmp_layers %>% dplyr::rename_all(function(x) paste0("t_", pick_rank_l,"!", x))
    tmp_layers <- tibble::rownames_to_column (tmp_layers, "taxon")
# save the table
    tmp_path <- file.path("files/anvio/ssu/")
    write.table(tmp_layers, paste(tmp_path, i, "/", "tax_layers.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
    rm(list = ls(pattern = "tmp_"))
}

5. Construct Dendrograms

The last piece we need is to generate dendrograms that order the ASVs by their distribution in the samples and the samples by their ASV composition. For this task we will use anvi’o.

anvi-matrix-to-newick data.txt --distance euclidean \
                               --linkage ward \
                               -o asv.tre
anvi-matrix-to-newick data.txt --distance euclidean \
                               --linkage ward \
                               -o sample.tre \
                               --transpose

The first command reads the view data we generated above and uses Euclidean distance and Ward linkage for hierarchical clustering of the ASVs. The second command transposes the view data table and then does the same for the samples. There are several distance metrics and linkage methods available. See the help menu for the command by typing anvi-matrix-to-newick -h. Boom.

Alternatively, we can generate dendrograms using phyloseq::distance and hclust.

Code

pick_dist <- "bray"
pick_clust <- "complete"

for (i in samp_ps) {
# Make the table
    tmp_get <- get(i)
    tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist, type = "sample")
    tmp_dend <- hclust(tmp_dist, method = pick_clust)
    plot(tmp_dend, hang = -1)
    tmp_tree <- as.phylo(tmp_dend) 
    
    tmp_path <- file.path("files/anvio/ssu/")
    write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""))
    rm(list = ls(pattern = "tmp_"))
}

Code

pick_dist_asv <- "euclidean"
pick_clust_asv <- "ward"

for (i in samp_ps) {
# Make the table
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
    tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist_asv, type = "taxa")
    tmp_dend <- hclust(tmp_dist, method = pick_clust_asv)
    plot(tmp_dend, hang = -1)
    tmp_tree <- as.phylo(tmp_dend) 
    
    tmp_path <- file.path("files/anvio/ssu/")
    write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "asv_", pick_dist_asv, "_", pick_clust_asv, ".tre", sep = ""))
    rm(list = ls(pattern = "tmp_"))
}

Note

The file asv.tre is loaded with anvi-interactive command & the --tree flag.

The ASV tree is fine as is, but the sample tree needs a special format. Specifically, the tree needs to be in a three column, tab delimited, table. This way you can add multiple orderings to the same file and view them all in the interactive. The table needs to be in this format:

item_name	data_type	data_value
tree_1	newick	((P01_D00_010_W8A:0.0250122,P05_D00_010_W8C:0.02,..
tree_2	newick	((((((((OTU14195:0.0712585,OTU13230:0.0712585)0:,…
(…)	(…)	(…)

This is easy to do by hand, but I really need the practice so I will do it in R. Anvi’o is very particular about formatting. For example, if this file ends with a blank line, which it will because when anvi’o made the initial dendrogram it add a new line. We need to get rid of that or we get an error when trying to import the table.

Note

The file sample.tre is added to the profile.db with anvi-import-misc-data command & --target-data-table layer_orders flag.

Code

for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/ssu/")
      tmp_tree <- read_file(paste(tmp_path, i, "/", "sample.tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c("bray_complete")
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample.tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}

Code

# FOR TRANSFORMED DATA
for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/ssu/")
      tmp_tree <- read_file(paste(tmp_path, i, "/","sample_trans.tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c("bray_complete")
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample_trans.tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}
objects()

Code

# FOR HCLUST TREE
for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/ssu/")
      tmp_tree <- read_file(paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c(paste(pick_dist, "_", pick_clust, "_hclust", sep = ""))
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}

6. Make a fasta file

We don’t need to add a fasta file, but it is a nice way to keep everything in one place. Plus, you can do BLAST searches directly in the interface by right clicking on the ASV of interest, so it is nice to have the sequences.

Note

Loaded with anvi-interactive command & --fasta-file flag.

Code

for (i in samp_ps) {
       tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
       tmp_tab <- tax_table(tmp_get)
       tmp_tab <- tmp_tab[, 7]
       tmp_df <- data.frame(row.names(tmp_tab), tmp_tab)
       colnames(tmp_df) <- c("ASV_ID", "ASV_SEQ")
       tmp_df$ASV_ID <- sub("^", ">", tmp_df$ASV_ID)
       tmp_path <- file.path("files/anvio/ssu/")

       write.table(tmp_df, paste(tmp_path, i, "/", i, ".fasta", sep = ""),
            sep = "\n", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")
       rm(list = ls(pattern = "tmp_"))
}

Building the Profile Database

Time to put all of these pieces together. This gets a little tricky since we do not have a database to start with because some of these files can be loaded directly in the interface but some need to be added to a database. When we fire up the interactive in --manual mode, we must give anvi’o the name of a database and it will create that database for us. Then we can shut down the interactive, add the necessary data files, and start back up.

anvi-interactive --view-data data.txt \
                 --tree asv.tre \
                 --additional-layers additional_layers.txt \
                 --profile-db profile.db \
                 --manual

Now we have a new profile database that we can add the sample metadata (additional_layers.txt) and the sample dendrogram (sample.tre) using the command anvi-import-misc-data. These commands add the table to the new profile.db. First, kill the interactive.

anvi-import-misc-data additional_views.txt \
                      --pan-or-profile-db profile.db \
                      --target-data-table layers
anvi-import-misc-data sample.tre \
                      --pan-or-profile-db profile.db \
                      --target-data-table layer_orders

One last this is to get the table with the taxonomy total by sample (tax_layers.txt) into the profile database. We will run the same command we just used.

anvi-import-misc-data tax_layers.txt \
                      --pan-or-profile-db profile.db \
                      --target-data-table layers

In fact, we could just as easily append the taxonomy total data onto the additional_layers.txt and import in one command. But we didn’t.

Interactive Interface

With a populated database in hand, we can now begin modifying the visual by running the interactive command again.

anvi-interactive --view-data data.txt \
                 --tree asv.tre \
                 --additional-layers additional_layers.txt \
                 --profile-db profile.db
                 --fasta-file anvio.fasta \
                 --manual

Click here for the ITS version of this workflow.

The ITS version of the anvi’o workflow is basically a carbon copy of the workflow presented above. It is included here for posterity.

Visualizing DA ASVs in Anvi’o

anvi-interactive -h

MANUAL INPUTS:
  Mandatory input parameters to start the interactive interface without
  anvi'o databases.

--manual-mode           We need this flag to run anvi'o in an ad hoc
                        manner, i.e., no database.
-f FASTA, --fasta-file FASTA
                        A FASTA-formatted input file. This is sort of
                        optional
-d VIEW_DATA, --view-data VIEW_DATA
                        A TAB-delimited file for view data. This is the ASV
                        by sample matrix. We need this
-t NEWICK, --tree NEWICK
                        NEWICK formatted tree structure. How the ASVs are
                        ordered in our case.
ADDITIONAL STUFF:
  Parameters to provide additional layers, views, or layer data.

-V ADDITIONAL_VIEW, --additional-view ADDITIONAL_VIEW
                        A TAB-delimited file for an additional view to be used
                        in the interface. This file should contain all split
                        names, and values for each of them in all samples.
                        Each column in this file must correspond to a sample
                        name. Content of this file will be called 'user_view',
                        which will be available as a new item in the 'views'
                        combo box in the interface
-A ADDITIONAL_LAYERS, --additional-layers ADDITIONAL_LAYERS
                        A TAB-delimited file for additional layer info. In
                        our case this is info about each ASV. The first column
                        of the file must be the ASV names, and
                        the remaining columns should be unique attributes.

View data: in our case, a sample by ASV abundance matrix.
Additional info about each ASV.
Additional info about each sample.
Taxa abundance data for each sample at some rank.
Dendrograms ordering the ASVs and samples (based on view data).
Fasta file of all ASVs in the analysis.

Main steps

1. View data

Code

samp_ps_all <- c("its18_ps_filt", "its18_ps_pime", "its18_ps_perfect", 
                 "its18_ps_filt_otu", "its18_ps_pime_otu", "its18_ps_perfect_otu", 
                 "its18_ps_work", "its18_ps_work_otu")
samp_ps <- c("its18_ps_filt", "its18_ps_pime", "its18_ps_perfect", 
             "its18_ps_filt_otu", "its18_ps_pime_otu", "its18_ps_perfect_otu")
samp_ps_org <- c("its18_ps_work", "its18_ps_work_otu")
samp_ps_pime <- c("its18_ps_pime", "its18_ps_pime_otu")
samp_ps_other <- c("its18_ps_filt", "its18_ps_perfect", 
                   "its18_ps_filt_otu", "its18_ps_perfect_otu")

trim_val <- 50
for (i in samp_ps_pime) {
     tmp_get <- get(i)
     tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}

trim_val <- 300
for (i in samp_ps_other) {
     tmp_get <- get(i)
     tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get)
     tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim"))
     assign(tmp_name, tmp_df)
     rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_trim")
its18_ps_filt_trim
its18_ps_pime_trim
its18_ps_perfect_trim
its18_ps_filt_otu_trim
its18_ps_pime_otu_trim
its18_ps_perfect_otu_trim

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_df <- as.data.frame(t(otu_table(tmp_get)))
     tmp_df <- tmp_df %>% rownames_to_column("Group")
     tmp_path <- file.path("files/anvio/its/")
     write.table(tmp_df, paste(tmp_path, i, "/", "data.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}
objects()

Or export a table of transformed data.

Code

for (i in samp_ps) {
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_trans <- transform_sample_counts(tmp_get, function(x) 1e5 * {x/sum(x)})
     tmp_df <- as.data.frame(t(otu_table(tmp_trans)))
     tmp_df <- tmp_df %>% rownames_to_column("Group")
     #tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim_tab"))
     #assign(tmp_name, tmp_df)
     tmp_path <- file.path("files/anvio/its/")
     write.table(tmp_df, paste(tmp_path, i, "/", "data_trans.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}
objects()

2. Additional Layers for ASVs

Start with an ASV + lineage table for the ASVs in the new phyloseq object.

Code

for (i in samp_ps) {
     tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_final")))
     tmp_get_indval <- tmp_get_indval %>% dplyr::rename("Group" = "ASV_ID") %>%
                                          dplyr::rename("enrich_indval" = "group") %>%
                                          dplyr::rename("test_indval" = "indval") %>%
                                          dplyr::rename("pval_indval" = "pval")
     tmp_get_indval <- tmp_get_indval[,1:5]

     tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_final")))
     tmp_get_lefse <- tmp_get_lefse %>% dplyr::rename("Group" = "ASV_ID") %>%
                                          dplyr::rename("enrich_lefse" = "group") %>%
                                          dplyr::rename("test_lefse" = "lda") %>%
                                          dplyr::rename("pval_lefse" = "pval")
     tmp_get_lefse <- tmp_get_lefse[,1:4]
     
     tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
     tmp_otu_df <- as.data.frame(t(otu_table(tmp_get)))
     tmp_total <- cbind(tmp_otu_df, total_reads = rowSums(tmp_otu_df))
     tmp_total <- rev(tmp_total)[1]
     tmp_total <- tmp_total %>% tibble::rownames_to_column("Group")
     tmp_tax_df <- as.data.frame(tax_table(tmp_get))
     tmp_tax_df$ASV_SEQ <- NULL
     tmp_tax_df$ASV_ID <- NULL
     
     tmp_tax_df <- tmp_tax_df %>% tibble::rownames_to_column("Group")
     tmp_add_lay <- dplyr::left_join(tmp_tax_df, tmp_total, by = "Group") %>%
                   dplyr::left_join(., tmp_get_indval, by = "Group") %>%
                   dplyr::left_join(., tmp_get_lefse, by = "Group")
     tmp_add_lay$ASV_ID <- tmp_add_lay$Group
     tmp_add_lay <- tmp_add_lay[, c(1,16,8,12,9:11,13:15,2:7)]
     tmp_path <- file.path("files/anvio/its/")
     write.table(tmp_add_lay, paste(tmp_path, i, "/", "additional_layers.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE, na = "")
     rm(list = ls(pattern = "tmp_"))
}

3. Additional Views for Samples

Note

Added to profile.db with anvi-import-misc-data command & --target-data-table layers flag.

Code

metadata_tab <- read.table("files/metadata/tables/metadata.txt",
                           header = TRUE)

tmp_x <- readRDS("files/alpha/rdata/its18_ps_pime.rds")
data.frame(sample_data(tmp_x))
metadata_tab[,c(2:5)] <- list(NULL)
for (i in samp_ps_all) {
     tmp_get <- get(i)
     tmp_df <- data.frame(sample_data(tmp_get))
     tmp_df <- tmp_df[,c(2:9)]
     tmp_df <- tmp_df %>% tibble::rownames_to_column("id")
     tmp_df <- tmp_df %>% dplyr::rename("no_asvs" = "Observed")
     tmp_rc <- data.frame(readcount(tmp_get))
     tmp_rc <- tmp_rc %>% tibble::rownames_to_column("id")
     tmp_rc <- tmp_rc %>% dplyr::rename("no_reads" = 2)
     #identical(tmp_df$id, tmp_rc$id)
     tmp_merge <- dplyr::left_join(tmp_df, tmp_rc)
     tmp_merge <- tmp_merge[, c(1:6,10,7:9)]
     tmp_final <- dplyr::left_join(tmp_merge, metadata_tab)
     tmp_path <- file.path("files/anvio/its/")
     write.table(tmp_final, paste(tmp_path, i, "/", "additional_views.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
     rm(list = ls(pattern = "tmp_"))
}
rm(metadata_tab)

4. Taxon rank abundance by sample

Turned out this was a little tricky to figure out, but thanks to a little nifty block of code written by guoyanzhao on the phyloseq Iitses forum, it was a piece of cake. The code can be altered to take any rank. See the post for an explanation.

Note

Added to profile.db with anvi-import-misc-data command & --target-data-table layers flag.

Code

pick_rank <- "Order"
pick_rank_l <- "order"
for (i in samp_ps_all) {
# Make the table
    tmp_get <- get(i)
    tmp_glom <- tax_glom(tmp_get, taxrank = pick_rank)
    tmp_melt <- psmelt(tmp_glom)
    tmp_melt[[pick_rank]] <- as.character(tmp_melt[[pick_rank]])
    tmp_abund <- aggregate(Abundance ~ Sample + tmp_melt[[pick_rank]], tmp_melt, FUN = sum)
    colnames(tmp_abund)[2] <- "tax_rank"
    library(reshape2)
    tmp_abund <- as.data.frame(reshape::cast(tmp_abund, Sample ~ tax_rank))
    tmp_abund <- tibble::remove_rownames(tmp_abund)
    tmp_abund <- tibble::column_to_rownames(tmp_abund, "Sample")
# Reorder table column by sum
    tmp_layers <- tmp_abund[,names(sort(colSums(tmp_abund), decreasing = TRUE))]
# Add the prefix
    tmp_layers <- tmp_layers %>% dplyr::rename_all(function(x) paste0("t_", pick_rank_l,"!", x))
    tmp_layers <- tibble::rownames_to_column (tmp_layers, "taxon")
# save the table
    tmp_path <- file.path("files/anvio/its/")
    write.table(tmp_layers, paste(tmp_path, i, "/", "tax_layers.txt", sep = ""),
            quote = FALSE, sep = "\t", row.names = FALSE)
    rm(list = ls(pattern = "tmp_"))
}

5. Construct Dendrograms

The last piece we need is to generate dendrograms that order the ASVs by their distribution in the samples and the samples by their ASV composition. For this task we will use anvi’o.

anvi-matrix-to-newick data.txt --distance euclidean \
                               --linkage ward \
                               -o asv.tre
anvi-matrix-to-newick data.txt --distance euclidean \
                               --linkage ward \
                               -o sample.tre \
                               --transpose

Alternatively, we can generate dendrograms using phyloseq::distance and hclust.

Code

pick_dist <- "bray"
pick_clust <- "complete"

for (i in samp_ps) {
# Make the table
    tmp_get <- get(i)
    tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist, type = "sample")
    tmp_dend <- hclust(tmp_dist, method = pick_clust)
    plot(tmp_dend, hang = -1)
    tmp_tree <- as.phylo(tmp_dend) 
    
    tmp_path <- file.path("files/anvio/its/")
    write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""))
    rm(list = ls(pattern = "tmp_"))
}

Code

pick_dist_asv <- "euclidean"
pick_clust_asv <- "ward"

for (i in samp_ps) {
# Make the table
    tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
    tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist_asv, type = "taxa")
    tmp_dend <- hclust(tmp_dist, method = pick_clust_asv)
    plot(tmp_dend, hang = -1)
    tmp_tree <- as.phylo(tmp_dend) 
    
    tmp_path <- file.path("files/anvio/its/")
    write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "asv_", pick_dist_asv, "_", pick_clust_asv, ".tre", sep = ""))
    rm(list = ls(pattern = "tmp_"))
}

Note

The file asv.tre is loaded with anvi-interactive command & the --tree flag.

item_name	data_type	data_value
tree_1	newick	((P01_D00_010_W8A:0.0250122,P05_D00_010_W8C:0.02,..
tree_2	newick	((((((((OTU14195:0.0712585,OTU13230:0.0712585)0:,…
(…)	(…)	(…)

Note

The file sample.tre is added to the profile.db with anvi-import-misc-data command & --target-data-table layer_orders flag.

Code

for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/its/")
      tmp_tree <- read_file(paste(tmp_path, i, "/", "sample.tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c("bray_complete")
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample.tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}

Code

# FOR TRANSFORMED DATA
for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/its/")
      tmp_tree <- read_file(paste(tmp_path, i, "/","sample_trans.tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c("bray_complete")
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample_trans.tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}
objects()

Code

# FOR HCLUST TREE
for (i in samp_ps) {
      tmp_path <- file.path("files/anvio/its/")
      tmp_tree <- read_file(paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""))
      tmp_tree <- gsub("[\r\n]", "", tmp_tree)
      tmp_item <- c(paste(pick_dist, "_", pick_clust, "_hclust", sep = ""))
      tmp_type <- c("newick")
      tmp_df <- c(tmp_tree)
      tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df)
      library(janitor)
      tmp_tab %>% remove_empty("rows")
      colnames(tmp_tab) <- c("item_name",   "data_type",    "data_value")
      write.table(tmp_tab, paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""),
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")
      rm(list = ls(pattern = "tmp_"))
}

6. Make a fasta file

Note

Loaded with anvi-interactive command & --fasta-file flag.

Code

for (i in samp_ps) {
       tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim")))
       tmp_tab <- tax_table(tmp_get)
       tmp_tab <- tmp_tab[, 7]
       tmp_df <- data.frame(row.names(tmp_tab), tmp_tab)
       colnames(tmp_df) <- c("ASV_ID", "ASV_SEQ")
       tmp_df$ASV_ID <- sub("^", ">", tmp_df$ASV_ID)
       tmp_path <- file.path("files/anvio/its/")

       write.table(tmp_df, paste(tmp_path, i, "/", i, ".fasta", sep = ""),
            sep = "\n", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")
       rm(list = ls(pattern = "tmp_"))
}

Building the Profile Database

anvi-interactive --view-data data.txt \
                 --tree asv.tre \
                 --additional-layers additional_layers.txt \
                 --profile-db profile.db \
                 --manual

anvi-import-misc-data additional_views.txt \
                      --pan-or-profile-db profile.db \
                      --target-data-table layers
anvi-import-misc-data sample.tre \
                      --pan-or-profile-db profile.db \
                      --target-data-table layer_orders

One last this is to get the table with the taxonomy total by sample (tax_layers.txt) into the profile database. We will run the same command we just used.

anvi-import-misc-data tax_layers.txt \
                      --pan-or-profile-db profile.db \
                      --target-data-table layers

In fact, we could just as easily append the taxonomy total data onto the additional_layers.txt and import in one command. But we didn’t.

Interactive Interface

With a populated database in hand, we can now begin modifying the visual by running the interactive command again.

anvi-interactive --view-data data.txt \
                 --tree asv.tre \
                 --additional-layers additional_layers.txt \
                 --profile-db profile.db
                 --fasta-file anvio.fasta \
                 --manual

Workflow Output

Data products generated in this workflow can be downloaded from figshare.

Next workflow:
8. Metadata Analysis Previous workflow:
6. Beta Diversity & Dispersion Estimates

Source Code

The source code for this page can be accessed on GitHub by clicking this link.

Data Availability

Data generated in this workflow and the Rdata needed to run the workflow can be accessed on figshare at 10.25573/data.16828249.

Last updated on

[1] "2022-06-29 07:29:05 EST"

References

Cao, Yang. 2020. “microbiomeMarker: Microbiome Biomarker Analysis.” R Package Version 0.0.1.9000. https://doi.org/10.5281/zenodo.3749415.

Eren, A Murat, Özcan C Esen, Christopher Quince, Joseph H Vineis, Hilary G Morrison, Mitchell L Sogin, and Tom O Delmont. 2015. “Anvi’o: An Advanced Analysis and Visualization Platform for ‘Omics Data.” PeerJ 3: e1319. https://doi.org/10.7717/peerj.1319.

Roberts, David W, and Maintainer David W Roberts. 2016. “Package ‘Labdsv’.” Ordination and Multivariate 775. https://cran.r-project.org/web/packages/labdsv/index.html.

Segata, Nicola, Jacques Izard, Levi Waldron, Dirk Gevers, Larisa Miropolsky, Wendy S Garrett, and Curtis Huttenhower. 2011. “Metagenomic Biomarker Discovery and Explanation.” Genome Biology 12 (6): 1–18. https://doi.org/10.1186/gb-2011-12-6-r60.

--- title: "7. Differentially Abundant ASVs & Taxa" description: | Reproducible workflow to assess differentially abundant ASVs and taxa between temperature treatments using Indicator Species Analysis and linear discriminant analysis (LDA) effect size (LEfSe). format: html: toc: true toc-depth: 2 --- <details markdown="1"> <summary>Click here for setup information.</summary> ```{r setup, message=FALSE, results = 'hide'} knitr::opts_chunk$set(echo = TRUE, eval = FALSE) set.seed(119) #library(conflicted) #pacman::p_depends(microbiomeMarker, local = TRUE) #pacman::p_depends_reverse(microbiomeMarker, local = TRUE) library(phyloseq); packageVersion("phyloseq") library(Biostrings); packageVersion("Biostrings") pacman::p_load(tidyverse, patchwork, ampvis2, agricolae, labdsv, naniar, ape, pairwiseAdonis, microbiome, seqRFLP, microbiomeMarker, microbiomeMarker, reactable, downloadthis, captioner, install = FALSE, update = FALSE) options(scipen=999) knitr::opts_current$get(c( "cache", "cache.path", "cache.rebuild", "dependson", "autodep" )) ``` </details> ```{r} #| include: false #| eval: true ## Load to build page only #2 remove(list = ls()) load("page_build/da_wf.rdata") load("page_build/mmarker_wf.rdata") objects() ``` ```{r} #| echo: false #| eval: true #| results: hide # Create the caption(s) with captioner caption_tab_ssu <- captioner(prefix = "(16S rRNA) Table", suffix = " |", style = "b") caption_fig_ssu <- captioner(prefix = "(16S rRNA) Figure", suffix = " |", style = "b") caption_tab_its <- captioner(prefix = "(ITS) Table", suffix = " |", style = "b") caption_fig_its <- captioner(prefix = "(ITS) Figure", suffix = " |", style = "b") # Create a function for referring to the tables in text ref <- function(x) str_extract(x, "[^|]*") %>% trimws(which = "right", whitespace = "[ ]") ``` ```{r} #| echo: false #| eval: true source("assets/captions/captions_da.R") ``` # Synopsis This workflow contains ASV differential abundance analysis for the high temperature data sets. In order to run the workflow, you either need to first run the [Data Preparation workflow](data-prep.html) and the [Filtering workflow](filtering.html) or or download the files linked below. See the [Data Availability](data-availability.html) page for complete details. ## Workflow Input Files needed to run this workflow can be downloaded from figshare. <iframe src="https://widgets.figshare.com/articles/14690739/embed?show_title=1" width="100%" height="251" allowfullscreen frameborder="0"></iframe> <iframe src="https://widgets.figshare.com/articles/14701440/embed?show_title=1" width="100%" height="251" allowfullscreen frameborder="0"></iframe> In this workflow, we use the `labdsv` package [@roberts2016package] to run Dufrene-Legendre Indicator Species Analysis and the [microbiomeMarker package](https://github.com/yiluheihei/microbiomeMarker) [@cao2020package] to run linear discriminant analysis (LDA) effect size (LEfSe) [@segata2011metagenomic]. # 16s rRNA ```{r} #| include: false #| eval: false ## Initial Load for ANALYSIS #1 remove(list = ls()) ssu18_ps_work <- readRDS("files/data-prep/rdata/ssu18_ps_work.rds") ssu18_ps_filt <- readRDS("files/filtering/arbitrary/rdata/ssu18_ps_filt.rds") ssu18_ps_perfect <- readRDS("files/filtering/perfect/rdata/ssu18_ps_perfect.rds") ssu18_ps_pime <- readRDS("files/filtering/pime/rdata/ssu18_ps_pime.rds") ``` ```{r} #| echo: false samp_ps <- c("ssu18_ps_work", "ssu18_ps_filt", "ssu18_ps_perfect", "ssu18_ps_pime") for (i in samp_ps) { tmp_get <- get(i) tmp_path <- file.path("files/alpha/rdata/") tmp_read <- readRDS(paste(tmp_path, i, ".rds", sep = "")) tmp_sam_data <- sample_data(tmp_read) tmp_tax_data <- data.frame(tax_table(tmp_get)) #tmp_tax_data[,c(1:6)] tmp_tax_data$ASV_ID <- NULL # Some have, some do not tmp_tax_data[] <- data.frame(lapply(tmp_tax_data, gsub, pattern = "^[k | p | c | o | f]_.*", replacement = NA, fixed = FALSE)) tmp_tax_data <- as.matrix(tmp_tax_data) tmp_ps <- phyloseq(otu_table(tmp_get), phy_tree(tmp_get), tax_table(tmp_tax_data), tmp_sam_data) assign(i, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ```{r} #| echo: false swel_col <- c("#2271B2", "#71B222", "#B22271") ``` ## Indicator Analysis Indicator Analysis calculates the indicator value (fidelity and relative abundance) of species in clusters or types. 1. Choose a data set(s) & set the p-value cutoff. ```{r} #| code-fold: true samp_ps <- c("ssu18_ps_work", "ssu18_ps_filt", "ssu18_ps_perfect", "ssu18_ps_pime") indval_pval <- 0.05 ``` 2. Format Data Frame ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_df <- data.frame(otu_table(tmp_get)) tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)] tmp_row_names <- row.names(tmp_df) tmp_row_names <- tmp_row_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1) tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } objects() ``` 3. Run Indicator Analysis ```{r} #| code-fold: true set.seed(1191) for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab"))) tmp_iva <- indval(tmp_get[,-1], tmp_get[,1]) tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval] tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval] tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval] tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval] tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv, pval = tmp_pv, freq = tmp_fr) tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),] tmp_tax_df <- data.frame(tax_table(get(i))) tmp_tax_df$ASV_ID <- NULL tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE) tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),] class(tmp_sum_tax$group) <- "character" tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", "C0") tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", "W3") tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", "W8") tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names") tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),] tmp_sum_tax$ASV_ID <- as.character(tmp_sum_tax$ASV_ID) tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni") tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary")) assign(tmp_res_name, tmp_sum_tax) rm(list = ls(pattern = "tmp_")) } objects() ``` Now we can save a few files and display the data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab"))) tmp_get[,1] <- NULL tmp_df <- as.data.frame(t(tmp_get)) tmp_col_names <- colnames(tmp_df) tmp_col_names <- tmp_col_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") colnames(tmp_df) <- tmp_col_names #tmp_df$freq_all <- apply(tmp_df > 0, 1, sum) tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum) tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum) tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum) tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))]) tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")]) tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")]) tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")]) tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))] tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID") tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary"))) tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE) tmp_merge_df <- tmp_merge_df[,c(1,9:12,2:8,13:19)] tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ] tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final")) assign(tmp_merge_name, tmp_merge_df) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_indval_summary") ``` 4. Save a new phyloseq object ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary"))) tmp_list <- tmp_tab[,1] tmp_ps <- prune_taxa(tmp_list, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind")) assign(tmp_name, tmp_ps) tmp_ps@phy_tree <- NULL tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps) tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE, tip.label = taxa_names(tmp_ps)) tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree) assign(tmp_name, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ## Indicator Summary ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_work_indval_final seq_table %>% download_this( output_name = "ssu_isa_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_isa_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_filt_indval_final seq_table %>% download_this( output_name = "ssu_isa_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_isa_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_perfect_indval_final seq_table %>% download_this( output_name = "ssu_isa_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_isa_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_pime_indval_final seq_table %>% download_this( output_name = "ssu_isa_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_isa_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ## LEfSe Analysis :::{.callout-note} From [@segata2011metagenomic]. ::: Linear discriminant analysis (LDA) effect size (LEfSe) is a method to support high-dimensional class comparisons with a particular focus on microbial community analyses. LEfSe determines the features (organisms, clades, operational taxonomic units, genes, or functions) most likely to explain differences between classes by coupling standard tests for statistical significance with additional tests encoding biological consistency and effect relevance. Class comparison methods typically predict biomarkers consisting of features that violate a null hypothesis of no difference between classes. The effect size provides an estimation of the magnitude of the observed phenomenon due to each characterizing feature and it is thus a valuable tool for ranking the relevance of different biological aspects and for addressing further investigations and analyses. First, a little data tidying and subsetting only ASV data from the taxonomy table. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)] tax_table(tmp_get) <- cbind(tax_table(tmp_get), rownames(tax_table(tmp_get))) colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all")) assign(tmp_name, tmp_get) tax_table(tmp_get) <- tax_table(tmp_get)[,c(7)] tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv")) assign(tmp_name_asv, tmp_get) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_rf") ``` 1. Choose a data set(s) & set the LEfSe cutoff values. ```{r} samp_ps <- c("ssu18_ps_work", "ssu18_ps_filt", "ssu18_ps_perfect", "ssu18_ps_pime") lefse_lda <- 2 lefse_kw <- 0.05 lefse_wc <- 0.05 ``` ```{r} #| warnings: false #| error: false #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv"))) tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, bootstrap_n = 30, bootstrap_fraction = 2/3, sample_min = 10, multicls_strat = FALSE, curv = FALSE) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv")) assign(tmp_name, tmp_lefse) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv") for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all"))) tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, bootstrap_n = 30, bootstrap_fraction = 2/3, sample_min = 10, multicls_strat = FALSE, curv = FALSE) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all")) assign(tmp_name, tmp_lefse) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv") objects(pattern = "_lefse_all") ``` ```{r} #| echo: false # DONT RUN, TESTING ONLY #tmp_tab <- data.frame(marker_table(ssu18_ps_pime_lefse_asv)) #tmp_tab$feature <- gsub('s__', '', tmp_tab$feature) #tmp_ps <- data.frame(t(otu_table(ssu18_ps_pime_rf_asv))) #tmp_ps <- tmp_ps %>% tibble::rownames_to_column("feature") #write.table(dplyr::left_join(tmp_tab, tmp_ps, by = "feature"), "test.txt", quote = FALSE, sep = "\t") ``` ```{r} #| code-fold: true for (i in samp_ps) { tmp_o_tax <- data.frame(tax_table(get(i))) tmp_o_tax$ASV_ID <- NULL tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID") tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt$feature <- gsub('s__', '', tmp_mt$feature) tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>% tibble::remove_rownames() tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^0$", "C0") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^8$", "W8") tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),] tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_summary")) assign(tmp_res_name, tmp_sum) rm(list = ls(pattern = "tmp_")) } ``` Now we can save a few files and display the data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_df <- data.frame(t(otu_table(tmp_get))) #tmp_get[,1] <- NULL #tmp_df <- as.data.frame(t(tmp_otu)) tmp_col_names <- colnames(tmp_df) tmp_col_names <- tmp_col_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") colnames(tmp_df) <- tmp_col_names tmp_df$freq <- apply(tmp_df > 0, 1, sum) tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum) tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum) tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum) tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))]) tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")]) tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")]) tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")]) tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))] tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID") tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary"))) tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE) tmp_merge_df <- tmp_merge_df[,c(1,10:12,2:9,14:20)] tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ] tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_final")) assign(tmp_merge_name, tmp_merge_df) rm(list = ls(pattern = "tmp_")) } ``` Save a new phyloseq object ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary"))) tmp_list <- tmp_tab[,1] tmp_ps <- prune_taxa(tmp_list, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse")) assign(tmp_name, tmp_ps) tmp_ps@phy_tree <- NULL tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps) tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE, tip.label = taxa_names(tmp_ps)) tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree) assign(tmp_name, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ## LEfSe Summaries (ASVs & Markers) ### ASV summary data ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_work_lefse_final seq_table %>% download_this( output_name = "ssu_lefse_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), ef_lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "ef_lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_filt_lefse_final seq_table %>% download_this( output_name = "ssu_lefse_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_perfect_lefse_final seq_table %>% download_this( output_name = "ssu_lefse_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_pime_lefse_final seq_table %>% download_this( output_name = "ssu_lefse_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ### ASV Visualizations ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt %>% filter(ef_lda >= 3) tmp_mt <- marker_table(tmp_mt) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_viz_trim")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv_viz_trim") ``` ::: {.column-body} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_work_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_asv_full")` #### Arbitrary filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_filt_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_asv_filt")` #### PERfect filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_perfect_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 6)) ``` `r caption_fig_ssu("ssu_lefse_asv_perfect")` #### PIME filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_pime_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 5)) ``` `r caption_fig_ssu("ssu_lefse_asv_pime")` ::: ::: ### Marker summary data ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt[, c(2:4,1)] tmp_mt <- tmp_mt %>% dplyr::rename(c("group" = 1, "pval" = 3)) tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^0$", "C0") tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^3$", "W3") tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^8$", "W8") tmp_mt <- tmp_mt %>% tibble::rownames_to_column("marker") tmp_mt <- tmp_mt %>% tidyr::separate(col = "feature", into = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "ASV_ID"), sep = "\\|\\w__") tmp_mt$Kingdom <- gsub('k__', '', tmp_mt$Kingdom) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_marker_final")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_marker_final") ``` ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_work_lefse_marker_final seq_table %>% download_this( output_name = "ssu_lefse_marker_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_marker_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), ef_lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "ef_lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_filt_lefse_marker_final seq_table %>% download_this( output_name = "ssu_lefse_marker_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_marker_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_perfect_lefse_marker_final seq_table %>% download_this( output_name = "ssu_lefse_marker_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_marker_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- ssu18_ps_pime_lefse_marker_final seq_table %>% download_this( output_name = "ssu_lefse_marker_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_ssu("ssu_lefse_marker_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ### Marker visualizations ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt %>% filter(ef_lda >= 4) tmp_mt <- marker_table(tmp_mt) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all_viz_trim")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_all_viz_trim") ``` ::: {.column-body} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_work_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_marker_full")` #### Arbitrary filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_filt_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_marker_filt")` #### PERfect filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_perfect_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_marker_perfect")` #### PIME filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(ssu18_ps_pime_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_ssu("ssu_lefse_marker_pime")` ::: ::: ## Response of select taxa In this section of the workflow we use the [`microbiomeMarker`](https://github.com/yiluheihei/microbiomeMarker) package to assess the response of taxonomic lineages to soil warming. In the first step we need to fix the selected data set to make it compatible with the various functions. For this analysis we use the PERfect filtered data set. ```{r} #| echo: false #| eval: false #TEMP LOAD ONLY REMOVE WHEN WORKFLOW FINISHED remove(list = ls()) load("page_build/da_wf.rdata") gdata::keep("ssu18_ps_work_rf_all", "ssu18_ps_work_rf_asv", "ssu18_ps_filt_rf_all", "ssu18_ps_filt_rf_asv", "ssu18_ps_perfect_rf_all", "ssu18_ps_perfect_rf_asv", "ssu18_ps_pime_rf_all", "ssu18_ps_pime_rf_asv", sure = TRUE) objects() ``` ```{r} #| code-fold: true ## FIX ps object ssu_ps <- ssu18_ps_perfect_rf_all tmp_tax1 <- data.frame(tax_table(ssu_ps)) #tmp_tax1$ASV_SEQ <- NULL tmp_rn <- row.names(tmp_tax1) tmp_tax <- data.frame(lapply(tmp_tax1, function(x) {gsub("\\(|)", "", x)})) row.names(tmp_tax) <- tmp_rn identical(row.names(tmp_tax), row.names(tmp_tax1)) #dplyr::filter(tmp_tax, Phylum == "Acidobacteriota") ps_tax_new <- as.matrix(tmp_tax) tmp_ps <- phyloseq(otu_table(ssu_ps), phy_tree(ssu_ps), tax_table(ps_tax_new), sample_data(ssu_ps)) ssu_ps <- tmp_ps phyloseq::rank_names(ssu_ps) data.frame(tax_table(ssu_ps)) ``` Next we run a statistical test for multiple groups using the `run_test_multiple_groups` function. ```{r} #| code-fold: true ssu_group_anova <- run_test_multiple_groups(ssu_ps, group = "TEMP", taxa_rank = "all", method = "anova") ssu_group_anova@marker_table marker_table(ssu_group_anova) ``` ```{r} #| echo: false ## Loop through different norms #norm: #default = TSS #CSS = NONE = RLE = TMM #meth <- c("none", "rarefy", "TSS", "CLR") #for (i in meth) { # tmp_pht <- run_posthoc_test(ssu_ps, group = "TEMP", method = "tukey", norm = i, transform = "log10") # tmp_name <- purrr::map_chr(i, ~ paste0(., "_results")) # assign(tmp_name, tmp_pht) # rm(list = ls(pattern = "tmp_")) #} ## Filter results #filter(data.frame(CLR_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #filter(data.frame(CSS_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #filter(data.frame(none_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #filter(data.frame(rarefy_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #filter(data.frame(RLE_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #filter(data.frame(TMM_results@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") ``` And then conduct post hoc pairwise comparisons for multiple groups test using the `run_posthoc_test` function. ```{r} #| code-fold: true ssu_default_pht <- run_posthoc_test(ssu_ps, group = "TEMP", method = "tukey", transform = "log10") ``` We can filter out a select taxa and plot the results. ```{r} #| echo: true #| eval: true filter(data.frame(ssu_default_pht@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") plot_postHocTest(ssu_default_pht, feature = "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") & theme_bw() ``` But what we really want to do is get all of the markers that are significant from the analysis, excluding any significant ASVs so we can look at high taxa ranks. ```{r} #| code-fold: true #| ssu_pht <- ssu_default_pht ssu18_pht_filt <- filter(data.frame(ssu_pht@result), pvalue <= "0.05")[!grepl("ASV", filter(data.frame(ssu_pht@result), pvalue <= "0.05")$group_name),] ssu18_pht_filt <- ssu18_pht_filt[!grepl("[a-z]__$", ssu18_pht_filt$group_name),] ssu18_pht_filt <- distinct(ssu18_pht_filt, group_name, .keep_all = TRUE) nrow(ssu18_pht_filt) save.image("page_build/mmarker_wf.rdata") ``` There are **`r nrow(ssu18_pht_filt)`** significantly different lineage markers. <details markdown="1"> <summary>**Click here** to see a list of significantly different lineage markers.</summary> ```{r} #| echo: false #| eval: true seq_table <- filter(data.frame(ssu_pht@result), pvalue <= "0.05")[!grepl("ASV", filter(data.frame(ssu_pht@result), pvalue <= "0.05")$group_name),] seq_table <- seq_table[!grepl("[a-z]__$", seq_table$group_name),] seq_table$group <- NULL reactable(seq_table, rownames = FALSE, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( group_name = colDef(name = "Lineage", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 150), comparions = colDef(name = "comparions", align = "center"), diff_mean = colDef(name = "diff_mean", align = "center", filterable = FALSE, minWidth = 130), pvalue = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ci_lower = colDef(name = "ci_lower", align = "center", filterable = FALSE), ci_upper = colDef(name = "ci_upper", align = "center", filterable = FALSE) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` </details> ```{r} #| echo: false source("hack_code/plot_postHocTest_jjs.R") environment(plot_postHocTest_jjs) <- asNamespace('microbiomeMarker') ``` Now we select a subset of the `r nrow(ssu18_pht_filt)` markers to plot and visualize the results. ```{r} #| code-fold: true ssu_select <- c( "k__Bacteria|p__Acidobacteriota|c__Acidobacteriae|o__Subgroup_2", "k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Chitinophagales|f__Saprospiraceae", "k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Cytophagales", "k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Flavobacteriales", "k__Bacteria|p__Bacteroidota|c__Bacteroidia|o__Sphingobacteriales", "k__Bacteria|p__Myxococcota|c__Polyangia|o__mle1-27", "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Burkholderiales|f__Comamonadaceae|g__Rubrivivax", "k__Bacteria|p__Actinobacteriota|c__Acidimicrobiia|o__Microtrichales", "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales", "k__Bacteria|p__Firmicutes|c__Bacilli|o__Bacillales", "k__Bacteria|p__Myxococcota|c__Myxococcia|o__Myxococcales|f__Myxococcaceae|g__Corallococcus", "k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Burkholderiales|f__Burkholderiaceae|g__Ralstonia" ) ``` ```{r} #| echo: false swel_col <- c("#2271B2", "#71B222", "#B22271") for (i in ssu_select) { tmp_select_feat <- i tmp_plot <- plot_postHocTest_jjs(ssu_pht, feature = tmp_select_feat) & theme_bw() tmp_plot <- tmp_plot + geom_boxplot(fill = swel_col) + scale_colour_manual(values = c("#191919", "#191919", "#191919")) + geom_point(size = 2, show.legend = FALSE) + ylab("Relative abundance (% total reads)") tmp_name <- purrr::map_chr(i, ~ paste0(., "_tax_plot")) #tmp_name <- purrr::map_chr(gsub(pattern = "^.*[a-z]\\|", x = i, replacement = ""), ~ paste0(., "_tax_plot")) assign(tmp_name, tmp_plot) ggplot2::ggsave(tmp_plot, file = paste0("files/da/figures/", i,".png", sep = ""), height = 7, width = 9, units = 'cm', dpi = 600, bg = "white") ggplot2::ggsave(tmp_plot, file = paste0("files/da/figures/", i,".pdf", sep = ""), height = 5, width = 6.5) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| echo: false ssu_title <- c( "Subgroup_2 (Acidobacteriota)", "Saprospiraceae (Bacteroidota)", "Cytophagales (Bacteroidota)", "Flavobacteriales (Bacteroidota)", "Sphingobacteriales (Bacteroidota)", "mle1-27 (Myxococcota)", "Rubrivivax (Proteobacteria)", "Microtrichales (Actinobacteriota)", "Gaiellales (Actinobacteriota)", "Bacillales (Firmicutes)", "Corallococcus (Myxococcota)", "Ralstonia (Proteobacteria)" ) ``` ```{r} #| echo: false tmp_1 <- get(purrr::map_chr(ssu_select[1], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[1]) tmp_2 <- get(purrr::map_chr(ssu_select[2], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[2]) tmp_3 <- get(purrr::map_chr(ssu_select[3], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[3]) tmp_4 <- get(purrr::map_chr(ssu_select[4], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[4]) tmp_5 <- get(purrr::map_chr(ssu_select[5], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[5]) tmp_6 <- get(purrr::map_chr(ssu_select[6], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[6]) tmp_7 <- get(purrr::map_chr(ssu_select[7], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[7]) tmp_8 <- get(purrr::map_chr(ssu_select[8], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[8]) tmp_9 <- get(purrr::map_chr(ssu_select[9], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[9]) tmp_10 <- get(purrr::map_chr(ssu_select[10], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[10]) tmp_11 <- get(purrr::map_chr(ssu_select[11], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[11]) tmp_12 <- get(purrr::map_chr(ssu_select[12], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(ssu_title[12]) ssu_taxa_combo_plot <- ((tmp_1 + tmp_2 + tmp_3) / (tmp_4 + tmp_5 + tmp_6) / (tmp_7 + tmp_8 + tmp_9) / (tmp_10 + tmp_11 + tmp_12)) ssu_taxa_combo_plot ``` ```{r} #| echo: false ggplot2::ggsave(ssu_taxa_combo_plot, path = "files/da/figures/combo_plots", filename = "ssu_taxa_marker.png", height = 32, width = 36, units = 'cm', dpi = 600, bg = "white") ggplot2::ggsave(ssu_taxa_combo_plot, path = "files/da/figures/combo_plots", filename = "ssu_taxa_marker.pdf", height = 12, width = 12) ``` ```{r} #| echo: false system("cp files/da/figures/combo_plots/ssu_taxa_marker.png include/da/ssu_taxa_marker.png") ``` ```{r} #| echo: false #| eval: true #| warning: false #| column: body-outset knitr::include_graphics("include/da/ssu_taxa_marker.png") ``` `r caption_fig_ssu("ssu_taxa_marker_plot")` ```{r} #| echo: false save.image("page_build/mmarker_wf.rdata") ``` # ITS ```{r} #| include: false ## Initial Load for ANALYSIS #1 remove(list = ls()) its18_ps_work <- readRDS("files/data-prep/rdata/its18_ps_work.rds") its18_ps_filt <- readRDS("files/filtering/arbitrary/rdata/its18_ps_filt.rds") its18_ps_perfect <- readRDS("files/filtering/perfect/rdata/its18_ps_perfect.rds") its18_ps_pime <- readRDS("files/filtering/pime/rdata/its18_ps_pime.rds") ``` ```{r} #| echo: false samp_ps <- c("its18_ps_work", "its18_ps_filt", "its18_ps_perfect", "its18_ps_pime") for (i in samp_ps) { tmp_get <- get(i) tmp_path <- file.path("files/alpha/rdata/") tmp_read <- readRDS(paste(tmp_path, i, ".rds", sep = "")) tmp_sam_data <- sample_data(tmp_read) tmp_tax_data <- data.frame(tax_table(tmp_get)) #tmp_tax_data[,c(1:6)] tmp_tax_data$ASV_ID <- NULL # Some have, some do not tmp_tax_data[] <- data.frame(lapply(tmp_tax_data, gsub, pattern = "^[k | p | c | o | f]_.*", replacement = NA, fixed = FALSE)) tmp_tax_data <- as.matrix(tmp_tax_data) tmp_ps <- phyloseq(otu_table(tmp_get), tax_table(tmp_tax_data), tmp_sam_data) assign(i, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ```{r} #| echo: false swel_col <- c("#2271B2", "#71B222", "#B22271") ``` ## Indicator Analysis 1. Choose a data set(s) & set the p-value cutoff. ```{r} samp_ps <- c("its18_ps_work", "its18_ps_filt", "its18_ps_perfect", "its18_ps_pime") indval_pval <- 0.05 ``` 2. Format Data Frame ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_df <- data.frame(otu_table(tmp_get)) tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)] tmp_row_names <- row.names(tmp_df) tmp_row_names <- tmp_row_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1) tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } objects() ``` 3. Run Indicator Analysis ```{r} #| code-fold: true set.seed(1191) for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab"))) tmp_iva <- indval(tmp_get[,-1], tmp_get[,1]) tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval] tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval] tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval] tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval] tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv, pval = tmp_pv, freq = tmp_fr) tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),] tmp_tax_df <- data.frame(tax_table(get(i))) tmp_tax_df$ASV_ID <- NULL tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE) tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),] class(tmp_sum_tax$group) <- "character" tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", "C0") tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", "W3") tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", "W8") tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names") tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),] tmp_sum_tax$ASV_ID <- as.character(tmp_sum_tax$ASV_ID) tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni") tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary")) assign(tmp_res_name, tmp_sum_tax) rm(list = ls(pattern = "tmp_")) } ``` Now we can save a few files and display the data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab"))) tmp_get[,1] <- NULL tmp_df <- as.data.frame(t(tmp_get)) tmp_col_names <- colnames(tmp_df) tmp_col_names <- tmp_col_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") colnames(tmp_df) <- tmp_col_names #tmp_df$freq_all <- apply(tmp_df > 0, 1, sum) tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum) tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum) tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum) tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))]) tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")]) tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")]) tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")]) tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))] tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID") tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary"))) tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE) tmp_merge_df <- tmp_merge_df[,c(1,9:12,2:8,13:19)] tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ] tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final")) assign(tmp_merge_name, tmp_merge_df) rm(list = ls(pattern = "tmp_")) } ``` 4. Save a new phyloseq object ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary"))) tmp_list <- tmp_tab[,1] tmp_ps <- prune_taxa(tmp_list, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind")) assign(tmp_name, tmp_ps) tmp_ps@phy_tree <- NULL tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps) tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE, tip.label = taxa_names(tmp_ps)) tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree) assign(tmp_name, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ## Indicator Summary ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- its18_ps_work_indval_final seq_table %>% download_this( output_name = "its_isa_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_isa_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_filt_indval_final seq_table %>% download_this( output_name = "its_isa_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_isa_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_perfect_indval_final seq_table %>% download_this( output_name = "its_isa_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_isa_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_pime_indval_final seq_table %>% download_this( output_name = "its_isa_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_isa_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), indval = colDef(name = "indval score", align = "center", filterable = FALSE, minWidth = 130), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "Indicator analysis results", sticky = "left", columns = c("group", "indval", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ## LEfSe Analysis Details about LEFSE [microbiomeMarker](https://github.com/yiluheihei/microbiomeMarker) Remove ASV seq, subset only ASVs ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)] tax_table(tmp_get) <- cbind(tax_table(tmp_get), rownames(tax_table(tmp_get))) colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all")) assign(tmp_name, tmp_get) tax_table(tmp_get) <- tax_table(tmp_get)[,c(7)] tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv")) assign(tmp_name_asv, tmp_get) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_rf") ``` 1. Choose a data set(s) & set the LEfSe cutoff values. ```{r} samp_ps <- c("its18_ps_work", "its18_ps_filt", "its18_ps_perfect", "its18_ps_pime") lefse_lda <- 2 lefse_kw <- 0.05 lefse_wc <- 0.05 ``` ```{r} #| warnings: false #| error: false #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv"))) tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, bootstrap_n = 30, bootstrap_fraction = 2/3, sample_min = 10, multicls_strat = FALSE, curv = FALSE) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv")) assign(tmp_name, tmp_lefse) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv") for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all"))) tmp_lefse <- run_lefse(tmp_get, norm = "CPM", class = "TEMP", correct = "2", lda_cutoff = lefse_lda, kw_cutoff = lefse_kw, wilcoxon_cutoff = lefse_wc, bootstrap_n = 30, bootstrap_fraction = 2/3, sample_min = 10, multicls_strat = FALSE, curv = FALSE) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all")) assign(tmp_name, tmp_lefse) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv") objects(pattern = "_lefse_all") ``` ```{r} #| echo: false # DONT RUN, TESTING ONLY #tmp_tab <- data.frame(marker_table(its18_ps_pime_lefse_asv)) #tmp_tab$feature <- gsub('s__', '', tmp_tab$feature) #tmp_ps <- data.frame(t(otu_table(its18_ps_pime_rf_asv))) #tmp_ps <- tmp_ps %>% tibble::rownames_to_column("feature") #write.table(dplyr::left_join(tmp_tab, tmp_ps, by = "feature"), "test.txt", quote = FALSE, sep = "\t") ``` ```{r} #| code-fold: true for (i in samp_ps) { tmp_o_tax <- data.frame(tax_table(get(i))) tmp_o_tax$ASV_ID <- NULL tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID") tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt$feature <- gsub('s__', '', tmp_mt$feature) tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>% tibble::remove_rownames() tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^0$", "C0") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3") tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^8$", "W8") tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),] tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_summary")) assign(tmp_res_name, tmp_sum) rm(list = ls(pattern = "tmp_")) } ``` Now we can save a few files and display the data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_df <- data.frame(t(otu_table(tmp_get))) #tmp_get[,1] <- NULL #tmp_df <- as.data.frame(t(tmp_otu)) tmp_col_names <- colnames(tmp_df) tmp_col_names <- tmp_col_names %>% stringr::str_replace("P[0-9]{2}_D[0-9]{2}_[0-9]{3}_", "") %>% stringr::str_replace("[A-Z]$", "") colnames(tmp_df) <- tmp_col_names tmp_df$freq <- apply(tmp_df > 0, 1, sum) tmp_df$freq_C0 <- apply(tmp_df[ , (names(tmp_df) %in% "C0")] > 0, 1, sum) tmp_df$freq_W3 <- apply(tmp_df[ , (names(tmp_df) %in% "W3")] > 0, 1, sum) tmp_df$freq_W8 <- apply(tmp_df[ , (names(tmp_df) %in% "W8")] > 0, 1, sum) tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in% c("C0", "W3", "W8"))]) tmp_df$reads_C0 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "C0")]) tmp_df$reads_W3 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W3")]) tmp_df$reads_W8 <- base::rowSums(tmp_df[ , (names(tmp_df) %in% "W8")]) tmp_df <- tmp_df[,!grepl("^[C | W]", names(tmp_df))] tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID") tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary"))) tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE) tmp_merge_df <- tmp_merge_df[,c(1,10:12,2:9,14:20)] tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ] tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_final")) assign(tmp_merge_name, tmp_merge_df) rm(list = ls(pattern = "tmp_")) } ``` Save a new phyloseq object ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(i) tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_summary"))) tmp_list <- tmp_tab[,1] tmp_ps <- prune_taxa(tmp_list, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse")) assign(tmp_name, tmp_ps) tmp_ps@phy_tree <- NULL tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps) tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE, tip.label = taxa_names(tmp_ps)) tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree) assign(tmp_name, tmp_ps) rm(list = ls(pattern = "tmp_")) } objects() ``` ## LEfSe Summaries (ASVs & Markers) ### ASV summary data ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- its18_ps_work_lefse_final seq_table %>% download_this( output_name = "its_lefse_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), ef_lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "ef_lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_filt_lefse_final seq_table %>% download_this( output_name = "its_lefse_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_perfect_lefse_final seq_table %>% download_this( output_name = "its_lefse_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_pime_lefse_final seq_table %>% download_this( output_name = "its_lefse_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( ASV_ID = colDef(name = "ASV_ID", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), freq = colDef(name = "all samples", align = "center", filterable = FALSE), freq_C0 = colDef(name = "control", align = "center", filterable = FALSE), freq_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), freq_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), reads_total = colDef(name = "all", align = "center", filterable = FALSE), reads_C0 = colDef(name = "control", align = "center", filterable = FALSE), reads_W3 = colDef(name = "+3°C", align = "center", filterable = FALSE), reads_W8 = colDef(name = "+8°C", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_SEQ = colDef(name = "ASV sequence", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Frequency", columns = c("freq", "freq_C0", "freq_W3", "freq_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Read totals", columns = c("reads_total", "reads_C0", "reads_W3", "reads_W8"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ### ASV Visualizations ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt %>% filter(ef_lda >= 3) tmp_mt <- marker_table(tmp_mt) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_viz_trim")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_asv_viz_trim") ``` ::: {.column-body} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_work_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_asv_full")` #### Arbitrary filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_filt_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_asv_filt")` #### PERfect filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_perfect_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 6)) ``` `r caption_fig_its("its_lefse_asv_perfect")` #### PIME filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_pime_lefse_asv_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 5)) ``` `r caption_fig_its("its_lefse_asv_pime")` ::: ::: ### Marker summary data ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt[, c(2:4,1)] tmp_mt <- tmp_mt %>% dplyr::rename(c("group" = 1, "pval" = 3)) tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^0$", "C0") tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^3$", "W3") tmp_mt$group <- stringr::str_replace(tmp_mt$group, "^8$", "W8") tmp_mt <- tmp_mt %>% tibble::rownames_to_column("marker") tmp_mt <- tmp_mt %>% tidyr::separate(col = "feature", into = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "ASV_ID"), sep = "\\|\\w__") tmp_mt$Kingdom <- gsub('k__', '', tmp_mt$Kingdom) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_marker_final")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } ``` ::: {.column-body-outset} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true seq_table <- its18_ps_work_lefse_marker_final seq_table %>% download_this( output_name = "its_lefse_marker_results_full", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_marker_full")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), ef_lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "ef_lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### Arbitrary filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_filt_lefse_marker_final seq_table %>% download_this( output_name = "its_lefse_marker_results_arbitrary", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_marker_filt")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PERfect filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_perfect_lefse_marker_final seq_table %>% download_this( output_name = "its_lefse_marker_results_perfect", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_marker_perfect")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` #### PIME filtered ```{r} #| echo: false #| eval: true seq_table <- its18_ps_pime_lefse_marker_final seq_table %>% download_this( output_name = "its_lefse_marker_results_pime", output_extension = ".csv", button_label = "Download data as csv file", button_type = "default", csv2 = FALSE, has_icon = TRUE, icon = "fa fa-save") ``` `r caption_tab_its("its_lefse_marker_pime")` ```{r} #| echo: false #| eval: true reactable(seq_table, defaultColDef = colDef(headerStyle = list(fontSize = "0.8em"), header = function(value) gsub("_", " ", value, fixed = TRUE), cell = function(value) format(value, nsmall = 0), align = "left", filterable = TRUE, sortable = TRUE, resizable = TRUE, footerStyle = list(fontWeight = "bold") ), columns = list( marker = colDef(name = "Marker", sticky = "left", style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em"), align = "left", minWidth = 100), group = colDef(name = "enriched", align = "center"), lda = colDef(name = "LDA score", align = "center", filterable = FALSE, minWidth = 100), pval = colDef(name = "p-value", align = "center", filterable = FALSE, style = list(borderRight = "5px solid #eee"), headerStyle = list(borderRight = "5px solid #eee", fontSize = "0.8em")), ASV_ID = colDef(name = "ASV ID", filterable = FALSE) ), columnGroups = list( colGroup(name = "LEfSe results", sticky = "left", columns = c("group", "lda", "pval"), headerStyle = list(fontSize = "1.2em", borderRight = "5px solid #eee")), colGroup(name = "Lineage", align = "left", columns = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")) ), searchable = TRUE, defaultPageSize = 5, pageSizeOptions = c(5, 10, nrow(seq_table)), showPageSizeOptions = TRUE, highlight = TRUE, bordered = TRUE, striped = TRUE, compact = FALSE, wrap = FALSE, showSortable = TRUE, fullWidth = TRUE, theme = reactableTheme(style = list(fontSize = "0.8em"))) rm(seq_table) ``` ::: ::: ### Marker visualizations ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_all"))) tmp_mt <- data.frame(marker_table(tmp_get)) tmp_mt <- tmp_mt %>% filter(ef_lda >= 3.5) tmp_mt <- marker_table(tmp_mt) tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all_viz_trim")) assign(tmp_res_name, tmp_mt) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_lefse_all_viz_trim") ``` ::: {.column-body} ::: {.panel-tabset} #### FULL ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_work_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_marker_full")` #### Arbitrary filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_filt_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_marker_filt")` #### PERfect filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_perfect_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_marker_perfect")` #### PIME filtered ```{r} #| echo: false #| eval: true #| warning: false plot_ef_bar(its18_ps_pime_lefse_all_viz_trim) + scale_fill_manual(values = c("0" = "#2271B2", "3" = "#71B222", "8" = "#B22271")) + theme(axis.text = element_text(size = 8)) ``` `r caption_fig_its("its_lefse_marker_pime")` ::: ::: ## Response of select taxa The workflow here is basically a carbon copy of the workflow describe above. Presented here for posterity. <details markdown="1"> <summary>**Click here** to see the ITS version of this workflow.</summary> ```{r} #| echo: false #| eval: false #TEMP LOAD ONLY REMOVE WHEN WORKFLOW FINISHED remove(list = ls()) load("page_build/da_wf.rdata") gdata::keep("its18_ps_work_rf_all", "its18_ps_work_rf_asv", "its18_ps_filt_rf_all", "its18_ps_filt_rf_asv", "its18_ps_perfect_rf_all", "its18_ps_perfect_rf_asv", "its18_ps_pime_rf_all", "its18_ps_pime_rf_asv", sure = TRUE) objects() ``` ```{r} ## FIX ps object ps <- its18_ps_perfect_rf_all tmp_tax1 <- data.frame(tax_table(ps)) tmp_tax1$ASV_SEQ <- NULL tmp_rn <- row.names(tmp_tax1) tmp_tax <- data.frame(lapply(tmp_tax1, function(x) {gsub("\\(|)", "", x)})) row.names(tmp_tax) <- tmp_rn identical(row.names(tmp_tax), row.names(tmp_tax1)) #dplyr::filter(tmp_tax, Phylum == "Acidobacteriota") ps_tax_new <- as.matrix(tmp_tax) tmp_ps <- phyloseq(otu_table(ps), tax_table(ps_tax_new), sample_data(ps)) its_ps <- tmp_ps phyloseq::rank_names(its_ps) data.frame(tax_table(its_ps)) ``` ```{r} its_group_anova <- run_test_multiple_groups(its_ps, group = "TEMP", taxa_rank = "all", method = "anova") its_group_anova@marker_table marker_table(its_group_anova) ``` ```{r} its_default_pht <- run_posthoc_test(its_ps, group = "TEMP", method = "tukey", transform = "log10") ``` ```{r} #| echo: false #filter(data.frame(its_default_pht@result), group_name == "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") #plot_postHocTest(its_default_pht, feature = "k__Bacteria|p__Actinobacteriota|c__Thermoleophilia|o__Gaiellales") & theme_bw() ``` ```{r} its_pht <- its_default_pht its18_pht_filt <- filter(data.frame(its_pht@result), pvalue <= "0.05")[!grepl("ASV", filter(data.frame(its_pht@result), pvalue <= "0.05")$group_name),] its18_pht_filt <- its18_pht_filt[!grepl("[a-z]__$", its18_pht_filt$group_name),] its18_pht_filt <- unique(its18_pht_filt$group_name) length(its18_pht_filt) ``` There are **`r length(its18_pht_filt)`** significantly different lineage markers. <details markdown="1"> <summary>**Click here** to see a list of significantly different lineage markers.</summary> ```{r} #| echo: false #| eval: true its18_pht_filt ``` </details> ```{r} #| echo: false source("hack_code/plot_postHocTest_jjs.R") environment(plot_postHocTest_jjs) <- asNamespace('microbiomeMarker') ``` ```{r} #| echo: false its_select <- c( "k__Fungi|p__Ascomycota|c__Sordariomycetes|o__Xylariales|f__Microdochiaceae", "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales|f__Entolomataceae", "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales|f__Clavariaceae", "k__Fungi|p__Basidiomycota|c__Agaricomycetes|o__Agaricales", "k__Fungi|p__Basidiomycota|c__Microbotryomycetes|o__Sporidiobolales", "k__Fungi|p__Rozellomycota|c__Rozellomycotina_cls_Incertae_sedis", "k__Fungi|p__Ascomycota|c__Eurotiomycetes|o__Eurotiales|f__Trichocomaceae|g__Talaromyces", "k__Fungi|p__Ascomycota|c__Pezizomycetes|o__Pezizales|f__Pyronemataceae", "k__Fungi|p__Ascomycota|c__Sordariomycetes|o__Hypocreales|f__Nectriaceae|g__Fusarium", "k__Fungi|p__Ascomycota|c__Saccharomycetes|o__Saccharomycetales|f__Metschnikowiaceae", "k__Fungi|p__Glomeromycota|c__Glomeromycetes|o__Glomerales", "k__Fungi|p__Mortierellomycota|c__Mortierellomycetes|o__Mortierellales" ) ``` ```{r} #| echo: false swel_col <- c("#2271B2", "#71B222", "#B22271") for (i in its_select) { tmp_select_feat <- i tmp_plot <- plot_postHocTest_jjs(its_pht, feature = tmp_select_feat) & theme_bw() tmp_plot <- tmp_plot + geom_boxplot(fill = swel_col) + scale_colour_manual(values = c("#191919", "#191919", "#191919")) + geom_point(size = 2, show.legend = FALSE) + ylab("Relative abundance (% total reads)") tmp_name <- purrr::map_chr(i, ~ paste0(., "_tax_plot")) #tmp_name <- purrr::map_chr(gsub(pattern = "^.*[a-z]\\|", x = i, replacement = ""), ~ paste0(., "_tax_plot")) assign(tmp_name, tmp_plot) ggplot2::ggsave(tmp_plot, file = paste0("files/da/figures/", i,".png", sep = ""), height = 7, width = 9, units = 'cm', dpi = 600, bg = "white") ggplot2::ggsave(tmp_plot, file = paste0("files/da/figures/", i,".pdf", sep = ""), height = 5, width = 6.5) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| echo: false its_title <- c( "Microdochiaceae (Ascomycota)", "Entolomataceae (Basidiomycota)", "Clavariaceae (Basidiomycota)", "Agaricales (Basidiomycota)", "Sporidiobolales (Basidiomycota)", "Rozellomycotina (Rozellomycota)", "Talaromyces (Ascomycota)", "Pyronemataceae (Ascomycota)", "Fusarium (Ascomycota)", "Metschnikowiaceae (Ascomycota)", "Glomerales (Glomeromycota)", "Mortierellales (Mortierellomycota)" ) ``` ```{r} #| echo: false tmp_1 <- get(purrr::map_chr(its_select[1], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[1]) tmp_2 <- get(purrr::map_chr(its_select[2], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[2]) tmp_3 <- get(purrr::map_chr(its_select[3], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[3]) tmp_4 <- get(purrr::map_chr(its_select[4], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[4]) tmp_5 <- get(purrr::map_chr(its_select[5], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[5]) tmp_6 <- get(purrr::map_chr(its_select[6], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[6]) tmp_7 <- get(purrr::map_chr(its_select[7], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[7]) tmp_8 <- get(purrr::map_chr(its_select[8], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[8]) tmp_9 <- get(purrr::map_chr(its_select[9], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[9]) tmp_10 <- get(purrr::map_chr(its_select[10], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[10]) tmp_11 <- get(purrr::map_chr(its_select[11], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[11]) tmp_12 <- get(purrr::map_chr(its_select[12], ~ paste0(., "_tax_plot"))) + geom_point(show.legend = FALSE) + ggtitle(its_title[12]) its_taxa_combo_plot <- ((tmp_1 + tmp_2 + tmp_3) / (tmp_4 + tmp_5 + tmp_6) / (tmp_7 + tmp_8 + tmp_9) / (tmp_10 + tmp_11 + tmp_12)) its_taxa_combo_plot ``` ```{r} #| echo: false ggplot2::ggsave(its_taxa_combo_plot, path = "files/da/figures/combo_plots", filename = "its_taxa_marker.png", height = 32, width = 36, units = 'cm', dpi = 600, bg = "white") ggplot2::ggsave(its_taxa_combo_plot, path = "files/da/figures/combo_plots", filename = "its_taxa_marker.pdf", height = 12, width = 12) ``` ```{r} #| echo: false system("cp files/da/figures/combo_plots/its_taxa_marker.png include/da/its_taxa_marker.png") ``` </details> ```{r} #| echo: false #| eval: true #| warning: false #| column: body-outset knitr::include_graphics("include/da/ssu_taxa_marker.png") ``` `r caption_fig_its("its_taxa_marker_plot")` # Visualizing DA ASVs in Anvi’o Next, we will combine the results of the ISA and LEfSe analyses with the distribution of ASVs across each sample. We are going to do the analysis in [anvi’o](https://github.com/merenlab/anvio)---an advanced analysis and visualization platform for ‘omics data [@eren2015anvi]---using the `anvi-interactive` command. Anvi’o likes databases but it also understands that sometimes you do not have a database. So it offers a manual mode. If you type this command you can have a look at the relevant pieces we need for the visualization, specifically those under the headings MANUAL INPUTS and ADDITIONAL STUFF. ```bash anvi-interactive -h ``` ``` MANUAL INPUTS: Mandatory input parameters to start the interactive interface without anvi'o databases. --manual-mode We need this flag to run anvi'o in an ad hoc manner, i.e., no database. -f FASTA, --fasta-file FASTA A FASTA-formatted input file. This is sort of optional -d VIEW_DATA, --view-data VIEW_DATA A TAB-delimited file for view data. This is the ASV by sample matrix. We need this -t NEWICK, --tree NEWICK NEWICK formatted tree structure. How the ASVs are ordered in our case. ADDITIONAL STUFF: Parameters to provide additional layers, views, or layer data. -V ADDITIONAL_VIEW, --additional-view ADDITIONAL_VIEW A TAB-delimited file for an additional view to be used in the interface. This file should contain all split names, and values for each of them in all samples. Each column in this file must correspond to a sample name. Content of this file will be called 'user_view', which will be available as a new item in the 'views' combo box in the interface -A ADDITIONAL_LAYERS, --additional-layers ADDITIONAL_LAYERS A TAB-delimited file for additional layer info. In our case this is info about each ASV. The first column of the file must be the ASV names, and the remaining columns should be unique attributes. ``` There are also a few files we generate that cannot be loaded directly. So, in addition to the files that can be loaded when running the interactive, we also have files that must be added to the database created by anvi’o. Here is a nice tutorial on [Working with anvi’o additional data tables](http://merenlab.org/2017/12/11/additional-data-tables/). A lot of what we need is covered in this tutorial. To get the most out the visualization we need to create a few files to give anvi’o when we fire up the interactive interface. 1. View data: in our case, a sample by ASV abundance matrix. 2. Additional info about each ASV. 3. Additional info about each sample. 4. Taxa abundance data for each sample at some rank. 5. Dendrograms ordering the ASVs and samples (based on view data). 6. Fasta file of all ASVs in the analysis. ## Main Steps ### 1. View data Let’s start with the `-d` or `--view-data` file. This file needs to be an ASV by sample matrix of read counts. To simplify the visualization, we will use ***all*** ASVs represented by 100 or more total reads, including those identified as differentially abundant by the ISA and/or LEfSe. ```{r} #| code-fold: true samp_ps_all <- c("ssu18_ps_filt", "ssu18_ps_pime", "ssu18_ps_perfect") samp_ps <- c("ssu18_ps_filt", "ssu18_ps_pime", "ssu18_ps_perfect") samp_ps_org <- c("ssu18_ps_work") samp_ps_pime <- c("ssu18_ps_pime") samp_ps_other <- c("ssu18_ps_filt", "ssu18_ps_perfect") trim_val <- 100 for (i in samp_ps_pime) { tmp_get <- get(i) tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } trim_val <- 300 for (i in samp_ps_other) { tmp_get <- get(i) tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_trim") ``` ```{r} #| echo: false for (i in samp_ps) { tmp_path <- file.path("files/anvio/ssu/") dir.create(paste(tmp_path, i, sep = "")) rm(list = ls(pattern = "tmp_")) } for (i in samp_ps_org) { tmp_path <- file.path("files/anvio/ssu/") dir.create(paste(tmp_path, i, sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{r} for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_df <- as.data.frame(t(otu_table(tmp_get))) tmp_df <- tmp_df %>% rownames_to_column("Group") tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_df, paste(tmp_path, i, "/", "data.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } ``` Or export a table of transformed data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_trans <- transform_sample_counts(tmp_get, function(x) 1e5 * {x/sum(x)}) tmp_df <- as.data.frame(t(otu_table(tmp_trans))) tmp_df <- tmp_df %>% rownames_to_column("Group") #tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim_tab")) #assign(tmp_name, tmp_df) tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_df, paste(tmp_path, i, "/", "data_trans.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } ``` ### 2. Additional Layers for ASVs Next, we need some additional data **about the ASVs** to overlay on the visual. This can be anything however what I specifically want are the details of the ISA analysis, total reads, and lineage info. I warn you; this code will get ugly and I urge you to find a better way. Start with an ASV + lineage table for the ASVs in the new phyloseq object. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_final"))) tmp_get_indval <- tmp_get_indval %>% dplyr::rename("Group" = "ASV_ID") %>% dplyr::rename("enrich_indval" = "group") %>% dplyr::rename("test_indval" = "indval") %>% dplyr::rename("pval_indval" = "pval") tmp_get_indval <- tmp_get_indval[,1:5] tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_final"))) tmp_get_lefse <- tmp_get_lefse %>% dplyr::rename("Group" = "ASV_ID") %>% dplyr::rename("enrich_lefse" = "group") %>% dplyr::rename("test_lefse" = "lda") %>% dplyr::rename("pval_lefse" = "pval") tmp_get_lefse <- tmp_get_lefse[,1:4] tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_otu_df <- as.data.frame(t(otu_table(tmp_get))) tmp_total <- cbind(tmp_otu_df, total_reads = rowSums(tmp_otu_df)) tmp_total <- rev(tmp_total)[1] tmp_total <- tmp_total %>% tibble::rownames_to_column("Group") tmp_tax_df <- as.data.frame(tax_table(tmp_get)) tmp_tax_df$ASV_SEQ <- NULL tmp_tax_df$ASV_ID <- NULL tmp_tax_df <- tmp_tax_df %>% tibble::rownames_to_column("Group") tmp_add_lay <- dplyr::left_join(tmp_tax_df, tmp_total, by = "Group") %>% dplyr::left_join(., tmp_get_indval, by = "Group") %>% dplyr::left_join(., tmp_get_lefse, by = "Group") tmp_add_lay$ASV_ID <- tmp_add_lay$Group tmp_add_lay <- tmp_add_lay[, c(1,16,8,12,9:11,13:15,2:7)] tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_add_lay, paste(tmp_path, i, "/", "additional_layers.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` ### 3. Additional Views for Samples Now we want some general data **about the samples** to overlay on the visual. Again, this can be anything. How about a table of alpha diversity metrics? We actually have such a table that was generated way back up the road. Just need to fix the column names. :::{.callout-note} **Added** to profile.db with `anvi-import-misc-data` command & `--target-data-table layers` flag. ::: ```{r} #| code-fold: true metadata_tab <- read.table("files/metadata/tables/metadata.txt", header = TRUE) tmp_x <- readRDS("files/alpha/rdata/ssu18_ps_pime.rds") data.frame(sample_data(tmp_x)) metadata_tab[,c(2:5)] <- list(NULL) for (i in samp_ps_all) { tmp_get <- get(i) tmp_df <- data.frame(sample_data(tmp_get)) tmp_df <- tmp_df[,c(2:9)] tmp_df <- tmp_df %>% tibble::rownames_to_column("id") tmp_df <- tmp_df %>% dplyr::rename("no_asvs" = "Observed") tmp_rc <- data.frame(readcount(tmp_get)) tmp_rc <- tmp_rc %>% tibble::rownames_to_column("id") tmp_rc <- tmp_rc %>% dplyr::rename("no_reads" = 2) #identical(tmp_df$id, tmp_rc$id) tmp_merge <- dplyr::left_join(tmp_df, tmp_rc) tmp_merge <- tmp_merge[, c(1:6,10,7:9)] tmp_final <- dplyr::left_join(tmp_merge, metadata_tab) tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_final, paste(tmp_path, i, "/", "additional_views.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } rm(metadata_tab) ``` ### 4. Taxon rank abundance by sample Turned out this was a little tricky to figure out, but thanks to a [little nifty block of code](https://github.com/joey711/phyloseq/issues/418#issuecomment-262637034) written by [guoyanzhao](https://github.com/guoyanzhao) on the phyloseq Issues forum, it was a piece of cake. The code can be altered to take any rank. See the post for an explanation. Anyway, the goal is to sum each taxon at some rank and present that as a bar chart for each sample in the visualization. Anvi'o has a specific format it needs where each row is a sample and each column is a taxon. Taxa names need the prefix `t_<RANK>!`. For example, `t_class!` should be added for Class rank. :::{.callout-note} **Added** to profile.db with `anvi-import-misc-data` command & `--target-data-table layers` flag. ::: ```{r} #| code-fold: true pick_rank <- "Phylum" pick_rank_l <- "phylum" for (i in samp_ps_all) { # Make the table tmp_get <- get(i) tmp_glom <- tax_glom(tmp_get, taxrank = pick_rank) tmp_melt <- psmelt(tmp_glom) tmp_melt[[pick_rank]] <- as.character(tmp_melt[[pick_rank]]) tmp_abund <- aggregate(Abundance ~ Sample + tmp_melt[[pick_rank]], tmp_melt, FUN = sum) colnames(tmp_abund)[2] <- "tax_rank" library(reshape2) tmp_abund <- as.data.frame(reshape::cast(tmp_abund, Sample ~ tax_rank)) tmp_abund <- tibble::remove_rownames(tmp_abund) tmp_abund <- tibble::column_to_rownames(tmp_abund, "Sample") # Reorder table column by sum tmp_layers <- tmp_abund[,names(sort(colSums(tmp_abund), decreasing = TRUE))] # Add the prefix tmp_layers <- tmp_layers %>% dplyr::rename_all(function(x) paste0("t_", pick_rank_l,"!", x)) tmp_layers <- tibble::rownames_to_column (tmp_layers, "taxon") # save the table tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_layers, paste(tmp_path, i, "/", "tax_layers.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } ``` ### 5. Construct Dendrograms The last piece we need is to generate dendrograms that order the ASVs by their distribution in the samples and the samples by their ASV composition. For this task we will use anvi'o. ```bash anvi-matrix-to-newick data.txt --distance euclidean \ --linkage ward \ -o asv.tre anvi-matrix-to-newick data.txt --distance euclidean \ --linkage ward \ -o sample.tre \ --transpose ``` The first command reads the view data we generated above and uses Euclidean distance and Ward linkage for hierarchical clustering of the ASVs. The second command transposes the view data table and then does the same for the samples. There are several distance metrics and linkage methods available. See the help menu for the command by typing `anvi-matrix-to-newick -h`. Boom. ```{r} #| echo: false #bash_commands <- c() USE this to combine all commands # including in loop creates separate files for (i in samp_ps) { bash_commands <- c() tmp_command_asv <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data.txt", " --distance euclidean --linkage ward -o ", "asv.tre")) bash_commands <- append(bash_commands, tmp_command_asv) tmp_command_samp <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data.txt", " --distance braycurtis --linkage complete -o ", "sample.tre --transpose")) bash_commands <- append(bash_commands, tmp_command_samp) tmp_path <- file.path("files/anvio/ssu/") write(bash_commands, paste(tmp_path, i, "/", "tre.sh", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| echo: false # FOR TANSFORMED DATA for (i in samp_ps) { bash_commands <- c() tmp_command_asv <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data_trans.txt", " --distance euclidean --linkage ward -o ", "asv_trans.tre")) bash_commands <- append(bash_commands, tmp_command_asv) tmp_command_samp <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data_trans.txt", " --distance braycurtis --linkage complete -o ", "sample_trans.tre --transpose")) bash_commands <- append(bash_commands, tmp_command_samp) tmp_path <- file.path("files/anvio/ssu/") write(bash_commands, paste(tmp_path, i, "/", "tre_transformed.sh", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{bash, echo=FALSE} #NEED to run in anvio from base dir cd ssu18_ps_filt bash tre.sh bash tre_transformed.sh cd ../ cd ssu18_ps_filt_otu bash tre.sh bash tre_transformed.sh cd ../ cd ssu18_ps_perfect bash tre.sh bash tre_transformed.sh cd ../ cd ssu18_ps_perfect_otu bash tre.sh bash tre_transformed.sh cd ../ cd ssu18_ps_pime bash tre.sh bash tre_transformed.sh cd ../ cd ssu18_ps_pime_otu bash tre.sh bash tre_transformed.sh cd ../ ``` Alternatively, we can generate dendrograms using `phyloseq::distance` and `hclust`. ```{r} #| code-fold: true pick_dist <- "bray" pick_clust <- "complete" for (i in samp_ps) { # Make the table tmp_get <- get(i) tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist, type = "sample") tmp_dend <- hclust(tmp_dist, method = pick_clust) plot(tmp_dend, hang = -1) tmp_tree <- as.phylo(tmp_dend) tmp_path <- file.path("files/anvio/ssu/") write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| code-fold: true pick_dist_asv <- "euclidean" pick_clust_asv <- "ward" for (i in samp_ps) { # Make the table tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist_asv, type = "taxa") tmp_dend <- hclust(tmp_dist, method = pick_clust_asv) plot(tmp_dend, hang = -1) tmp_tree <- as.phylo(tmp_dend) tmp_path <- file.path("files/anvio/ssu/") write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "asv_", pick_dist_asv, "_", pick_clust_asv, ".tre", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` :::{.callout-note} The file `asv.tre` is **loaded** with `anvi-interactive` command & the `--tree` flag. ::: The ASV tree is fine as is, but the sample tree needs a [special format](http://merenlab.org/2017/12/11/additional-data-tables/#layer-orders-additional-data-table). Specifically, the tree needs to be in a three column, tab delimited, *table*. This way you can add multiple orderings to the same file and view them all in the interactive. The table needs to be in this format: | item_name | data_type | data_value | |-----------|-----------|-----------------------------------------------------| | tree_1 | newick | ((P01_D00_010_W8A:0.0250122,P05_D00_010_W8C:0.02,.. | | tree_2 | newick | ((((((((OTU14195:0.0712585,OTU13230:0.0712585)0:,...| | (…) | (…) | (…) | This is easy to do by hand, but I *really* need the practice so I will do it in R. Anvi'o is very particular about formatting. For example, if this file ends with a blank line, which it will because when anvi'o made the initial dendrogram it add a new line. We need to get rid of that or we get an error when trying to import the table. :::{.callout-note} The file `sample.tre` is **added** to the profile.db with `anvi-import-misc-data` command & `--target-data-table layer_orders` flag. ::: ```{r} #| code-fold: true for (i in samp_ps) { tmp_path <- file.path("files/anvio/ssu/") tmp_tree <- read_file(paste(tmp_path, i, "/", "sample.tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c("bray_complete") tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample.tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| code-fold: true # FOR TRANSFORMED DATA for (i in samp_ps) { tmp_path <- file.path("files/anvio/ssu/") tmp_tree <- read_file(paste(tmp_path, i, "/","sample_trans.tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c("bray_complete") tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample_trans.tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } objects() ``` ```{r} #| code-fold: true # FOR HCLUST TREE for (i in samp_ps) { tmp_path <- file.path("files/anvio/ssu/") tmp_tree <- read_file(paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c(paste(pick_dist, "_", pick_clust, "_hclust", sep = "")) tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` ### 6. Make a fasta file We don't need to add a fasta file, but it is a nice way to keep everything in one place. Plus, you can do BLAST searches directly in the interface by right clicking on the ASV of interest, so it is nice to have the sequences. :::{.callout-note} **Loaded** with `anvi-interactive` command & `--fasta-file` flag. ::: ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_tab <- tax_table(tmp_get) tmp_tab <- tmp_tab[, 7] tmp_df <- data.frame(row.names(tmp_tab), tmp_tab) colnames(tmp_df) <- c("ASV_ID", "ASV_SEQ") tmp_df$ASV_ID <- sub("^", ">", tmp_df$ASV_ID) tmp_path <- file.path("files/anvio/ssu/") write.table(tmp_df, paste(tmp_path, i, "/", i, ".fasta", sep = ""), sep = "\n", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") rm(list = ls(pattern = "tmp_")) } ``` ## Building the Profile Database Time to put all of these pieces together. This gets a little tricky since we do not have a database to start with because some of these files can be loaded directly in the interface but some need to be added to a database. When we fire up the interactive in `--manual` mode, we ***must*** give anvi'o the name of a database and it will *create* that database for us. Then we can shut down the interactive, add the necessary data files, and start back up. ```bash anvi-interactive --view-data data.txt \ --tree asv.tre \ --additional-layers additional_layers.txt \ --profile-db profile.db \ --manual ``` Now we have a new profile database that we can add the sample metadata (`additional_layers.txt`) and the sample dendrogram (sample.tre) using the command `anvi-import-misc-data`. These commands add the table to the new `profile.db`. First, kill the interactive. ```bash anvi-import-misc-data additional_views.txt \ --pan-or-profile-db profile.db \ --target-data-table layers anvi-import-misc-data sample.tre \ --pan-or-profile-db profile.db \ --target-data-table layer_orders ``` One last this is to get the table with the taxonomy total by sample (`tax_layers.txt`) into the profile database. We will run the same command we just used. ```bash anvi-import-misc-data tax_layers.txt \ --pan-or-profile-db profile.db \ --target-data-table layers ``` In fact, we could just as easily append the taxonomy total data onto the `additional_layers.txt` and import in one command. But we didn't. ## Interactive Interface With a populated database in hand, we can now begin modifying the visual by running the interactive command again. ```bash anvi-interactive --view-data data.txt \ --tree asv.tre \ --additional-layers additional_layers.txt \ --profile-db profile.db --fasta-file anvio.fasta \ --manual ``` ```{r} #| echo: false #| warning: false #| fig-height: 2 knitr::include_graphics("figures/16s-da-asvs/before.png") ``` ```{r} #| echo: false # ALL commands #anvi-interactive --view-data data_ssu18_ps_pime.txt --tree asv_ssu18_ps_pime.tre --additional-layers additional_layers_ssu18_ps_pime.txt --profile-db profile_ssu18_ps_pime.db --fasta-file ssu18_ps_pime.fasta --manual #anvi-import-misc-data additional_views_ssu18_ps_pime.txt --pan-or-profile-db profile_ssu18_ps_pime.db --target-data-table layers #anvi-import-misc-data sample_ssu18_ps_pime.tre --pan-or-profile-db profile_ssu18_ps_pime.db --target-data-table layer_orders #anvi-import-misc-data tax_layers_ssu18_ps_pime.txt --pan-or-profile-db profile_ssu18_ps_pime.db --target-data-table layers #anvi-interactive --view-data data_ssu18_ps_pime.txt --tree asv_ssu18_ps_pime.tre --additional-layers additional_layers_ssu18_ps_pime.txt --profile-db profile_ssu18_ps_pime.db --fasta-file ssu18_ps_pime.fasta --manual ``` <details markdown="1"> <summary>Click here for the ITS version of this workflow.</summary> The ITS version of the anvi'o workflow is basically a carbon copy of the workflow presented above. It is included here for posterity. ### Visualizing DA ASVs in Anvi’o Next, we will combine the results of the ISA and LEfSe analyses with the distribution of ASVs across each sample. We are going to do the analysis in [anvi’o](https://github.com/merenlab/anvio)---an advanced analysis and visualization platform for ‘omics data [@eren2015anvi]---using the `anvi-interactive` command. Anvi’o likes databases but it also understands that sometimes you do not have a database. So it offers a manual mode. If you type this command you can have a look at the relevant pieces we need for the visualization, specifically those under the headings MANUAL INPUTS and ADDITIONAL STUFF. ```bash anvi-interactive -h ``` ``` MANUAL INPUTS: Mandatory input parameters to start the interactive interface without anvi'o databases. --manual-mode We need this flag to run anvi'o in an ad hoc manner, i.e., no database. -f FASTA, --fasta-file FASTA A FASTA-formatted input file. This is sort of optional -d VIEW_DATA, --view-data VIEW_DATA A TAB-delimited file for view data. This is the ASV by sample matrix. We need this -t NEWICK, --tree NEWICK NEWICK formatted tree structure. How the ASVs are ordered in our case. ADDITIONAL STUFF: Parameters to provide additional layers, views, or layer data. -V ADDITIONAL_VIEW, --additional-view ADDITIONAL_VIEW A TAB-delimited file for an additional view to be used in the interface. This file should contain all split names, and values for each of them in all samples. Each column in this file must correspond to a sample name. Content of this file will be called 'user_view', which will be available as a new item in the 'views' combo box in the interface -A ADDITIONAL_LAYERS, --additional-layers ADDITIONAL_LAYERS A TAB-delimited file for additional layer info. In our case this is info about each ASV. The first column of the file must be the ASV names, and the remaining columns should be unique attributes. ``` There are also a few files we generate that cannot be loaded directly. So, in addition to the files that can be loaded when running the interactive, we also have files that must be added to the database created by anvi’o. Here is a nice tutorial on [Working with anvi’o additional data tables](http://merenlab.org/2017/12/11/additional-data-tables/). A lot of what we need is covered in this tutorial. To get the most out the visualization we need to create a few files to give anvi’o when we fire up the interactive interface. 1. View data: in our case, a sample by ASV abundance matrix. 2. Additional info about each ASV. 3. Additional info about each sample. 4. Taxa abundance data for each sample at some rank. 5. Dendrograms ordering the ASVs and samples (based on view data). 6. Fasta file of all ASVs in the analysis. ### Main steps #### 1. View data Let’s start with the `-d` or `--view-data` file. This file needs to be an ASV by sample matrix of read counts. To simplify the visualization, we will use ***all*** ASVs represented by 100 or more total reads, including those identified as differentially abundant by the ISA and/or LEfSe. ```{r} #| code-fold: true samp_ps_all <- c("its18_ps_filt", "its18_ps_pime", "its18_ps_perfect", "its18_ps_filt_otu", "its18_ps_pime_otu", "its18_ps_perfect_otu", "its18_ps_work", "its18_ps_work_otu") samp_ps <- c("its18_ps_filt", "its18_ps_pime", "its18_ps_perfect", "its18_ps_filt_otu", "its18_ps_pime_otu", "its18_ps_perfect_otu") samp_ps_org <- c("its18_ps_work", "its18_ps_work_otu") samp_ps_pime <- c("its18_ps_pime", "its18_ps_pime_otu") samp_ps_other <- c("its18_ps_filt", "its18_ps_perfect", "its18_ps_filt_otu", "its18_ps_perfect_otu") trim_val <- 50 for (i in samp_ps_pime) { tmp_get <- get(i) tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } trim_val <- 300 for (i in samp_ps_other) { tmp_get <- get(i) tmp_df <- prune_taxa(taxa_sums(tmp_get) > trim_val, tmp_get) tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim")) assign(tmp_name, tmp_df) rm(list = ls(pattern = "tmp_")) } objects(pattern = "_trim") its18_ps_filt_trim its18_ps_pime_trim its18_ps_perfect_trim its18_ps_filt_otu_trim its18_ps_pime_otu_trim its18_ps_perfect_otu_trim ``` ```{r} #| echo: false for (i in samp_ps) { tmp_path <- file.path("files/anvio/its/") dir.create(paste(tmp_path, i, sep = "")) rm(list = ls(pattern = "tmp_")) } for (i in samp_ps_org) { tmp_path <- file.path("files/anvio/its/") dir.create(paste(tmp_path, i, sep = "")) rm(list = ls(pattern = "tmp_")) } objects() ``` ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_df <- as.data.frame(t(otu_table(tmp_get))) tmp_df <- tmp_df %>% rownames_to_column("Group") tmp_path <- file.path("files/anvio/its/") write.table(tmp_df, paste(tmp_path, i, "/", "data.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } objects() ``` Or export a table of transformed data. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_trans <- transform_sample_counts(tmp_get, function(x) 1e5 * {x/sum(x)}) tmp_df <- as.data.frame(t(otu_table(tmp_trans))) tmp_df <- tmp_df %>% rownames_to_column("Group") #tmp_name <- purrr::map_chr(i, ~ paste0(., "_trim_tab")) #assign(tmp_name, tmp_df) tmp_path <- file.path("files/anvio/its/") write.table(tmp_df, paste(tmp_path, i, "/", "data_trans.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } objects() ``` #### 2. Additional Layers for ASVs Next, we need some additional data **about the ASVs** to overlay on the visual. This can be anything however what I specifically want are the details of the ISA analysis, total reads, and lineage info. I warn you; this code will get ugly and I urge you to find a better way. Start with an ASV + lineage table for the ASVs in the new phyloseq object. ```{r} #| code-fold: true for (i in samp_ps) { tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_final"))) tmp_get_indval <- tmp_get_indval %>% dplyr::rename("Group" = "ASV_ID") %>% dplyr::rename("enrich_indval" = "group") %>% dplyr::rename("test_indval" = "indval") %>% dplyr::rename("pval_indval" = "pval") tmp_get_indval <- tmp_get_indval[,1:5] tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_final"))) tmp_get_lefse <- tmp_get_lefse %>% dplyr::rename("Group" = "ASV_ID") %>% dplyr::rename("enrich_lefse" = "group") %>% dplyr::rename("test_lefse" = "lda") %>% dplyr::rename("pval_lefse" = "pval") tmp_get_lefse <- tmp_get_lefse[,1:4] tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_otu_df <- as.data.frame(t(otu_table(tmp_get))) tmp_total <- cbind(tmp_otu_df, total_reads = rowSums(tmp_otu_df)) tmp_total <- rev(tmp_total)[1] tmp_total <- tmp_total %>% tibble::rownames_to_column("Group") tmp_tax_df <- as.data.frame(tax_table(tmp_get)) tmp_tax_df$ASV_SEQ <- NULL tmp_tax_df$ASV_ID <- NULL tmp_tax_df <- tmp_tax_df %>% tibble::rownames_to_column("Group") tmp_add_lay <- dplyr::left_join(tmp_tax_df, tmp_total, by = "Group") %>% dplyr::left_join(., tmp_get_indval, by = "Group") %>% dplyr::left_join(., tmp_get_lefse, by = "Group") tmp_add_lay$ASV_ID <- tmp_add_lay$Group tmp_add_lay <- tmp_add_lay[, c(1,16,8,12,9:11,13:15,2:7)] tmp_path <- file.path("files/anvio/its/") write.table(tmp_add_lay, paste(tmp_path, i, "/", "additional_layers.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` #### 3. Additional Views for Samples Now we want some general data **about the samples** to overlay on the visual. Again, this can be anything. How about a table of alpha diversity metrics? We actually have such a table that was generated way back up the road. Just need to fix the column names. :::{.callout-note} **Added** to profile.db with `anvi-import-misc-data` command & `--target-data-table layers` flag. ::: ```{r} #| code-fold: true metadata_tab <- read.table("files/metadata/tables/metadata.txt", header = TRUE) tmp_x <- readRDS("files/alpha/rdata/its18_ps_pime.rds") data.frame(sample_data(tmp_x)) metadata_tab[,c(2:5)] <- list(NULL) for (i in samp_ps_all) { tmp_get <- get(i) tmp_df <- data.frame(sample_data(tmp_get)) tmp_df <- tmp_df[,c(2:9)] tmp_df <- tmp_df %>% tibble::rownames_to_column("id") tmp_df <- tmp_df %>% dplyr::rename("no_asvs" = "Observed") tmp_rc <- data.frame(readcount(tmp_get)) tmp_rc <- tmp_rc %>% tibble::rownames_to_column("id") tmp_rc <- tmp_rc %>% dplyr::rename("no_reads" = 2) #identical(tmp_df$id, tmp_rc$id) tmp_merge <- dplyr::left_join(tmp_df, tmp_rc) tmp_merge <- tmp_merge[, c(1:6,10,7:9)] tmp_final <- dplyr::left_join(tmp_merge, metadata_tab) tmp_path <- file.path("files/anvio/its/") write.table(tmp_final, paste(tmp_path, i, "/", "additional_views.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } rm(metadata_tab) ``` #### 4. Taxon rank abundance by sample Turned out this was a little tricky to figure out, but thanks to a [little nifty block of code](https://github.com/joey711/phyloseq/iitses/418#iitsecomment-262637034) written by [guoyanzhao](https://github.com/guoyanzhao) on the phyloseq Iitses forum, it was a piece of cake. The code can be altered to take any rank. See the post for an explanation. Anyway, the goal is to sum each taxon at some rank and present that as a bar chart for each sample in the visualization. Anvi'o has a specific format it needs where each row is a sample and each column is a taxon. Taxa names need the prefix `t_<RANK>!`. For example, `t_class!` should be added for Class rank. :::{.callout-note} **Added** to profile.db with `anvi-import-misc-data` command & `--target-data-table layers` flag. ::: ```{r} #| code-fold: true pick_rank <- "Order" pick_rank_l <- "order" for (i in samp_ps_all) { # Make the table tmp_get <- get(i) tmp_glom <- tax_glom(tmp_get, taxrank = pick_rank) tmp_melt <- psmelt(tmp_glom) tmp_melt[[pick_rank]] <- as.character(tmp_melt[[pick_rank]]) tmp_abund <- aggregate(Abundance ~ Sample + tmp_melt[[pick_rank]], tmp_melt, FUN = sum) colnames(tmp_abund)[2] <- "tax_rank" library(reshape2) tmp_abund <- as.data.frame(reshape::cast(tmp_abund, Sample ~ tax_rank)) tmp_abund <- tibble::remove_rownames(tmp_abund) tmp_abund <- tibble::column_to_rownames(tmp_abund, "Sample") # Reorder table column by sum tmp_layers <- tmp_abund[,names(sort(colSums(tmp_abund), decreasing = TRUE))] # Add the prefix tmp_layers <- tmp_layers %>% dplyr::rename_all(function(x) paste0("t_", pick_rank_l,"!", x)) tmp_layers <- tibble::rownames_to_column (tmp_layers, "taxon") # save the table tmp_path <- file.path("files/anvio/its/") write.table(tmp_layers, paste(tmp_path, i, "/", "tax_layers.txt", sep = ""), quote = FALSE, sep = "\t", row.names = FALSE) rm(list = ls(pattern = "tmp_")) } ``` #### 5. Construct Dendrograms The last piece we need is to generate dendrograms that order the ASVs by their distribution in the samples and the samples by their ASV composition. For this task we will use anvi'o. ```bash anvi-matrix-to-newick data.txt --distance euclidean \ --linkage ward \ -o asv.tre anvi-matrix-to-newick data.txt --distance euclidean \ --linkage ward \ -o sample.tre \ --transpose ``` The first command reads the view data we generated above and uses Euclidean distance and Ward linkage for hierarchical clustering of the ASVs. The second command transposes the view data table and then does the same for the samples. There are several distance metrics and linkage methods available. See the help menu for the command by typing `anvi-matrix-to-newick -h`. Boom. ```{r} #| echo: false #bash_commands <- c() USE this to combine all commands # including in loop creates separate files for (i in samp_ps) { bash_commands <- c() tmp_command_asv <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data.txt", " --distance euclidean --linkage ward -o ", "asv.tre")) bash_commands <- append(bash_commands, tmp_command_asv) tmp_command_samp <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data.txt", " --distance braycurtis --linkage complete -o ", "sample.tre --transpose")) bash_commands <- append(bash_commands, tmp_command_samp) tmp_path <- file.path("files/anvio/its/") write(bash_commands, paste(tmp_path, i, "/", "tre.sh", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| echo: false # FOR TANSFORMED DATA for (i in samp_ps) { bash_commands <- c() tmp_command_asv <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data_trans.txt", " --distance euclidean --linkage ward -o ", "asv_trans.tre")) bash_commands <- append(bash_commands, tmp_command_asv) tmp_command_samp <- purrr::map_chr(i, ~ paste0("anvi-matrix-to-newick data_trans.txt", " --distance braycurtis --linkage complete -o ", "sample_trans.tre --transpose")) bash_commands <- append(bash_commands, tmp_command_samp) tmp_path <- file.path("files/anvio/its/") write(bash_commands, paste(tmp_path, i, "/", "tre_transformed.sh", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{bash, echo=FALSE} #NEED to run in anvio from base dir cd its18_ps_filt bash tre.sh bash tre_transformed.sh cd ../ cd its18_ps_filt_otu bash tre.sh bash tre_transformed.sh cd ../ cd its18_ps_perfect bash tre.sh bash tre_transformed.sh cd ../ cd its18_ps_perfect_otu bash tre.sh bash tre_transformed.sh cd ../ cd its18_ps_pime bash tre.sh bash tre_transformed.sh cd ../ cd its18_ps_pime_otu bash tre.sh bash tre_transformed.sh cd ../ ``` Alternatively, we can generate dendrograms using `phyloseq::distance` and `hclust`. ```{r} #| code-fold: true pick_dist <- "bray" pick_clust <- "complete" for (i in samp_ps) { # Make the table tmp_get <- get(i) tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist, type = "sample") tmp_dend <- hclust(tmp_dist, method = pick_clust) plot(tmp_dend, hang = -1) tmp_tree <- as.phylo(tmp_dend) tmp_path <- file.path("files/anvio/its/") write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| code-fold: true pick_dist_asv <- "euclidean" pick_clust_asv <- "ward" for (i in samp_ps) { # Make the table tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_dist <- phyloseq::distance(physeq = tmp_get, method = pick_dist_asv, type = "taxa") tmp_dend <- hclust(tmp_dist, method = pick_clust_asv) plot(tmp_dend, hang = -1) tmp_tree <- as.phylo(tmp_dend) tmp_path <- file.path("files/anvio/its/") write.tree(phy = tmp_tree, file = paste(tmp_path, i, "/", "asv_", pick_dist_asv, "_", pick_clust_asv, ".tre", sep = "")) rm(list = ls(pattern = "tmp_")) } ``` :::{.callout-note} The file `asv.tre` is **loaded** with `anvi-interactive` command & the `--tree` flag. ::: The ASV tree is fine as is, but the sample tree needs a [special format](http://merenlab.org/2017/12/11/additional-data-tables/#layer-orders-additional-data-table). Specifically, the tree needs to be in a three column, tab delimited, *table*. This way you can add multiple orderings to the same file and view them all in the interactive. The table needs to be in this format: | item_name | data_type | data_value | |-----------|-----------|-----------------------------------------------------| | tree_1 | newick | ((P01_D00_010_W8A:0.0250122,P05_D00_010_W8C:0.02,.. | | tree_2 | newick | ((((((((OTU14195:0.0712585,OTU13230:0.0712585)0:,...| | (…) | (…) | (…) | This is easy to do by hand, but I *really* need the practice so I will do it in R. Anvi'o is very particular about formatting. For example, if this file ends with a blank line, which it will because when anvi'o made the initial dendrogram it add a new line. We need to get rid of that or we get an error when trying to import the table. :::{.callout-note} The file `sample.tre` is **added** to the profile.db with `anvi-import-misc-data` command & `--target-data-table layer_orders` flag. ::: ```{r} #| code-fold: true for (i in samp_ps) { tmp_path <- file.path("files/anvio/its/") tmp_tree <- read_file(paste(tmp_path, i, "/", "sample.tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c("bray_complete") tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample.tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` ```{r} #| code-fold: true # FOR TRANSFORMED DATA for (i in samp_ps) { tmp_path <- file.path("files/anvio/its/") tmp_tree <- read_file(paste(tmp_path, i, "/","sample_trans.tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c("bray_complete") tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample_trans.tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } objects() ``` ```{r} #| code-fold: true # FOR HCLUST TREE for (i in samp_ps) { tmp_path <- file.path("files/anvio/its/") tmp_tree <- read_file(paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = "")) tmp_tree <- gsub("[\r\n]", "", tmp_tree) tmp_item <- c(paste(pick_dist, "_", pick_clust, "_hclust", sep = "")) tmp_type <- c("newick") tmp_df <- c(tmp_tree) tmp_tab <- data.frame(tmp_item, tmp_type, tmp_df) library(janitor) tmp_tab %>% remove_empty("rows") colnames(tmp_tab) <- c("item_name", "data_type", "data_value") write.table(tmp_tab, paste(tmp_path, i, "/", "sample_", pick_dist, "_", pick_clust, ".tre", sep = ""), sep = "\t", quote = FALSE, row.names = FALSE, na = "") rm(list = ls(pattern = "tmp_")) } ``` #### 6. Make a fasta file We don't need to add a fasta file, but it is a nice way to keep everything in one place. Plus, you can do BLAST searches directly in the interface by right clicking on the ASV of interest, so it is nice to have the sequences. :::{.callout-note} **Loaded** with `anvi-interactive` command & `--fasta-file` flag. ::: ```{r} #| code-fold: true for (i in samp_ps) { tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_trim"))) tmp_tab <- tax_table(tmp_get) tmp_tab <- tmp_tab[, 7] tmp_df <- data.frame(row.names(tmp_tab), tmp_tab) colnames(tmp_df) <- c("ASV_ID", "ASV_SEQ") tmp_df$ASV_ID <- sub("^", ">", tmp_df$ASV_ID) tmp_path <- file.path("files/anvio/its/") write.table(tmp_df, paste(tmp_path, i, "/", i, ".fasta", sep = ""), sep = "\n", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") rm(list = ls(pattern = "tmp_")) } ``` ### Building the Profile Database Time to put all of these pieces together. This gets a little tricky since we do not have a database to start with because some of these files can be loaded directly in the interface but some need to be added to a database. When we fire up the interactive in `--manual` mode, we ***must*** give anvi'o the name of a database and it will *create* that database for us. Then we can shut down the interactive, add the necessary data files, and start back up. ```bash anvi-interactive --view-data data.txt \ --tree asv.tre \ --additional-layers additional_layers.txt \ --profile-db profile.db \ --manual ``` Now we have a new profile database that we can add the sample metadata (`additional_layers.txt`) and the sample dendrogram (sample.tre) using the command `anvi-import-misc-data`. These commands add the table to the new `profile.db`. First, kill the interactive. ```bash anvi-import-misc-data additional_views.txt \ --pan-or-profile-db profile.db \ --target-data-table layers anvi-import-misc-data sample.tre \ --pan-or-profile-db profile.db \ --target-data-table layer_orders ``` One last this is to get the table with the taxonomy total by sample (`tax_layers.txt`) into the profile database. We will run the same command we just used. ```bash anvi-import-misc-data tax_layers.txt \ --pan-or-profile-db profile.db \ --target-data-table layers ``` In fact, we could just as easily append the taxonomy total data onto the `additional_layers.txt` and import in one command. But we didn't. ### Interactive Interface With a populated database in hand, we can now begin modifying the visual by running the interactive command again. ```bash anvi-interactive --view-data data.txt \ --tree asv.tre \ --additional-layers additional_layers.txt \ --profile-db profile.db --fasta-file anvio.fasta \ --manual ``` ```{r} #| echo: false #| warning: false #| fig-height: 2 knitr::include_graphics("figures/16s-da-asvs/before.png") ``` ```{r} #| echo: false # ALL commands #anvi-interactive --view-data data_its18_ps_pime.txt --tree asv_its18_ps_pime.tre --additional-layers additional_layers_its18_ps_pime.txt --profile-db profile_its18_ps_pime.db --fasta-file its18_ps_pime.fasta --manual #anvi-import-misc-data additional_views_its18_ps_pime.txt --pan-or-profile-db profile_its18_ps_pime.db --target-data-table layers #anvi-import-misc-data sample_its18_ps_pime.tre --pan-or-profile-db profile_its18_ps_pime.db --target-data-table layer_orders #anvi-import-misc-data tax_layers_its18_ps_pime.txt --pan-or-profile-db profile_its18_ps_pime.db --target-data-table layers #anvi-interactive --view-data data_its18_ps_pime.txt --tree asv_its18_ps_pime.tre --additional-layers additional_layers_its18_ps_pime.txt --profile-db profile_its18_ps_pime.db --fasta-file its18_ps_pime.fasta --manual ``` </details> ```{r} #| echo: false save.image("page_build/mmarker_wf.rdata") ``` ```{r} #| include: false #| eval: true remove(list = ls()) ``` # Workflow Output Data products generated in this workflow can be downloaded from figshare. <iframe src="https://widgets.figshare.com/articles/16828249/embed?show_title=1" width="100%" height="251" allowfullscreen frameborder="0"></iframe> <a href="metadata.html" class="btnnav button_round" style="float: right;">Next workflow: 8. Metadata Analysis</a> <a href="beta.html" class="btnnav button_round" style="float: left;">Previous workflow: 6. Beta Diversity & Dispersion Estimates</a> ```{r} #| message: false #| results: hide #| eval: true #| echo: false remove(list = ls()) ### COmmon formatting scripts ### NOTE: captioner.R must be read BEFORE captions_XYZ.R source(file.path("assets", "functions.R")) ``` #### Source Code {.appendix} The source code for this page can be accessed on GitHub `r fa(name = "github")` by [clicking this link](`r source_code()`). #### Data Availability {.appendix} Data generated in this workflow and the Rdata needed to run the workflow can be accessed on figshare at [10.25573/data.16828249](https://doi.org/10.25573/data.16828249). #### Last updated on {.appendix} ```{r} #| echo: false #| eval: true Sys.time() ```