How to do it...

Reading amplicon data from raw reads with dada2 can be done using the following steps:

  1. Load the libraries and prepare a plot for each fastq file:
library(dada2)
library(cowplot)

# Locate the raw reads under <working dir>/datasets/ch5/fq.
fq_dir <- file.path(getwd(), "datasets", "ch5", "fq")

# `pattern` is a regular expression: escape the dots and anchor at the end of
# the name so that only files ending in ".fq.gz" are selected. The unanchored
# pattern "fq.gz" would also match the "*.fq.gz.trimmed.filtered" outputs
# written by the filtering step whenever the recipe is run a second time.
read_files <- list.files(fq_dir, pattern = "\\.fq\\.gz$", full.names = TRUE)

# One quality-profile plot per fastq file, arranged on a single grid.
quality_plots <- lapply(read_files, plotQualityProfile)
plot_grid(plotlist = quality_plots)

  2. Quality-trim, filter, and dereplicate the files:
# Each input gets a sibling output file with ".trimmed.filtered" appended.
filtered_paths <- paste0(read_files, ".trimmed.filtered")

# Trim and quality-filter every fastq file, writing compressed output.
for (i in seq_along(read_files)) {
  fastqFilter(
    read_files[[i]], filtered_paths[[i]],
    trimLeft = 10, truncLen = 250,
    maxN = 0, maxEE = 2, truncQ = 2,
    compress = TRUE
  )
}

# Pick up the filtered files that were written and dereplicate them.
trimmed_files <- list.files(
  fq_dir,
  pattern = "trimmed.filtered",
  full.names = TRUE
)
derep_reads <- derepFastq(trimmed_files)
  3. Estimate the dada2 model from a subset of samples:
# `derep_reads` already holds the dereplicated reads produced at the end of the
# filtering step, so the trimmed files are not listed and dereplicated a second
# time here (dereplication is the expensive part of that step).

# Learn the error model from at most the first five samples. Capping the index
# at length(derep_reads) avoids the NULL entries that `[1:5]` would produce
# when fewer than five samples are available.
n_train <- min(5L, length(derep_reads))
dd_model <- dada(derep_reads[seq_len(n_train)], err = NULL, selfConsist = TRUE)
  4. Infer the sequence composition of the samples using the parameters estimated in Step 3:
# Take the error rates estimated for the first training sample and use them
# to run dada() on every dereplicated sample, with pooling enabled.
learned_err <- dd_model[[1]][["err_out"]]
dada_all <- dada(derep_reads, err = learned_err, pool = TRUE)
  5. Assign taxonomy to the sequences:
# Build the sequence table from the dada() results for all samples.
sequence_tb <- makeSequenceTable(dada_all)

# Classify the sequences against the reference training set shipped with the
# chapter's datasets.
rdp_fasta <- file.path(getwd(), "datasets", "ch5", "rdp_train_set_14.fa")
taxonomy_tb <- assignTaxonomy(sequence_tb, refFasta = rdp_fasta)

# Show columns 1 to 6 of the first row of the taxonomy table.
taxonomy_tb[1, 1:6]
..................Content has been hidden....................

You can't read the whole page of this ebook; please click here to log in and view the full page.
Reset
3.137.202.103