Extracting information in genomic regions of interest can be done using the following steps:
- Load in packages and define some functions that create GRanges from common files:
library(GenomicRanges)
library(rtracklayer)
library(SummarizedExperiment)

# Import a GFF file and return its records as a GRanges object.
#
# file_name: path to the GFF file; handed to rtracklayer::import.gff().
# Returns:   the imported records coerced to class "GRanges".
get_granges_from_gff <- function(file_name) {
  gff <- rtracklayer::import.gff(file_name)
  as(gff, "GRanges")
}

# Import a BED file and return its records as a GRanges object.
#
# file_name: path to the BED file; handed to rtracklayer::import.bed().
# Returns:   the imported records coerced to class "GRanges".
get_granges_from_bed <- function(file_name) {
  bed <- rtracklayer::import.bed(file_name)
  as(bed, "GRanges")
}

# Build a GRanges object from a tab-separated text file with a header row.
#
# file_name: path to the TSV file; read with readr::read_tsv() using the
#            first row as column names.
# Returns:   a GRanges object built by makeGRangesFromDataFrame(). The table
#            must contain columns that function can recognise as the
#            sequence name, start and end; all remaining columns are kept
#            as metadata columns (keep.extra.columns = TRUE).
get_granges_from_text <- function(file_name) {
  df <- readr::read_tsv(file_name, col_names = TRUE)
  GenomicRanges::makeGRangesFromDataFrame(df, keep.extra.columns = TRUE)
}
- Create some GRanges objects using those functions:
# Load the example annotation for Arabidopsis chromosome 4 from both a GFF
# file and a tab-separated text file. Paths are resolved relative to the
# current working directory. The GFF loader is get_granges_from_gff(), the
# helper defined earlier in this recipe.
gr_from_gff <- get_granges_from_gff(
  file.path(getwd(), "datasets", "ch2", "arabidopsis_chr4.gff")
)
gr_from_txt <- get_granges_from_text(
  file.path(getwd(), "datasets", "ch2", "arabidopsis_chr4.txt")
)
- Extract a region by filtering on attributes; in this case, the seqnames and metadata columns:
# Keep only the records whose `type` metadata column is "gene" and whose
# sequence name is "Chr4".
genes_on_chr4 <- gr_from_gff[
  gr_from_gff$type == "gene" &
    seqnames(gr_from_gff) %in% "Chr4"
]
- Manually create a region of interest:
# A single window on "Chr4": starts at position 10000 and is 1000
# positions wide.
region_of_interest_gr <- GRanges(
  seqnames = "Chr4",
  ranges = IRanges(start = 10000, width = 1000)
)
- Use the region of interest to subset the larger object:
# Find every range in gr_from_gff that overlaps the region of interest
# (the region is the query, the annotation is the subject) ...
overlap_hits <- findOverlaps(
  query = region_of_interest_gr,
  subject = gr_from_gff
)

# ... then use the subject indices of those hits to pull the matching
# features out of the annotation and display them.
features_in_region <- gr_from_gff[subjectHits(overlap_hits)]
features_in_region