How to do it...

Genomic loci that match peptides can be identified using the following steps:

  1. Load in the libraries and the data:
library(MSnID)
library(dplyr)
library(Biostrings)

# Create an empty MSnID object and populate it from the mzIdentML file.
msnid <- MSnID()
msnid <- read_mzIDs(
  msnid,
  file.path(getwd(), "datasets", "ch6", "PXD006247.mzXML.mzid")
)

# Keep the non-decoy rows, retain the spectrum ID and peptide sequence, and
# build a FASTA identifier per row by appending the row index to the
# spectrum ID.
peptide_info <- as(msnid, "data.table") %>%
  filter(isDecoy == FALSE) %>%
  select(spectrumID, pepSeq) %>%
  # seq_along() rather than 1:length(): 1:0 would give c(1, 0) and break
  # mutate() when no rows survive the filter.
  mutate(fasta_id = paste0(spectrumID, ":", seq_along(spectrumID)))

  1. Extract the peptide sequence and save it as a fasta file:
# Wrap the peptide sequences in an AAStringSet and label each entry with its
# FASTA identifier.
string_set <- AAStringSet(peptide_info[["pepSeq"]])
names(string_set) <- peptide_info[["fasta_id"]]

# Only the first entry (string_set[1]) is written to peptides.fa.
# NOTE(review): presumably this keeps the BLAST run short for the example;
# drop the [1] to export every peptide -- confirm intent.
writeXStringSet(
  x = string_set[1],
  filepath = file.path(getwd(), "datasets", "ch6", "peptides.fa")
)
  1. Prepare the filenames for the BLAST run:
# All BLAST inputs and outputs live in datasets/ch6 under the working directory.
ch6_dir <- file.path(getwd(), "datasets", "ch6")

input_seqs <- file.path(ch6_dir, "peptides.fa") # query peptides
genome_seqs <- file.path(ch6_dir, "ecoli_genome.fasta") # subject genome
output_blast <- file.path(ch6_dir, "out.blast") # BLAST results file
  1. Prepare the BLAST command:
# Assemble the tblastn command line as a single string for system().
# The three file paths are passed through shQuote() so that a working
# directory containing spaces or shell metacharacters does not split or
# alter the arguments.
command <- paste(
  "tblastn",
  "-query", shQuote(input_seqs),
  "-subject", shQuote(genome_seqs),
  "-out", shQuote(output_blast),
  "-word_size 2 -evalue 20000 -seg no -matrix PAM30",
  "-comp_based_stats F -outfmt 6 -max_hsps 1"
)
  1. Run BLAST as an external process (with wait = TRUE, R blocks until it finishes):
library(withr)

# Run the command with the BLAST install directory temporarily added to PATH.
# NOTE(review): this directory is machine-specific -- change it to wherever
# tblastn is installed on your system.
blast_status <- with_path(
  "/Users/macleand/miniconda2/bin",
  system(command, wait = TRUE)
)

# system() returns the exit status of the command; fail here with a clear
# message instead of letting the next step fail on a missing output file.
if (blast_status != 0) {
  stop("tblastn exited with status ", blast_status, call. = FALSE)
}
  1. Convert BLAST into GFF and GRanges:
# Read the BLAST hits written by the -outfmt 6 run. That format is
# tab-delimited with no header, so split on tabs only and switch off quote
# and comment processing so identifiers containing spaces, quotes or '#'
# are read verbatim. Columns keep the default names V1, V2, ...
results <- read.table(
  output_blast,
  header = FALSE,
  sep = "\t",
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

#' Convert tabular BLAST hits into GFF-style columns
#'
#' @param blst_res A data frame of BLAST hits whose columns are named
#'   `V1`, `V2`, ... (as produced by `read.table()` on a headerless file).
#'   `V1` is used as the hit name, `V2` as the sequence ID, `V3` as the score
#'   and `V9`/`V10` as the subject coordinates.
#' @return `blst_res` with the columns `seqid`, `source`, `type`, `start`,
#'   `end`, `score`, `strand`, `phase` and `attributes` added, and every
#'   column whose name starts with "V" removed.
blast_to_gff <- function(blst_res) {
  blst_res %>%
    mutate(
      seqid = V2,
      source = "tblastn",
      type = ".",
      # Subject coordinates come back reversed (V9 > V10) for hits on the
      # reverse strand; GFF requires start <= end, so order them here and
      # record the orientation in `strand` instead.
      start = pmin(V9, V10),
      end = pmax(V9, V10),
      # NOTE(review): in the default -outfmt 6 column order V3 is percent
      # identity and V12 is the bit score -- confirm V3 is the intended score.
      score = V3,
      strand = if_else(V9 <= V10, "+", "-"),
      phase = ".",
      # paste0() so the attribute reads "Name=<id>" without a stray space.
      attributes = paste0("Name=", V1)
    ) %>%
    select(-starts_with("V"))
}

# Reshape the parsed BLAST hits into GFF-style columns.
gff_df <- results %>% blast_to_gff()

library(GenomicRanges)

# Build a GRanges object from the GFF-style data frame.
# NOTE(review): called with default arguments, so the non-coordinate columns
# (source, type, score, attributes) are presumably not carried over -- confirm
# whether keep.extra.columns = TRUE is wanted.
granges <- makeGRangesFromDataFrame(df = gff_df)
..................Content has been hidden....................

You can't read all pages of this ebook; please click here to log in and view the full content.
Reset
3.137.161.193