Forum >> Programmazione Python >> Files e Directory >> Script per interrogazione database BLAST

Pagina: 1

ciao a tutti
Ho trovato in un articolo uno script (blast_with_ncbi.py) che permette, fornendo un file di sequenze di geni in formato .fasta, l'interrogazione di un database locale.

aprendo il file con gedit risulta chiaro che devo modificare alcune variabili, in particolare le directory in cui sono contenuti i vari file.

tuttavia non riesco a farlo funzionare. credo che l'errore dipenda dal fatto che non so indicare in modo corretto la "workdir":
# Main directory for work
workdir = os.path.dirname(os.path.realpath(__file__))


Qualcuno potrebbe aiutarmi?
Riporto di seguito lo script originale e, a seguire, la versione modificata da me.

#
# Imports
import os
import glob
from   Bio import SeqIO
from   Bio.Blast.Applications import NcbiblastnCommandline
 
#
# Defines
# Change these for your specific needs
 
# Directory that contains this script (symbolic links resolved).
# Every other location below is expressed relative to it.
workdir = os.path.dirname(os.path.realpath(__file__))

# Local BLAST database and GI filter list: '<workdir>/../blast-db'
blast_db = '{0}/../blast-db'.format(workdir)

# Directory searched for the input FASTA sequences: '<workdir>/../fasta'
fasta_dir = '{0}/../fasta'.format(workdir)

# Directory that receives the BLAST result files: '<workdir>/../blast_results'
outdir = '{0}/../blast_results'.format(workdir)
 
#
# Given a directory this returns a list of fasta files
# Change the '*.fasta' pattern if your files use a different extension.
def get_fasta_files(fasta_dir):
    """Return the paths of the ``*.fasta`` files located directly in *fasta_dir*.

    The directory is converted to an absolute path first, so the returned
    paths are absolute as well. The directory being searched is printed as a
    progress message. An empty list is returned when nothing matches.
    """
    search_root = os.path.abspath(fasta_dir)
    print('Looking for fasta files in: ',search_root)
    pattern = '%s/*.fasta' % search_root
    return glob.glob(pattern)
 
#
# Returns the output file
def outfile(dir,file):
    """Return the CSV result path for a given input file.

    dir  -- directory in which the result file is placed
    file -- path of the input file; only its base name is used

    The result is ``<dir>/<base name without its last extension>.csv``.
    """
    # splitext() returns a (root, ext) pair: element 0 is the name without
    # its final extension.
    stem = os.path.splitext(os.path.basename(file))[0]
    return '%s/%s.csv' % (dir, stem)
 
#
# BLASTS the sequence file against the local database
def ncbi_blast(in_file,out_file,dbp):
    """Run blastn for *in_file* and write the results to *out_file*.

    The working directory is switched to *dbp* for the duration of the run
    and is always switched back afterwards, even when the BLAST command
    fails.

    in_file  -- path of the query file passed to blastn (``query=``)
    out_file -- path of the result file passed to blastn (``out=``)
    dbp      -- directory to run blastn from (presumably the one holding the
                local database files -- verify against your setup)

    NOTE: relative in_file/out_file paths are interpreted after the chdir,
    i.e. relative to *dbp*; pass absolute paths to avoid surprises.
    """
    cur_dir = os.getcwd()
    os.chdir(dbp)
    try:
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        # The db and outfmt values carry their own inner single quotes,
        # presumably so that each multi-word value reaches blastn as a single
        # argument -- confirm against the Biopython wrapper in use.
        cmd_line = NcbiblastnCommandline(query=in_file, db="'nt_ncbi_plants fpuk'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=20,num_threads=8)
        cmd_line()
    finally:
        # Restore the caller's working directory even if blastn raised, so a
        # failed run does not leave the process stranded in dbp.
        os.chdir(cur_dir)
 
#
# Each fasta file contains a set of sequences that were
# matched for a given tag. They are either reverse or forward.
# We blast them to the database.
def blast_sequences( fastas, odir, db_dir ):
    """BLAST every FASTA file in *fastas*, one after the other.

    fastas -- iterable of FASTA file paths
    odir   -- directory in which each result file is created
    db_dir -- directory handed to ncbi_blast() as its database directory

    A progress line is printed before each file is processed.
    """
    for fasta_path in fastas:
        print( 'Processing: ', fasta_path )
        result_path = outfile(odir, fasta_path)
        ncbi_blast(fasta_path, result_path, db_dir)
 
# Main
# These statements run as soon as the file is executed (there is no
# __main__ guard), in this exact order.
print('Running')
# Work from the script's own directory.
os.chdir(workdir)
# Collect the input FASTA files, then BLAST each of them into outdir.
fasta_files = get_fasta_files(fasta_dir)
blast_sequences(fasta_files,outdir,blast_db)
print('Done')
_______________________

#
# Imports
import os
import glob
from   Bio import SeqIO
from   Bio.Blast.Applications import NcbiblastnCommandline
 
#
# Defines
# Change these for your specific needs
 
# Main directory for work
# (value in the original script: os.path.dirname(os.path.realpath(__file__)))
# NOTE(review): the string literal below is immediately followed by a
# parenthesised expression, which Python evaluates as a call on the string
# and which fails with "TypeError: 'str' object is not callable".
# The literal also contains '/home/doiar/Scrivania' twice. Presumably the
# intended value is the single path of the BLAST-Tools directory -- confirm
# against the real location on disk.
workdir   = '/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools' ('/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools/bin' '%(blast_with_ncbi.py)%')


# Local BLAST database and GI filter list
# NOTE(review): %s is replaced by workdir, and what follows it already looks
# like a full path, so the directory prefix ends up in the result twice.
# Presumably only the last component ('blast-db') should follow %s.
blast_db  = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast-db/' % workdir


# FASTA directory, where to find the sequences
# NOTE(review): same duplicated-prefix problem as blast_db above.
fasta_dir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/fasta' % workdir


# Output of our BLAST results
# NOTE(review): same duplicated-prefix problem as blast_db above.
outdir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast_results' % workdir

 
#
# Given a directory this returns a list of fasta files
# Change the '*.fasta' pattern if your files use a different extension.
def get_fasta_files(fasta_dir):
    """Return the paths of the ``*.fasta`` files located directly in *fasta_dir*.

    The directory is converted to an absolute path first, so the returned
    paths are absolute as well. The directory being searched is printed as a
    progress message. An empty list is returned when nothing matches.
    """
    search_root = os.path.abspath(fasta_dir)
    print('Looking for fasta files in: ',search_root)
    pattern = '%s/*.fasta' % search_root
    return glob.glob(pattern)
 
#
# Returns the output file
def outfile(dir,file):
    """Return the CSV result path for a given input file.

    dir  -- directory in which the result file is placed
    file -- path of the input file; only its base name is used

    The result is ``<dir>/<base name without its last extension>.csv``.
    """
    # splitext() returns a (root, ext) pair: element 0 is the name without
    # its final extension.
    stem = os.path.splitext(os.path.basename(file))[0]
    return '%s/%s.csv' % (dir, stem)
 
#
# BLASTS the sequence file against the local database
def ncbi_blast(in_file,out_file,dbp):
    """Run blastn for *in_file* and write the results to *out_file*.

    The working directory is switched to *dbp* for the duration of the run
    and is always switched back afterwards, even when the BLAST command
    fails.

    in_file  -- path of the query file passed to blastn (``query=``)
    out_file -- path of the result file passed to blastn (``out=``)
    dbp      -- directory to run blastn from (presumably the one holding the
                local database files -- verify against your setup)

    NOTE: relative in_file/out_file paths are interpreted after the chdir,
    i.e. relative to *dbp*; pass absolute paths to avoid surprises.
    """
    cur_dir = os.getcwd()
    os.chdir(dbp)
    try:
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        # The db and outfmt values carry their own inner single quotes,
        # presumably so that each value reaches blastn as a single argument
        # -- confirm against the Biopython wrapper in use.
        cmd_line = NcbiblastnCommandline(query=in_file, db="'ITSNCBIDB'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=10,num_threads=8)
        cmd_line()
    finally:
        # Restore the caller's working directory even if blastn raised, so a
        # failed run does not leave the process stranded in dbp.
        os.chdir(cur_dir)
 
#
# Each fasta file contains a set of sequences that were
# matched for a given tag. They are either reverse or forward.
# We blast them to the database.
def blast_sequences( fastas, odir, db_dir ):
    """BLAST every FASTA file in *fastas*, one after the other.

    fastas -- iterable of FASTA file paths
    odir   -- directory in which each result file is created
    db_dir -- directory handed to ncbi_blast() as its database directory

    A progress line is printed before each file is processed.
    """
    for fasta_path in fastas:
        print( 'Processing: ', fasta_path )
        result_path = outfile(odir, fasta_path)
        ncbi_blast(fasta_path, result_path, db_dir)
 
# Main
# These statements run as soon as the file is executed (there is no
# __main__ guard), in this exact order.
print('Running')
# Work from the directory configured in workdir.
os.chdir(workdir)
# Collect the input FASTA files, then BLAST each of them into outdir.
fasta_files = get_fasta_files(fasta_dir)
blast_sequences(fasta_files,outdir,blast_db)
print('Done')

Ciao caro, non conosco quello di cui stai parlando, ma a logica vedo alcuni problemi.

# Main directory for work workdir   = os.path.dirname(os.path.realpath(__file__))
workdir   = '/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools' ('/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools/bin' '%(blast_with_ncbi.py)%')
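Per esempio, qui sopra non c'è una ripetizione? Leggo due volte /home/doiar/Scrivania: non credo sia corretto/voluto, fai una verifica. Inoltre la parte tra parentesi subito dopo la stringa viene interpretata da Python come una chiamata alla stringa stessa, e questo solleva un TypeError ('str' object is not callable): va rimossa.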


 
# Local BLAST database and GI filter list
blast_db  = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast-db/' % workdir

 
# FASTA directory, where to find the sequences
fasta_dir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/fasta' % workdir

 
# Output of our BLAST results
outdir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast_results' % workdir

Anche qui sopra c'è un errore. Se sostituisci la workdir con quanto hai specificato prima, anche qui otterrai una ripetizione del percorso. Basterebbe lasciare l'ultima parte del percorso (blast-db, fasta, blast_results, ...), ad esempio blast_db = '%s/blast-db' % workdir, ovviamente a patto che workdir sia configurata correttamente.

Ciao.
Daniele


Pagina: 1



Esegui il login per scrivere una risposta.