Package Biskit :: Module Blast2Seq
[hide private]
[frames] | no frames]

Source Code for Module Biskit.Blast2Seq

  1  #!/usr/bin/python 
  2  ## 
  3  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  4  ## Copyright (C) 2004-2006 Raik Gruenberg & Johan Leckner 
  5  ## 
  6  ## This program is free software; you can redistribute it and/or 
  7  ## modify it under the terms of the GNU General Public License as 
  8  ## published by the Free Software Foundation; either version 2 of the 
  9  ## License, or any later version. 
 10  ## 
 11  ## This program is distributed in the hope that it will be useful, 
 12  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 14  ## General Public License for more details. 
 15  ## 
 16  ## You find a copy of the GNU General Public License in the file 
 17  ## license.txt along with this program; if not, write to the Free 
 18  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 19  ## 
 20  ## 
 21  ## $Revision: 2.9 $ 
 22  ## last $Date: 2007/02/28 22:37:32 $ 
 23  ## last $Author: graik $ 
 24   
 25  """ 
 26  Blast 2 sequences against each other, 
 27  Return sequence identity 
 28   
 29  @note: argueably obsolete - use BioPython instead 
 30  """ 
 31   
 32  from Biskit import Executor, TemplateError 
 33  import tools as T           
 34  import tempfile, re 
 35  import os.path 
 36  from Biskit.Errors import BiskitError 
 37   
 38   
39 -class Blast2SeqError( BiskitError ):
40 pass
41 42
43 -class Blast2Seq( Executor ):
44 """ 45 Determine sequence identity between 2 protein sequences 46 """ 47
48 - def __init__(self, seq1, seq2, **kw ):
49 """ 50 @param seq1: sequence string 51 @type seq1: str 52 @param seq2: sequence string 53 @type seq2: str 54 """ 55 self.seq1 = seq1 56 self.seq2 = seq2 57 58 self.inp1 = tempfile.mktemp('_seq1.fasta') 59 self.inp2 = tempfile.mktemp('_seq2.fasta') 60 61 # Blast Identities and Expext value 62 self.ex_identity = re.compile('.+ Identities = (\d+)/(\d+) ') 63 self.ex_expect = re.compile('.+ Expect = ([\d\-e\.]+)') 64 65 blastcmd = '-i %s -j %s -M BLOSUM62 -p blastp -F F'\ 66 %(self.inp1, self.inp2) 67 68 Executor.__init__( self, 'bl2seq', blastcmd, catch_out=1, **kw )
69 70
71 - def prepare( self ):
72 """ 73 create temporary fasta files for bl2seq 74 """ 75 f1 = open(self.inp1, 'w') 76 f2 = open(self.inp2, 'w') 77 f1.write(">Sequence 1\n"+self.seq1) 78 f2.write(">Sequence 2\n"+self.seq2) 79 f1.close() 80 f2.close()
81 82
83 - def cleanup(self):
84 """ 85 remove temporary files 86 """ 87 Executor.cleanup( self ) 88 89 if not self.debug: 90 T.tryRemove( self.inp1 ) 91 T.tryRemove( self.inp2 )
92 93
94 - def filterBlastHit( self ):
95 """ 96 Extract sequence identity and overlap length from one single 97 bl2seq hit 98 99 @return: blast results, e.g. {'aln_id':1.0, 'aln_len':120} 100 @rtype: dict 101 """ 102 ## check that the outfut file is there and seems valid 103 if not os.path.exists( self.f_out ): 104 raise Blast2SeqError,\ 105 'Hmmersearch result file %s does not exist.'%self.f_out 106 107 if T.fileLength( self.f_out ) < 10: 108 raise Blast2SeqError,\ 109 'Hmmersearch result file %s seems incomplete.'%self.f_out 110 111 out = open( self.f_out, 'r' ) 112 hitStr = out.read() 113 out.close() 114 115 # get rid of line breaks 116 hitStr = hitStr.replace( '\n', '#' ) 117 118 try: 119 _id = self.ex_identity.match(hitStr) # Blast Identity 120 if (_id == None): 121 return {'res_id':0, 'aln_id':0, 'aln_len':0} 122 res_identical, res_number = int(_id.group(1)), int(_id.group(2)) 123 id = 1.0 * res_identical/res_number 124 return {'res_id':res_identical, 'aln_id':id, 'aln_len':res_number} 125 except: 126 T.errWriteln("Error 1 in filterBlastHit:") 127 T.errWriteln("Hit Parsed: " + hitStr) 128 return {}
129 130
131 - def finish( self ):
132 """ 133 Overrides Executor method 134 """ 135 Executor.finish( self ) 136 self.result = self.filterBlastHit( )
137 138 139 ############# 140 ## TESTING 141 ############# 142 143 import Biskit.test as BT 144
145 -class Test(BT.BiskitTest):
146 """Test Blast2Seq""" 147 148 TAGS = [BT.EXE] 149
150 - def test_all( self ):
151 """Blast2Seq test """ 152 self.blaster = Blast2Seq("AAAFDASEFFGIGHHSFKKEL", 153 "AAAFDASEFFGIGHHSAKK") 154 155 self.r = self.blaster.run() 156 157 if self.local: 158 print self.r 159 160 self.assertEqual( self.r,{'aln_len': 19, 'aln_id': 0.94736842105263153, 161 'res_id': 18} )
162 163 164 if __name__ == '__main__': 165 166 BT.localTest() 167