Package Biskit :: Module DSSP
[hide private]
[frames] | no frames]

Source Code for Module Biskit.DSSP

  1  ## Automatically adapted for numpy.oldnumeric Mar 26, 2007 by alter_code1.py 
  2   
  3  ## 
  4  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  5  ## Copyright (C) 2004-2006 Raik Gruenberg & Johan Leckner 
  6  ## 
  7  ## This program is free software; you can redistribute it and/or 
  8  ## modify it under the terms of the GNU General Public License as 
  9  ## published by the Free Software Foundation; either version 2 of the 
 10  ## License, or any later version. 
 11  ## 
 12  ## This program is distributed in the hope that it will be useful, 
 13  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 15  ## General Public License for more details. 
 16  ## 
 17  ## You find a copy of the GNU General Public License in the file 
 18  ## license.txt along with this program; if not, write to the Free 
 19  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 20  ## 
 21  ##  
 22  ## last $Author: graik $ 
 23  ## last $Date: 2007/03/26 18:40:40 $ 
 24  ## $Revision: 1.11 $ 
 25   
 26  """ 
 27  Calculates the secondary structure using DSSP. 
 28  """ 
 29   
 30  import tempfile 
 31  import numpy.oldnumeric as N 
 32  from Biskit import Executor, TemplateError 
 33  import Biskit.tools as T 
 34  import Biskit.molUtils as MU 
 35  from Errors import BiskitError 
 36   
class Dssp_Error( BiskitError ):
    """
    Error raised by this module when the DSSP input cannot be prepared
    or the DSSP result file cannot be read or interpreted.
    """
39 40
class Dssp( Executor ):
    """
    Run Dssp
    ========
    The DSSP program will define the secondary structure of a given
    structure. The secondary structure elements defined are::

      H = alpha helix
      B = residue in isolated beta-bridge
      E = extended strand, participates in beta ladder
      G = 3-helix (3/10 helix)
      I = 5 helix (pi helix)
      T = hydrogen bonded turn
      S = bend
      . = loop or irregular

    Example usage
    -------------
        >>> d = Dssp( model )
        >>> result = d.run()

    References
    ----------
       - U{http://swift.cmbi.ru.nl/gv/dssp/}
       - Kabsch W, Sander C (1983) Dictionary of protein secondary
         structure: pattern recognition of hydrogen-bonded and geometrical
         features. Biopolymers Dec;22(12):2577-637.
    """

    ## the four backbone atoms a residue needs to count as complete
    BACKBONE_ATOMS = ( 'CA', 'N', 'C', 'O' )

    ## first 12 characters of the column header that precedes the
    ## per-residue records in a DSSP report
    RESIDUE_HEADER = '  #  RESIDUE'


    def __init__( self, model, **kw ):
        """
        @param model: model to analyze
        @type  model: PDBModel

        @param kw: additional key=value parameters for Executor:
        @type  kw: key=value pairs
        ::
          debug    - 0|1, keep all temporary files (default: 0)
          verbose  - 0|1, print progress messages to log (log != STDOUT)
          node     - str, host for calculation (None->local) NOT TESTED
                          (default: None)
          nice     - int, nice level (default: 0)
          log      - Biskit.LogFile, program log (None->STDOUT) (default: None)
        """
        self.model = model

        ## names of the temporary pdb input and DSSP output files
        ## NOTE(review): tempfile.mktemp only picks a name and is documented
        ## as race-prone; switching to mkstemp would pre-create the files,
        ## which may interact with how Executor handles f_out -- confirm
        ## before changing.
        self.f_pdb = tempfile.mktemp( '_dssp.pdb')
        self.f_out = tempfile.mktemp( '_dssp.out')

        Executor.__init__( self, 'dsspcmbi',
                           args='-na %s'%self.f_pdb,
                           catch_err=1, **kw )


    def prepare( self ):
        """
        Overrides Executor method. Reduce the model to its heavy atoms and
        write it to the temporary pdb file.

        @raise Dssp_Error: if every remaining atom is a C-alpha atom
        """
        self.model = self.model.compress( self.model.maskHeavy() )

        ## maskCA has to be *called*; summing the bound method itself never
        ## equals the atom count, which silently disabled this check
        if self.model.lenAtoms() == N.sum( self.model.maskCA() ):
            raise Dssp_Error( 'The structure you want to calculate the '
                              'secondary structure for seems to be a '
                              'carbon alpha trace. Terminating' )

        self.model.writePdb( self.f_pdb )


    def cleanup( self ):
        """
        Tidy up the mess you created: run the Executor cleanup and remove
        the temporary pdb file unless debug is set.
        """
        Executor.cleanup( self )

        if not self.debug:
            T.tryRemove( self.f_pdb )


    def _readOutput( self ):
        """
        Read the DSSP result file and check that it seems valid.

        @return: all lines of the DSSP result file
        @rtype: [str]

        @raise Dssp_Error: if the file cannot be read, is empty or has
                           fewer than 9 lines
        """
        try:
            out_file = open( self.f_out )
            try:
                lines = out_file.readlines()
            finally:
                ## close the handle even if reading fails
                out_file.close()
        except (IOError, OSError):
            raise Dssp_Error(
                'Dssp result file %s does not exist.'%self.f_out )

        if len(lines) == 0:
            raise Dssp_Error(
                'Dssp result file %s empty'%self.f_out )

        if len(lines) < 9:
            raise Dssp_Error(
                'Dssp result file %s contains no secondary structure data'\
                %self.f_out )

        return lines


    def _dataStart( self, lines ):
        """
        Find the first record that follows the residue column header.

        @param lines: lines of the DSSP result file
        @type  lines: [str]

        @return: index of the first line after the header
        @rtype: int

        @raise Dssp_Error: if the header line is missing
        """
        for i, line in enumerate( lines ):
            if line[:12] == self.RESIDUE_HEADER:
                return i + 1

        raise Dssp_Error(
            'Dssp result file %s contains no residue table header'\
            %self.f_out )


    def _completeBB( self, res ):
        """
        Check that a residue has all four backbone atoms CA, N, C and O.
        Each atom name has to be present at least once; a duplicated atom
        cannot stand in for a missing one.

        @param res: atoms of one residue (each atom gives its name
                    under the key 'name')
        @type  res: [dict]

        @return: 1 if all four backbone atoms are present, 0 otherwise
        @rtype: 1|0
        """
        names = [ a['name'] for a in res ]

        for required in self.BACKBONE_ATOMS:
            if required not in names:
                return 0

        return 1


    def parse_result( self ):
        """
        Parse the secondary structure from the DSSP output file.

        @return: the standard DSSP secondary structure description (one
                 letter per residue of the model) with the exception that
                 a blank " " has been replaced by a dot "."
        @rtype: str

        @raise Dssp_Error: if the result file is missing, empty, too short,
                           lacks the residue table, or cannot be matched
                           to the residues of the model
        """
        ## check that the output file is there and seems valid
        lines = self._readOutput()

        ## Collect secondary structure data. Note that:
        ##
        ## 1. If Dssp detects a chain break or a residue with an
        ##    incomplete backbone it inserts a line with an
        ##    exclamation mark "!" in the residue column.
        ##
        ##      Example:   496  524 C V              <
        ##                 497  525 C P
        ##                 498        !
        ##                 499  539 C F
        ##                 500  540 C N        >  -
        ##
        ## 2. A terminus (a switch of chain identifier) is marked
        ##    by an asterisk "*" in the column next to the "!"
        ##
        ##      Example:   721  789 C Q  T   <
        ##                 722  790 C A      <
        ##                 723        !*
        ##                 724   33 Z E        >
        ##                 725   34 Z K  T  4  +
        ##                 726   35 Z L  T >>  S+
        ##
        ## 3. If DSSP detects an incomplete residue in the terminus
        ##    we only get a single line with a "!" and "*", not one
        ##    line for the incomplete residue and one for the terminus
        ##    as would be expected!
        ##
        ##      Example:    84  101 A A
        ##                  85  102 A E
        ##                  86        !*       <-- residue 103 skipped!
        ##                  87   16 B S
        ##                  88   17 B G  E

        ## don't parse the header
        start = self._dataStart( lines )

        ## collect DSSP data: secondary structure code and residue letter
        ss, seq = [], []
        for line in lines[start:]:
            ## a blank line carries no record and must not consume a residue
            if not line.strip():
                continue
            ss.append( line[16:17] )
            seq.append( line[13:14] )

        secStruc = []
        residues = self.model.resList()

        i = 0   ## position in the DSSP records
        j = 0   ## position in the residues of the model

        while i < len(ss) or j < len(residues):

            ## model exhausted: only break markers may be left in DSSP
            if j >= len(residues):
                if seq[i] != '!':
                    raise Dssp_Error(
                        'Dssp result file %s contains more residues '\
                        'than the model'%self.f_out )
                i += 1

            ## assign irregular if not complete residue, DSSP
            ## skips these residues
            elif not self._completeBB( residues[j] ):
                secStruc.append( '.' )
                j += 1

            ## DSSP records exhausted but complete residues are left
            elif i >= len(ss):
                raise Dssp_Error(
                    'Dssp result file %s contains fewer residues '\
                    'than the model'%self.f_out )

            ## chain break or terminus ("!" or "!*"), only in DSSP output
            elif seq[i] == '!':
                i += 1

            ## normal data
            else:
                ## replace ' ' with '.'
                if ss[i] == ' ':
                    secStruc.append( '.' )
                else:
                    secStruc.append( ss[i] )
                i += 1
                j += 1

        ## check that the entire sequence has a secondary structure assigned
        ## (explicit raise: an assert would vanish under python -O)
        if len(secStruc) != self.model.lenResidues():
            raise Dssp_Error(
                'Dssp assigned %i secondary structure states to %i residues'\
                %( len(secStruc), self.model.lenResidues() ) )

        return ''.join(secStruc)


    def finish( self ):
        """
        Overrides Executor method. Parse the DSSP output and store the
        secondary structure string in self.result.
        """
        Executor.finish( self )
        self.result = self.parse_result( )
243 244 245 246 ############# 247 ## TESTING 248 ############# 249 250 import Biskit.test as BT
class Test(BT.BiskitTest):
    """DSSP test"""

    ## needs the external DSSP executable
    TAGS = [BT.EXE]

    def prepare(self):
        """Point the test at the 1BGS example structure."""
        self.f = T.testRoot()+"/com/1BGS.pdb"


    def test_DSSP( self ):
        """DSSP test"""

        from Biskit import PDBModel

        ## single-string print calls give the same output whether print
        ## is a statement or a function
        if self.local: print( 'Loading PDB...' )
        self.m = PDBModel(self.f)
        self.m = self.m.compress( self.m.maskProtein() )

        if self.local: print( 'Starting DSSP' )
        self.dssp = Dssp( self.m )

        if self.local: print( 'Running DSSP' )

        self.result = self.dssp.run()

        if self.local:
            print( "Sequence : %s" % (self.m.sequence(),) )
            print( "Secondary: %s" % (self.result,) )

        ## assertEqual instead of the deprecated alias assertEquals
        self.assertEqual( self.result, self.EXPECTED )


    ## secondary structure expected for the protein atoms of 1BGS
    EXPECTED = '.....SHHHHHHHHHHHSS..TTEE.HHHHHHHT..GGGT.HHHHSTT.EEEEEEE..TT..S...TT..EEEEE.S..SSS..S.EEEEETT..EEEESSSSSS.EE...EEEEETTT..SHHHHHHHHHHHHT..TT..SSHHHHHHHHHHT..SSEEEEEE.HHHHHHHTTTTHHHHHHHHHHHHHHT..EEEEE.'
## run the test of this module when it is executed as a script
if __name__ == '__main__':

    BT.localTest()