Package Biskit :: Module PDBDope
[hide private]
[frames] | no frames]

Source Code for Module Biskit.PDBDope

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## $Revision: 2.11 $ 
 21  ## last $Author: leckner $ 
 22  ## last $Date: 2006/09/13 10:28:13 $ 
 23  """ 
 24  Calculate and add various properties to PDBModel 
 25  """ 
 26   
 27  import Numeric as N 
 28  import Biskit.tools as T 
 29  import os.path 
 30   
 31  from Biskit.WhatIf import WhatIf  
 32  from Biskit.Hmmer import Hmmer 
 33  from Biskit.DSSP import Dssp 
 34  from Biskit.Fold_X import Fold_X 
 35  from Biskit.SurfaceRacer import SurfaceRacer 
 36   
 37   
38 -class PDBDope:
39 """ 40 Decorate a PDBModel with calculated properties (profiles) 41 """ 42
43 - def __init__( self, model ):
44 """ 45 @param model: model to dope 46 @type model: PDBModel 47 """ 48 self.m = model
49
50 - def version( self ):
51 """ 52 @return: version of class 53 @rtype: str 54 """ 55 return 'PDBDope $Revision: 2.11 $'
56
57 - def model( self ):
58 """ 59 @return: model 60 @rtype: PDBModel 61 """ 62 return self.m
63 64
65 - def addASA( self ):
66 """ 67 Add profiles of Accessible Surface Area: 'relASA', 'ASA_total', 68 'ASA_sc', 'ASA_bb'. See L{Biskit.WhatIf} 69 70 @note: Using WhatIf to calculate relative accessabilities is not 71 nessesary any more. SurfaceRacer now also adds a profile 72 'relAS' (conataining relative solvent accessible surf) and 73 'relMS' (relative molecular surface). 74 75 @raise ProfileError: if WhatIf-returned atom/residue lists don't match. 76 Usually that means, WhatIf didn't recognize some 77 residue name 78 """ 79 w = WhatIf( self.m ) 80 81 atomRelAcc, resASA, resMask = w.run() 82 83 ## normalAtoms = N.logical_not( N.logical_or(self.m.maskHetatm(), 84 ## self.m.maskSolvent() ) ) 85 86 normalAtoms = self.m.maskProtein( standard=1 ) 87 88 normalRes = self.m.atom2resMask( normalAtoms ) 89 90 self.m.setAtomProfile( 'relASA', atomRelAcc, ## normalAtoms, 0, 91 comment='relative accessible surface area in %', 92 version= T.dateString() + ' ' + self.version() ) 93 94 self.m.setResProfile( 'ASA_total', resASA[:,0], normalRes, 0, 95 comment='accessible surface area in A^2', 96 version= T.dateString() + ' ' + self.version() ) 97 98 self.m.setResProfile( 'ASA_sc', resASA[:,1], normalRes, 0, 99 comment='side chain accessible surface area in A^2', 100 version= T.dateString() + ' ' + self.version() ) 101 102 self.m.setResProfile( 'ASA_bb', resASA[:,2], normalRes, 0, 103 comment='back bone accessible surface area in A^2', 104 version= T.dateString() + ' ' + self.version() )
105 106
107 - def addSurfaceMask( self, pname='relAS' ):
108 """ 109 Adds a surface mask profie that contains atoms with > 40% exposure 110 compared to a random coil state. 111 112 @param pname: name of relative profile to use 113 (Whatif-relASA OR SurfaceRacer - relAS) 114 (default: relAS) 115 @type pname: str 116 """ 117 r = self.m.profile2mask( pname, cutoff_min=40 ) 118 self.m.setResProfile( 'surfMask', self.m.atom2resMask(r), 119 comment='residues with any atom > 40% exposed', 120 version= T.dateString() + ' ' + self.version() )
121 122
123 - def addSecondaryStructure( self ):
124 """ 125 Adds a residue profile with the secondary structure as 126 calculated by the DSSP program. 127 128 Profile code:: 129 B = residue in isolated beta-bridge 130 E = extended strand, participates in beta ladder 131 G = 3-helix (3/10 helix) 132 I = 5 helix (pi helix) 133 T = hydrogen bonded turn 134 S = bend 135 . = loop or irregular 136 """ 137 dssp = Dssp( self.m ) 138 ss = dssp.run() 139 140 self.m.setResProfile( 'secondary', ss, 141 comment='secondary structure from DSSP', 142 version= T.dateString() + ' ' + self.version() )
143 144
145 - def addConservation( self, pfamEntries=None ):
146 """ 147 Adds a conservation profile. See L{Biskit.Hmmer} 148 149 @param pfamEntries: External hmmSearch result, list of 150 (non-overlapping) profile hits. 151 (default: None, do the search) Example:: 152 [{'ribonuclease': [[1, 108]]},..] 153 [{profileName : [ [startPos, endPos], 154 [start2, end2]]}] 155 - startPos, endPos as reported by hmmPfam 156 for PDB sequence generated from this model 157 @type pfamEntries: [{dict}] 158 """ 159 ## mask with normal AA also used for HMM search 160 mask = self.m.maskCA() 161 mask = self.m.atom2resMask( mask ) 162 163 h = Hmmer() 164 h.checkHmmdbIndex() 165 166 p, hmmHits = h.scoreAbsSum( self.m, hmmNames=pfamEntries ) 167 168 self.m.setResProfile( 'cons_abs', p, mask, 0, hmmHits=hmmHits, 169 comment="absolute sum of all 20 hmm scores per position", 170 version= T.dateString() + ' ' + self.version() ) 171 172 p, hmmHits = h.scoreMaxAll( self.m, hmmNames=hmmHits ) 173 174 self.m.setResProfile( 'cons_max', p, mask, 0, hmmHits=hmmHits, 175 comment="max of 20 hmm scores (-average / SD) per position", 176 version= T.dateString() + ' ' + self.version() ) 177 178 p, hmmHits = h.scoreEntropy( self.m, hmmNames=hmmHits ) 179 180 self.m.setResProfile( 'cons_ent', p, mask, 0, hmmHits=hmmHits, 181 comment="entropy of emmission probabilities per position "+ 182 "(high -> high conservation/discrimination)", 183 version= T.dateString() + ' ' + self.version() )
184 185
186 - def addDensity( self, radius=6, minasa=None, profName='density' ):
187 """ 188 Count the number of heavy atoms within the given radius. 189 Values are only collected for atoms with |minasa| accessible surface 190 area. 191 192 @param minasa: relative exposed surface - 0 to 100% 193 @type minasa: float 194 @param radius: in Angstrom 195 @type radius: float 196 """ 197 mHeavy = self.m.maskHeavy() 198 199 xyz = N.compress( mHeavy, self.m.getXyz(), 0 ) 200 201 if minasa and self.m.profile( 'relAS', 0 ) == 0: 202 self.addASA() 203 204 if minasa: 205 mSurf = self.m.profile2mask( 'relAS', minasa ) 206 else: 207 mSurf = N.ones( self.m.lenAtoms() ) 208 209 ## loop over all surface atoms 210 surf_pos = N.nonzero( mSurf ) 211 contacts = [] 212 213 for i in surf_pos: 214 dist = N.sum(( xyz - self.m.xyz[i])**2, 1) 215 contacts += [ N.sum( N.less(dist, radius**2 )) -1] 216 217 self.m.setAtomProfile( profName, contacts, mSurf, default=-1, 218 comment='atom density radius %3.1fA' % radius, 219 version= T.dateString() + ' ' + self.version() )
220
221 - def addFoldX( self ):
222 """ 223 Adds dict with fold-X energies to PDBModel's info dict. 224 See L{Biskit.Fold_X} 225 """ 226 x = Fold_X( self.m ) 227 self.m.info['foldX'] = x.run()
228 229
230 - def addSurfaceRacer( self, probe=1.4, vdw_set=1, probe_suffix=0 ):
231 """ 232 Always adds three different profiles as calculated by fastSurf:: 233 curvature - average curvature (or curvature_1.4 if probe_suffix=1) 234 MS - molecular surface area (or MS_1.4 if probe_suffix=1) 235 AS - accessible surface area (or AS_1.4 if probe_suffix=1) 236 237 If the probe radii is 1.4 Angstrom and the Richards vdw radii 238 set is used the following two profiles are also added:: 239 relAS - Relative solvent accessible surface 240 relMS - Relative molecular surface 241 242 See {Bikit.SurfaceRacer} 243 244 @param probe: probe radius 245 @type probe: float 246 @param vdw_set: defines what wdv-set to use (1-Richards, 2-Chothia) 247 @type vdw_set: 1|2 248 @param probe_suffix: append probe radius to profile names 249 @type probe_suffix: 1|0 250 """ 251 name_MS = 'MS' + probe_suffix * ('_%3.1f' % probe) 252 name_AS = 'AS' + probe_suffix * ('_%3.1f' % probe) 253 name_curv = 'curvature' + probe_suffix * ('_%3.1f' % probe) 254 255 ## hydrogens are not allowed during FastSurf calculation 256 mask = self.m.maskHeavy() 257 258 fs = SurfaceRacer( self.m, probe, vdw_set=vdw_set ) 259 fs_dic = fs.run() 260 261 fs_info= fs_dic['surfaceRacerInfo'] 262 263 self.m.setAtomProfile( name_MS, fs_dic['MS'], mask, 0, 264 comment='Molecular Surface area in A', 265 version= T.dateString() + ' ' + self.version(), 266 **fs_info ) 267 268 self.m.setAtomProfile( name_AS, fs_dic['AS'], mask, 0, 269 comment='Accessible Surface area in A', 270 version= T.dateString() + ' ' + self.version(), 271 **fs_info ) 272 273 self.m.setAtomProfile( name_curv, fs_dic['curvature'], mask, 0, 274 comment='Average curvature', 275 version= T.dateString() + ' ' + self.version(), 276 **fs_info ) 277 278 if round(probe, 1) == 1.4 and vdw_set == 1: 279 self.m.setAtomProfile( 'relAS', fs_dic['relAS'], mask, 0, 280 comment='Relative solvent accessible surf.', 281 version= T.dateString()+' ' +self.version(), 282 **fs_info ) 283 284 self.m.setAtomProfile( 'relMS', fs_dic['relMS'], mask, 0, 285 comment='Relative molecular surf.', 286 version= T.dateString()+' '+self.version(), 287 **fs_info )
288 289 290 291 ############# 292 ## TESTING 293 ############# 294
295 -class Test:
296 """ 297 Test class 298 """ 299
300 - def run( self, local=0 ):
301 """ 302 run function test 303 304 @param local: transfer local variables to global and perform 305 other tasks only when run locally 306 @type local: 1|0 307 308 @return: 1 309 @rtype: int 310 """ 311 from Biskit import PDBModel 312 313 if local: print "Loading PDB..." 314 # f = T.testRoot() + '/lig/1A19.pdb' 315 f = T.testRoot() + '/com/1BGS.pdb' 316 mdl = PDBModel(f) 317 318 mdl = mdl.compress( mdl.maskProtein() ) 319 320 if local: print "Initiating PDBDope...", 321 self.d = PDBDope( mdl ) 322 if local: print 'Done.' 323 324 if local: print "Adding FoldX energy...", 325 self.d.addFoldX() 326 if local: print 'Done.' 327 328 # if local: print "Adding WhatIf ASA...", 329 # self.d.addASA() 330 # if local: print 'Done.' 331 332 if local: print "Adding SurfaceRacer curvature...", 333 self.d.addSurfaceRacer( probe=1.4 ) 334 if local: print 'Done.' 335 336 if local: print "Adding surface mask...", 337 self.d.addSurfaceMask() 338 if local: print 'Done.' 339 340 if local: print "Adding secondary structure profile...", 341 self.d.addSecondaryStructure() 342 if local: print 'Done.' 343 344 ## skipped in test as it takes long time to calculate 345 # if local: print "Adding conservation data...", 346 # self.d.addConservation() 347 # if local: print 'Done.' 348 349 if local: print "Adding surface density...", 350 self.d.addDensity() 351 if local: print 'Done.' 352 353 if local: 354 print '\nData added to info record of model (key -- value):' 355 for k in self.d.m.info.keys(): 356 print '%s -- %s'%(k, self.d.m.info[k]) 357 358 print '\nAdded atom profiles:' 359 print mdl.aProfiles 360 361 print '\nAdded residue profiles:' 362 print mdl.rProfiles 363 364 ## check that nothing has changed 365 print '\nChecking that models are unchanged by doping ...' 366 m_ref = PDBModel(f) 367 m_ref = m_ref.compress( m_ref.maskProtein() ) 368 for k in m_ref.atoms[0].keys(): 369 ref = [ m_ref.atoms[i][k] for i in range( m_ref.lenAtoms() ) ] 370 mod = [ mdl.atoms[i][k] for i in range( mdl.lenAtoms() ) ] 371 if not ref == mod: 372 print 'CHANGED!! ', k 373 if ref == mod: 374 print 'Unchanged ', k 375 376 ## display in Pymol 377 print "Starting PyMol..." 378 from Biskit.Pymoler import Pymoler 379 380 pm = Pymoler() 381 pm.addPdb( mdl, 'm' ) 382 pm.colorAtoms( 'm', N.clip(mdl.profile('relAS'), 0.0, 100.0) ) 383 pm.show() 384 385 globals().update( locals() ) 386 387 return 1
388 389
390 - def expected_result( self ):
391 """ 392 Precalculated result to check for consistent performance. 393 394 @return: 1 395 @rtype: int 396 """ 397 return 1
398 399 400 401 if __name__ == '__main__': 402 403 test = Test() 404 405 assert test.run( local=1 ) == test.expected_result() 406