Package Biskit :: Module PDBCleaner
[hide private]
[frames] | no frames]

Source Code for Module Biskit.PDBCleaner

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## 
 21  ## last $Author: graik $ 
 22  ## last $Date: 2006/12/16 15:01:27 $ 
 23  ## $Revision: 2.6 $ 
 24   
 25  """ 
 26  Clean PDB-files so that they can be used for MD. 
 27  """ 
 28   
 29  import Biskit.molUtils as MU 
 30  import Biskit.tools as t 
 31  from Biskit.PDBModel import PDBModel 
 32   
 33  import Numeric as N 
 34   
 35  import copy 
 36   
37 -class CleanerError( Exception ):
38 pass
39
40 -class PDBCleaner:
41 """ 42 Remove HETAtoms from PDB, replace non-standard AA by closest standard AA. 43 Remove non-standard atoms from standard AA residues (and more). 44 """ 45
46 - def __init__( self, fpdb, log=None ):
47 """ 48 @param fpdb: pdb file OR PDBModel 49 @type fpdb: str 50 @param log: LogFile object 51 @type log: object 52 """ 53 self.model = PDBModel( fpdb ) 54 self.log = log
55 56
57 - def logWrite( self, msg, force=1 ):
58 if self.log: 59 self.log.add( msg ) 60 else: 61 if force: 62 print msg
63
64 - def remove_multi_occupancies( self ):
65 """ 66 Keep only atoms with alternate A field (well, or no alternate). 67 """ 68 self.logWrite( self.model.pdbCode + 69 ': Removing multiple occupancies of atoms ...') 70 71 i = 0 72 to_be_removed = [] 73 74 for a in self.model.getAtoms(): 75 76 if a['alternate']: 77 try: 78 str_id = "%i %s %s %i" % (a['serial_number'], a['name'], 79 a['residue_name'], 80 a['residue_number']) 81 82 if a['alternate'].upper() == 'A': 83 a['alternate'] = '' 84 85 else: 86 if float( a['occupancy'] ) < 1.0: 87 to_be_removed += [ i ] 88 self.logWrite( 89 'removing %s (%s %s)' % (str_id,a['alternate'], 90 a['occupancy'])) 91 else: 92 self.logWrite( 93 ('keeping non-A duplicate %s because of 1.0 '+ 94 'occupancy') % str_id ) 95 96 except: 97 self.logWrite("Error removing duplicate: "+t.lastError() ) 98 i+=1 99 100 try: 101 self.model.remove( to_be_removed ) 102 self.logWrite('Removed %i atoms' % len( to_be_removed ) ) 103 104 except: 105 self.logWrite('No atoms with multiple occupancies to remove' )
106 107
108 - def replace_non_standard_AA( self, amber=0, keep=[] ):
109 """ 110 Replace amino acids with none standard names with standard 111 amino acids according to L{MU.nonStandardAA} 112 113 @param amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0] 114 @type amber: 1||0 115 @param keep: names of residues to keep 116 @type keep: [ str ] 117 """ 118 standard = MU.atomDic.keys() + keep 119 120 if amber: 121 standard.extend( ['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'] ) 122 123 replaced = 0 124 125 self.logWrite(self.model.pdbCode + 126 ': Looking for non-standard residue names...') 127 128 for a in self.model.getAtoms(): 129 130 resname = a['residue_name'].upper() 131 132 if resname not in standard: 133 if resname in MU.nonStandardAA: 134 a['residue_name'] = MU.nonStandardAA[ resname ] 135 136 self.logWrite('renamed %s %i to %s' % \ 137 (resname, a['residue_number'], 138 MU.nonStandardAA[ resname ])) 139 else: 140 a['residue_name'] = 'ALA' 141 142 self.logWrite('Warning: unknown residue name %s %i: ' \ 143 % (resname, a['residue_number'] ) ) 144 self.logWrite('\t->renamed to ALA.') 145 146 replaced += 1 147 148 self.logWrite('Found %i atoms with non-standard residue names.'% \ 149 replaced )
150 151
152 - def __standard_res( self, resname ):
153 """ 154 Check if resname is a standard residue (according to L{MU.atomDic}) 155 if not return the closest standard residue (according to 156 L{MU.nonStandardAA}). 157 158 @param resname: 3-letter residue name 159 @type resname: str 160 161 @return: name of closest standard residue or resname itself 162 @rtype: str 163 """ 164 if resname in MU.atomDic: 165 return resname 166 167 if resname in MU.nonStandardAA: 168 return MU.nonStandardAA[ resname ] 169 170 return resname
171 172
173 - def remove_non_standard_atoms( self ):
174 """ 175 First missing standard atom triggers removal of standard atoms that 176 follow in the standard order. All non-standard atoms are removed too. 177 Data about standard atoms are taken from L{MU.atomDic} and symomym 178 atom name is defined in L{MU.atomSynonyms}. 179 180 @return: number of atoms removed 181 @rtype: int 182 """ 183 mask = [] 184 185 self.logWrite("Checking content of standard amino-acids...") 186 187 for res in self.model.resList(): 188 189 resname = self.__standard_res( res[0]['residue_name'] ).upper() 190 standard = copy.copy( MU.atomDic[ resname ] ) 191 192 ## replace known synonyms by standard atom name 193 for a in res: 194 n = a['name'] 195 if not n in standard and MU.atomSynonyms.get(n,0) in standard: 196 a['name'] = MU.atomSynonyms[n] 197 self.logWrite('%s: renaming %s to %s in %s %i' %\ 198 ( self.model.pdbCode, n, a['name'], 199 a['residue_name'], a['residue_number'] ) ) 200 201 anames = [ a['name'] for a in res ] 202 keep = 1 203 204 ## kick out all standard atoms that follow a missing one 205 rm = [] 206 for n in standard: 207 if not n in anames: 208 keep = 0 209 210 if not keep: 211 rm += [ n ] 212 213 for n in rm: 214 standard.remove( n ) 215 216 ## keep only atoms that are standard (and not kicked out above) 217 for a in res: 218 219 if a['name'] not in standard: 220 mask += [1] 221 self.logWrite('%s: removing atom %s in %s %i '%\ 222 ( self.model.pdbCode, a['name'], 223 a['residue_name'], a['residue_number'] ) ) 224 else: 225 mask += [0] 226 227 self.model.remove( mask ) 228 229 self.logWrite('Removed ' + str(N.sum(mask)) + 230 ' atoms because they were non-standard' + 231 ' or followed a missing atom.' ) 232 233 return N.sum( mask )
234 235
236 - def process( self, keep_hetatoms=0, amber=0, keep_xaa=[] ):
237 """ 238 Remove Hetatoms, waters. Replace non-standard names. 239 Remove non-standard atoms. 240 241 @param keep_hetatoms: option 242 @type keep_hetatoms: 0||1 243 @param amber: don't rename amber residue names (HIE, HID, CYX,..) 244 @type amber: 0||1 245 @param keep_xaa: names of non-standard residues to be kept 246 @type keep_xaa: [ str ] 247 248 @return: PDBModel (reference to internal) 249 @rtype: PDBModel 250 251 @raise CleanerError: if something doesn't go as expected ... 252 """ 253 try: 254 if not keep_hetatoms: 255 self.model.remove( self.model.maskHetatm() ) 256 257 self.model.remove( self.model.maskH2O() ) 258 259 self.model.remove( self.model.maskH() ) 260 261 self.remove_multi_occupancies() 262 263 self.replace_non_standard_AA( amber=amber, keep=keep_xaa ) 264 265 self.remove_non_standard_atoms() 266 267 268 except KeyboardInterrupt, why: 269 raise KeyboardInterrupt( why ) 270 except Exception, why: 271 raise CleanerError( 'Error cleaning model: %r' % why ) 272 273 return self.model
274 275 276 277 ############# 278 ## TESTING 279 ############# 280
281 -class Test:
282 """ 283 Test class 284 """ 285 286
287 - def run( self, local=0 ):
288 """ 289 run function test 290 291 @param local: transfer local variables to global and perform 292 other tasks only when run locally 293 @type local: 1|0 294 295 @return: molecular mass 296 @rtype: float 297 """ 298 from Biskit.LogFile import LogFile 299 import tempfile 300 301 f_out = tempfile.mktemp( '_test_PDBCleaner' ) 302 303 l = LogFile( f_out, mode='w') 304 305 ## Loading PDB... 306 c = PDBCleaner( t.testRoot() + '/rec/1A2P_rec_original.pdb', 307 log=l ) 308 309 m = c.process() 310 311 if local: 312 print 'PDBCleaner log file written to: %s'%f_out 313 globals().update( locals() ) 314 315 ## cleanup 316 t.tryRemove( f_out ) 317 318 return N.sum( m.masses())
319 320
321 - def expected_result( self ):
322 """ 323 Precalculated result to check for consistent performance. 324 325 @return: molecular mass 326 @rtype: float 327 """ 328 return 34029.0115499993
329 330 331 if __name__ == '__main__': 332 333 test = Test() 334 335 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-8 336