Package Biskit :: Module PDBParseFile
[hide private]
[frames] | [no frames]

Source Code for Module Biskit.PDBParseFile

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## last $Author: graik $ 
 21  ## last $Date: 2006/12/16 20:53:22 $ 
 22  ## $Revision: 2.1 $ 
 23  """ 
 24  Parse a PDB file into a PDBModel. 
 25   
 26  @see L{PDBModel} 
 27  @see L{PDBParserFactory} 
 28  """ 
 29  import Scientific.IO.PDB as IO 
 30  import Numeric as N 
 31   
 32  import Biskit.tools as T 
 33  import Biskit as B 
 34  from Biskit.PDBParser import PDBParser, PDBParserError 
 35   
 36   
class PDBParseFile( PDBParser ):
    """
    Parser that fills a PDBModel with the ATOM/HETATM records and
    coordinates of a PDB file. Sources are recognized by their file name
    ending ('.pdb' or '.pdb.gz', see L{supports}).
    """

    @staticmethod
    def supports( source ):
        """
        The method is static and can thus be called directly with the parser
        class rather than with an instance::

            >>> if PDBParseFile.supports('myfile.pdb'):
            >>>     ...

        @param source: candidate source (file name)
        @type  source: str OR LocalPath

        @return: True if the given source is supported by this parser
                 implementation
        @rtype: bool
        """
        if not isinstance( source, (str, B.LocalPath) ):
            return False

        ## the ending is compared case-insensitively
        return source[-4:].upper() == '.PDB' or \
               source[-7:].upper() == '.PDB.GZ'


    @staticmethod
    def description():
        """
        The method is static and can thus be called directly with the parser
        class rather than with an instance::

            >>> PDBParseFile.description()
            'PDB file'

        @return: short free text description of the supported format
        @rtype: str
        """
        return 'PDB file'


    def idFromName( self, fname ):
        """
        Extract PDB code from file name.

        @param fname: file name
        @type  fname: str

        @return: first 4 letters of the stripped file name, or '' if the
                 stripped name has fewer than 4 letters
        @rtype: str
        """
        name = T.stripFilename( fname )

        if len( name ) > 3:
            return name[:4]

        return ''


    def update( self, model, source, skipRes=None, lookHarder=0):
        """
        Update empty or missing fields of model from the source. The
        model is afterwards connected to the source via
        model.setSource().

        @param model: existing model
        @type  model: PDBModel
        @param source: source PDB file
        @type  source: str
        @param skipRes: list residue names that should not be parsed
        @type  skipRes: [ str ]
        @param lookHarder: ignored
        @type  lookHarder: 1|0

        @raise PDBParserError: if something is wrong with the source file
        """
        try:
            ## atoms and/or coordinates need to be updated from PDB
            if self.needsUpdate( model ):

                atoms, xyz = self.__collectAll( source, skipRes )

                model.atoms = model.atoms or atoms

                ## explicit None test -- the truth value of a coordinate
                ## array depends on its content (all-zero arrays are false)
                if model.xyz is None:
                    model.xyz = xyz

                ## 'model.__terAtoms' would be mangled to
                ## model._PDBParseFile__terAtoms inside this class; spell
                ## out the PDBModel-private name like for __pdbTer()
                ## (assumes PDBModel keeps its TER positions in __terAtoms)
                model._PDBModel__terAtoms = model._PDBModel__pdbTer()

                model.fileName = model.fileName or source

                model.pdbCode = model.pdbCode or \
                                self.idFromName( model.fileName )

        except Exception:
            ## fetch the description of the original error first ...
            error = T.lastError()

            ## ... the extra diagnosis is best effort only and must not
            ## hide the original problem (e.g. if source cannot be opened)
            try:
                hint = self.__xplorAtomIndicesTest( source ) or ' '
            except Exception:
                hint = ' '

            raise PDBParserError('Cannot read ' + str(source) + ' as PDB\n'
                                 'ERROR: ' + error + hint)

        model.setSource( source )


    def __xplorAtomIndicesTest( self, source ):
        """
        In some cases the setup with parallel xplor trajectories
        runs out of atom indices when writing the pdb files to disc.
        When this happens (usually for the TIP3 waters in the later
        of the 10 parallel trajectories) the atom indices get
        replaced with ***** which will cause the parsing to fail.
        The error message received is quite cryptic - this function
        is here to give a more comprehensible message.

        @param source: file that failed to be parsed
        @type  source: str

        @return: explanation if an ATOM record with ***** instead of an
                 atom index is found, otherwise None
        @rtype: str OR None
        """
        import re

        with open( source, 'r' ) as f:
            lines = f.readlines()

        for i, line in enumerate( lines ):
            if re.match( r'^ATOM\s{2}\*{5}', line ):
                ## report the 1-based number of the first offending line
                return """
Line %i to %i of the file %s contains invalid atom indices!

In some cases the setup with parallell xplor trajectories run out of atom indices when writing the pdb files to disc. When this happens (usualy for the TIP3 waters in the later of the 10 parallell trajectories) the atom indices get replaced with ***** which will cause the parsing to fail.

REMEDY: run the script fixAtomIndices.py
""" % (i + 1, len(lines), source)

        return None


    def __firstLetter( self, aName ):
        """
        Return first letter in a string (e.g. atom name)

        @param aName: atom name
        @type  aName: str

        @return: first character that is not a digit. A name consisting
                 only of digits gives its last digit, an empty name gives ''
        @rtype: str
        """
        for char in aName:
            if not char.isdigit():
                return char

        ## nothing but digits (or nothing at all): last character or ''
        return aName[-1:]


    def __collectAll( self, fname, skipRes=None ):
        """
        Parse ATOM/HETATM lines from PDB. Collect coordinates plus
        dictionaries with the other pdb records of each atom.
        REMARK, HEADER, etc. lines are ignored. Parsing stops at the
        first END or ENDMDL record.

        Some changes are made to the dictionary from PDBFile.readLine()::
            - the 'position' entry (with the coordinates) is removed
            - leading and trailing spaces are removed from 'name' ..
            - .. but a 'name_original' entry keeps the old name with spaces
            - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
            - a 'after_ter' entry with value 1 is added if the atom is
              directly preceded by a 'TER' line (no entry otherwise)
            - empty 'element' entries are filled with the first non-number
              letter from the atom 'name'

        @param fname: name of pdb file
        @type  fname: str
        @param skipRes: list with residue names that should be skipped
        @type  skipRes: list of str

        @return: tuple of list of dictionaries from PDBFile.readLine()
                 and xyz array N x 3
        @rtype: ( list, array )

        @raise PDBParserError: if the file cannot be parsed or does not
                               contain any atoms
        """
        items = []
        xyz   = []

        f = IO.PDBFile( fname )

        try:
            line, i = ('',''), 0

            while line[0] not in ('END', 'ENDMDL'):

                i += 1
                try:
                    line = f.readLine()
                except ValueError as what:
                    self.log.add('Warning: Error parsing line %i of %s' %
                                 (i, T.stripFilename( fname )) )
                    self.log.add('\tError: '+str(what) )
                    continue

                ## preserve position of TER records
                newChain = line[0] == 'TER'
                if newChain:
                    ## the record after TER is read here, keep the line
                    ## count of the warnings in sync
                    i += 1
                    line = f.readLine()

                if line[0] in ('ATOM', 'HETATM'):

                    a = line[1]

                    if skipRes and a['residue_name'] in skipRes:
                        continue

                    a['name_original'] = a['name']
                    a['name'] = a['name'].strip()

                    a['type'] = line[0]
                    if newChain:
                        a['after_ter'] = 1

                    if a['element'] == '':
                        a['element'] = self.__firstLetter( a['name'] )

                    ## Vector objects are unpacked into plain lists,
                    ## anything else is taken as it is
                    pos = a['position']
                    if getattr( pos, 'is_vector', False ):
                        xyz.append( [ pos[0], pos[1], pos[2] ] )
                    else:
                        xyz.append( pos )

                    del a['position']

                    items.append( a )

        except Exception:
            ## str(): fname may be a LocalPath rather than a plain string
            raise PDBParserError("Error parsing file "+str(fname)+": "
                                 + T.lastError())

        finally:
            ## release the file handle also after a failed parse; problems
            ## while closing are deliberately ignored
            try:
                f.close()
            except Exception:
                pass

        if len( xyz ) == 0:
            raise PDBParserError("Error parsing file "+str(fname)+": "+
                                 "Couldn't find any atoms.")

        return items, N.array( xyz, 'f' )
267 268
class Test:
    """
    Self-test of the PDBParseFile parser.
    """

    def run( self, local=0 ):
        """
        Parse the test structure 'rec/1A2P.pdb' below T.testRoot() into
        a new model and condense its center of mass into one number.

        @param local: report progress and copy the local variables of
                      this method into the module globals (only useful
                      when run locally)
        @type  local: 1|0

        @return: sum over the center of mass returned by the parsed model
        @rtype: float
        """
        ## progress message only for interactive / local runs
        if local:
            print('Loading pdb file ..')

        p = PDBParseFile()
        m = p.parse2new( T.testRoot()+'/rec/1A2P.pdb')

        ## expose self, local, p and m for inspection after a local run
        if local:
            globals().update( locals() )

        return N.sum( m.centerOfMass() )


    def expected_result( self ):
        """
        Precalculated result to check for consistent performance.

        @return: sum over the stored reference center of mass
        @rtype: float
        """
        return N.sum( N.array([ 29.53385022, 46.39655482, 37.75218589]))
307 308 309 if __name__ == '__main__': 310 311 test = Test() 312 313 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-8 314