Biskit.PDBParseFile

38 39 @staticmethod

def supports( source ):
    """
    Check whether a source looks like a (optionally gzipped) PDB file.

    The method is static and can thus be called directly with the parser
    class rather than with an instance::

        >>> if PDBParser.supports('myfile.pdb'):
        >>>     ...

    The decision is based on the type of C{source} and on the (case
    insensitive) ending of its name only; the file itself is not opened.

    @param source: candidate source, typically a file name
    @type  source: str or LocalPath

    @return: True if the given source is supported by this parser
             implementation
    @rtype: bool
    """
    ## isinstance (rather than an exact type comparison) also accepts
    ## subclasses of str; B.LocalPath is only consulted for non-strings
    if not (isinstance( source, str ) or isinstance( source, B.LocalPath )):
        return False

    return (source[-4:].upper() == '.PDB' or
            source[-7:].upper() == '.PDB.GZ')

55 56 57 @staticmethod

def description():
    """
    Describe the format handled by this parser.

    The method is static and can thus be called directly with the parser
    class rather than with an instance::

        >>> PDBParser.description()
        'PDB file'

    @return: short free text description of the supported format
    @rtype: str
    """
    return 'PDB file'

70 71

def idFromName( self, fname ):
    """
    Derive a PDB code from a file name.

    The name is first reduced with T.stripFilename; the code is then taken
    from the first four characters of the result.

    @param fname: file name
    @type  fname: str

    @return: first 4 letters of the stripped file name, or '' if the
             stripped name has fewer than 4 characters
    @rtype: str
    """
    base = T.stripFilename( fname )

    ## too short to contain a 4-letter code
    if len( base ) < 4:
        return ''

    return base[:4]

86 87

def update( self, model, source, skipRes=None, lookHarder=0):
    """
    Update empty or missing fields of model from the source. The
    model will be connected to the source via model.source.

    @param model: existing model
    @type  model: PDBModel
    @param source: source PDB file
    @type  source: str
    @param skipRes: list residue names that should not be parsed
    @type  skipRes: [ str ]
    @param lookHarder: ignored
    @type  lookHarder: 1|0

    @raise PDBParserError: if something is wrong with the source file
    """
    try:
        ## atoms and/or coordinates need to be updated from PDB
        if self.needsUpdate( model ):

            atoms, xyz = self.__collectAll( source, skipRes )

            model.atoms = model.atoms or atoms

            ## explicit emptiness test: the truth value of a coordinate
            ## array depends on its content (or is undefined altogether)
            if model.xyz is None or len( model.xyz ) == 0:
                model.xyz = xyz

            ## NOTE(review): inside the parser class this name is mangled
            ## with the parser's class name, not with PDBModel's (compare
            ## the explicitly mangled call on the right) -- confirm which
            ## attribute of the model is meant to receive the TER list
            model.__terAtoms = model._PDBModel__pdbTer()

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or \
                            self.idFromName( model.fileName )

    except Exception:
        ## fetch the error text first; the diagnostic below may raise and
        ## handle exceptions of its own
        error = T.lastError()

        ## the extra diagnosis re-opens the file and must never hide the
        ## original problem (e.g. if the file does not exist at all)
        try:
            hint = self.__xplorAtomIndicesTest( source )
        except Exception:
            hint = None

        raise PDBParserError('Cannot read ' + str(source) + ' as PDB\n'
                             'ERROR: ' + error + (hint or ' '))

    model.setSource( source )

128 129

def __xplorAtomIndicesTest( self, source ):
    """
    In some cases the setup with parallel xplor trajectories
    runs out of atom indices when writing the pdb files to disc.
    When this happens (usually for the TIP3 waters in the later
    of the 10 parallel trajectories) the atom indices get
    replaced with ***** which will cause the parsing to fail.
    The error message received is quite cryptic - this function
    is here to give a more comprehensible message.

    @param source: file that failed to be parsed
    @type  source: str

    @return: explanation naming the (1-based) first offending line and the
             total number of lines, or None if no ATOM record with a
             '*****' index is found or the file cannot be opened
    @rtype: str or None
    """
    import re

    ## this is only a diagnostic aid: an unreadable file yields no hint
    try:
        f = open( source, 'r' )
    except (IOError, OSError):
        return None

    try:
        lines = f.readlines()
    finally:
        f.close()

    ## 'ATOM', two white spaces, then five literal asterisks
    brokenIndex = re.compile( r'^ATOM\s{2}\*{5}' )

    for i, line in enumerate( lines ):
        if brokenIndex.match( line ):
            ## report line numbers counting from 1
            msg = """
Line %i to %i of the file %s contains invalid atom indices!

In some cases the setup with parallell xplor trajectories run out of atom indices when writing the pdb files to disc. When this happens (usualy for the TIP3 waters in the later of the 10 parallell trajectories) the atom indices get replaced with ***** which will cause the parsing to fail.

REMEDY: run the script fixAtomIndices.py
""" % (i + 1, len(lines), source)

            return msg

    return None

159

def __firstLetter( self, aName ):
    """
    Return the first character of a string (e.g. atom name) that is not
    a digit.

    @param aName: atom name
    @type  aName: str

    @return: first non-digit character of the string, or '' if the string
             is empty or consists of digits only
    @rtype: str
    """
    for char in aName:
        if not char.isdigit():
            return char

    ## empty or purely numeric name: there is no letter to report
    return ''

175 176

def __collectAll( self, fname, skipRes=None ):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    REMARK, HEADER, etc. lines are ignored. Reading stops at the first
    END or ENDMDL record.

    Some changes are made to the dictionary from PDBFile.readLine()::
        - the 'position' entry (with the coordinates) is removed
        - leading and trailing spaces are removed from 'name' ..
        - .. but a 'name_original' entry keeps the old name with spaces
        - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
        - a 'after_ter' entry with value 1 is added if the atom record
          directly follows a 'TER' line (other atoms do not receive
          this entry here)
        - empty 'element' entries are filled with the first non-number
          letter from the atom 'name'

    @param fname: name of pdb file
    @type  fname: str
    @param skipRes: list with residue names that should be skipped
    @type  skipRes: list of str

    @return: tuple of list of dictionaries from PDBFile.readLine()
             and xyz array N x 3
    @rtype: ( list, array )

    @raise PDBParserError: if parsing fails or no atom record is found
    """
    items = []
    xyz   = []

    f = IO.PDBFile( fname )

    try:
        try:
            line, i = ('',''), 0

            while line[0] not in ('END', 'ENDMDL'):

                i += 1
                try:
                    line = f.readLine()
                except ValueError as what:
                    ## a malformed line is reported and skipped
                    self.log.add('Warning: Error parsing line %i of %s' %
                                 (i, T.stripFilename( fname )) )
                    self.log.add('\tError: '+str(what) )
                    continue

                ## preserve position of TER records
                newChain = line[0] == 'TER'
                if newChain:
                    line = f.readLine()

                if line[0] in ('ATOM', 'HETATM'):

                    a = line[1]

                    ## NOTE(review): a TER marker is dropped if the record
                    ## following it belongs to a skipped residue
                    if skipRes and a['residue_name'] in skipRes:
                        continue

                    a['name_original'] = a['name']
                    a['name'] = a['name'].strip()

                    a['type'] = line[0]
                    if newChain: a['after_ter'] = 1

                    if a['element'] == '':
                        a['element'] = self.__firstLetter( a['name'] )

                    if a['position'].is_vector:
                        lst = [ a['position'][0],
                                a['position'][1],
                                a['position'][2]]
                        xyz.append( lst )
                    else:
                        xyz.append( a['position'] )

                    del a['position']

                    items.append( a )

        except Exception:
            raise PDBParserError("Error parsing file "+str(fname)+": "
                                 + T.lastError())

    finally:
        ## release the file even if parsing failed; problems while closing
        ## are deliberately ignored (best effort)
        try:
            f.close()
        except Exception:
            pass

    if len( xyz ) == 0:
        raise PDBParserError("Error parsing file "+str(fname)+": "+
                             "Couldn't find any atoms.")

    return items, N.array( xyz, 'f' )

Source Code for Module Biskit.PDBParseFile