Biskit.ChainCleaner

35 """ 36 Take with each call to next() one chain from ChainSeparator; 37 Return this chain with completed residues. 38 """ 39

40 - def __init__(self, chainSeparator):

41 """ 42 Clean up separate chains. 43 44 @param chainSeparator: ChainSeparator 45 @type chainSeparator: module 46 """ 47 self.reader = chainSeparator 48 self.pdbname = self.reader.pdbname() # take over pdb name 49 self.log = self.reader.log # take over log file 50 51 self.aminoAcidDict = {'GLY':['N','CA','C','O'], 52 'ALA':['N','CA','C','O','CB'], 53 'VAL':['N','CA','C','O','CB','CG1','CG2'], 54 'LEU':['N','CA','C','O','CB','CG','CD1','CD2'], 55 'ILE':['N','CA','C','O','CB','CG1','CG2','CD1'], 56 'MET':['N','CA','C','O','CB','CG','SD','CE'], 57 'PRO':['N','CA','C','O','CB','CG','CD'], 58 'PHE':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ'], 59 'TRP':['N','CA','C','O','CB','CG','CD1','CD2','NE1','CE2','CE3', 60 'CZ2','CZ3','CH2'], 61 'SER':['N','CA','C','O','CB','OG'], 62 'THR':['N','CA','C','O','CB','OG1','CG2'], 63 'ASN':['N','CA','C','O','CB','CG','OD1','ND2'], 64 'GLN':['N','CA','C','O','CB','CG','CD','OE1','NE2'], 65 'TYR':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ','OH'], 66 'CYS':['N','CA','C','O','CB','SG'], 67 'LYS':['N','CA','C','O','CB','CG','CD','CE','NZ'], 68 'ARG':['N','CA','C','O','CB','CG','CD','NE','CZ','NH1','NH2'], 69 'HIS':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'], 70 'ASP':['N','CA','C','O','CB','CG','OD1','OD2'], 71 'GLU':['N','CA','C','O','CB','CG','CD','OE1','OE2'], 72 'ACE':['CA', 'C', 'O'], 73 'NME':['N', 'CA'] }

74 75

76 - def _res2Terminal(self, aName_list):

77 """ 78 Tweak list of allowed atom names to one expected for a 79 C-terminal residue. 80 81 @param aName_list: list of allowed atom names 82 e.g. ['N','CA','C','O','CB'] 83 @type aName_list: list of strings 84 85 @return: e.g. ['N','CA','C','OT1','CB','OT2'] 86 @rtype: list of str 87 """ 88 result = [] # make local copy instead of taking reference 89 result += aName_list 90 try: 91 result[result.index('O')] = 'OT1' 92 result += ['OT2'] 93 except: 94 pass ## skip for CBX (Methyl amine) 95 return result

96 97

98 - def _addMissing(self, residue, atom_name):

99 """ 100 Add atom with given name to residue. 101 102 @param residue: Scientific.IO.PDB.Residue object 103 @type residue: object 104 @param atom_name: atom name 105 @type atom_name: str 106 """ 107 if len(atom_name) < 2: 108 # Atom.__init__ complaints about 1-char names 109 atom_name = atom_name + ' ' 110 # new atom, element property needed for correct alignment in file 111 residue.atom_list.append( Atom(atom_name, Vector(0,0,0),\ 112 element=atom_name[0] ) )

113 114

115 - def _completeResidues(self, chain):

116 """ 117 Look for missing or unknown atom names, add missing atoms, 118 report unknown atoms. 119 120 @param chain: Scientific.IO.PDB.PeptideChain object 121 @type chain: object 122 123 @return: Scientific.IO.PDB.PeptideChain object 124 @rtype: chain object 125 """ 126 chain.deleteHydrogens() ## delete all hydrogens 127 i = 0 128 self.log.add("Checking atoms of chain "+chain.segment_id) 129 130 for res in chain: 131 try: 132 if i < len(chain)-1: # normal residue 133 alowed = self.aminoAcidDict[res.name] 134 else: # c-terminal residue 135 alowed = self._res2Terminal(self.aminoAcidDict[res.name]) 136 137 name_list = [] 138 139 for atom in res.atom_list: # check for unknown atom names 140 # store for missing atom check 141 name_list = name_list + [atom.name] 142 if not (atom.name in alowed): 143 self.log.add('\tunknown atom: ' + atom.name + ' : '+\ 144 res.name+ str(res.number)) 145 146 for name in alowed: # check for missing atoms 147 if not (name in name_list): 148 # add missing atom with 0 xyz 149 self._addMissing(res, name) 150 self.log.add('\tadded missing atom -> '+ name+ ' : '+\ 151 res.name+ str(res.number)) 152 153 except: 154 s = "\ncompleteResidues(): Error while checking atoms.\n" 155 s = s + "residue " + str(i)+ " :"+ str(res) + "\n" 156 s = s + T.lastError() 157 T.errWriteln( 158 "Error while completing residues, check log for details.") 159 self.log.add(s) 160 161 i = i+1 162 163 return chain

164 165

166 - def _checkOccupancy(self, chain):

167 """ 168 Check and report atoms with ocupancies that is not 100% 169 Scientific.PDB.IO will only take one of the atoms even if there are 170 alternate locations indicated in the PDB-file. The code below does only 171 check for none 100% occupancies and report them to the log-file. 172 173 @param chain: Scientific.IO.PDB.PeptideChain object 174 @type chain: chain object 175 176 @return: Scientific.IO.PDB.PeptideChain object 177 @rtype: chain object 178 """ 179 self.log.add("Checking atom occupancies of chain "+chain.segment_id) 180 for res in chain: 181 for atom in res: 182 if atom.properties.get('occupancy',1.0) != 1.0: 183 self.log.add('\tOccupancy: '+\ 184 str(atom.properties['occupancy']) \ 185 + ' : ' + atom.name + ' : '+ res.name+ ' ' +\ 186 str(res.number)) 187 return chain

188 189

190 - def _find_and_change(self, chain, oldAtomName, residueNum, newAtomName):

191 """ 192 Change name of atoms in specified residue. 193 194 @param chain: Scientific.IO.PDB.PeptideChain object 195 @type chain: chain object 196 @param oldAtomName: atom name 197 @type oldAtomName: str 198 @param residueNum: residue number 199 @type residueNum: int 200 @param newAtomName: atom name 201 @type newAtomName: str 202 203 @return: Scientific.IO.PDB.PeptideChain object 204 @rtype: chain object 205 """ 206 changeRes = chain.residues[residueNum] 207 if changeRes.atoms.has_key(oldAtomName) == 1: 208 changeRes.atoms[oldAtomName].name = newAtomName 209 return chain

210 211

212 - def _correct_Cterm(self, chain):

213 """ 214 Terminal amino acid can't have atom type O and OXT - have to be 215 OT1 and OT2 216 217 @param chain: Scientific.IO.PDB.PeptideChain object 218 @type chain: chain object 219 220 @return: Scientific.IO.PDB.PeptideChain object 221 @rtype: chain object 222 """ 223 self._find_and_change(chain, 'O', -1, 'OT1') 224 self._find_and_change(chain, 'OXT', -1, 'OT2') 225 return chain

226 227

228 - def next(self):

229 """ 230 Obtain next chain from ChainSeparator; Add missing atoms. 231 232 @return: Scientific.IO.PDB.PeptideChain, completed chain OR 233 if no chain is left 234 @rtype: chain object OR None 235 """ 236 chain = self.reader.next() 237 if (chain == None): 238 ## extract all waters into separate pdb 239 self.reader.extractWaters( ) 240 241 return None 242 self.log.add("\nCleaning up chain "+chain.segment_id+": ") 243 244 # check for atoms that don't have 100% occupancy 245 chain = self._checkOccupancy(chain) 246 247 # change OXT to OT2 and O to OT2 for C terminal 248 chain = self._correct_Cterm(chain) 249 chain = self._completeResidues(chain) 250 251 return chain

260 """ Test ChainCleaner""" 261

262 - def prepare(self):

263 self.fname = T.testRoot() + '/rec/1A2P_rec_original.pdb' 264 self.outPath = T.tempDir()

265

266 - def cleanUp(self):

267 T.tryRemove( self.cleaner.log.fname )

268 269

270 - def test_ChainCleaner( self):

271 """ChainCleaner test""" 272 self.cleaner = ChainCleaner( ChainSeparator(self.fname, self.outPath) ) 273 self.cleaner.next() 274 275 if self.local: 276 print 'Wrote log: %s'%(self.cleaner.log.fname) 277 278 ## return logfile contents 279 f= open( self.cleaner.log.fname, 'r') 280 self.result = f.readlines() 281 f.close() 282 283 self.assertEqual(self.result, self.EXPECTED)

284 285 286 EXPECTED = ['\n', 'WARNING! The PDB-file contains coordinates for none water HETATMs.\n', 'If you want to keep the HETATM - prepare the file for Xplor manualy \n', '\n', 'String found in line(s): \n', '\n', 'HETATM \n', 'HETATM 881 ZN ZN A 112 33.898 42.301 55.562 0.30 41.80 ZN \n', 'HETATM 1891 ZN ZN B 112 8.869 30.194 74.011 0.35 32.13 ZN \n', 'HETATM 2908 ZN ZN C 112 39.324 15.495 56.124 1.00 12.86 ZN \n', '\n', '--------------------------------------------------------------------------------\n', '\n', 'Separate chains: \n', '------------------\n', '\n', " Chain ID's of compared chains: ['A', 'B', 'C']\n", ' Cross-Identity between chains:\n', '[[ 1. 1. 1.]\n', ' [ 0. 1. 1.]\n', ' [ 0. 0. 1.]]\n', ' Identity threshold used: 0.9\n', 'NOTE: Identity matrix will be used for removing identical chains.\n', '2 chains have been removed.\n', '\n', '0 breaks found in chain (108 residues) A: []\n', 'changed segment ID of chain A to 1A2A\n', '\n', 'Cleaning up chain 1A2A: \n', 'Checking atom occupancies of chain 1A2A\n', '\tOccupancy: 0.3 : CD2 : TYR 17\n', '\tOccupancy: 0.8 : CE2 : TYR 17\n', '\tOccupancy: 0.5 : CZ : TYR 17\n', '\tOccupancy: 0.5 : OH : TYR 17\n', '\tOccupancy: 0.5 : OD1 : ASP 22\n', '\tOccupancy: 0.5 : OD2 : ASP 22\n', '\tOccupancy: 0.5 : CB : SER 28\n', '\tOccupancy: 0.5 : OG : SER 28\n', '\tOccupancy: 0.5 : CB : GLN 31\n', '\tOccupancy: 0.5 : CG : GLN 31\n', '\tOccupancy: 0.5 : CD : GLN 31\n', '\tOccupancy: 0.5 : OE1 : GLN 31\n', '\tOccupancy: 0.5 : NE2 : GLN 31\n', '\tOccupancy: 0.5 : CB : SER 38\n', '\tOccupancy: 0.5 : OG : SER 38\n', '\tOccupancy: 0.5 : N : ARG 59\n', '\tOccupancy: 0.5 : CA : ARG 59\n', '\tOccupancy: 0.5 : C : ARG 59\n', '\tOccupancy: 0.5 : O : ARG 59\n', '\tOccupancy: 0.0 : CB : ARG 59\n', '\tOccupancy: 0.0 : CG : ARG 59\n', '\tOccupancy: 0.0 : CD : ARG 59\n', '\tOccupancy: 0.0 : NE : ARG 59\n', '\tOccupancy: 0.0 : CZ : ARG 59\n', '\tOccupancy: 0.0 : NH1 : ARG 59\n', '\tOccupancy: 0.0 : NH2 : ARG 59\n', '\tOccupancy: 0.5 : OE1 : GLU 60\n', '\tOccupancy: 0.5 : OE2 : GLU 60\n', '\tOccupancy: 0.5 : CE : LYS 66\n', '\tOccupancy: 0.5 : NZ : LYS 66\n', '\tOccupancy: 0.5 : CB : SER 85\n', '\tOccupancy: 0.5 : OG : SER 85\n', '\tOccupancy: 0.5 : CB : ILE 96\n', '\tOccupancy: 0.5 : CG1 : ILE 96\n', '\tOccupancy: 0.5 : CG2 : ILE 96\n', '\tOccupancy: 0.5 : CD1 : ILE 96\n', 'Checking atoms of chain 1A2A\n']

Source Code for Module Biskit.ChainCleaner