Biskit.molUtils

Source Code for Module Biskit.molUtils

1 ## 2 ## Biskit, a toolkit for the manipulation of macromolecular structures 3 ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 4 ## 5 ## This program is free software; you can redistribute it and/or 6 ## modify it under the terms of the GNU General Public License as 7 ## published by the Free Software Foundation; either version 2 of the 8 ## License, or any later version. 9 ## 10 ## This program is distributed in the hope that it will be useful, 11 ## but WITHOUT ANY WARRANTY; without even the implied warranty of 12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 ## General Public License for more details. 14 ## 15 ## You find a copy of the GNU General Public License in the file 16 ## license.txt along with this program; if not, write to the Free 17 ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 ## 19 20 ## 21 ## last $Author: leckner $ 22 ## last $Date: 2006/09/06 13:59:10 $ 23 ## $revision: $ 24 25 """ 26 Utilities for handling structures and sequences 27 """ 28 29 import ErrorHandler 30 from Biskit import EHandler 31 import tools as t 32 33 import Numeric as N 34 import copy 35 import types 36

class MolUtilError( Exception ):
    """
    Exception type defined by this module for molUtils-specific errors.
    """

## translate amino acid names to single letter code
aaDicStandard =\
    {'asp':'D', 'glu':'E', 'lys':'K', 'his':'H', 'arg':'R',
     'gln':'Q', 'asn':'N', 'ser':'S', 'asx':'B', 'glx':'Z',
     'phe':'F', 'trp':'W', 'tyr':'Y',
     'gly':'G', 'ala':'A', 'ile':'I', 'leu':'L', 'cys':'C',
     'met':'M', 'thr':'T', 'val':'V', 'pro':'P' }

## same for nucleic acids (incomplete)
nsDicStandard = {'atp':'A', 'gtp':'G', 'ctp':'C', 'thy':'T', 'ura':'U'}

## standard amino acids plus alternative protonation / cap residue names
aaDic = copy.copy( aaDicStandard )

aaDic.update( {'cyx':'C', 'hid':'H', 'hie':'H', 'hip':'H',
               'unk':'X', 'ace':'X', 'nme':'X'} )#, 'ndp':'X' } )

## translate common hetero residues to pseudo single letter code
xxDic = {'tip3':'~', 'hoh':'~', 'wat':'~', 'cl-':'-', 'na+':'+',
         'ndp':'X', 'nap':'X'}

## map non-standard amino acid names to closest standard amino acid
##
## Data from: http://www.ccp4.ac.uk/html/lib_list.html#peptide_synonyms
## More info at: http://xray.bmc.uu.se/hicup/XXX/ where XXX is the residue code
##
## NOT ADDED:
## SAR SARCOSINE
## PCA 5-pyrrolidone-2-carboxylic_acid
## INI Amidinated_lysine_with_methyl_isonicotinimida
## SAH S-ADENOSYL-L-HOMOCYSTEINE
## SAM S-ADENOSYLMETHIONINE
## LLP LYSINE-PYRIDOXAL-5*-PHOSPHATE

## ACE acetyl
## FOR Formyl
## BOC TERT-BUTYLOXYCARBONYL GROUP
## MLE N-METHYLLEUCINE
## MVA N-METHYLVALINE
## IVA Isovaleric_acid
## STA STATINE
## ETA ethanolamine
## TFA TRIFLUOROACETYL GROUP
## ANI 4-TRIFLUOROMEHYLANILINE
## MPR BETA-MERCAPTOPROPIONATE
## DAM N-METHYL-ALPHA-BETA-DEHYDROALANINE
## ACB 2-AMINO-3-CARBONYLBUTANOIC ACID
## ADD 2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-M
## CXM N-CARBOXYMETHIONINE
## DIP DIPENTYLAMINE
## BAL BETA-ALANINE

nonStandardAA={ 'UNK':'ALA', 'ABA':'ALA', 'B2A':'ALA',
                'ORN':'ARG',
                'ASX':'ASP',
                'CSH':'CYS', 'OCS':'CYS', 'CSO':'CYS',
                'GLX':'GLU', 'CGU':'GLU', 'ILG':'GLU',
                'B2I':'ILE',
                'BLE':'LEU',
                'KCX':'LYS', 'BLY':'LYS',
                'MSE':'MET',
                'B1F':'PHE', 'B2F':'PHE',
                'HYP':'PRO', '5HP':'PRO',
                'SEP':'SER',
                'TYS':'TYR',
                ## B2V (valine boronic acid) used to map to itself, which is
                ## not a standard residue; the closest standard one is VAL
                'B2V':'VAL',
                'HIE':'HIS', 'HID':'HIS', 'HIP':'HIS',
                'CYX':'CYS' }

## heavy atoms of amino acids in standard order
aaAtoms={'GLY':['N','CA','C','O', 'OXT' ],
         'ALA':['N','CA','C','O', 'CB', 'OXT'],
         'VAL':['N','CA','C','O','CB','CG1','CG2', 'OXT'],
         'LEU':['N','CA','C','O','CB','CG','CD1','CD2', 'OXT'],
         'ILE':['N','CA','C','O','CB','CG1','CG2','CD1', 'OXT'],
         'MET':['N','CA','C','O','CB','CG','SD','CE', 'OXT'],
         'PRO':['N','CA','C','O','CB','CG','CD', 'OXT'],
         'PHE':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ',
                'OXT'],
         'TRP':['N','CA','C','O','CB','CG','CD1','CD2','NE1','CE2','CE3',
                'CZ2','CZ3','CH2', 'OXT'],
         'SER':['N','CA','C','O','CB','OG', 'OXT'],
         'THR':['N','CA','C','O','CB','OG1','CG2', 'OXT'],
         'ASN':['N','CA','C','O','CB','CG','OD1','ND2', 'OXT'],
         'GLN':['N','CA','C','O','CB','CG','CD','OE1','NE2', 'OXT'],
         'TYR':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ','OH',
                'OXT'],
         'CYS':['N','CA','C','O','CB','SG', 'OXT'],
         'LYS':['N','CA','C','O','CB','CG','CD','CE','NZ', 'OXT'],
         'ARG':['N','CA','C','O','CB','CG','CD','NE','CZ','NH1','NH2', 'OXT'],
         'HIS':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2', 'OXT'],
         'ASP':['N','CA','C','O','CB','CG','OD1','OD2', 'OXT'],
         'GLU':['N','CA','C','O','CB','CG','CD','OE1','OE2', 'OXT']}

## dictionary of elements
## NOTE(review): the key 'suplphur' is misspelled but is kept unchanged
## because external code may look it up under exactly this name.
elements = { 'carbon':['C', 'CD2', 'CZ2', 'CB', 'CA', 'CG', 'CE', 'CD', 'CZ',
                       'CH2', 'CE3', 'CD1', 'CE1', 'CZ3', 'CG1', 'CG2', 'CE2'],
             'nitrogen':['NZ', 'ND2', 'NH1', 'NH2', 'ND1', 'NE1', 'NE2',
                         'NE', 'N'],
             'oxygen':['OG', 'OE2', 'OXT', 'OD1', 'OE1', 'OH', 'OG1', 'OD2',
                       'O'],
             'suplphur':['SG', 'SD'],
             'clustering_BDZ':['C','CB','CD','CD1','CD2','CZ','CZ2','CZ3',
                               'ND1','ND2','NZ','OD1','OD2','SD' ],
             'clustering_ABDZ':['C','CA','CB','CD','CD1','CD2','CZ','CZ2',
                                'CZ3',
                                'ND1','ND2','NZ','OD1','OD2','SD' ],
             'clustering_G':['C','CG','CG1','OG','OG1','SG' ],
             'clustering_B':['C','CB'],
             'clustering_AG':['C','CA','CG','CG1','OG','OG1','SG' ],
             'clustering_AGE':['C','CA','CG','CG1','OG','OG1','SG','NE','OE1',
                               'CE1','CE','CE3' ],
             'clustering_BD':['C','CB','CD','CD1','OD1','SD' ],
             'clustering_ABD':['C','CA','CB','CD','CD1','OD1','SD' ],
             'clustering_AB':['C','CA','CB']}

## number of attached H for each heavy atom in each amino acid
## ('XXX' holds the backbone defaults that are merged into every entry below)
aaAtomsH={'XXX':{'N':1,'CA':1,'C':0,'O':0,'OXT':0},
          'GLY':{},
          'ALA':{'CB':3},
          'VAL':{'CB':0,'CG1':3,'CG2':3},
          'LEU':{'CB':2,'CG':0,'CD1':3,'CD2':3},
          'ILE':{'CB':0,'CG1':1,'CG2':3,'CD1':3},
          'MET':{'CB':2,'CG':2,'SD':0,'CE':3 },
          'PRO':{'N':0,'CB':2,'CG':2,'CD':2},
          'PHE':{'CB':2,'CG':0,'CD1':1,'CD2':1,'CE1':1,'CE2':1,'CZ':1},
          'TRP':{'CB':2,'CG':0,'CD1':1,'CD2':0,'NE1':1,'CE2':0,'CE3':1,
                 'CZ2':1,'CZ3':1,'CH2':1},
          'SER':{'CB':2,'OG':1},
          'THR':{'CB':0,'OG1':1,'CG2':3},
          'ASN':{'CB':2,'CG':0,'OD1':0,'ND2':2},
          'GLN':{'CB':2,'CG':2,'CD':0,'OE1':0,'NE2':2},
          'TYR':{'CB':2,'CG':0,'CD1':1,'CD2':1,'CE1':1,'CE2':1,'CZ':0,'OH':1},
          'CYS':{'CB':2,'SG':1},
          'LYS':{'CB':2,'CG':2,'CD':2,'CE':2,'NZ':3},
          'ARG':{'CB':2,'CG':2,'CD':2,'NE':1,'CZ':0,'NH1':2,'NH2':2},
          'HIS':{'CB':2,'CG':0,'ND1':1,'CD2':1,'CE1':1,'NE2':0},
          'ASP':{'CB':2,'CG':0,'OD1':0,'OD2':0},
          'GLU':{'CB':2,'CG':2,'CD':0,'OE1':0,'OE2':0} }

## complete every residue entry with the backbone defaults; values given
## explicitly for a residue (e.g. PRO 'N':0) override the defaults
for aa in aaAtomsH:
    default = copy.copy( aaAtomsH['XXX'] )
    default.update( aaAtomsH[aa] )
    aaAtomsH[aa] = default

## work in progress...heavy atoms of nucleic acids in standard order
nsAtoms={
    'ATP':['PG', 'O1G', 'O2G', 'O3G', 'PB', 'O1B', 'O2B', 'O3B', 'PA', 'O1A',
           'O2A', 'O3A', 'O5*', 'C5*', 'C4*', 'O4*', 'C3*', 'O3*', 'C2*',
           'O2*', 'C1*', 'N9', 'C8', 'N7', 'C5', 'C6', 'N6', 'N1', 'C2', 'N3',
           'C4'],
    'GTP':['PG', 'O1G', 'O2G', 'O3G', 'PB', 'O1B', 'O2B', 'O3B', 'PA', 'O1A',
           'O2A', 'O3A', 'O5*', 'C5*', 'C4*', 'O4*', 'C3*', 'O3*', 'C2*',
           'O2*', 'C1*', 'N9', 'C8', 'N7', 'C5', 'C6', 'O6', 'N1', 'C2', 'N2',
           'N3', 'C4'],
    'MG' :['MG'],
    'NDP':['P1', 'O1', 'O2', 'O5R', 'C5R', 'O1R', 'C4R', 'C3R', 'O3R', 'C2R',
           'O2R', 'C1R', 'N9', 'C8', 'N7', 'C5', 'C6', 'N6', 'N1', 'C2',
           'N3', 'C4', 'O10', 'P2', 'O11', 'O21', 'O51R', 'C51R', 'O11R',
           'C41R', 'C31R', 'O31R', 'C21R', 'O21R', 'C11R', 'N11', 'C61',
           'C51', 'C71', 'O71', 'N71', 'C41', 'C31', 'C21', 'P3', 'O3',
           'O4', 'O5', 'H8', 'H9', 'H7', 'H6', 'H1', 'H5', 'H4', 'H13',
           'H11', 'H12', 'H10', 'H18', 'H19', 'H17', 'H16', 'H3', 'H15',
           'H2', 'H14', 'H23', 'H24', 'H25', 'H22', 'H26', 'H21', 'H20'] }

## NAP has the same atoms as NDP except for 'H26'.
## list.remove() works in place and returns None, so the former
## "nsAtoms['NDP'].remove('H26')" stored None for NAP and also dropped
## 'H26' from NDP; build an independent filtered copy instead.
nsAtoms['NAP'] = [ a for a in nsAtoms['NDP'] if a != 'H26' ]

## map AA and NS and some other residue names to single letter code
resDic = copy.copy( aaDic )
resDic.update( nsDicStandard )

## map AA and NS residue names to list of allowed heavy atoms
atomDic = copy.copy( aaAtoms )
atomDic.update( nsAtoms )

## some common synonyms of atom names
atomSynonyms = { "O'":'O', 'OT1':'O', "O''":'OXT', 'OT2':'OXT',
                 'O1':'O', 'O2':'OXT',
                 'CD':'CD1'}

hydrogenSynonyms = { 'H':'HN', '1HE2':'HE21', '2HE2':'HE22',
                     '1HH1':'HH11', '2HH1':'HH12', '1HH2':'HH21',
                     '2HH2':'HH22', '1HD2':'HD21', '2HD2':'HD22' }

###################
## Hydrogen bond
##
## NOTE(review): THR lists the carbon 'CG2' as acceptor -- looks
## unintended, confirm before relying on it.
hbonds={ 'donors': {'GLY':['H','H1','H2','H3'],
                    'ALA':['H','H1','H2','H3'],
                    'VAL':['H','H1','H2','H3'],
                    'LEU':['H','H1','H2','H3'],
                    'ILE':['H','H1','H2','H3'],
                    'MET':['H','H1','H2','H3'],
                    'PRO':['H','H1','H2','H3'],
                    'PHE':['H','H1','H2','H3'],
                    'TRP':['H','H1','H2','H3','HE1'],
                    'SER':['H','H1','H2','H3','HG'],
                    'THR':['H','H1','H2','H3','HG1'],
                    'ASN':['H','H1','H2','H3','1HD2','2HD2'],
                    'GLN':['H','H1','H2','H3','1HE2','2HE2'],
                    'TYR':['H','H1','H2','H3','HH'],
                    'CYS':['H','H1','H2','H3','HG'],
                    'LYS':['H','H1','H2','H3','HZ1','HZ2','HZ3'],
                    'ARG':['H','H1','H2','H3','HE','1HH1','2HH1',
                           '1HH2','2HH2'],
                    'HIS':['H','H1','H2','H3','HD1','HE2'],
                    'ASP':['H','H1','H2','H3'],
                    'GLU':['H','H1','H2','H3']},
         'acceptors': {'GLY':['O','OXT' ],
                       'ALA':['O','OXT'],
                       'VAL':['O','OXT'],
                       'LEU':['O','OXT'],
                       'ILE':['O','OXT'],
                       'MET':['O','SD','OXT'],
                       'PRO':['O','OXT'],
                       'PHE':['O','OXT'],
                       'TRP':['O','OXT'],
                       'SER':['O','OG', 'OXT'],
                       'THR':['O','OG1','CG2', 'OXT'],
                       'ASN':['O','OD1','OXT'],
                       'GLN':['O','OE1','OXT'],
                       'TYR':['O','OH','OXT'],
                       'CYS':['O','SG','OXT'],
                       'LYS':['O','OXT'],
                       'ARG':['O','OXT'],
                       'HIS':['O','OXT'],
                       'ASP':['O','OD1','OD2', 'OXT'],
                       'GLU':['O','OE1','OE2', 'OXT']} }


##############################
## Polar hydrogen connectivity -- PARAM19

polarH = {'GLY':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'ALA':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'VAL':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'LEU':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'ILE':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'MET':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'PRO':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'PHE':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'TRP':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HE1':'NE1'},
          'SER':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HG':'OG'},
          'THR':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HG1':'OG1'},
          'ASN':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HD21':'ND2','HD22':'ND2'},
          'GLN':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HE21':'NE2','HE22':'NE2'},
          'TYR':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HH':'OH'},
          'CYS':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'LYS':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HZ1':'NZ','HZ2':'NZ','HZ3':'NZ'},
          'ARG':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HE':'NE', 'HH11':'NH1','HH12':'NH1',
                 'HH21':'NH2','HH22':'NH2'},
          'HIS':{'H':'N','H1':'N','H2':'N','H3':'N',
                 'HD1':'ND1','HE2':'NE2'},
          'ASP':{'H':'N','H1':'N','H2':'N','H3':'N'},
          'GLU':{'H':'N','H1':'N','H2':'N','H3':'N'}}


## Scoring matrix for protein-protein interaction surfaces
## (Volume normalized values, Table IV in reference)
##
## The Matrix is based on data from a db of 621 noneredundant protein-protein
## complexes, a CB-CB (CA for Gly) of 6 A was used
##
## Reference:
## "Residue Frequencies and Pair Preferences at Protein-Protein Interfaces"
## F. Glaser, D. M. Steinberg, I. A. Vakser and N. Ben-Tal,
## Proteins 43:89-102 (2001)
##
## Warning. This is just half of the matrix (above diagonal), the residue names
## in the pairs is sorted in the same order as in Complex.resPairCounts()

pairScore = {
    'WW': 5.85, 'WY': 6.19, 'RT': 3.77, 'RV': 4.18, 'RW': 8.57, 'RR': 2.87,
    'RS': 2.82, 'RY': 5.28, 'GW': 1.42, 'GV':-0.41, 'GT': 0.21, 'GS':-1.53,
    'GR': 1.59, 'GQ': 1.70, 'GP':-0.51, 'GY': 1.25, 'GG':-4.40, 'GN':-0.54,
    'GM': 0.91, 'GL':-0.37, 'GK': 1.33, 'GI': 0.77, 'GH': 1.08, 'SS':-0.09,
    'IY': 5.61, 'HY': 6.05, 'HR': 4.90, 'HS': 0.80, 'HP': 2.89, 'HQ': 4.00,
    'HV': 3.21, 'HW': 6.46, 'HT': 2.71, 'KN': 3.17, 'HK': 2.72, 'HH': 5.37,
    'HI': 3.38, 'HN': 2.38, 'HL': 4.88, 'HM': 4.65, 'ST': 1.91, 'PR': 3.99,
    'PS': 1.33, 'PP': 0.60, 'PQ': 3.50, 'PV': 2.90, 'PW': 7.87, 'PT': 2.65,
    'PY': 4.22, 'IQ': 3.60, 'IP': 3.27, 'AK': 2.13, 'EM': 3.88, 'EL': 3.12,
    'EN': 2.68, 'EI': 3.20, 'EH': 2.30, 'EK': 5.32, 'EE': 1.65, 'EG':-0.89,
    'EF': 2.87, 'IT': 3.05, 'EY': 4.54, 'ET': 2.88, 'EW': 1.20, 'IV': 4.91,
    'EQ': 1.95, 'EP': 3.17, 'ES': 2.60, 'ER': 5.75, 'II': 3.89, 'MM': 6.02,
    'MN': 2.30, 'AS': 0.39, 'MT': 2.09, 'MW': 4.89, 'MV': 4.37, 'MQ': 4.18,
    'MP': 3.38, 'MS': 1.61, 'MR': 3.62, 'MY': 4.81, 'IL': 4.59, 'FP': 4.25,
    'FQ': 4.25, 'FR': 4.49, 'FS': 1.75, 'FT': 3.34, 'VV': 3.74, 'FV': 4.69,
    'FW': 5.83, 'FY': 5.83, 'AV': 2.57, 'FF': 5.34, 'FG': 0.14, 'FH': 3.47,
    'FI': 5.33, 'FK': 3.57, 'FL': 4.86, 'FM': 5.28, 'FN': 3.11, 'EV': 3.22,
    'NN': 2.92, 'NY': 3.66, 'NP': 3.09, 'NQ': 3.45, 'NR': 3.85, 'NS': 1.77,
    'NT': 2.52, 'NV': 1.36, 'NW': 3.54, 'CK': 2.05, 'CI': 1.76, 'CH': 4.12,
    'CN':-0.42, 'CM': 1.84, 'CL': 2.93, 'CC': 7.65, 'CG':-0.25, 'CF': 3.68,
    'CE': 2.51, 'CD': 0.24, 'CY': 2.47, 'CS': 2.48, 'CR': 2.81, 'CQ': 1.33,
    'CP': 2.47, 'CW': 2.14, 'CV': 2.89, 'CT': 1.03, 'SY': 2.30, 'VW': 2.92,
    'KK': 3.24, 'SW': 2.87, 'SV': 1.42, 'KM': 3.93, 'KL': 3.15, 'KS': 2.74,
    'KR': 2.29, 'KQ': 3.50, 'KP': 3.75, 'KW': 5.76, 'KV': 4.45, 'KT': 3.67,
    'KY': 5.26, 'DN': 3.85, 'DL': 1.40, 'DM': 0.36, 'DK': 3.90, 'DH': 5.20,
    'DI': 2.30, 'DF': 0.99, 'DG':-0.08, 'DD': 0.13, 'DE': 0.08, 'YY': 5.93,
    'DY': 1.76, 'DV': 1.93, 'DW': 2.62, 'DT': 3.88, 'DR': 4.94, 'DS': 2.94,
    'DP': 1.46, 'DQ': 3.26, 'TY': 3.14, 'LN': 2.31, 'TW': 5.12, 'LL': 4.03,
    'LM': 5.32, 'LV': 4.20, 'LW': 5.77, 'LT': 2.07, 'LR': 4.99, 'LS': 1.41,
    'LP': 2.50, 'LQ': 3.46, 'LY': 4.19, 'AA':-0.52, 'AC': 1.46, 'AE': 1.71,
    'AD': 1.13, 'AG':-1.77, 'AF': 3.00, 'AI': 2.84, 'AH': 2.59, 'IS': 1.00,
    'IR': 3.80, 'AM': 2.30, 'AL': 2.77, 'IW': 6.24, 'AN': 1.69, 'AQ': 1.72,
    'AP': 1.22, 'IK': 3.23, 'AR': 1.90, 'IM': 5.25, 'AT': 1.21, 'AW': 3.37,
    'IN': 1.59, 'AY': 2.47, 'VY': 3.95, 'QQ': 2.83, 'QS': 2.00, 'QR': 4.50,
    'QT': 1.82, 'QW': 1.37, 'QV': 3.22, 'QY': 2.05, 'TV': 2.83, 'TT': 1.27}

## various constants
boltzmann = 1.38066e-23     ## [J/K]
NA = 6.02214199e+23         ## Avogadro constant [1/mol]
planck2 = 1.0545727e-34     ## [J s], h/2Pi
euler = N.e
mu = 1.66056e-27            ## atomic mass unit in [kg]
angstroem = 1e-10           ## [m]
calorie = 4.184             ## [J]

atomMasses = { 'H':1.00797, 'C':12.01115, 'N':14.0067,
               'S':32.064, 'O':15.9994, 'P':30.9738 }

def allAACodes():
    """
    Collect the distinct single letter codes found in L{aaDic}.

    @return: list of all single AA codes, including B, Z, X (each code
             once, in the order in which aaDic.values() yields them)
    @rtype: [str]
    """
    result = []
    for aa in aaDic.values():
        if aa not in result:
            ## append the code as one item; 'result += aa' would extend the
            ## list character by character and split any longer code
            result.append( aa )

    return result

377 378

def allAA():
    """
    Single letter codes without the ambiguous / unknown ones.

    @return: list of all 20 'exact' single AA codes.
    @rtype: [str]
    """
    codes = allAACodes()

    ## drop the codes that do not stand for exactly one amino acid
    for ambiguous in ('Z', 'B', 'X'):
        codes.remove( ambiguous )

    return codes

390 391

def elementType( eLetter ):
    """
    Classify an atom as polar or unpolar::
      elementType( eLetter ) -> 'p', 'u' or None

    @param eLetter: atom name
    @type  eLetter: str

    @return: 'p' for polar (N, O, H, Cl), 'u' for unpolar (C, S) and
             None if the name is in neither class
    @rtype: p|u OR None
    """
    polar = ['N','O','H','Cl']
    unpolar = ['C','S']

    if eLetter in polar:
        return 'p'

    if eLetter in unpolar:
        return 'u'

    return None

411 412

def resType( resCode ):
    """
    Classify residues as aromatic (a), charged (c) or polar (p).
    A residue can belong to several classes (e.g. 'H' is both aromatic
    and charged); a residue in none of them is reported as ['u'].

    @param resCode: amino acid code (single letter, upper case)
    @type  resCode: str

    @return: list of types this residue belongs to, in the order a, c, p;
             ['u'] if it belongs to none
    @rtype: [a|c|p] OR [u]
    """
    ## a sequence of pairs (not a dict) keeps the order of the result fixed
    classes = ( ('a', ('F','Y','W','H')),       ## aromatic
                ## lysine is 'K'; the list used to name 'L' (leucine) here
                ('c', ('E','D','K','R','H')),   ## charged
                ('p', ('Q','N','S')) )          ## polar

    result = [ label for label, members in classes if resCode in members ]

    return result or ['u']

437 438

def singleAA(seq, xtable=xxDic ):
    """
    convert list with 3-letter AA code to list with 1-letter code

    Names are looked up in lower case. If C{xtable} is given (default
    L{xxDic}), the lookup table is a copy of L{resDic} extended by
    C{xtable}; with an empty or None C{xtable} only L{aaDic} is used.
    Entries that cannot be translated are reported with EHandler.warning
    and become 'X'.

    @param seq: amino acid sequence in 3-letter code
    @type  seq: [str]
    @param xtable: dictionary with additional str:single_char mapping
    @type  xtable: dict

    @return: list with 1-letter code; C{ ['A','C','L','A'...]}
    @rtype: [str]
    """
    table = aaDic
    if xtable:
        ## work on a copy so that the module level tables stay untouched
        table = copy.copy( resDic )
        table.update( xtable )

    result = []   # will hold 1-letter list

    for aa in seq:
        try:
            aa = aa.lower()
            result.append( table[aa] )
        except (KeyError, AttributeError):
            ## KeyError: unknown name; AttributeError: entry is not a string.
            ## str() keeps the warning itself from failing on non-strings.
            EHandler.warning("singleAA(): unknown Residue: " + str(aa))
            result.append( 'X' )

    return result

465 466

def single2longAA( seq ):
    """
    Convert string of 1-letter AA code into list of 3-letter AA codes.

    The translation uses the inverse of L{aaDicStandard}; letters are
    matched case-insensitively and the names are returned in upper case.
    Letters without a translation are reported with EHandler.warning
    and become 'Xaa'.

    @param seq: amino acid sequence in 1-letter code
    @type  seq: str

    @return: list with the amino acids in 3-letter code
    @rtype: [str]
    """
    ## invert AA dict: 1-letter code -> upper case 3-letter name
    invTab = dict( [ (code, name.upper())
                     for name, code in aaDicStandard.items() ] )

    result = []
    for aa in seq:
        try:
            aa = aa.upper()
            result.append( invTab[aa] )
        except (KeyError, AttributeError):
            ## KeyError: unknown letter; AttributeError: entry is not a string
            EHandler.warning("unknown residue: " + str(aa))
            result.append( 'Xaa' )

    return result

493 494

def positionByDescription( model, descr, report=0 ):
    """
    @note: Obsolete: use PDBModel.filter instead.

    Find the position of an atom(s) in the atom dictionary by
    description. If the description matches more than one atom
    a list of matching positions is returned.

    An atom matches if, for B{every} key of the description, the atom's
    value for that key equals the given value (or one of the given values
    if a list is given).

    Example atom dictionary (valid keys)::
      {'name': 'OG1', 'residue_number': 20, 'insertion_code': '',
       'alternate': '', 'name_original': ' OG1',
       'temperature_factor': 23.640000000000001, 'occupancy': 1.0,
       'element': 'O', 'segment_id': 'RECA', 'charge': '',
       'residue_name': 'THR', 'serial_number': 201, 'type': 'ATOM',
       'chain_id': ''}

    @param model: object whose C{atoms} attribute is a list of atom
                  dictionaries
    @type  model: PDBModel
    @param descr: dictionary with keys from the atom dictionary; each
                  value is either a single value or a list of accepted values
    @type  descr: dict
    @param report: write a message to stdOut
    @type  report: 1|0

    @return: int (exactly one match) or list of matching positions in
             ascending order (empty if nothing matches or descr is empty)
    @rtype: int OR [int]
    """
    atoms = model.atoms

    ## check that all keys are valid
    if len( atoms ) > 0:
        for k in descr.keys():
            if k not in atoms[0]:
                EHandler.warning("Key:%s not in atom dictionary"%str(k) )

    ## make sure that it is possible to iterate over the value of every key
    wanted = {}
    for k, v in descr.items():
        if isinstance( v, list ):
            wanted[k] = v
        else:
            wanted[k] = [ v ]

    ## identify positions matching all keys; scanning the atoms once avoids
    ## the index bookkeeping that formerly skipped every other match
    positions = []
    if wanted:
        for i, atom in enumerate( atoms ):
            for k, values in wanted.items():
                ## an atom lacking the key cannot match
                if k not in atom or atom[k] not in values:
                    break
            else:
                positions.append( i )

    if report:
        print('pos serial_nr segID chainID residue atom')
        for p in positions:
            atom = atoms[p]
            print('%4i %6i %4s %1s %4i %3s %4s '\
                  %( p,
                     atom['serial_number'],
                     atom['segment_id'],
                     atom['chain_id'],
                     atom['residue_number'],
                     atom['residue_name'],
                     atom['name'] ))

    if len(positions) == 1:
        return positions[0]
    return positions

575 576

def cmpAtoms( a1, a2 ):
    """
    Comparison function for bringing atoms into standard order
    within residues as defined by L{atomDic}.

    The standard order is taken from the residue of a1; an atom whose
    name is not listed there ranks after all listed atoms.

    @param a1: atom dictionary
    @type  a1: PDBModel.atoms
    @param a2: atom dictionary
    @type  a2: PDBModel.atoms

    @return: -1, 0 or 1 if a1 ranks before, equal to or after a2
    @rtype: -1|0|1
    """
    ## get standard order within residues
    order = atomDic[ a1['residue_name'] ]
    last = len( order )

    def rank( atom ):
        ## position of the atom name in the standard order
        name = atom['name']
        if name in order:
            return order.index( name )
        return last

    r1 = rank( a1 )
    r2 = rank( a2 )

    ## three-way comparison of the two ranks
    return (r1 > r2) - (r1 < r2)

602 603

def sortAtomsOfModel( model ):
    """
    Sort atoms within residues into the standard order defined in L{atomDic}.
    The given model is not modified; the sorting is applied to a deep copy.

    @param model: model to sort
    @type  model: PDBModel

    @return: model with sorted atoms
    @rtype: PDBModel
    """
    ## deep copy containing all atoms of the model
    everyAtom = range( model.lenAtoms() )
    duplicate = model.take( everyAtom, deepcopy=1 )

    ## sort the copy with the atom order obtained from cmpAtoms
    order = duplicate.argsort( cmpAtoms )

    return duplicate.sort( order )

621 622 623 624 ############# 625 ## TESTING 626 ############# 627

class Test:
    """
    Test class

    Self test of this module: sorts the atoms of a test structure and
    converts its sequence between 1-letter and 3-letter code.
    """

    def run( self, local=0 ):
        """
        run function test

        Loads the structure '/lig/1A19.pdb' below t.testRoot(), keeps the
        atoms selected by maskProtein(), sorts a copy with
        L{sortAtomsOfModel} and compares the original and the sorted atom
        at each position with L{cmpAtoms}. The sequence is also passed
        through L{single2longAA} and L{singleAA}; that result is not part
        of the return value.

        @param local: transfer local variables to global and perform
                      other tasks only when run locally
        @type  local: 1|0

        @return: sum of the per-position cmpAtoms() results
        @rtype: number (as returned by N.sum)
        """
        from Biskit import PDBModel

        ## load a structure
        m = PDBModel( t.testRoot()+'/lig/1A19.pdb' )
        model_1 = m.compress( m.maskProtein() )

        ## now sort in standard order
        model_2 = sortAtomsOfModel( model_1)

        ## compare the atom order
        ## NOTE(review): the local name 'cmp' shadows the builtin and, with
        ## local=1, is exported to the module globals below where it hides
        ## the builtin cmp for all code in this module -- confirm intent
        cmp = []
        for a in range( model_1.lenAtoms() ):
            cmp += [ cmpAtoms( model_1.atoms[a], model_2.atoms[a] )]

        ## get the primary sequence as a string
        seq = model_1.sequence()

        ## convert it to a list of three letter code
        seq=single2longAA(seq)

        ## convert it to a list in one letter code
        seq=singleAA(seq)

        ## make all local variables (including self and local) available
        ## as module globals, e.g. for interactive inspection
        if local:
            globals().update( locals() )

        return N.sum(cmp)


    def expected_result( self ):
        """
        Precalculated result to check for consistent performance.

        @return: value that run() is expected to return
        @rtype: int
        """
        return 159

## run the self test when the module is executed as a script
if __name__ == '__main__':

    test = Test()

    ## local=1 additionally copies the local variables of run() into the
    ## module globals
    assert test.run( local=1 ) == test.expected_result()