Package Biskit :: Module msms
[hide private]
[frames] | no frames]

Source Code for Module Biskit.msms

  1  ## Class msms 
  2  ## 
  3  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  4  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  5  ## 
  6  ## This program is free software; you can redistribute it and/or 
  7  ## modify it under the terms of the GNU General Public License as 
  8  ## published by the Free Software Foundation; either version 2 of the 
  9  ## License, or any later version. 
 10  ## 
 11  ## This program is distributed in the hope that it will be useful, 
 12  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 14  ## General Public License for more details. 
 15  ## 
 16  ## You find a copy of the GNU General Public License in the file 
 17  ## license.txt along with this program; if not, write to the Free 
 18  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 19  ## 
 20  ## 
 21  ## 
 22  ## $Revision: 2.6 $ 
 23  ## last $Author: leckner $ 
 24  ## last $Date: 2006/09/13 10:28:14 $ 
 25  """ 
 26  Use MSMS to calculate surface info. 
 27   
 28  @note: This module is not any more the prefered module for 
 29         surface area calculations. SurfaceRacer is the default 
 30         module for this kind of calculations. 
 31  """ 
 32   
 33  import Numeric as N 
 34  import Biskit.tools as T 
 35  import string 
 36  import os 
 37  import tempfile 
 38   
 39  ## import Mslib 
 40   
 41  from Biskit import Executor, TemplateError 
 42  ## import Biskit.settings as S 
 43   
 44   
45 -class pdb2xyzrnError( Exception ):
46 pass
47
48 -class pdb2xyzrn( Executor ):
49 """ 50 MSMS - pdb2xyzrn 51 ================ 52 53 Run the awk script C{ pdb_to_xyzrn } to extract names and radii. 54 This is nessesary to run msms. The radii and atoms depend on where 55 in the sequence an atom resides. The radii used is determined 56 by the information in the C{ atomtypenumbers } file provided 57 with the MSMS application. 58 59 Result 60 ------ 61 :: 62 r - atom radii array 63 n - atom name list 64 """ 65
66 - def __init__( self, model, **kw ):
67 """ 68 @param model: reference 69 @type model: PDBModel 70 71 @param kw: additional key=value parameters for Executor: 72 @type kw: key=value pairs 73 :: 74 debug - 0|1, keep all temporary files (default: 0) 75 verbose - 0|1, print progress messages to log (log != STDOUT) 76 node - str, host for calculation (None->local) NOT TESTED 77 (default: None) 78 nice - int, nice level (default: 0) 79 log - Biskit.LogFile, program log (None->STOUT) (default: None) 80 81 """ 82 self.f_pdb = tempfile.mktemp('_pdb_to_xyzrn.pdb') 83 84 ## gpdb_to_xyzrn have to be run i the local directory where 85 ## it resides. Otherwise it will not find the data file 86 ## called "atmtypenumbers". 87 if os.path.exists( T.projectRoot() +'/external/msms/'): 88 dir = T.projectRoot() +'/external/msms/' 89 else: 90 raise pdb2xyzrnError_Error, 'Cannot find msms directory. This should reside in ~biskit/external/msms' 91 92 Executor.__init__( self, 'pdb2xyzrn', f_in=self.f_pdb, 93 cwd=dir, **kw ) 94 95 self.model = model
96 97
98 - def prepare( self ):
99 """ 100 Overrides Executor method. 101 """ 102 self.model.writePdb( self.f_pdb )
103 104
105 - def cleanup( self ):
106 Executor.cleanup( self ) 107 108 if not self.debug: 109 T.tryRemove( self.f_pdb )
110 111
112 - def parse_xyzrn( self, output ):
113 """ 114 Extract xyz and r from output. 115 116 @param output: STDOUT from pdb_to_xyzrn 117 @type output: [str] 118 119 @return: r, n - array with atom radii, list with atom names 120 @rtype: array, array 121 """ 122 lines = output.split('\n') 123 124 ## convert into lists that mslib will like 125 xyzr = [] 126 r = [] 127 n = [] 128 129 for l in lines: 130 ## don't let the last empty line mess things up 131 if len(l)!=0: 132 l = string.split( l ) 133 xyzr += [ float(l[0]), float(l[1]), \ 134 float(l[2]), float(l[3]) ] 135 n += [ l[5] ] 136 137 xyzr = N.reshape( xyzr, ( len(xyzr)/4, 4 ) ) 138 139 r = xyzr[:,3] 140 141 ## xyz data is not passed on as it is not needed 142 xyz = xyzr[:,:3] 143 144 return r, n
145 146
147 - def isFailed( self ):
148 """ 149 Overrides Executor method 150 """ 151 return self.error is None
152 153
154 - def finish( self ):
155 """ 156 Overrides Executor method 157 """ 158 Executor.finish( self ) 159 self.result = self.parse_xyzrn( self.output )
160 161 162 ## class MSLIB_Error( Exception ): 163 ## pass 164 165 166 ## class MSLIB: 167 ## """ 168 ## MSLIB 169 ## ===== 170 ## Calculate SAS (Solvent Accessible surface) and 171 ## SES (Solvent Excluded Surface) using MSLIB. 172 173 ## @note: This class has been retired. If you need to run MSMS 174 ## use the L[Biskit.MSMS} class insted. The current default class for 175 ## calculating MS and AS is otherwise L{Biskit.SurfaceRacer} 176 177 ## @bug: MSLIB fails when calling Mslib.MSMS() 178 ## The error seems to go back to Scientificpython: 179 180 ## ERROR:: 181 ## File "/usr/tmp/python-8778q1e", line 160, in calc 182 ## surf = Mslib.MSMS( coords = xyz, radii = self.r ) 183 ## File "Mslib/__init__.py", line 120, in __init__ 184 ## msms.MOLSRF.__init__(self, name=name, coord=c, nat=nat, maxat=maxnat, names=atnames) 185 ## File "Mslib/msms.py", line 1832, in __init__ 186 ## self.this = apply(msmsc.new_MOLSRF,_args,_kwargs) 187 ## ValueError: Failed to make a contiguous array of type 6 188 ## """ 189 190 ## def __init__( self , model ): 191 192 ## self.model = model 193 194 ## ## get radiia and name array 195 ## p2x = pdb2xyzrn(self.model) 196 ## self.r, self.n = p2x.run() 197 198 199 ## def calc( self, descr='no_name', probeRadius=1.5 ): 200 ## """ 201 ## Run msms analytical surface calculation, i.e.:: 202 ## msms -if xyzr.coord -af atoms.area -surface ases 203 204 ## @param descr: descriptive name 205 ## @type descr: str 206 ## @param probeRadius: probe radius 207 ## @type probeRadius: float 208 209 ## @return: out, probe radii used, total SAS and SES 210 ## sesList, array of lenght atoms with SES 211 ## (Solvent Excluded Surface) 212 ## sasList, array of lenght atoms with SAS 213 ## (Solvent Accessible surface) 214 ## atmList, list of atom names 215 ## @rtype: dict, array, array, [str] 216 ## """ 217 ## xyz = self.model.xyz 218 219 ## print N.shape(xyz), N.shape(self.r) 220 ## print xyz[0], self.r[0] 221 ## print xyz[-1], self.r[-1] 222 ## ## run msms 223 ## surf = Mslib.MSMS( coords = xyz, 224 ## radii = self.r, 225 ## atnames = self.n, 226 ## name = descr, 227 ## maxnat = len(xyz)+100 ) 228 229 ## surf = Mslib.MSMS( coords = xyz, radii = self.r ) 230 231 ## surf.compute( probe_radius = probeRadius, density=1.0 ) 232 ## area = surf.compute_ses_area() 233 234 ## out = {} 235 ## if area == 0: 236 ## surf.write_ses_area( surfName ) 237 ## out['ses'] = surf.sesr.fst.a_ses_area 238 ## out['sas'] = surf.sesr.fst.a_sas_area 239 ## else: 240 ## raise MSLIB_Error('compute_ses_area not successfull') 241 242 ## ## parse atom specific SAS (Solvent Accessible surface) 243 ## ## and SES (Solvent Excluded Surface) 244 ## surfName = surfName + '_0' 245 ## outList = open( surfName , 'r').readlines() 246 247 ## sasList = [] 248 ## sesList = [] 249 ## atmList = [] 250 251 ## for i in range( 1, len(outList) ): 252 ## line = string.split( outList[i] ) 253 ## sesList += [ float( line[1] ) ] 254 ## sasList += [ float( line[2] ) ] 255 ## atmList += [ line[3] ] 256 257 ## ## polar and nonepolar surfaces 258 ## N_mask = N.transpose(atmList)[0] == 'N' 259 ## O_mask = N.transpose(atmList)[0] == 'O' 260 ## C_mask = N.transpose(atmList)[0] == 'C' 261 ## out['ses_polar'] = N.sum( sesList * O_mask ) + N.sum( sesList * N_mask ) 262 ## out['ses_non-polar'] = N.sum( sesList * C_mask ) 263 ## out['sas_polar'] = N.sum( sasList * O_mask ) + N.sum( sasList * N_mask ) 264 ## out['sas_non-polar'] = N.sum( sasList * C_mask ) 265 266 ## ## cleanup 267 ## try: 268 ## os.remove( surfName ) 269 ## except: 270 ## pass 271 272 ## return out, sesList, sasList, atmList 273 274
275 -class MSMS_Error( Exception ):
276 pass
277 278
279 -class MSMS( Executor ):
280 """ 281 MSMS 282 ==== 283 284 Calculate SAS (Solvent Accessible surface) and 285 SES (Solvent Excluded Surface) using the msms applicaton. 286 287 Run msms analytical surface calculation i.e.:: 288 msms -if xyzr.coord -af atoms.area -surface ases 289 290 Result 291 ------ 292 :: 293 out - dictionary - probe radii used, total SAS and SES 294 sesList - array of lenght atoms with SES (Solvent Excluded Surface) 295 sasList - array of lenght atoms with SAS (Solvent Accessible surface) 296 297 @note: The current default class for calculating MS and AS 298 is L{Biskit.SurfaceRacer}. 299 """ 300 301
302 - def __init__( self, model, verbose=1, debug=1, **kw ):
303 """ 304 @param model: PDBModel 305 @type model: 306 307 @param kw: additional key=value parameters for Executor: 308 @type kw: key=value pairs 309 :: 310 debug - 0|1, keep all temporary files (default: 0) 311 verbose - 0|1, print progress messages to log (log != STDOUT) 312 node - str, host for calculation (None->local) NOT TESTED 313 (default: None) 314 nice - int, nice level (default: 0) 315 log - Biskit.LogFile, program log (None->STOUT) (default: None) 316 """ 317 self.f_xyzrn = tempfile.mktemp('_msms.xyzrn') 318 319 ## output file from MSMS, will add .area exiension to file 320 self.f_surf = tempfile.mktemp( ) 321 322 arg =' -surface ases -if %s -af %s'%( self.f_xyzrn, self.f_surf ) 323 324 Executor.__init__( self, 'msms', args=arg, **kw ) 325 326 self.model = model.clone( deepcopy=1 )
327 328
329 - def prepare( self ):
330 """ 331 Write a xyzrn coordinate file to disc. 332 Overrides Executor method. 333 """ 334 ## get radiia and name array 335 p2x = pdb2xyzrn(self.model ) 336 r, n = p2x.run() 337 338 xyz = self.model.xyz 339 xyzr = N.concatenate( ( xyz, N.transpose([r]) ) ,axis=1 ) 340 341 f = open( self.f_xyzrn, 'w' ) 342 i = 0 343 for line in xyzr: 344 f.write( str(line)[2:-1] + ' 1 ' + n[i] + '\n') 345 i += 1 346 f.close()
347 348
349 - def cleanup( self ):
350 """ 351 Remove temp files. 352 """ 353 Executor.cleanup( self ) 354 355 if not self.debug: 356 T.tryRemove( self.f_xyzrn ) 357 T.tryRemove( self.f_surf + '.area' )
358 359
360 - def parse_msms( self ):
361 """ 362 Parse the result file from a msms calculation. 363 364 @return: probe radii used, total SAS and SES, 365 array of lenght atoms with SES (Solvent Excluded Surface), 366 array of lenght atoms with SAS (Solvent Accessible surface), 367 and a list of atom names 368 @rtype: dict, array, array, [str] 369 """ 370 ## get ASA, SES and probe radiii from stdout 371 out = string.split( open( self.f_out ).readlines()[-3] ) 372 out = { 'probe':float(out[1]), 373 'ses':float(out[-2]), 374 'sas':float(out[-1]) } 375 376 ## MSMS adds the extension .area to output file 377 ## over writing any existing extension 378 surfName = self.f_surf + '.area' 379 380 ## parse atom specific SAS (Solvent Accessible surface) 381 ## and SES (Solvent Excluded Surface) 382 outList = open( surfName , 'r').readlines() 383 384 sasList = [] 385 sesList = [] 386 atmList = [] 387 388 for i in range( 1, len(outList) ): 389 line = string.split( outList[i] ) 390 391 if len(line)== 4: 392 sesList += [ float( line[1] ) ] 393 sasList += [ float( line[2] ) ] 394 atmList += [ line[3] ] 395 else: 396 raise MSMS_Error, 'Wrong format in file %s'%surfName 397 398 return out, sesList, sasList, atmList
399 400
401 - def isFailed( self ):
402 """ 403 Overrides Executor method 404 """ 405 return not self.error is None
406 407
408 - def finish( self ):
409 """ 410 Overrides Executor method 411 """ 412 Executor.finish( self ) 413 self.result = self.parse_msms( )
414 415 416 417 ############# 418 ## TESTING 419 ############# 420
421 -class Test:
422 """ 423 Test class 424 """ 425 from Biskit import PDBModel 426 427
428 - def run( self, local=0 ):
429 """ 430 run function test 431 432 @param local: transfer local variables to global and perform 433 other tasks only when run locally 434 @type local: 1|0 435 436 @return: sum of SAS and SES areas 437 @rtype: float 438 """ 439 from Biskit import PDBModel 440 441 if local: 'Loading PDB...' 442 m = PDBModel( T.testRoot() + '/lig/1A19.pdb' ) 443 m = m.compress( m.maskProtein() ) 444 445 if local: 'Getting surfaces via the MSMS executable' 446 ms = MSMS( m, debug=1, verbose=1 ) 447 448 if local: 'Running' 449 out, sesList, sasList, atmList = ms.run() 450 451 if local: 452 print out 453 print '\nResult from MSMS (first 5 lines): ' 454 print 'SES \tSAS \tAtom name' 455 for i in range(5): 456 print '%.2f \t%.2f \t%s'%(sesList[i], sasList[i], atmList[i]) 457 458 print 'MSMS done' 459 460 globals().update( locals() ) 461 462 463 return out['sas'] + out['ses']
464 465
466 - def expected_result( self ):
467 """ 468 Precalculated result to check for consistent performance. 469 470 @return: sum of SAS and SES areas 471 @rtype: float 472 """ 473 return 5085.1580000000004 + 4208.7389999999996
474 475 476 477 if __name__ == '__main__': 478 479 test = Test() 480 481 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-8 482 483 484 ## #################################### 485 ## ## Testing 486 487 ## if __name__ == '__main__': 488 489 ## from Biskit import PDBModel 490 491 ## print "Loading PDB..." 492 493 ## m = PDBModel( T.testRoot() + '/lig/1A19.pdb' ) 494 ## m = m.compress( m.maskProtein() ) 495 496 497 ## ## ####### 498 ## ## ## convert pdb file to coordinates, radii and names 499 ## ## print "Starting xpdb_to_xyzrn" 500 ## ## x = pdb2xyzrn( m, debug=0, verbose=1 ) 501 502 ## ## print "Running" 503 ## ## r, n = x.run() 504 505 506 ## ## print '\nResult from pdb_to_xyzrn (first 5 lines): ' 507 ## ## print 'radii \t\tnames' 508 ## ## for i in range(5): 509 ## ## print '%f \t%s'%(r[i], n[i]) 510 511 ## ####### 512 ## ## get surfaces via the MSMS executable 513 514 ## print "Starting MSMS" 515 ## a = MSMS( m, debug=1, verbose=1 ) 516 517 ## print "Running" 518 ## out, sesList, sasList, atmList = a.run() 519 520 ## print out 521 ## print '\nResult from MSMS (first 5 lines): ' 522 ## print 'SES \tSAS \tAtom name' 523 ## for i in range(5): 524 ## print '%.2f \t%.2f \t%s'%(sesList[i], sasList[i], atmList[i]) 525 526 ## print 'MSMS done' 527 528 529 ## ## ####### 530 ## ## ## get surfaces via MSLIB 531 532 ## ## a = MSLIB( m ) 533 ## ## out, sesList, sasList, atmList = a.calc( m.xyz ) 534 ## ## print out 535