Package Biskit :: Module ProfileCollection
[hide private]
[frames] | no frames]

Source Code for Module Biskit.ProfileCollection

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## last $Date: 2006/12/21 09:39:15 $ 
 21  ## last $Author: leckner $ 
 22  ## $Revision: 2.11 $ 
 23   
 24  """ 
 25  Manage profiles. 
 26  """ 
 27   
 28  import Numeric as N 
 29  import tools as T 
 30  import mathUtils as M 
 31  from Biskit import EHandler 
 32   
 33  import copy 
 34   
 35  try: 
 36      import biggles 
 37  except: 
 38      biggles = 0 
 39   
 40   
class ProfileError(Exception):
    """
    Raised by ProfileCollection for missing profiles or info records and
    for inconsistent profile data.
    """
43
class ProfileCollection:
    """
    Manage profiles (arrays or lists of values) for trajectory frames
    or atoms/residues in PDBModel. ProfileCollection resembles a
    2-dimensional array where the first axis (let's say row) is
    accessed by a string key and each row has an additional info
    dictionary assigned to it. The take() and concat() methods operate
    on the columns, i.e. they are applied to all profiles at the same
    time.

    By default, profiles of numbers are stored and returned as
    Numeric.array and all others are stored and returned as ordinary
    list. This behaviour can be modified with the option asarray of
    ProfileCollection.set(). Using both lists and arrays is a
    compromise between the efficiency of Numeric arrays and the
    problem that arrays of objects cannot be unpickled (Numeric bug)
    and that arrays of strings would end up as 2-D arrays of char.
    The 'isarray' entry of a profile's info dictionary tells whether
    the profile is stored as array or as list.

    ProfileCollection p can be accessed like a dictionary of lists::
      len( p )          -> number of profiles (== len( p.profiles ) )
      p['prof1']        -> list with values of profile 'prof1'
      del p['prof1']    -> remove a profile
      p['prof1'] = [..] -> add a profile without additional infos
      for k in p        -> iterate over profile keys
      'prof1' in p      -> 1, if collection contains key 'prof1'

    But it is more than that - each key also has a dictionary of info values
    assigned to it (see getInfo(), setInfo(), p.infos). These can be accessed
    like::
      p['prof1','date']   -> date of creation of profile named 'prof1'
      p.getInfo('prof1')  -> returns all info records
      p['prof1','comment'] = 'first prof' -> add/change single info value
    """

    def __init__( self, version=None, profiles=None, infos=None ):
        """
        @param version: version string stored as initVersion
                        (default: None, i.e. use self.version() )
        @type  version: str
        @param profiles: initial profiles indexed by name (default: None)
        @type  profiles: dict
        @param infos: initial info dictionaries indexed by profile name
                      (default: None)
        @type  infos: dict of dict
        """
        ## None (or an empty dict) is replaced by a fresh, unshared dict
        self.profiles = profiles or {}
        self.infos = infos or {}

        self.initVersion = version or self.version()


    def version( self ):
        """
        Class version.

        @return: class version number
        @rtype: str
        """
        return 'ProfileCollection $Revision: 2.11 $'


    def __getitem__( self, k ):
        """
        Get profile item::
          p['prof1']         <==>  p.get( 'prof1' )
          p['prof1','info1'] <==>  p.get( ('prof1','info1') )

        @return: profile OR single info value
        @rtype: any

        @raise ProfileError: if no profile or info value is found
        """
        return self.get( k )


    def __setitem__( self, k, v ):
        """
        Set profile item::
          p['prof1'] = range(10)        <==> p.set( 'prof1', range(10) )
          p['prof1','info1'] = 'comment' <==> p.setInfo('prof1',info1='comment')

        @return: result of set() or setInfo() (both return None)
        @rtype: None
        """
        if isinstance( k, tuple ):
            return self.setInfo( k[0], **{k[1]:v} )

        return self.set( k, v )


    def __delitem__( self, k ):
        """
        Delete profile item::
          del p['prof1']         <==>  p.remove( 'prof1' )
          del p['prof1','info1'] <==>  p.remove( 'prof1', 'info1' )

        Missing keys are ignored silently (see remove()).
        """
        ## remove() takes the info key as separate argument; the tuple
        ## coming from the subscript must be unpacked or the info record
        ## is never found.
        if isinstance( k, tuple ) and len( k ) == 2:
            self.remove( k[0], k[1] )
        else:
            self.remove( k )


    def __len__( self ):
        """
        Number of profiles (not the length of the individual profiles,
        see profLength() for that).

        @return: number of profiles
        @rtype: int
        """
        return len( self.profiles )


    def __contains__( self, k ):
        """
        Check if collection contains a profile with the given key::
          k in self  <==>  p.has_key( k )

        @return: True or False
        @rtype: 1|0
        """
        return self.has_key( k )


    def __iter__(self):
        """
        Iterate over profile keys::
          for k in self  <==>  for k in p.keys()

        @return: iterator over profile names
        @rtype: iterator
        """
        return iter(self.profiles)


    def keys( self ):
        """
        @return: names of all profiles
        @rtype: [str]
        """
        return self.profiles.keys()


    def has_key( self, k ):
        """
        @param k: profile name
        @type  k: str

        @return: True, if a profile is registered under this name
        @rtype: 1|0
        """
        ## 'in' replaces the deprecated dict.has_key()
        return k in self.profiles


    def values( self ):
        """
        @return: all profiles (without their names)
        @rtype: [ list OR array OR None ]
        """
        return self.profiles.values()


    def items( self ):
        """
        Get list of tuples of profile names and profiles::
          p.items() -> [ (key1, [any]), (key2, [any]), ..) ]

        @return: list of tuples of profile names and profiles
        @rtype: list
        """
        return self.profiles.items()


    def __array_or_list( self, prof, asarray ):
        """
        Convert to array or list depending on asarray option

        @param prof: profile
        @type  prof: list OR array
        @param asarray: 1.. autodetect type, 0.. force list, 2.. force array
        @type  asarray: 2|1|0

        @return: profile
        @rtype: list OR array

        @raise ProfileError: if asarray is not one of 0, 1, 2
        """
        is_array = isinstance( prof, N.arraytype )

        ## autodetect type
        if asarray == 1:
            if is_array:
                return prof

            p = N.array( prof )
            if p.typecode() not in ['O','c']:  ## no char or object arrays!
                return p
            return prof

        ## force list
        if asarray == 0:
            if is_array:
                return prof.tolist()
            return prof

        ## force array
        if asarray == 2:
            if is_array:
                return prof
            return N.array( prof )

        raise ProfileError( "%r not allowed as value for asarray" % (asarray,) )


    def __expand( self, prof, mask, default ):
        """
        Expand profile to have a value also for masked positions.
        The given profile is not modified.

        @param prof: profile
        @type  prof: list OR array
        @param mask: atom mask
        @type  mask: [int]
        @param default: default value
        @type  default: any

        @return: profile
        @rtype: list OR array
        """
        ## no mask (None or a mask evaluating to false) -> nothing to expand
        if not mask:
            return prof

        ## optimized variant for arrays
        if isinstance( prof, N.arraytype ):
            p = N.resize( prof, (len(mask), ) )
            p[:] = default
            N.put( p, N.nonzero( mask ), prof )
            return p

        ## Distribute the values over the unmasked positions in order.
        ## Pairing with zip leaves the caller's list untouched (the former
        ## reverse()/pop() loop emptied it). set() verifies beforehand that
        ## the number of values matches the mask.
        p = [ default ] * len( mask )
        for i, value in zip( N.nonzero( mask ), prof ):
            p[i] = value
        return p


    def set( self, name, prof, mask=None, default=None, asarray=1,
             comment=None, **moreInfo ):
        """
        Add/override a profile. None is allowed as special purpose value - in
        which case all other parameters are ignored and no info record is
        created. Otherwise, the three info records 'version', 'changed' and
        'isarray' are always modified but can be overridden by key=value
        pairs to this function.

        @param name: profile name (i.e. key)
        @type  name: str
        @param prof: list of values OR None
        @type  prof: [any] OR None
        @param mask: list 1 x N_items of 0|1, if there are less values than
                      items, provide mask with 0 for missing values,
                      N.sum(mask)==N_items
        @type  mask: [int]
        @param default: value for items masked.
                        (default: None for lists, 0 for arrays)
        @type  default: any
        @param asarray: store as list (0), as array (2) or store numbers as
                        array but everything else as list (1) (default: 1)
        @type  asarray: 0|1|2
        @param comment: goes into info[name]['comment']
        @type  comment: str
        @param moreInfo: additional key-value pairs for info[name]
        @type  moreInfo: key=value

        @raise ProfileError: if length of prof != length of other profiles
        @raise ProfileError: if mask is given but N.sum(mask) != len(prof)
        """
        if prof is None:
            self.profiles[ name ] = None
            return

        ## consistency check
        if mask and N.sum(mask) != len(prof):
            raise ProfileError(
                "Mask doesn't match profile ( N.sum(mask)!=len(prof) ). " +
                "%i != %i" % (N.sum(mask), len( prof ) ) )

        prof = self.__array_or_list( prof, asarray )

        ## use default == 0 for arrays
        if not default and isinstance( prof, N.arraytype ):
            default = 0

        ## expand profile to have a value also for masked positions
        prof = self.__expand( prof, mask, default )

        expected = self.profLength()
        if expected and len( prof ) != expected:
            raise ProfileError( "Profile %s has wrong length." % name )

        ## collect additional infos about this profile
        info = self.infos.get( name, {} )

        info['version'] = '%s %s' % (T.dateString(), self.version() )
        if comment: info['comment'] = comment
        info['isarray'] = isinstance( prof, N.arraytype )

        ## optional infos
        info.update( moreInfo )

        ## new profiles are always changed=1, updated profiles are checked
        if not 'changed' in moreInfo:
            old = self.profiles.get( name, None )

            if old is None:
                ## new profile or None placeholder (which may not have any
                ## info record yet) -> nothing to compare against
                info['changed'] = 1
            else:
                ## a profile stored without 'changed' record counts as
                ## unchanged so far
                info['changed'] = info.get( 'changed', 0 ) or \
                                  not M.arrayEqual( old, prof )

        ## put profile into dict
        self.profiles[ name ] = prof
        self.infos[ name ] = info


    def setInfo( self, name, **args ):
        """
        Add/Override infos about a given profile::
          e.g. setInfo('relASA', comment='new', params={'bin':'whatif'})

        @param name: profile name
        @type  name: str
        @param args: info records to add or replace
        @type  args: key=value

        @raise ProfileError: if no profile info is found with |name|
        """
        self.getInfo( name ).update( args )


    def setMany( self, profileDict, infos=None ):
        """
        setMany( dict, [infoDict] ) Add/Override many profiles

        @param profileDict: dict with name:profile pairs
        @type  profileDict: dict
        @param infos: info dicts for each profile, indexed by name
                      (default: None, no additional infos)
        @type  infos: dict of dict
        """
        ## None instead of a shared mutable {} default
        infos = infos or {}

        for key, value in profileDict.items():
            self.set( key, value, **infos.get( key,{} ) )


    def get( self, name, default=None ):
        """
        get( profKey, [default] ) -> list of values
        B{OR}
        get( (profKey, infoKey), [default] ) -> single value of info dict

        @param name: profile key or profile and info key
        @type  name: str OR (str, str)
        @param default: default result if no profile is found,
                        if None and no profile is found, raise exception
        @type  default: any

        @return: profile OR info value OR default
        @rtype: any

        @raise ProfileError: if no profile is found with |name|
        """
        ## get an info value
        if isinstance( name, tuple ):
            result = self.getInfo( name[0] ).get( name[1], default )

            ## a stored None is a valid info value, only a missing key is not
            if result is None and name[1] not in self.infos[ name[0] ]:
                raise ProfileError( 'No info value found for '+str(name[1]) )

            return result

        ## get a profile
        result = self.profiles.get( name, default )

        ## but tolerate profiles that are set to None -> return None
        if result is None and name not in self.profiles:
            raise ProfileError( 'No profile found with name '+str(name) )

        return result


    def getInfo( self, name ):
        """
        Use::
           getInfo( name ) -> dict with infos about profile

        set() writes the records 'version', 'isarray' and 'changed' and,
        if given, 'comment'. Profiles that were set to None do not get an
        info record from set().

        @param name: profile name
        @type  name: str

        @return: dict with infos about profile (the stored dict, not a copy)
        @rtype: dict

        @raise ProfileError: if no profile info is found with |name|
        """
        result = self.infos.get( name, None )

        if result is None:
            raise ProfileError( 'No profile info found with name '+str(name))

        return result


    def profile2mask(self, profName, cutoff_min=None, cutoff_max=None ):
        """
        Convert profile into a mask based on the max and min cutoff values.
        Both limits are exclusive. A limit of None (the default) is replaced
        by a value just outside the range of the profile, i.e. it does not
        filter anything.

        @param profName: profile name
        @type  profName: str
        @param cutoff_min: lower limit
        @type  cutoff_min: float
        @param cutoff_max: upper limit
        @type  cutoff_max: float

        @return: mask len( get(profName) ) x 1|0
        @rtype: [1|0]

        @raise ProfileError: if no profile is found with |profName|
        """
        p = self.get( profName )

        ## compare against None explicitly -- 0 is a legitimate cutoff
        if cutoff_min is None:
            cutoff_min = min( p ) - 1
        if cutoff_max is None:
            cutoff_max = max( p ) + 1

        return N.greater( p, cutoff_min ) * N.less( p, cutoff_max )


    def take( self, indices ):
        """
        Take on profile using provided indices::
          take( indices ) -> ProfileCollection with extract of all profiles

        The info records of each profile are deep-copied into the result.

        @param indices: list of indices
        @type  indices: [int]

        @return: new collection with the selected positions of all profiles
        @rtype: ProfileCollection

        @raise ProfileError: if take error
        """
        import sys

        result = self.__class__( self.version() )

        try:
            for key, prof in self.profiles.items():

                if isinstance( prof, N.arraytype ):
                    result.set( key, N.take( prof, indices ) )
                else:
                    result.set( key, [ prof[i] for i in indices ], asarray=0 )

                result.setInfo( key, **copy.deepcopy(self.getInfo(key)) )

        except Exception:
            ## fetch the exception via sys.exc_info() so that this clause
            ## is valid syntax for old and new python versions alike
            why = sys.exc_info()[1]
            raise ProfileError( "Can't take sub-profile: "+str(why) )

        return result


    def remove( self, *key ):
        """
        Remove profile B{OR} info values of profile::
          remove( profKey ) -> 1|0, 1 if complete entry has been removed
          remove( profKey, infoKey ) -> 1|0, 1 if single info value was removed

        @param key: profile name OR name, infoKey
        @type  key: str OR str, str

        @return: sucess status
        @rtype: 1|0
        """
        try:
            if len( key ) == 2:
                del self.infos[ key[0] ][ key[1] ]

            else:
                del self.profiles[ key[0] ]
                ## profiles set to None have no info record; that must not
                ## turn a successful removal into a reported failure
                self.infos.pop( key[0], None )

        except (KeyError, IndexError):
            ## unknown profile / info key or no key given at all
            return 0

        return 1


    def concat( self, *profiles ):
        """
        Concatenate all profiles in this with corresponding profiles in the
        given ProfileCollection(s). Profiles that are not found in all
        ProfileCollections are skipped::
          p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
          same number of profiles as p0 but with the length of p0+p1+p2..

        Called without argument, this collection itself (not a copy) is
        returned.

        @param profiles: profile(s) to concatenate
        @type  profiles: profileCollection(s)

        @return: concatenated profile(s)
        @rtype: profileCollection
        """
        if len( profiles ) == 0:
            return self

        other = profiles[0]

        r = self.__class__()

        for k, p in self.profiles.items():

            try:
                if isinstance( p, N.arraytype ):
                    r.set( k, N.concatenate( (p, other.get(k)) ),
                           **self.infos[k] )
                else:
                    r.set( k, p + other.get(k), **self.infos[k] )
            except Exception:
                ## best effort: skip profiles that cannot be combined but
                ## don't swallow non-Exception interrupts like a bare
                ## except would
                EHandler.warning("Can't concat profile "+k)
                r.remove( k )

        ## append the remaining collections one by one
        return r.concat( *profiles[1:] )


    def update( self, other, stickyChanged=1 ):
        """
        Merge other ProfileCollection into this one, replacing existing
        profiles and info values. This is the obvious translation of
        dict.update(). The changed flag of each profile is set to 1 if:
           1. an existing profile is overridden with different values
           2. the profile is marked 'changed' in the other collection

        @param other: profile
        @type  other: ProfileCollection
        @param stickyChanged: mark all profiles 'changed' that are marked
                              'changed' in the other collection (default: 1)
        @type  stickyChanged: 0|1

        @raise ProfileError: if a profile of other has no info record
        """
        for key, prof in other.items():

            info = copy.copy( other.getInfo( key ) )
            changed = info.get('changed',0)

            ## Only a sticky, positive 'changed' flag is handed on to set().
            ## In all other cases the record is dropped so that set()
            ## determines the flag by comparing old and new values.
            ## pop() tolerates info dicts without 'changed' record.
            if not (stickyChanged and changed):
                info.pop( 'changed', None )

            self.set( key, prof, **info )


    def __isEmpty( self, prof ):
        """
        @param prof: profile
        @type  prof: list OR array OR None

        @return: 1, if profile is None or has zero length
        @rtype: 1|0
        """
        ## len() instead of a truth test: a truth test on an array looks
        ## at the values, so an all-zero profile would count as empty
        return prof is None or len( prof ) == 0


    def updateMissing( self, source, copyMissing=1, allowEmpty=0 ):
        """
        Merge other ProfileCollection into this one but do not replace / update
        existing profiles and info records. There is one exception:
        Empty profiles (None or []) are replaced but their info records stay
        untouched. If copyMissing=0, profiles that are existing in source but
        not in this collection, are NOT copied (i.e. only empty profiles are
        replaced).

        @param source: profile
        @type  source: ProfileCollection
        @param copyMissing: copy missing profiles that exist in source
                            (default: 1)
        @type  copyMissing: 0|1
        @param allowEmpty: tolerate zero-length profiles after update
                           (default: 0)
        @type  allowEmpty: 0|1

        @raise ProfileError: if allowEmpty is 0 and some empty profiles
                             cannot be found in source
        """
        for key, prof in source.items():

            ## replace "None" profiles
            if key in self and self.__isEmpty( self[ key ] ):
                self.set( key, prof )

            ## add profiles that exist in source but not yet in this collection
            if copyMissing and not key in self:
                info = copy.copy( source.getInfo( key ) )
                ## let set() mark the new profile as changed; tolerate
                ## info dicts that have no 'changed' record
                info.pop( 'changed', None )

                self.set( key, prof, **info )

        if not allowEmpty:
            for key, prof in self.items():
                if self.__isEmpty( prof ):
                    raise ProfileError(
                        ('Trying to update %s profile but cannot find'\
                         + ' it in source.') % key )


    def clone( self ):
        """
        Clone (deepcopy) profile::
          clone() -> ProfileCollection (or sub-class, actually a deepcopy)

        @return: profile
        @rtype: ProfileCollection
        """
        return copy.deepcopy( self )


    def clear( self ):
        """
        Delete all::
          clear() -> None; delete all profiles and infos.
        """
        self.profiles = {}
        self.infos = {}


    def profLength( self ):
        """
        Length of profile::
          profLength() -> int; length of first non-None profile or 0

        @return: length of first non-None profile or 0
        @rtype: int
        """
        for p in self.values():

            ## identity test; '!= None' would be evaluated element-wise
            ## for array profiles
            if p is not None:
                return len( p )

        return 0


    def plot( self, *name, **arg ):
        """
        Plot one or more profiles using Biggles::
          plot( name1, [name2, ..],[arg1=x, arg2=y]) -> biggles.FramedPlot

        @param name: one or more profile names
        @type  name: str
        @param arg: key=value pairs for Biggles.Curve() function
        @type  arg:
        @raise TypeError: if profile contains non-number items

        @return: plot, view using plot.show()
        @rtype: biggles.FramedPlot

        @raise ImportError: If biggles module could not be imported
        """
        if not biggles:
            raise ImportError( 'module biggles could not be imported' )

        plot = biggles.FramedPlot()

        ## one color per profile
        colors = T.colorSpectrum( len( name ) , '00FF00', 'FF00FF')

        for i, profName in enumerate( name ):

            p = N.array( self.get( profName ) )

            if p.typecode() in ['O','c']:
                raise TypeError( 'Cannot plot values of profile %s.' % profName )

            plot.add( biggles.Curve( range( len(p) ), p, color=colors[i],
                                     **arg ) )

            ## stack the labels, each one a bit lower than the previous
            plot.add( biggles.PlotLabel( 0.8, 0.8-i/8.0, profName,
                                         color=colors[i]) )

        return plot


    def __shortString( self, s, maxLen ):
        """
        Shorten a string for display.

        @param s: string
        @type  s: str
        @param maxLen: maximal length of the result
        @type  maxLen: int

        @return: s, if it is not longer than maxLen, otherwise the first
                 maxLen-4 characters followed by '...'; if maxLen leaves
                 no room for the dots, s is simply cut to maxLen characters
        @rtype: str
        """
        if len( s ) <= maxLen:
            return s

        ## below 4 the slice index would turn negative and the result
        ## would get longer than maxLen
        if maxLen < 4:
            return s[ :max( maxLen, 0 ) ]

        return s[:maxLen-4] + '...'


    def __repr__( self ):
        """
        @return: string representation within interactive python interpreter.
        @rtype: str
        """
        lines = [ "ProfileCollection: %i profiles of length %i\n" % \
                  (len( self ), self.profLength() ) ]

        for k in self.keys():
            lines.append( k + '\n' )
            ## profiles that are set to None may lack an info record
            lines.append( str( self.infos.get( k, {} ) ) + '\n' )
            lines.append( '\t' + \
                          self.__shortString( str(self.profiles[k]), 50 ) + '\n' )

        return ''.join( lines )
697 698 699 ############# 700 ## TESTING 701 ############# 702
class Test:
    """
    Self-test of ProfileCollection: run() exercises set(), take(),
    concat() and update(); expected_result() gives the reference value
    that the result of run() is compared to.
    """

    def run( self, local=0 ):
        """
        run function test

        @param local: transfer local variables to global and perform
                      other tasks only when run locally
        @type  local: 1|0

        @return: profile 't1' of the concatenated collection
        @rtype: list OR array
        """
        import string

        p = ProfileCollection()

        p.set( 't1', range(10), comment='test 1', option='x' )
        p.set( 't2', range(12,22), comment='second test', option='y' )

        ## mask selecting every second of 10 positions
        mask = N.zeros( 10 )
        mask[0:10:2] = 1
        l = [ s for s in string.letters[:5] ] ## list of letters
        ## only 5 values for 10 positions, masked positions get the default
        p.set( 't3', l, comment='masked test', option='z',
               mask=mask, default=99, asarray=0 )

        if local: print repr( p['t3'] )

        ## keep every second position of all profiles
        p = p.take( range(0,10,2) )

        if local: print repr( p['t3'] )

        p2 = ProfileCollection()
        p2.set( 't1', p['t1'], comment='overridden', changed=1 )
        p2.set( 't4', range(30, 35), comment='added' )

        r = p.concat( p, p )  ## concatenate 3 copies of p

        p.update( p2, stickyChanged=1 )

        ## publish the local variables (under their local names) for
        ## interactive inspection
        if local:
            globals().update( locals() )

        return r['t1']


    def expected_result( self ):
        """
        Precalculated result to check for consistent performance.

        @return: the values that run() is expected to return
        @rtype: array
        """
        return N.array([0, 2, 4, 6, 8, 0, 2, 4, 6, 8, 0, 2, 4, 6, 8])



if __name__ == '__main__':

    test = Test()

    ## Both values are arrays. '==' compares them element-wise and the truth
    ## value of that comparison is not an "all elements are equal" test,
    ## so the result is reduced explicitly over all positions.
    assert N.alltrue( N.equal( test.run( local=1 ), test.expected_result() ) )