Package Biskit :: Module SparseArray
[hide private]
[frames] | no frames]

Source Code for Module Biskit.SparseArray

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## last $Author: leckner $ 
 21  ## last $Date: 2006/09/13 10:28:13 $ 
 22  ## $Revision: 2.5 $ 
 23   
 24  """ 
 25  Memory saving representation of a sparse array. 
 26  """ 
 27   
 28  import Numeric as N 
 29  import types 
 30  import copy 
 31   
32 -class SparseArrayError( Exception ):
33 pass
34 35 36 ## to be transferred into Biskit.tools
37 -def isType( o, t ):
38 """ 39 Test for correct type or correct class:: 40 isType( o, type_or_class ) -> 1|0 41 42 @param o: object to test 43 @type o: any 44 @param t: type OR class 45 @type t: any 46 @return: result of test 47 @rtype: 1|0 48 """ 49 tt = type(o) 50 if tt == types.TypeType: 51 return type( o ) == t 52 if tt == types.ClassType: 53 return isinstance( o, t ) 54 raise Exception, 'unsupported argument type: %s.' % str(tt)
55 56 57 ## to be transferred into Biskit.tools
58 -def getType( o ):
59 """ 60 Get the type or (if o is instance) class of an object:: 61 getType( o ) -> type or Class 62 63 @param o: get the type OR class of this object 64 @type o: any 65 """ 66 r = type( o ) 67 if r == types.InstanceType: 68 return o.__class__ 69 return r
70 71
72 -class SparseArray:
73 """ 74 Memory - saving representation of a sparse array. 75 """ 76
77 - def __init__( self, array_or_shape=0, typecode='f', default=0. ):
78 """ 79 Create a sparse array from a normal Numeric array or create 80 an empty sparse array with a certain shape. 81 82 @param array_or_shape: craeate sparse array from:: 83 ( int, ), shape of array 84 OR int, length of array 85 OR array, numeric (dense) array 86 @type array_or_shape: ( int, ) OR int OR array 87 @param typecode: single char type of values ['f' OR input type] 88 @type typecode: str 89 @param default: value of majority of array content (default: 0.) 90 @type default: any 91 """ 92 self.values = [] 93 self.indices = [] 94 self.__default = default 95 self.__typecode= typecode 96 97 self.__last_pos = 0 ## cache last position manipulated 98 self.__last_i = 0 ## cache last index manipulated 99 100 a = array_or_shape 101 atype = type( a ) 102 103 if atype is tuple: self.shape = a 104 else: 105 if atype is int: self.shape = ( a, ) 106 else: 107 if atype is N.arraytype or atype is list: 108 self.shape = N.shape( a ) 109 else: 110 raise SparseArrayError, '%s argument not allowed.' %\ 111 str(atype) 112 113 self.is1D = len( self.shape ) == 1 114 115 if not self.is1D: 116 ## multidimensional 117 self.__default = SparseArray( self.shape[1:], typecode, default ) 118 self.__typecode = 'SA' 119 120 if atype is N.arraytype or atype is list : 121 self.__setAll( a )
122 123
124 - def __setAll_1D( self, a ):
125 """ 126 Replace content of this sparseArray with values from Numeric array 127 or list of numbers -- only for 1-dimensional arrays. 128 129 @param a: array OR list 130 @type a: array OR [ number ] 131 """ 132 if type( a ) is list: 133 a = N.array( a, self.__typecode ) 134 135 if self.shape != a.shape: 136 raise SparseArrayError, 'dimensions not aligned' 137 138 self.indices = N.nonzero( N.logical_not( N.equal(a, self.__default) ) ) 139 self.indices = self.indices.tolist() 140 141 self.values = N.take( a, self.indices ) 142 self.values = self.values.tolist()
143 144
145 - def __setAll( self, a ):
146 """ 147 Replace content of this array with values from (multidimensional) 148 Numeric array of numbers or list of lists (of lists..) of numbers. 149 150 @param a: array OR list of lists 151 @type a: array OR [ [ number ] ] 152 """ 153 if self.is1D: 154 return self.__setAll_1D( a ) 155 156 if type(a) in [ N.arraytype, list ]: 157 if len(a) != self.shape[0]: 158 raise SparseArrayError, 'dimensions not aligned' 159 160 ## typecode and default value for lowest dimension 161 typecode = self.typecode() 162 default = self.default() 163 164 for i in range( len(a) ): 165 x = SparseArray( a[i], typecode, default ) 166 if not x.__eq__( self.__default ): 167 self.indices.append( i ) 168 self.values.append( x )
169
170 - def typecode( self ):
171 """ 172 Get type code of object. 173 174 @return: typecode of lowest dimension 175 @rtype: str 176 """ 177 if self.is1D: 178 return self.__typecode 179 return self.__default.typecode()
180 181
182 - def default( self ):
183 """ 184 Get default value, defined in L{__init__}. 185 186 @return: default value for array elements (of lowest dimension) 187 @rtype: number 188 """ 189 if self.is1D: 190 return self.__default 191 ## multidimensional 192 return self.__default.default()
193 194
195 - def nondefault( self ):
196 """ 197 Get a 1D list of indices that have a non-default value in a raveled 198 version of this array. If L.default()==0 this would be equivalent to 199 nonzero( ravel( L.toarray() ) ) (except that the Numeric array is 200 never constructed). 201 202 @return: list of indices with none default values 203 @rtype: [ int ] 204 """ 205 if self.is1D: 206 return self.indices 207 208 ## multidimensional 209 r = [] 210 len_axis_B = self.shape[1] 211 for (i,a) in zip( self.indices, self.values ): 212 r += (N.array( a.nondefault() ) + len_axis_B * i ).tolist() 213 214 return r
215 216
217 - def nondefault_values( self ):
218 """ 219 Get a 1D-list of all values != L.default() in the order that they 220 would have in a raveled array. If L.default()==0 this would be 221 equivalent to take( L.toarray(), nonzero( ravel( L.toarray() ) ) ) 222 (except that the Numeric array is never constructed). 223 224 @return: list of none default values 225 @rtype: [ number ] 226 """ 227 if self.is1D: 228 return self.values 229 230 ## multidimensional 231 r = [] 232 for a in self.values: 233 r.extend( a.nondefault_values() ) 234 return r
235 236
237 - def toarray( self ):
238 """ 239 Reconstruct dense array:: 240 L.toarray() -> Numeric.array, normal dense array 241 242 @return: normal dense array 243 @rtype: array 244 """ 245 if self.default() is 0: 246 a = N.zeros( ( self.shape ), self.typecode() ) 247 else: 248 a = N.ones( (self.shape ), self.typecode() ) * self.default() 249 250 N.put( a, self.nondefault(), self.nondefault_values() ) 251 252 return a
253 254
255 - def tolist( self ):
256 """ 257 Reconstruct list from dense array:: 258 L.tolist() -> [ ([) number (]) ], same as L.toarray().tolist() 259 260 @return: list from normal dense array 261 @rtype: list 262 """ 263 return self.toarray().tolist()
264 265
266 - def put( self, i, v ):
267 """ 268 Replace one or several values, L.put( i, v ) 269 270 @param i: indices 271 @type i: int OR [ int ] 272 @param v: values 273 @type v: any OR [ any ] 274 """ 275 if type( i ) in [ list, N.arraytype ]: 276 self.__setMany( i, v ) 277 else: 278 self.__setitem__( i, v )
279 280
281 - def __setMany( self, indices, values ):
282 """ 283 Add / replace values of the array. 284 285 @param indices: indices, [ int ] OR Numeric.array('i') 286 @type indices: [int] 287 @param values: values, [ any ] OR Numeric.array 288 @type values: [any] OR array 289 """ 290 if type( values ) not in [ list, N.arraytype ]: 291 values = [ values ] * len( indices ) 292 293 map( self.__setitem__, indices, values )
294 295
296 - def __pos( self, i ):
297 """ 298 Target position of given index in index list. 299 300 @param i: index 301 @type i: int 302 303 @return: target position of given index in index list 304 @rtype: int 305 306 @raise IndexError: if out of bounds 307 """ 308 if i > self.shape[0] or i < 0: 309 raise IndexError, "index %i out of bounds" % i 310 311 if i >= self.__last_i: 312 pos = self.__last_pos 313 else: 314 pos = 0 315 316 l = len( self.indices ) 317 while pos != l and self.indices[ pos ] < i: 318 pos += 1 319 320 if pos != l: 321 self.__last_i = self.indices[ pos ] 322 self.__last_pos = pos 323 else: 324 self.__last_i = 0 325 self.__last_pos = 0 326 327 return pos
328 329
330 - def __setitem__( self, i, v ):
331 """ 332 Set position specifyed by the index i to value v. 333 334 @param i: index 335 @type i: int OR (int,int) 336 @param v: value 337 @type v: any 338 339 @raise SparseArrayError: if dimensions not aligned 340 @raise SparseArrayError: if no sequence value 341 """ 342 itype = type( i ) 343 vtype = getType( v ) 344 345 if itype is tuple and len( i ) == 1: 346 i = i[0] 347 itype = int 348 349 if itype is int: 350 try: 351 if not self.is1D: 352 if len( v ) != self.shape[1]: 353 raise SparseArrayError, 'dimensions not aligned.' 354 if vtype is not SparseArray: 355 v = SparseArray( v, self.typecode(), self.default() ) 356 357 return self.__setitem_1D( i, v ) 358 359 except TypeError: 360 raise SparseArrayError, 'sequence value required.' 361 362 if itype is tuple: 363 x, y = i[0], i[1:] 364 365 a = self.__getitem__( x ) 366 a.__setitem__( y, v ) 367 self.__setitem__( x, a )
368 369
370 - def __setitem_1D( self, i, v ):
371 """ 372 Set position specifyed by the index i to value v in a 1D array. 373 374 @param i: array index 375 @type i: int 376 @param v: value 377 @type v: any 378 379 @raise IndexError: if i < 0 or i > len( this ) 380 """ 381 if i in self.indices: 382 pos = self.indices.index( i ) 383 384 if v != self.__default: 385 self.values[ pos ] = v 386 else: 387 del self.values[ pos ] 388 del self.indices[ pos ] 389 390 else: 391 if v != self.__default: 392 393 pos = self.__pos( i ) 394 395 self.indices.insert( pos, i ) 396 self.values.insert( pos, v )
397 398
399 - def __getitem_1D( self, i ):
400 """ 401 Value for specified position:: 402 this[ i ] -> number OR SparseArray 403 404 @param i: array index 405 @type i: int 406 407 @raise IndexError: if i < 0 or i >= len( this ) 408 """ 409 if i >= self.shape[0] or i < 0: 410 raise IndexError, "index %i out of bounds" % i 411 412 if i in self.indices: 413 pos = self.indices.index( i ) 414 return self.values[ pos ] 415 416 if self.is1D: 417 return self.__default 418 419 return copy.deepcopy( self.__default )
420 421
422 - def __getitem__( self, i ):
423 """ 424 Value for specified position. 425 426 @param i: array index 427 @type i: int OR (int,int) 428 """ 429 itype = type( i ) 430 431 if itype is tuple and len( i ) == 1: 432 i = i[0] 433 itype = int 434 435 if itype is int: 436 return self.__getitem_1D( i ) 437 438 if itype is tuple: 439 x, y = i[0], i[1:] 440 return self.__getitem__( x ).__getitem__( y )
441 442
443 - def __len__( self ):
444 """ 445 Get length: supports len( this ) 446 447 @return: object length 448 @rtype: int 449 """ 450 return self.shape[0]
451 452
453 - def __eq__( self, o ):
454 """ 455 Comparison equal: supports this == other 456 457 @return: result of comparison 458 @rtype: 0|1 459 """ 460 if not isinstance( o, self.__class__ ): 461 return 0 462 if self.shape != o.shape: 463 return 0 464 return self.values == o.values and self.indices == o.indices
465 466
467 - def __ne__( self, o ):
468 """ 469 Comparison not equal: supports this != other 470 471 @return: result of comparison 472 @rtype: 0|1 473 """ 474 if not isinstance( o, self.__class__ ): 475 return 1 476 if self.shape != o.shape: 477 return 1 478 return self.values != o.values or self.indices != o.indices
479 480
481 - def __getslice__( self, a, b ):
482 """ 483 Slice a sparce array:: 484 this[ a : b ] -> SparseArray 485 486 @return: sliced sparse array 487 @rtype: SparseArray 488 """ 489 shape = ( abs( b - a ), ) + self.shape[1:] 490 result = self.__class__( shape, self.__typecode, self.__default ) 491 492 pos_low = self.__pos( a ) 493 pos_high = self.__pos( b ) 494 495 result.put( N.array( self.indices[pos_low : pos_high] ) - a, 496 self.values[pos_low : pos_high] ) 497 return result
498 499
500 - def __contains__( self, v ):
501 """ 502 Sparse array contains value: supports v in this -> 0|1 503 504 @param v: value 505 @type v: any 506 507 @return: result of comparison 508 @rtype: 0|1 509 """ 510 return ( cmp( v, self.__default ) == 0 ) or (v in self.values)
511 512
513 - def count(self, v ):
514 """ 515 Count the occuravces of value in sparse array:: 516 count( value ) -> int, number of occurences of value. 517 518 @param v: value 519 @type v: any 520 521 @return: number of occurances 522 @rtype: int 523 """ 524 if v == self.__default: 525 return len( self ) - len( self.values ) 526 return self.values.count( v )
527 528
529 - def index( self, v ):
530 """ 531 position of first occurrence of value:: 532 index( value ) -> int 533 534 @param v: value to look for 535 @type v: any 536 537 @return: index of first occurance 538 @rtype: int 539 540 @raise ValueError: if value is not contained in this list 541 """ 542 if v in self.values: 543 return self.indices[ self.values.index( v ) ] 544 545 if v != self.__default: 546 raise ValueError, "SparseArray.index(): value not in list" 547 548 i = 0 549 while i in self.indices: 550 i += 1 551 552 if i == len( self ): 553 raise ValueError, "SparseArray.index(): value not in list" 554 555 return i
556 557
558 - def empty( self ):
559 """ 560 Check if object is empty. 561 562 @return: true if lenght is 0 563 @rtype: 1|0 564 """ 565 return len( self.indices ) is 0
566 567
568 - def __delitem__( self, i ):
569 """ 570 Delete value at index i: supports del this[i] 571 572 @note: del this[int:int] is not supported 573 574 @param i: index 575 @type i: int 576 """ 577 if i >= self.shape[0] or i < 0: 578 raise IndexError, "index %i out of bounds" % i 579 580 if i in self.indices: 581 pos = self.indices.index( i ) 582 583 del self.indices[ pos ] 584 del self.values[ pos ] 585 586 else: 587 pos = self.__pos( i ) 588 589 if pos < len( self.indices ) - 1: 590 self.indices = self.indices[:pos] +\ 591 [ p-1 for p in self.indices[pos:] ] 592 593 self.shape = (self.shape[0] - 1,) + self.shape[1:]
594 595 596 ## def reverse( self ): 597 ## raise Exception, "not implemented." 598 599
600 - def insert( self, i, v ):
601 """ 602 Insert value before index:: 603 this.insert(index, value) 604 605 @param v: value to insert 606 @type v: any 607 @param i: index 608 @type i: int 609 610 @raise IndexError: if i < 0 or i > len( this ) 611 """ 612 pos = self.__pos( i ) 613 614 inc = 0 615 616 if v != self.__default: 617 self.indices.insert( pos, i ) 618 self.values.insert( pos, v ) 619 inc = 1 620 621 if pos < len( self.indices ): 622 self.indices = self.indices[:pos+inc] +\ 623 [ p+1 for p in self.indices[pos+inc:] ] 624 625 self.shape = (self.shape[0]+1, ) + self.shape[1:]
626 627 628 ## def pop( self ): 629 ## raise Exception, "not implemented." 630 631 632 ## def remove( self, i ): 633 ## raise Exception, "not implemented." 634 635 636 ## def __add__( self, lst ): 637 ## raise Exception, "not implemented." 638 639 640 ## def __iadd__( self, lst ): 641 ## raise Exception, "not implemented." 642 643
644 - def append( self, v, axis=0 ):
645 """ 646 Enlarge this array along an axis by one value:: 647 L.append( value, [ axis=0 ]) 648 649 @param v: v MUST be a sparse array if the given 650 axis is not the last axis of this array 651 @type v: number OR SparseArray 652 @param axis: dimension along wich v should be added 653 @type axis: int 654 655 @raise SparseArrayError: if dimension errors 656 """ 657 if axis == 0: 658 659 t_default, t_v = getType( self.__default ), getType( v ) 660 if t_default == SparseArray and t_v == N.arraytype: 661 v = SparseArray( v, self.typecode(), self.default() ) 662 663 if getType(v) != t_default: 664 raise SparseArrayError, 'Cannot append %s to array of %s.' \ 665 % ( str(type(v)), str(t_default) ) 666 667 if v != self.__default: 668 self.indices.append( self.shape[0] ) 669 self.values.append( v ) 670 671 self.shape = ( self.shape[0]+1, ) + self.shape[1:] 672 673 else: 674 if len( v ) != self.shape[0]: 675 raise SparseArrayError, 'dimensions not aligned' 676 677 for i in range( self.shape[0] ): 678 v_i = v[i] 679 a_i = self.__getitem__( i ) 680 is0 = a_i.empty() ## -> a_i is a copy of self.__default 681 682 if v_i != self.__default.__default: 683 a_i.append( v_i, axis=axis-1 ) 684 if is0: ## otherwise change would be lost 685 self.__setitem__( i, a_i ) 686 687 d = self.__default 688 x = axis - 1 689 d.shape = d.shape[:x] + (d.shape[x]+1,) + d.shape[x+1:] 690 691 self.shape = self.shape[:axis] + ( self.shape[axis]+1, ) +\ 692 self.shape[axis+1:]
693 694
695 - def extend( self, lst ):
696 """ 697 Extend list by appending elements from the iterable:: 698 L.extend(iterable) 699 700 @param lst: list OR SparseArray with extend values 701 @type lst: [any] OR SparseArray 702 """ 703 if not isinstance( lst, SparseArray ): 704 lst = SparseArray( lst, default=self.__default ) 705 706 l = self.shape[0] 707 self.indices.extend( [ i + l for i in lst.indices ] ) 708 self.values.extend( lst.values ) 709 self.shape = ( l + lst.shape[0], ) + self.shape[1:]
710 711 712 ## def __setitem__( self, i, v ): 713 714 ## if getattr( v, 'shape', None ) is None: 715 ## return SparseList.__setitem__( i, v ) 716 717 ## pass 718 719 720 721 ############# 722 ## TESTING 723 ############# 724
725 -class Test:
726 """ 727 Test class 728 """ 729 730
731 - def run( self, local=0 ):
732 """ 733 run function test 734 735 @return: reconstructed full array 736 @rtype: array 737 """ 738 a = N.zeros( (6,), 'f' ) 739 740 sa = SparseArray( a.shape ) 741 sa[3] = 1. 742 sa[5] = 2. 743 744 b = N.zeros( (5, 6), 'f' ) 745 b[0,1] = 3. 746 b[0,2] = 4 747 b[4,2] = 5 748 b[3,0] = 6 749 750 sb = SparseArray( b ) 751 752 sb.append( sa ) 753 754 if local: 755 print sa.toarray() 756 globals().update( locals() ) 757 758 return sb.toarray()
759 760
761 - def expected_result( self ):
762 """ 763 Precalculated result to check for consistent performance. 764 765 @return: full array representation 766 @rtype: array 767 """ 768 return N.array([[ 0., 3., 4., 0., 0., 0.], 769 [ 0., 0., 0., 0., 0., 0.], 770 [ 0., 0., 0., 0., 0., 0.], 771 [ 6., 0., 0., 0., 0., 0.], 772 [ 0., 0., 5., 0., 0., 0.], 773 [ 0., 0., 0., 1., 0., 2.]])
774 775 776 if __name__ == '__main__': 777 778 test = Test() 779 780 assert test.run( local=1 ) == test.expected_result() 781