Package Biskit :: Module FuzzyCluster
[hide private]
[frames] | no frames]

Source Code for Module Biskit.FuzzyCluster

  1  ## Automatically adapted for numpy.oldnumeric Mar 26, 2007 by alter_code1.py 
  2   
  3  ## 
  4  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  5  ## Copyright (C) 2002-2004; Wolfgang Rieping 
  6  ## Copyright (C) 2005; Raik Gruenberg & Johan Leckner 
  7  ## 
  8  ## This program is free software; you can redistribute it and/or 
  9  ## modify it under the terms of the GNU General Public License as 
 10  ## published by the Free Software Foundation; either version 2 of the 
 11  ## License, or any later version. 
 12  ## 
 13  ## This program is distributed in the hope that it will be useful, 
 14  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 16  ## General Public License for more details. 
 17  ## 
 18  ## You find a copy of the GNU General Public License in the file 
 19  ## license.txt along with this program; if not, write to the Free 
 20  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 21   
 22  ## $Revision: 2.8 $ 
 23  ## last $Date: 2007/03/26 18:40:40 $ 
 24  ## last $Author: graik $ 
 25   
 26  """ 
 27  Implementation of the fuzzy c-means algorithm 
 28   
 29  Author: Wolfgang Rieping 1998, 2002 
 30   
 31  Reference:: 
 32    Heather L. Gordon, Rajmund L. Somorjai 
 33    Fuzzy Cluster Analysis of Molecular Dynamics Trajectories 
 34    PROTEINS: Structure, Function and Genetics 14:249-264 1992 
 35  """ 
 36   
 37  import numpy.oldnumeric as N 
 38  import Biskit.mathUtils as MU 
 39  import tools 
 40   
 41  from numpy.oldnumeric.random_array import seed, random 
 42   
 43  ## def average(x): 
 44  ##     return N.sum(N.array(x)) / len(x) 
 45   
 46  ## def variance(x, avg = None): 
 47  ##     if avg is None: 
 48  ##         avg = N.average(x) 
 49   
 50  ##     return N.sum(N.power(N.array(x) - avg, 2)) / (len(x) - 1.) 
 51   
 52  ## def standardDeviation(x, avg = None): 
 53  ##     return N.sqrt(variance(x, avg)) 
 54   
55 -def squared_distance_matrix(x, y):
56 57 d1 = N.diagonal(N.dot(x, N.transpose(x))) 58 d2 = N.diagonal(N.dot(y, N.transpose(y))) 59 60 a1 = N.add.outer(d1,d2) 61 a2 = N.dot(x, N.transpose(y)) 62 63 return a1 - 2 * a2
64 65
66 -def distance_matrix(x, y):
67 return N.sqrt(squared_distance_matrix(x, y))
68 69
70 -class FuzzyCluster:
71 - def __init__(self, data, n_cluster, weight, seedx = 0, seedy = 0):
72 """ 73 @param data: cluster this 74 @type data: [float] OR array 75 @param n_cluster: number of clusters 76 @type n_cluster: int 77 @param weight: fuzziness weigth 78 @type weight: float 79 @param seedx: random seed value for RandomArray.seed (default: 0) 80 @type seedx: int OR 0 81 @param seedy: random seed value for RandomArray.seed 82 (default: 0, set seed from clock) 83 @type seedy: int OR 0 84 """ 85 self.data = N.array(data, N.Float) 86 self.w = weight 87 self.n_cluster = n_cluster 88 self.npoints, self.dimension = N.shape(data) 89 self.seedx = seedx 90 self.seedy = seedy
91 92
93 - def calc_membership_matrix(self, d2):
94 ## remove 0s (if a cluster center is exactly on one item) 95 d2 = N.clip( d2, N.power(1e200, 1-self.w), 1e300 ) 96 q = N.power(d2, 1. / (1. - self.w)) 97 return q / N.sum(q)
98 99
100 - def calc_cluster_center(self, msm):
101 p = N.power(msm, self.w) 102 ccenter = N.transpose(N.dot(p, self.data)) 103 return N.transpose(ccenter / N.sum(p, 1))
104 105
106 - def updateDistanceMatrix(self):
107 return squared_distance_matrix(self.cluster_center, self.data)
108 109
110 - def iterate(self, centers):
111 """ 112 @param centers: array with cluster centers 113 @type centers: array('f') 114 115 @return: distance to the centers, membership matrix, array of cenetrs 116 @rtype: array, array, array 117 """ 118 d2 = squared_distance_matrix(centers, self.data) 119 msm = self.calc_membership_matrix(d2) 120 centers = self.calc_cluster_center(msm) 121 122 return d2, msm, centers
123 124
125 - def error(self, msm, d2):
126 """ 127 @param msm: membership matrix 128 @type msm: array('f') 129 @param d2: distance from data to the centers 130 @type d2: array('f') 131 132 @return: weighted error 133 @rtype: float 134 """ 135 p = N.power(msm, self.w) 136 product = N.dot(p, N.transpose(d2)) 137 return N.trace(product)
138 139
140 - def create_membership_matrix(self):
141 """ 142 Create a random membership matrix. 143 144 @return: random array of shape length of data to 145 cluster times number of clusters 146 @rtype: array('f') 147 """ 148 seed(self.seedx, self.seedy) 149 150 r = random((self.npoints, self.n_cluster)) 151 return N.transpose(r / N.sum(r))
152 153
154 - def go(self, errorthreshold, n_iterations=1e10, nstep=10, verbose=1):
155 """ 156 Start the cluestering. Run until the error is below the error 157 treshold or the max number of iterations have been run. 158 159 @param errorthreshold: treshold value for error 160 @type errorthreshold: float 161 @param n_iterations: treshold value for number of iterations 162 (default: 1e10) 163 @type n_iterations: int 164 @param nstep: print information for every n'th step in the iteration 165 @type nstep: int 166 167 @return: array with cluster centers 168 @rtype: array('f') 169 """ 170 iteration = 0 171 rel_err = 1e10 172 error = 1.e10 173 174 msm = self.create_membership_matrix() 175 centers = self.calc_cluster_center(msm) 176 177 while rel_err > errorthreshold and iteration < n_iterations: 178 d2, msm, centers = self.iterate(centers) 179 old_error = error 180 error = self.error(msm, d2) 181 rel_err = abs(1. - error/old_error) 182 iteration = iteration+1 183 if not iteration % nstep and verbose: 184 tools.errWrite( "%i %f\n" % (iteration, error) ) 185 186 self.centers = centers 187 self.msm = msm 188 self.d2 = d2 189 190 return centers
191 192
193 - def clusterEntropy(self):
194 centropy = N.diagonal(N.dot(self.msm, 195 N.transpose(N.log(self.msm)))) 196 return -1/float(self.npoints)*centropy
197 198
199 - def entropy(self):
200 return N.sum(self.clusterEntropy())
201 202
203 - def nonFuzzyIndex(self):
204 p = N.power(self.msm, self.w) 205 return (self.n_cluster*N.sum(N.sum(p))- 206 self.npoints)/(self.npoints*(self.n_cluster-1))
207 208
210 return N.sum(N.power(self.msm, self.w), 1)/self.npoints
211 212
213 - def partitionCoefficient(self):
214 return N.sum(self.clusterPartitionCoefficient())
215 216
217 - def getMembershipMatrix(self):
218 return self.msm
219 220
221 - def getClusterCenter(self):
222 return self.cluster_center
223 224
225 - def entropySD(self):
226 centropy = N.sum(-N.log(self.msm)*\ 227 self.msm)/float(self.n_cluster) 228 return MU.SD(centropy)
229 230
231 - def standardDeviation(self):
232 sd = MU.SD(self.msm) 233 return sd
234 235 236 ############# 237 ## TESTING 238 ############# 239 import Biskit.test as BT 240
241 -class Test(BT.BiskitTest):
242 """FuzzyCluster test""" 243
244 - def test_FuzzyCluster( self):
245 """FuzzyCluster test""" 246 import gnuplot as G 247 248 x1 = random((500,2)) 249 x2 = random((500,2)) + 1 250 x3 = random((500,2)) + 2 251 252 self.x = N.concatenate((x1, x2, x3)) 253 254 self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5) 255 256 self.centers = self.fuzzy.go(1.e-30, n_iterations=50, nstep=10, 257 verbose=self.local) 258 259 if self.local: 260 print "cluster centers are displayed in green" 261 G.scatter( self.x, self.centers ) 262 263 self.assertEqual( N.shape(self.centers), (5, 2) )
264 265 if __name__ == '__main__': 266 267 BT.localTest() 268