Package Biskit :: Package Mod :: Module ValidationSetup
[hide private]
[frames] | no frames]

Source Code for Module Biskit.Mod.ValidationSetup

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## Contributions: olivier PERIN 
 21  ## last $Author: leckner $ 
 22  ## last $Date: 2006/09/12 06:34:36 $ 
 23  ## $Revision: 2.7 $ 
 24   
 25  """ 
 26  Setup directory structure for the validation.   
 27  """ 
 28   
 29  import Biskit.tools as T 
 30  from Biskit.PDBModel import PDBModel 
 31  import re 
 32  import glob 
 33  import copy 
 34   
 35  import modUtils as MU 
 36   
 37  from TemplateSearcher import TemplateSearcher 
 38  from SequenceSearcher import SequenceSearcher 
 39  from TemplateCleaner import TemplateCleaner 
 40   
 41  import os.path 
 42  import os, string 
 43   
class ValidationSetup:
    """
    Build validation sub-projects from a TemplateSearcher result folder.

    Each template cluster center is used in turn as the target sequence
    to be modeled. Every sub-project folder gets a folder structure
    analogous to the main project, and the real structure is linked
    into the sub-project folder as reference.pdb.
    """

    ## folder (below the project folder) that receives the sub-projects
    F_RESULT_FOLDER = '/validation'
    F_NR_FOLDER = SequenceSearcher.F_RESULT_FOLDER

    ## template folders: CA-trace (.alpha) files and Modeller pdb files
    F_ALPHA_FOLDER = TemplateCleaner.F_COFFEE
    F_PDB_FOLDER = TemplateCleaner.F_MODELLER
    F_PDB_LINK = F_PDB_FOLDER
    F_ALPHA_LINK = F_ALPHA_FOLDER

    ## file names used inside each sub-project
    F_TEMPLATE_SEQUENCE = '/target.fasta'
    F_TCOFFEE = '/t_coffee_template_files'
    F_TEMPLATES_FASTA = '/templates.fasta'
    F_KNOWN_STRUCTURE = '/reference.pdb'
def __init__( self, outFolder, log=None ):
    """
    Store the project folder and log, then create the result folder.

    @param outFolder: base folder
    @type  outFolder: str
    @param log: None reports to STDOUT
    @type  log: LogFile instance or None
    """
    self.log = log
    self.outFolder = T.absfile( outFolder )

    ## the result folder must exist before any sub-project is written
    self.prepareFolders()
78 79
def prepareFolders( self ):
    """
    Check that the validation result folder exists; if not, create it
    together with any missing parent folder.

    @raise OSError: if the folder is missing and cannot be created
    """
    folder = self.outFolder + self.F_RESULT_FOLDER

    ## try first, check afterwards: avoids the race between an
    ## existence test and the actual creation
    try:
        os.makedirs( folder )
    except OSError:
        ## an already existing folder is fine, anything else
        ## (permissions, a plain file in the way, ...) is a real error
        if not os.path.isdir( folder ):
            raise
86 87
def logWrite( self, msg, force=1 ):
    """
    Write message to log.

    @param msg: message to print
    @type  msg: str
    @param force: if there is no log, print the message to STDOUT
                  (default: 1)
    @type  force: 1|0
    """
    ## compare against None: a log object that happens to evaluate
    ## as false (e.g. an empty container) must still receive messages
    if self.log is not None:
        self.log.add( msg )
    elif force:
        ## single-argument form prints the same on Python 2 and 3
        print( msg )
100 101
def cluster_result(self, chain_index = None):
    """
    Take clustering result from the file 'chain_index.txt'

    @param chain_index: file with clustering results
                (default: None-> L{TemplateSearcher.F_CHAIN_INDEX})
    @type  chain_index: str

    @return: pdb codes of templates, one per line of the file that
             contains a '<4-letter code>.pdb' file name
    @rtype: [str]
    """
    chain_index = chain_index or self.outFolder + \
                  TemplateSearcher.F_NR + TemplateSearcher.F_CHAIN_INDEX

    ## the dot is escaped so that only a literal '.pdb' extension matches
    r1 = re.compile( r'([A-Z0-9]{4})\.pdb' )

    cluster_list = []

    index = open( "%s"%chain_index, 'r' )
    try:
        for line in index:
            ## only the first code of a line is used
            match = r1.search( line )
            if match:
                cluster_list.append( match.group(1) )
    finally:
        ## close the file even if reading fails
        index.close()

    return cluster_list
130 131
def createTemplatesFolder(self, validation_folder, cluster):
    """
    Create folders for the templates to be used for the validation.

    @param validation_folder: top folder for the validation
    @type  validation_folder: str
    @param cluster: name for validation subfolder
                    (e.g. pdb code of cluster center)
    @type  cluster: str

    @raise OSError: if the folder does not exist and cannot be created
    """
    folder = '%s/%s'%(validation_folder, cluster)

    try:
        os.mkdir( folder )
    except OSError:
        ## only an existing folder is harmless; a missing parent or a
        ## permission problem must not be reported as "already exists"
        if not os.path.isdir( folder ):
            raise
        print( 'Folder %s/%s alredy exists.'\
               %(self.F_RESULT_FOLDER, cluster) )
147 148
def prepare_alpha(self, cluster_list, alpha_folder = None,
                  output_folder = None):
    """
    Create a dictionary where the keys are template pdb codes and
    the values are the file names of the carbon alpha pdb files
    for ALIGNER (.alpha) of all B{other} templates. The same list is
    written, one file name per line, to
    <output_folder>/<pdb code>/t_coffee_template_files.

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param alpha_folder: folder with template CA-trace files
                         (default: None -> L{F_ALPHA_FOLDER})
    @type  alpha_folder: str
    @param output_folder: top output folder
                          (default: None -> L{F_RESULT_FOLDER})
    @type  output_folder: str

    @return: dictionary mapping pdb code to CA-trace files
    @rtype: {str:[str]}
    """
    alpha_folder = alpha_folder or self.outFolder + self.F_ALPHA_FOLDER
    output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER

    alpha_path = glob.glob('%s/*.alpha'%alpha_folder)

    alpha_dictionary = {}
    for cluster in cluster_list:
        ## drop every file of the current cluster center (matched by
        ## the first 4 letters of the file name). Filtering instead of
        ## removing by index keeps the result correct when a cluster
        ## center has more than one file.
        alpha_tmp = [ f for f in alpha_path
                      if os.path.split(f)[1][0:4] != cluster ]

        alpha_dictionary[ '%s'%cluster ] = alpha_tmp

        output = open("%s/%s"%(output_folder, cluster + self.F_TCOFFEE),
                      'w')
        try:
            for line in alpha_tmp:
                output.write(line + "\n")
        finally:
            ## close the file even if writing fails
            output.close()

    return alpha_dictionary
195 196
def prepare_pdb(self, cluster_list, pdb_folder = None,
                output_folder = None):
    """
    Create a dictionary whose keys are template pdb codes and whose
    values are the file names of the pdb files for MODELLER of all
    B{other} templates. The complete list of pdb files found is kept
    in self.pdb_path.

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param pdb_folder: folder with Modeller pdb files
                       (default: None -> L{F_PDB_FOLDER})
    @type  pdb_folder: str
    @param output_folder: top output folder; accepted for symmetry
                          with the other prepare_* methods but not
                          used here
    @type  output_folder: str

    @return: dictionary mapping pdb code to pdb files used by Modeller
    @rtype: {str:[str]}
    """
    pdb_folder = pdb_folder or self.outFolder + self.F_PDB_FOLDER
    self.pdb_path = glob.glob('%s/*.pdb'%pdb_folder)

    pdb_dictionary = {}
    for cluster in cluster_list:
        ## drop every file of the current cluster center (matched by
        ## the first 4 letters of the file name). Filtering instead of
        ## removing by index keeps the result correct when a cluster
        ## center has more than one file.
        pdb_dictionary[ '%s'%cluster ] = \
            [ f for f in self.pdb_path
              if os.path.split(f)[1][0:4] != cluster ]

    return pdb_dictionary
234 235
def prepare_templatesfasta(self, cluster_list, pdb_dictionary,
                           output_folder = None):
    """
    Create 'templates.fasta' file for each template to validate

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param pdb_dictionary: dictionary mapping pdb code to pdb files
                           used by Modeller
    @type  pdb_dictionary: {str:[str]}
    @param output_folder: top output folder
                          (default: None -> L{F_RESULT_FOLDER})
    @type  output_folder: str
    """
    output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER

    for cluster in cluster_list:
        folder = '%s/%s'%(output_folder, cluster + \
                          TemplateSearcher.F_RESULT_FOLDER)
        if not os.path.exists( folder ):
            os.mkdir( folder)
        else:
            print( 'Directory %s exists, skipping'%( cluster + \
                   TemplateSearcher.F_RESULT_FOLDER) )

        ## collect name and formatted sequence of every template first,
        ## so that a pdb file that fails to load leaves no
        ## half-written fasta file behind
        entries = []
        for pdb in pdb_dictionary["%s"%cluster]:
            pdb_name = os.path.split(pdb)[1][:-4]
            sequence = PDBModel('%s'%pdb).sequence()
            entries.append( (pdb_name, MU.format_fasta(seq = sequence)) )

        ## write into the folder created above, so that a
        ## caller-supplied output_folder is honoured for the file too
        templatesfasta = open( folder + self.F_TEMPLATES_FASTA, 'w' )
        try:
            for pdb_name, sequence in entries:
                templatesfasta.write(">%s\n"%pdb_name)
                templatesfasta.write("%s\n"%sequence)
        finally:
            templatesfasta.close()
282 283 343 344
def prepare_target(self, cluster, output_folder = None):
    """
    Create the 'target.fasta' file for each template to validate

    @param cluster: name of the cluster which is used for the
                    folder name in which the validation is run.
    @type  cluster: str
    @param output_folder: top output folder
                          (default: None -> L{F_RESULT_FOLDER})
    @type  output_folder: str
    """
    output_folder = output_folder or self.outFolder + \
                    self.F_RESULT_FOLDER + '/%s/'%cluster

    ## self.pdb_path is set by prepare_pdb(); if that has not been
    ## called yet, fall back to the pdb files of the default folder
    pdb_path = getattr( self, 'pdb_path', None )
    if pdb_path is None:
        pdb_path = glob.glob( '%s/*.pdb'\
                              %(self.outFolder + self.F_PDB_FOLDER) )

    target = open("%s"%(output_folder + self.F_TEMPLATE_SEQUENCE),'w')
    try:
        target.write(">target\n")

        for pdb in pdb_path:
            ## the first 4 letters of the file name are the pdb code
            if cluster == os.path.split(pdb)[1][0:4]:
                model = PDBModel('%s'%pdb)
                sequence = MU.format_fasta(seq = model.sequence())
                target.write("%s"%sequence)
    finally:
        ## close the file even if a model cannot be loaded
        target.close()
370 371
def prepare_sequences(self, cluster, sequences_folder = None,
                      output_folder = None):
    """
    Link the 'sequences' directory from the project directory
    in each template folder

    @param cluster: name of the cluster which is used for the
                    folder name in which the validation is run.
    @type  cluster: str
    @param sequences_folder: folder with sequences (default: None ->
                             L{SequenceSearcher.F_RESULT_FOLDER})
    @type  sequences_folder: str
    @param output_folder: path of the link to create (default: None ->
                          L{F_RESULT_FOLDER}/<cluster>/<sequences>)
    @type  output_folder: str

    @raise OSError: if the link cannot be created
    """
    sequences_folder = sequences_folder or self.outFolder + \
                       SequenceSearcher.F_RESULT_FOLDER

    output_folder = output_folder or self.outFolder + \
                    self.F_RESULT_FOLDER + '/%s'%cluster + \
                    SequenceSearcher.F_RESULT_FOLDER

    ## lexists also detects a dangling link left by an earlier run
    if not os.path.lexists( output_folder ):
        ## os.link (hard link) doesn't work with folders, a symbolic
        ## link does. os.symlink needs no shell, so paths with spaces
        ## work and a failure is raised instead of being ignored.
        os.symlink( sequences_folder, output_folder )

    else:
        print( 'Folder %s alredy exists, linking skipped.'\
               %(self.F_RESULT_FOLDER + '/%s'%cluster +
                 SequenceSearcher.F_RESULT_FOLDER ) )
403 404 429 # os.system('ln %s %s'%(input_folder + pdb, output_file)) 430 431
def go(self, validation_folder = None):
    """
    Create validation directory setup.

    @param validation_folder: folder in which the validation folder
                     is created (default: None -> self.outFolder)
    @type  validation_folder: str
    """
    ## pick the base folder first, then append the result folder:
    ## 'validation_folder + F_RESULT_FOLDER or ...' raised a TypeError
    ## for the default None and could never reach its fallback
    validation_folder = (validation_folder or self.outFolder) + \
                        self.F_RESULT_FOLDER

    ## NOTE(review): only createTemplatesFolder receives
    ## validation_folder, the other steps use their self.outFolder
    ## based defaults - confirm that a different base folder is
    ## really supported

    cluster_list = self.cluster_result()
    for cluster in cluster_list:
        self.createTemplatesFolder(validation_folder, cluster)
        self.prepare_sequences(cluster)

    alpha_dictionary = self.prepare_alpha(cluster_list)
    pdb_dictionary = self.prepare_pdb(cluster_list)
    self.prepare_templatesfasta(cluster_list, pdb_dictionary)
    self.link_pdb(cluster_list, pdb_dictionary, alpha_dictionary)

    for cluster in cluster_list:
        self.prepare_target(cluster)
        self.link_reference_pdb(cluster)


#############
## TESTING
#############
456 457 458 ############# 459 ## TESTING 460 ############# 461
class Test:
    """
    Test class
    """

    def run( self, local=0 ):
        """
        Set up a validation project in a temporary folder from the
        test templates.

        @param local: transfer local variables to global and keep the
                      temporary project folder for inspection; only
                      when run locally
        @type  local: 1|0

        @return: 1
        @rtype:  int
        """
        import tempfile
        import shutil

        ## collect the input files needed
        outfolder = tempfile.mkdtemp( '_test_ValidationSetup' )

        try:
            os.mkdir( outfolder +'/templates' )

            shutil.copytree( T.testRoot() + '/Mod/project/templates/nr',
                             outfolder + '/templates/nr' )

            shutil.copytree( T.testRoot() + '/Mod/project/templates/modeller',
                             outfolder + '/templates/modeller' )

            v = ValidationSetup( outFolder = outfolder )

            v.go( validation_folder =outfolder )

            if local:
                print( 'The validation project can be found in %s/validation'%outfolder )
                globals().update( locals() )

        finally:
            ## a local run keeps the project for inspection (its path
            ## is printed above); otherwise don't leave it behind
            if not local:
                shutil.rmtree( outfolder, ignore_errors=True )

        return 1


    def expected_result( self ):
        """
        Precalculated result to check for consistent performance.

        @return: 1
        @rtype:  int
        """
        return 1
if __name__ == '__main__':

    ## run the module test locally and compare it with the
    ## precalculated result
    test = Test()
    result = test.run( local=1 )

    assert result == test.expected_result()