20 import multiprocessing
21 from multiprocessing
import Process
28 from Tkinter
import Listbox
37 from collections
import defaultdict
39 def dumpPDB(p, native_pose, filepath, score, overwrite=False):
41 Dumps the pose using the Py Job Distributor
44 jd=PyJobDistributor(filepath, 100000, score);
47 jd.native_pose=native_pose
49 os.remove(jd.current_name+
".in_progress")
51 print "Removing .fasc + .pdb with same output name."
53 if os.path.exists(filepath+
".fasc"):
54 os.remove(filepath+
".fasc")
55 if os.path.exists(filepath+
"_1.pdb"):
56 os.remove(filepath+
"_1.pdb")
57 native_pose.dump_pdb(filepath+
"_1.pdb")
58 output_scorefile(p, filepath, filepath+
"_1.pdb", filepath+
".fasc", score, 1, native_pose)
60 print "Pose written to "+os.path.dirname(filepath)
64 if not p.total_residue():
65 print "\n No pose loaded...\n"
68 score = create_score_function_ws_patch(
'standard',
'score12')
78 Saves a Rosetta Loop file. Also asks to discard residues or not.
79 Migrate to use Regions.
81 if not p.total_residue():
82 print "\n No pose loaded...\n"
86 print "\n No loops to save...\n"
92 ask_cut_points = tkMessageBox.askyesno(message=
"Define Cutpoints?", default=tkMessageBox.NO)
94 discard_loops = tkMessageBox.askyesno(message =
"Discard Phi/Psi and build using ideal bond lengths and angles?")
96 ask_cut_points =
False
99 outfilename = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory, title=
"Output loop file to...")
100 if not outfilename:
return
101 global_variables.current_directory=os.path.dirname(outfilename)
103 FILE =
open(outfilename,
'w')
104 for region
in regions:
106 start = region.get_rosetta_start(p)
107 end = region.get_rosetta_end(p)
110 cutpoint_known =
False
112 while not cutpoint_known:
113 cut = tkSimpleDialog.askstring(title=
"cutpoint", prompt=
"Cutpoint for Loop (#, default, 0 for random) " +loop_string, initialvalue=
"default")
115 cut = (end - start)/2
122 if ((
int(cut) < start) | (
int(cut) > end)):
123 tkMessageBox.showerror(message=
"Invalid CutPoint!")
127 cut = (end - start)/2
129 FILE.write(
"LOOP"+
" "+repr(start)+
" "+repr(end)+
" "+repr(cut)+
" 0 "+repr(
int(discard_loops))+
"\n")
132 print "\nLoop File written...\n"
136 if isinstance(obj, basestring):
138 elif isinstance(obj, list):
140 elif isinstance(obj, tuple):
142 elif isinstance(obj, dict):
149 Saves an empty resfile, numbered by PDB with NATRO designation
150 If outname is not given, will ask where to save.
152 if not p.total_residue():
153 print "\n No pose loaded...\n"
158 outname = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory, title=
"Output resfile to...")
159 if not outname:
return
160 global_variables.current_directory=os.path.dirname(outname)
166 Saves a resfile, readable by PyRosetta and Rosetta.
168 ResDic is [string pdbNum:pdbChain]:[array name:three_letter:one_letter string]
169 If NC - ResDic should be [string pdbNum:pdbChain]:[array 'NC':residue string]
171 if not p.total_residue():
172 print "\n No pose loaded...\n"
175 tot = p.total_residue()
176 FILE =
open(filename,
'w')
177 FILE.write(
" start\n")
178 for i
in range(1, tot+1):
179 chain = p.pdb_info().chain(i)
180 chainStr = chain.rjust(3)
181 pdbNum = p.pdb_info().number(i)
182 pdbStr = str(pdbNum).
rjust(4)
183 res = repr(pdbNum) +
":" +chain
188 for residue_string
in ResDic[res]:
190 if residue_string ==
"NATRO":
191 line = pdbStr + chainStr +
" NATRO"+
"\n"
192 elif residue_string ==
"NATAA":
193 line = pdbStr + chainStr +
" NATAA"+
"\n"
194 elif residue_string ==
"ALLAA":
195 line = pdbStr + chainStr +
" ALLAA" +
"\n"
196 elif residue_string.split(
":")[0]==
"NC":
197 type = residue_string.split(
":")[1]+
" "
200 line = pdbStr + chainStr + x +
"\n";
202 residuesAll = residue_string.split(
":")
203 x = x + residuesAll[2]
204 line = pdbStr + chainStr +
" PIKAA " + x +
"\n";
208 line = pdbStr + chainStr +
" NATRO" +
"\n"
211 print "\nRes File written....\n"
215 Used in conversion to output scwrl sequence file.
217 if not p.total_residue():
218 print "\n No pose loaded...\n"
231 seqList[res-1]=seqList[res-1].upper()
243 Saves a scwrl seq file. Needs to be more robust to account for all of Seth's changes (some NCAA, carbohydrates) when it is eventually released.
244 Needs to migrate to use region class.
246 if not p.total_residue():
247 print "\n No pose loaded...\n"
250 fileout = tkFileDialog.asksaveasfilename(initialdir=global_variables.current_directory)
251 if not fileout:
return
252 global_variables.current_directory = os.path.dirname(fileout)
253 newList = loop_tools.loopArea(p, loops_as_strings)
255 FILE =
open(fileout,
'w')
258 print "Seq file saved.."
264 Saves a basic blueprint file to be manually edited.
265 If output is false, returns a string of the file for manipulation.
266 If outfilename is not given, will ask where to save.
268 if not p.total_residue():
269 print "\n No pose loaded...\n"
273 define = restype_definitions.definitions()
275 for i
in range(1, p.total_residue()+1):
276 pdb_num = p.pdb_info().number(i)
277 single_letter_code = seq[i-1]
278 out_string = out_string+repr(pdb_num)+
" "+single_letter_code+
" . NATRO\n"
282 outfilename = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory)
283 if not outfilename:
return
284 global_variables.current_directory=os.path.dirname(outfilename)
286 FILE =
open(outfilename,
'w')
287 FILE.write(out_string)
289 print "\nBlueprint Saved...\n"
296 Makes a list of PDB's from a directory. Does not walk directory.
297 Later realized that the find command could have been used instead...
300 directory = tkFileDialog.askdirectory(title =
"Choose directory with PDB files", initialdir = global_variables.current_directory)
301 if not directory:
return
302 global_variables.current_directory=directory
304 contains = tkSimpleDialog.askstring(title=
"Contains...", prompt=
"Separate mutliple match criteria by a coma...", initialvalue=
".pdb")
305 containsSP = contains.split(
",")
306 FILES = os.listdir(directory)
310 print "No PDBs found. Returning."
316 for pattern
in containsSP:
317 pattern = pattern.strip()
318 if re.search(pattern, name)
and (re.search(
"\._", name)==
None)
and (re.search(
"~", name)==
None):
325 print "File "+repr(filenum)+
": "+name
326 p = os.path.join(directory, name)
331 NEWFILE =
open(directory+
"/PDBLIST.txt",
'w')
332 for match
in matches:
333 NEWFILE.write(match+
"\n")
335 print "File saved as 'PDBLIST.txt' in directory specified."
336 return directory+
"/PDBLIST.txt"
338 print "No matches found.."
343 directory = tkFileDialog.askdirectory(initialdir = global_variables.current_directory)
344 if not directory:
return
345 global_variables.current_directory=directory
347 contains = tkSimpleDialog.askstring(title=
"Contains...", prompt=
"Separate mutliple match criteria by a coma...", initialvalue=
".pdb,")
348 NEWFILE =
open(directory+
"/PDBLIST_RECURSIVE.txt",
'w')
350 containsSP = contains.split(
",")
352 for root, dirs, files
in os.walk(directory, topdown=
True):
356 for pattern
in containsSP:
357 pattern = pattern.strip()
358 if re.search(pattern, f)
and (re.search(
"\._", f)==
None)
and (re.search(
"~", f)==
None):
365 print "File "+repr(filenum)+
":"+f
366 p = os.path.join(root, f)
371 NEWFILE =
open(directory+
"/PDBLIST.txt",
'w')
372 for match
in matches:
373 NEWFILE.write(match+
"\n")
375 print "File saved as 'PDBLIST.txt' in directory specified."
376 return directory+
"/PDBLIST.txt"
378 print "No matches found.."
383 Adds each PDB info excluding header information into an SQLITE3 Database. The module for this is modules/PDB.py.
384 Needs to be basename as querying with '/' in a string doesn't seem to work.
388 print "Please choose PDBList..."
390 structID = tkSimpleDialog.askstring(title=
"structID", prompt=
"These entries will have a structID of...", initialvalue=
"na")
391 PDBLIST =
open(pdblist_path,
'r')
392 dbname = os.path.dirname(pdblist_path)+"/DATABASE.db"
394 DB =
SQLPDB(
"",
"",
"",
False, dbname)
396 for filepath
in PDBLIST:
397 pdbID = os.path.basename(filepath).
split(
".")[0]
399 DB.set_basic_options(pdbID, i, structID)
400 filepath = filepath.strip()
401 DB.read_pdb_into_database_flat(filepath,
False,
False)
404 print "Database written to PDBLIST directory"
407 dbfilename = tkFileDialog.askopenfilename(initialdir = global_variables.current_directory, title =
"Database filename")
408 if not dbfilename:
return
409 global_variables.current_directory = os.path.dirname(dbfilename)
411 pdbID = tkSimpleDialog.askstring(title =
"pdbID", prompt=
"Please enter the pdbID/filepath you wish to extract")
415 print "Database Opened"
416 table = pdbdb.scrub(
"pdb")
417 pdbdb.query_pdbID(table, pdbID)
418 outname = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory)
419 if not outname:
return
420 global_variables.current_directory = os.path.dirname(outname)
422 pdbdb.set_output_DIR(os.path.dirname(outname))
423 pdbdb.save_cur_as_pdb(os.path.basename(outname))
426 dbfilename = tkFileDialog.askopenfilename(initialdir = global_variables.current_directory, title =
"Database filename")
427 if not dbfilename:
return
428 global_variables.current_directory = os.path.dirname(dbfilename)
430 print "Database Opened"
431 outdir = tkFileDialog.askdirectory(initialdir = global_variables.current_directory, title=
"Choose output directory")
432 global_variables.current_directory = outdir
434 keep_original_filename = tkMessageBox.askyesno(title=
"Keep Original Filename?", message=
"Add numerical designation to filename to keep from overwriting same PDBs in list?", default=tkMessageBox.NO)
435 strucID = tkSimpleDialog.askstring(title=
"strucID", prompt=
"Extract entries with structID of...", initialvalue=
"na")
436 PDBLIST =
open(pdblist_path,
'r')
437 pdbdb.set_output_DIR(outdir)
439 table = pdbdb.scrub("pdb")
440 for pdbID
in PDBLIST:
441 pdbdb.query_pdbID_and_strucID(table, os.path.basename(pdbID), strucID)
442 newname = os.path.basename(pdbID)
443 if not keep_original_filename: newname = newname+
"_"+repr(filenum)
444 print "Saving "+ newname
445 pdbdb.save_cur_as_pdb(newname)
446 pdbdb._reset_cursor()
454 Used by score_PDBList for multiprocessing rescoring of decoy structures.
455 Only speed up if PDBs are large.
459 pose_from_pdb(p, path)
461 except PyRosettaException:
462 print "Cannot Load "+path+
" Try using -ignore_unrecognized_residues in options window..."
466 if p.total_residue()>0:
468 score = scorefunction(p)
469 manager_dict[path]=score
475 Outputs a simple pdb vs score for simple analysis.
476 If pdblist_path=False, a dialog box opens.
477 Will grab the number of processors from output_class (processor StringVar variable) and attempt multiprocessing rescoring of all PDBs.
481 pdblist_path = tkFileDialog.askopenfilename(initialdir = global_variables.current_directory, title =
"PDBLIST")
482 if not pdblist_path:
return
483 global_variables.current_directory = os.path.dirname(pdblist_path)
484 PDBLIST =
open(pdblist_path,
'r')
486 PDBLIST =
open(pdblist_path,
'r')
487 SCORED_PDBLIST = open(os.path.dirname(pdblist_path)+"/SCORED_PDBLIST.txt",
'w')
495 pose_from_pdb(p, path)
496 except PyRosettaException:
497 print "Cannot Load "+path+
" Try using -ignore_unrecognized_residues in options window..."
500 SCORED_PDBLIST.write(path+
"\t%.3f\n"%e)
501 print "\nComplete. File written to SCORED_PDBLIST.txt\n"
503 SCORED_PDBLIST.close()
508 output_class.terminal_output.set(1)
509 manager = multiprocessing.Manager()
510 result_map = manager.dict();
515 if not os.path.exists(line):
516 print "Could not find "+line
519 result_map[line]=
"NA"
520 worker = Process(name = line, target=rescore_single_pdb, args=(line, score, result_map))
521 workers.append(worker)
523 total_allowed_jobs = processors
524 print "Total allowed jobs: "+repr(total_allowed_jobs)
525 total_running_jobs = 0
529 while not job_complete:
533 for worker
in workers:
534 if worker.is_alive():
537 elif result_map[worker.name]!=
"NA":
538 if worker.exitcode!=0:
539 print "%s.exitcode = %s" %(worker.name, worker.exitcode)
541 workers.pop(workers.index(worker));
542 total_running_jobs-=1
543 print "Total running jobs: "+repr(total_running_jobs)
544 print "Total workers waiting: "+repr(
len(workers)-total_running_jobs)
550 if total_running_jobs<total_allowed_jobs:
551 for worker
in workers:
552 if not worker.is_alive():
553 print "Starting Worker"
556 except AssertionError:
558 print "Total running jobs: "+repr(total_running_jobs)
559 print "Total workers waiting: "+repr(
len(workers)-total_running_jobs)
560 total_running_jobs+=1
561 if total_running_jobs>=total_allowed_jobs:
break
563 if total_running_jobs==0:
569 for path
in result_map.keys():
570 d[path] = result_map[path]
572 for path
in sorted(d, key=d.get):
574 print path+
"\t%.3f\n"%e
575 SCORED_PDBLIST.write(path+
"\t%.3f\n"%e)
577 print "\nComplete. File written to "+os.path.dirname(pdblist_path)+
"/SCORED_PDBLIST.txt\n"
579 output_class.terminal_output.set(0)
580 SCORED_PDBLIST.close()
582 return os.path.dirname(pdblist_path)+
"/SCORED_PDBLIST.txt"
586 def save_FASTA(pose, base_name, outfilename = None, regions = None ):
588 If outfilename is not given, will ask where to save, starting in current_directory.
589 If regions is given, output FASTA of each region. Base_name is used as label >base_name (region) for fasta
592 if not pose.total_residue():
593 print "\n No pose loaded...\n"
597 outfilename = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory, title=
"Output FASTA to...")
598 if not outfilename:
return
599 global_variables.current_directory = os.path.dirname(outfilename)
600 OUTFILE =
open(outfilename,
'w')
602 region_array = regions.get_regions()
603 for region
in region_array:
604 if not region.region_exists(pose):
continue
606 seq = region.get_sequence(pose)
607 header =
">"+base_name+
" "+region.get_region_string_with_all_residues(pose)
608 OUTFILE.write(header+
"\n")
609 OUTFILE.write(seq+
"\n")
611 seq = pose.sequence()
612 OUTFILE.write(
">"+base_name+
"\n")
613 OUTFILE.write(seq+
"\n")
615 print "FASTA written."
620 If outfilename is False, will ask for a filename
621 Goes through each member of PDBLIST
622 Uses pyrosetta to get sequence.
625 outfilename = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory, title=
"Output FASTA to...")
626 if not outfilename:
return
627 global_variables.current_directory = os.path.dirname(outfilename)
629 OUTFILE =
open(outfilename,
'w')
630 PDBLIST =
open(pdblist_path,
'r')
632 for pdbpath
in PDBLIST:
633 pdbpath = pdbpath.strip()
634 if not pdbpath:
continue
636 pdb = os.path.basename(pdbpath)
637 pdbID = pdb.split(
".")[0]
640 region_array = regions.get_regions()
642 pose_from_pdb(pose, pdbpath)
643 except PyRosettaException:
644 print "Could not load.. "+pdbID+
"..continueing.."
647 for region
in region_array:
649 seq = region.get_sequence(pose)
652 header =
">"+repr(i)+
"_"+os.path.basename(pdbpath)+
" "+region.get_region_string_with_all_residues(pose)
655 OUTFILE.write(header+
"\n")
656 OUTFILE.write(seq+
"\n\n")
659 OUTFILE.write(
">"+pdbID+
" "+pdbpath+
"\n")
660 OUTFILE.write(seq+
"\n")
665 print "Fasta written..."
671 Exports a list of scores.
674 PDBLIST = tkFileDialog.askopenfile(title =
"PDBLIST", initialdir = global_variables.current_directory)
675 OUTFILE = tkFileDialog.asksaveasfile(title =
"Save As...", initialdir = global_variables.current_directory)
677 if PDBLIST ==
None or OUTFILE==
None:
680 for PDBPath
in PDBLIST:
681 PDBPath = PDBPath.strip()
684 pose_from_pdb(p, PDBPath)
687 OUTFILE.write(PDBPath+
":%.3f"%SCORE+
"\n")
694 Uses molfile to params in pyrosetta bindings to convert.
695 Maybe should be converted to a window for more options.
698 print "Using molfile_to_params.py script located in pyrosetta/toolbox/molfile2params written by Ian W Davis. For more options, please use script."
699 script_path = os.environ[
"PYROSETTA"]+
"/toolbox/molfile2params/molfile_to_params.py"
701 if not os.path.exists(script_path):
702 print "Untarring script"
703 extract_path = os.environ[
"PYROSETTA"]+
"/toolbox"
704 tar_path = extract_path+
"/molfile2params.tar.gz"
706 if tarfile.is_tarfile(tar_path):
707 tfile = tarfile.open(tar_path)
708 tfile.extractall(extract_path)
710 print "Could not extract tar file."
713 mdl_file = tkFileDialog.askopenfilename(initialdir = global_variables.current_directory, title =
"Open MDL, MOL, MOL2, or SDF file")
714 if not mdl_file:
return
715 global_variables.current_directory=os.path.dirname(mdl_file)
717 output_kinemage = tkMessageBox.askyesno(title =
"kinemage", message=
"Output kinemage file for ligand visualization?")
719 options =
" "+mdl_file+
" "
721 options = options+
"-k "
723 outdir = os.path.dirname(mdl_file)+
"/"+os.path.basename(mdl_file).
split(
".")[0]
724 if not os.path.exists(outdir): os.mkdir(outdir)
726 prefix = outdir+
"/"+os.path.basename(mdl_file).
split(
".")[0]
728 options = options +
"-c --clobber "+
"-p "+prefix
730 print "Running molfile_to_params with these options: "+options
731 os.system(
"python "+script_path+options)
732 print "Parameters generated. Output directed to: "+outdir
738 Saves a file of paths.
740 if not array_of_paths:
741 print "No extra params enabled."
744 outfilename = tkFileDialog.asksaveasfilename(initialdir = global_variables.current_directory, title=
"Output Parm pathList to...")
745 if not outfilename:
return
746 global_variables.current_directory = os.path.dirname(outfilename)
747 FILE =
open(outfilename,
'w')
750 for path
in array_of_paths:d[path]=0
753 FILE.write(path+
"\n")
Fstring::size_type len(Fstring const &s)
Length.
utility::vector1< std::string > split(const std::string &s)
split given std::string using ' ' symbol.
bool open(utility::io::izstream &db_stream, std::string const &db_file, bool warn)
Open a database file on a provided stream.
Fstring rjust(Fstring const &s)
Right-Justified Copy.