9 from optparse
import OptionParser, IndentedHelpFormatter
12 except:
from sets
import Set
as set
17 return description.strip() +
"\n"
27 if isinstance(regex, str): regex = re.compile(regex)
29 if not os.path.isdir(path):
continue
30 hits = [ f
for f
in os.listdir(path)
if regex.match(f)
is not None ]
32 return os.path.join(path, hits[0])
34 if required:
raise ValueError(
"Multiple hits for '%s' in '%s' (try using the --compile-tag option)" % (regex.pattern, path))
36 if required:
raise ValueError(
"'%s' not found" % regex.pattern)
49 protein_exts = [
'pdb']
50 chemical_exts = [
'mol',
'mol2',
'sdf']
56 return self._protein_cache.setdefault(docking_file, docking_file)
58 return self._cofactor_cache.setdefault(docking_file, docking_file)
60 return self._ligand_cache.setdefault(docking_file, docking_file)
66 ext_re =
r'\.(%s)$' %
'|'.
join(exts)
70 return cmp(self.
path, other.path)
72 return hash(self.
path)
81 PATH_paths = os.environ[
'PATH'].
split(os.pathsep)
82 ros_paths = [os.path.join(options.mini,
'bin')] + PATH_paths
83 if options.compile_tag
is not None:
84 comp_tag = options.compile_tag.replace(
'.',
r'\.')
86 comp_tag =
r'.+release'
87 scr_paths = [os.path.join(options.mini,
'src',
'apps',
'public',
'ligand_docking'),
88 os.path.join(options.mini,
'src',
'python',
'apps',
'public')] + PATH_paths
89 oe_paths = [os.path.join(options.openeye,
'bin')] + PATH_paths
90 if 'OE_LICENSE' in os.environ:
91 oe_paths.append( os.path.join(os.path.dirname(os.environ[
'OE_LICENSE']),
'bin') )
93 'dock':
find_file(
r'ligand_dock\.'+comp_tag, ros_paths),
94 'rpkmin':
find_file(
r'ligand_rpkmin\.'+comp_tag, ros_paths),
95 'confidence':
find_file(
r'ligdock_confidence\.'+comp_tag, ros_paths, required=
False),
96 'extract':
find_file(
r'extract_atomtree_diffs\.'+comp_tag, ros_paths),
97 'best_poses':
find_file(
r'select_best_unique_ligand_poses\.'+comp_tag, ros_paths, required=
False),
98 'assign_charges':
find_file(
r'assign_charges\.py', scr_paths, required=
not options.skip_charges),
99 'molfile_to_params':
find_file(
r'molfile_to_params\.py', scr_paths),
100 'pdb_to_molfile':
find_file(
r'pdb_to_molfile\.py', scr_paths),
101 'get_scores':
find_file(
r'get_scores\.py', scr_paths),
102 'plot_funnels':
find_file(
r'plot_funnels\.R', scr_paths),
103 'parallel':
find_file(
r'parallel\.py', scr_paths),
104 'omega':
find_file(
r'omega2', oe_paths, required=
not options.skip_omega),
106 if options.skip_omega: progs[
'omega'] =
None
107 if options.skip_charges: progs[
'assign_charges'] =
None
113 paths = [ os.getcwd() ]
116 if line.startswith(
"#")
or line ==
"":
continue
117 fields = line.split()
119 raise ValueError(
"Not enough fields in list of input structures: "+line.rstrip())
125 return max(1,
int(options.njobs) //
len(docking_cases))
129 proteins = set([ d.protein
for d
in docking_cases ])
131 ligands = set([ d.ligand
for d
in docking_cases ])
133 outfile.write(
'''#!/bin/bash
136 # Set up directory structure
137 mkdir -p ligand cofactor unbound input native
141 mkdir -p {fa,cen}/{conf1,confs,kins,withxtal}
144 if progs[
'omega']
is not None:
145 outfile.write(
'omega="%s -includeInput -commentEnergy"\n' % progs[
'omega'])
146 if options.num_procs > 1: outfile.write(
"%s -j%i <<HEREDOC\n" % (progs[
'parallel'], options.num_procs))
147 for i, ligand
in enumerate(ligands):
148 ligcode =
"X%02X" % i
149 ligbase = ligand.base
150 ligpath = ligand.path
151 if progs[
'omega']
is not None:
153 ligpath =
"%s.omega.mol2" % ligbase
154 outfile.write(
"$omega -in %s -out %s -prefix _%s && " % (oldpath, ligpath, ligbase))
155 if progs[
'assign_charges']
is not None:
157 ligpath =
"%s.am1bcc.mol2" % ligbase
158 outfile.write(
"%s %s < %s > %s && " % (options.python, progs[
'assign_charges'], oldpath, ligpath))
159 outfile.write(
"%s %s -c -n%s -p%s -k%s.kin %s" % (options.python, progs[
'molfile_to_params'], ligcode, ligbase, ligbase, ligpath))
160 for ext
in [
'fa',
'cen']:
161 outfile.write(
" && cat %s_????.%s.pdb | gzip -c > %s/withxtal/%s_confs.%s.pdb.gz" % (ligbase, ext, ext, ligbase, ext))
163 outfile.write(
" && ( [ -f %s_0002.%s.pdb ] || cp %s_0001.%s.pdb %s_0002.%s.pdb )" % (ligbase, ext, ligbase, ext, ligbase, ext))
164 outfile.write(
" && mv %s_0001.%s.pdb %s/conf1/" % (ligbase, ext, ext))
165 outfile.write(
" && cat %s_????.%s.pdb | gzip -c > %s/%s_confs.%s.pdb.gz" % (ligbase, ext, ext, ligbase, ext))
166 outfile.write(
" && mv %s_????.%s.pdb %s/confs/" % (ligbase, ext, ext))
167 outfile.write(
" && echo 'PDB_ROTAMERS %s_confs.%s.pdb' >> %s.%s.params" % (ligbase, ext, ligbase, ext))
168 outfile.write(
" && cp %s.%s.params %s/withxtal/" % (ligbase, ext, ext))
169 outfile.write(
" && mv %s.%s.params %s/" % (ligbase, ext, ext))
170 outfile.write(
" && mv %s.%s.kin %s/kins/" % (ligbase, ext, ext))
172 if options.num_procs > 1: outfile.write(
"HEREDOC\n")
178 mkdir -p {fa,cen}/{conf1,confs,kins,withxtal}
181 if options.num_procs > 1: outfile.write(
"%s -j%i <<HEREDOC\n" % (progs[
'parallel'], options.num_procs))
182 for i, ligand
in enumerate(cofactors):
183 ligcode =
"Q%02X" % i
184 ligbase = ligand.base
185 ligpath = ligand.path
187 if progs[
'omega']
is not None:
189 ligpath =
"%s.omega.mol2" % ligbase
190 outfile.write(
"$omega -in %s -out %s -prefix _%s && " % (oldpath, ligpath, ligbase))
191 if progs[
'assign_charges']
is not None:
193 ligpath =
"%s.am1bcc.mol2" % ligbase
194 outfile.write(
"%s %s < %s > %s && " % (options.python, progs[
'assign_charges'], oldpath, ligpath))
195 outfile.write(
"%s %s -c -n%s -p%s -k%s.kin %s" % (options.python, progs[
'molfile_to_params'], ligcode, ligbase, ligbase, ligpath))
196 for ext
in [
'fa',
'cen']:
197 outfile.write(
" && cat %s_????.%s.pdb | gzip -c > %s/withxtal/%s_confs.%s.pdb.gz" % (ligbase, ext, ext, ligbase, ext))
199 outfile.write(
" && ( [ -f %s_0002.%s.pdb ] || cp %s_0001.%s.pdb %s_0002.%s.pdb )" % (ligbase, ext, ligbase, ext, ligbase, ext))
200 outfile.write(
" && mv %s_0001.%s.pdb %s/conf1/" % (ligbase, ext, ext))
202 outfile.write(
" && mv %s_????.%s.pdb %s/confs/" % (ligbase, ext, ext))
205 outfile.write(
" && mv %s.%s.params %s/" % (ligbase, ext, ext))
206 outfile.write(
" && mv %s.%s.kin %s/kins/" % (ligbase, ext, ext))
208 if options.num_procs > 1: outfile.write(
"HEREDOC\n")
212 # Make "unbound" proteins (never prepacked, never a ligand or cofactor)
214 for protein
in proteins:
215 outfile.write(
"cp %s unbound/%s.pdb\n" % (protein.path, protein.base))
217 # Make native and input PDBs, with ligands and cofactors
218 # May later be replaced by repacked PDB files
220 for docking_case
in docking_cases:
221 cofs =
" ".
join([
"cofactor/fa/conf1/%s_0001.fa.pdb" % c.base
for c
in docking_case.cofactors])
222 nat_files =
" ".
join([docking_case.protein.path, cofs,
"ligand/fa/conf1/%s_0001.fa.pdb" % docking_case.ligand.base])
223 inp_files =
" ".
join([docking_case.protein.path, cofs,
"ligand/fa/confs/%s_0002.fa.pdb" % docking_case.ligand.base])
224 outfile.write(
"cat %s > native/%s_%s.pdb\n" % (nat_files, docking_case.protein.base, docking_case.ligand.base))
225 outfile.write(
"cat %s > input/%s_%s.pdb\n" % (inp_files, docking_case.protein.base, docking_case.ligand.base))
230 database = options.database
232 ligands = set([ d.ligand
for d
in docking_cases ])
233 extra_res_cen =
" ".
join(
234 [
"ligand/cen/%s.cen.params" % l.base
for l
in ligands]
235 + [
"cofactor/cen/%s.cen.params" % c.base
for c
in cofactors])
236 extra_res_fa =
" ".
join(
237 [
"ligand/fa/%s.fa.params" % l.base
for l
in ligands]
238 + [
"cofactor/fa/%s.fa.params" % c.base
for c
in cofactors])
242 ## On a large cluster, the database should be on a local scratch disk
243 ## to avoid over-taxing NFS.
244 #-database /scratch/ROSETTA/minirosetta_database
245 ## "Fallback" database locations can also be specified,
246 ## in case the primary database is missing on some nodes:
247 #-database /work/davis/rosetta/rosetta_database
248 ## Location provided by user:
249 -database %(database)s
251 ## You must supply .params files for any residue types (ligands)
252 ## that are not present in the standard Rosetta database.
253 ## Centroid residues are not needed for docking,
254 ## but may be needed for other Rosetta protocols.
255 #-extra_res_cen %(extra_res_cen)s
256 -extra_res_fa %(extra_res_fa)s
258 ## These channels generate a LOT of output,
259 ## especially with -improve_orientation.
260 -mute protocols.geometry.RB_geometry core.scoring.dunbrack.SingleLigandRotamerLibrary core.scoring.rms_util core.pack.dunbrack.SingleLigandRotamerLibrary
262 ## I prefer output structures with Rosetta numbering, from 1 to N residues.
263 ## To keep the original PDB numbering, omit this flag:
266 ## Recording the SVN revision of the code in your output files
267 ## makes it easier to figure out what you did later.
269 ## MT is now the default random number generator, actually.
271 ## Rosetta's default behavior is to pretend that atoms with zero occupancy
272 ## don't exist at all, which can lead to whole residues disappearing...
273 -ignore_zero_occupancy false
275 ## Includes the input sidechain conformations as rotamers when repacking,
276 ## but they can be "lost" if any other rotamer is chosen at that position.
278 ## Instead, use -unboundrot to permanently add the rotamers from one or more
279 ## PDB files. Their rotamer ("Dunbrack") energies are also adjusted to be
280 ## equal to the best rotamer at that position, thereby favoring the native.
281 ## Since most sidechains do not change position much upon ligand binding,
282 ## including this knowledge generally improves docking results:
284 ## Controls the number of (protein) rotamers used.
288 ## Ensures that extra rotamers are used for surface residues too.
292 ## Use soft-repulsive scoring during search (but not final minimization).
293 ## This slightly improves search. Hard-rep is used for final scoring,
294 ## however, because it gives better discrimination.
296 ## Like Rosetta++, only evaluate the Coloumb term between protein and ligand.
306 ## If you weren't supplying the input file(s) on the command line,
307 ## this is where you would put them:
310 ## Number of trajectories to run (per input structure given with -s)
313 ## Where to write the PDB files.
314 -pdb prepack/all_traj
316 ## Do nothing for 0 - N seconds on startup. If many processes are started
317 ## at once, this helps avoid too much I/O as they all load the database.
320 ## If your PDB file does not have hydrogens in the right places, use this:
322 ## You may also want to fix Asn/Gln/His sidechains:
333 ## If you weren't supplying the input file(s) on the command line,
334 ## this is where you would put them:
336 ## To get meaningful RMS values in the output, either the input PDB must have
337 ## the ligand in the correct place, or you must supply another file that does:
340 ## Number of trajectories to run (per input structure given with -s)
342 ## With multiple processes, ensures a unique name for every output structure.
343 ## Each process should get a different string here, so you can't really
344 ## put it in the FLAGS.txt file, it has to go in the Condor script:
347 ## Where to write the silent.out file. Specified in my Condor script.
348 #-pdb work/jnk_pp_1_001/3
350 ## Do nothing for 0 - N seconds on startup. If many processes are started
351 ## at once, this helps avoid too much I/O as they all load the database.
354 ## If your PDB file does not have hydrogens in the right places, use this:
356 ## You may also want to fix Asn/Gln/His sidechains:
359 ## Flags to control the initial perturbation of the ligand,
360 ## and thus how much of the binding pocket to explore.
362 ## Random perturbation of up to N Angstroms in X, Y, and Z,
363 ## drawn from a uniform distribution.
364 ## This gives a cube, but points outside the sphere of radius N are discarded,
365 ## resulting in uniform positional sampling within the sphere.
366 -uniform_trans 5 # angstroms
367 ## Randomize the starting orientation and conformer (respectively) of the
368 ## ligand. Unnecessary if using -improve_orientation.
371 ## An alternative to -uniform_trans and -randomize2.
372 ## Random perturbations in X, Y, and Z and the 3 Euler angles,
373 ## drawn from a Gaussian distribution.
374 ## I *believe* that the three Gaussians in X, Y, and Z actually give a
375 ## spherically isotropic distribution of positions, but large angles
376 ## clearly give unreasonable results (use -randomize2 instead).
377 ## This can be used for positional perturbation only (instead of
378 ## -uniform_trans) by setting the angle component to zero.
379 #-dock_pert 30 5 # rot degrees, trans angstroms
381 ## Instead of choosing an orientation and conformer at random, try all
382 ## available conformers in N random orientations, and try to maximize
383 ## shape complementarity to the protein (keeping in mind that sidechains
384 ## may move later, etc etc. See the paper or code for details.)
385 ## We find this *significantly* improves search, and costs just a few seconds.
386 -improve_orientation 1000
387 ## Let ligand rotatable bonds minimize, with harmonic restraints
388 ## where one standard deviation is 15 degrees:
390 -harmonic_torsions 15
391 ## Allows rotamers to "recombine" instead of always minimizing towards
392 ## the last rotamer selected by the packer.
393 -use_ambig_constraints
394 ## Introduces random torsion tweaks in the MCM cycle; may help large ligands.
395 ## Requires -use_ambig_constraints.
397 ## In the final minimization, let the backbone minimize with harmonic
398 ## restraints on the Calphas (stddev = 0.3 A).
399 ## Only stretches of residues near the ligand are minimized,
400 ## typically 20 - 40 in total. (40 - 80 residues are repacked.)
402 -harmonic_Calphas 0.3
403 ## The 6-cycle protocol with repacks in cycles 1 and 4.
405 ## In each trajectory, one of the following points will be chosen at random
406 ## as the starting point for the ligand centroid (followed by other
407 ## perturbations like -uniform_trans, etc).
408 #-start_from 20.0 28.4 26.0
409 #-start_from 23.3 18.9 26.0
410 #-start_from 28.7 14.0 19.6
411 #-start_from 16.9 16.0 32.7
412 #-start_from 21.3 10.8 31.1
413 #-start_from 25.6 4.4 32.5
414 ## By supplying multiple .params files that have the same 3-letter residue
415 ## name, the same heavy atoms, but different proton configurations, you can
416 ## sample different protomers / tautomers within the packing steps.
417 ## In many cases this gives a negligible increase in run time (nice).
418 ## However, I'm unaware of any cases (so far) where it actually helped
419 ## to improve docking results, so use at your own risk.
427 ## Flags for generating "minimized natives", if applicable.
428 ## In docking benchmarks, these can be used to diagnose search problems vs scoring problems.
434 -use_input_sc # causes -use_ambig_constraints to include native rotamer!
438 -harmonic_torsions 15
439 -use_ambig_constraints
442 -harmonic_Calphas 0.3
450 proteins_and_cofactors = set([ tuple([d.protein]+d.cofactors)
for d
in docking_cases ])
451 outfile.write(
'''#!/bin/bash
454 # Set up directory structure
455 mkdir -p prepack/{input,all_traj}
457 # Prepack and minimize protein receptors
460 if options.num_procs > 1: outfile.write(
"%s -j%i <<'HEREDOC'\n" % (progs[
'parallel'], options.num_procs))
461 for p_and_c
in proteins_and_cofactors:
463 cofactors = p_and_c[1:]
464 base =
"_".
join([pc.base
for pc
in p_and_c])
465 cof_files =
" ".
join([
"cofactor/fa/conf1/%s_0001.fa.pdb" % c.base
for c
in cofactors])
466 outfile.write(
"cat %s %s > prepack/input/%s.pdb" % (protein.path, cof_files, base))
467 outfile.write(
" && %s @COMMON.flags @RPKMIN.flags -s prepack/input/%s.pdb" % (progs[
'rpkmin'], base))
468 outfile.write(
" && egrep '^(ATOM|HETATM)' $(awk '/^pose / {print FILENAME, $NF}' prepack/all_traj/%s_????.pdb | sort -nk2 | head -1 | cut -d' ' -f1) > prepack/%s.pdb" % (base, base))
470 if options.num_procs > 1: outfile.write(
"HEREDOC\n")
472 # Replace by old input PDB files with repacked ones
474 for d
in docking_cases:
475 base =
"_".
join([pc.base
for pc
in [d.protein]+d.cofactors])
476 outfile.write(
"cat prepack/%s.pdb ligand/fa/confs/%s_0002.fa.pdb > input/%s_%s.pdb\n" % (base, d.ligand.base, d.protein.base, d.ligand.base))
481 if not options.local :
482 outfile.write(
'''#!/bin/bash
483 #set -v # this messes up the nice echo output at the end
484 shopt -s nullglob # avoid error messages from tar about non-existant files
486 # Make tarball of files needed for docking
487 tar cvzf $(basename $(pwd)).tgz {cofactor,ligand}/{fa,cen}/{withxtal/,}*.params {cofactor,ligand}/{fa,cen}/{withxtal/,}*_confs.*.pdb* input/ native/ unbound/ *.flags [45]_*.sh
491 echo " Now copy this file to the cluster where you'll do docking."
493 echo " rsync -avxP $(basename $(pwd)).tgz syd:runs/$(basename $(pwd))/"
495 echo " *** If necessary, adjust path to database in COMMON.flags"
496 echo " *** If necessary, adjust path to executable in 4_dock_condor.sh"
500 outfile.write(
'''#!/bin/bash
502 echo " *** If necessary, adjust path to database in COMMON.flags"
503 echo " *** If necessary, adjust path to executable in 4_dock_condor.sh"
510 ligand_dock = progs[
'dock']
512 outfile.write(
'''#!/bin/bash
516 # Create the header for the CONDOR files
517 minnatfile="CONDOR.minnat"
518 dockfile="CONDOR.dock"
520 for f in "$minnatfile" "$dockfile"; do
521 cat > "$f" <<'HEREDOC'
522 Executable = %(ligand_dock)s
524 Requirements = Memory > 256
526 copy_to_spool = false
532 cat >> "$minnatfile" <<'HEREDOC'
533 Error = log/err.minnat.$(Process).log
534 Log = log/condor.minnat.$(Process).log
535 Output = log/out.minnat.$(Process).log
540 cat >> "$dockfile" <<'HEREDOC'
541 Error = log/err.$(Process).log
542 Log = log/condor.$(Process).log
543 Output = log/out.$(Process).log
548 # Function to set up each docking case
549 # Takes protein_name ligand_name num_procs
555 mkdir -p "work/minnat/$pl"
556 echo "Arguments = @COMMON.flags @MINNAT.flags -in:file:s native/$pl.pdb -in:file:native native/$pl.pdb -packing:unboundrot unbound/$p.pdb -out:path:pdb work/minnat/$pl" >> "$minnatfile"
557 echo "Queue" >> "$minnatfile"
558 for((i=0;i<$3;i++)); do
559 mkdir -p "work/$pl/$i"
560 echo "Arguments = @COMMON.flags @DOCK.flags -in:file:s input/$pl.pdb -in:file:native native/$pl.pdb -packing:unboundrot unbound/$p.pdb -out:path:pdb work/$pl/$i -run:seed_offset $i -out:suffix _$i" >> "$dockfile"
561 echo "Queue" >> "$dockfile"
566 # List the individual docking cases
569 outfile.write(
'procs_per_case=%i\n' % procs_per_case)
570 for d
in docking_cases:
571 prot_lig =
"%s_%s" % (d.protein.base, d.ligand.base)
572 outfile.write(
'setup_docking_case "%s" "%s" $procs_per_case\n' % (d.protein.base, d.ligand.base))
575 echo " condor_submit $minnatfile # if you want minimized natives"
576 echo " condor_submit $dockfile # the actual docking calculations"
582 ligand_dock = progs[
'dock']
584 outfile.write(
'''#!/bin/bash
586 Executable="%(ligand_dock)s"
590 # Create the header for the BASH files
591 minnatfile="BASH.minnat.sh"
592 dockfile="BASH.dock.sh"
594 # Make sure that neither of them already exist.
598 # Function to set up each docking case
599 # Takes protein_name ligand_name num_procs
605 mkdir -p "work/minnat/$pl"
606 echo "${Executable} @COMMON.flags @MINNAT.flags -in:file:s native/$pl.pdb -in:file:native native/$pl.pdb -packing:unboundrot unbound/$p.pdb -out:path:pdb work/minnat/$pl > log/err.minnat.${pl}.log 2> log/out.minnat.${pl}.log" >> "$minnatfile"
607 echo >> "$minnatfile"
608 for((i=0;i<$3;i++)); do
609 mkdir -p "work/$pl/$i"
610 echo "${Executable} @COMMON.flags @DOCK.flags -in:file:s input/$pl.pdb -in:file:native native/$pl.pdb -packing:unboundrot unbound/$p.pdb -out:path:pdb work/$pl/$i -run:seed_offset $i -out:suffix _$i > log/out.${pl}.${i}.log 2> log/err.${pl}.${i}.log" >> "$dockfile"
615 # List the individual docking cases
618 outfile.write(
'procs_per_case=%i\n' % procs_per_case)
619 for d
in docking_cases:
620 prot_lig =
"%s_%s" % (d.protein.base, d.ligand.base)
621 outfile.write(
'setup_docking_case "%s" "%s" $procs_per_case\n' % (d.protein.base, d.ligand.base))
624 # Make scripts executable
625 chmod u+x $minnatfile $dockfile
628 echo " nohup nice $minnatfile # if you want minimized natives"
629 echo " nohup nice $dockfile # the actual docking calculations"
631 echo " Or for multiple processors something like:"
633 echo " %s -j %s < $minnatfile # if you want minimized natives"
634 echo " %s -j %s < $dockfile # the actual docking calculations"
635 ''' % (progs[
'parallel'], options.num_procs, progs[
'parallel'], options.num_procs) )
640 outfile.write(
'''#!/bin/bash
642 for dir in work/minnat/*; do
645 f="out/minnat/${d}_silent.out"
646 mv "$dir"/silent*.out $f
647 # Remove new file if it is contains no data (0 size)
651 for dir in work/*; do
653 [ "$d" = minnat ] && continue
655 f="out/${d}_silent.out"
656 # Remove old file first
658 find $dir -name 'silent*.out' -print0 | xargs -0 cat >> $f
659 # Remove new file if it is contains no data (0 size)
663 for f in out/*_silent.out; do
665 a=$(grep -c '^POSE_TAG ' $f)
666 b=$(grep -c '^END_POSE_TAG ' $f)
670 echo "***ERROR*** $a $b"
674 if not options.local:
675 outfile.write(
'''tar cvzf $(basename $(pwd))_out.tgz out/
679 echo " Now copy this file back to your computer for analysis."
681 echo " rsync -avxP $(basename $(pwd))_out.tgz dig:"
683 echo " cd path/to/project/"
684 echo " tar xvzf ~/$(basename $(pwd))_out.tgz && rm ~/$(basename $(pwd))_out.tgz"
691 outfile.write(
'''#!/bin/bash
693 # Check for result files
694 if [ ! -d out ]; then
696 echo " Can't find out/ directory. Have you untarred the results?"
702 mkdir -p {out,scores}/minnat
705 for f in *_silent.out; do
707 %(get_scores)s < $f > ../../scores/minnat/${f/_silent.out/.tab}
710 for f in *_silent.out; do
712 %(get_scores)s < $f > ../scores/${f/_silent.out/.tab}
716 # Make PDF plots if R is on the PATH
718 if which R > /dev/null; then
719 R --vanilla < %(plot_funnels)s
722 echo " Can't find R on the PATH. Skipping funnel plots..."
727 if progs[
'confidence']
is not None:
729 # Estimate confidence in docking results
730 %(confidence)s @COMMON.flags -in:file:silent out/*.out
732 if progs[
'best_poses']
is not None:
734 # Extract best-scoring docked poses, skipping duplicates
735 # Wow, the select_best_unique_ligand_poses program is awkward. I should fix that.
737 for f in out/*.out; do
738 %(best_poses)s @COMMON.flags -docking:ligand:max_poses 10 -docking:ligand:min_rms 1.0 -out:path:pdb pdbs -in:file:silent $f -out:file:silent default.out
739 tags=( $(head -1 cluster_rms.tab) ) # Bash array of selected tags
740 %(extract)s @COMMON.flags -out:path:pdb pdbs -in:file:s $f -in:file:tags "${tags[@]}"
741 # Prefix each extracted structure filename with its rank: 01, 02, 03, ...
742 for ((i=0; i<${#tags[@]}; ++i)); do
743 mv pdbs/${tags[$i]}.pdb pdbs/$(printf "%%02i_" $((i+1)))${tags[$i]}.pdb
745 # select_best_unique_ligand_poses seems to mangle the silentfile sometimes, so better to use the original
746 #%(extract)s @COMMON.flags -out:path:pdb pdbs -in:file:s default.out
747 rm default.out cluster_rms.tab
750 # Convert extracted PDBs into mol2 files, for people who prefer that
752 pdb_to_molfile="%(pdb_to_molfile)s --comment -oMOL2"
754 for d
in docking_cases:
755 prot_lig =
"%s_%s" % (d.protein.base, d.ligand.base)
756 if progs[
'assign_charges']
is not None: lig_path =
"ligand/%s.am1bcc.mol2" % d.ligand.base
757 elif progs[
'omega']
is not None: lig_path =
"ligand/%s.omega.mol2" % d.ligand.base
758 else: lig_path = d.ligand.path
759 outfile.write(
'$pdb_to_molfile %s pdbs/%s*.pdb > mol2_out/%s.mol2\n' % (lig_path, prot_lig, prot_lig))
764 filename = os.path.join(options.target_dir, filename)
765 if os.path.exists(filename)
and not options.clobber:
766 print "File %s exists, use --clobber to overwrite" % filename
769 outfile =
open(filename,
'w')
770 function(outfile, *func_args, **func_kwargs)
772 if filename.endswith(
'.sh'): os.chmod(filename, 0750)
773 print "Wrote %s" % filename
774 except Exception, ex:
775 print "Exception while writing %s: %s" % (filename, ex)
784 Automatic RosettaLigand Setup (ARLS)
786 Given a list of protein-ligand pairings, generates all the required input
787 files for RosettaLigand docking, along with suggested flags and submit scripts.
788 The scripts are numbered 1 - N and should be run in order.
789 See the Doxygen documentation for important information on using RosettaLigand.
791 The script takes one argument, a list of protein-ligand pairs to dock.
792 Each line of the list should consist of the protein name, zero or more cofactor
793 names, and the ligand name (whitespace separated).
794 In the current directory should be files with the same name and an appropriate
795 extension: .pdb for proteins, and one of (.mol, .sdf, .mol2) for ligands and
796 cofactors. For example, the line "1abc cofactor mylig" could use files named
797 "1abc.pdb", "cofactor.mol", and "mylig.mol2". No unnecessary work will be done
798 even if a protein/cofactor/ligand appears in multiple lines.
800 The protein file should only contain standard amino acid residues.
801 The cofactor and ligand files should contain a single conformation
802 of a single compound (unless using --skip-omega).
803 The MiniRosetta database should be in ~/rosetta/rosetta_database;
804 otherwise use --database.
805 OpenEye's Omega should be in ~/openeye or on your PATH;
806 otherwise use --openeye or --skip-omega.
807 The OpenEye QUACPAC toolkit should be on your PYTHONPATH
808 for charges to be assigned (else use --skip-charges).
809 You can specify which clustering software to set up the docking runs for with
810 --cluster. Specifying BASH will result in a standard shell script-type submission.
811 for those without clustering software (It is recommended to also set --njobs.) You
812 also can use the BASH output file with MOAB/TORQUE/PBS style clustering software.
815 def up_dir(path, num_steps=1):
816 for i
in xrange(num_steps):
817 path = os.path.dirname(path)
819 MINI_HOME = up_dir(os.path.abspath(sys.path[0]), 4)
822 parser.set_description(main.__doc__)
831 parser.add_option(
"-j",
"--num-procs",
834 help=
"Number of processors to use on local machine for non-CONDOR tasks (default: 1)",
836 parser.add_option(
"-m",
"--mini",
838 help=
"Directory where rosetta_source is found (default: %s)" % MINI_HOME,
840 parser.add_option(
"-d",
"--database",
841 default=os.path.join( os.path.expanduser(
"~"),
"rosetta",
"rosetta_database"),
842 help=
"Directory where the Rosetta database is found (default: ~/rosetta/rosetta_database)",
844 parser.add_option(
"--openeye",
845 default=os.path.join( os.path.expanduser(
"~"),
"openeye"),
846 help=
"Directory where OpenEye tools are found (default: ~/openeye)",
848 parser.add_option(
"-t",
"--target-dir",
849 help=
"Directory where output should be created (default: ./arls_work)",
852 parser.add_option(
"--python",
853 help=
"Python executable (2.5/2.6 required for Omega) ",
854 default=
"/usr/bin/env python",
856 parser.add_option(
"--clobber",
859 help=
"Overwrite pre-existing output files",
861 parser.add_option(
"--skip-omega",
864 help=
"Do not use OpenEye's Omega to generate small molecule conformers. Assumes small molecule file already contains all desired conformers.",
866 parser.add_option(
"--skip-charges",
869 help=
"Do not assign partial charges with OpenEye's AM1BCC. Assumes small molecule file is in .mol2 format and already has charges, or that you want default Rosetta charges.",
871 parser.add_option(
"--local",
874 help=
"Run docking jobs locally (through current system), rather than on a remote cluster.",
876 parser.add_option(
"--cluster",
878 help=
"Type of clustering software to use. Acceptable values: CONDOR, BASH",
880 parser.add_option(
"--njobs",
882 help=
"Aproximate number of cluster jobs to split processing over (default: 250).",
884 parser.add_option(
"--compile-tag",
886 help=
"""The compile tag (e.g. "static.linuxgccrelease") for Rosetta programs (default: use a very basic autodetect).""",
888 (options, args) = parser.parse_args(args=argv)
893 infile =
open(args[0])
896 print "Too many arguments!"
903 if len(docking_cases) == 0:
904 raise ValueError(
"No protein-ligand pairs were supplied!")
905 if len(set([(d.protein, d.ligand)
for d
in docking_cases])) <
len(docking_cases):
906 raise ValueError(
"Some protein-ligand pairs are duplicated!")
908 options.cluster = str(options.cluster).upper()
909 if options.cluster
not in [
"CONDOR",
"BASH"]:
910 raise ValueError(
'Invalid cluster type "%s"specified with --cluster'% options.cluster)
911 if not os.path.isdir(options.mini):
912 raise ValueError(
"Cannot find MiniRosetta source tree; please specify with --mini")
913 if not os.path.isdir(options.database):
914 raise ValueError(
"Cannot find Rosetta database; please specify with --database")
915 if not options.skip_omega
and not os.path.isdir(options.openeye):
916 raise ValueError(
"Cannot find OpenEye tools; please specify with --openeye (or use --skip-omega)")
918 if not os.path.isdir(options.target_dir):
919 os.mkdir(options.target_dir)
922 write_setup_script, options, progs, docking_cases)
924 write_common_flags, options, docking_cases)
926 write_rpkmin_flags, options, docking_cases)
928 write_dock_flags, options, docking_cases)
930 write_minnat_flags, options, docking_cases)
932 write_rpkmin_script, options, progs, docking_cases)
934 write_tarball_pre_script, options, progs, docking_cases)
936 if options.cluster ==
"CONDOR":
938 write_dock_condor_script, options, progs, docking_cases)
939 elif options.cluster ==
"BASH":
941 write_dock_bash_script, options, progs, docking_cases)
943 raise ValueError(
"Shouldn't get here: invalid cluster type %s"%options.cluster)
946 write_concat_script, options, progs, docking_cases)
948 write_analysis_script, options, progs, docking_cases)
950 except ValueError, v:
959 if __name__ ==
"__main__":
960 sys.exit(
main(sys.argv[1:]))
961 print "You should really be running the wrapper script, arls.py, instead!"
def write_analysis_script
def write_dock_condor_script
Fstring::size_type len(Fstring const &s)
Length.
utility::vector1< std::string > split(const std::string &s)
split given std::string using ' ' symbol.
def write_tarball_pre_script
bool open(utility::io::izstream &db_stream, std::string const &db_file, bool warn)
Open a database file on a provided stream.
def write_dock_bash_script