Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
cleaning.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # :noTabs=true:
3 
4 
5 # (c) Copyright Rosetta Commons Member Institutions.
6 # (c) This file is part of the Rosetta software suite and is made available under license.
7 # (c) The Rosetta software is developed by the contributing members of the Rosetta Commons.
8 # (c) For more information, see http://www.rosettacommons.org. Questions about this can be
9 # (c) addressed to University of Washington UW TechTransfer, email: license@u.washington.edu.
10 
11 ## @file cleaning.py
12 ## @brief
13 ## @author Evan H. Baugh, Johns Hopkins University
14 
15 import os
16 
17 from rosetta import Pose
18 from rosetta import pose_from_pdb
19 
20 # removes non ATOM lines from <pdb_file> and writes to <out_file>
def cleanATOM( pdb_file , out_file = '', edit = -4 ):
    """
    Writes all lines in the PDB file <pdb_file> beginning with "ATOM" or
    "TER" into <out_file> (defaults to <pdb_file>.clean.pdb)
    note: the third argument, <edit>, is for PDB files not ending in .pdb;
        the default output name is <pdb_file>[:<edit>] + '.clean.pdb', so
        the default of -4 strips a trailing ".pdb" and a false value
        (0 or None) keeps the whole input name

    <pdb_file> may be a relative or an absolute path.
    Returns True if the cleaned file was written, False if <pdb_file>
    does not exist.

    example:
        cleanATOM('1YY9.pdb')
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanCRYS
    """
    # an optional argument for PDB files not ending in .pdb:
    #    a false <edit> means "do not trim anything off the input name"
    if not edit:
        edit = len(pdb_file)
    # if the file does not exist, report it and give up; the path is tested
    #    as given so that absolute paths work as well as relative ones
    if not os.path.exists( pdb_file ):
        print('No such file or directory named '+pdb_file)
        return False
    # find all ATOM and TER lines
    #    a TER record is often written as a bare "TER" with no trailing
    #    fields, so accept that form in addition to "TER ..."
    with open(pdb_file,'r') as fid:
        good = [line for line in fid
                if line.startswith(('ATOM ','TER ')) or line.rstrip() == 'TER']
        # add your preference rules for ligands, DNA, water, etc.
    # default output file to <pdb_file>.clean.pdb
    if not out_file:
        out_file = pdb_file[:edit]+'.clean.pdb'
    # write the found lines
    #    (single-string print calls give the same text on Python 2 and 3)
    print('if the file %s already exists, it will be overwritten' % (out_file,))
    with open(out_file,'w') as fid:
        fid.writelines(good)
    print('PDB %s successfully cleaned, non-ATOM lines removed\nclean data written to %s' % (pdb_file, out_file))
    return True
63 
64 # if you would prefer a simpler call using grep, it looks something like this
65 # os.system("grep \"ATOM\" %s.pdb > %s.clean.pdb"%(pdb_file[:edit],pdb_file[:edit]))
66 
67 # removes redundant crystal contacts, isolate monomer
def cleanCRYS( pdb_file , olig = 2 , out_file = '' ):
    """
    Writes a PDB file for a monomer of <pdb_file> if it is a <olig>-mer
    to <out_file> (defaults to <pdb_file>.mono.pdb)
    note: this is a simple sequence comparison, the sequence is cut into
        <olig> equal pieces and <pdb_file> is treated as a <olig>-mer only
        if every piece is identical to the first one

    <pdb_file> may be a relative or an absolute path.
    Returns True if the monomer file was written, False if <pdb_file>
    does not exist or is not a <olig>-mer.

    example:
        cleanCRYS('1YY8.pdb',2)
    See also:
        Pose
        Pose.dump_pdb
        pose_from_pdb
        pose_from_rcsb
        cleanATOM
    """
    # if the file does not exist, report it and give up; the path is tested
    #    as given so that absolute paths work as well as relative ones
    if not os.path.exists( pdb_file ):
        print('No such file or directory named '+pdb_file)
        return False
    # fewer than one copy can never match, say so before loading anything
    #    (this also avoids dividing by zero below)
    if olig < 1:
        print('%s is not a %s-mer' % (pdb_file, str(int(olig))))
        return False
    # load in the PDB...this is really just to get the sequence
    pose = pose_from_pdb(pdb_file)
    tot = pose.total_residue()
    seq = pose.sequence()
    # length of one copy; the residue count must split evenly, otherwise
    #    trailing residues would never be compared and would be left
    #    behind in the "monomer"
    frac = tot // olig
    is_oligomer = ( tot % olig == 0 )
    # determine if sequence fragments are identical
    if is_oligomer:
        first = seq[:frac]
        is_oligomer = all( seq[f*frac:(f+1)*frac] == first
            for f in range(1, olig) )
    if not is_oligomer:
        print('%s is not a %s-mer' % (pdb_file, str(int(olig))))
        return False
    # the protein has repeats, delete all other residues
    #    the residue just after the first copy is removed repeatedly, the
    #    following residues shift down into position frac+1 each time
    for i in range(frac*(olig-1)):
        pose.delete_polymer_residue(frac+1)
    # write the new pdb file
    if not out_file:
        out_file = pdb_file[:-4]+'.mono.pdb'
    # (single-string print calls give the same text on Python 2 and 3)
    print('if the file %s  already exists, it will be overwritten' % (out_file,))
    pose.dump_pdb(out_file)
    print('PDB %s successfully cleaned, redundant monomers removed\nmonomer data written to %s' % (pdb_file, out_file))
    return True
117 
118 
Real sum(ddGs &scores_to_sum)
bool open(utility::io::izstream &db_stream, std::string const &db_file, bool warn)
Open a database file on a provided stream.
Definition: open.cc:55
core::Real round(core::Real d, core::Size digits)