Rosetta
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
calccontacts.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 from loadPDB import *
3 import coordlib
4 import string
5 import math
6 import copy
7 
9  def __init__(self, atom1, atom2):
10  self.atom1 = atom1
11  self.atom2 = atom2
12  #hbonds only relevant to full atom mode
13  if contactType == 'full':
14  self.isHbond = (self.atom1[0] in ['N', 'O'] and self.atom2[0]
15  in ['N', 'O'])
16 
17  def numSwap(self):
18  #swap the identities of atom1 and atom2
19  tmp_atom = self.atom1
20  self.atom1 = self.atom2
21  self.atom2 = tmp_atom
22 
23 class resContact:
24  def __init__(self, partnerA_res, partnerB_res, atomContactList):
25  self.res1 = partnerA_res
26  self.res2 = partnerB_res
27  self.index1 = partnerA_res.index
28  self.index2 = partnerB_res.index
29  self.atomContactList = atomContactList
30  #only relevant to full atom mode
31  if contactType == 'full':
32  self.Ncontacts = len(atomContactList)
33  self.Nhbonds = 0
34  for atom_contact in atomContactList:
35  if atom_contact.isHbond:
36  self.Nhbonds = self.Nhbonds + 1
37 
38  def copy(self):
39  tmp_copy = copy.copy(self)
40  tmp_copy.atomContactList = self.atomContactList[:]
41  Natomcontacts = len(self.atomContactList)
42  for i in range(Natomcontacts):
43  tmp_copy.atomContactList[i] = atom_contact \
44  (self.atomContactList[i].atom1, self.atomContactList[i].atom2)
45  return tmp_copy
46 
47  def reduceContact(self, res):
48  """
49  This changes the form of resContact from a contact-map type format
50  to a contact-profile type format. A contact map is a list of
51  contacts, whereas a contact profile is a list of residues with
52  their contacts.
53 
54  In a contact profile, each residue only needs to know which other
55  residues it contacts, not the complete resContact format.
56 
57  When creating a contact profile, each contact in the map is added
58  to both residues involved. This means changing the format from:
59 
60  i-j
61  i-k
62  i-l
63  j-k
64  j-l
65  j-m
66 
67  (list/map format)
68 
69  to
70 
71  i-j,k,l
72  j-i,k,l,m
73 
74  (profile format)
75 
76  For example, for residue i, I want to keep only the info for j,k, and
77  l since I already have the info for residue i
78 
79  for residue j, I want to keep only the information for k,l, and m. For
80  the i-j contact it is more complicated because I need to invert the
81  order to j-i before removing residue j.
82 
83  Ex. for i-j contact
84 
85  1) make one copy
86  2) delete the info for residue i ('res1', 'index1')
87  3) add contact to residue i
88 
89  4) make another copy
90  5a) invert identity of res1/res2, index1/index2
91  b) invert each of the atom contacts
92  6) delete res1, index1 which now match j.
93 
94  Input parameter res determines which option is picked
95  If 'res1', carry out steps 1-3
96  If 'res2', carry out steps 4-6.
97  """
98  if res == 'res2':
99  #swap residue indices
100  self.res2 = self.res1
101  self.index2 = self.index1
102  #swap atom contacts
103  for atom_contact in self.atomContactList:
104  atom_contact.numSwap()
105  delattr(self, 'res1')
106  delattr(self, 'index1')
107 
108 class shellAtom:
109  def __init__(self, printid, index, atomname):
110  """
111  Class attributes:
112 
113  printid: printid of the atom's residue
114  index: index of the atoms residue
115  atomname: the atom name
116  count: number of times that shellAtom occurs (for multiple monomers)
117  atomid: a tag for easy comparison of shell atoms
118  """
119  self.printid = printid
120  self.index = index
121  self.atomname = atomname
122  self.count = 1
123  self.atomid = str(self.index) + self.atomname
124 
125 def addMax_radius(loadedPDBres):
126  #calculate maximum radius
127  if loadedPDBres.max_radius != None:
128  return
129  max_radius_sq = 0
130  for atom in loadedPDBres.coordlist:
131  atomdist_sq = coordlib.dist_sq(atom, loadedPDBres.centroid)
132  max_radius_sq = max(atomdist_sq, max_radius_sq)
133  setattr(loadedPDBres, 'max_radius', math.sqrt(max_radius_sq))
134 
135 def makeContactPDB(loadedPDB):
136  """
137  Add certain attributes to loadedPDB to make it a contactPDB:
138  centroid: centroid of the residue or ligand
139  max_radius: farthest atom from the centroid
140 
141  To be computed later:
142  contactList: a list of all contacts made by that residue
143  Ncontacts
144  Nhbonds
145  """
146  for res in loadedPDB:
147  #initialize centroid only in full atom mode
148  if getAtoms == 'centroid':
149  res.addCentroid()
150  addMax_radius(res)
151  #initialize sccentroid only in sccentroid mode
152  if getAtoms == 'sccentroid':
153  res.addscCentroid()
154  #Ncontacts and Nhbonds should only be set in full atom mode
155  if contactType == 'full':
156  setattr(res, 'Ncontacts', 0)
157  setattr(res, 'Nhbonds', 0)
158  setattr(res, 'contactList', [])
159  #NresContacts (res-res pairs) is relevant to all modes
160  setattr(res, 'NresContacts', 0)
161 
162 def parse_and_pair(contactPDB, parserule):
163  """
164  This is the major function that allows one find-contacts script to
165  calculate contacts in a variety of different ways.
166  Using the parserule, the appropriate segment(s) of the pdb are extracted.
167 
168  Two types of contact calculations are enabled by this routine, and the
169  type of calculation determines the inputs and actions:
170  internal contacts (no '-' in the parserule)
171  interface/binding site contacts ('-' in the parserule determining which
172  subsets of the protein to use)
173 
174  For internal calculations:
175  The following types of input are accepted:
176  'all' - all residues in the protein, including all chains and any ligand
177  groups in the pdb
178  'A' - chain A only, all internal contacts
179  'AB' - chains A and B, all internal + A-B interface
180 
181  action: this subroutine extracts the appropriate portion of the pdb and
182  then passes it to pair_close_internal(contactPDB), and returns the result
183 
184  For interface calculations:
185  'A-B', 'AB-C', 'AB-CD', etc: docking partners. Can be multiple chains
186  per partner
187  'A-ligand', 'AB-ligand', 'ligand-A' - subset of protein vs. all ligands
188  'all-ligand' - all protein vs. all ligands
189  action: extracts the units separated by '-' and passes them through
190  pair_close_interface, and returns the result.
191 
192 
193  Input: contactPDB (see class contactProtein)
194  Output: [partnerA, partnerB] where partnerA and partnerB are subsets of
195  the contactPDB such that partnerA-partnerB contacts are calculated but not
196  internal contacts within either partner A or partner B. A parsing rule is
197  used to split loadePDB into partners A and B.
198  Currently, 3 parsing rules are implemented:
199 
200  This routine is flexible so that new parsing rules can be
201  added independently of any previous rules
202  """
203  #interface contacts mode
204  if '-' in parserule:
205  splitRule = string.split(parserule, '-')
206  #protein-ligand contacts
207  if 'ligand' in splitRule:
208  # 'ligand-A' form
209  if splitRule[0] == 'ligand':
210  proteinChain = splitRule[1]
211  # 'A-ligand' form
212  else:
213  proteinChain = splitRule[0]
214  # 'all-ligand'
215  # Pop ligands from contactPDB and assign to partner B
216  partnerB = popLigands(contactPDB)
217  if proteinChain == 'all':
218  partnerA = contactPDB
219  else:
220  partnerA = extractChains(contactPDB, proteinChain)
221  # two docking/interface partners (e.g. 'AB-C')
222  else:
223  partnerA = extractChains(contactPDB, splitRule[0])
224  partnerB = extractChains(contactPDB, splitRule[1])
225  #see pair_close_interface subroutine
226  return pair_close_interface(partnerA, partnerB)
227  #internal contacts
228  else:
229  #whole PDB
230  if parserule == 'all':
231  subset = contactPDB
232  #some subset (specified chains)
233  else:
234  subset = extractChains(contactPDB, parserule)
235  #see pair_close_internal subroutine below
236  return pair_close_internal(subset)
237 
238 def pair_close_internal(contactPDBsubset):
239  """
240  Input: subset of contact PDB
241 
242  Output: all pairs of residue pairs i, j where a contact is possible
243  according to the reference (max radius to centroid) distances for
244  each group plus 5.0A
245  """
246  Nres = len(contactPDBsubset)
247  close_pair_list = []
248  for i in range(0, Nres):
249  for j in range(i+1, Nres):
250  #refChain filter
251  if refChain != None:
252  if not(contactPDBsubset[i].chain in refChain or
253  contactPDBsubset[j].chain in refChain):
254  continue
255 
256  #max_radius from centroid
257  #filter = max_radius1 + max_radius2 + 5.0A
258  #if the centroids are farther apart than this, there can't be any
259  #contacts between residues i and j
260  thispair = 0
261  #pair without filtering (if global flag smartfilter is not turned
262  #on)
263  if not(smartfilter):
264  thispair = 1
265  #pair with filtering on centroid - centroid distance (applicable
266  #only to full atom mode)
267  else:
268 
269  filter_sq = (contactPDBsubset[i].max_radius +
270  contactPDBsubset[j].max_radius + contactFilter +
271  1.0)**2
272  cendist_sq = coordlib.dist_sq(contactPDBsubset[i].centroid,
273  contactPDBsubset[j].centroid)
274  if cendist_sq <= filter_sq:
275  thispair = 1
276 
277  if thispair:
278  close_pair_list.append([contactPDBsubset[i],
279  contactPDBsubset[j]])
280  return close_pair_list
281 
282 def pair_close_interface(partnerA, partnerB):
283  """
284  Input: partner A and partner B of a contactPDB
285  In internal case, just [partnerA] is given to save memory
286 
287  Output: all pairs of residues between partner A and
288  partner B for which a contact is possible according to the reference
289  distances for each group
290  """
291  close_pair_list = []
292  NresA = len(partnerA)
293  NresB = len(partnerB)
294  for i in range(0, NresA):
295  for j in range(0, NresB):
296  #max_radius from centroid
297  #filter = max_radius1 + max_radius2 + 5.0A
298  #if the centroids are farther apart than this, there can't be any
299  #contacts between residues i and j
300  thispair = 0
301  #pair without filtering (if global flag smartfilter is not turned
302  #on)
303  if not(smartfilter):
304  thispair = 1
305  #pair with filtering on centroid - centroid distance (applicable
306  #only to full atom mode)
307  else:
308  filter_sq = (partnerA[i].max_radius + partnerB[j].max_radius +
309  contactFilter + 1.0)**2
310  cendist_sq = coordlib.dist_sq(partnerA[i].centroid,
311  partnerB[j].centroid)
312  if cendist_sq <= filter_sq:
313  thispair = 1
314  if thispair:
315  close_pair_list.append([partnerA[i], partnerB[j]])
316  return close_pair_list
317 
318 def filterContacts(close_residue_list):
319  """
320  Filter a close_residue_list (output of pair_close_refAtoms) for residues
321  with at least one atom-atom contact of < 4.0A+
322  (the filter can be set to another distance in class contactProtein)
323  """
324 
325  resContactList = []
326  for close_pair in close_residue_list:
327  makesContact = 0
328  partnerA_res = close_pair[0]
329  partnerB_res = close_pair[1]
330  #bond filter - do not count covalent bonds between residues (< 2.0A)
331  #as contacts
332  bondfilter_sq = 4.0
333  contactfilter_sq = contactFilter**2
334  atomContactList=[]
335  #full atom mode; iterate over all atoms
336  if contactAtoms == 'all':
337  idA = int(partnerA_res.resid)
338  idB = int(partnerB_res.resid)
339  A_atoms = partnerA_res.atomlist
340  B_atoms = partnerB_res.atomlist
341  #designed if you only want contacts between CA-CA, centroid-centroid,
342  #etc.
343  #Just count contacts for those atom pairs
344  else:
345  A_atoms = B_atoms = [contactAtoms]
346  #all atoms from partner A res against all atoms of partner B res
347  for A_atom in A_atoms:
348  for B_atom in B_atoms:
349  atomdist_sq = coordlib.dist_sq(partnerA_res[A_atom],
350  partnerB_res[B_atom])
351  if atomdist_sq >= bondfilter_sq and atomdist_sq > 16.0 and atomdist_sq <= 64.0: # kepler
352  #if atomdist_sq >= bondfilter_sq and atomdist_sq <= contactfilter_sq: #Mike
353  #see atom_contact class above
354  atomContactList.append(atom_contact(A_atom, B_atom))
355  #if idA > 23 and idA < 33 and idB > 23 and idB < 33 and idB - idA > 5:
356  # if atomdist_sq >= 16.0 and atomdist_sq <= 36.0:
357  # if A_atom[:1] == 'C' or A_atom[:1] == 'N':
358  # if B_atom[:1] == 'C' or B_atom[:1] == 'N':
359  # print str(idA)+A_atom+str(idB)+B_atom
360 
361  if atomContactList != []:
362  #see resContactList class above
363  resContactList.append(resContact(partnerA_res, partnerB_res,
364  atomContactList))
365 
366  return resContactList
367 
368 def makeContactProfile(contactMap, loadedPDB):
369  """
370  Takes the contactMap of a contactProtein structure and adds the contacts
371  into the original loadedPDB
372  In makeContactPDB(loadedPDB), an empty contactList was added to each
373  residue in the loaded PDB. This list will now be filled up with all the
374  contacts that each residue makes.
375 
376  Also fill in Ncontacts and Nhbonds for each residue, which were initialized
377  to zero.
378  """
379  #iterate through the contact map
380  #The loadedPDB indices of each contact have been recorded and will now
381  #be used to send each contact to the contact lists of the appropriate
382  #two residues
383  for contact in contactMap:
384  res1 = loadedPDB[contact.index1]
385  res2 = loadedPDB[contact.index2]
386  #Make two copies of the contact, one for res1 and one for res2.
387  #Reset the resContact and add it to the appropriate residue.
388  contact_copy1 = contact.copy()
389  contact_copy2 = contact.copy()
390  contact_copy1.reduceContact('res1')
391  contact_copy2.reduceContact('res2')
392  res1.contactList.append(contact_copy1)
393  res2.contactList.append(contact_copy2)
394  res1.NresContacts = res1.NresContacts + 1
395  res2.NresContacts = res2.NresContacts + 1
396  #only relevant to full atom mode!
397  if contactType == 'full':
398  res1.Ncontacts = res1.Ncontacts + contact.Ncontacts
399  res1.Nhbonds = res1.Nhbonds + contact.Nhbonds
400  res2.Ncontacts = res2.Ncontacts + contact.Ncontacts
401  res2.Nhbonds = res2.Nhbonds + contact.Nhbonds
402 
403 def makeShellProfile(contactProfile):
404  """
405  This converts a contact profile (list of residue-residue contacts for each
406  residue) to a shell profile (list of the atomic shell for each residue).
407 
408  Contact profile format is useful for listing residues that contact a
409  particular residue, but it is tedious for analyzing the atomic shell of a
410  residue since one must search the atoms of each residue in the list. Here
411  is an example:
412 
413  contact profile format:
414 
415  contacts for residue LYS A 4:
416  GLU A 3 N,CA,OE1
417  PRO A 5 CA,CB,CG
418 
419  contact shell format:
420 
421  shell for residue LYS A 4:
422  atom 1: GLU A 3 N
423  atom 2: GLU A 3 CA
424  etc.
425 
426  This allows direct iteration over the atomic shell without having to iterate
427  through particular residues first.
428 
429  Practically, for each atom in the shell, I want to keep track of printid
430  (residue label), index (list identifier used to compare different pdbs),
431  atom name, counts (used if multiple monomers are added), and an
432  I will keep an atom label that combines the index and the name for easy
433  referencing. These attributes will be recorded in the shellatom class
434  at the top.
435  """
436  for res in contactProfile:
437  setattr(res, 'atomShell', [])
438  for resContact in res.contactList:
439  atomList = []
440  for atom_contact in resContact.atomContactList:
441  #record printid, index, and atom name
442  if not(atom_contact.atom2 in atomList):
443  #use res.name (the name I want to use for comparisons,
444  #not the printid taken directly from the pdb)
445  atomLabel = resContact.res2.name + atom_contact.atom2
446  #res.atomShell.append(shellAtom(resContact.res2.printid,
447  #resContact.index2, atom_contact.atom2))
448  res.atomShell.append(atomLabel)
449  atomList.append(atom_contact.atom2)
450  delattr(res, 'contactList')
451 
452 smartfilter = 1
453 contactAtoms = 'all'
454 contactFilter = 8.0 #kepler
455 #contactFilter = 4.0 # Mike
456 contactType = 'full'
457 getAtoms = 'centroid'
458 ligandFlag = 1
459 
460 #This variable can be set externally. Basically, it limits contact calculation
461 #to those pairs where at least one residue is in one chain in the string
462 #refChain. Is set to None (unrestricted) by default
463 #(only applies to internal contacts calculations)
464 refChain = None
465 
467  """
468  global flags dependent on mode:
469 
470  smartfilter: filter on centroid-centroid distance in full atom mode,
471  but not in CA-CA mode or sidechain-sidechain centroid mode
472 
473  contactAtoms: defines atom list to iterate over when calculating contacts.
474  Allows full atom mode and single atom modes to use the same contact-
475  counting function
476 
477  contactType: control creation and printing of certain contact attributes
478  (Ncontacts, Nhbonds) only relevant to full atom mode
479 
480  contactFilter: the contact distance cutoff is dependent on the mode.
481  Currently, it is 4.0A for full atom mode, 7.0A for CA-CA mode, and 6.0A for
482  sidechain-sidechain centroid mode.
483 
484  getAtoms: global flag to determine if (whole residue) centroid and
485  sidechain centroid need to be initialized. To save computing time.
486  """
487  global smartfilter, contactType, contactAtoms, contactFilter, getAtoms
488  if mode == 'fullatom':
489  return
490  if mode == 'shell':
491  contactFilter = 6.0
492  if mode in ['calpha', 'sccentroid', 'disulf']:
493  smartfilter = 0
494  contactType = 'brief'
495  if mode == 'calpha':
496  contactAtoms = 'CA'
497  contactFilter = 7.0
498  return
499  if mode == 'sccentroid':
500  contactAtoms = 'sccentroid'
501  contactFilter = 6.0
502  getAtoms = 'sccentroid'
503  return
504  if mode == 'disulf':
505  contactAtoms = 'SG'
506  contactFilter = 3.0
507 
508 def addContacts(startProtein, mode, parserule):
509  """
510  Add contacts to a Protein object from loadPDB.py
511  This is somewhat complicated because I need to convert the Protein into a
512  loadedPDB to calculate the contacts and then convert it back again.
513  However, this works efficiently because the getAllRes() function of a
514  Protein object maintains the original order of residues so that when
515  the protein is recreated using parse(), the original structure is
516  maintained.
517  """
518  #Create a contact object and calculate contacts
519  if ligandFlag:
520  newLoadedPDB = startProtein.getAllRes()
521  else:
522  newLoadedPDB = startProtein.getAllProteinRes()
523  print 'ligand flag off; not counting ligands in contacts'
524  newContactProtein = contactProtein(mode)
525  newContactProtein.contactPDB = newLoadedPDB
526  newContactProtein.find_contacts(parserule)
527  newContactProtein.initContactProfile()
528 
529  #Re-create the protein structure
530  newProtein = Protein()
531  newProtein.parse(newContactProtein.contactPDB)
532  startProtein = newProtein
533 
534 def makeContactMap(pdb1, pdb2):
535  """
536  Calculate contacts between any two pdbs
537  """
538  initialize_global_flags('fullatom')
539  makeContactPDB(pdb1)
540  makeContactPDB(pdb2)
541  closeResiduePairs = pair_close_interface(pdb1, pdb2)
542  return filterContacts(closeResiduePairs)
543 
545  """
546  The contactProtein class contains information about the protein's contacts
547  in two forms:
548  list of contacts
549  contact profile (contacts by residue)
550  """
551  def __init__(self, mode):
552  self.mode = mode
554  self.contactPDB = None
555 
556  def initializePDB(self, PDBfile):
557  self.contactPDB = loadProt(PDBfile)
558 
559  def find_contacts(self, parserule):
560  #mode currently allows full atom, CA-CA, and sidechain centroid-
561  #centroid
562  #set global control flags to control behavior of the functions with
563  #respect to mode
564  if self.contactPDB == None:
565  print 'You are trying to calculate contacts without a loadedPDB'
566  print 'Call initializePDB(PDBfile) to load the PDB from a PDB'
567  print 'file or load the PDB externally and add it to the \
568  contactProtein'
569  sys.exit(1)
570  if contactType == 'brief':
571  popLigands(self.contactPDB)
573  addIndices(self.contactPDB)
574  closeResiduePairs = parse_and_pair(self.contactPDB, parserule)
575  self.contactMap = filterContacts(closeResiduePairs)
576 
578  #make a contact profile only if needed (contact map always needed)
580  if self.mode == 'shell':
581  self.convert_to_shell()
582 
583  def convert_to_shell(self):
584  """
585  convert from contact profile format (list of residue contacts for each
586  residue) to shell format (list of atoms in the atomic shell of a given
587  residue)
588  """
590 
591  def outputMap(self, dest, PDBfile):
592  """
593  Output method to either print contacts or write them to a file
594  dest = 'print' outputs the contact map to the screen
595  dest = 'file' outputs the contact map to a file
596  """
597  if dest == 'file':
598  pdbid = string.split(PDBfile, '.')[0]
599  outfname = pdbid + '.contactmap'
600  output = open(outfname, 'w')
601  #header = 'restype1 chain1 resnum1 restype2 chain2 resnum2'
602  #if contactType == 'full':
603  # header = header + ' Ncontacts Nhbonds'
604  #if dest == 'print':
605  # print header
606  #else:
607  # output.write(header + '\n')
608  for resContact in self.contactMap:
609  printList = [resContact.res1.printid, resContact.res2.printid]
610  # if contactType == 'full':
611  # printList = printList + [resContact.Ncontacts,
612  # resContact.Nhbonds]
613  outstr = ''
614  for item in printList:
615  outstr = outstr + str(item) + ' '
616  if dest == 'print':
617  print outstr
618  else:
619  output.write(outstr + '\n')
620 
621  def outputProfile(self, dest, PDBfile):
622  """
623  Output method to print contact profile or write them to a file.
624  Number of contacts and hbonds for each residue
625  equivalent to find_interface.pl in interface mode
626  dest = 'print' sends the output to the screen
627  dest = 'file sends the output to a file (pdb.contactprofile)
628  """
629  print 'wtf!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
630  if dest == 'file':
631  pdbid = string.split(PDBfile, '.')[0]
632  outfname = pdbid + '.contactprofile'
633  output = open(outfname, 'w')
634  header = 'restype chain resnum NresContacts'
635  if contactType == 'full':
636  header = header + ' Ncontacts Nhbonds'
637  if dest == 'print':
638  print header
639  else:
640  output.write(header + '\n')
641  for res in self.contactPDB:
642  if res.NresContacts > 0:
643  printList = [res.printid, res.NresContacts]
644  if contactType == 'full':
645  printList = printList + [res.Ncontacts, res.Nhbonds]
646  outstr = ''
647  for item in printList:
648  outstr = outstr + str(item) + ' '
649  if dest == 'print':
650  print outstr
651  else:
652  output.write(outstr + '\n')
653 
def makeShellProfile
def popLigands
Definition: loadPDB.py:824
Fstring::size_type len(Fstring const &s)
Length.
Definition: Fstring.hh:2207
def extractChains
Definition: loadPDB.py:861
def pair_close_interface
def addIndices
Definition: loadPDB.py:815
bool open(utility::io::izstream &db_stream, std::string const &db_file, bool warn)
Open a database file on a provided stream.
Definition: open.cc:55
def dist_sq
Definition: coordlib.py:12
def initialize_global_flags
def makeContactProfile
def pair_close_internal
def loadProt
Definition: loadPDB.py:633
static T max(T x, T y)
Definition: Svm.cc:19