Comments (4)
Hi,
Yes, you are right, the DB5.5 data contains many inconsistencies between the bound and unbound structures for the same protein, not sure exactly why. To generate that plot, we just did a dynamic programming implementation of the longest common subsequence problem to align the two sequences, and then only use those residues in the RMSD computations. This is not a perfect solution, but not sure what can be done to alleviate this data issue ...
from equidock_public.
Thank you for your reply!
I also try to do that as you said with the Biopython-> pairwise2.align.globalxx() function to align the bound and unbound sequences. And I get the result with complex_rmsd_median: 1.455, complex_rmsd_mean: 2.1065, complex_rmsd_std: 2.5771. I seems similar to your published result, however, the max complex rmsd I calculated is 29.3533, which is greater than 35 in Figure 12. So I am not sure that whether there is any mistake in my code. It will be so great if you could share the code of how to get the rmsd between bound and unbound structures. Thanks so much!
from equidock_public.
Yep, here is the code (also see the full list of RMSD computed below).
import os
import numpy as np
from src.utils.protein_utils import rigid_transform_Kabsch_3D
from src.utils.db5_data import get_residues_db5
import scipy.spatial as spa
# Dynamic programming implementation of LCS problem
# Returns length of LCS for X[0..m-1], Y[0..n-1]
def LCSubSeq(X, Y):
m = len(X)
n = len(Y)
L = [[0 for _ in range(n + 1)] for _ in range(m + 1)]
# Following steps build L[m+1][n+1] in bottom up fashion. Note
# that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1]
for i in range(m + 1):
for j in range(n + 1):
if i == 0 or j == 0:
L[i][j] = 0
elif X[i - 1][1]['resname'].iloc[0] == Y[j - 1][1]['resname'].iloc[0]:
L[i][j] = L[i - 1][j - 1] + 1
else:
L[i][j] = max(L[i - 1][j], L[i][j - 1])
# Following code is used to print LCS
index = L[m][n]
# Create a character array to store the lcs string
# lcs = [""] * (index + 1)
# lcs[index] = ""
lcs_X = [""] * (index)
lcs_Y = [""] * (index)
# Start from the right-most-bottom-most corner and one by one store characters in lcs[]
i = m
j = n
while i > 0 and j > 0:
# If current character in X[] and Y are same, then current character is part of LCS
if X[i - 1][1]['resname'].iloc[0] == Y[j - 1][1]['resname'].iloc[0]:
lcs_X[index - 1] = X[i - 1]
lcs_Y[index - 1] = Y[j - 1]
i -= 1
j -= 1
index -= 1
# If not same, then find the largest of the two and go in the direction of larger value
elif L[i - 1][j] > L[i][j - 1]:
i -= 1
else:
j -= 1
return lcs_X, lcs_Y
def filter_residues(residues):
residues_filtered = []
for residue in residues:
df = residue[1]
Natom = df[df['atom_name'] == 'N']
alphaCatom = df[df['atom_name'] == 'CA']
Catom = df[df['atom_name'] == 'C']
if Natom.shape[0] == 1 and alphaCatom.shape[0] == 1 and Catom.shape[0] == 1:
residues_filtered.append(residue)
return residues_filtered
###################
def get_alphaC_loc_array(bound_predic_clean_list):
bound_alphaC_loc_clean_list = []
for residue in bound_predic_clean_list:
df = residue[1]
alphaCatom = df[df['atom_name'] == 'CA']
alphaC_loc = alphaCatom[['x', 'y', 'z']].to_numpy().squeeze().astype(np.float32)
assert alphaC_loc.shape == (3,), \
f"alphac loc shape problem, shape: {alphaC_loc.shape} residue {df} resid {df['residue']}"
bound_alphaC_loc_clean_list.append(alphaC_loc)
if len(bound_alphaC_loc_clean_list) <= 1:
bound_alphaC_loc_clean_list.append(np.zeros(3))
return np.stack(bound_alphaC_loc_clean_list, axis=0) # (N_res,3)
def get_alphaC_list(bound_predic_clean_list):
lll = []
for residue in bound_predic_clean_list:
df = residue[1]
alphaCatom = df[df['atom_name'] == 'CA']['atom_number'].iloc[0]
lll.append(alphaCatom)
return lll
def get_CA_coords(b_file, u_file):
b_residues = filter_residues(get_residues_db5(b_file))
u_residues = filter_residues(get_residues_db5(u_file))
before = len(b_residues)
b_residues, u_residues = LCSubSeq(b_residues, u_residues)
# print('after b_res', get_alphaC_list(b_residues))
# print('after u_res', get_alphaC_list(u_residues))
# print(functools.reduce(lambda x, y : x and y, map(lambda p, q: p == q, get_alphaC_list(b_residues),get_alphaC_list(u_residues)), True))
b_residues = get_alphaC_loc_array(b_residues)
u_residues = get_alphaC_loc_array(u_residues)
after = b_residues.shape[0]
if before == after == u_residues.shape[0]:
print(b_file, u_file)
print(before, after, u_residues.shape[0])
return b_residues, u_residues
def plot_all():
input_dir = './data/benchmark5.5/structures'
pdb_files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f)) and f.endswith('.pdb')]
all_rmsd = []
num_test_files = 0
for file in pdb_files:
if file.endswith('_l_u.pdb'):
ll = len('_l_u.pdb')
bound_file = os.path.join(input_dir, file[:-ll] + '_l_b.pdb')
unbound_file = os.path.join(input_dir, file[:-ll] + '_l_u.pdb')
elif file.endswith('_r_u.pdb'):
ll = len('_r_u.pdb')
bound_file = os.path.join(input_dir, file[:-ll] + '_r_b.pdb')
unbound_file = os.path.join(input_dir, file[:-ll] + '_r_u.pdb')
else:
continue
b_coords, u_coords = get_CA_coords(bound_file, unbound_file)
dmat = spa.distance.cdist(b_coords, b_coords)
dmat = dmat + 100. * np.eye(dmat.shape[0])
# print('mean d = ', np.min(dmat, axis=1).mean())
# print('median d = ', np.median(np.min(dmat, axis=1)))
if b_coords.shape[0] != u_coords.shape[0]:
print(file)
continue
num_test_files += 1
R, b = rigid_transform_Kabsch_3D(b_coords.T, u_coords.T)
b_coords_aligned = ((R @ b_coords.T) + b).T
rmsd = np.sqrt(np.mean(np.sum((b_coords_aligned - u_coords) ** 2, axis=1)))
all_rmsd.append(rmsd)
# print('num_test_files done ', num_test_files)
print('all_rmsd', all_rmsd)
print(len(all_rmsd))
return all_rmsd
all_rmsd = plot_all()
# all_rmsd = [0.6962418, 27.03626981434666, 2.9686131, 3.5345614, 1.7044786, 1.319995, 5.1798444, 0.5049575, 0.45675388, 2.6630943, 2.546523, 1.2128848, 0.59461874, 1.1774287, 0.8224659, 1.5111822, 1.1599942, 1.7826275, 0.65784246, 0.5582014, 2.8252356, 1.9506255, 1.3800596, 2.7822397, 2.0161128, 1.4857557, 0.80386686, 1.2378623, 2.7285552, 19.54151, 1.5791698, 0.77282906, 0.32637906, 0.4722987, 1.2494422, 0.7638261, 1.3630565, 0.5317533, 1.9764836e-06, 0.55156565, 2.7910872, 0.9011328, 1.1561608, 2.5926828, 3.8203578, 0.70295584, 1.1666499, 0.6264738, 0.6232077, 1.1883411, 0.56530625, 1.2028227, 1.3477685, 1.8282441, 1.5747728, 2.128397, 1.7903032, 4.280821, 1.4810741, 2.0309215, 1.7389334, 1.3359979, 0.9335407, 1.2722429, 1.0772936, 1.4273612, 0.90363824, 1.1851745, 0.65965843, 0.6105662, 0.88963103, 0.7989469, 3.0286636, 1.4925646, 1.0704238, 1.2667646, 0.9554937, 2.1890259, 0.4463879, 3.0691504, 12.041163, 1.1361443, 1.6985788, 1.8809482, 0.70238364, 2.1219745, 2.4284933, 1.1817397, 0.618453, 3.5171118, 1.7590178, 0.57367647, 0.8153385, 0.5950234, 1.4212565, 0.8913942, 4.423849, 0.6001566, 3.0566185, 0.45846713, 0.3916329, 3.1001763, 0.84036165, 1.2455623, 0.94656336, 0.43186194, 0.70226103, 2.1483116, 0.65598303, 0.49707252, 9.990827, 1.2918882, 0.77361584, 1.4257747, 0.80301446, 1.5146624, 1.3873624, 0.9554937, 1.0153382, 1.3809197, 0.9295433, 0.4461008, 0.3887217, 2.71268, 0.6058705, 2.5367131, 1.5577396, 2.7555687, 1.5684363, 0.5662212, 1.8282441, 0.34369707, 3.5691407, 29.304473532867767, 0.8604853, 3.8777807, 0.3287732, 1.192507, 0.53047264, 1.1643264, 1.8756974, 4.397976, 1.9317511, 0.8851453, 4.280821, 1.6856755, 2.1955128, 1.0414593, 3.8239803, 0.6302643, 1.2624161, 2.8827038, 1.0478029, 3.4473317, 0.770314, 0.64568347, 1.4387469, 1.6138345, 1.7465917, 1.962105, 1.0688684, 1.5654682, 0.43968844, 0.7649709, 1.7110707, 1.9716086, 1.4674189, 1.9981797, 0.8618321, 0.6721722, 2.071398, 0.51870245, 2.982918, 0.8164904, 3.4028974, 1.0031127, 0.6929141, 1.1985035e-06, 22.282746453028356, 1.1930548, 0.49166816, 4.2538457, 0.889148, 1.6589879, 0.7498187, 0.4058273, 0.5056364, 0.62057555, 4.0950875, 1.1783762, 1.0915266, 9.825888, 0.6582492, 1.2743578, 0.4995857, 10.788652, 11.905804, 0.8787025, 2.2948368e-06, 3.3075848, 0.6850603, 1.7519094, 1.9719495, 0.44748992, 0.55696625, 0.6096156, 0.64054054, 3.0099761, 2.3606842, 1.6786654, 1.1942681, 0.6978604, 0.43543476, 7.2658124, 0.8521949, 1.4701211, 1.7214085, 0.5289863, 1.1149201, 3.729744, 1.7212167, 5.581814, 3.9925954, 0.9347552, 2.0152748, 5.539305, 1.3955237, 1.253993, 18.731201, 0.5405805, 1.1983395, 1.3311571, 1.0794693, 2.2760947, 38.5945, 2.7223735, 1.6750678, 0.51923317, 1.7674416, 2.7703874, 3.936683e-06, 1.4250864, 1.7576742, 0.66976196, 2.1966534e-06, 3.8855648, 0.572205, 1.06187, 0.4635174, 0.5988311, 0.46330267, 1.9889144, 1.5581557, 3.1054556, 1.0541656, 1.6353214e-06, 1.032354, 0.665778, 1.087051, 0.6215222, 0.60802054, 6.606358, 0.5751045, 0.4689752, 0.37107491, 1.067861, 4.729647, 4.178835, 1.2696042, 0.74371666, 4.3974447, 3.731839, 1.1009775, 6.2470875, 2.1379695, 2.7581146, 3.2439315, 0.7497935, 4.4688506, 0.74708706, 1.2113068, 3.0039606, 1.6956894, 0.7109091, 1.6787137, 32.79343038081955, 7.0521705e-06, 1.1089412, 0.86929286, 2.2734303, 1.9720659, 1.9889144, 0.5684412, 0.72357994, 1.0572548, 1.2094994, 0.87182486, 0.6639882, 0.6363185, 0.68100965, 1.0455867, 2.7395332, 7.231181, 0.629642, 7.0089383, 2.757306, 1.0041324, 1.650062, 2.9461417, 2.0531807, 0.3912766, 0.6195286, 2.1035693, 0.93100214, 1.7368435, 2.107763, 1.3961453, 1.3059765, 1.1841563, 7.2915254, 0.8731526, 2.5055175, 1.0232973, 2.1887197, 1.6135501, 1.7934625, 2.218598, 0.6318677, 1.1169442, 1.41212, 1.8013055, 0.83939904, 0.5386748, 1.3009996, 1.0179409, 1.5744836, 1.7691509, 1.8033211, 4.0990553, 0.40228027, 1.5470148, 2.229582, 1.0745113, 1.2127255, 0.64667904, 3.870754, 0.6516309, 0.5308961, 0.6830703, 0.37585256, 3.6248713, 1.0849175, 1.1546352, 0.35467917, 0.8584139, 1.713536, 0.9835134, 4.863827, 1.1013104, 1.5991068, 2.7803283, 0.5220013, 0.7984924, 1.1942775, 5.522912, 1.0631386, 0.45954815, 0.8350289, 1.6775175, 2.48245, 1.1269842, 0.84553206, 0.59515387, 0.7430531, 1.1843232, 0.8322676, 0.71300054, 0.8555032, 1.1336085, 1.536494, 29.936849938155948, 0.5567714, 0.50779784, 2.8568308, 0.5459214, 0.766995, 0.9020894, 1.6567634, 1.1025717, 0.7798935, 1.0375565, 0.63780326, 0.66015315, 0.5144089, 0.87448967, 2.9359785e-07, 0.51562625, 1.1334723, 1.5090711, 2.2924914, 0.9140785, 8.136824, 3.3150349, 0.3546582, 6.9085704e-06, 2.0621643, 1.1151338, 4.43456, 0.51987344, 1.9622375, 1.1580093, 0.27936888, 0.76604265, 1.1473372, 0.7945121, 1.3007371, 1.3704231, 1.1207563, 2.4275997, 1.1122599, 17.701017, 0.48094028, 3.4398391, 4.495497, 3.4427142, 1.3895931, 0.8203486, 1.0786057, 4.4397645, 3.2132168, 1.5061754e-06, 5.1517496, 4.1289624e-07, 1.9721442, 0.8347928, 9.394635, 0.35105443, 1.3121231, 1.9221034, 0.95891297, 1.2227244, 8.433344, 2.9149215, 1.9889144, 0.37265316, 2.0469155, 2.2152584, 0.4578064, 1.2578293, 2.6659403, 0.62059456, 4.739158, 8.855496, 0.48358005, 0.6706964, 0.9605975, 1.148719, 1.3905442, 3.1397026, 6.062035, 0.66542107, 1.0939509, 3.4895027, 0.7346339, 0.80699295, 3.7031268e-07, 1.1419114, 1.5155606, 0.72397625, 2.7118328, 1.6419793, 0.63384485, 0.8620122, 3.213217, 9.210633, 0.41391, 0.58243823, 0.24865517, 0.50800824, 1.123511, 1.8632666, 0.4743436, 2.338837, 2.268722, 10.291622, 0.41552648, 1.7778066, 1.0593673, 0.7203342, 1.0793355, 1.1355674, 0.2461644, 1.548148, 0.5702176, 0.5160546, 2.4865284, 1.7082264, 1.0295498, 1.2837969, 1.753832, 1.0659016, 0.7255667, 0.62942755, 2.8856826, 0.74441904, 0.46673965, 0.5662806, 1.1644644, 2.005169, 0.51830894, 1.7189244, 0.7285067, 2.3819146, 1.0490074, 0.6707141, 2.7278082, 1.0616714, 4.2380977, 0.8752755, 0.32089052, 1.5652136, 1.512635, 1.0160714, 1.4508268, 1.3173221, 0.32655928, 0.40831152, 0.5914244, 0.86104983, 2.3572385, 1.2258587, 3.525297, 10.330421, 4.2397623, 1.1214422, 0.32262784, 10.559156, 0.429923, 1.493592, 0.35212544, 0.9639773, 1.025263]
import plotly.express as px
fig = px.histogram(all_rmsd, nbins=100)
fig.add_vline(x=np.median(all_rmsd), annotation_text="median = " + "{:.1f}".format(np.median(all_rmsd)) + ' Å',
annotation_font_size=20, annotation_position="top right", line_dash = 'dash', line_color = 'firebrick', annotation_font_color= 'firebrick')
fig.add_vline(x=np.mean(all_rmsd), annotation_text="\t<br>mean = " + "{:.1f}".format(np.mean(all_rmsd)) + ' Å',
annotation_font_size=20, annotation_position="top right", line_dash = 'dash', line_color = 'green', annotation_font_color= 'green')
fig.update_xaxes(title='RMSD (Å) bound - unbound structures (of the CĪ± atomic coordinates)', title_font = {"size": 20}, tickfont = {"size": 20})
fig.update_yaxes(title='Frequency in DB5.5', title_font = {"size": 20}, tickfont = {"size": 20})
fig.show()
from equidock_public.
It is so kind of you!
Since the LCS subsquence may not be unique, it may influence the result I got. I will check my code with reference to this. Thank you so much again!
from equidock_public.
Related Issues (20)
- inference script has no docs HOT 1
- Hyperparameters HOT 2
- Why optimal transport matrix is not used? HOT 4
- Installation problems HOT 1
- Matrix Product Error in Kabsh Model HOT 1
- How to run inference on custom PDB + Problems with Installation HOT 5
- best validation score & some other variations
- Source code for DIPS split HOT 6
- Thank you for your generous open source work, salute your work, and wish your soul peace
- what is the difference between rigid protein docking and protein-protein docking?
- can not achieve the performance which mentioned in the original paper HOT 8
- cuda and dgl version
- error when I run preprocess_raw_data.py. How can I fix it? HOT 4
- where is the docked pose? HOT 8
- How to get the complex pose? HOT 13
- to speed up rsync
- deallock in make_dataset HOT 3
- DIPS dataset HOT 2
- about preprocess_raw_data.py HOT 5
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
š Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. ššš
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ā¤ļø Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from equidock_public.