The CR2.ipynb notebook can be used to run the bond order analysis for each of the systems separately. In this notebook, we will try to summarize all the data coming from those calculations. This notebook assumes you have run all of the systems using the CR2 notebook.
from __future__ import print_function
from os.path import join
import pickle
from matplotlib import pyplot as plt
import pandas as pd
%matplotlib inline
# Systems summarised in this notebook; the order also fixes table columns
# and the panel assignment in the multi-panel figures below.
labels = ["1CRN", "Laccase", "MG", "Pentacene"]

# Marker glyph used for each data series.
markers = {
    "1CRN": 's',
    "Laccase": 'x',
    "MG": 'd',
    "MG-only": '+',
    "Pentacene": 'o',
    "1CRN-FU": 'P',
    "Laccase-FU": '*',
}

# Line colour used for each data series.
colors = {
    "1CRN": '#1f77b4',
    "Laccase": '#ff7f0e',
    "MG": '#2ca02c',
    "MG-only": '#d62728',
    "Pentacene": '#9467bd',
    "1CRN-FU": '#8c564b',
    "Laccase-FU": '#e377c2',
}
The first thing we will do is summarize the relationship between the purity cutoff values, and the number of fragments in a given system.
# Purity-indicator cutoffs whose cached fragmentations are loaded below.
cutoffs = [1.0, 0.5, 0.25, 0.10, 0.0875, 0.075, 0.05, 0.0375, 0.025, 0.0125, 0.0075, 0.0050, 0.0025, 0.00125]

# sizes[system] holds the fragment count at each cutoff, in `cutoffs` order.
sizes = {name: [] for name in labels}
for name in labels:
    for cutoff in cutoffs:
        cache_path = join("Cache-BO", name + "-" + str(cutoff) + ".pickle")
        with open(cache_path, "rb") as handle:
            varfrag = pickle.load(handle)
        sizes[name].append(len(varfrag.keys()))
We are also interested in the change in size of the RNA in the range 0.25 to 0.025.
cutrange = [0.25, 0.10, 0.0875, 0.075, 0.05, 0.0375, 0.025]

# Fragments whose element set is exactly one of these are not counted
# (H/O only, MG only, or MG with H and O -- presumably solvent and ions).
skipped_compositions = [set(["H", "O"]), set(["MG"]), set(["MG", "H", "O"])]

# rna_size[k] is the number of remaining fragments at cutrange[k].
rna_size = []
for cutoff in cutrange:
    cache_path = join("Cache-BO", "MG-" + str(cutoff) + ".pickle")
    with open(cache_path, "rb") as handle:
        varfrag = pickle.load(handle)
    kept = 0
    for frag in varfrag.values():
        elements = set([atom.sym for atom in frag])
        if elements not in skipped_compositions:
            kept += 1
    rna_size.append(kept)
# Fragment count relative to each system's maximum, versus purity cutoff.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
axs.set_xscale("log")
axs.set_xlabel("Purity Indicator Cutoff", fontsize="14")
axs.set_ylabel("Relative Number of Fragments", fontsize="14")
axs.axvline(0.05, color='k', linestyle='--', label="cutoff = 0.05")

for name in sorted(labels):
    counts = sizes[name]
    largest = 1.0 * max(counts)
    relative = [count / largest for count in counts]
    axs.plot(cutoffs, relative,
             label=name.upper(), marker=markers[name], color=colors[name],
             linestyle='--', markersize=10)

# NOTE(review): 388.0 is a hard-coded normaliser for the RNA-only curve;
# its origin is not visible in this notebook -- confirm.
rna_relative = [count / (388.0) for count in rna_size]
axs.plot(cutrange, rna_relative,
         label="MG just RNA", marker=markers["MG"], color=colors["MG-only"],
         linestyle='--', markersize=10, fillstyle='none')

axs.legend()
fig.savefig(join("Figures", "relfrag.eps"))
The second thing we will consider is how the bond order interaction falls off with the number of fragments.
# spillvals[system] is whatever the "<system>-fbo.pickle" cache holds; it is
# used below as a mapping whose values are bond-order contributions.
spillvals = {}
for name in labels:
    cache_path = join("Cache-BO", name + "-fbo.pickle")
    with open(cache_path, "rb") as handle:
        spillvals[name] = pickle.load(handle)
from numpy import cumsum
# For each system: total bond order minus the running sum of the
# contributions taken from largest to smallest, i.e. what is still
# unaccounted for after including the k strongest entries.
cumsumvals = {}
for name in labels:
    contributions = spillvals[name]
    total = sum(contributions.values())
    descending = sorted(contributions.values(), reverse=True)
    cumsumvals[name] = total - cumsum(descending)
# Remaining bond order as progressively more fragments are included.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
axs.set_yscale("log")
axs.set_xlabel("Fragment", fontsize="14")
axs.set_ylabel("Remaining Bond Order", fontsize="14")

for name in sorted(labels):
    line_style = dict(label=name.upper(), color=colors[name],
                      marker=markers[name], linestyle='--')
    axs.plot(cumsumvals[name], **line_style)

axs.set_xlim(-1, 90)
axs.set_ylim(1e-6, 20)
axs.legend()
fig.savefig(join("Figures", "falloff.eps"))
We also would like to generate some images of the target regions and embedding environments used in each of the QM/MM calculations. These TCL scripts can be run with VMD.
from BigDFT.Visualization import VMDGenerator
from BigDFT.Fragments import System
# Write one VMD script (plus the two xyz files it refers to) per system.
vmd = VMDGenerator()
for name in labels:
    # The first cache supplies the full system; its target entry is
    # superseded by the one stored with the QM/MM selection.
    with open(join("Cache-BO", name + "-resys-target.pickle"), "rb") as handle:
        target, resys = pickle.load(handle)
    with open(join("Cache-BO", name + "-qmmm-0.01.pickle"), "rb") as handle:
        target, subsystem = pickle.load(handle)

    script_file = join("Viz-BO", name + "-qmmm.tcl")
    system_file = join("Viz-BO", name + "-system.xyz")
    subsystem_file = join("Viz-BO", name + "-subsystem.xyz")
    vmd.visualize_qmmm(resys, subsystem, target, script_file,
                       system_file, subsystem_file)
Next we will compare the convergence rate of QM/MM calculations using the two different criteria.
# QM/MM error records keyed as spillerror[system][bond-order cutoff].
spillcuts = [100, 1, 0.1, 0.01, 0.001]
spillerror = {}
for name in labels:
    per_cut = {}
    spillerror[name] = per_cut
    for cutoff in spillcuts:
        cache_path = join("Cache-BO", name + "-" + str(cutoff) + "-spillageqmmm.pickle")
        with open(cache_path, "rb") as handle:
            per_cut[cutoff] = pickle.load(handle)

# The same records for the distance criterion: disterror[system][distance].
distance = [2, 3, 4, 5, 6]
disterror = {}
for name in labels:
    per_dist = {}
    disterror[name] = per_dist
    for radius in distance:
        cache_path = join("Cache-BO", name + "-" + str(radius) + "-distanceqmmm.pickle")
        with open(cache_path, "rb") as handle:
            per_dist[radius] = pickle.load(handle)
# 2x2 convergence figure: rows are norm error / angle error, columns are the
# distance criterion / the fragment-bond-order criterion.
fig, axs = plt.subplots(2, 2, figsize=(8, 6))
axs[1,0].set_xlabel("Distance (Bohr)", fontsize=14)
axs[1,1].set_xlabel("Fragment Bond Order", fontsize=14)
axs[0,0].set_ylabel("Norm Error (Relative)", fontsize=14)
axs[1,0].set_ylabel("Angle Error (Degrees)", fontsize=14)
axs[0,1].set_xscale("log")
axs[1,1].set_xscale("log")
axs[0,1].axvspan(0.001, 0.01, facecolor='#fffed6')
axs[1,1].axvspan(0.001, 0.01, facecolor='#fffed6')
axs[0,0].set_ylim(-0.01, 0.9)
axs[0,1].set_ylim(-0.01, 0.9)

for sys in labels:
    series_style = dict(linestyle='--', marker=markers[sys], label=sys.upper(),
                        color=colors[sys], markersize=10)
    remainder = [spillerror[sys][x]["Remainder"] for x in spillcuts]
    for row, metric in enumerate(["D1 Error", "D1 Angle"]):
        axs[row,0].plot(distance,
                        [disterror[sys][x][metric] for x in distance],
                        **series_style)
        axs[row,1].plot(remainder,
                        [spillerror[sys][x][metric] for x in spillcuts],
                        **series_style)

fig.tight_layout()
# Bond-order panels read left to right from loose to tight threshold.
axs[0,1].invert_xaxis()
axs[1,1].invert_xaxis()
lgd = axs[0,1].legend(bbox_to_anchor=(0.75, 1.19), ncol=len(labels), fontsize=12)
# The keyword is `bbox_extra_artists`; the previous spelling
# (`box_extra_artists`) is not a savefig option, so the legend placed
# outside the axes was never registered for the tight bounding box.
fig.savefig(join("Figures","convergence.eps"), bbox_extra_artists=(lgd,), bbox_inches='tight')
We can also compare both methods with respect to the size of the QM region.
# Same 2x2 layout as the convergence figure, but the x axis is the "Size"
# entry of each error record instead of the selection threshold.
fig, axs = plt.subplots(2, 2, figsize=(8, 6))
axs[1,0].set_xlabel("Distance Criteria", fontsize=14)
axs[1,1].set_xlabel("Fragment Bond Order Criteria", fontsize=14)
axs[0,0].set_ylabel("Norm Error (Relative)", fontsize=14)
axs[1,0].set_ylabel("Angle Error (Degrees)", fontsize=14)
axs[0,0].set_ylim(-0.01, 0.9)
axs[0,1].set_ylim(-0.01, 0.9)

for sys in labels:
    series_style = dict(linestyle='--', marker=markers[sys], label=sys.upper(),
                        color=colors[sys], markersize=10)
    dist_sizes = [disterror[sys][x]["Size"] for x in distance]
    spill_sizes = [spillerror[sys][x]["Size"] for x in spillcuts]
    for row, metric in enumerate(["D1 Error", "D1 Angle"]):
        axs[row,0].plot(dist_sizes,
                        [disterror[sys][x][metric] for x in distance],
                        **series_style)
        axs[row,1].plot(spill_sizes,
                        [spillerror[sys][x][metric] for x in spillcuts],
                        **series_style)

fig.tight_layout()
lgd = axs[0,1].legend(bbox_to_anchor=(0.75, 1.19), ncol=len(labels), fontsize=12)
# The keyword is `bbox_extra_artists`; the previous spelling
# (`box_extra_artists`) is not a savefig option, so the legend placed
# outside the axes was never registered for the tight bounding box.
fig.savefig(join("Figures","convergence-size.eps"), bbox_extra_artists=(lgd,), bbox_inches='tight')
We will now summarize the convergence of the forces. First, we will get the fluctuation of the forces of the 1CRN calculation to put this data in context.
from BigDFT.Logfiles import Logfile
# Read the force-noise entry of the 1CRN log; it is drawn as a reference
# line in the force-convergence figures.
crn_logfile_path = join("Output", "log-1CRN.yaml")
l = Logfile(crn_logfile_path)
fluct = l.log["Average noise forces"]
Now we load and plot.
# Force errors keyed as forceerror[system][bond-order cutoff].
spillcuts = [100, 1, 0.1, 0.01, 0.001]
forceerror = {}
for name in labels:
    per_cut = {}
    forceerror[name] = per_cut
    for cutoff in spillcuts:
        cache_path = join("Cache-BO", name + "-" + str(cutoff) + "-forces.pickle")
        with open(cache_path, "rb") as handle:
            per_cut[cutoff] = pickle.load(handle)
from numpy import average, std, max
# Average force error (norm) against the remaining fragment bond order.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
axs.set_xlabel("Fragment Bond Order", fontsize=14)
axs.set_ylabel("Average Force Error (Norm)", fontsize=14)
axs.axvspan(0.001, 0.01, facecolor='#fffed6')

for sys in labels:
    xvals = [spillerror[sys][x]["Remainder"] for x in spillcuts]
    # Walk `spillcuts` for the y values as well, so every point pairs the
    # x and y of the same cutoff; iterating the dict itself relied on its
    # key order matching `spillcuts`.
    vals = [average(forceerror[sys][x]) for x in spillcuts]
    axs.plot(xvals, vals, linestyle='--', marker=markers[sys],
             label=sys.upper(), color=colors[sys], markersize=10)

# Reference level: total force noise read from the 1CRN log.
axs.axhline(fluct["total"], color='k', linestyle=':')
axs.set_xscale("log")
axs.set_yscale("log")
axs.invert_xaxis()
lgd = axs.legend(bbox_to_anchor=(1, 1.14), ncol=len(labels), fontsize=10)
# `bbox_extra_artists` is the savefig keyword that makes the tight bounding
# box account for the legend; `box_extra_artists` was a misspelling.
fig.savefig(join("Figures","convergence-forces.eps"), bbox_extra_artists=(lgd,), bbox_inches='tight')
As well as the errors in any individual component.
# Per-component force errors; this replaces the norm-based `forceerror`
# loaded earlier, using the same [system][cutoff] layout.
spillcuts = [100, 1, 0.1, 0.01, 0.001]
forceerror = {}
for name in labels:
    per_cut = {}
    forceerror[name] = per_cut
    for cutoff in spillcuts:
        cache_path = join("Cache-BO", name + "-" + str(cutoff) + "-comp_forces.pickle")
        with open(cache_path, "rb") as handle:
            per_cut[cutoff] = pickle.load(handle)
from numpy import average, std, max
# Average per-component force error against the remaining bond order.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
axs.set_xlabel("Fragment Bond Order", fontsize=14)
axs.set_ylabel("Average Force Error (Component)", fontsize=14)
axs.axvspan(0.001, 0.01, facecolor='#fffed6')

for sys in labels:
    xvals = [spillerror[sys][x]["Remainder"] for x in spillcuts]
    # Walk `spillcuts` for the y values as well, so every point pairs the
    # x and y of the same cutoff; iterating the dict itself relied on its
    # key order matching `spillcuts`.
    vals = [average(forceerror[sys][x]) for x in spillcuts]
    axs.plot(xvals, vals, linestyle='--', marker=markers[sys],
             label=sys.upper(), color=colors[sys], markersize=10)

# Reference level: the largest of the x/y/z force-noise entries.
axs.axhline(max([fluct['x'], fluct['y'], fluct['z']]), color='k', linestyle=':')
axs.set_xscale("log")
axs.set_yscale("log")
axs.invert_xaxis()
lgd = axs.legend(bbox_to_anchor=(1, 1.14), ncol=len(labels), fontsize=10)
# `bbox_extra_artists` is the savefig keyword that makes the tight bounding
# box account for the legend; `box_extra_artists` was a misspelling.
fig.savefig(join("Figures","convergence-comp-forces.eps"), bbox_extra_artists=(lgd,), bbox_inches='tight')
In this section, we will summarize the graph metrics associated with the various configurations.
from networkx import average_clustering, average_shortest_path_length
from networkx import NetworkXError

# Rows of the two summary tables: [cutoff label, value for each system].
ac = []
aspl = []
graphcuts = [100, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001]
for cut in graphcuts:
    ac_temp = []
    aspl_temp = []
    for sys in labels:
        pname = join("Cache-BO", sys+"-"+str(cut)+"-graph.pickle")
        with open(pname, "rb") as ifile:
            G = pickle.load(ifile)
        ac_temp.append(average_clustering(G))
        try:
            aspl_temp.append(average_shortest_path_length(G))
        except NetworkXError:
            # networkx raises this for a disconnected graph; any other
            # failure (including an interrupt) is no longer swallowed.
            aspl_temp.append("Not Connected")
    # The cutoff of 100 is shown as the bare target in the tables.
    cutstr = "Bare Target" if cut == 100 else str(cut)
    ac.append([cutstr] + ac_temp)
    aspl.append([cutstr] + aspl_temp)
First, a table with the average clustering.
# One row per cutoff, one column per system.
clustering_table = pd.DataFrame(ac, columns=["Cutoff"] + labels)
display(clustering_table)
We will also plot those values.
# Average clustering coefficient versus fragment-bond-order cutoff.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
# The first table row is labelled "Bare Target", so its numeric cutoff (100)
# is supplied explicitly; the remaining labels parse as floats.
xval = [100] + [float(x[0]) for x in ac[1:]]
for i, sys in enumerate(labels):
    # Column 0 of each row is the cutoff label; systems start at column 1.
    yval = [x[i+1] for x in ac]
    axs.plot(xval, yval, label=sys.upper(), marker=markers[sys], linestyle='--',
             markersize=10, color=colors[sys])
# Limits are given high-to-low, so the axis runs from loose to tight cutoff.
axs.set_xlim(graphcuts[0]*2.0, graphcuts[-1]/2)
axs.set_xscale("log")
axs.set_xlabel("Fragment Bond Order", fontsize=12)
# Label spelling corrected (was "Average Clusterng").
axs.set_ylabel("Average Clustering", fontsize=12)
axs.legend()
fig.savefig(join("Figures","acc.eps"))
Next, a table with the average shortest path length.
# Table of average shortest path lengths, then the same data as a figure.
aspl_table = pd.DataFrame(aspl, columns=["Cutoff"] + labels)
display(aspl_table)

fig, axs = plt.subplots(1, 1, figsize=(6, 4))
# Row 0 carries the "Bare Target" label, hence the explicit 100.
xval = [100] + [float(row[0]) for row in aspl[1:]]
for column, name in enumerate(labels, 1):
    # Only cutoffs at which the graph was connected can be plotted.
    points = [(x, row[column]) for x, row in zip(xval, aspl)
              if row[column] != 'Not Connected']
    subx = [point[0] for point in points]
    suby = [point[1] for point in points]
    axs.plot(subx, suby, label=name.upper(), marker=markers[name], linestyle='--',
             markersize=10, color=colors[name])

axs.set_xlim(10, graphcuts[-1]/2)
axs.set_xscale("log")
axs.set_xlabel("Fragment Bond Order", fontsize=12)
axs.set_ylabel("Average Shortest Path Length", fontsize=12)
axs.legend()
fig.savefig(join("Figures", "aspl.eps"))
Next, we will also examine the degree distribution of the various systems.
# dist[cutoff][system] is the list of node degrees of the cached graph.
dist = {}
for cutoff in graphcuts:
    per_system = {}
    dist[cutoff] = per_system
    for name in labels:
        cache_path = join("Cache-BO", name + "-" + str(cutoff) + "-graph.pickle")
        with open(cache_path, "rb") as handle:
            bond_mats = pickle.load(handle)
        # degree() yields (node, degree) pairs; only the degree is kept.
        per_system[name] = [entry[1] for entry in bond_mats.degree()]
# One histogram panel per system, filled row by row.
fig, axs4 = plt.subplots(2, 2, figsize=(6, 5))
axs = [axs4[0,0], axs4[0,1], axs4[1,0], axs4[1,1]]

# Every second cutoff, walking from the tightest towards the loosest
# (the first entry of graphcuts is never reached by this slice).
shown_cuts = graphcuts[-1:0:-2]
for panel, name in zip(axs, labels):
    panel.set_title(name, fontsize=14)
    for cutoff in shown_cuts:
        panel.hist(dist[cutoff][name], label=str(cutoff), bins=10, log=True)

axs[1].legend(bbox_to_anchor=(1.47, 1.05))
fig.tight_layout()
This picture summarizes the change in average shortest path length with the choice of purity indicator threshold.
from numpy import e
from networkx import connected_components
from networkx import NetworkXError

# Purity cutoffs whose averaged graphs are loaded for every system.
purity_cutoffs = [0.075, 0.05, 0.0375, 0.025, 0.0125, 0.0075, 0.0050, 0.0025, 0.00125]
# Position of the cutoff highlighted by the vertical line. A fixed index of
# 3 was used before, which in this list is 0.025 although the line is
# labelled "cutoff = 0.05".
reference = purity_cutoffs.index(0.05)

fig, axs4 = plt.subplots(2,2, figsize=(6,5))
axs = [axs4[0,0], axs4[0,1], axs4[1,0], axs4[1,1]]
for i, sys in enumerate(labels):
    axs[i].set_title(sys, fontsize=14)
    nnodes = []
    asplvals = []
    for cutoff in purity_cutoffs:
        pname = join("Cache-BO", sys+"-"+str(cutoff)+"-0.01-avg-graph.pickle")
        with open(pname, "rb") as ifile:
            G = pickle.load(ifile)
        nnodes.append(G.number_of_nodes())
        try:
            asplvals.append(average_shortest_path_length(G))
        except NetworkXError:
            # Disconnected graph: fall back to the unweighted mean of the
            # per-component path lengths. A local name is used so the
            # `aspl` table built earlier in the notebook is not overwritten.
            component_total = 0
            component_count = 0
            for sub in connected_components(G):
                component_total += average_shortest_path_length(G.subgraph(sub))
                component_count += 1
            asplvals.append(component_total / component_count)
    axs[i].plot(nnodes, asplvals, markers[sys], linestyle='--', color=colors[sys], markersize=10)
    axs[i].axvline(nnodes[reference], color='k', linestyle='--', label="cutoff = 0.05")
    # NOTE(review): recent Matplotlib releases spell this keyword `base`;
    # `basex` is kept for the version this notebook was written against.
    axs[i].set_xscale("log", basex=2)
fig.text(0.0, 0.5, 'Average Shortest Path Length', va='center', rotation='vertical', fontsize=12)
axs[2].set_xlabel("Number of Fragments", fontsize=12)
axs[3].set_xlabel("Number of Fragments", fontsize=12)
fig.tight_layout()
fig.savefig(join("Figures", "aspvspurity.eps"))
Next, we will do some comparisons of fragmentations. First, we will look at the fragmentations generated by the FU Program. This will allow us to break down the fragment by amino acid.
from BigDFT import Fragments as F
from BigDFT import PostProcessing as PP
from BigDFT import Logfiles as L
This subroutine uses a BDA file to fragment a system.
def bda_frag(bda_file, sys):
    """Regroup the single-atom fragments of ``sys`` as listed in a BDA file.

    Only lines containing ``FRAGMENT`` are used. On such a line the third
    whitespace-separated token is the fragment name, and the text between
    the first ``[`` and the first ``]`` is a comma-separated list of atom
    numbers, where ``a-b`` denotes the inclusive range from a to b.

    Args:
        bda_file: path of the BDA file to read.
        sys: mapping with keys ``"ATOM:<k>"``; atom number ``n`` from the
            file is looked up as ``"ATOM:<n-1>"``.

    Returns:
        A new ``F.System`` with one ``F.Fragment`` per FRAGMENT line, keyed
        by ``"<first three characters of the name>:<remainder>"``.
    """
    fragnames = []
    rangeval = []
    with open(bda_file, "r") as ifile:
        for line in ifile:
            if "FRAGMENT" not in line:
                continue
            listing = line.split("]")[0].split("[")[1]
            atoms = []
            rangeval.append(atoms)
            fragnames.append(line.split()[2])
            for val in listing.split(","):
                if '-' in val:
                    left, right = val.split('-')
                    atoms.extend(range(int(left), int(right)+1))
                else:
                    atoms.append(int(val))

    resys = F.System()
    for fid, frag in zip(fragnames, rangeval):
        fstr = fid[:3] + ":" + fid[3:]
        resys[fstr] = F.Fragment()
        for val in frag:
            # File numbering starts at 1, the "ATOM:<k>" keys start at 0.
            resys[fstr] += sys["ATOM:"+str(val-1)]
    return resys
First we will analyze the 1CRN system. The BDA file was generated with the following PDB input file.
from BigDFT import Atom
# Build a system with one single-atom fragment per ATOM line of the PDB.
crn_sys = F.System()
i = 0
with open(join("Geometries", "1CRN.pdb")) as ifile:
    for record in ifile:
        if "ATOM" in record:
            fields = record.split()
            # The symbol is read from the second-to-last column and the
            # coordinates from columns 5-7 of the whitespace-split line.
            symbol = fields[-2]
            coords = [float(value) for value in fields[5:8]]
            atom = Atom.Atom({symbol: coords, "units": "angstroem"})
            crn_sys["ATOM:" + str(i)] = F.Fragment([atom])
            i += 1
Once the pdb is read in we can now fragment the system.
# Regroup the single-atom 1CRN system according to its BDA file.
crn_bda_path = join("BDA", "1CRN.bda")
crn_resys = bda_frag(crn_bda_path, crn_sys)
Using this new fragmentation, we can compute the purity values.
# Purity computation for the BDA-based 1CRN fragmentation; the result is
# read back below through each fragment's `purity_indicator`.
btool = PP.BigDFTool()
crn_log_path = join("Output", "log-1CRN.yaml")
crn_log = L.Logfile(crn_log_path)
pv = btool.run_compute_purity(crn_resys, crn_log)
And plot them.
# Purity indicator of every fragment, with a line at the -0.05 threshold.
fig, axs = plt.subplots(1, 1, figsize=(8, 4))
crn_purities = {}
for fragid in crn_resys:
    crn_purities[fragid] = crn_resys[fragid].purity_indicator
F.plot_fragment_information(axs, crn_purities)
axs.axhline(-0.05)
axs.set_ylabel("Purity Indicator", fontsize=12)
fig.tight_layout()
fig.savefig(join("Figures", "fu-crn.eps"), bbox_inches='tight')
The fragments here are all pure. It might be interesting to modify our definition of purity though, and see about combining these fragments.
# Number of fragments after re-fragmenting the BDA-based system at
# progressively smaller cutoffs; every run starts from crn_resys.
df = []
for cutoff in [0.05, 0.025, 0.01, 0.005, 0.001]:
    tempsys = btool.auto_fragment(crn_resys, crn_log, cutoff, criteria="bondorder")
    row = [cutoff, len(tempsys)]
    df.append(row)
refragment_table = pd.DataFrame(df, columns=["Cutoff", "Number of Fragments"])
display(refragment_table)
Returning to the original fragmentation, we can plot the purity values and fragment sizes in comparison with the autofragmentation procedure.
# Cached automatic fragmentations of 1CRN at cutoffs 0.05 and 0.0375.
with open(join("Cache-BO", "1CRN-0.05.pickle"), "rb") as handle:
    crn_auto_05 = pickle.load(handle)
with open(join("Cache-BO", "1CRN-0.0375.pickle"), "rb") as handle:
    crn_auto_0375 = pickle.load(handle)
# Sorted fragment sizes (top) and sorted purity values (bottom) for the two
# automatic fragmentations and the BDA-based one.
fig, axs = plt.subplots(2, 1, figsize=(6, 5))
series = [
    (crn_auto_05, 'x--', "Auto (0.05)"),
    (crn_auto_0375, 'p--', "Auto (0.0375)"),
    (crn_resys, 'd--', "FU Fragment"),
]
for fragmentation, fmt, name in series:
    axs[0].plot(sorted(len(frag) for frag in fragmentation.values()), fmt, label=name)
for fragmentation, fmt, name in series:
    axs[1].plot(sorted(frag.purity_indicator for frag in fragmentation.values()), fmt, label=name)

axs[1].axhline(-0.05, color='k', linestyle='--')
axs[0].set_ylabel("Fragment Size", fontsize=12)
axs[1].set_ylabel("Purity Value", fontsize=12)
axs[1].set_xlabel("Fragment ID", fontsize=12)
axs[0].legend(bbox_to_anchor=(1, 1.3), ncol=3)
Next we will look at the Laccase enzyme. This required more manual input to the FU program to handle the copper atoms.
try:
from BigDFT.FragmentIO import XYZReader
except ImportError:
from BigDFT.XYZ import XYZReader
# One single-atom fragment per entry of the xyz file, regrouped through the
# Laccase BDA file, followed by the purity computation.
laccase_sys = System()
with XYZReader(join("Geometries", "Laccase.xyz")) as reader:
    for index, atom in enumerate(reader):
        laccase_sys["ATOM:" + str(index)] = F.Fragment([atom])

laccase_bda_path = join("BDA", "Laccase.bda")
laccase_resys = bda_frag(laccase_bda_path, laccase_sys)
laccase_log = L.Logfile(join("Output", "log-Laccase.yaml"))
pv = btool.run_compute_purity(laccase_resys, laccase_log)
We can plot the purity values of those fragments.
# Purity indicator of every Laccase fragment; tick labels are blanked.
fig, axs = plt.subplots(1, 1, figsize=(12, 4))
laccase_purities = {}
for fragid in laccase_resys:
    laccase_purities[fragid] = laccase_resys[fragid].purity_indicator
F.plot_fragment_information(axs, laccase_purities)
axs.axhline(-0.05)
axs.set_ylabel("Purity Indicator", fontsize=12)
axs.set_xticklabels([''])
fig.savefig(join("Viz-BO", "LaccasePurity.png"))
We see in this case that our choice of fragments for this system was overall good. The copper atoms were combined with a sufficient number of surrounding amino acids to achieve the purity condition. One fragment remains an outlier, suggesting that some further modelling should be done on this system.
from copy import deepcopy
# Fragment counts for the amino-acid fragmentation and for successive
# refinements of it. Each refinement starts from the previous result and is
# cached on disk, so re-running the cell does not repeat the computation.
df = []
df.append(["Amino Acid", len(laccase_resys)])
tempsys = deepcopy(laccase_resys)
kxs = None
for cut in [0.05, 0.025, 0.01, 0.005, 0.001]:
    pname = join("Cache-BO", "Laccase-Amino-"+str(cut)+".pickle")
    try:
        with open(pname, "rb") as ifile:
            tempsys = pickle.load(ifile)
    except (IOError, OSError, EOFError, pickle.UnpicklingError):
        # Missing or unreadable cache entry: recompute and store it. Only
        # these errors count as a cache miss, so interrupts and genuine
        # bugs are no longer silently turned into a recomputation.
        tempsys = btool.auto_fragment(tempsys, laccase_log, cut, kxs=kxs, criteria="bondorder")
        with open(pname, "wb") as ofile:
            pickle.dump(tempsys, ofile)
    df.append([cut, len(tempsys)])
display(pd.DataFrame(df, columns=["Cutoff", "Number of Fragments"]))
# Cached automatic fragmentations of Laccase at cutoffs 0.05 and 0.0375.
with open(join("Cache-BO", "Laccase-0.05.pickle"), "rb") as handle:
    laccase_auto_05 = pickle.load(handle)
with open(join("Cache-BO", "Laccase-0.0375.pickle"), "rb") as handle:
    laccase_auto_0375 = pickle.load(handle)
As with 1CRN, we will compare these fragmentations in terms of size and purity.
# Sorted fragment sizes (top) and sorted purity values (bottom) for the two
# automatic fragmentations and the BDA-based one.
fig, axs = plt.subplots(2, 1, figsize=(6, 5))
series = [
    (laccase_auto_05, 'x--', "Auto (0.05)"),
    (laccase_auto_0375, 'p--', "Auto (0.0375)"),
    (laccase_resys, 'd--', "FU Fragment"),
]
for fragmentation, fmt, name in series:
    axs[0].plot(sorted(len(frag) for frag in fragmentation.values()), fmt, label=name)
for fragmentation, fmt, name in series:
    axs[1].plot(sorted(frag.purity_indicator for frag in fragmentation.values()), fmt, label=name)

axs[1].axhline(-0.05, color='k', linestyle='--')
axs[0].set_ylabel("Fragment Size", fontsize=12)
axs[1].set_ylabel("Purity Value", fontsize=12)
axs[1].set_xlabel("Fragment ID", fontsize=12)
axs[0].legend(bbox_to_anchor=(1, 1.3), ncol=3)
This chart will summarize the distribution of fragment sizes in the various systems.
# Sorted fragment sizes of every system at the 0.05 purity cutoff.
size_dist = {}
for name in labels:
    cache_path = join("Cache-BO", name + "-" + str(0.05) + ".pickle")
    with open(cache_path, "rb") as handle:
        varfrag = pickle.load(handle)
    fragment_sizes = [len(frag) for frag in varfrag.values()]
    fragment_sizes.sort()
    size_dist[name] = fragment_sizes
# Panels are filled column by column, so the first two systems share the
# left column.
fig, axs2d = plt.subplots(2, 2, figsize=(6, 5))
axs = [axs2d[0,0], axs2d[1,0], axs2d[0,1], axs2d[1,1]]

for panel, name in zip(axs, labels):
    panel.plot(size_dist[name], label=name + " - Auto", marker=markers[name],
               linestyle='--', markersize=7, color=colors[name])

# Overlay the BDA-based fragmentations on the first two panels.
crn_fu_sizes = sorted([len(frag) for frag in crn_resys.values()])
axs[0].plot(crn_fu_sizes, label="CRN - FU",
            marker=markers["1CRN-FU"], color=colors["1CRN-FU"], linestyle='--', markersize=7)
laccase_fu_sizes = sorted([len(frag) for frag in laccase_resys.values()])
axs[1].plot(laccase_fu_sizes, label="Laccase - FU",
            marker=markers["Laccase-FU"], color=colors["Laccase-FU"], linestyle='--', markersize=7)

for panel in (axs[0], axs[1]):
    panel.set_ylabel("Fragment Size", fontsize=12)
for panel in (axs[1], axs[3]):
    panel.set_xlabel("Fragment Index", fontsize=12)
for panel in axs:
    panel.legend()
fig.savefig(join("Figures", "size-summary.eps"), bbox_inches='tight')
The last thing we will try to do is to compare the fragments that exist in 1CRN with the fragments in the Laccase system. The comparison will rely on the fp2 fingerprint representation of each fragment.
from BigDFT import BabelInterop as BI
# Fingerprint every fragment of the automatic (0.05) and BDA-based
# fragmentations. Connectivity is set on the fragment before it is
# fingerprinted.
crn_kxs = btool.get_matrix_kxs(crn_log)
crn_fp = {}
crn_fu_fp = {}
for prints, fragments in ((crn_fp, crn_auto_05), (crn_fu_fp, crn_resys)):
    for fragid, frag in fragments.items():
        btool.set_fragment_connectivity(frag, crn_log, crn_kxs)
        prints[fragid] = BI.compute_fragment_fingerprint(frag)

laccase_kxs = btool.get_matrix_kxs(laccase_log)
laccase_fp = {}
laccase_fu_fp = {}
for prints, fragments in ((laccase_fp, laccase_auto_05), (laccase_fu_fp, laccase_resys)):
    for fragid, frag in fragments.items():
        btool.set_fragment_connectivity(frag, laccase_log, laccase_kxs)
        prints[fragid] = BI.compute_fragment_fingerprint(frag)
We can then compare these representations to compute the Tanimoto coefficient. First, we do the comparison with the autofragments.
from numpy import zeros
# simmat[r, c]: `|` applied to the r-th 1CRN and c-th Laccase fingerprint
# (the Tanimoto coefficient, per the notebook text).
simmat = zeros((len(crn_fp), len(laccase_fp)))
for row, crn_id in enumerate(crn_fp):
    crn_print = crn_fp[crn_id]
    for col, laccase_id in enumerate(laccase_fp):
        simmat[row, col] = crn_print | laccase_fp[laccase_id]

# Best match among the 1CRN fragments for every Laccase fragment.
mv_laccase_crn = {}
for col, laccase_id in enumerate(laccase_fp):
    column = simmat[:, col]
    mv_laccase_crn[laccase_id] = max(column)
Next a comparison of the fragments generated by the FU program.
from numpy import zeros
# Same comparison using the BDA-based fragments of both systems.
simmat = zeros((len(crn_fu_fp), len(laccase_fu_fp)))
for row, crn_id in enumerate(crn_fu_fp):
    crn_print = crn_fu_fp[crn_id]
    for col, laccase_id in enumerate(laccase_fu_fp):
        simmat[row, col] = crn_print | laccase_fu_fp[laccase_id]

# Best match among the 1CRN fragments for every Laccase fragment.
mv_laccase_crn_fu = {}
for col, laccase_id in enumerate(laccase_fu_fp):
    column = simmat[:, col]
    mv_laccase_crn_fu[laccase_id] = max(column)
It might also be interesting simply to compare the FU fragments for 1CRN with the auto fragments for 1CRN.
from numpy import zeros
# BDA-based 1CRN fragments (rows) against automatic 1CRN fragments (columns).
simmat = zeros((len(crn_fu_fp), len(crn_fp)))
for row, fu_id in enumerate(crn_fu_fp):
    fu_print = crn_fu_fp[fu_id]
    for col, auto_id in enumerate(crn_fp):
        simmat[row, col] = fu_print | crn_fp[auto_id]

# Best match among the BDA-based fragments for every automatic fragment.
mv_crn_crn = {}
for col, auto_id in enumerate(crn_fp):
    column = simmat[:, col]
    mv_crn_crn[auto_id] = max(column)
Finally, we can generate a histogram showing the distribution of Tanimoto coefficients.
# Grouped histogram of the best-match coefficients of the three comparisons.
fig, axs = plt.subplots(1, 1, figsize=(6, 4))
best_matches = [
    list(mv_laccase_crn.values()),
    list(mv_laccase_crn_fu.values()),
    list(mv_crn_crn.values()),
]
series_names = ["Auto 1CRN - Auto Laccase", "Amino Acid 1CRN - Amino Acid Laccase", "Auto 1CRN - Amino Acid 1CRN"]
axs.hist(best_matches, bins=10, label=series_names, log=True, histtype='bar')
axs.set_xlabel("Tanimoto Coefficient", fontsize=12)
axs.legend()
fig.savefig(join("Figures", "comparison.eps"))
In this section, we will generate an image to be used as a graphical abstract picture. We will draw a picture of the connectivity graph of the 1CRN protein using the amino acid fragmentation. First, we compute the connectivity.
def graph_bond(sys, threshold, pairwise_bo):
    """Build a 0/1 connectivity matrix from pairwise fragment bond orders.

    For every fragment the partners are visited from the largest bond order
    to the smallest; each visited partner is marked as connected, and the
    walk stops once the bond order not yet accounted for falls below
    ``threshold``.

    Args:
        sys: sized iterable of fragment ids; its iteration order defines
            the row and column order of the matrix.
        threshold: remaining bond order below which the walk stops.
        pairwise_bo: ``pairwise_bo[a][b]`` is the bond order between
            fragments ``a`` and ``b``; every ``b`` must be an id of ``sys``.

    Returns:
        A ``len(sys) x len(sys)`` numpy array; entry ``[i, j]`` is 1 when
        fragment ``j`` was visited for fragment ``i``, otherwise 0.
    """
    from numpy import zeros

    # Columns are looked up by fragment id. The position of an id inside
    # pairwise_bo[fragid1] was used before, which put edges in the wrong
    # column whenever that dict iterated in a different order than `sys`.
    column_of = {fragid: col for col, fragid in enumerate(sys)}
    mat = zeros((len(sys), len(sys)))
    for i, fragid1 in enumerate(sys):
        spilldict = pairwise_bo[fragid1]
        strongest_first = sorted(spilldict, key=lambda fragid: spilldict[fragid],
                                 reverse=True)
        remainder = sum(spilldict.values())
        for frag2 in strongest_first:
            mat[i, column_of[frag2]] = 1
            remainder -= spilldict[frag2]
            if remainder < threshold:
                break
    return mat
from networkx import from_numpy_matrix

# Bond orders between every pair of amino-acid fragments of 1CRN, reduced
# to a connectivity matrix with a threshold of 0.1.
pairwise_bo = btool.fragment_bond_order(
    crn_resys, crn_resys.keys(), crn_resys.keys(), crn_log)
bondmat = graph_bond(crn_resys, 0.1, pairwise_bo)
Next we set up the colors and labels.
# RGB colour assigned to each residue name.
colordict = {
    "ALA": (0.0, 0.0, 1.0),
    "ARG": (0.9, 0.9, 0.9),
    "ASN": (0.5, 0.5, 0.2),
    "CYS": (1, 1, 0),
    "GLU": (1, 0.6, 0.6),
    "GLY": (0.9, 0.9, 0.9),
    "ILE": (0, 1, 0),
    "LEU": (1, 0.4, 0.8),
    "PHE": (0.25, 0.75, 0.75),
    "PRO": (0.5, 0.5, 0.75),
    "SER": (1, 0.8, 0.2),
    "THR": (0.7, 0.6, 0.7),
    "TYR": (0.1, 0.8, 0.1),
    "VAL": (0.75, 0.5, 0.2),
}

# Fragment keys have the form "<residue>:<number>"; the number (minus one)
# is used as the node index for both the label and the colour.
# NOTE(review): `labels` is rebound here from the list of system names to a
# node-label dict, so earlier cells cannot be re-run after this one.
colorlist = [0] * len(crn_resys)
labels = {}
for key in crn_resys:
    parts = key.split(":")
    residue = parts[0]
    node = int(parts[1]) - 1
    labels[node] = residue
    colorlist[node] = colordict[residue]
Finally we draw.
from networkx import draw_kamada_kawai
# Draw the amino-acid connectivity graph of 1CRN and save it.
cover_graph = from_numpy_matrix(bondmat)
cover_style = dict(labels=labels, node_size=800, font_size=10,
                   font_family='serif', linewidths=4, font_weight='bold',
                   node_color=colorlist, alpha=0.95, style='dashed')
draw_kamada_kawai(cover_graph, **cover_style)
plt.savefig(join("Figures", "cover.eps"))
from copy import deepcopy
refine_cutoffs = [0.05, 0.025, 0.01, 0.005]

# Successively refine the amino-acid fragmentation: each cutoff starts from
# the result of the previous one.
syslist = {}
varfrag = deepcopy(crn_resys)
for cutoff in refine_cutoffs:
    varfrag = btool.auto_fragment(system=varfrag, cutoff=cutoff,
                                  log=crn_log, criteria="bondorder")
    syslist[cutoff] = varfrag

# Connectivity matrix (threshold 0.01) of every refined fragmentation.
mats = {}
for cutoff in refine_cutoffs:
    sys = syslist[cutoff]
    fragment_ids = sys.keys()
    pairwise_bo = btool.fragment_bond_order(sys, fragment_ids, fragment_ids, crn_log)
    mats[cutoff] = graph_bond(sys, 0.01, pairwise_bo)
# One connectivity graph per refinement cutoff, panels filled row by row.
fig, axs4 = plt.subplots(2, 2, figsize=(6, 5))
axs = [axs4[0,0], axs4[0,1], axs4[1,0], axs4[1,1]]
for panel, cut in zip(axs, [0.05, 0.025, 0.01, 0.005]):
    panel.set_title("Cutoff = -" + str(cut), fontsize=14)
    graph = from_numpy_matrix(mats[cut])
    draw_kamada_kawai(graph, node_size=100, font_size=8,
                      font_family='serif', linewidths=4, font_weight='bold',
                      style='dashed', ax=panel)
fig.tight_layout()
plt.savefig(join("Figures", "refine-amino-acids.eps"))