Skip to content
Snippets Groups Projects

Kubernetes

Merged Vladimír Višňovský requested to merge kubernetes into k8s-development
1 file
+ 1
0
Compare changes
  • Side-by-side
  • Inline
+ 1
0
%% Cell type:markdown id: tags:
## 1. Definition of computational parameters
%% Cell type:code id: tags:
``` python
import os
import sys
import shutil
import re
import math
import time
import subprocess
import nglview as nv
import pytraj as pt
#insert path to python scripts
sys.path.insert(1, os.environ['BASE'] + "/modules")
sys.path.insert(1, os.environ['BASE'] + "/modules/gmx_orca")
from draw_2d import *
from draw_3d import drawit
from tqdm.notebook import tqdm
from molvs import Standardizer
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import SDWriter
from rdkit.Chem import AllChem
from rdkit.Chem import MolFromSmarts
from rdkit.Chem import rdmolops
from rdkit.Chem import rdchem
from convert import convert_to_orca_methods
from plot_graph import plot_landmarks
from gmx_orca_run import gmx_run, orca_run, parallel_wait
from replace import replace_text_for_embedding
```
%% Cell type:code id: tags:
``` python
#number of configurations to generate in basic energy minimization
numc = 50
#path to applications
orca_job_check = "/home/base/modules/orcajobcheck.py"
xyz_to_mol = "/home/base/modules/xyz2mol/xyz2mol.py"
```
%% Cell type:markdown id: tags:
## 2. Molecule shape processing
%% Cell type:code id: tags:
``` python
#specify input of desired molecule in SMILES
smiles_molecule = "CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)C[NH+]3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5"
molecule = Chem.MolFromSmiles(smiles_molecule)
render_svg(moltosvg(molecule))
```
%% Cell type:code id: tags:
``` python
with open("molekula.smi", "w") as smifile:
smifile.write(smiles_molecule)
s = Standardizer()
molecule = s.standardize(molecule)
molecule = Chem.AddHs(molecule)
natoms = molecule.GetNumAtoms()
charge = Chem.rdmolops.GetFormalCharge(molecule)
render_svg(moltosvg(molecule))
```
%% Cell type:code id: tags:
``` python
#set lowest energy configuration
#set default id of minimal energy conformer to -1
minid = -1
#set default minimal energy to highest possible float number
minene = sys.float_info.max
AllChem.EmbedMultipleConfs(molecule, clearConfs=True, numConfs=numc)
#run 'MMFF94 force field' energy evaluation
done = AllChem.MMFFOptimizeMoleculeConfs(molecule)
for i in range(len(done)):
if done[i][1]<minene:
minene = done[i][1]
minid = i
print(f'Minimal energy: {minene}')
writer = SDWriter("molekula.mol")
writer.write(molecule, confId=minid)
drawit(molecule)
```
%% Cell type:code id: tags:
``` python
#set pattern to detect torsion angles
RotatableBond = Chem.MolFromSmarts('[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]')
rotatables = molecule.GetSubstructMatches(RotatableBond)
print(rotatables)
```
%% Cell type:code id: tags:
``` python
#get numbers of atoms that form torsion angles
torsions = []
for rotatable in rotatables:
pairs1 = []
pairs2 = []
for bond in molecule.GetBonds():
if rotatable[0]==bond.GetBeginAtomIdx() and rotatable[1]!=bond.GetEndAtomIdx():
pairs1.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()])
if rotatable[1]==bond.GetBeginAtomIdx() and rotatable[0]!=bond.GetEndAtomIdx():
pairs2.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()])
torsions.append([pairs1[0][1],pairs1[0][0],pairs2[0][0],pairs2[0][1]])
print(torsions)
```
%% Cell type:markdown id: tags:
## 3. Preparation of environment and molecule
%% Cell type:code id: tags:
``` python
# prepare input files for energetic minimization
netcharge = rdmolops.GetFormalCharge(molecule)
!antechamber -i molekula.mol -fi mdl -o molekula.prepi -fo prepi -c bcc -nc {netcharge} && \
parmchk2 -i molekula.prepi -f prepi -o molekula.frcmod && \
tleap -f tleapin.txt && \
acpype -p molekula.prmtop -x molekula.inpcrd
```
%% Cell type:code id: tags:
``` python
#fix ordering of atoms
order_before = []
order_after = []
with open('sqm.pdb','r') as pdbfile:
for atom in pdbfile.readlines():
order_before.append(atom.split()[2])
with open('MOL_GMX.gro','r') as grofile:
for atom in grofile.readlines():
if atom.startswith(' 1 MOL'):
order_after.append(atom.split()[2])
torsions_new = []
torsion_new = []
for torsion in torsions:
for i in torsion:
torsion_new.append(order_after.index(order_before[i])+1)
torsions_new.append(torsion_new)
torsion_new = []
torsions = torsions_new
print(torsions)
```
%% Cell type:code id: tags:
``` python
!mkdir -p em
with open("em/em.mdp", "w") as emfile:
emfile.write("integrator = steep\n")
emfile.write("nsteps = 100000\n")
emfile.write("emtol = 0\n")
emfile.write("emstep = 0.1\n")
emfile.write("nstcomm = 1\n")
emfile.write("nstxout = 100\n")
emfile.write("nstvout = 100\n")
emfile.write("nstfout = 0\n")
emfile.write("nstlog = 100\n")
emfile.write("nstenergy = 100\n")
emfile.write("nstlist = 1\n")
emfile.write("ns_type = grid\n")
emfile.write("coulombtype = cut-off\n")
emfile.write("rlist = 1.4\n")
emfile.write("rcoulomb = 1.4\n")
emfile.write("rvdw = 1.4\n")
emfile.write("energygrps = System\n")
emfile.write("epsilon-r = 80\n")
emfile.write("\n")
shutil.copy("MOL_GMX.gro", "em/")
shutil.copy("MOL_GMX.top", "em/")
```
%% Cell type:code id: tags:
``` python
gmx_run("editconf -f MOL_GMX -o box -c -box 3 3 3", workdir="em")
gmx_run("grompp -f em.mdp -c box -p MOL_GMX -o em1", workdir="em")
gmx_run("mdrun -deffnm em1 -ntomp 2", workdir="em")
```
%% Cell type:code id: tags:
``` python
#prepare files for getting the molecule to dynamic state
!mkdir -p md
with open("md/md.mdp", "w") as mdfile:
mdfile.write("integrator = sd\n")
mdfile.write("nsteps = 100000\n")
mdfile.write("dt = 0.001\n")
mdfile.write("nstxout = 1000\n")
mdfile.write("nstvout = 1000\n")
mdfile.write("nstenergy = 1000\n")
mdfile.write("nstlog = 1000\n")
mdfile.write("continuation = no\n")
mdfile.write("constraints = none\n")
mdfile.write("cutoff-scheme = Verlet\n")
mdfile.write("ns_type = grid\n")
mdfile.write("nstlist = 1\n")
mdfile.write("rlist = 1.4\n")
mdfile.write("rcoulomb = 1.4\n")
mdfile.write("rvdw = 1.4\n")
mdfile.write("coulombtype = cut-off\n")
mdfile.write("tcoupl = V-rescale\n")
mdfile.write("tc-grps = system\n")
mdfile.write("tau_t = 0.1\n")
mdfile.write("ref_t = 300\n")
mdfile.write("pcoupl = no\n")
mdfile.write("pbc = xyz\n")
mdfile.write("gen_vel = yes\n")
mdfile.write("epsilon-r = 80\n")
mdfile.write("\n")
shutil.copy("em/em1.gro", "md/")
shutil.copy("MOL_GMX.top", "md/")
```
%% Cell type:code id: tags:
``` python
gmx_run("grompp -f md.mdp -c em1 -p MOL_GMX -o md1", workdir="md")
gmx_run("mdrun -deffnm md1 -ntomp 2", workdir="md")
```
%% Cell type:code id: tags:
``` python
#show trajectory of conformations
#select group for trjconv evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
group = "0"
gmx_run("trjconv -s md1.tpr -f md1.trr -o outTraj.pdb", workdir="md", groups=group)
traj = pt.load('md/outTraj.pdb')
view = nv.show_pytraj(traj)
view
```
%% Cell type:code id: tags:
``` python
#fix periodic boundaries errors when the molecule jumps out of the box
#select group for trjconv evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
group = "1"
for i in tqdm(range(len(torsions))):
fr = str(float(100-len(torsions)+i)-0.01)
to = str(float(100-len(torsions)+i)+0.01)
gmx_run(f"trjconv -pbc nojump -s md1 -f md1 -o frame{i}.gro -b {fr} -e {to}<<EOF\n0\nEOF", workdir="md", groups=group)
```
%% Cell type:markdown id: tags:
## 4. Generation of representative configurations
### 4.1 Trajectory generation
%% Cell type:code id: tags:
``` python
!mkdir -p mtd
with open("mtd/mtd.mdp", "w") as mtdfile:
mtdfile.write("integrator = sd\n")
mtdfile.write("nsteps = 10000000\n") #10 ns each replica
mtdfile.write("dt = 0.001\n")
mtdfile.write("nstxout = 10000\n") # output coordinates every 10ps to *.trr
mtdfile.write("nstvout = 10000\n") # output velocities every 10ps to *.trr
mtdfile.write("nstenergy = 1000\n")
mtdfile.write("nstlog = 1000\n")
mtdfile.write("continuation = no\n")
mtdfile.write("constraints = none\n")
mtdfile.write("cutoff-scheme = Verlet\n")
mtdfile.write("ns_type = grid\n")
mtdfile.write("nstlist = 1\n")
mtdfile.write("rlist = 1.4\n")
mtdfile.write("rcoulomb = 1.4\n")
mtdfile.write("rvdw = 1.4\n")
mtdfile.write("coulombtype = cut-off\n")
mtdfile.write("tcoupl = V-rescale\n")
mtdfile.write("tc-grps = system\n")
mtdfile.write("tau_t = 0.1\n")
mtdfile.write("ref_t = 300\n")
mtdfile.write("pcoupl = no\n")
mtdfile.write("pbc = xyz\n")
mtdfile.write("gen_vel = yes\n")
mtdfile.write("epsilon-r = 80\n")
mtdfile.write("\n")
```
%% Cell type:code id: tags:
``` python
for i in range(len(torsions)):
if not os.path.exists("mtd/w{}".format(i)):
!mkdir -p mtd/w{i}
with open("mtd/w{}/plumed.dat".format(i), "w") as plumeddat:
plumeddat.write("RANDOM_EXCHANGES\n")
plumeddat.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(natoms))
for j in range(len(torsions)):
plumeddat.write("TORSION ATOMS={},{},{},{} LABEL=cv{}\n".format(torsions[j][0],torsions[j][1],torsions[j][2],torsions[j][3],j+1))
plumeddat.write("METAD ARG=cv{} HEIGHT=0.5 SIGMA=0.3 PACE=1000 GRID_MIN=-pi GRID_MAX=pi BIASFACTOR=15 LABEL=be\n".format(i+1))
cvs = ""
for j in range(len(torsions)):
cvs=cvs+"cv{},".format(j+1)
cvs = cvs[:-1]
plumeddat.write("PRINT ARG={} STRIDE=1000 FILE=COLVAR\n".format(cvs))
plumeddat.write("PRINT ARG=be.bias STRIDE=1000 FILE=BIAS\n")
shutil.copy("MOL_GMX.top", "mtd/")
```
%% Cell type:code id: tags:
``` python
for i in tqdm(range(len(torsions))):
shutil.copy("md/frame{}.gro".format(i), "mtd/w{}/".format(i))
gmx_run(f"grompp -f mtd.mdp -c w{i}/frame{i} -p MOL_GMX -o w{i}/mtd1", workdir="mtd", parallel=True)
parallel_wait()
```
%% Cell type:code id: tags:
``` python
directories = ""
for i in range(len(torsions)):
directories = directories + "w{} ".format(i)
#see mdrunlog in mtd directory for insight to mdrun simulation (e.g 'tail -f mdrunlog')
gmx_run(f"mdrun -g mdrunlog -ntomp 1 -deffnm mtd1 -replex 500 -plumed plumed.dat -multidir {directories}",
workdir="mtd",
mpi_run=len(torsions))
```
%% Cell type:markdown id: tags:
### 4.2 Configurations clustering
%% Cell type:code id: tags:
``` python
#select groups for cluster evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
#Group 3 ( Custom)
groups = "30"
!mkdir -p -m 757 clustering ; mkdir -p -m 757 clustering/outClustersPDB ; mkdir -p -m 757 clustering/outClustersXYZ ; mkdir -p -m 757 clustering/outClusters
trajectories = ""
for i in range(len(torsions)):
trajectories = trajectories + f"mtd/w{i}/mtd1.trr "
#concatinate trajectories
gmx_run(f"trjcat -f {trajectories} -cat -o mtd/mtd1.trr")
#make index file with non-hydrogen atoms, use 1 & ! a H* and q for input
gmx_run("make_ndx -f md/md1.tpr -o mtd/index.ndx", make_ndx="1&!aH*")
#include index file and select index group 3
gmx_run("cluster -method gromos -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -n mtd/index.ndx -cutoff 0.15 -cl clustering/outCluster.pdb",
groups=groups)
#divide all clusters from gmx cluster to single clusters
with open("clustering/outCluster.pdb") as input_cluster:
i = 0
outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
for line in input_cluster:
if line != "ENDMDL\n":
outFile.write(line)
continue
outFile.write("ENDMDL\n")
i += 1
outFile.close()
outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
!rm clustering/outClustersPDB/outCluster{i}.pdb
clusters_ids = [x for x in range(0, i)]
#convert .pdb clusters to .xyz
for pdb_cluster in tqdm(os.listdir("clustering/outClustersPDB/")):
!babel -ipdb clustering/outClustersPDB/{pdb_cluster} -oxyz clustering/outClustersXYZ/{pdb_cluster.replace("pdb", "xyz")}
```
%% Cell type:markdown id: tags:
### 4.3 Visualize landmarks
%% Cell type:code id: tags:
``` python
#apply periodic boundary conditions to mtd trajectory -> fitting -> no Hydrogen -> train parmtSNEcv -> compute embeddings
!mkdir -p visualization/traj
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
#Group 3 ( Custom)
#select group for periodic boundaries check output:
group = "0"
gmx_run("trjconv -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -pbc mol -o visualization/traj/mtd1_nopbc.xtc", groups=group)
#select groups for fitting and output:
groups = "00"
gmx_run("trjconv -f visualization/traj/mtd1_nopbc.xtc -s clustering/outClustersPDB/outCluster0.pdb -fit rot+trans -o visualization/traj/mtd1_fit.xtc",
groups=groups)
#select group for no Hydrogen output:
group = "3"
gmx_run("trjconv -f visualization/traj/mtd1_fit.xtc -n mtd/index.ndx -o visualization/traj/mtd1_fit_noH.xtc",
groups=group)
#select groups for size of box and output:
groups = "33"
gmx_run("editconf -f clustering/outClustersPDB/outCluster0.pdb -n mtd/index.ndx -box 3 3 3 -c -o visualization/ref.pdb",
groups=groups)
#train parmtSNEcv
!parmtSNEcv -i visualization/traj/mtd1_fit_noH.xtc -p visualization/ref.pdb -boxx 3 -boxy 3 -boxz 3 -dim 2 -layers 2 -o visualization/out.txt -plumed visualization/plumed.dat -epochs 2000
#modify plumed.dat to compute embedding in every step and change name of file for *** mtd trajectory ***
replace_text_for_embedding("visualization/plumed.dat", stride=(100, 1), file_name=("COLVAR", "2d_embedding"))
#fix weights of atoms
replace_text_for_embedding("visualization/ref.pdb", weight=(0.00, 1.00))
#run as plumed
gmx_run("driver --plumed visualization/plumed.dat --mf_xtc visualization/traj/mtd1_fit.xtc")
#modify plumed.dat to compute embedding in every step and change name of file for *** representatives ***
replace_text_for_embedding("visualization/plumed.dat", file_name=("2d_embedding", "landmarks"))
#run as plumed
gmx_run("driver --plumed visualization/plumed.dat --mf_pdb clustering/outCluster.pdb")
```
%% Cell type:code id: tags:
``` python
#visualize
x = []
y = []
x1 = []
y1 = []
with open("2d_embedding", "r") as ifile:
for line in ifile.readlines()[1:]:
split_values = line.split()
x.append(split_values[1])
y.append(split_values[2])
with open("landmarks", "r") as ifile:
for line in ifile.readlines()[1:]:
split_values = line.split()
x1.append(split_values[1])
y1.append(split_values[2])
plot_landmarks(x, y, x1, y1).show()
```
%% Cell type:markdown id: tags:
## 5. Accurate and innacurate energy evaluation
### 5.1 AM1 method
%% Cell type:code id: tags:
``` python
#fix indexing
#orca indexes from zero
orca_torsions = [[] for _ in range(len(torsions))]
torsion_index = 0
for torsion in torsions:
for atom_index in torsion:
orca_torsions[torsion_index].append(atom_index - 1)
torsion_index += 1
```
%% Cell type:code id: tags:
``` python
#1. convert
#orca method description - first line in orca method
method_description = "!AM1 Opt"
input_dir = "clustering/outClustersXYZ/"
output_dir = "am1/input/"
spin = 1
!mkdir -p -m 757 am1 ; mkdir -p -m 757 am1/input ; mkdir -p -m 757 am1/output
!mkdir -p -m 757 orca_output ; mkdir -p -m 757 orca_output/am1 ; mkdir -p -m 757 orca_output/bp86svp ; mkdir -p -m 757 orca_output/bp86tzvp
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, -1, (charge, spin))
```
%% Cell type:code id: tags:
``` python
#2. compute
#minimisation of conformations before quantum mechanics
#see logs in orca_output directory...
input_path = "am1/input/"
output_path = "am1/output/"
successfuly_converged = 0
clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"):
continue
orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/am1/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait()
```
%% Cell type:code id: tags:
``` python
#3. check output
#check output of orca methods
for cluster_id in clusters_ids:
with open(f"orca_output/am1/orca_output{cluster_id}.out") as iorca_output:
iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
print(iorca_output_text)
raise SystemExit(f"Error in am1 method of cluster no. {cluster_id}. You should not continue computation!")
```
%% Cell type:markdown id: tags:
### 5.2 BP86 SVP method
%% Cell type:code id: tags:
``` python
#1. convert
#orca method description - first line in orca method
method_description = "!BP86 def2-SVP TightSCF Opt"
output_dir = "bp86svp/input/"
spin = 1
required_cpus = 16
#orca method description - first line in orca method
method_description = "!BP86 def2-SVP TightSCF Opt"
!mkdir -p -m 757 bp86svp ; mkdir -p -m 757 bp86svp/input ; mkdir -p -m 757 bp86svp/output
for i in clusters_ids:
input_dir = "am1/output/outCluster{}/".format(i)
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin))
```
%% Cell type:code id: tags:
``` python
#2. compute
#minimisation of conformations before quantum mechanics
#see logs in orca_output directory...
input_path = "bp86svp/input/"
output_path = "bp86svp/output/"
successfuly_converged = 0
clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"):
continue
orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/bp86svp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait()
```
%% Cell type:code id: tags:
``` python
#3. check output && discard non converging clusters
for cluster_id in clusters_ids:
with open(f"orca_output/bp86svp/orca_output{cluster_id}.out") as iorca_output:
iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
!{orca_job_check} orca_output/bp86svp/orca_output{cluster_id}.out
clusters_ids.remove(int(orca_method_number))
else:
successfuly_converged += 1
print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **")
```
%% Cell type:markdown id: tags:
### 5.3 BP86 TZVP method
%% Cell type:code id: tags:
``` python
#1. convert
#orca method description - first line in orca method
method_description = "!BP86 def2-TZVP TightSCF Opt"
output_dir = "bp86tzvp/input/"
spin = 1
required_cpus = 16
#orca method description - first line in orca method
method_description = "!BP86 def2-TZVP TightSCF Opt"
!mkdir -p -m 757 bp86tzvp ; mkdir -p -m 757 bp86tzvp/input ; mkdir -p -m 757 bp86tzvp/output
for i in clusters_ids:
input_dir = "bp86svp/output/outCluster{}/".format(i)
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin))
```
%% Cell type:code id: tags:
``` python
#2. compute
#run quantum mechanics
#see logs in orca_output directory...
input_path = "bp86tzvp/input/"
output_path = "bp86tzvp/output/"
successfuly_converged = 0
clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"):
continue
orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/bp86tzvp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait()
```
%% Cell type:code id: tags:
``` python
#3. check output && discard non converging clusters
for cluster_id in clusters_ids:
with open(f"orca_output/bp86tzvp/orca_output{cluster_id}.out" as iorca_output:
iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
!{orca_job_check} orca_output/bp86tzvp/orca_output{cluster_id}.out
clusters_ids.remove(int(cluster_id))
else:
successfuly_converged += 1
print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **")
```
%% Cell type:code id: tags:
``` python
#extract final energies from output of orca
with open("orca_energies.txt", "a") as ofile:
for orca_output in os.listdir("orca_output/bp86tzvp"):
with open("orca_output/bp86tzvp/" + orca_output) as iorca_output:
for line in reversed(list(iorca_output)):
energy_list = re.findall(r'(FINAL SINGLE POINT ENERGY)( +)(-?\d+\.\d+)', line)
if len(energy_list) > 0:
ofile.write(energy_list[0][2])
ofile.write('\n')
break
```
%% Cell type:code id: tags:
``` python
#convert from hartree to kJ/mol and write quantum mechanics energies to file
energies_in_hartree = []
with open("orca_energies.txt", "r") as ifile:
for line in ifile.readlines():
energies_in_hartree.append(float(line))
#constant to convert from hartree to kJ/mol
CONVERSION_CONST = 2625.499638
minimum = min(energies_in_hartree)
energies_in_kJ = []
for H in energies_in_hartree:
energies_in_kJ.append((H-minimum)*CONVERSION_CONST)
with open("orca_energies.txt", "w") as ofile:
for energy in energies_in_kJ:
ofile.write("{}\n".format(energy))
```
%% Cell type:code id: tags:
``` python
#copy atoms from conformation before QM and combine it with orca optimised conformations
output_dir = "pdb_opt/"
input_dir = "bp86tzvp/output/outCluster"
!mkdir -p {output_dir}
atoms = []
with open("clustering/outClustersPDB/outCluster0.pdb", "r") as ifile:
for line in ifile.readlines():
if "ATOM" in line:
atoms.append(line[:26])
for i in clusters_ids:
hetatms = []
!babel -ixyz {input_dir}{i}/outCluster{i}.xyz -opdb {output_dir}temp_cluster_{i}.pdb
with open("{}temp_cluster_{}.pdb".format(output_dir, i), "r") as ifile:
for line in ifile.readlines():
if "HETATM" in line:
hetatms.append(line[27:66])
with open("pdb_opt/cluster{}.pdb".format(i), "w") as output_cluster:
for j in range(len(atoms)):
output_cluster.write(atoms[j])
output_cluster.write(hetatms[j] + "\n")
!rm {output_dir}/temp_*
```
%% Cell type:code id: tags:
``` python
input_dir = "pdb_opt/"
output_file = "clusters.pdb"
#concatinate optimised conformations to trajectory
with open(output_file, "w") as ofile:
for i in clusters_ids:
ofile.write("MODEL {}\n".format(str(i)))
with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile:
ofile.write(ifile.read())
ofile.write("ENDMDL\n")
with open("plumed.dat", "w") as ifile:
cvs = []
num_atoms = sum(1 for line in open("{}cluster{}.pdb".format(input_dir, clusters_ids[0])))
ifile.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(str(num_atoms)))
for i in range(0, len(torsions)):
cvs.append("cv{}".format(i))
ifile.write("TORSION ATOMS=")
ifile.write(",".join(str(x) for x in torsions[i]))
ifile.write(" LABEL={}\n".format(cvs[i]))
ifile.write("PRINT ARG=")
ifile.write(",".join(cvs))
ifile.write(" STRIDE=1 FILE=DIHEDRALS")
#compute dihedrals
gmx_run(f"driver --plumed plumed.dat --mf_pdb {output_file}")
lines = []
with open("DIHEDRALS", "r") as ifile:
for line in ifile.readlines():
if "#" not in line:
lines.append(line)
with open("DIHEDRALS", "w") as ofile:
for line in lines:
ofile.write(line)
```
%% Cell type:code id: tags:
``` python
#perform minimisations and compute GAFF energy
cvs = [[] for _ in range(len(clusters_ids))]
with open("DIHEDRALS","r") as ifile:
dihedrals = ifile.readlines()
for i in range(len(dihedrals)):
t_angles = dihedrals[i].split()
for j in range(len(torsions)):
cvs[i].append(float(t_angles[j+1])*(180/math.pi))
def generate_restraint(cluster_i):
with open("MOL_GMX.top", "r") as ifile, open("gaff/cluster_{}/restrained.top".format(str(cluster_i)), "w") as ofile:
for line in ifile.readlines():
if line == "; Ligand position restraints\n":
ofile.write("\n")
ofile.write("[ dihedral_restraints ]\n")
for j in range(len(torsions)):
ofile.write(" ".join(torsions[j]))
ofile.write("2 %3.1f 0 500\n" %cvs[i][j])
ofile.write(line)
!mkdir -p -m 757 gaff
#select groups for energy evaluation
groups = "10"
for i in tqdm(clusters_ids):
!mkdir -p -m 757 gaff/cluster_{i}
shutil.copy("pdb_opt/cluster{}.pdb".format(i), "gaff/cluster_{}".format(i))
shutil.copy("MOL_GMX.top", "gaff/cluster_{}".format(i))
shutil.copy("em.mdp", "gaff/cluster_{}".format(i))
shutil.copy("md.mdp", "gaff/cluster_{}".format(i))
generate_restraint(i)
gmx_run(f"editconf -f cluster{i}.pdb -box 3 3 3 -bt cubic -c -o box.gro", workdir=f"/gaff/cluster_{i}")
gmx_run("grompp -f em -c box.gro -p restrained.top -o em1", workdir=f"/gaff/cluster_{i}")
gmx_run("mdrun -ntomp 2 -s em1 -c after_em1 -g em1 -e em1 -o em1", workdir=f"/gaff/cluster_{i}")
gmx_run("grompp -f md -c box.gro -p MOL_GMX.top -o rerun", workdir=f"/gaff/cluster_{i}")
gmx_run("mdrun -ntomp 2 -s rerun -rerun em1 -c after_rerun -g rerun -e rerun -o rerun", workdir=f"/gaff/cluster_{i}")
gmx_run("energy -f rerun.edr -o rerun.xvg", workdir=f"/gaff/cluster_{i}", groups=groups)
```
%% Cell type:code id: tags:
``` python
#extract energies and from each energy value subtract minimal energy
energies_lst = []
for i in clusters_ids:
with open("gaff/cluster_{}/rerun.xvg".format(i, "r")) as ifile:
last_line = ifile.readlines()[-1]
energies = last_line.split(" ")
energies_lst.append(energies[len(energies) - 1].rstrip())
min_energy = min(energies_lst)
with open("gaff_energies.txt", "w") as ofile:
for energy in energies_lst:
ofile.write("{} \n".format((float(energy) - float(min_energy))))
```
%% Cell type:markdown id: tags:
## 6. Define correction of force field
%% Cell type:code id: tags:
``` python
#subtract from orca energies computed in step 5, GAFF energies computed in step 6
with open("reference", "w") as ofile:
with open("orca_energies.txt", "r") as orca_energies, open("gaff_energies.txt", "r") as gaff_energies:
for orca_energy in orca_energies.readlines():
for gaff_energy in gaff_energies.readlines():
ofile.write("{} \n".format((float(orca_energy) - float(gaff_energy))))
```
%% Cell type:code id: tags:
``` python
#write final corrections into output file along with representative conformations
input_dir = "pdb_opt/"
with open("reference.pdb", "w") as ofile, open("reference", "r") as ifile:
final_energies = ifile.readlines()
energy_index = 0
for i in clusters_ids:
ofile.write("REMARK X={}".format(final_energies[energy_index]))
with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile1:
ofile.writelines(ifile1.readlines())
energy_index += 1
```
%% Cell type:code id: tags:
``` python
x1 = []
y1 = []
with open("DIHEDRALS") as ifile:
for line in ifile.readlines():
split_values = line.split()
x1.append(split_values[1])
y1.append(split_values[2])
plot_landmarks(x, y, x1, y1).show()
```
Loading