Skip to content
Snippets Groups Projects
Commit e2e12bcf authored by Vladimír Višňovský's avatar Vladimír Višňovský
Browse files

Revert "added intro and hyperlinks"

This reverts commit f88c0962
parent f88c0962
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Property Map Collective Variable Force Field Correction Pipeline
---
The pipeline generates force field corrections in .pdb format and is divided into these steps:
1. [Definition of computational parameters](#1.-Definition-of-computational-parameters)
2. [Molecule shape processing](#2.-Molecule-shape-processing)
3. [Preparation of environment and molecule](#3.-Preparation-of-environment-and-molecule)
4. [Generation of representative configurations](#4.-Generation-of-representative-configurations)
5. [Accurate and innacurate energy evaluation](#5.-Accurate-and-innacurate-energy-evaluation)
6. [Define correction of force field](#6.-Define-correction-of-force-field)
## 1. Definition of computational parameters ## 1. Definition of computational parameters
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import os import os
import sys import sys
import shutil import shutil
import re import re
import math import math
import time import time
import subprocess import subprocess
import nglview as nv import nglview as nv
import pytraj as pt import pytraj as pt
#insert path to python scripts #insert path to python scripts
sys.path.insert(1,"/home/base/modules") sys.path.insert(1,"/home/base/modules")
sys.path.insert(1,"/home/base/modules/gmx_orca") sys.path.insert(1,"/home/base/modules/gmx_orca")
from draw_2d import * from draw_2d import *
from draw_3d import drawit from draw_3d import drawit
from tqdm.notebook import tqdm from tqdm.notebook import tqdm
from molvs import Standardizer from molvs import Standardizer
from rdkit import Chem from rdkit import Chem
from rdkit.Chem import Draw from rdkit.Chem import Draw
from rdkit.Chem import SDWriter from rdkit.Chem import SDWriter
from rdkit.Chem import AllChem from rdkit.Chem import AllChem
from rdkit.Chem import MolFromSmarts from rdkit.Chem import MolFromSmarts
from rdkit.Chem import rdmolops from rdkit.Chem import rdmolops
from rdkit.Chem import rdchem from rdkit.Chem import rdchem
from convert import convert_to_orca_methods from convert import convert_to_orca_methods
from plot_graph import plot_landmarks from plot_graph import plot_landmarks
from gmx_orca_run import gmx_run, orca_run, parallel_wait from gmx_orca_run import gmx_run, orca_run, parallel_wait
from replace import replace_text_for_embedding from replace import replace_text_for_embedding
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#number of configurations to generate in basic energy minimization #number of configurations to generate in basic energy minimization
numc = 50 numc = 50
#path to applications #path to applications
orca_job_check = "/home/base/modules/orcajobcheck.py" orca_job_check = "/home/base/modules/orcajobcheck.py"
xyz_to_mol = "/home/base/modules/xyz2mol/xyz2mol.py" xyz_to_mol = "/home/base/modules/xyz2mol/xyz2mol.py"
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 2. Molecule shape processing ## 2. Molecule shape processing
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#specify input of desired molecule in SMILES #specify input of desired molecule in SMILES
smiles_molecule = "CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)C[NH+]3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5" smiles_molecule = "CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)C[NH+]3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5"
molecule = Chem.MolFromSmiles(smiles_molecule) molecule = Chem.MolFromSmiles(smiles_molecule)
render_svg(moltosvg(molecule)) render_svg(moltosvg(molecule))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
with open("molekula.smi", "w") as smifile: with open("molekula.smi", "w") as smifile:
smifile.write(smiles_molecule) smifile.write(smiles_molecule)
s = Standardizer() s = Standardizer()
molecule = s.standardize(molecule) molecule = s.standardize(molecule)
molecule = Chem.AddHs(molecule) molecule = Chem.AddHs(molecule)
natoms = molecule.GetNumAtoms() natoms = molecule.GetNumAtoms()
charge = Chem.rdmolops.GetFormalCharge(molecule) charge = Chem.rdmolops.GetFormalCharge(molecule)
render_svg(moltosvg(molecule)) render_svg(moltosvg(molecule))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#set lowest energy configuration #set lowest energy configuration
#set default id of minimal energy conformer to -1 #set default id of minimal energy conformer to -1
minid = -1 minid = -1
#set default minimal energy to highest possible float number #set default minimal energy to highest possible float number
minene = sys.float_info.max minene = sys.float_info.max
AllChem.EmbedMultipleConfs(molecule, clearConfs=True, numConfs=numc) AllChem.EmbedMultipleConfs(molecule, clearConfs=True, numConfs=numc)
#run 'MMFF94 force field' energy evaluation #run 'MMFF94 force field' energy evaluation
done = AllChem.MMFFOptimizeMoleculeConfs(molecule) done = AllChem.MMFFOptimizeMoleculeConfs(molecule)
for i in range(len(done)): for i in range(len(done)):
if done[i][1]<minene: if done[i][1]<minene:
minene = done[i][1] minene = done[i][1]
minid = i minid = i
print(f'Minimal energy: {minene}') print(f'Minimal energy: {minene}')
writer = SDWriter("molekula.mol") writer = SDWriter("molekula.mol")
writer.write(molecule, confId=minid) writer.write(molecule, confId=minid)
drawit(molecule) drawit(molecule)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#set pattern to detect torsion angles #set pattern to detect torsion angles
RotatableBond = Chem.MolFromSmarts('[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]') RotatableBond = Chem.MolFromSmarts('[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]')
rotatables = molecule.GetSubstructMatches(RotatableBond) rotatables = molecule.GetSubstructMatches(RotatableBond)
print(rotatables) print(rotatables)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#get numbers of atoms that form torsion angles #get numbers of atoms that form torsion angles
torsions = [] torsions = []
for rotatable in rotatables: for rotatable in rotatables:
pairs1 = [] pairs1 = []
pairs2 = [] pairs2 = []
for bond in molecule.GetBonds(): for bond in molecule.GetBonds():
if rotatable[0]==bond.GetBeginAtomIdx() and rotatable[1]!=bond.GetEndAtomIdx(): if rotatable[0]==bond.GetBeginAtomIdx() and rotatable[1]!=bond.GetEndAtomIdx():
pairs1.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()]) pairs1.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()])
if rotatable[1]==bond.GetBeginAtomIdx() and rotatable[0]!=bond.GetEndAtomIdx(): if rotatable[1]==bond.GetBeginAtomIdx() and rotatable[0]!=bond.GetEndAtomIdx():
pairs2.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()]) pairs2.append([bond.GetBeginAtomIdx(),bond.GetEndAtomIdx()])
torsions.append([pairs1[0][1],pairs1[0][0],pairs2[0][0],pairs2[0][1]]) torsions.append([pairs1[0][1],pairs1[0][0],pairs2[0][0],pairs2[0][1]])
print(torsions) print(torsions)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 3. Preparation of environment and molecule ## 3. Preparation of environment and molecule
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# prepare input files for energetic minimization # prepare input files for energetic minimization
netcharge = rdmolops.GetFormalCharge(molecule) netcharge = rdmolops.GetFormalCharge(molecule)
!antechamber -i molekula.mol -fi mdl -o molekula.prepi -fo prepi -c bcc -nc {netcharge} && \ !antechamber -i molekula.mol -fi mdl -o molekula.prepi -fo prepi -c bcc -nc {netcharge} && \
parmchk2 -i molekula.prepi -f prepi -o molekula.frcmod && \ parmchk2 -i molekula.prepi -f prepi -o molekula.frcmod && \
tleap -f tleapin.txt && \ tleap -f tleapin.txt && \
acpype -p molekula.prmtop -x molekula.inpcrd acpype -p molekula.prmtop -x molekula.inpcrd
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#fix ordering of atoms #fix ordering of atoms
order_before = [] order_before = []
order_after = [] order_after = []
with open('sqm.pdb','r') as pdbfile: with open('sqm.pdb','r') as pdbfile:
for atom in pdbfile.readlines(): for atom in pdbfile.readlines():
order_before.append(atom.split()[2]) order_before.append(atom.split()[2])
with open('MOL_GMX.gro','r') as grofile: with open('MOL_GMX.gro','r') as grofile:
for atom in grofile.readlines(): for atom in grofile.readlines():
if atom.startswith(' 1 MOL'): if atom.startswith(' 1 MOL'):
order_after.append(atom.split()[2]) order_after.append(atom.split()[2])
torsions_new = [] torsions_new = []
torsion_new = [] torsion_new = []
for torsion in torsions: for torsion in torsions:
for i in torsion: for i in torsion:
torsion_new.append(order_after.index(order_before[i])+1) torsion_new.append(order_after.index(order_before[i])+1)
torsions_new.append(torsion_new) torsions_new.append(torsion_new)
torsion_new = [] torsion_new = []
torsions = torsions_new torsions = torsions_new
print(torsions) print(torsions)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!mkdir -p em !mkdir -p em
with open("em/em.mdp", "w") as emfile: with open("em/em.mdp", "w") as emfile:
emfile.write("integrator = steep\n") emfile.write("integrator = steep\n")
emfile.write("nsteps = 100000\n") emfile.write("nsteps = 100000\n")
emfile.write("emtol = 0\n") emfile.write("emtol = 0\n")
emfile.write("emstep = 0.1\n") emfile.write("emstep = 0.1\n")
emfile.write("nstcomm = 1\n") emfile.write("nstcomm = 1\n")
emfile.write("nstxout = 100\n") emfile.write("nstxout = 100\n")
emfile.write("nstvout = 100\n") emfile.write("nstvout = 100\n")
emfile.write("nstfout = 0\n") emfile.write("nstfout = 0\n")
emfile.write("nstlog = 100\n") emfile.write("nstlog = 100\n")
emfile.write("nstenergy = 100\n") emfile.write("nstenergy = 100\n")
emfile.write("nstlist = 1\n") emfile.write("nstlist = 1\n")
emfile.write("ns_type = grid\n") emfile.write("ns_type = grid\n")
emfile.write("coulombtype = cut-off\n") emfile.write("coulombtype = cut-off\n")
emfile.write("rlist = 1.4\n") emfile.write("rlist = 1.4\n")
emfile.write("rcoulomb = 1.4\n") emfile.write("rcoulomb = 1.4\n")
emfile.write("rvdw = 1.4\n") emfile.write("rvdw = 1.4\n")
emfile.write("energygrps = System\n") emfile.write("energygrps = System\n")
emfile.write("epsilon-r = 80\n") emfile.write("epsilon-r = 80\n")
emfile.write("\n") emfile.write("\n")
shutil.copy("MOL_GMX.gro", "em/") shutil.copy("MOL_GMX.gro", "em/")
shutil.copy("MOL_GMX.top", "em/") shutil.copy("MOL_GMX.top", "em/")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
gmx_run("editconf -f MOL_GMX -o box -c -box 3 3 3", workdir="em") gmx_run("editconf -f MOL_GMX -o box -c -box 3 3 3", workdir="em")
gmx_run("grompp -f em.mdp -c box -p MOL_GMX -o em1", workdir="em") gmx_run("grompp -f em.mdp -c box -p MOL_GMX -o em1", workdir="em")
gmx_run("mdrun -deffnm em1 -ntomp 2", workdir="em") gmx_run("mdrun -deffnm em1 -ntomp 2", workdir="em")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#prepare files for getting the molecule to dynamic state #prepare files for getting the molecule to dynamic state
!mkdir -p md !mkdir -p md
with open("md/md.mdp", "w") as mdfile: with open("md/md.mdp", "w") as mdfile:
mdfile.write("integrator = sd\n") mdfile.write("integrator = sd\n")
mdfile.write("nsteps = 100000\n") mdfile.write("nsteps = 100000\n")
mdfile.write("dt = 0.001\n") mdfile.write("dt = 0.001\n")
mdfile.write("nstxout = 1000\n") mdfile.write("nstxout = 1000\n")
mdfile.write("nstvout = 1000\n") mdfile.write("nstvout = 1000\n")
mdfile.write("nstenergy = 1000\n") mdfile.write("nstenergy = 1000\n")
mdfile.write("nstlog = 1000\n") mdfile.write("nstlog = 1000\n")
mdfile.write("continuation = no\n") mdfile.write("continuation = no\n")
mdfile.write("constraints = none\n") mdfile.write("constraints = none\n")
mdfile.write("cutoff-scheme = Verlet\n") mdfile.write("cutoff-scheme = Verlet\n")
mdfile.write("ns_type = grid\n") mdfile.write("ns_type = grid\n")
mdfile.write("nstlist = 1\n") mdfile.write("nstlist = 1\n")
mdfile.write("rlist = 1.4\n") mdfile.write("rlist = 1.4\n")
mdfile.write("rcoulomb = 1.4\n") mdfile.write("rcoulomb = 1.4\n")
mdfile.write("rvdw = 1.4\n") mdfile.write("rvdw = 1.4\n")
mdfile.write("coulombtype = cut-off\n") mdfile.write("coulombtype = cut-off\n")
mdfile.write("tcoupl = V-rescale\n") mdfile.write("tcoupl = V-rescale\n")
mdfile.write("tc-grps = system\n") mdfile.write("tc-grps = system\n")
mdfile.write("tau_t = 0.1\n") mdfile.write("tau_t = 0.1\n")
mdfile.write("ref_t = 300\n") mdfile.write("ref_t = 300\n")
mdfile.write("pcoupl = no\n") mdfile.write("pcoupl = no\n")
mdfile.write("pbc = xyz\n") mdfile.write("pbc = xyz\n")
mdfile.write("gen_vel = yes\n") mdfile.write("gen_vel = yes\n")
mdfile.write("epsilon-r = 80\n") mdfile.write("epsilon-r = 80\n")
mdfile.write("\n") mdfile.write("\n")
shutil.copy("em/em1.gro", "md/") shutil.copy("em/em1.gro", "md/")
shutil.copy("MOL_GMX.top", "md/") shutil.copy("MOL_GMX.top", "md/")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
gmx_run("grompp -f md.mdp -c em1 -p MOL_GMX -o md1", workdir="md") gmx_run("grompp -f md.mdp -c em1 -p MOL_GMX -o md1", workdir="md")
gmx_run("mdrun -deffnm md1 -ntomp 2", workdir="md") gmx_run("mdrun -deffnm md1 -ntomp 2", workdir="md")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#show trajectory of conformations #show trajectory of conformations
#select group for trjconv evaluation #select group for trjconv evaluation
#Group 0 ( System) #Group 0 ( System)
#Group 1 ( Other) #Group 1 ( Other)
#Group 2 ( MOL) #Group 2 ( MOL)
group = "0" group = "0"
gmx_run("trjconv -s md1.tpr -f md1.trr -o outTraj.pdb", workdir="md", groups=group) gmx_run("trjconv -s md1.tpr -f md1.trr -o outTraj.pdb", workdir="md", groups=group)
traj = pt.load('md/outTraj.pdb') traj = pt.load('md/outTraj.pdb')
view = nv.show_pytraj(traj) view = nv.show_pytraj(traj)
view view
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#fix periodic boundaries errors when the molecule jumps out of the box #fix periodic boundaries errors when the molecule jumps out of the box
#select group for trjconv evaluation #select group for trjconv evaluation
#Group 0 ( System) #Group 0 ( System)
#Group 1 ( Other) #Group 1 ( Other)
#Group 2 ( MOL) #Group 2 ( MOL)
group = "1" group = "1"
for i in tqdm(range(len(torsions))): for i in tqdm(range(len(torsions))):
fr = str(float(100-len(torsions)+i)-0.01) fr = str(float(100-len(torsions)+i)-0.01)
to = str(float(100-len(torsions)+i)+0.01) to = str(float(100-len(torsions)+i)+0.01)
gmx_run(f"trjconv -pbc nojump -s md1 -f md1 -o frame{i}.gro -b {fr} -e {to}<<EOF\n0\nEOF", workdir="md", groups=group) gmx_run(f"trjconv -pbc nojump -s md1 -f md1 -o frame{i}.gro -b {fr} -e {to}<<EOF\n0\nEOF", workdir="md", groups=group)
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 4. Generation of representative configurations ## 4. Generation of representative configurations
### 4.1 Trajectory generation ### 4.1 Trajectory generation
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
!mkdir -p mtd !mkdir -p mtd
with open("mtd/mtd.mdp", "w") as mtdfile: with open("mtd/mtd.mdp", "w") as mtdfile:
mtdfile.write("integrator = sd\n") mtdfile.write("integrator = sd\n")
mtdfile.write("nsteps = 10000000\n") #10 ns each replica mtdfile.write("nsteps = 10000000\n") #10 ns each replica
mtdfile.write("dt = 0.001\n") mtdfile.write("dt = 0.001\n")
mtdfile.write("nstxout = 10000\n") # output coordinates every 10ps to *.trr mtdfile.write("nstxout = 10000\n") # output coordinates every 10ps to *.trr
mtdfile.write("nstvout = 10000\n") # output velocities every 10ps to *.trr mtdfile.write("nstvout = 10000\n") # output velocities every 10ps to *.trr
mtdfile.write("nstenergy = 1000\n") mtdfile.write("nstenergy = 1000\n")
mtdfile.write("nstlog = 1000\n") mtdfile.write("nstlog = 1000\n")
mtdfile.write("continuation = no\n") mtdfile.write("continuation = no\n")
mtdfile.write("constraints = none\n") mtdfile.write("constraints = none\n")
mtdfile.write("cutoff-scheme = Verlet\n") mtdfile.write("cutoff-scheme = Verlet\n")
mtdfile.write("ns_type = grid\n") mtdfile.write("ns_type = grid\n")
mtdfile.write("nstlist = 1\n") mtdfile.write("nstlist = 1\n")
mtdfile.write("rlist = 1.4\n") mtdfile.write("rlist = 1.4\n")
mtdfile.write("rcoulomb = 1.4\n") mtdfile.write("rcoulomb = 1.4\n")
mtdfile.write("rvdw = 1.4\n") mtdfile.write("rvdw = 1.4\n")
mtdfile.write("coulombtype = cut-off\n") mtdfile.write("coulombtype = cut-off\n")
mtdfile.write("tcoupl = V-rescale\n") mtdfile.write("tcoupl = V-rescale\n")
mtdfile.write("tc-grps = system\n") mtdfile.write("tc-grps = system\n")
mtdfile.write("tau_t = 0.1\n") mtdfile.write("tau_t = 0.1\n")
mtdfile.write("ref_t = 300\n") mtdfile.write("ref_t = 300\n")
mtdfile.write("pcoupl = no\n") mtdfile.write("pcoupl = no\n")
mtdfile.write("pbc = xyz\n") mtdfile.write("pbc = xyz\n")
mtdfile.write("gen_vel = yes\n") mtdfile.write("gen_vel = yes\n")
mtdfile.write("epsilon-r = 80\n") mtdfile.write("epsilon-r = 80\n")
mtdfile.write("\n") mtdfile.write("\n")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
for i in range(len(torsions)): for i in range(len(torsions)):
if not os.path.exists("mtd/w{}".format(i)): if not os.path.exists("mtd/w{}".format(i)):
!mkdir -p mtd/w{i} !mkdir -p mtd/w{i}
with open("mtd/w{}/plumed.dat".format(i), "w") as plumeddat: with open("mtd/w{}/plumed.dat".format(i), "w") as plumeddat:
plumeddat.write("RANDOM_EXCHANGES\n") plumeddat.write("RANDOM_EXCHANGES\n")
plumeddat.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(natoms)) plumeddat.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(natoms))
for j in range(len(torsions)): for j in range(len(torsions)):
plumeddat.write("TORSION ATOMS={},{},{},{} LABEL=cv{}\n".format(torsions[j][0],torsions[j][1],torsions[j][2],torsions[j][3],j+1)) plumeddat.write("TORSION ATOMS={},{},{},{} LABEL=cv{}\n".format(torsions[j][0],torsions[j][1],torsions[j][2],torsions[j][3],j+1))
plumeddat.write("METAD ARG=cv{} HEIGHT=0.5 SIGMA=0.3 PACE=1000 GRID_MIN=-pi GRID_MAX=pi BIASFACTOR=15 LABEL=be\n".format(i+1)) plumeddat.write("METAD ARG=cv{} HEIGHT=0.5 SIGMA=0.3 PACE=1000 GRID_MIN=-pi GRID_MAX=pi BIASFACTOR=15 LABEL=be\n".format(i+1))
cvs = "" cvs = ""
for j in range(len(torsions)): for j in range(len(torsions)):
cvs=cvs+"cv{},".format(j+1) cvs=cvs+"cv{},".format(j+1)
cvs = cvs[:-1] cvs = cvs[:-1]
plumeddat.write("PRINT ARG={} STRIDE=1000 FILE=COLVAR\n".format(cvs)) plumeddat.write("PRINT ARG={} STRIDE=1000 FILE=COLVAR\n".format(cvs))
plumeddat.write("PRINT ARG=be.bias STRIDE=1000 FILE=BIAS\n") plumeddat.write("PRINT ARG=be.bias STRIDE=1000 FILE=BIAS\n")
shutil.copy("MOL_GMX.top", "mtd/") shutil.copy("MOL_GMX.top", "mtd/")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
for i in tqdm(range(len(torsions))): for i in tqdm(range(len(torsions))):
shutil.copy("md/frame{}.gro".format(i), "mtd/w{}/".format(i)) shutil.copy("md/frame{}.gro".format(i), "mtd/w{}/".format(i))
gmx_run(f"grompp -f mtd.mdp -c w{i}/frame{i} -p MOL_GMX -o w{i}/mtd1", workdir="mtd", parallel=True) gmx_run(f"grompp -f mtd.mdp -c w{i}/frame{i} -p MOL_GMX -o w{i}/mtd1", workdir="mtd", parallel=True)
parallel_wait() parallel_wait()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
directories = "" directories = ""
for i in range(len(torsions)): for i in range(len(torsions)):
directories = directories + "w{} ".format(i) directories = directories + "w{} ".format(i)
#see mdrunlog in mtd directory for insight to mdrun simulation (e.g 'tail -f mdrunlog') #see mdrunlog in mtd directory for insight to mdrun simulation (e.g 'tail -f mdrunlog')
gmx_run(f"mdrun -g mdrunlog -ntomp 1 -deffnm mtd1 -replex 500 -plumed plumed.dat -multidir {directories}", gmx_run(f"mdrun -g mdrunlog -ntomp 1 -deffnm mtd1 -replex 500 -plumed plumed.dat -multidir {directories}",
workdir="mtd", workdir="mtd",
mpi_run=len(torsions)) mpi_run=len(torsions))
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### 4.2 Configurations clustering ### 4.2 Configurations clustering
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#select groups for cluster evaluation #select groups for cluster evaluation
#Group 0 ( System) #Group 0 ( System)
#Group 1 ( Other) #Group 1 ( Other)
#Group 2 ( MOL) #Group 2 ( MOL)
#Group 3 ( Custom) #Group 3 ( Custom)
groups = "30" groups = "30"
!mkdir -p -m 757 clustering ; mkdir -p -m 757 clustering/outClustersPDB ; mkdir -p -m 757 clustering/outClustersXYZ ; mkdir -p -m 757 clustering/outClusters !mkdir -p -m 757 clustering ; mkdir -p -m 757 clustering/outClustersPDB ; mkdir -p -m 757 clustering/outClustersXYZ ; mkdir -p -m 757 clustering/outClusters
trajectories = "" trajectories = ""
for i in range(len(torsions)): for i in range(len(torsions)):
trajectories = trajectories + f"mtd/w{i}/mtd1.trr " trajectories = trajectories + f"mtd/w{i}/mtd1.trr "
#concatinate trajectories #concatinate trajectories
gmx_run(f"trjcat -f {trajectories} -cat -o mtd/mtd1.trr") gmx_run(f"trjcat -f {trajectories} -cat -o mtd/mtd1.trr")
#make index file with non-hydrogen atoms, use 1 & ! a H* and q for input #make index file with non-hydrogen atoms, use 1 & ! a H* and q for input
gmx_run("make_ndx -f md/md1.tpr -o mtd/index.ndx", make_ndx="1&!aH*") gmx_run("make_ndx -f md/md1.tpr -o mtd/index.ndx", make_ndx="1&!aH*")
#include index file and select index group 3 #include index file and select index group 3
gmx_run("cluster -method gromos -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -n mtd/index.ndx -cutoff 0.15 -cl clustering/outCluster.pdb", gmx_run("cluster -method gromos -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -n mtd/index.ndx -cutoff 0.15 -cl clustering/outCluster.pdb",
groups=groups) groups=groups)
#divide all clusters from gmx cluster to single clusters #divide all clusters from gmx cluster to single clusters
with open("clustering/outCluster.pdb") as input_cluster: with open("clustering/outCluster.pdb") as input_cluster:
i = 0 i = 0
outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w") outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
for line in input_cluster: for line in input_cluster:
if line != "ENDMDL\n": if line != "ENDMDL\n":
outFile.write(line) outFile.write(line)
continue continue
outFile.write("ENDMDL\n") outFile.write("ENDMDL\n")
i += 1 i += 1
outFile.close() outFile.close()
outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w") outFile = open("clustering/outClustersPDB/outCluster{}.pdb".format(i), "w")
!rm clustering/outClustersPDB/outCluster{i}.pdb !rm clustering/outClustersPDB/outCluster{i}.pdb
clusters_ids = [x for x in range(0, i)] clusters_ids = [x for x in range(0, i)]
#convert .pdb clusters to .xyz #convert .pdb clusters to .xyz
for pdb_cluster in tqdm(os.listdir("clustering/outClustersPDB/")): for pdb_cluster in tqdm(os.listdir("clustering/outClustersPDB/")):
!babel -ipdb clustering/outClustersPDB/{pdb_cluster} -oxyz clustering/outClustersXYZ/{pdb_cluster.replace("pdb", "xyz")} !babel -ipdb clustering/outClustersPDB/{pdb_cluster} -oxyz clustering/outClustersXYZ/{pdb_cluster.replace("pdb", "xyz")}
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### 4.3 Visualize landmarks ### 4.3 Visualize landmarks
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#apply periodic boundary conditions to mtd trajectory -> fitting -> no Hydrogen -> train parmtSNEcv -> compute embeddings #apply periodic boundary conditions to mtd trajectory -> fitting -> no Hydrogen -> train parmtSNEcv -> compute embeddings
!mkdir -p visualization/traj !mkdir -p visualization/traj
#Group 0 ( System) #Group 0 ( System)
#Group 1 ( Other) #Group 1 ( Other)
#Group 2 ( MOL) #Group 2 ( MOL)
#Group 3 ( Custom) #Group 3 ( Custom)
#select group for periodic boundaries check output: #select group for periodic boundaries check output:
group = "0" group = "0"
gmx_run("trjconv -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -pbc mol -o visualization/traj/mtd1_nopbc.xtc", groups=group) gmx_run("trjconv -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -pbc mol -o visualization/traj/mtd1_nopbc.xtc", groups=group)
#select groups for fitting and output: #select groups for fitting and output:
groups = "00" groups = "00"
gmx_run("trjconv -f visualization/traj/mtd1_nopbc.xtc -s clustering/outClustersPDB/outCluster0.pdb -fit rot+trans -o visualization/traj/mtd1_fit.xtc", gmx_run("trjconv -f visualization/traj/mtd1_nopbc.xtc -s clustering/outClustersPDB/outCluster0.pdb -fit rot+trans -o visualization/traj/mtd1_fit.xtc",
groups=groups) groups=groups)
#select group for no Hydrogen output: #select group for no Hydrogen output:
group = "3" group = "3"
gmx_run("trjconv -f visualization/traj/mtd1_fit.xtc -n mtd/index.ndx -o visualization/traj/mtd1_fit_noH.xtc", gmx_run("trjconv -f visualization/traj/mtd1_fit.xtc -n mtd/index.ndx -o visualization/traj/mtd1_fit_noH.xtc",
groups=group) groups=group)
#select groups for size of box and output: #select groups for size of box and output:
groups = "33" groups = "33"
gmx_run("editconf -f clustering/outClustersPDB/outCluster0.pdb -n mtd/index.ndx -box 3 3 3 -c -o visualization/ref.pdb", gmx_run("editconf -f clustering/outClustersPDB/outCluster0.pdb -n mtd/index.ndx -box 3 3 3 -c -o visualization/ref.pdb",
groups=groups) groups=groups)
#train parmtSNEcv #train parmtSNEcv
!parmtSNEcv -i visualization/traj/mtd1_fit_noH.xtc -p visualization/ref.pdb -boxx 3 -boxy 3 -boxz 3 -dim 2 -layers 2 -o visualization/out.txt -plumed visualization/plumed.dat -epochs 2000 !parmtSNEcv -i visualization/traj/mtd1_fit_noH.xtc -p visualization/ref.pdb -boxx 3 -boxy 3 -boxz 3 -dim 2 -layers 2 -o visualization/out.txt -plumed visualization/plumed.dat -epochs 2000
#modify plumed.dat to compute embedding in every step and change name of file for *** mtd trajectory *** #modify plumed.dat to compute embedding in every step and change name of file for *** mtd trajectory ***
replace_text_for_embedding("visualization/plumed.dat", stride=(100, 1), file_name=("COLVAR", "2d_embedding")) replace_text_for_embedding("visualization/plumed.dat", stride=(100, 1), file_name=("COLVAR", "2d_embedding"))
#fix weights of atoms #fix weights of atoms
replace_text_for_embedding("visualization/ref.pdb", weight=(0.00, 1.00)) replace_text_for_embedding("visualization/ref.pdb", weight=(0.00, 1.00))
#run as plumed #run as plumed
gmx_run("driver --plumed visualization/plumed.dat --mf_xtc visualization/traj/mtd1_fit.xtc") gmx_run("driver --plumed visualization/plumed.dat --mf_xtc visualization/traj/mtd1_fit.xtc")
#modify plumed.dat to compute embedding in every step and change name of file for *** representatives *** #modify plumed.dat to compute embedding in every step and change name of file for *** representatives ***
replace_text_for_embedding("visualization/plumed.dat", file_name=("2d_embedding", "landmarks")) replace_text_for_embedding("visualization/plumed.dat", file_name=("2d_embedding", "landmarks"))
#run as plumed #run as plumed
gmx_run("driver --plumed visualization/plumed.dat --mf_pdb clustering/outCluster.pdb") gmx_run("driver --plumed visualization/plumed.dat --mf_pdb clustering/outCluster.pdb")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#visualize #visualize
x = [] x = []
y = [] y = []
x1 = [] x1 = []
y1 = [] y1 = []
with open("2d_embedding", "r") as ifile: with open("2d_embedding", "r") as ifile:
for line in ifile.readlines()[1:]: for line in ifile.readlines()[1:]:
split_values = line.split() split_values = line.split()
x.append(split_values[1]) x.append(split_values[1])
y.append(split_values[2]) y.append(split_values[2])
with open("landmarks", "r") as ifile: with open("landmarks", "r") as ifile:
for line in ifile.readlines()[1:]: for line in ifile.readlines()[1:]:
split_values = line.split() split_values = line.split()
x1.append(split_values[1]) x1.append(split_values[1])
y1.append(split_values[2]) y1.append(split_values[2])
plot_landmarks(x, y, x1, y1).show() plot_landmarks(x, y, x1, y1).show()
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 5. Accurate and innacurate energy evaluation ## 5. Accurate and innacurate energy evaluation
### 5.1 AM1 method ### 5.1 AM1 method
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#fix indexing #fix indexing
#orca indexes from zero #orca indexes from zero
orca_torsions = [[] for _ in range(len(torsions))] orca_torsions = [[] for _ in range(len(torsions))]
torsion_index = 0 torsion_index = 0
for torsion in torsions: for torsion in torsions:
for atom_index in torsion: for atom_index in torsion:
orca_torsions[torsion_index].append(atom_index - 1) orca_torsions[torsion_index].append(atom_index - 1)
torsion_index += 1 torsion_index += 1
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#1. convert #1. convert
#orca method description - first line in orca method #orca method description - first line in orca method
method_description = "!AM1 Opt" method_description = "!AM1 Opt"
input_dir = "clustering/outClustersXYZ/" input_dir = "clustering/outClustersXYZ/"
output_dir = "am1/input/" output_dir = "am1/input/"
spin = 1 spin = 1
!mkdir -p -m 757 am1 ; mkdir -p -m 757 am1/input ; mkdir -p -m 757 am1/output !mkdir -p -m 757 am1 ; mkdir -p -m 757 am1/input ; mkdir -p -m 757 am1/output
!mkdir -p -m 757 orca_output ; mkdir -p -m 757 orca_output/am1 ; mkdir -p -m 757 orca_output/bp86svp ; mkdir -p -m 757 orca_output/bp86tzvp !mkdir -p -m 757 orca_output ; mkdir -p -m 757 orca_output/am1 ; mkdir -p -m 757 orca_output/bp86svp ; mkdir -p -m 757 orca_output/bp86tzvp
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, -1, (charge, spin)) convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, -1, (charge, spin))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#2. compute #2. compute
#minimisation of conformations before quantum mechanics #minimisation of conformations before quantum mechanics
#see logs in orca_output directory... #see logs in orca_output directory...
input_path = "am1/input/" input_path = "am1/input/"
output_path = "am1/output/" output_path = "am1/output/"
successfuly_converged = 0 successfuly_converged = 0
clusters_count = len(clusters_ids) clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)): for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"): if not orca_method.endswith(".inp"):
continue continue
orca_method_name = orca_method.replace(".inp", "") orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):] orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/" output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path): if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path} !mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path) shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/am1/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True) orca_run(orca_method, f"orca_output/am1/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait() parallel_wait()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#3. check output #3. check output
#check output of orca methods #check output of orca methods
for cluster_id in clusters_ids: for cluster_id in clusters_ids:
with open(f"orca_output/am1/orca_output{cluster_id}.out") as iorca_output: with open(f"orca_output/am1/orca_output{cluster_id}.out") as iorca_output:
iorca_output_text = iorca_output.read() iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text: if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
print(iorca_output_text) print(iorca_output_text)
raise SystemExit(f"Error in am1 method of cluster no. {cluster_id}. You should not continue computation!") raise SystemExit(f"Error in am1 method of cluster no. {cluster_id}. You should not continue computation!")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### 5.2 BP86 SVP method ### 5.2 BP86 SVP method
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#1. convert #1. convert
#orca method description - first line in orca method #orca method description - first line in orca method
method_description = "!BP86 def2-SVP TightSCF Opt" method_description = "!BP86 def2-SVP TightSCF Opt"
output_dir = "bp86svp/input/" output_dir = "bp86svp/input/"
spin = 1 spin = 1
required_cpus = 12 required_cpus = 12
#orca method description - first line in orca method #orca method description - first line in orca method
method_description = "!BP86 def2-SVP TightSCF Opt" method_description = "!BP86 def2-SVP TightSCF Opt"
!mkdir -p -m 757 bp86svp ; mkdir -p -m 757 bp86svp/input ; mkdir -p -m 757 bp86svp/output !mkdir -p -m 757 bp86svp ; mkdir -p -m 757 bp86svp/input ; mkdir -p -m 757 bp86svp/output
for i in clusters_ids: for i in clusters_ids:
input_dir = "am1/output/outCluster{}/".format(i) input_dir = "am1/output/outCluster{}/".format(i)
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin)) convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#2. compute #2. compute
#minimisation of conformations before quantum mechanics #minimisation of conformations before quantum mechanics
#see logs in orca_output directory... #see logs in orca_output directory...
input_path = "bp86svp/input/" input_path = "bp86svp/input/"
output_path = "bp86svp/output/" output_path = "bp86svp/output/"
successfuly_converged = 0 successfuly_converged = 0
clusters_count = len(clusters_ids) clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)): for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"): if not orca_method.endswith(".inp"):
continue continue
orca_method_name = orca_method.replace(".inp", "") orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):] orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/" output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path): if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path} !mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path) shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/bp86svp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True) orca_run(orca_method, f"orca_output/bp86svp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait() parallel_wait()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#3. check output && discard non converging clusters #3. check output && discard non converging clusters
for cluster_id in clusters_ids: for cluster_id in clusters_ids:
with open(f"orca_output/bp86svp/orca_output{cluster_id}.out") as iorca_output: with open(f"orca_output/bp86svp/orca_output{cluster_id}.out") as iorca_output:
iorca_output_text = iorca_output.read() iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text: if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
!{orca_job_check} orca_output/bp86svp/orca_output{cluster_id}.out !{orca_job_check} orca_output/bp86svp/orca_output{cluster_id}.out
clusters_ids.remove(int(orca_method_number)) clusters_ids.remove(int(orca_method_number))
else: else:
successfuly_converged += 1 successfuly_converged += 1
print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **") print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **")
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### 5.3 BP86 TZVP method ### 5.3 BP86 TZVP method
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#1. convert #1. convert
#orca method description - first line in orca method #orca method description - first line in orca method
method_description = "!BP86 def2-TZVP TightSCF Opt" method_description = "!BP86 def2-TZVP TightSCF Opt"
output_dir = "bp86tzvp/input/" output_dir = "bp86tzvp/input/"
spin = 1 spin = 1
required_cpus = 12 required_cpus = 12
#orca method description - first line in orca method #orca method description - first line in orca method
method_description = "!BP86 def2-TZVP TightSCF Opt" method_description = "!BP86 def2-TZVP TightSCF Opt"
!mkdir -p -m 757 bp86tzvp ; mkdir -p -m 757 bp86tzvp/input ; mkdir -p -m 757 bp86tzvp/output !mkdir -p -m 757 bp86tzvp ; mkdir -p -m 757 bp86tzvp/input ; mkdir -p -m 757 bp86tzvp/output
for i in clusters_ids: for i in clusters_ids:
input_dir = "bp86svp/output/outCluster{}/".format(i) input_dir = "bp86svp/output/outCluster{}/".format(i)
convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin)) convert_to_orca_methods(input_dir, output_dir, orca_torsions, method_description, required_cpus, (charge, spin))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#2. compute #2. compute
#run quantum mechanics #run quantum mechanics
#see logs in orca_output directory... #see logs in orca_output directory...
input_path = "bp86tzvp/input/" input_path = "bp86tzvp/input/"
output_path = "bp86tzvp/output/" output_path = "bp86tzvp/output/"
successfuly_converged = 0 successfuly_converged = 0
clusters_count = len(clusters_ids) clusters_count = len(clusters_ids)
for orca_method in tqdm(os.listdir(input_path)): for orca_method in tqdm(os.listdir(input_path)):
if not orca_method.endswith(".inp"): if not orca_method.endswith(".inp"):
continue continue
orca_method_name = orca_method.replace(".inp", "") orca_method_name = orca_method.replace(".inp", "")
orca_method_number = orca_method_name[len("outCluster"):] orca_method_number = orca_method_name[len("outCluster"):]
output_dir_path = output_path + orca_method_name + "/" output_dir_path = output_path + orca_method_name + "/"
if not os.path.exists(output_dir_path): if not os.path.exists(output_dir_path):
!mkdir -p -m 757 {output_dir_path} !mkdir -p -m 757 {output_dir_path}
shutil.copy(input_path + orca_method, output_dir_path) shutil.copy(input_path + orca_method, output_dir_path)
orca_run(orca_method, f"orca_output/bp86tzvp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True) orca_run(orca_method, f"orca_output/bp86tzvp/orca_output{orca_method_number}.out", workdir=output_dir_path, parallel=True)
parallel_wait() parallel_wait()
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#3. check output && discard non converging clusters #3. check output && discard non converging clusters
for cluster_id in clusters_ids: for cluster_id in clusters_ids:
with open(f"orca_output/bp86tzvp/orca_output{cluster_id}.out") as iorca_output: with open(f"orca_output/bp86tzvp/orca_output{cluster_id}.out") as iorca_output:
iorca_output_text = iorca_output.read() iorca_output_text = iorca_output.read()
if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text: if "****ORCA TERMINATED NORMALLY****" not in iorca_output_text:
!{orca_job_check} orca_output/bp86tzvp/orca_output{cluster_id}.out !{orca_job_check} orca_output/bp86tzvp/orca_output{cluster_id}.out
clusters_ids.remove(int(cluster_id)) clusters_ids.remove(int(cluster_id))
else: else:
successfuly_converged += 1 successfuly_converged += 1
print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **") print(f"** {successfuly_converged}/{clusters_count} successfuly converged - unconverged are not considered in next steps **")
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#extract final energies from output of orca #extract final energies from output of orca
with open("orca_energies.txt", "a") as ofile: with open("orca_energies.txt", "a") as ofile:
for orca_output in os.listdir("orca_output/bp86tzvp"): for orca_output in os.listdir("orca_output/bp86tzvp"):
with open("orca_output/bp86tzvp/" + orca_output) as iorca_output: with open("orca_output/bp86tzvp/" + orca_output) as iorca_output:
for line in reversed(list(iorca_output)): for line in reversed(list(iorca_output)):
energy_list = re.findall(r'(FINAL SINGLE POINT ENERGY)( +)(-?\d+\.\d+)', line) energy_list = re.findall(r'(FINAL SINGLE POINT ENERGY)( +)(-?\d+\.\d+)', line)
if len(energy_list) > 0: if len(energy_list) > 0:
ofile.write(energy_list[0][2]) ofile.write(energy_list[0][2])
ofile.write('\n') ofile.write('\n')
break break
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#convert from hartree to kJ/mol and write quantum mechanics energies to file #convert from hartree to kJ/mol and write quantum mechanics energies to file
energies_in_hartree = [] energies_in_hartree = []
with open("orca_energies.txt", "r") as ifile: with open("orca_energies.txt", "r") as ifile:
for line in ifile.readlines(): for line in ifile.readlines():
energies_in_hartree.append(float(line)) energies_in_hartree.append(float(line))
#constant to convert from hartree to kJ/mol #constant to convert from hartree to kJ/mol
CONVERSION_CONST = 2625.499638 CONVERSION_CONST = 2625.499638
minimum = min(energies_in_hartree) minimum = min(energies_in_hartree)
energies_in_kJ = [] energies_in_kJ = []
for H in energies_in_hartree: for H in energies_in_hartree:
energies_in_kJ.append((H-minimum)*CONVERSION_CONST) energies_in_kJ.append((H-minimum)*CONVERSION_CONST)
with open("orca_energies.txt", "w") as ofile: with open("orca_energies.txt", "w") as ofile:
for energy in energies_in_kJ: for energy in energies_in_kJ:
ofile.write("{}\n".format(energy)) ofile.write("{}\n".format(energy))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#copy atoms from conformation before QM and combine it with orca optimised conformations #copy atoms from conformation before QM and combine it with orca optimised conformations
output_dir = "pdb_opt/" output_dir = "pdb_opt/"
input_dir = "bp86tzvp/output/outCluster" input_dir = "bp86tzvp/output/outCluster"
!mkdir -p {output_dir} !mkdir -p {output_dir}
atoms = [] atoms = []
with open("clustering/outClustersPDB/outCluster0.pdb", "r") as ifile: with open("clustering/outClustersPDB/outCluster0.pdb", "r") as ifile:
for line in ifile.readlines(): for line in ifile.readlines():
if "ATOM" in line: if "ATOM" in line:
atoms.append(line[:26]) atoms.append(line[:26])
for i in clusters_ids: for i in clusters_ids:
hetatms = [] hetatms = []
!babel -ixyz {input_dir}{i}/outCluster{i}.xyz -opdb {output_dir}temp_cluster_{i}.pdb !babel -ixyz {input_dir}{i}/outCluster{i}.xyz -opdb {output_dir}temp_cluster_{i}.pdb
with open("{}temp_cluster_{}.pdb".format(output_dir, i), "r") as ifile: with open("{}temp_cluster_{}.pdb".format(output_dir, i), "r") as ifile:
for line in ifile.readlines(): for line in ifile.readlines():
if "HETATM" in line: if "HETATM" in line:
hetatms.append(line[27:66]) hetatms.append(line[27:66])
with open("pdb_opt/cluster{}.pdb".format(i), "w") as output_cluster: with open("pdb_opt/cluster{}.pdb".format(i), "w") as output_cluster:
for j in range(len(atoms)): for j in range(len(atoms)):
output_cluster.write(atoms[j]) output_cluster.write(atoms[j])
output_cluster.write(hetatms[j] + "\n") output_cluster.write(hetatms[j] + "\n")
!rm {output_dir}/temp_* !rm {output_dir}/temp_*
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
input_dir = "pdb_opt/" input_dir = "pdb_opt/"
output_file = "clusters.pdb" output_file = "clusters.pdb"
#concatinate optimised conformations to trajectory #concatinate optimised conformations to trajectory
with open(output_file, "w") as ofile: with open(output_file, "w") as ofile:
for i in clusters_ids: for i in clusters_ids:
ofile.write("MODEL {}\n".format(str(i))) ofile.write("MODEL {}\n".format(str(i)))
with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile: with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile:
ofile.write(ifile.read()) ofile.write(ifile.read())
ofile.write("ENDMDL\n") ofile.write("ENDMDL\n")
with open("plumed.dat", "w") as ifile: with open("plumed.dat", "w") as ifile:
cvs = [] cvs = []
num_atoms = sum(1 for line in open("{}cluster{}.pdb".format(input_dir, clusters_ids[0]))) num_atoms = sum(1 for line in open("{}cluster{}.pdb".format(input_dir, clusters_ids[0])))
ifile.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(str(num_atoms))) ifile.write("WHOLEMOLECULES ENTITY0=1-{}\n".format(str(num_atoms)))
for i in range(0, len(torsions)): for i in range(0, len(torsions)):
cvs.append("cv{}".format(i)) cvs.append("cv{}".format(i))
ifile.write("TORSION ATOMS=") ifile.write("TORSION ATOMS=")
ifile.write(",".join(str(x) for x in torsions[i])) ifile.write(",".join(str(x) for x in torsions[i]))
ifile.write(" LABEL={}\n".format(cvs[i])) ifile.write(" LABEL={}\n".format(cvs[i]))
ifile.write("PRINT ARG=") ifile.write("PRINT ARG=")
ifile.write(",".join(cvs)) ifile.write(",".join(cvs))
ifile.write(" STRIDE=1 FILE=DIHEDRALS") ifile.write(" STRIDE=1 FILE=DIHEDRALS")
#compute dihedrals #compute dihedrals
gmx_run(f"driver --plumed plumed.dat --mf_pdb {output_file}") gmx_run(f"driver --plumed plumed.dat --mf_pdb {output_file}")
lines = [] lines = []
with open("DIHEDRALS", "r") as ifile: with open("DIHEDRALS", "r") as ifile:
for line in ifile.readlines(): for line in ifile.readlines():
if "#" not in line: if "#" not in line:
lines.append(line) lines.append(line)
with open("DIHEDRALS", "w") as ofile: with open("DIHEDRALS", "w") as ofile:
for line in lines: for line in lines:
ofile.write(line) ofile.write(line)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#perform minimisations and compute GAFF energy #perform minimisations and compute GAFF energy
cvs = [[] for _ in range(len(clusters_ids))] cvs = [[] for _ in range(len(clusters_ids))]
with open("DIHEDRALS","r") as ifile: with open("DIHEDRALS","r") as ifile:
dihedrals = ifile.readlines() dihedrals = ifile.readlines()
for i in range(len(dihedrals)): for i in range(len(dihedrals)):
t_angles = dihedrals[i].split() t_angles = dihedrals[i].split()
for j in range(len(torsions)): for j in range(len(torsions)):
cvs[i].append(float(t_angles[j+1])*(180/math.pi)) cvs[i].append(float(t_angles[j+1])*(180/math.pi))
def generate_restraint(cluster_i): def generate_restraint(cluster_i):
with open("MOL_GMX.top", "r") as ifile, open("gaff/cluster_{}/restrained.top".format(str(cluster_i)), "w") as ofile: with open("MOL_GMX.top", "r") as ifile, open("gaff/cluster_{}/restrained.top".format(str(cluster_i)), "w") as ofile:
for line in ifile.readlines(): for line in ifile.readlines():
if line == "; Ligand position restraints\n": if line == "; Ligand position restraints\n":
ofile.write("\n") ofile.write("\n")
ofile.write("[ dihedral_restraints ]\n") ofile.write("[ dihedral_restraints ]\n")
for j in range(len(torsions)): for j in range(len(torsions)):
ofile.write(" ".join(torsions[j])) ofile.write(" ".join(torsions[j]))
ofile.write("2 %3.1f 0 500\n" %cvs[i][j]) ofile.write("2 %3.1f 0 500\n" %cvs[i][j])
ofile.write(line) ofile.write(line)
!mkdir -p -m 757 gaff !mkdir -p -m 757 gaff
#select groups for energy evaluation #select groups for energy evaluation
groups = "10" groups = "10"
for i in tqdm(clusters_ids): for i in tqdm(clusters_ids):
!mkdir -p -m 757 gaff/cluster_{i} !mkdir -p -m 757 gaff/cluster_{i}
shutil.copy("pdb_opt/cluster{}.pdb".format(i), "gaff/cluster_{}".format(i)) shutil.copy("pdb_opt/cluster{}.pdb".format(i), "gaff/cluster_{}".format(i))
shutil.copy("MOL_GMX.top", "gaff/cluster_{}".format(i)) shutil.copy("MOL_GMX.top", "gaff/cluster_{}".format(i))
shutil.copy("em.mdp", "gaff/cluster_{}".format(i)) shutil.copy("em.mdp", "gaff/cluster_{}".format(i))
shutil.copy("md.mdp", "gaff/cluster_{}".format(i)) shutil.copy("md.mdp", "gaff/cluster_{}".format(i))
generate_restraint(i) generate_restraint(i)
gmx_run(f"editconf -f cluster{i}.pdb -box 3 3 3 -bt cubic -c -o box.gro", workdir=f"/gaff/cluster_{i}") gmx_run(f"editconf -f cluster{i}.pdb -box 3 3 3 -bt cubic -c -o box.gro", workdir=f"/gaff/cluster_{i}")
gmx_run("grompp -f em -c box.gro -p restrained.top -o em1", workdir=f"/gaff/cluster_{i}") gmx_run("grompp -f em -c box.gro -p restrained.top -o em1", workdir=f"/gaff/cluster_{i}")
gmx_run("mdrun -ntomp 2 -s em1 -c after_em1 -g em1 -e em1 -o em1", workdir=f"/gaff/cluster_{i}") gmx_run("mdrun -ntomp 2 -s em1 -c after_em1 -g em1 -e em1 -o em1", workdir=f"/gaff/cluster_{i}")
gmx_run("grompp -f md -c box.gro -p MOL_GMX.top -o rerun", workdir=f"/gaff/cluster_{i}") gmx_run("grompp -f md -c box.gro -p MOL_GMX.top -o rerun", workdir=f"/gaff/cluster_{i}")
gmx_run("mdrun -ntomp 2 -s rerun -rerun em1 -c after_rerun -g rerun -e rerun -o rerun", workdir=f"/gaff/cluster_{i}") gmx_run("mdrun -ntomp 2 -s rerun -rerun em1 -c after_rerun -g rerun -e rerun -o rerun", workdir=f"/gaff/cluster_{i}")
gmx_run("energy -f rerun.edr -o rerun.xvg", workdir=f"/gaff/cluster_{i}", groups=groups) gmx_run("energy -f rerun.edr -o rerun.xvg", workdir=f"/gaff/cluster_{i}", groups=groups)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#extract energies and from each energy value subtract minimal energy #extract energies and from each energy value subtract minimal energy
energies_lst = [] energies_lst = []
for i in clusters_ids: for i in clusters_ids:
with open("gaff/cluster_{}/rerun.xvg".format(i, "r")) as ifile: with open("gaff/cluster_{}/rerun.xvg".format(i, "r")) as ifile:
last_line = ifile.readlines()[-1] last_line = ifile.readlines()[-1]
energies = last_line.split(" ") energies = last_line.split(" ")
energies_lst.append(energies[len(energies) - 1].rstrip()) energies_lst.append(energies[len(energies) - 1].rstrip())
min_energy = min(energies_lst) min_energy = min(energies_lst)
with open("gaff_energies.txt", "w") as ofile: with open("gaff_energies.txt", "w") as ofile:
for energy in energies_lst: for energy in energies_lst:
ofile.write("{} \n".format((float(energy) - float(min_energy)))) ofile.write("{} \n".format((float(energy) - float(min_energy))))
``` ```
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
## 6. Define correction of force field ## 6. Define correction of force field
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#subtract from orca energies computed in step 5, GAFF energies computed in step 6 #subtract from orca energies computed in step 5, GAFF energies computed in step 6
with open("reference", "w") as ofile: with open("reference", "w") as ofile:
with open("orca_energies.txt", "r") as orca_energies, open("gaff_energies.txt", "r") as gaff_energies: with open("orca_energies.txt", "r") as orca_energies, open("gaff_energies.txt", "r") as gaff_energies:
for orca_energy in orca_energies.readlines(): for orca_energy in orca_energies.readlines():
for gaff_energy in gaff_energies.readlines(): for gaff_energy in gaff_energies.readlines():
ofile.write("{} \n".format((float(orca_energy) - float(gaff_energy)))) ofile.write("{} \n".format((float(orca_energy) - float(gaff_energy))))
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
#write final corrections into output file along with representative conformations #write final corrections into output file along with representative conformations
input_dir = "pdb_opt/" input_dir = "pdb_opt/"
with open("reference.pdb", "w") as ofile, open("reference", "r") as ifile: with open("reference.pdb", "w") as ofile, open("reference", "r") as ifile:
final_energies = ifile.readlines() final_energies = ifile.readlines()
energy_index = 0 energy_index = 0
for i in clusters_ids: for i in clusters_ids:
ofile.write("REMARK X={}".format(final_energies[energy_index])) ofile.write("REMARK X={}".format(final_energies[energy_index]))
with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile1: with open("{}cluster{}.pdb".format(input_dir, i), "r") as ifile1:
ofile.writelines(ifile1.readlines()) ofile.writelines(ifile1.readlines())
energy_index += 1 energy_index += 1
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
x1 = [] x1 = []
y1 = [] y1 = []
with open("DIHEDRALS") as ifile: with open("DIHEDRALS") as ifile:
for line in ifile.readlines(): for line in ifile.readlines():
split_values = line.split() split_values = line.split()
x1.append(split_values[1]) x1.append(split_values[1])
y1.append(split_values[2]) y1.append(split_values[2])
plot_landmarks(x, y, x1, y1).show() plot_landmarks(x, y, x1, y1).show()
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment