Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PMCV force field correction
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Vladimír Višňovský
PMCV force field correction
Commits
a3647db4
Commit
a3647db4
authored
May 17, 2021
by
Vladimír Višňovský
Browse files
Options
Downloads
Plain Diff
Merge branch 'k8s-development' into 'master'
added ) typo See merge request
!14
parents
f8aa9f18
1fe5c11a
Loading
Loading
2 merge requests
!15
Refactor run
,
!14
added ) typo
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
pipelineJupyter.ipynb
+1
-1
1 addition, 1 deletion
pipelineJupyter.ipynb
with
1 addition
and
1 deletion
pipelineJupyter.ipynb
+
1
−
1
View file @
a3647db4
...
...
@@ -807,7 +807,7 @@
"source": [
"#3. check output && discard non converging clusters\n",
"for cluster_id in clusters_ids:\n",
" with open(f\"orca_output/bp86tzvp/orca_output{cluster_id}.out\" as iorca_output:\n",
" with open(f\"orca_output/bp86tzvp/orca_output{cluster_id}.out\"
)
as iorca_output:\n",
" iorca_output_text = iorca_output.read()\n",
" if \"****ORCA TERMINATED NORMALLY****\" not in iorca_output_text:\n",
" !{orca_job_check} orca_output/bp86tzvp/orca_output{cluster_id}.out\n",
...
...
%% Cell type:markdown id: tags:
## 1. Definition of computational parameters
%% Cell type:code id: tags:
```
python
import
os
import
sys
import
shutil
import
re
import
math
import
time
import
subprocess
import
nglview
as
nv
import
pytraj
as
pt
#insert path to python scripts
sys
.
path
.
insert
(
1
,
"
/home/base/modules
"
)
sys
.
path
.
insert
(
1
,
"
/home/base/modules/gmx_orca
"
)
from
draw_2d
import
*
from
draw_3d
import
drawit
from
tqdm.notebook
import
tqdm
from
molvs
import
Standardizer
from
rdkit
import
Chem
from
rdkit.Chem
import
Draw
from
rdkit.Chem
import
SDWriter
from
rdkit.Chem
import
AllChem
from
rdkit.Chem
import
MolFromSmarts
from
rdkit.Chem
import
rdmolops
from
rdkit.Chem
import
rdchem
from
convert
import
convert_to_orca_methods
from
plot_graph
import
plot_landmarks
from
gmx_orca_run
import
gmx_run
,
orca_run
,
parallel_wait
from
replace
import
replace_text_for_embedding
```
%% Cell type:code id: tags:
```
python
#number of configurations to generate in basic energy minimization
numc
=
50
#path to applications
orca_job_check
=
"
/home/base/modules/orcajobcheck.py
"
xyz_to_mol
=
"
/home/base/modules/xyz2mol/xyz2mol.py
"
```
%% Cell type:markdown id: tags:
## 2. Molecule shape processing
%% Cell type:code id: tags:
```
python
#specify input of desired molecule in SMILES
smiles_molecule
=
"
CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)C[NH+]3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5
"
molecule
=
Chem
.
MolFromSmiles
(
smiles_molecule
)
render_svg
(
moltosvg
(
molecule
))
```
%% Cell type:code id: tags:
```
python
with
open
(
"
molekula.smi
"
,
"
w
"
)
as
smifile
:
smifile
.
write
(
smiles_molecule
)
s
=
Standardizer
()
molecule
=
s
.
standardize
(
molecule
)
molecule
=
Chem
.
AddHs
(
molecule
)
natoms
=
molecule
.
GetNumAtoms
()
charge
=
Chem
.
rdmolops
.
GetFormalCharge
(
molecule
)
render_svg
(
moltosvg
(
molecule
))
```
%% Cell type:code id: tags:
```
python
#set lowest energy configuration
#set default id of minimal energy conformer to -1
minid
=
-
1
#set default minimal energy to highest possible float number
minene
=
sys
.
float_info
.
max
AllChem
.
EmbedMultipleConfs
(
molecule
,
clearConfs
=
True
,
numConfs
=
numc
)
#run 'MMFF94 force field' energy evaluation
done
=
AllChem
.
MMFFOptimizeMoleculeConfs
(
molecule
)
for
i
in
range
(
len
(
done
)):
if
done
[
i
][
1
]
<
minene
:
minene
=
done
[
i
][
1
]
minid
=
i
print
(
f
'
Minimal energy:
{
minene
}
'
)
writer
=
SDWriter
(
"
molekula.mol
"
)
writer
.
write
(
molecule
,
confId
=
minid
)
drawit
(
molecule
)
```
%% Cell type:code id: tags:
```
python
#set pattern to detect torsion angles
RotatableBond
=
Chem
.
MolFromSmarts
(
'
[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)&!$([C;H3])&!$([O;H1])&!$([N;H3])]
'
)
rotatables
=
molecule
.
GetSubstructMatches
(
RotatableBond
)
print
(
rotatables
)
```
%% Cell type:code id: tags:
```
python
#get numbers of atoms that form torsion angles
torsions
=
[]
for
rotatable
in
rotatables
:
pairs1
=
[]
pairs2
=
[]
for
bond
in
molecule
.
GetBonds
():
if
rotatable
[
0
]
==
bond
.
GetBeginAtomIdx
()
and
rotatable
[
1
]
!=
bond
.
GetEndAtomIdx
():
pairs1
.
append
([
bond
.
GetBeginAtomIdx
(),
bond
.
GetEndAtomIdx
()])
if
rotatable
[
1
]
==
bond
.
GetBeginAtomIdx
()
and
rotatable
[
0
]
!=
bond
.
GetEndAtomIdx
():
pairs2
.
append
([
bond
.
GetBeginAtomIdx
(),
bond
.
GetEndAtomIdx
()])
torsions
.
append
([
pairs1
[
0
][
1
],
pairs1
[
0
][
0
],
pairs2
[
0
][
0
],
pairs2
[
0
][
1
]])
print
(
torsions
)
```
%% Cell type:markdown id: tags:
## 3. Preparation of environment and molecule
%% Cell type:code id: tags:
```
python
# prepare input files for energetic minimization
netcharge
=
rdmolops
.
GetFormalCharge
(
molecule
)
!
antechamber
-
i
molekula
.
mol
-
fi
mdl
-
o
molekula
.
prepi
-
fo
prepi
-
c
bcc
-
nc
{
netcharge
}
&&
\
parmchk2
-
i
molekula
.
prepi
-
f
prepi
-
o
molekula
.
frcmod
&&
\
tleap
-
f
tleapin
.
txt
&&
\
acpype
-
p
molekula
.
prmtop
-
x
molekula
.
inpcrd
```
%% Cell type:code id: tags:
```
python
#fix ordering of atoms
order_before
=
[]
order_after
=
[]
with
open
(
'
sqm.pdb
'
,
'
r
'
)
as
pdbfile
:
for
atom
in
pdbfile
.
readlines
():
order_before
.
append
(
atom
.
split
()[
2
])
with
open
(
'
MOL_GMX.gro
'
,
'
r
'
)
as
grofile
:
for
atom
in
grofile
.
readlines
():
if
atom
.
startswith
(
'
1 MOL
'
):
order_after
.
append
(
atom
.
split
()[
2
])
torsions_new
=
[]
torsion_new
=
[]
for
torsion
in
torsions
:
for
i
in
torsion
:
torsion_new
.
append
(
order_after
.
index
(
order_before
[
i
])
+
1
)
torsions_new
.
append
(
torsion_new
)
torsion_new
=
[]
torsions
=
torsions_new
print
(
torsions
)
```
%% Cell type:code id: tags:
```
python
!
mkdir
-
p
em
with
open
(
"
em/em.mdp
"
,
"
w
"
)
as
emfile
:
emfile
.
write
(
"
integrator = steep
\n
"
)
emfile
.
write
(
"
nsteps = 100000
\n
"
)
emfile
.
write
(
"
emtol = 0
\n
"
)
emfile
.
write
(
"
emstep = 0.1
\n
"
)
emfile
.
write
(
"
nstcomm = 1
\n
"
)
emfile
.
write
(
"
nstxout = 100
\n
"
)
emfile
.
write
(
"
nstvout = 100
\n
"
)
emfile
.
write
(
"
nstfout = 0
\n
"
)
emfile
.
write
(
"
nstlog = 100
\n
"
)
emfile
.
write
(
"
nstenergy = 100
\n
"
)
emfile
.
write
(
"
nstlist = 1
\n
"
)
emfile
.
write
(
"
ns_type = grid
\n
"
)
emfile
.
write
(
"
coulombtype = cut-off
\n
"
)
emfile
.
write
(
"
rlist = 1.4
\n
"
)
emfile
.
write
(
"
rcoulomb = 1.4
\n
"
)
emfile
.
write
(
"
rvdw = 1.4
\n
"
)
emfile
.
write
(
"
energygrps = System
\n
"
)
emfile
.
write
(
"
epsilon-r = 80
\n
"
)
emfile
.
write
(
"
\n
"
)
shutil
.
copy
(
"
MOL_GMX.gro
"
,
"
em/
"
)
shutil
.
copy
(
"
MOL_GMX.top
"
,
"
em/
"
)
```
%% Cell type:code id: tags:
```
python
gmx_run
(
"
editconf -f MOL_GMX -o box -c -box 3 3 3
"
,
workdir
=
"
em
"
)
gmx_run
(
"
grompp -f em.mdp -c box -p MOL_GMX -o em1
"
,
workdir
=
"
em
"
)
gmx_run
(
"
mdrun -deffnm em1 -ntomp 2
"
,
workdir
=
"
em
"
)
```
%% Cell type:code id: tags:
```
python
#prepare files for getting the molecule to dynamic state
!
mkdir
-
p
md
with
open
(
"
md/md.mdp
"
,
"
w
"
)
as
mdfile
:
mdfile
.
write
(
"
integrator = sd
\n
"
)
mdfile
.
write
(
"
nsteps = 100000
\n
"
)
mdfile
.
write
(
"
dt = 0.001
\n
"
)
mdfile
.
write
(
"
nstxout = 1000
\n
"
)
mdfile
.
write
(
"
nstvout = 1000
\n
"
)
mdfile
.
write
(
"
nstenergy = 1000
\n
"
)
mdfile
.
write
(
"
nstlog = 1000
\n
"
)
mdfile
.
write
(
"
continuation = no
\n
"
)
mdfile
.
write
(
"
constraints = none
\n
"
)
mdfile
.
write
(
"
cutoff-scheme = Verlet
\n
"
)
mdfile
.
write
(
"
ns_type = grid
\n
"
)
mdfile
.
write
(
"
nstlist = 1
\n
"
)
mdfile
.
write
(
"
rlist = 1.4
\n
"
)
mdfile
.
write
(
"
rcoulomb = 1.4
\n
"
)
mdfile
.
write
(
"
rvdw = 1.4
\n
"
)
mdfile
.
write
(
"
coulombtype = cut-off
\n
"
)
mdfile
.
write
(
"
tcoupl = V-rescale
\n
"
)
mdfile
.
write
(
"
tc-grps = system
\n
"
)
mdfile
.
write
(
"
tau_t = 0.1
\n
"
)
mdfile
.
write
(
"
ref_t = 300
\n
"
)
mdfile
.
write
(
"
pcoupl = no
\n
"
)
mdfile
.
write
(
"
pbc = xyz
\n
"
)
mdfile
.
write
(
"
gen_vel = yes
\n
"
)
mdfile
.
write
(
"
epsilon-r = 80
\n
"
)
mdfile
.
write
(
"
\n
"
)
shutil
.
copy
(
"
em/em1.gro
"
,
"
md/
"
)
shutil
.
copy
(
"
MOL_GMX.top
"
,
"
md/
"
)
```
%% Cell type:code id: tags:
```
python
gmx_run
(
"
grompp -f md.mdp -c em1 -p MOL_GMX -o md1
"
,
workdir
=
"
md
"
)
gmx_run
(
"
mdrun -deffnm md1 -ntomp 2
"
,
workdir
=
"
md
"
)
```
%% Cell type:code id: tags:
```
python
#show trajectory of conformations
#select group for trjconv evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
group
=
"
0
"
gmx_run
(
"
trjconv -s md1.tpr -f md1.trr -o outTraj.pdb
"
,
workdir
=
"
md
"
,
groups
=
group
)
traj
=
pt
.
load
(
'
md/outTraj.pdb
'
)
view
=
nv
.
show_pytraj
(
traj
)
view
```
%% Cell type:code id: tags:
```
python
#fix periodic boundaries errors when the molecule jumps out of the box
#select group for trjconv evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
group
=
"
1
"
for
i
in
tqdm
(
range
(
len
(
torsions
))):
fr
=
str
(
float
(
100
-
len
(
torsions
)
+
i
)
-
0.01
)
to
=
str
(
float
(
100
-
len
(
torsions
)
+
i
)
+
0.01
)
gmx_run
(
f
"
trjconv -pbc nojump -s md1 -f md1 -o frame
{
i
}
.gro -b
{
fr
}
-e
{
to
}
<<EOF
\n
0
\n
EOF
"
,
workdir
=
"
md
"
,
groups
=
group
)
```
%% Cell type:markdown id: tags:
## 4. Generation of representative configurations
### 4.1 Trajectory generation
%% Cell type:code id: tags:
```
python
!
mkdir
-
p
mtd
with
open
(
"
mtd/mtd.mdp
"
,
"
w
"
)
as
mtdfile
:
mtdfile
.
write
(
"
integrator = sd
\n
"
)
mtdfile
.
write
(
"
nsteps = 10000000
\n
"
)
#10 ns each replica
mtdfile
.
write
(
"
dt = 0.001
\n
"
)
mtdfile
.
write
(
"
nstxout = 10000
\n
"
)
# output coordinates every 10ps to *.trr
mtdfile
.
write
(
"
nstvout = 10000
\n
"
)
# output velocities every 10ps to *.trr
mtdfile
.
write
(
"
nstenergy = 1000
\n
"
)
mtdfile
.
write
(
"
nstlog = 1000
\n
"
)
mtdfile
.
write
(
"
continuation = no
\n
"
)
mtdfile
.
write
(
"
constraints = none
\n
"
)
mtdfile
.
write
(
"
cutoff-scheme = Verlet
\n
"
)
mtdfile
.
write
(
"
ns_type = grid
\n
"
)
mtdfile
.
write
(
"
nstlist = 1
\n
"
)
mtdfile
.
write
(
"
rlist = 1.4
\n
"
)
mtdfile
.
write
(
"
rcoulomb = 1.4
\n
"
)
mtdfile
.
write
(
"
rvdw = 1.4
\n
"
)
mtdfile
.
write
(
"
coulombtype = cut-off
\n
"
)
mtdfile
.
write
(
"
tcoupl = V-rescale
\n
"
)
mtdfile
.
write
(
"
tc-grps = system
\n
"
)
mtdfile
.
write
(
"
tau_t = 0.1
\n
"
)
mtdfile
.
write
(
"
ref_t = 300
\n
"
)
mtdfile
.
write
(
"
pcoupl = no
\n
"
)
mtdfile
.
write
(
"
pbc = xyz
\n
"
)
mtdfile
.
write
(
"
gen_vel = yes
\n
"
)
mtdfile
.
write
(
"
epsilon-r = 80
\n
"
)
mtdfile
.
write
(
"
\n
"
)
```
%% Cell type:code id: tags:
```
python
for
i
in
range
(
len
(
torsions
)):
if
not
os
.
path
.
exists
(
"
mtd/w{}
"
.
format
(
i
)):
!
mkdir
-
p
mtd
/
w
{
i
}
with
open
(
"
mtd/w{}/plumed.dat
"
.
format
(
i
),
"
w
"
)
as
plumeddat
:
plumeddat
.
write
(
"
RANDOM_EXCHANGES
\n
"
)
plumeddat
.
write
(
"
WHOLEMOLECULES ENTITY0=1-{}
\n
"
.
format
(
natoms
))
for
j
in
range
(
len
(
torsions
)):
plumeddat
.
write
(
"
TORSION ATOMS={},{},{},{} LABEL=cv{}
\n
"
.
format
(
torsions
[
j
][
0
],
torsions
[
j
][
1
],
torsions
[
j
][
2
],
torsions
[
j
][
3
],
j
+
1
))
plumeddat
.
write
(
"
METAD ARG=cv{} HEIGHT=0.5 SIGMA=0.3 PACE=1000 GRID_MIN=-pi GRID_MAX=pi BIASFACTOR=15 LABEL=be
\n
"
.
format
(
i
+
1
))
cvs
=
""
for
j
in
range
(
len
(
torsions
)):
cvs
=
cvs
+
"
cv{},
"
.
format
(
j
+
1
)
cvs
=
cvs
[:
-
1
]
plumeddat
.
write
(
"
PRINT ARG={} STRIDE=1000 FILE=COLVAR
\n
"
.
format
(
cvs
))
plumeddat
.
write
(
"
PRINT ARG=be.bias STRIDE=1000 FILE=BIAS
\n
"
)
shutil
.
copy
(
"
MOL_GMX.top
"
,
"
mtd/
"
)
```
%% Cell type:code id: tags:
```
python
for
i
in
tqdm
(
range
(
len
(
torsions
))):
shutil
.
copy
(
"
md/frame{}.gro
"
.
format
(
i
),
"
mtd/w{}/
"
.
format
(
i
))
gmx_run
(
f
"
grompp -f mtd.mdp -c w
{
i
}
/frame
{
i
}
-p MOL_GMX -o w
{
i
}
/mtd1
"
,
workdir
=
"
mtd
"
,
parallel
=
True
)
parallel_wait
()
```
%% Cell type:code id: tags:
```
python
directories
=
""
for
i
in
range
(
len
(
torsions
)):
directories
=
directories
+
"
w{}
"
.
format
(
i
)
#see mdrunlog in mtd directory for insight to mdrun simulation (e.g 'tail -f mdrunlog')
gmx_run
(
f
"
mdrun -g mdrunlog -ntomp 1 -deffnm mtd1 -replex 500 -plumed plumed.dat -multidir
{
directories
}
"
,
workdir
=
"
mtd
"
,
mpi_run
=
len
(
torsions
))
```
%% Cell type:markdown id: tags:
### 4.2 Configurations clustering
%% Cell type:code id: tags:
```
python
#select groups for cluster evaluation
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
#Group 3 ( Custom)
groups
=
"
30
"
!
mkdir
-
p
-
m
757
clustering
;
mkdir
-
p
-
m
757
clustering
/
outClustersPDB
;
mkdir
-
p
-
m
757
clustering
/
outClustersXYZ
;
mkdir
-
p
-
m
757
clustering
/
outClusters
trajectories
=
""
for
i
in
range
(
len
(
torsions
)):
trajectories
=
trajectories
+
f
"
mtd/w
{
i
}
/mtd1.trr
"
#concatinate trajectories
gmx_run
(
f
"
trjcat -f
{
trajectories
}
-cat -o mtd/mtd1.trr
"
)
#make index file with non-hydrogen atoms, use 1 & ! a H* and q for input
gmx_run
(
"
make_ndx -f md/md1.tpr -o mtd/index.ndx
"
,
make_ndx
=
"
1&!aH*
"
)
#include index file and select index group 3
gmx_run
(
"
cluster -method gromos -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -n mtd/index.ndx -cutoff 0.15 -cl clustering/outCluster.pdb
"
,
groups
=
groups
)
#divide all clusters from gmx cluster to single clusters
with
open
(
"
clustering/outCluster.pdb
"
)
as
input_cluster
:
i
=
0
outFile
=
open
(
"
clustering/outClustersPDB/outCluster{}.pdb
"
.
format
(
i
),
"
w
"
)
for
line
in
input_cluster
:
if
line
!=
"
ENDMDL
\n
"
:
outFile
.
write
(
line
)
continue
outFile
.
write
(
"
ENDMDL
\n
"
)
i
+=
1
outFile
.
close
()
outFile
=
open
(
"
clustering/outClustersPDB/outCluster{}.pdb
"
.
format
(
i
),
"
w
"
)
!
rm
clustering
/
outClustersPDB
/
outCluster
{
i
}.
pdb
clusters_ids
=
[
x
for
x
in
range
(
0
,
i
)]
#convert .pdb clusters to .xyz
for
pdb_cluster
in
tqdm
(
os
.
listdir
(
"
clustering/outClustersPDB/
"
)):
!
babel
-
ipdb
clustering
/
outClustersPDB
/
{
pdb_cluster
}
-
oxyz
clustering
/
outClustersXYZ
/
{
pdb_cluster
.
replace
(
"
pdb
"
,
"
xyz
"
)}
```
%% Cell type:markdown id: tags:
### 4.3 Visualize landmarks
%% Cell type:code id: tags:
```
python
#apply periodic boundary conditions to mtd trajectory -> fitting -> no Hydrogen -> train parmtSNEcv -> compute embeddings
!
mkdir
-
p
visualization
/
traj
#Group 0 ( System)
#Group 1 ( Other)
#Group 2 ( MOL)
#Group 3 ( Custom)
#select group for periodic boundaries check output:
group
=
"
0
"
gmx_run
(
"
trjconv -f mtd/mtd1.trr -s mtd/w0/mtd1.tpr -pbc mol -o visualization/traj/mtd1_nopbc.xtc
"
,
groups
=
group
)
#select groups for fitting and output:
groups
=
"
00
"
gmx_run
(
"
trjconv -f visualization/traj/mtd1_nopbc.xtc -s clustering/outClustersPDB/outCluster0.pdb -fit rot+trans -o visualization/traj/mtd1_fit.xtc
"
,
groups
=
groups
)
#select group for no Hydrogen output:
group
=
"
3
"
gmx_run
(
"
trjconv -f visualization/traj/mtd1_fit.xtc -n mtd/index.ndx -o visualization/traj/mtd1_fit_noH.xtc
"
,
groups
=
group
)
#select groups for size of box and output:
groups
=
"
33
"
gmx_run
(
"
editconf -f clustering/outClustersPDB/outCluster0.pdb -n mtd/index.ndx -box 3 3 3 -c -o visualization/ref.pdb
"
,
groups
=
groups
)
#train parmtSNEcv
!
parmtSNEcv
-
i
visualization
/
traj
/
mtd1_fit_noH
.
xtc
-
p
visualization
/
ref
.
pdb
-
boxx
3
-
boxy
3
-
boxz
3
-
dim
2
-
layers
2
-
o
visualization
/
out
.
txt
-
plumed
visualization
/
plumed
.
dat
-
epochs
2000
#modify plumed.dat to compute embedding in every step and change name of file for *** mtd trajectory ***
replace_text_for_embedding
(
"
visualization/plumed.dat
"
,
stride
=
(
100
,
1
),
file_name
=
(
"
COLVAR
"
,
"
2d_embedding
"
))
#fix weights of atoms
replace_text_for_embedding
(
"
visualization/ref.pdb
"
,
weight
=
(
0.00
,
1.00
))
#run as plumed
gmx_run
(
"
driver --plumed visualization/plumed.dat --mf_xtc visualization/traj/mtd1_fit.xtc
"
)
#modify plumed.dat to compute embedding in every step and change name of file for *** representatives ***
replace_text_for_embedding
(
"
visualization/plumed.dat
"
,
file_name
=
(
"
2d_embedding
"
,
"
landmarks
"
))
#run as plumed
gmx_run
(
"
driver --plumed visualization/plumed.dat --mf_pdb clustering/outCluster.pdb
"
)
```
%% Cell type:code id: tags:
```
python
#visualize
x
=
[]
y
=
[]
x1
=
[]
y1
=
[]
with
open
(
"
2d_embedding
"
,
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
()[
1
:]:
split_values
=
line
.
split
()
x
.
append
(
split_values
[
1
])
y
.
append
(
split_values
[
2
])
with
open
(
"
landmarks
"
,
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
()[
1
:]:
split_values
=
line
.
split
()
x1
.
append
(
split_values
[
1
])
y1
.
append
(
split_values
[
2
])
plot_landmarks
(
x
,
y
,
x1
,
y1
).
show
()
```
%% Cell type:markdown id: tags:
## 5. Accurate and innacurate energy evaluation
### 5.1 AM1 method
%% Cell type:code id: tags:
```
python
#fix indexing
#orca indexes from zero
orca_torsions
=
[[]
for
_
in
range
(
len
(
torsions
))]
torsion_index
=
0
for
torsion
in
torsions
:
for
atom_index
in
torsion
:
orca_torsions
[
torsion_index
].
append
(
atom_index
-
1
)
torsion_index
+=
1
```
%% Cell type:code id: tags:
```
python
#1. convert
#orca method description - first line in orca method
method_description
=
"
!AM1 Opt
"
input_dir
=
"
clustering/outClustersXYZ/
"
output_dir
=
"
am1/input/
"
spin
=
1
!
mkdir
-
p
-
m
757
am1
;
mkdir
-
p
-
m
757
am1
/
input
;
mkdir
-
p
-
m
757
am1
/
output
!
mkdir
-
p
-
m
757
orca_output
;
mkdir
-
p
-
m
757
orca_output
/
am1
;
mkdir
-
p
-
m
757
orca_output
/
bp86svp
;
mkdir
-
p
-
m
757
orca_output
/
bp86tzvp
convert_to_orca_methods
(
input_dir
,
output_dir
,
orca_torsions
,
method_description
,
-
1
,
(
charge
,
spin
))
```
%% Cell type:code id: tags:
```
python
#2. compute
#minimisation of conformations before quantum mechanics
#see logs in orca_output directory...
input_path
=
"
am1/input/
"
output_path
=
"
am1/output/
"
successfuly_converged
=
0
clusters_count
=
len
(
clusters_ids
)
for
orca_method
in
tqdm
(
os
.
listdir
(
input_path
)):
if
not
orca_method
.
endswith
(
"
.inp
"
):
continue
orca_method_name
=
orca_method
.
replace
(
"
.inp
"
,
""
)
orca_method_number
=
orca_method_name
[
len
(
"
outCluster
"
):]
output_dir_path
=
output_path
+
orca_method_name
+
"
/
"
if
not
os
.
path
.
exists
(
output_dir_path
):
!
mkdir
-
p
-
m
757
{
output_dir_path
}
shutil
.
copy
(
input_path
+
orca_method
,
output_dir_path
)
orca_run
(
orca_method
,
f
"
orca_output/am1/orca_output
{
orca_method_number
}
.out
"
,
workdir
=
output_dir_path
,
parallel
=
True
)
parallel_wait
()
```
%% Cell type:code id: tags:
```
python
#3. check output
#check output of orca methods
for
cluster_id
in
clusters_ids
:
with
open
(
f
"
orca_output/am1/orca_output
{
cluster_id
}
.out
"
)
as
iorca_output
:
iorca_output_text
=
iorca_output
.
read
()
if
"
****ORCA TERMINATED NORMALLY****
"
not
in
iorca_output_text
:
print
(
iorca_output_text
)
raise
SystemExit
(
f
"
Error in am1 method of cluster no.
{
cluster_id
}
. You should not continue computation!
"
)
```
%% Cell type:markdown id: tags:
### 5.2 BP86 SVP method
%% Cell type:code id: tags:
```
python
#1. convert
#orca method description - first line in orca method
method_description
=
"
!BP86 def2-SVP TightSCF Opt
"
output_dir
=
"
bp86svp/input/
"
spin
=
1
required_cpus
=
12
#orca method description - first line in orca method
method_description
=
"
!BP86 def2-SVP TightSCF Opt
"
!
mkdir
-
p
-
m
757
bp86svp
;
mkdir
-
p
-
m
757
bp86svp
/
input
;
mkdir
-
p
-
m
757
bp86svp
/
output
for
i
in
clusters_ids
:
input_dir
=
"
am1/output/outCluster{}/
"
.
format
(
i
)
convert_to_orca_methods
(
input_dir
,
output_dir
,
orca_torsions
,
method_description
,
required_cpus
,
(
charge
,
spin
))
```
%% Cell type:code id: tags:
```
python
#2. compute
#minimisation of conformations before quantum mechanics
#see logs in orca_output directory...
input_path
=
"
bp86svp/input/
"
output_path
=
"
bp86svp/output/
"
successfuly_converged
=
0
clusters_count
=
len
(
clusters_ids
)
for
orca_method
in
tqdm
(
os
.
listdir
(
input_path
)):
if
not
orca_method
.
endswith
(
"
.inp
"
):
continue
orca_method_name
=
orca_method
.
replace
(
"
.inp
"
,
""
)
orca_method_number
=
orca_method_name
[
len
(
"
outCluster
"
):]
output_dir_path
=
output_path
+
orca_method_name
+
"
/
"
if
not
os
.
path
.
exists
(
output_dir_path
):
!
mkdir
-
p
-
m
757
{
output_dir_path
}
shutil
.
copy
(
input_path
+
orca_method
,
output_dir_path
)
orca_run
(
orca_method
,
f
"
orca_output/bp86svp/orca_output
{
orca_method_number
}
.out
"
,
workdir
=
output_dir_path
,
parallel
=
True
)
parallel_wait
()
```
%% Cell type:code id: tags:
```
python
#3. check output && discard non converging clusters
for
cluster_id
in
clusters_ids
:
with
open
(
f
"
orca_output/bp86svp/orca_output
{
cluster_id
}
.out
"
)
as
iorca_output
:
iorca_output_text
=
iorca_output
.
read
()
if
"
****ORCA TERMINATED NORMALLY****
"
not
in
iorca_output_text
:
!
{
orca_job_check
}
orca_output
/
bp86svp
/
orca_output
{
cluster_id
}.
out
clusters_ids
.
remove
(
int
(
orca_method_number
))
else
:
successfuly_converged
+=
1
print
(
f
"
**
{
successfuly_converged
}
/
{
clusters_count
}
successfuly converged - unconverged are not considered in next steps **
"
)
```
%% Cell type:markdown id: tags:
### 5.3 BP86 TZVP method
%% Cell type:code id: tags:
```
python
#1. convert
#orca method description - first line in orca method
method_description
=
"
!BP86 def2-TZVP TightSCF Opt
"
output_dir
=
"
bp86tzvp/input/
"
spin
=
1
required_cpus
=
12
#orca method description - first line in orca method
method_description
=
"
!BP86 def2-TZVP TightSCF Opt
"
!
mkdir
-
p
-
m
757
bp86tzvp
;
mkdir
-
p
-
m
757
bp86tzvp
/
input
;
mkdir
-
p
-
m
757
bp86tzvp
/
output
for
i
in
clusters_ids
:
input_dir
=
"
bp86svp/output/outCluster{}/
"
.
format
(
i
)
convert_to_orca_methods
(
input_dir
,
output_dir
,
orca_torsions
,
method_description
,
required_cpus
,
(
charge
,
spin
))
```
%% Cell type:code id: tags:
```
python
#2. compute
#run quantum mechanics
#see logs in orca_output directory...
input_path
=
"
bp86tzvp/input/
"
output_path
=
"
bp86tzvp/output/
"
successfuly_converged
=
0
clusters_count
=
len
(
clusters_ids
)
for
orca_method
in
tqdm
(
os
.
listdir
(
input_path
)):
if
not
orca_method
.
endswith
(
"
.inp
"
):
continue
orca_method_name
=
orca_method
.
replace
(
"
.inp
"
,
""
)
orca_method_number
=
orca_method_name
[
len
(
"
outCluster
"
):]
output_dir_path
=
output_path
+
orca_method_name
+
"
/
"
if
not
os
.
path
.
exists
(
output_dir_path
):
!
mkdir
-
p
-
m
757
{
output_dir_path
}
shutil
.
copy
(
input_path
+
orca_method
,
output_dir_path
)
orca_run
(
orca_method
,
f
"
orca_output/bp86tzvp/orca_output
{
orca_method_number
}
.out
"
,
workdir
=
output_dir_path
,
parallel
=
True
)
parallel_wait
()
```
%% Cell type:code id: tags:
```
python
#3. check output && discard non converging clusters
for
cluster_id
in
clusters_ids
:
with
open
(
f
"
orca_output/bp86tzvp/orca_output
{
cluster_id
}
.out
"
as
iorca_output
:
with
open
(
f
"
orca_output/bp86tzvp/orca_output
{
cluster_id
}
.out
"
)
as
iorca_output
:
iorca_output_text
=
iorca_output
.
read
()
if
"
****ORCA TERMINATED NORMALLY****
"
not
in
iorca_output_text
:
!
{
orca_job_check
}
orca_output
/
bp86tzvp
/
orca_output
{
cluster_id
}.
out
clusters_ids
.
remove
(
int
(
cluster_id
))
else
:
successfuly_converged
+=
1
print
(
f
"
**
{
successfuly_converged
}
/
{
clusters_count
}
successfuly converged - unconverged are not considered in next steps **
"
)
```
%% Cell type:code id: tags:
```
python
#extract final energies from output of orca
with
open
(
"
orca_energies.txt
"
,
"
a
"
)
as
ofile
:
for
orca_output
in
os
.
listdir
(
"
orca_output/bp86tzvp
"
):
with
open
(
"
orca_output/bp86tzvp/
"
+
orca_output
)
as
iorca_output
:
for
line
in
reversed
(
list
(
iorca_output
)):
energy_list
=
re
.
findall
(
r
'
(FINAL SINGLE POINT ENERGY)( +)(-?\d+\.\d+)
'
,
line
)
if
len
(
energy_list
)
>
0
:
ofile
.
write
(
energy_list
[
0
][
2
])
ofile
.
write
(
'
\n
'
)
break
```
%% Cell type:code id: tags:
```
python
#convert from hartree to kJ/mol and write quantum mechanics energies to file
energies_in_hartree
=
[]
with
open
(
"
orca_energies.txt
"
,
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
():
energies_in_hartree
.
append
(
float
(
line
))
#constant to convert from hartree to kJ/mol
CONVERSION_CONST
=
2625.499638
minimum
=
min
(
energies_in_hartree
)
energies_in_kJ
=
[]
for
H
in
energies_in_hartree
:
energies_in_kJ
.
append
((
H
-
minimum
)
*
CONVERSION_CONST
)
with
open
(
"
orca_energies.txt
"
,
"
w
"
)
as
ofile
:
for
energy
in
energies_in_kJ
:
ofile
.
write
(
"
{}
\n
"
.
format
(
energy
))
```
%% Cell type:code id: tags:
```
python
#copy atoms from conformation before QM and combine it with orca optimised conformations
output_dir
=
"
pdb_opt/
"
input_dir
=
"
bp86tzvp/output/outCluster
"
!
mkdir
-
p
{
output_dir
}
atoms
=
[]
with
open
(
"
clustering/outClustersPDB/outCluster0.pdb
"
,
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
():
if
"
ATOM
"
in
line
:
atoms
.
append
(
line
[:
26
])
for
i
in
clusters_ids
:
hetatms
=
[]
!
babel
-
ixyz
{
input_dir
}{
i
}
/
outCluster
{
i
}.
xyz
-
opdb
{
output_dir
}
temp_cluster_
{
i
}.
pdb
with
open
(
"
{}temp_cluster_{}.pdb
"
.
format
(
output_dir
,
i
),
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
():
if
"
HETATM
"
in
line
:
hetatms
.
append
(
line
[
27
:
66
])
with
open
(
"
pdb_opt/cluster{}.pdb
"
.
format
(
i
),
"
w
"
)
as
output_cluster
:
for
j
in
range
(
len
(
atoms
)):
output_cluster
.
write
(
atoms
[
j
])
output_cluster
.
write
(
hetatms
[
j
]
+
"
\n
"
)
!
rm
{
output_dir
}
/
temp_
*
```
%% Cell type:code id: tags:
```
python
input_dir
=
"
pdb_opt/
"
output_file
=
"
clusters.pdb
"
#concatinate optimised conformations to trajectory
with
open
(
output_file
,
"
w
"
)
as
ofile
:
for
i
in
clusters_ids
:
ofile
.
write
(
"
MODEL {}
\n
"
.
format
(
str
(
i
)))
with
open
(
"
{}cluster{}.pdb
"
.
format
(
input_dir
,
i
),
"
r
"
)
as
ifile
:
ofile
.
write
(
ifile
.
read
())
ofile
.
write
(
"
ENDMDL
\n
"
)
with
open
(
"
plumed.dat
"
,
"
w
"
)
as
ifile
:
cvs
=
[]
num_atoms
=
sum
(
1
for
line
in
open
(
"
{}cluster{}.pdb
"
.
format
(
input_dir
,
clusters_ids
[
0
])))
ifile
.
write
(
"
WHOLEMOLECULES ENTITY0=1-{}
\n
"
.
format
(
str
(
num_atoms
)))
for
i
in
range
(
0
,
len
(
torsions
)):
cvs
.
append
(
"
cv{}
"
.
format
(
i
))
ifile
.
write
(
"
TORSION ATOMS=
"
)
ifile
.
write
(
"
,
"
.
join
(
str
(
x
)
for
x
in
torsions
[
i
]))
ifile
.
write
(
"
LABEL={}
\n
"
.
format
(
cvs
[
i
]))
ifile
.
write
(
"
PRINT ARG=
"
)
ifile
.
write
(
"
,
"
.
join
(
cvs
))
ifile
.
write
(
"
STRIDE=1 FILE=DIHEDRALS
"
)
#compute dihedrals
gmx_run
(
f
"
driver --plumed plumed.dat --mf_pdb
{
output_file
}
"
)
lines
=
[]
with
open
(
"
DIHEDRALS
"
,
"
r
"
)
as
ifile
:
for
line
in
ifile
.
readlines
():
if
"
#
"
not
in
line
:
lines
.
append
(
line
)
with
open
(
"
DIHEDRALS
"
,
"
w
"
)
as
ofile
:
for
line
in
lines
:
ofile
.
write
(
line
)
```
%% Cell type:code id: tags:
```
python
#perform minimisations and compute GAFF energy
cvs
=
[[]
for
_
in
range
(
len
(
clusters_ids
))]
with
open
(
"
DIHEDRALS
"
,
"
r
"
)
as
ifile
:
dihedrals
=
ifile
.
readlines
()
for
i
in
range
(
len
(
dihedrals
)):
t_angles
=
dihedrals
[
i
].
split
()
for
j
in
range
(
len
(
torsions
)):
cvs
[
i
].
append
(
float
(
t_angles
[
j
+
1
])
*
(
180
/
math
.
pi
))
def
generate_restraint
(
cluster_i
):
with
open
(
"
MOL_GMX.top
"
,
"
r
"
)
as
ifile
,
open
(
"
gaff/cluster_{}/restrained.top
"
.
format
(
str
(
cluster_i
)),
"
w
"
)
as
ofile
:
for
line
in
ifile
.
readlines
():
if
line
==
"
; Ligand position restraints
\n
"
:
ofile
.
write
(
"
\n
"
)
ofile
.
write
(
"
[ dihedral_restraints ]
\n
"
)
for
j
in
range
(
len
(
torsions
)):
ofile
.
write
(
"
"
.
join
(
torsions
[
j
]))
ofile
.
write
(
"
2 %3.1f 0 500
\n
"
%
cvs
[
i
][
j
])
ofile
.
write
(
line
)
!
mkdir
-
p
-
m
757
gaff
#select groups for energy evaluation
groups
=
"
10
"
for
i
in
tqdm
(
clusters_ids
):
!
mkdir
-
p
-
m
757
gaff
/
cluster_
{
i
}
shutil
.
copy
(
"
pdb_opt/cluster{}.pdb
"
.
format
(
i
),
"
gaff/cluster_{}
"
.
format
(
i
))
shutil
.
copy
(
"
MOL_GMX.top
"
,
"
gaff/cluster_{}
"
.
format
(
i
))
shutil
.
copy
(
"
em.mdp
"
,
"
gaff/cluster_{}
"
.
format
(
i
))
shutil
.
copy
(
"
md.mdp
"
,
"
gaff/cluster_{}
"
.
format
(
i
))
generate_restraint
(
i
)
gmx_run
(
f
"
editconf -f cluster
{
i
}
.pdb -box 3 3 3 -bt cubic -c -o box.gro
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
)
gmx_run
(
"
grompp -f em -c box.gro -p restrained.top -o em1
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
)
gmx_run
(
"
mdrun -ntomp 2 -s em1 -c after_em1 -g em1 -e em1 -o em1
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
)
gmx_run
(
"
grompp -f md -c box.gro -p MOL_GMX.top -o rerun
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
)
gmx_run
(
"
mdrun -ntomp 2 -s rerun -rerun em1 -c after_rerun -g rerun -e rerun -o rerun
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
)
gmx_run
(
"
energy -f rerun.edr -o rerun.xvg
"
,
workdir
=
f
"
/gaff/cluster_
{
i
}
"
,
groups
=
groups
)
```
%% Cell type:code id: tags:
```
python
#extract energies and from each energy value subtract minimal energy
energies_lst
=
[]
for
i
in
clusters_ids
:
with
open
(
"
gaff/cluster_{}/rerun.xvg
"
.
format
(
i
,
"
r
"
))
as
ifile
:
last_line
=
ifile
.
readlines
()[
-
1
]
energies
=
last_line
.
split
(
"
"
)
energies_lst
.
append
(
energies
[
len
(
energies
)
-
1
].
rstrip
())
min_energy
=
min
(
energies_lst
)
with
open
(
"
gaff_energies.txt
"
,
"
w
"
)
as
ofile
:
for
energy
in
energies_lst
:
ofile
.
write
(
"
{}
\n
"
.
format
((
float
(
energy
)
-
float
(
min_energy
))))
```
%% Cell type:markdown id: tags:
## 6. Define correction of force field
%% Cell type:code id: tags:
```
python
#subtract from orca energies computed in step 5, GAFF energies computed in step 6
with
open
(
"
reference
"
,
"
w
"
)
as
ofile
:
with
open
(
"
orca_energies.txt
"
,
"
r
"
)
as
orca_energies
,
open
(
"
gaff_energies.txt
"
,
"
r
"
)
as
gaff_energies
:
for
orca_energy
in
orca_energies
.
readlines
():
for
gaff_energy
in
gaff_energies
.
readlines
():
ofile
.
write
(
"
{}
\n
"
.
format
((
float
(
orca_energy
)
-
float
(
gaff_energy
))))
```
%% Cell type:code id: tags:
```
python
#write final corrections into output file along with representative conformations
input_dir
=
"
pdb_opt/
"
with
open
(
"
reference.pdb
"
,
"
w
"
)
as
ofile
,
open
(
"
reference
"
,
"
r
"
)
as
ifile
:
final_energies
=
ifile
.
readlines
()
energy_index
=
0
for
i
in
clusters_ids
:
ofile
.
write
(
"
REMARK X={}
"
.
format
(
final_energies
[
energy_index
]))
with
open
(
"
{}cluster{}.pdb
"
.
format
(
input_dir
,
i
),
"
r
"
)
as
ifile1
:
ofile
.
writelines
(
ifile1
.
readlines
())
energy_index
+=
1
```
%% Cell type:code id: tags:
```
python
x1
=
[]
y1
=
[]
with
open
(
"
DIHEDRALS
"
)
as
ifile
:
for
line
in
ifile
.
readlines
():
split_values
=
line
.
split
()
x1
.
append
(
split_values
[
1
])
y1
.
append
(
split_values
[
2
])
plot_landmarks
(
x
,
y
,
x1
,
y1
).
show
()
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment