进阶教程

主要讲述RDKit中不常见但很重要的功能以及一些方法的参数细节。 根据笔者或者读者在实际工作中的需求通过RDKit进行编写的实践代码,进一步提高工作和研发效率。

绘图

绘制不带颜色的分子图片

opts.elemDict这个参数可以控制不同元素的颜色,

def defaultDrawOptions():
    '''This function returns an RDKit drawing options object with
    default drawing options.'''

    opts = Draw.DrawingOptions()
    # opts.elemDict = defaultdict(lambda: (0,0,0)) # all atoms are black
    opts.noCarbonSymbols = True
    opts.selectColor = (1, 0, 0)
    opts.wedgeBonds = True

    opts.elemDict = defaultdict(lambda: (0, 0, 0))
    opts.dotsPerAngstrom = 20
    opts.bondLineWidth = 1.5
    atomLabelFontFace = 'arial'

    return opts
options=defaultDrawOptions()
opts.elemDict = defaultdict(lambda: (0, 0, 0))

设置成(0,0,0)代表每个元素的颜色都为黑色。

RDkit 中默认元素的颜色:

import rdkit.Chem.Draw as Draw
opts2 = Draw.DrawingOptions()
print(opts2.elemDict)

输出:

{1: (0.55, 0.55, 0.55),  # H
 7: (0, 0, 1),           # N
 8: (1, 0, 0),           # O
 9: (0.2, 0.8, 0.8),     # F
 15: (1, 0.5, 0),        # P
 16: (0.8, 0.8, 0),      # S
 17: (0, 0.8, 0),        # Cl
 35: (0.5, 0.3, 0.1),    # Br
 53: (0.63, 0.12, 0.94), # I
 0: (0.5, 0.5, 0.5)}     #  ?

示例代码

import rdkit.Chem.Draw as Draw
mol= Chem.MolFromSmiles('CCCOCCC')
opts1 = Draw.DrawingOptions()
opts1.elemDict = defaultdict(lambda: (0, 0, 0))
img1 = Draw.MolToImage(mol,  options=opts1)
opts2 = Draw.DrawingOptions()
img2_color = Draw.MolToImage(mol,  options=opts2)
display(img1)
display(img2_color)

输出:

_images/mol_black2020-03-18_072607.931163.png _images/mol_color2020-03-18_072613.639540.png

高亮原子和键的颜色 d2d.DrawMolecule

我看到子结构匹配的图片时候,感觉非常漂亮,于是看了一下源码知道其是如何绘制高亮图片的。 主要是借助了**d2d.DrawMolecule**函数,里面参数有高亮的atom id列表,bond id列表, 以及自定义atom背景颜色和bond背景颜色的字典。我对其简单封装成了clourMol函数。

示例代码如下:

from rdkit import Chem
from rdkit.Chem.Draw import rdMolDraw2D
from io import BytesIO
import numpy as np
from PIL import Image, ImageOps
def _drawerToImage(d2d):
    try:
        import Image
    except ImportError:
        from PIL import Image
    sio = BytesIO(d2d.GetDrawingText())
    return Image.open(sio)

def clourMol(mol,highlightAtoms_p=None,highlightAtomColors_p=None,highlightBonds_p=None,highlightBondColors_p=None,sz=[400,400]):
    '''

    '''
    d2d = rdMolDraw2D.MolDraw2DCairo(sz[0], sz[1])
    op = d2d.drawOptions()
    op.dotsPerAngstrom = 20
    op.useBWAtomPalette()
    mc = rdMolDraw2D.PrepareMolForDrawing(mol)
    d2d.DrawMolecule(mc, legend='', highlightAtoms=highlightAtoms_p,highlightAtomColors=highlightAtomColors_p, highlightBonds= highlightBonds_p,highlightBondColors=highlightBondColors_p)
    d2d.FinishDrawing()
    product_img=_drawerToImage(d2d)
    return product_img
def StripAlphaFromImage(img):
    '''This function takes an RGBA PIL image and returns an RGB image'''

    if len(img.split()) == 3:
        return img
    return Image.merge('RGB', img.split()[:3])


def TrimImgByWhite(img, padding=10):
    '''This function takes a PIL image, img, and crops it to the minimum rectangle
    based on its whiteness/transparency. 5 pixel padding used automatically.'''

    # Convert to array
    as_array = np.array(img)  # N x N x (r,g,b,a)

    # Set previously-transparent pixels to white
    if as_array.shape[2] == 4:
        as_array[as_array[:, :, 3] == 0] = [255, 255, 255, 0]

    as_array = as_array[:, :, :3]

    # Content defined as non-white and non-transparent pixel
    has_content = np.sum(as_array, axis=2, dtype=np.uint32) != 255 * 3
    xs, ys = np.nonzero(has_content)

    # Crop down
    margin = 5
    x_range = max([min(xs) - margin, 0]), min([max(xs) + margin, as_array.shape[0]])
    y_range = max([min(ys) - margin, 0]), min([max(ys) + margin, as_array.shape[1]])
    as_array_cropped = as_array[
        x_range[0]:x_range[1], y_range[0]:y_range[1], 0:3]

    img = Image.fromarray(as_array_cropped, mode='RGB')

    return ImageOps.expand(img, border=padding, fill=(255, 255, 255))


smi = 'CCCc1c(OCc2ccc(S(=O)c3cccc(-c4nnn[nH]4)c3)cc2)ccc(C(C)=O)c1O'
mol =  Chem.MolFromSmiles(smi)
#hight atoms
img1 = clourMol(mol,highlightAtoms_p=[1,2,3])
img1 =StripAlphaFromImage(img1)
img1 = TrimImgByWhite(img1)
display(img1)
# custom atom color
img2=clourMol(mol,highlightAtoms_p=[1,2,3],highlightAtomColors_p={1:(0.2,0.3,1),2:(1,0.3,0.3)})
img2 =StripAlphaFromImage(img2)
img2 = TrimImgByWhite(img2)
display(img2)
# hight bond
img3 = clourMol(mol,highlightBonds_p=[1,2])
img3 =StripAlphaFromImage(img3)
img3 = TrimImgByWhite(img3)
display(img3)
# custom bond color
img4 = clourMol(mol,highlightBonds_p=[1,2],highlightBondColors_p={1:(0.1,0.2,1),2:(1,0.3,0.3)})
img4 = TrimImgByWhite(img4)
display(img4)

#all
img5 = clourMol(mol,highlightAtoms_p=[1,2,3],highlightAtomColors_p={1:(0.2,0.3,1),2:(1,0.3,0.3)},highlightBonds_p=[1,2],highlightBondColors_p={1:(0.1,0.2,0.3),2:(0.1,0.3,0.3)})
img5 = TrimImgByWhite(img5)
display(img5)

输出:

_images/higliahtAtomBond2020-03-28_204517.208660.png

片段拼接

3片段拼接

药物化学家需要对多个片段进行系统排列组合研究,比如研究的分子由3个片段(A片段,B片段,C片段)组成。 假设A有10种片段、B有8种片段、C有10种片段,则一共有800种组合,化学家手画需要大量的时间,利用rdkit编写脚本可以节约 化学家大量的时间。

该脚本也适用于Protac的拼接功能。

编写代码combine3_v1.py, 代码如下:

#!/python

## author: Zhaoqiang Chen
## contact: 18321885908

from glob import glob
from rdkit import Chem
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import MolDrawing, DrawingOptions


def combine3frag(Amol,Bmol,Cmol):
    ABmolsmi=combine2frag(Amol,'Fr',Bmol,'Cs')
    print('ABSMI',ABmolsmi)
    ABmol= Chem.MolFromSmiles(ABmolsmi)
    ABCmolsmi=combine2frag(ABmol,'Rb',Cmol,'Fr')
    print("ABCsmi","ABCmolsmi")
    ABCmol=Chem.MolFromSmiles(ABCmolsmi)
    smi=Chem.MolToSmiles(ABCmol)
    return smi


def combine2frag(Amol,Fr,Bmol,Cs):
    combo = Chem.CombineMols(Amol,Bmol)
    Fr_NEI_ID=get_neiid_bysymbol(combo,Fr)
    Cs_NEI_ID=get_neiid_bysymbol(combo,Cs)
    edcombo = Chem.EditableMol(combo)
    edcombo.AddBond(Fr_NEI_ID,Cs_NEI_ID,order=Chem.rdchem.BondType.SINGLE)

    Fr_ID=get_id_bysymbol(combo,Fr)
    edcombo.RemoveAtom(Fr_ID)
    back = edcombo.GetMol()


    Cs_ID=get_id_bysymbol(back,Cs)

    edcombo=Chem.EditableMol(back)
    edcombo.RemoveAtom(Cs_ID)
    back = edcombo.GetMol()
    smi= Chem.MolToSmiles(back)
    return smi


def get_neiid_bysymbol(combo,symbol):
    for at in combo.GetAtoms():
        if at.GetSymbol()==symbol:
            at_nei=at.GetNeighbors()[0]
            return at_nei.GetIdx()
def get_id_bysymbol(combo,symbol):
    for at in combo.GetAtoms():
        if at.GetSymbol()==symbol:
            return at.GetIdx()



Asdfs=glob('./A/*')
Bsdfs=glob('./B/*')
Csdfs=glob('./C/*')

Asdfs=glob('./A/*')
Bsdfs=glob('./B/*')
Csdfs=glob('./C/*')


Amols=[ Chem.SDMolSupplier(f)[0] for f in Asdfs]
Bmols=[ Chem.SDMolSupplier(f)[0] for f in Bsdfs]
Cmols=[ Chem.SDMolSupplier(f)[0] for f in Csdfs]

resultsmi=[]
for Amol in Amols[0:]:
    for Bmol in Bmols[0:]:
        for Cmol in Cmols[0:]:
            smi=combine3frag(Amol,Bmol,Cmol)
            print(smi)
            resultsmi.append(smi)

w = Chem.SDWriter("result.sdf")
for smi in resultsmi:
    mol = Chem.MolFromSmiles(smi)
    w.write(mol)
w.close()

为了方便化学家的使用,我把脚本打包成了EXE程序 combine3_v1.exe

使用说明如下:

  1. 创建一个工作文件夹,比如”20220430”;
  2. 在工作文件夹下分别创建3个文件夹, 并命名为”A”,”B”,”C”;
  3. 用chemdraw 编辑A片段,连接的地方用’Fr’原子表示,保存为sdf文件,如A1.sdf,A2.sdf,A3.sdf等
  4. 用chemdraw 编辑B片段,连接的地方用’Cs’ ‘Rb’原子表示,保存为sdf文件,Cs接头和A片段的Fr接头项链,如B1.sdf,B2.sdf,B3.sdf等
  5. 用chemdraw 编辑C片段,连接的地方用’Fr’原子表示,Fr接头和B片段的Rb接头相连,保存为sdf文件,如C1.sdf,C2.sdf,B3.sdf等
  6. 将程序“combine3_v1.exe”或者脚本“combine3_v1.py” 拷贝到工作文件夹,双击运行程序或者运行脚本;
  7. 如果产生result.sdf且分子数目是对的,则说明是对的。否则可以联系作者。

示例A片段如下:

A1: [Fr]c1ccccc1
A2: Cc1cccc([Fr])c1
A3: CCc1cccc([Fr])c1
_images/Amols2022-04-30_182102.077199.png

示例B片段如下:

B1: [Rb]CC1CCC(C[Cs])CC1
B2: O=C([Rb])C1CCC(C[Cs])CC1
B3: O=C([Rb])C1CCC(C2([Cs])CC2)CC1
_images/Bmols2022-04-30_182503.826125.png

示例C片段如下:

C1: [Fr]C1CC1
C2: CC1CC1[Fr]
_images/Cmols2022-04-30_183205.113978.png

拼接后分子如下:

_images/resultMols2022-04-30_183800.246625.png

百度网盘下载连接如下:

链接:https://pan.baidu.com/s/1Mdi8E7ElAVjvnf242M02Ew?pwd=xa51
提取码:xa51
--来自百度网盘超级会员V4的分享

2片段(多片段)拼接

不管是3片段还是4片段的拼接本质上可以看成是2片段的拼接, 比如A-B-C-D的拼接,我们可以先让A和B拼接得到AB片段, 再让AB和C拼接得到ABC片段,最后让ABC和D 拼接得到ABCD。

为了增加代码的灵活性,这里引入了配置文件config.ini文件。 第一行中填入A片段的接头符号和B片段的接头符号,以空格隔开。

配置文件(config.ini)如下:

Fr  Cs

代码如下:

#!/python
## author: Zhaoqiang Chen
## contact: 18321885908

from glob import glob
from rdkit import Chem
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw import MolDrawing, DrawingOptions
import os

# if os.path.exists('AB'):
#     raise("please delete or rename AB dir")
# else:
#     os.mkdir("AB")

def combine2frag(Amol,Fr,Bmol,Cs):
    combo = Chem.CombineMols(Amol,Bmol)
    Fr_NEI_ID=get_neiid_bysymbol(combo,Fr)
    Cs_NEI_ID=get_neiid_bysymbol(combo,Cs)
    edcombo = Chem.EditableMol(combo)
    edcombo.AddBond(Fr_NEI_ID,Cs_NEI_ID,order=Chem.rdchem.BondType.SINGLE)

    Fr_ID=get_id_bysymbol(combo,Fr)
    edcombo.RemoveAtom(Fr_ID)
    back = edcombo.GetMol()


    Cs_ID=get_id_bysymbol(back,Cs)

    edcombo=Chem.EditableMol(back)
    edcombo.RemoveAtom(Cs_ID)
    back = edcombo.GetMol()
    smi= Chem.MolToSmiles(back)
    return smi


def get_neiid_bysymbol(combo,symbol):
    for at in combo.GetAtoms():
        if at.GetSymbol()==symbol:
            at_nei=at.GetNeighbors()[0]
            return at_nei.GetIdx()
def get_id_bysymbol(combo,symbol):
    for at in combo.GetAtoms():
        if at.GetSymbol()==symbol:
            return at.GetIdx()



Asdfs=glob('./A/*')
Bsdfs=glob('./B/*')

Asdfs=glob('./A/*')
Bsdfs=glob('./B/*')


Amols=[ Chem.SDMolSupplier(f)[0] for f in Asdfs]
Bmols=[ Chem.SDMolSupplier(f)[0] for f in Bsdfs]

resultsmi=[]
resultmols=[]

Fr='Fr'
Cs='Cs'
if os.path.exists('config.ini'):
    fh=open('config.ini')
    line=fh.readlines()[0].strip()
    Fr,Cs =line.split()

asmi = Chem.MolToSmiles(Amols[0])
bsmi = Chem.MolToSmiles(Bmols[0])

if Fr in asmi and Cs in bsmi:
    pass
else:
    raise("error,please fill joint symbol into the file config.ini")

for Amol in Amols[0:]:
        for Bmol in Bmols[0:]:
            smi=combine2frag(Amol,Fr,Bmol,Cs)
            print(smi)
            resultsmi.append(smi)

w = Chem.SDWriter("result.sdf")
for smi in resultsmi:
    mol = Chem.MolFromSmiles(smi)
    w.write(mol)
w.close()

使用说明:

  1. 将程序“combine2_v1.exe”或者脚本“combine2_v1.py” 拷贝到工作文件夹,双击运行程序或者运行脚本;
  2. 如果产生result.sdf且分子数目是对的,则说明是对的。否则可以联系作者。

百度网盘下载连接如下

链接:https://pan.baidu.com/s/17c8Hg2SJy9myuLeJwef7AQ?pwd=58sq
提取码:58sq
--来自百度网盘超级会员V4的分享

PMI分析

分子的空间分布分析:

文献中的示例图片:

_images/disk_rod2022-10-22_230457.795639.png _images/pmi_com_3d2022-10-22_230638.540365.png

2D 图形的横坐标是Disklikeness=I1/I3;纵坐标是Rodlikeness=I2/I3;

苯(0.5,0.5);乙炔(0,1);金刚烷(1,1)

disk=x=rdkit.Chem.Descriptors3D.NPR1(*x, **y) : Normalized principal moments ratio 1 (=I1/I3)
y=rod=rdkit.Chem.Descriptors3D.NPR2(*x, **y)  : Normalized principal moments ratio 2 (=I2/I3)

示例代码

import rdkit
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem


CCsmi="C#C"
bensmi="C1=CC=CC=C1"
Adamantanesmi="C1C2CC3CC1CC(C2)C3"


smi=CCsmi
mol=Chem.MolFromSmiles(smi)
m2=Chem.AddHs(mol)
AllChem.EmbedMolecule(m2)
# maxIters默认是优化200次,有时不会收敛,建议设置成2000
opt_state=AllChem.MMFFOptimizeMolecule(m2,maxIters=2000)
x=rdkit.Chem.rdMolDescriptors.CalcNPR1(m2)
y=rdkit.Chem.rdMolDescriptors.CalcNPR2(m2)
print("CC",x,y)


smi=bensmi
mol=Chem.MolFromSmiles(smi)
m2=Chem.AddHs(mol)
AllChem.EmbedMolecule(m2)
# maxIters默认是优化200次,有时不会收敛,建议设置成2000
opt_state=AllChem.MMFFOptimizeMolecule(m2,maxIters=2000)
# print(opt_state)
m2
x=rdkit.Chem.rdMolDescriptors.CalcNPR1(m2)
y=rdkit.Chem.rdMolDescriptors.CalcNPR2(m2)
print("benzene",x,y)


smi=Adamantanesmi
mol=Chem.MolFromSmiles(smi)
m2=Chem.AddHs(mol)
AllChem.EmbedMolecule(m2)
# maxIters默认是优化200次,有时不会收敛,建议设置成2000
opt_state=AllChem.MMFFOptimizeMolecule(m2,maxIters=2000)
m2
x=rdkit.Chem.rdMolDescriptors.CalcNPR1(m2)
y=rdkit.Chem.rdMolDescriptors.CalcNPR2(m2)
print("adamantane",x,y)

输出:

CC 7.614072578672083e-14 0.9999999999999301
benzene 0.4999999843169691 0.5000000156831409
adamantane 0.9999994596825732 0.9999998986910213

molecular complexity的定义不同人有不同的定义,rdkit中自带是Bertz的定义。

rdkit.Chem.GraphDescriptors.BertzCT(mol, cutoff=100, dMat=None, forceDMat=1)

   A topological index meant to quantify “complexity” of molecules.

   Consists of a sum of two terms, one representing the complexity of the bonding, the other representing the complexity of the distribution of heteroatoms.

   From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)

文献中使用的Bottcher的定义。

注解

Bottcher molecular complexity: https://github.com/boskovicgroup/bottchercomplexity

GB18: 448.39 mcbits, 17.25 mcbits/atom; himgaline: 477.83 mcbits, 20.78 mcbits/atom

形状相似性计算

Dear all,

A c++ implementation and Python wrappers of the ultrafast shape recognition
(USR) descriptor (Ballester and Richards, J. Comput. Chem. (2007), 28,
1711) and the USR CREDO atom types (USRCAT) descriptor (Schreyer and
Blundell, J. Cheminf. (2012), 4, 27) are now available for the RDKit. The
code is based on the Python implementations of Jan Domanski and Adrian
Schreyer.

The descriptors can be accessed from Python via
rdkit.Chem.rdMolDescriptors.GetUSR and
rdkit.Chem.rdMolDescriptors.GetUSRCAT.

Best regards,
Sereina

应用场景1:

分子生成-》对接-》相互作用-》形状过滤-》设计分子

应用场景2

2D smiles->3D mol->minization-> 形状的相似性打分

示例文件 lincs_50_smile.txt

示例代码:

import rdkit
from rdkit import Chem
from rdkit.Chem.Descriptors import NumValenceElectrons
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import GetUSRScore, GetUSRCAT
import nglview

fh=open('lincs_50_smile.txt')
smis=[]
for line in fh.readlines():
    smi=line.strip()
    smis.append(smi)

# print(smis)
mols3d=[]
for smi in smis:
    mol=Chem.MolFromSmiles(smi)
    m2=Chem.AddHs(mol)
    AllChem.EmbedMolecule(m2)
    # maxIters默认是优化200次,有时不会收敛,建议设置成2000
    opt_state=AllChem.MMFFOptimizeMolecule(m2,maxIters=2000)
#     print(opt_state)
    mols3d.append(m2)

usrcats = [ GetUSRCAT( mol ) for mol in mols3d ]
for i in range( len( usrcats )):
    for j in range( len( usrcats )):
        score = GetUSRScore( usrcats[ i ], usrcats[ j ] )
        print(i,j,"usrscroe:",score)

输出:

0 0 usrscroe: 1.0
0 1 usrscroe: 0.2136718008888286
0 2 usrscroe: 0.11508212828197005
0 3 usrscroe: 0.11843775593882168
0 4 usrscroe: 0.1447975173956143
0 5 usrscroe: 0.12813777238473245
0 6 usrscroe: 0.13726495667508753
0 7 usrscroe: 0.12131620151077707
0 8 usrscroe: 0.0900556733174051
0 9 usrscroe: 0.11663586460430561

注解

jupyter 文档中可以使用nglview 交互查看分子的3D 构象

import nglview
nglview.show_rdkit(mols3d[24])
nglview.show_rdkit(mols3d[13])
_images/usrcat2022-10-22_222106.361425.png

USR 描述符

ultrafast shape recognition
(USR) descriptor

https://iwatobipen.wordpress.com/2017/11/17/calculate-usrcat-with-rdkit-rdkit/

USRCAT 描述符

USR CREDO atom types (USRCAT) descriptor

https://iwatobipen.wordpress.com/2017/11/17/calculate-usrcat-with-rdkit-rdkit/

可旋转二面角

提取分子中可旋转二面角,

示例文件: JGQ_ideal.sdf

示例代码:

import rdkit
from rdkit import Chem
from rdkit.Chem.Descriptors import NumValenceElectrons
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import GetUSRScore, GetUSRCAT
import nglview

# https://files.rcsb.org/ligands/view/JGQ_ideal.sdf
sdffile="JGQ_ideal.sdf"
from rdkit import Chem
mols_suppl = Chem.SDMolSupplier(sdffile)
# print(type(mols_suppl))
mol=mols_suppl[0]
def getdihes(mol):
    #pytmol label atom rank
    dihes=[]
    for bond in mol.GetBonds():
        if bond.GetBondType().name=='SINGLE' and bond.IsInRing()==False:
            atom2id=bond.GetBeginAtomIdx()
            atom3id=bond.GetEndAtomIdx()
            atom2=bond.GetBeginAtom()
            atom3=bond.GetEndAtom()

            if len(atom3.GetNeighbors())==1  or len(atom2.GetNeighbors())==1 :
                pass
            else:
                atom1s=atom2.GetNeighbors()
                atom1sid=[ at.GetIdx()   for at in atom1s]
    #             print(atom1sid,atom3id)
                atom1sid.remove(atom3id)
                atom1id=atom1sid[0]

                atom4s=atom3.GetNeighbors()
                atom4sid=[ at.GetIdx()   for at in atom4s]
    #             print(atom1sid,atom3id)
                atom4sid.remove(atom2id)
                atom4id=atom4sid[0]
#                 print(atom1id,atom2id,atom3id,atom4id)
                dihe=[atom1id,atom2id,atom3id,atom4id]
                dihes.append(dihe)
    return dihes

dihes=getdihes(mol)
print(dihes)

输出:

[[18, 17, 29, 7], [6, 7, 29, 17], [8, 9, 10, 15], [21, 4, 26, 5], [14, 13, 16, 28], [26, 4, 21, 0], [21, 20, 25, 30]]

计算小分子的中心坐标

示例文件:

输入:

6IC_ideal.sdf

import rdkit
from rdkit import Chem
from rdkit.Chem.Descriptors import NumValenceElectrons
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import GetUSRScore, GetUSRCAT
# import nglview

# 6IC_ideal.sdf
sdffile="5rec_lig.sdf"
sdffile="6IC_ideal.sdf"
from rdkit import Chem
mols_suppl = Chem.SDMolSupplier(sdffile)
mol3d=mol.GetConformer()
mol=mols_suppl[0]
# mol

def get_center(mol,mol3d):
    coord=[0,0,0]
    num=0
    x=0
    y=0
    z=0
    for ind,at in enumerate(mol.GetAtoms()):
        num+=1
        x+=mol3d.GetAtomPosition(ind).x
#         print("x",x)
        y+=mol3d.GetAtomPosition(ind).y
        z+=mol3d.GetAtomPosition(ind).z
#     print(x,y,z,num)
    x=x/num
    y=y/num
    z=z/num
    coord=[x,y,z]
    return coord

get_center(mol,mol3d)

输出:

[0.11904545454545469, 0.03899999999999992, -0.004090909090909135]

计算描述符FSP3

Fsp3, the number of sp3 hybridized carbons/total carbon count。

from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, Descriptors3D, rdMolDescriptors

def mol_with_atom_index(mol):
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx())
    return mol


smi="CCO[C@H]1CCN([C@@H](C1)c2ccc(cc2)C(=O)O)Cc3c4cc[nH]c4c(cc3OC)C"
mol=Chem.MolFromSmiles(smi)
mol_with_atom_index(mol)
FSP3=rdMolDescriptors.CalcFractionCSP3(mol)
print(FSP3)

输出

0.4

The RDKit Aromaticity Model

The Simple Aromaticity Model

The MDL Aromaticity Model

SMILES Support and Extensions

SMARTS Support and Extensions

Ring Finding and SSSR

Reaction SMARTS

Some features Chirality Rules and caveats

The Feature Definition File Format

Representation of Pharmacophore Fingerprints

Atom-Atom Matching in Substructure Queries

Molecular Sanitization

Implementation Details

Atom Properties and SDF files

Support for Enhanced Stereochemistry

Additional Information About the Fingerprints

license