Source code for diffpy.structure.parsers.p_pdb

#!/usr/bin/env python
##############################################################################
#
# diffpy.structure  by DANSE Diffraction group
#                   Simon J. L. Billinge
#                   (c) 2008 trustees of the Michigan State University.
#                   All rights reserved.
#
# File coded by:    Pavol Juhas
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE_DANSE.txt for license information.
#
##############################################################################

"""Basic parser for PDB structure format.

Note
----
References:
    https://www.wwpdb.org/documentation/file-format-content/format23/v2.3.html
    https://www.wwpdb.org/documentation/file-format-content/format30/index.html
"""

import sys

import numpy
from numpy import pi

from diffpy.structure import Structure
from diffpy.structure.parsers import StructureParser
from diffpy.structure.structureerrors import StructureFormatError



[docs]
class P_pdb(StructureParser):
    """Simple parser for PDB format.

    The parser understands following PDB records: `TITLE, CRYST1, SCALE1,
    SCALE2, SCALE3, ATOM, SIGATM, ANISOU, SIGUIJ, TER, HETATM, END`.

    Attributes
    ----------
    format : str
        Format name, default "pdb".
    """

    # Static data members
    orderOfRecords = [
        "HEADER",
        "OBSLTE",
        "TITLE",
        "CAVEAT",
        "COMPND",
        "SOURCE",
        "KEYWDS",
        "EXPDTA",
        "AUTHOR",
        "REVDAT",
        "SPRSDE",
        "JRNL",
        "REMARK",
        "REMARK",
        "REMARK",
        "REMARK",
        "DBREF",
        "SEQADV",
        "SEQRES",
        "MODRES",
        "HET",
        "HETNAM",
        "HETSYN",
        "FORMUL",
        "HELIX",
        "SHEET",
        "TURN",
        "SSBOND",
        "LINK",
        "HYDBND",
        "SLTBRG",
        "CISPEP",
        "SITE",
        "CRYST1",
        "ORIGX1",
        "ORIGX2",
        "ORIGX3",
        "SCALE1",
        "SCALE2",
        "SCALE3",
        "MTRIX1",
        "MTRIX2",
        "MTRIX3",
        "TVECT",
        "MODEL",
        "ATOM",
        "SIGATM",
        "ANISOU",
        "SIGUIJ",
        "TER",
        "HETATM",
        "ENDMDL",
        "CONECT",
        "MASTER",
        "END",
    ]
    """list: Ordered list of PDB record labels."""

    validRecords = dict.fromkeys(orderOfRecords)
    """dict: Dictionary of PDB record labels."""

    def __init__(self):
        StructureParser.__init__(self)
        self.format = "pdb"
        return


[docs]
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Parameters
        ----------
        lines : list of str
            List of lines in PDB format.

        Returns
        -------
        Structure
            Parsed structure instance.

        Raises
        ------
        StructureFormatError
            Invalid PDB record.
        """
        try:
            stru = Structure()
            scale = numpy.identity(3, dtype=float)
            scaleU = numpy.zeros(3, dtype=float)
            p_nl = 0
            for line in lines:
                p_nl += 1
                # skip blank lines
                if not line.strip():
                    continue
                # make sure line has 80 characters
                if len(line) < 80:
                    line = "%-80s" % line
                words = line.split()
                record = words[0]
                if record == "TITLE":
                    continuation = line[8:10]
                    if continuation.strip():
                        stru.title += line[10:].rstrip()
                    else:
                        stru.title = line[10:].rstrip()
                elif record == "CRYST1":
                    a = float(line[7:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                    scale = numpy.transpose(stru.lattice.recbase)
                elif record == "SCALE1":
                    sc = numpy.zeros((3, 3), dtype=float)
                    sc[0, :] = [float(x) for x in line[10:40].split()]
                    scaleU[0] = float(line[45:55])
                elif record == "SCALE2":
                    sc[1, :] = [float(x) for x in line[10:40].split()]
                    scaleU[1] = float(line[45:55])
                elif record == "SCALE3":
                    sc[2, :] = [float(x) for x in line[10:40].split()]
                    scaleU[2] = float(line[45:55])
                    base = numpy.transpose(numpy.linalg.inv(sc))
                    abcABGcryst = numpy.array(stru.lattice.abcABG())
                    stru.lattice.setLatBase(base)
                    abcABGscale = numpy.array(stru.lattice.abcABG())
                    reldiff = numpy.fabs(1.0 - abcABGscale / abcABGcryst)
                    if not numpy.all(reldiff < 1.0e-4):
                        emsg = "%d: " % p_nl + "SCALE and CRYST1 are not consistent."
                        raise StructureFormatError(emsg)
                    if numpy.any(scaleU != 0.0):
                        emsg = "Origin offset not yet implemented."
                        raise NotImplementedError(emsg)
                elif record in ("ATOM", "HETATM"):
                    name = line[12:16].strip()
                    rc = [float(x) for x in line[30:54].split()]
                    try:
                        occupancy = float(line[54:60])
                    except ValueError:
                        occupancy = 1.0
                    try:
                        B = float(line[60:66])
                        uiso = B / (8 * pi**2)
                    except ValueError:
                        uiso = 0.0
                    element = line[76:78].strip()
                    if element == "":
                        # get element from the first 2 characters of name
                        element = line[12:14].strip()
                        element = element[0].upper() + element[1:].lower()
                    stru.addNewAtom(element, occupancy=occupancy, label=name)
                    last_atom = stru.getLastAtom()
                    last_atom.xyz_cartn = rc
                    last_atom.Uisoequiv = uiso
                elif record == "SIGATM":
                    sigrc = [float(x) for x in line[30:54].split()]
                    sigxyz = numpy.dot(scale, sigrc)
                    try:
                        sigo = float(line[54:60])
                    except ValueError:
                        sigo = 0.0
                    try:
                        sigB = float(line[60:66])
                        sigU = numpy.identity(3) * sigB / (8 * pi**2)
                    except ValueError:
                        sigU = numpy.zeros((3, 3), dtype=float)
                    last_atom.sigxyz = sigxyz
                    last_atom.sigo = sigo
                    last_atom.sigU = sigU
                elif record == "ANISOU":
                    last_atom.anisotropy = True
                    Uij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                    Ua = last_atom.U
                    for i in range(3):
                        Ua[i, i] = Uij[i]
                    Ua[0, 1] = Ua[1, 0] = Uij[3]
                    Ua[0, 2] = Ua[2, 0] = Uij[4]
                    Ua[1, 2] = Ua[2, 1] = Uij[5]
                elif record == "SIGUIJ":
                    sigUij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                    for i in range(3):
                        last_atom.sigU[i, i] = sigUij[i]
                    last_atom.sigU[0, 1] = last_atom.sigU[1, 0] = sigUij[3]
                    last_atom.sigU[0, 2] = last_atom.sigU[2, 0] = sigUij[4]
                    last_atom.sigU[1, 2] = last_atom.sigU[2, 1] = sigUij[5]
                elif record in P_pdb.validRecords:
                    pass
                else:
                    emsg = "%d: invalid record name '%r'" % (p_nl, record)
                    raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: invalid PDB record" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            raise e.with_traceback(exc_traceback)
        return stru



[docs]
    def titleLines(self, stru):
        """Build lines corresponding to `TITLE` record."""
        lines = []
        title = stru.title
        while title != "":
            stop = len(title)
            # maximum length of title record is 60
            if stop > 60:
                stop = title.rfind(" ", 10, 60)
                if stop < 0:
                    stop = 60
            if len(lines) == 0:
                continuation = "  "
            else:
                continuation = "%2i" % (len(lines) + 1)
            lines.append("%-80s" % ("TITLE   " + continuation + title[0:stop]))
            title = title[stop:]
        return lines



[docs]
    def cryst1Lines(self, stru):
        """Build lines corresponding to `CRYST1` record."""
        lines = []
        latpar = (
            stru.lattice.a,
            stru.lattice.b,
            stru.lattice.c,
            stru.lattice.alpha,
            stru.lattice.beta,
            stru.lattice.gamma,
        )
        if latpar != (1.0, 1.0, 1.0, 90.0, 90.0, 90.0):
            line = "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f" % latpar
            lines.append("%-80s" % line)
        return lines



[docs]
    def atomLines(self, stru, idx):
        """Build `ATOM` records and possibly `SIGATM`, `ANISOU` or `SIGUIJ` records
        for `structure` stru `atom` number aidx.
        """
        lines = []
        a = stru[idx]
        ad = a.__dict__
        rc = a.xyz_cartn
        B = a.Bisoequiv
        atomline = (
            "ATOM  "  # 1-6
            + "%(serial)5i "  # 7-11, 12
            + "%(name)-4s"  # 13-16
            + "%(altLoc)c"  # 17
            + "%(resName)-3s "  # 18-20, 21
            + "%(chainID)c"  # 22
            + "%(resSeq)4i"  # 23-26
            + "%(iCode)c   "  # 27, 28-30
            + "%(x)8.3f%(y)8.3f%(z)8.3f"  # 31-54
            + "%(occupancy)6.2f"  # 55-60
            + "%(tempFactor)6.2f      "  # 61-66, 67-72
            + "%(segID)-4s"  # 73-76
            + "%(element)2s"  # 77-78
            + "%(charge)-2s"  # 79-80
        ) % {
            "serial": idx + 1,
            "name": a.label or a.element,
            "altLoc": " ",
            "resName": "",
            "chainID": " ",
            "resSeq": 1,
            "iCode": " ",
            "x": rc[0],
            "y": rc[1],
            "z": rc[2],
            "occupancy": a.occupancy,
            "tempFactor": B,
            "segID": "",
            "element": a.element,
            "charge": "",
        }
        lines.append(atomline)
        isotropic = numpy.all(a.U == a.U[0, 0] * numpy.identity(3))
        if not isotropic:
            mid = " %7i%7i%7i%7i%7i%7i  " % tuple(
                numpy.around(1e4 * numpy.array([a.U[0, 0], a.U[1, 1], a.U[2, 2], a.U[0, 1], a.U[0, 2], a.U[1, 2]]))
            )
            line = "ANISOU" + atomline[6:27] + mid + atomline[72:80]
            lines.append(line)
        # default values of standard deviations
        d_sigxyz = numpy.zeros(3, dtype=float)
        d_sigo = 0.0
        d_sigU = numpy.zeros((3, 3), dtype=float)
        sigxyz = ad.get("sigxyz", d_sigxyz)
        sigo = [ad.get("sigo", d_sigo)]
        sigU = ad.get("sigU", d_sigU)
        sigB = [8 * pi**2 * numpy.average([sigU[i, i] for i in range(3)])]
        sigmas = numpy.concatenate((sigxyz, sigo, sigB))
        # no need to print sigmas if they all round to zero
        hassigmas = numpy.any(numpy.fabs(sigmas) >= numpy.array(3 * [5e-4] + 2 * [5e-3])) or numpy.any(
            numpy.fabs(sigU) > 5.0e-5
        )
        if hassigmas:
            mid = "   %8.3f%8.3f%8.3f%6.2f%6.2f      " % tuple(sigmas)
            line = "SIGATM" + atomline[6:27] + mid + atomline[72:80]
            lines.append(line)
            # do we need SIGUIJ record?
            if not numpy.all(sigU == sigU[0, 0] * numpy.identity(3)):
                mid = " %7i%7i%7i%7i%7i%7i  " % tuple(
                    numpy.around(
                        1e4 * numpy.array([sigU[0, 0], sigU[1, 1], sigU[2, 2], sigU[0, 1], sigU[0, 2], sigU[1, 2]])
                    )
                )
                line = "SIGUIJ" + atomline[6:27] + mid + atomline[72:80]
                lines.append(line)
        return lines



[docs]
    def toLines(self, stru):
        """Convert `Structure` stru to a list of lines in PDB format.

        Parameters
        ----------
        stru : Structure
            Structure to be converted.

        Returns
        -------
        list of str
            List of lines in PDB format.
        """
        lines = []
        lines.extend(self.titleLines(stru))
        lines.extend(self.cryst1Lines(stru))
        for idx in range(len(stru)):
            lines.extend(self.atomLines(stru, idx))
        line = (
            "TER   "  # 1-6
            + "%(serial)5i      "  # 7-11, 12-17
            + "%(resName)-3s "  # 18-20, 21
            + "%(chainID)c"  # 22
            + "%(resSeq)4i"  # 23-26
            + "%(iCode)c"  # 27
            + "%(blank)53s"  # 28-80
        ) % {"serial": len(stru) + 1, "resName": "", "chainID": " ", "resSeq": 1, "iCode": " ", "blank": " "}
        lines.append(line)
        lines.append("%-80s" % "END")
        return lines




# End of class P_pdb

# Routines -------------------------------------------------------------------



[docs]
def getParser():
    """Return new `parser` object for PDB format.

    Returns
    -------
    P_pdb
        Instance of `P_pdb`.
    """
    return P_pdb()