Source code for diffpy.structure.parsers.p_xcfg

#!/usr/bin/env python
##############################################################################
#
# diffpy.structure  by DANSE Diffraction group
#                   Simon J. L. Billinge
#                   (c) 2007 trustees of the Michigan State University.
#                   All rights reserved.
#
# File coded by:    Pavol Juhas
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE_DANSE.txt for license information.
#
##############################################################################

"""Parser for extended CFG format used by atomeye.

Attributes
----------
AtomicMass : dict
    Dictionary of atomic masses for elements.
"""

import re
import sys

import numpy

from diffpy.structure import Structure
from diffpy.structure.parsers import StructureParser
from diffpy.structure.structureerrors import StructureFormatError
from diffpy.structure.utils import isfloat

# Constants ------------------------------------------------------------------

# Atomic Mass of elements
# This can be later when PeriodicTable package becomes available.

AtomicMass = {
    "H": 1.007947,  # 1 H hydrogen 1.007947
    "He": 4.0026022,  # 2 He helium 4.0026022
    "Li": 6.9412,  # 3 Li lithium 6.9412
    "Be": 9.0121823,  # 4 Be beryllium 9.0121823
    "B": 10.8117,  # 5 B boron 10.8117
    "C": 12.01078,  # 6 C carbon 12.01078
    "N": 14.00672,  # 7 N nitrogen 14.00672
    "O": 15.99943,  # 8 O oxygen 15.99943
    "F": 18.99840325,  # 9 F fluorine 18.99840325
    "Ne": 20.17976,  # 10 Ne neon 20.17976
    "Na": 22.9897702,  # 11 Na sodium 22.9897702
    "Mg": 24.30506,  # 12 Mg magnesium 24.30506
    "Al": 26.9815382,  # 13 Al aluminium 26.9815382
    "Si": 28.08553,  # 14 Si silicon 28.08553
    "P": 30.9737612,  # 15 P phosphorus 30.9737612
    "S": 32.0655,  # 16 S sulfur 32.0655
    "Cl": 35.4532,  # 17 Cl chlorine 35.4532
    "Ar": 39.9481,  # 18 Ar argon 39.9481
    "K": 39.09831,  # 19 K potassium 39.09831
    "Ca": 40.0784,  # 20 Ca calcium 40.0784
    "Sc": 44.9559108,  # 21 Sc scandium 44.9559108
    "Ti": 47.8671,  # 22 Ti titanium 47.8671
    "V": 50.94151,  # 23 V vanadium 50.94151
    "Cr": 51.99616,  # 24 Cr chromium 51.99616
    "Mn": 54.9380499,  # 25 Mn manganese 54.9380499
    "Fe": 55.8452,  # 26 Fe iron 55.8452
    "Co": 58.9332009,  # 27 Co cobalt 58.9332009
    "Ni": 58.69342,  # 28 Ni nickel 58.69342
    "Cu": 63.5463,  # 29 Cu copper 63.5463
    "Zn": 65.4094,  # 30 Zn zinc 65.4094
    "Ga": 69.7231,  # 31 Ga gallium 69.7231
    "Ge": 72.641,  # 32 Ge germanium 72.641
    "As": 74.921602,  # 33 As arsenic 74.921602
    "Se": 78.963,  # 34 Se selenium 78.963
    "Br": 79.9041,  # 35 Br bromine 79.9041
    "Kr": 83.7982,  # 36 Kr krypton 83.7982
    "Rb": 85.46783,  # 37 Rb rubidium 85.46783
    "Sr": 87.621,  # 38 Sr strontium 87.621
    "Y": 88.905852,  # 39 Y yttrium 88.905852
    "Zr": 91.2242,  # 40 Zr zirconium 91.2242
    "Nb": 92.906382,  # 41 Nb niobium 92.906382
    "Mo": 95.942,  # 42 Mo molybdenum 95.942
    "Tc": 98.0,  # 43 Tc technetium 98
    "Ru": 101.072,  # 44 Ru ruthenium 101.072
    "Rh": 102.905502,  # 45 Rh rhodium 102.905502
    "Pd": 106.421,  # 46 Pd palladium 106.421
    "Ag": 107.86822,  # 47 Ag silver 107.86822
    "Cd": 112.4118,  # 48 Cd cadmium 112.4118
    "In": 114.8183,  # 49 In indium 114.8183
    "Sn": 118.7107,  # 50 Sn tin 118.7107
    "Sb": 121.7601,  # 51 Sb antimony 121.7601
    "Te": 127.603,  # 52 Te tellurium 127.603
    "I": 126.904473,  # 53 I iodine 126.904473
    "Xe": 131.2936,  # 54 Xe xenon 131.2936
    "Cs": 132.905452,  # 55 Cs caesium 132.905452
    "Ba": 137.3277,  # 56 Ba barium 137.3277
    "La": 138.90552,  # 57 La lanthanum 138.90552
    "Ce": 140.1161,  # 58 Ce cerium 140.1161
    "Pr": 140.907652,  # 59 Pr praseodymium 140.907652
    "Nd": 144.243,  # 60 Nd neodymium 144.243
    "Pm": 145.0,  # 61 Pm promethium 145
    "Sm": 150.363,  # 62 Sm samarium 150.363
    "Eu": 151.9641,  # 63 Eu europium 151.9641
    "Gd": 157.253,  # 64 Gd gadolinium 157.253
    "Tb": 158.925342,  # 65 Tb terbium 158.925342
    "Dy": 162.5001,  # 66 Dy dysprosium 162.5001
    "Ho": 164.930322,  # 67 Ho holmium 164.930322
    "Er": 167.2593,  # 68 Er erbium 167.2593
    "Tm": 168.934212,  # 69 Tm thulium 168.934212
    "Yb": 173.043,  # 70 Yb ytterbium 173.043
    "Lu": 174.9671,  # 71 Lu lutetium 174.9671
    "Hf": 178.492,  # 72 Hf hafnium 178.492
    "Ta": 180.94791,  # 73 Ta tantalum 180.94791
    "W": 183.841,  # 74 W tungsten 183.841
    "Re": 186.2071,  # 75 Re rhenium 186.2071
    "Os": 190.233,  # 76 Os osmium 190.233
    "Ir": 192.2173,  # 77 Ir iridium 192.2173
    "Pt": 195.0782,  # 78 Pt platinum 195.0782
    "Au": 196.966552,  # 79 Au gold 196.966552
    "Hg": 200.592,  # 80 Hg mercury 200.592
    "Tl": 204.38332,  # 81 Tl thallium 204.38332
    "Pb": 207.21,  # 82 Pb lead 207.21
    "Bi": 208.980382,  # 83 Bi bismuth 208.980382
    "Po": 209.0,  # 84 Po polonium 209
    "At": 210.0,  # 85 At astatine 210
    "Rn": 222.0,  # 86 Rn radon 222
    "Fr": 223.0,  # 87 Fr francium 223
    "Ra": 226.0,  # 88 Ra radium 226
    "Ac": 227.0,  # 89 Ac actinium 227
    "Th": 232.03811,  # 90 Th thorium 232.03811
    "Pa": 231.035882,  # 91 Pa protactinium 231.035882
    "U": 238.028913,  # 92 U uranium 238.028913
    "Np": 237.0,  # 93 Np neptunium 237
    "Pu": 244.0,  # 94 Pu plutonium 244
    "Am": 243.0,  # 95 Am americium 243
    "Cm": 247.0,  # 96 Cm curium 247
    "Bk": 247.0,  # 97 Bk berkelium 247
    "Cf": 251.0,  # 98 Cf californium 251
    "Es": 252.0,  # 99 Es einsteinium 252
    "Fm": 257.0,  # 100 Fm fermium 257
    "Md": 258.0,  # 101 Md mendelevium 258
    "No": 259.0,  # 102 No nobelium 259
    "Lr": 262.0,  # 103 Lr lawrencium 262
    "Rf": 261.0,  # 104 Rf rutherfordium 261
    "Db": 262.0,  # 105 Db dubnium 262
    "Sg": 266.0,  # 106 Sg seaborgium 266
    "Bh": 264.0,  # 107 Bh bohrium 264
    "Hs": 277.0,  # 108 Hs hassium 277
    "Mt": 268.0,  # 109 Mt meitnerium 268
    "Ds": 281.0,  # 110 Ds darmstadtium 281
    "Rg": 272.0,  # 111 Rg roentgenium 272
}

# ----------------------------------------------------------------------------


[docs] class P_xcfg(StructureParser): """Parser for AtomEye extended CFG format. Attributes ---------- format : str Format name, default "xcfg". """ cluster_boundary = 2 """int: Width of boundary around corners of non-periodic cluster to avoid PBC effects in atomeye. """ def __init__(self): StructureParser.__init__(self) self.format = "xcfg" return
[docs] def parseLines(self, lines): """Parse list of lines in XCFG format. Parameters ---------- lines : list of str List of lines in XCFG format. Returns ------- Structure Parsed structure instance. Raises ------ StructureFormatError Invalid XCFG format. """ xcfg_Number_of_particles = None xcfg_A = None xcfg_H0 = numpy.zeros((3, 3), dtype=float) xcfg_H0_set = numpy.zeros((3, 3), dtype=bool) xcfg_NO_VELOCITY = False xcfg_entry_count = None p_nl = 0 p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =") p_auxiliary = {} stru = Structure() # ignore trailing blank lines stop = len(lines) for line in reversed(lines): if line.strip(): break stop -= 1 # iterator over the valid data lines ilines = iter(lines[:stop]) try: # read XCFG header for line in ilines: p_nl += 1 stripped_line = line.strip() # blank lines and lines starting with # are ignored if stripped_line == "" or line[0] == "#": continue elif xcfg_Number_of_particles is None: if line.find("Number of particles =") != 0: emsg = ("%d: first line must " + "contain 'Number of particles ='") % p_nl raise StructureFormatError(emsg) xcfg_Number_of_particles = int(line[21:].split(None, 1)[0]) p_natoms = xcfg_Number_of_particles elif line.find("A =") == 0: xcfg_A = float(line[3:].split(None, 1)[0]) elif line.find("H0(") == 0: i, j = (int(line[3]) - 1, int(line[5]) - 1) xcfg_H0[i, j] = float(line[10:].split(None, 1)[0]) xcfg_H0_set[i, j] = True elif line.find(".NO_VELOCITY.") == 0: xcfg_NO_VELOCITY = True elif line.find("entry_count =") == 0: xcfg_entry_count = int(line[13:].split(None, 1)[0]) elif p_auxiliary_re.match(line): m = p_auxiliary_re.match(line) idx = int(m.group(1)) p_auxiliary[idx] = line[m.end() :].split(None, 1)[0] else: break # check header for consistency if not numpy.all(xcfg_H0_set): emsg = "H0 tensor is not properly defined" raise StructureFormatError(emsg) p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1 for i in range(p_auxnum): if i not in p_auxiliary: p_auxiliary[i] = "aux%d" % i sorted_aux_keys = sorted(p_auxiliary.keys()) if p_auxnum != 0: stru.xcfg = {"auxiliaries": [p_auxiliary[k] for k in sorted_aux_keys]} ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6) if ecnt != xcfg_entry_count: emsg = ("%d: auxiliary fields are " "not consistent with entry_count") % p_nl raise StructureFormatError(emsg) # define proper lattice stru.lattice.setLatBase(xcfg_H0) # here we are inside the data block p_element = None for line in ilines: p_nl += 1 words = line.split() # ignore atom mass if len(words) == 1 and isfloat(words[0]): continue # parse element allowing empty symbol elif len(words) <= 1: w = line.strip() p_element = w[:1].upper() + w[1:].lower() elif len(words) == xcfg_entry_count and p_element is not None: fields = [float(w) for w in words] xyz = [xcfg_A * xi for xi in fields[:3]] stru.addNewAtom(p_element, xyz=xyz) a = stru[-1] _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary, no_velocity=xcfg_NO_VELOCITY) else: emsg = "%d: invalid record" % p_nl raise StructureFormatError(emsg) if len(stru) != p_natoms: emsg = "expected %d atoms, read %d" % (p_natoms, len(stru)) raise StructureFormatError(emsg) except (ValueError, IndexError): emsg = "%d: file is not in XCFG format" % p_nl exc_type, exc_value, exc_traceback = sys.exc_info() e = StructureFormatError(emsg) raise e.with_traceback(exc_traceback) return stru
[docs] def toLines(self, stru): """Convert Structure stru to a list of lines in XCFG atomeye format. Parameters ---------- stru : Structure Structure to be converted. Returns ------- list of str List of lines in XCFG format. Raises ------ StructureFormatError Cannot convert empty structure to XCFG format. """ if len(stru) == 0: emsg = "cannot convert empty structure to XCFG format" raise StructureFormatError(emsg) lines = [] lines.append("Number of particles = %i" % len(stru)) # figure out length unit A allxyz = numpy.array([a.xyz for a in stru]) lo_xyz = allxyz.min(axis=0) hi_xyz = allxyz.max(axis=0) max_range_xyz = (hi_xyz - lo_xyz).max() if numpy.allclose(stru.lattice.abcABG(), (1, 1, 1, 90, 90, 90)): max_range_xyz += self.cluster_boundary # range of CFG coordinates must be less than 1 p_A = numpy.ceil(max_range_xyz + 1.0e-13) # atomeye draws rubbish when boxsize is less than 3.5 hi_ucvect = max([numpy.sqrt(numpy.dot(v, v)) for v in stru.lattice.base]) if hi_ucvect * p_A < 3.5: p_A = numpy.ceil(3.5 / hi_ucvect) lines.append("A = %.8g Angstrom" % p_A) # how much do we need to shift the coordinates? p_dxyz = numpy.zeros(3, dtype=float) for i in range(3): if lo_xyz[i] / p_A < 0.0 or hi_xyz[i] / p_A >= 1.0 or (lo_xyz[i] == hi_xyz[i] and lo_xyz[i] == 0.0): p_dxyz[i] = 0.5 - (hi_xyz[i] + lo_xyz[i]) / 2.0 / p_A # H0 tensor for i in range(3): for j in range(3): lines.append("H0(%i,%i) = %.8g A" % (i + 1, j + 1, stru.lattice.base[i, j])) # get out for empty structure if len(stru) == 0: return lines a_first = stru[0] p_NO_VELOCITY = "v" not in a_first.__dict__ if p_NO_VELOCITY: lines.append(".NO_VELOCITY.") # build a p_auxiliaries list of (aux_name,atom_expression) tuples # if stru came from xcfg file, it would store original auxiliaries in # xcfg dictionary try: p_auxiliaries = [(aux, "a." + aux) for aux in stru.xcfg["auxiliaries"]] except AttributeError: p_auxiliaries = [] # add occupancy if any atom has nonunit occupancy for a in stru: if a.occupancy != 1.0: p_auxiliaries.append(("occupancy", "a.occupancy")) break # add temperature factor with as many terms as needed # check whether all temperature factors are zero or isotropic p_allUzero = True p_allUiso = True for a in stru: if p_allUzero and numpy.any(a.U != 0.0): p_allUzero = False if not numpy.all(a.U == a.U[0, 0] * numpy.identity(3)): p_allUiso = False # here p_allUzero must be false break if p_allUzero: pass elif p_allUiso: p_auxiliaries.append(("Uiso", "uflat[0]")) else: p_auxiliaries.extend([("U11", "uflat[0]"), ("U22", "uflat[4]"), ("U33", "uflat[8]")]) # check if there are off-diagonal elements allU = numpy.array([a.U for a in stru]) if numpy.any(allU[:, 0, 1] != 0.0): p_auxiliaries.append(("U12", "uflat[1]")) if numpy.any(allU[:, 0, 2] != 0.0): p_auxiliaries.append(("U13", "uflat[2]")) if numpy.any(allU[:, 1, 2] != 0.0): p_auxiliaries.append(("U23", "uflat[5]")) # count entries p_entry_count = (3 if p_NO_VELOCITY else 6) + len(p_auxiliaries) lines.append("entry_count = %d" % p_entry_count) # add auxiliaries for i in range(len(p_auxiliaries)): lines.append("auxiliary[%d] = %s [au]" % (i, p_auxiliaries[i][0])) # now define entry format efmt for representing atom properties fmwords = ["{pos[0]:.8g}", "{pos[1]:.8g}", "{pos[2]:.8g}"] if not p_NO_VELOCITY: fmwords += ["{v[0]:.8g}", "{v[1]:.8g}", "{v[2]:.8g}"] fmwords += (("{" + e + ":.8g}") for p, e in p_auxiliaries) efmt = " ".join(fmwords) # we are ready to output atoms: lines.append("") p_element = None for a in stru: if a.element != p_element: p_element = a.element lines.append("%.4f" % AtomicMass.get(p_element, 0.0)) lines.append(p_element) pos = a.xyz / p_A + p_dxyz v = None if p_NO_VELOCITY else a.v uflat = numpy.ravel(a.U) entry = efmt.format(pos=pos, v=v, uflat=uflat, a=a) lines.append(entry) return lines
# End of class P_xcfg # Routines -------------------------------------------------------------------
[docs] def getParser(): """Return new `parser` object for XCFG format. Returns ------- P_xcfg Instance of `P_xcfg`. """ return P_xcfg()
# Local Helpers -------------------------------------------------------------- def _assign_auxiliaries(a, fields, auxiliaries, no_velocity): """Assing auxiliary properties for `Atom` object when reading CFG format. Parameters ---------- a : Atom The `Atom` instance for which the auxiliary properties need to be set. fields : list Floating point values for the current row of the processed CFG file. auxiliaries : dict Dictionary of zero-based indices and names of auxiliary properties defined in the CFG format. no_velocity : bool When `False` set atom velocity `a.v` to `fields[3:6]`. Use `fields[3:6]` for auxiliary values otherwise. """ if not no_velocity: a.v = numpy.asarray(fields[3:6], dtype=float) auxfirst = 3 if no_velocity else 6 for i, prop in auxiliaries.items(): value = fields[auxfirst + i] if prop == "Uiso": a.Uisoequiv = value elif prop == "Biso": a.Bisoequiv = value elif prop[0] in "BU" and all(d in "123" for d in prop[1:]): nm = prop if prop[1] <= prop[2] else prop[0] + prop[2] + prop[1] a.anisotropy = True setattr(a, nm, value) else: setattr(a, prop, value) return