Source code for pymatgen.io.cp2k.utils

"""
Utility functions for assisting with cp2k IO
"""

import os
import re
from pathlib import Path

import numpy as np
from monty.io import zopen
from monty.serialization import loadfn
from ruamel import yaml

from pymatgen.core import SETTINGS

MODULE_DIR = Path(__file__).resolve().parent


def _postprocessor(s):
    """
    Helper function to post process the results of the pattern matching functions in Cp2kOutput and turn them to
    python types.

    Args:
        s (str): raw text captured by a pattern match.

    Returns:
        ``True``/``False`` for yes/true and no/false (case-insensitive), ``None`` for "none", an ``int`` or
        ``float`` for numeric text, ``nan`` for a field made of asterisks, and otherwise the stripped string
        with inner spaces replaced by underscores.

    Raises:
        IOError: if text that looks numeric cannot be converted to a number.
    """
    # Strip BOTH ends: with only a right strip, leading blanks would be turned
    # into underscores below and e.g. " 42" would never be recognized as a number.
    s = s.strip()
    s = s.replace(" ", "_")  # Inner whitespace becomes underscores
    lowered = s.lower()

    if lowered in ("no", "false"):
        return False
    if lowered == "none":
        return None
    if lowered in ("yes", "true"):
        return True
    if re.match(r"^-?\d+$", s):
        try:
            return int(s)
        except ValueError as err:
            raise IOError("Error in parsing CP2K file.") from err
    if re.match(r"^[+\-]?(?=.)(?:0|[1-9]\d*)?(?:\.\d*)?(?:\d[eE][+\-]?\d+)?$", s):
        try:
            return float(s)
        except ValueError as err:
            # Reachable: the pattern accepts degenerate text such as "." that float() rejects.
            raise IOError("Error in parsing CP2K file.") from err
    if re.match(r"\*+", s):
        # A run of asterisks is reported as not-a-number. Use the lowercase
        # spelling: the np.NaN alias was removed in NumPy 2.0.
        return np.nan
    return s


def _preprocessor(s, d="."):
    """
    Cp2k contains internal preprocessor flags that are evaluated before
    execution. This helper function recognizes those preprocessor flags
    and replaces them with an equivalent cp2k input (this way everything
    is contained neatly in the cp2k input structure, even if the user
    preferred to use the flags).

    CP2K preprocessor flags (with arguments) are:

        @INCLUDE FILENAME: Insert the contents of FILENAME into the file at
            this location.
        @SET VAR VALUE: set a variable, VAR, to have the value, VALUE.
        $VAR or ${VAR}: replace these with the value of the variable, as set
            by the @SET flag.
        @IF/@ELIF: Not implemented yet.

    Args:
        s (str): string representation of cp2k input to preprocess
        d (str): directory against which @INCLUDE file names are resolved.
            Default: "." (current working directory).

    Returns:
        (str) the input with @INCLUDE lines replaced by the file contents,
        @SET lines removed and the variables they define substituted.

    Raises:
        AssertionError: if an @INCLUDE line does not have exactly one
            argument or an @SET line does not have exactly two.
        NotImplementedError: if the input contains @IF or @ELIF.
    """
    includes = re.findall(r"(@include.+)", s, re.IGNORECASE)
    for incl in includes:
        inc = incl.split()
        assert len(inc) == 2  # @include filename
        filename = inc[1].strip("'").strip('"')
        # Text mode is requested explicitly so that compressed include files
        # yield str rather than bytes.
        with zopen(os.path.join(d, filename), "rt") as f:
            # Literal replacement: the matched line and the file contents are
            # plain text, not a regex pattern / replacement template.
            s = s.replace(incl, f.read())

    variable_sets = re.findall(r"(@SET.+)", s, re.IGNORECASE)
    for match in variable_sets:
        v = match.split()
        assert len(v) == 3  # @SET VAR value
        var, value = v[1:]
        s = s.replace(match, "")  # drop the @SET line itself (literal match)
        # Escape the name and use a callable replacement so that neither the
        # variable name nor its value is interpreted as regex syntax
        # (e.g. values containing backslashes).
        s = re.sub(r"\$\{?" + re.escape(var) + r"\}?", lambda _m, _value=value: _value, s)

    has_if = re.search(r"@IF", s, re.IGNORECASE)
    has_elif = re.search(r"@ELIF", s, re.IGNORECASE)
    if has_if or has_elif:
        raise NotImplementedError("This cp2k input processer does not currently " "support conditional blocks.")
    return s


def natural_keys(text):
    """
    Build a sort key that orders numbers following an underscore numerically
    rather than lexically.

    Ex: sorting [file_1, file_12, file_2] with this key gives
    [file_1, file_2, file_12].

    Args:
        text (str): the string to build a key for.

    Returns:
        (list) the pieces of ``text`` split around each "_<digits>" group,
        with every all-digit piece converted to int.
    """
    pieces = re.split(r"_(\d+)", text)
    return [int(piece) if piece.isdigit() else piece for piece in pieces]
def get_basis_and_potential(species, d, cardinality="DZVP", functional="PBE"):
    """
    Look up the basis set and pseudopotential names to use for each species
    from the basis_molopt.yaml and gth_potentials.yaml files that sit next to
    this module.

    Args:
        species: (list) list of species for which to get the potential/basis strings
        d: (dict) a dictionary specifying how bases and/or potentials should be assigned to species
            E.g. {'Si': {'cardinality': 'DZVP', 'sr': True}, 'O': {'cardinality': 'TZVP'}}
            The keys read per species are 'cardinality', 'sr' and 'q'.
            NOTE: this dictionary is updated in place; a missing species entry
            and missing 'sr' / 'cardinality' keys are filled with defaults.
        cardinality: (str) basis set cardinality used for species that do not
            specify their own. Default: 'DZVP'
        functional: (str) functional type. Default: 'PBE'

    Returns:
        (dict) with the keys 'basis_filenames' and 'potential_filename', plus
        one entry per species of the form {'potential': potential, 'basis': basis}

    Raises:
        LookupError: if no basis/potential, or more than one, matches a species.
    """
    functional = functional or SETTINGS.get("PMG_DEFAULT_FUNCTIONAL", "PBE")
    cardinality = cardinality or SETTINGS.get("PMG_DEFAULT_BASIS_CARDINALITY", "DZVP")

    result = {
        "basis_filenames": ["BASIS_MOLOPT", "BASIS_MOLOPT_UCL"],
        "potential_filename": SETTINGS.get("PMG_DEFAULT_CP2K_POTENTIAL_FILE", "GTH_POTENTIALS"),
    }

    # First pass: fill in per-species defaults (in the caller's dictionary).
    for element in species:
        options = d.setdefault(element, {})
        options.setdefault("sr", True)
        options.setdefault("cardinality", cardinality)

    with open(os.path.join(MODULE_DIR, "basis_molopt.yaml"), "rt") as stream:
        available_bases = yaml.load(stream, Loader=yaml.Loader)
    with open(os.path.join(MODULE_DIR, "gth_potentials.yaml"), "rt") as stream:
        available_potentials = yaml.load(stream, Loader=yaml.Loader)

    def trailing_q_number(name):
        # Integer that follows the last "q" in the basis name.
        return int(name.split("q")[-1])

    # Second pass: narrow the available names down to exactly one per species.
    for element in species:
        options = d[element]
        result[element] = {}

        candidates = [name for name in available_bases[element] if options["cardinality"] in name.split("-")]

        # Keep the "SR" variants only when they are requested AND exist;
        # otherwise keep the names without "SR".
        want_sr = bool(options["sr"] and any("SR" in name for name in candidates))
        candidates = [name for name in candidates if ("SR" in name) == want_sr]

        if "q" in options:
            candidates = [name for name in candidates if options["q"] in name]
        else:
            # No explicit "q" requested: take the name with the largest q number.
            candidates = sorted(candidates, key=trailing_q_number)[-1:]

        if not candidates:
            raise LookupError("NO BASIS OF THAT TYPE AVAILABLE")
        if len(candidates) > 1:
            raise LookupError("AMBIGUITY IN BASIS. PLEASE SPECIFY FURTHER")
        result[element]["basis"] = candidates[0]

        potentials = [name for name in available_potentials[element] if functional in name.split("-")]
        if not potentials:
            raise LookupError("NO PSEUDOPOTENTIAL OF THAT TYPE AVAILABLE")
        if len(potentials) > 1:
            raise LookupError("AMBIGUITY IN POTENTIAL. PLEASE SPECIFY FURTHER")
        result[element]["potential"] = potentials[0]

    return result
def get_aux_basis(basis_type, default_basis_type="cFIT"):
    """
    Get auxiliary basis info for a list of species, read from the
    aux_basis.yaml file that sits next to this module.

    Args:
        basis_type (dict): dict of auxiliary basis sets to use. i.e:
            basis_type = {'Si': 'cFIT', 'O': 'cpFIT'}. Basis type needs to
            exist for that species.

            Basis types:
                FIT
                cFIT
                pFIT
                cpFIT
                GTH-def2
                aug-{FIT,cFIT,pFIT,cpFIT, GTH-def2}

        default_basis_type (str): basis type used for every species whose
            entry in ``basis_type`` is empty/None. If this argument is itself
            empty, the PMG_CP2K_DEFAULT_AUX_BASIS_TYPE setting is used.

    Returns:
        (dict) mapping each species to the first available basis name that
        starts with the requested type, or to the first available basis for
        that species when none matches.

    Raises:
        LookupError: if nothing matches and the species has no available
            auxiliary basis at all.
    """
    fallback_type = default_basis_type or SETTINGS.get("PMG_CP2K_DEFAULT_AUX_BASIS_TYPE")
    requested = {element: (kind or fallback_type) for element, kind in basis_type.items()}

    available = loadfn(os.path.join(MODULE_DIR, "aux_basis.yaml"))

    # First pass: first available name with the requested prefix; an empty
    # dict marks "nothing matched yet".
    chosen = {
        element: next((name for name in available[element] if name.startswith(kind)), {})
        for element, kind in requested.items()
    }

    # Second pass: fall back to the first available basis for unmatched species.
    for element, name in chosen.items():
        if name:
            continue
        if not available[element]:
            raise LookupError("NO BASIS OF THAT TYPE")
        chosen[element] = available[element][0]
    return chosen
def get_unique_site_indices(structure):
    """
    Get unique site indices for a structure according to site properties. For each
    species, whatever site-property has the most unique values among that species'
    sites is used for indexing.

    For example, if you have magnetic CoO with half Co atoms having a positive moment, and the
    other half having a negative moment. Then this function will create a dict of sites for
    Co_1, Co_2, O.

    This creates unique sites, based on site properties, but does not have anything to do with
    turning those site properties into CP2K input parameters.

    Args:
        structure: object providing ``symbol_set``, ``indices_from_symbol(symbol)`` and
            ``site_properties`` (a dict of per-site value sequences).

    Returns:
        (dict) mapping a label to site indices. A species for which no site property takes
        more than one value keeps its plain symbol as label and maps to whatever
        ``indices_from_symbol`` returned; otherwise the labels are ``<symbol>_1``,
        ``<symbol>_2``, ... (one per unique value, in ``np.unique`` order), each mapping to
        a list of indices.
    """
    sites = {}
    site_properties = structure.site_properties

    for symbol in structure.symbol_set:
        indices = structure.indices_from_symbol(symbol)

        # The selection is reset for every species. Carrying the property chosen
        # for a previous species over would split this species against the
        # placeholder value below and could silently drop its sites.
        best_property = None
        best_values = [0]  # placeholder of length 1: a property needs >= 2 values to win
        for name, values in site_properties.items():
            distinct = np.unique([values[i] for i in indices])
            if len(best_values) < len(distinct):
                best_values = distinct
                best_property = name

        if best_property is None:
            sites[symbol] = indices
            continue

        values = site_properties[best_property]
        for number, target in enumerate(best_values, start=1):
            sites[symbol + "_" + str(number)] = [i for i in indices if values[i] == target]

    return sites