Source code for pymatgen.io.qchem.inputs
# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.
"""
Classes for reading/manipulating/writing QChem input files.
"""
import logging
from typing import Union, Dict, List, Optional
from typing_extensions import Literal
from monty.io import zopen
from monty.json import MSONable
from pymatgen.core import Molecule
from .utils import lower_and_check_unique, read_pattern, read_table_pattern
# Module metadata (authorship, copyright, version and contact information).
__author__ = "Brandon Wood, Samuel Blau, Shyam Dwaraknath, Julian Self, Evan Spotte-Smith"
__copyright__ = "Copyright 2018, The Materials Project"
__version__ = "0.1"
__email__ = "b.wood@berkeley.edu"
__credits__ = "Xiaohui Qu"
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
[docs]class QCInput(MSONable):
"""
An object representing a QChem input file. QCInput attributes represent different sections of a QChem input file.
To add a new section one needs to modify __init__, __str__, from_string and add staticmethods
to read and write the new section, i.e. section_template and read_section. By design, there is very little (or no)
checking that input parameters conform to the appropriate QChem format; this responsibility falls on the user or on
separate error handling software.
"""
def __init__(
    self,
    molecule: Union[Molecule, Literal["read"]],
    rem: Dict,
    opt: Optional[Dict[str, List]] = None,
    pcm: Optional[Dict] = None,
    solvent: Optional[Dict] = None,
    smx: Optional[Dict] = None,
    scan: Optional[Dict[str, List]] = None,
    plots: Optional[Dict] = None,
):
    """
    Store the sections of a QChem input file and sanity-check the rem section.

    Args:
        molecule (pymatgen Molecule object or "read"):
            Input molecule, either a pymatgen Molecule or the literal str "read". "read" is meant for
            multi-job input files where the geometry is taken from the previous calculation.
        rem (dict):
            All input parameters of the rem section.
            Ex. rem = {'method': 'rimp2', 'basis': '6-31*G++' ... }
        opt (dict of lists):
            Opt sections keyed by section name (CONSTRAINT, FIXED, DUMMY, CONNECT); every value is a list
            of strings formatted as required by the QChem manual.
            Ex. opt = {"CONSTRAINT": ["tors 2 3 4 5 25.0", "tors 2 5 7 9 80.0"], "FIXED": ["2 XY"]}
        pcm (dict):
            Parameters of the PCM (polarizable continuum model) section.
            Ex: pcm = {"theory": "cpcm", "hpoints": 194}
        solvent (dict):
            Solvent parameters used together with PCM.
            Ex: solvent = {"dielectric": 78.39, "temperature": 298.15}
        smx (dict):
            Solvent parameters for the SMD method.
            Ex: smx = {"solvent": "water"}
        scan (dict of lists):
            Scan variables grouped by TYPE (stre, bend, tors), because two variables of the same type are
            allowed. The total number of variables (sum of the lengths of all lists) CANNOT exceed two.
            Ex. scan = {"stre": ["3 6 1.5 1.9 0.1"], "tors": ["1 2 3 4 -180 180 15"]}
        plots (dict):
            Parameters of the plots section.

    Raises:
        ValueError: If rem has no 'basis', has neither 'method' nor 'exchange', has no 'job_type',
            or if its 'job_type' is not one of the recognised job types.
    """
    self.molecule = molecule
    # Every dict section except opt goes through lower_and_check_unique (imported from .utils).
    self.rem = lower_and_check_unique(rem)
    self.opt = opt
    self.pcm = lower_and_check_unique(pcm)
    self.solvent = lower_and_check_unique(solvent)
    self.smx = lower_and_check_unique(smx)
    self.scan = lower_and_check_unique(scan)
    self.plots = lower_and_check_unique(plots)

    # Minimal validation of rem: a basis, a method or exchange functional, and a known job type.
    recognised_job_types = (
        "opt",
        "optimization",
        "sp",
        "freq",
        "frequency",
        "force",
        "nmr",
        "ts",
        "pes_scan",
    )
    if "basis" not in self.rem:
        raise ValueError("The rem dictionary must contain a 'basis' entry")
    if "method" not in self.rem and "exchange" not in self.rem:
        raise ValueError("The rem dictionary must contain either a 'method' entry or an 'exchange' entry")
    if "job_type" not in self.rem:
        raise ValueError("The rem dictionary must contain a 'job_type' entry")
    requested_job_type = self.rem["job_type"].lower()
    if requested_job_type not in recognised_job_types:
        raise ValueError("The rem dictionary must contain a valid 'job_type' entry")
    # Still to do:
    # - Check that the method or functional is valid
    # - Check that basis is valid
    # - Check that basis is defined for all species in the molecule
    # - Validity checks specific to job type?
    # - Check OPT and PCM sections?
def __str__(self):
    """
    Render the complete input file text.

    The molecule and rem sections are always written; opt, pcm, solvent, smx, scan and plots follow in
    that order, each only when the corresponding attribute is truthy. Every section is followed by a
    blank line.
    """
    pieces = [
        self.molecule_template(self.molecule),
        "",
        self.rem_template(self.rem),
        "",
    ]
    # (section data, renderer) pairs in output order
    optional_sections = (
        (self.opt, self.opt_template),
        (self.pcm, self.pcm_template),
        (self.solvent, self.solvent_template),
        (self.smx, self.smx_template),
        (self.scan, self.scan_template),
        (self.plots, self.plots_template),
    )
    for content, render in optional_sections:
        if content:
            pieces.extend((render(content), ""))
    return "\n".join(pieces)
[docs] @staticmethod
def multi_job_string(job_list: List["QCInput"]) -> str:
"""
Args:
job_list (): List of jobs
Returns:
(str) String representation of multi job input file.
"""
multi_job_string = str()
for i, job_i in enumerate(job_list):
if i < len(job_list) - 1:
multi_job_string += job_i.__str__() + "\n@@@\n\n"
else:
multi_job_string += job_i.__str__()
return multi_job_string
@classmethod
def from_string(cls, string: str) -> "QCInput":
    """
    Build a QCInput from the text of a single-job input file.

    Args:
        string (str): Input file contents.

    Returns:
        QCInput
    """
    sections = cls.find_sections(string)
    # molecule and rem are mandatory (find_sections raises if either is missing)
    molecule = cls.read_molecule(string)
    rem = cls.read_rem(string)
    # optional sections are parsed only when present and default to None otherwise
    optional_readers = (
        ("opt", cls.read_opt),
        ("pcm", cls.read_pcm),
        ("solvent", cls.read_solvent),
        ("smx", cls.read_smx),
        ("scan", cls.read_scan),
        ("plots", cls.read_plots),
    )
    parsed = {}
    for section_name, reader in optional_readers:
        parsed[section_name] = reader(string) if section_name in sections else None
    return cls(molecule, rem, **parsed)
def write_file(self, filename: str):
    """
    Write this input to a file, using the text produced by __str__.

    Args:
        filename (str): Path of the file to write; opened with monty's zopen in text-write mode.
    """
    with zopen(filename, "wt") as handle:
        handle.write(str(self))
@staticmethod
def write_multi_job_file(job_list: List["QCInput"], filename: str):
    """
    Write several jobs to one multi-job input file.

    Args:
        job_list (list): Jobs to write, in order.
        filename (str): Path of the file to write; opened with monty's zopen in text-write mode.
    """
    with zopen(filename, "wt") as handle:
        handle.write(QCInput.multi_job_string(job_list))
@staticmethod
def from_file(filename: str) -> "QCInput":
    """
    Read a single-job input file and parse it with from_string.

    Args:
        filename (str): Path of the file to read; opened with monty's zopen in text-read mode.

    Returns:
        QCInput
    """
    with zopen(filename, "rt") as handle:
        contents = handle.read()
        return QCInput.from_string(contents)
@classmethod
def from_multi_jobs_file(cls, filename: str) -> List["QCInput"]:
    """
    Read a multi-job input file and parse every job in it.

    Args:
        filename (str): Path of the file to read; opened with monty's zopen in text-read mode.

    Returns:
        List of QCInput objects, one per "@@@"-delimited chunk of the file.
    """
    with zopen(filename, "rt") as handle:
        file_text = handle.read()
    # "@@@" is the delimiter between QChem jobs
    job_texts = file_text.split("@@@")
    return [cls.from_string(job_text) for job_text in job_texts]
[docs] @staticmethod
def molecule_template(molecule: Union[Molecule, Literal["read"]]) -> str:
"""
Args:
molecule (Molecule): molecule
Returns:
(str) Molecule template.
"""
# todo: add ghost atoms
mol_list = []
mol_list.append("$molecule")
if isinstance(molecule, str):
if molecule == "read":
mol_list.append(" read")
else:
raise ValueError('The only acceptable text value for molecule is "read"')
else:
mol_list.append(
" {charge} {spin_mult}".format(charge=int(molecule.charge), spin_mult=molecule.spin_multiplicity)
)
for site in molecule.sites:
mol_list.append(
" {atom} {x: .10f} {y: .10f} {z: .10f}".format(
atom=site.species_string, x=site.x, y=site.y, z=site.z
)
)
mol_list.append("$end")
return "\n".join(mol_list)
[docs] @staticmethod
def rem_template(rem: Dict) -> str:
"""
Args:
rem ():
Returns:
(str)
"""
rem_list = []
rem_list.append("$rem")
for key, value in rem.items():
rem_list.append(" {key} = {value}".format(key=key, value=value))
rem_list.append("$end")
return "\n".join(rem_list)
[docs] @staticmethod
def opt_template(opt: Dict[str, List]) -> str:
"""
Optimization template.
Args:
opt ():
Returns:
(str)
"""
opt_list = []
opt_list.append("$opt")
# loops over all opt sections
for key, value in opt.items():
opt_list.append("{section}".format(section=key))
# loops over all values within the section
for i in value:
opt_list.append(" {val}".format(val=i))
opt_list.append("END{section}".format(section=key))
opt_list.append("")
# this deletes the empty space after the last section
del opt_list[-1]
opt_list.append("$end")
return "\n".join(opt_list)
[docs] @staticmethod
def pcm_template(pcm: Dict) -> str:
"""
Pcm run template.
Args:
pcm ():
Returns:
(str)
"""
pcm_list = []
pcm_list.append("$pcm")
for key, value in pcm.items():
pcm_list.append(" {key} {value}".format(key=key, value=value))
pcm_list.append("$end")
return "\n".join(pcm_list)
[docs] @staticmethod
def solvent_template(solvent: Dict) -> str:
"""
Solvent template.
Args:
solvent ():
Returns:
(str)
"""
solvent_list = []
solvent_list.append("$solvent")
for key, value in solvent.items():
solvent_list.append(" {key} {value}".format(key=key, value=value))
solvent_list.append("$end")
return "\n".join(solvent_list)
[docs] @staticmethod
def smx_template(smx: Dict) -> str:
"""
Args:
smx ():
Returns:
(str)
"""
smx_list = []
smx_list.append("$smx")
for key, value in smx.items():
if value == "tetrahydrofuran":
smx_list.append(" {key} {value}".format(key=key, value="thf"))
else:
smx_list.append(" {key} {value}".format(key=key, value=value))
smx_list.append("$end")
return "\n".join(smx_list)
[docs] @staticmethod
def scan_template(scan: Dict[str, List]) -> str:
"""
Args:
scan (dict): Dictionary with scan section information.
Ex: {"stre": ["3 6 1.5 1.9 0.1"], "tors": ["1 2 3 4 -180 180 15"]}
Returns:
String representing Q-Chem input format for scan section
"""
scan_list = list()
scan_list.append("$scan")
total_vars = sum([len(v) for v in scan.values()])
if total_vars > 2:
raise ValueError("Q-Chem only supports PES_SCAN with two or less " "variables.")
for var_type, variables in scan.items():
if variables not in [None, list()]:
for var in variables:
scan_list.append(" {var_type} {var}".format(var_type=var_type, var=var))
scan_list.append("$end")
return "\n".join(scan_list)
[docs] @staticmethod
def plots_template(plots: Dict) -> str:
"""
Args:
plots ():
Returns:
(str)
"""
plots_list = []
plots_list.append("$plots")
for key, value in plots.items():
plots_list.append(" {key} {value}".format(key=key, value=value))
plots_list.append("$end")
return "\n".join(plots_list)
@staticmethod
def find_sections(string: str) -> List:
    """
    List the "$name" sections that occur in an input string.

    Args:
        string (str): Input file contents.

    Returns:
        List of section names (without the "$"), excluding "end".

    Raises:
        ValueError: If the "@@@" multi-job delimiter is found, or if the molecule or rem section
            is missing.
    """
    # read_pattern comes from .utils; presumably it maps each pattern name to a list of
    # captured-group lists and omits names that never matched - confirm against .utils.
    found = read_pattern(string, {"sections": r"^\s*?\$([a-z]+)", "multiple_jobs": r"(@@@)"})
    # every "$end" terminator also matches the section pattern, so filter it out
    sections = [hit[0] for hit in found["sections"] if hit[0] != "end"]
    # this error should be replaced by a multi job read function when it is added
    # NOTE(review): the messages below say "Output file" although input text is parsed here.
    if "multiple_jobs" in found:
        raise ValueError("Output file contains multiple qchem jobs please parse separately")
    if "molecule" not in sections:
        raise ValueError("Output file does not contain a molecule section")
    if "rem" not in sections:
        raise ValueError("Output file does not contain a rem section")
    return sections
@staticmethod
def read_molecule(string: str) -> Union[Molecule, Literal["read"]]:
    """
    Parse the $molecule section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        The literal "read" when the section contains only "read"; otherwise a Molecule built from
        the parsed species and coordinates, with charge and spin multiplicity when a charge line
        was found.
    """
    charge = None
    spin_mult = None
    patterns = {
        "read": r"^\s*\$molecule\n\s*(read)",
        "charge": r"^\s*\$molecule\n\s*((?:\-)*\d+)\s+\d",
        # Allow any leading whitespace (as the charge pattern does) and capture the whole
        # multiplicity; the previous pattern kept only its first digit (e.g. "11" -> 1).
        "spin_mult": r"^\s*\$molecule\n\s*(?:\-)*\d+\s*(\d+)",
    }
    matches = read_pattern(string, patterns)
    if "read" in matches:
        return "read"
    if "charge" in matches:
        charge = float(matches["charge"][0][0])
    if "spin_mult" in matches:
        spin_mult = int(matches["spin_mult"][0][0])
    # The header must consume the complete multiplicity, otherwise its trailing digits sit in
    # front of the first coordinate row and the table fails to match.
    header = r"^\s*\$molecule\n\s*(?:\-)*\d+\s*\d+"
    # Explicit character class instead of an inline "(?i)" in the middle of the pattern:
    # global inline flags that are not at the start of the expression are an error on
    # Python 3.11+.
    row = r"\s*([a-zA-Z]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)"
    footer = r"^\$end"
    mol_table = read_table_pattern(string, header_pattern=header, row_pattern=row, footer_pattern=footer)
    species = [val[0] for val in mol_table[0]]
    coords = [[float(val[1]), float(val[2]), float(val[3])] for val in mol_table[0]]
    if charge is None:
        return Molecule(species=species, coords=coords)
    return Molecule(species=species, coords=coords, charge=charge, spin_multiplicity=spin_mult)
@staticmethod
def read_rem(string: str) -> Dict:
    """
    Parse the $rem section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) rem parameters built from the (key, value) rows of the first table that
        read_table_pattern returns. The "=" between key and value is optional in the row pattern.
    """
    tables = read_table_pattern(
        string,
        header_pattern=r"^\s*\$rem",
        row_pattern=r"\s*([a-zA-Z\_]+)\s*=?\s*(\S+)",
        footer_pattern=r"^\s*\$end",
    )
    first_table = tables[0]
    return dict(first_table)
@staticmethod
def read_opt(string: str) -> Dict[str, List]:
    """
    Parse the sub-sections of the $opt section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) Sub-section name (CONSTRAINT, FIXED, DUMMY, CONNECT) -> list of entry strings,
        containing only the sub-sections whose name was found in the string.
    """
    section_names = ("CONSTRAINT", "FIXED", "DUMMY", "CONNECT")
    # first pass: detect which sub-sections occur at all
    detected = read_pattern(string, {name: r"^\s*" + name for name in section_names})
    opt = {}
    # second pass: read the NAME ... ENDNAME table of every detected sub-section
    for name in section_names:
        if name not in detected:
            continue
        table = read_table_pattern(
            string,
            header_pattern=r"^\s*" + name + r"\n",
            row_pattern=r"(\w.*)\n",
            footer_pattern=r"^\s*END" + name + r"\n",
        )
        opt[name] = [entry[0] for entry in table[0]]
    return opt
@staticmethod
def read_pcm(string: str) -> Dict:
    """
    Parse the $pcm section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) PCM parameters from the first parsed table, or an empty dict (after printing a
        notice) when nothing was parsed.
    """
    tables = read_table_pattern(
        string,
        header_pattern=r"^\s*\$pcm",
        row_pattern=r"\s*([a-zA-Z\_]+)\s+(\S+)",
        footer_pattern=r"^\s*\$end",
    )
    if tables:
        return dict(tables[0])
    print("No valid PCM inputs found. Note that there should be no '=' chracters in PCM input lines.")
    return {}
@staticmethod
def read_solvent(string: str) -> Dict:
    """
    Parse the $solvent section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) Solvent parameters from the first parsed table, or an empty dict (after printing a
        notice) when nothing was parsed.
    """
    key_value_row = r"\s*([a-zA-Z\_]+)\s+(\S+)"
    tables = read_table_pattern(
        string, header_pattern=r"^\s*\$solvent", row_pattern=key_value_row, footer_pattern=r"^\s*\$end"
    )
    if not tables:
        print("No valid solvent inputs found. Note that there should be no '=' chracters in solvent input lines.")
        return {}
    first_table = tables[0]
    return dict(first_table)
@staticmethod
def read_smx(string: str) -> Dict:
    """
    Parse the $smx section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) SMX parameters from the first parsed table, with a "solvent" value of
        "tetrahydrofuran" replaced by "thf"; an empty dict (after printing a notice) when nothing
        was parsed.
    """
    header = r"^\s*\$smx"
    row = r"\s*([a-zA-Z\_]+)\s+(\S+)"
    footer = r"^\s*\$end"
    smx_table = read_table_pattern(string, header_pattern=header, row_pattern=row, footer_pattern=footer)
    if not smx_table:
        print("No valid smx inputs found. Note that there should be no '=' chracters in smx input lines.")
        return {}
    smx = {key: val for key, val in smx_table[0]}
    # .get() instead of indexing: a section without a "solvent" entry used to raise KeyError here.
    if smx.get("solvent") == "tetrahydrofuran":
        smx["solvent"] = "thf"
    return smx
@staticmethod
def read_scan(string: str) -> Dict[str, List]:
    """
    Parse the $scan section of an input string.

    Args:
        string: String to be parsed

    Returns:
        Dict with the keys "stre", "bend" and "tors", each holding the list of variable strings of
        that type; an empty dict (after printing a notice) when nothing was parsed.

    Raises:
        ValueError: If more than two scan variables were found in total.
    """
    tables = read_table_pattern(
        string,
        header_pattern=r"^\s*\$scan",
        row_pattern=r"\s*(stre|bend|tors|STRE|BEND|TORS)\s+((?:[\-\.0-9]+\s*)+)",
        footer_pattern=r"^\s*\$end",
    )
    if tables == []:
        print("No valid scan inputs found. Note that there should be no '=' chracters in scan input lines.")
        return {}
    grouped = {"stre": [], "bend": [], "tors": []}
    for entry in tables[0]:
        kind = entry[0].lower()
        if kind in grouped:
            # the value capture may span line breaks; drop newlines and trailing whitespace
            grouped[kind].append(entry[1].replace("\n", "").rstrip())
    if sum(len(found) for found in grouped.values()) > 2:
        raise ValueError("No more than two variables are allows in the scan section!")
    return grouped
@staticmethod
def read_plots(string: str) -> Dict:
    """
    Parse the $plots section of an input string.

    Args:
        string (str): Input file contents.

    Returns:
        (dict) Plots parameters from the first parsed table, or an empty dict (after printing a
        notice) when nothing was parsed.
    """
    tables = read_table_pattern(
        string,
        header_pattern=r"^\s*\$plots",
        row_pattern=r"\s*([a-zA-Z\_]+)\s+(\S+)",
        footer_pattern=r"^\s*\$end",
    )
    if tables == []:
        print("No valid plots inputs found. Note that there should be no '=' chracters in plots input lines.")
        return {}
    return {name: setting for name, setting in tables[0]}