# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.
"""
This module provides utility classes for string operations.
"""
import re
from fractions import Fraction
from monty.dev import deprecated
# Maps each ASCII digit to its unicode subscript counterpart (U+2080 .. U+2089).
# The two strings are aligned position by position: "0" -> "₀", ..., "9" -> "₉".
SUBSCRIPT_UNICODE = dict(zip("0123456789", "₀₁₂₃₄₅₆₇₈₉"))
# Maps each ASCII digit and the "+" / "-" signs to the matching unicode superscript character.
# The two strings are aligned position by position: "0" -> "⁰", ..., "+" -> "⁺", "-" -> "⁻".
SUPERSCRIPT_UNICODE = dict(zip("0123456789+-", "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻"))
class Stringify:
    """
    Mix-in class for string formatting, e.g. subscripting or superscripting numbers and symbols.

    Every representation is derived from ``to_pretty_string`` (``str(self)`` by default), so a subclass only
    needs a meaningful ``__str__`` and, optionally, a different ``STRING_MODE``.
    """

    # How to_latex_string renders bare numbers that directly follow a letter or parenthesis:
    # "SUBSCRIPT" or "SUPERSCRIPT". Any other value leaves such numbers untouched.
    STRING_MODE = "SUBSCRIPT"

    def to_pretty_string(self) -> str:
        """
        :return: A pretty string representation. By default, the __str__ output is used, but this method can be
            overridden if a different representation from default is desired.
        """
        return str(self)

    def to_latex_string(self) -> str:
        """
        Generates a LaTeX formatted string. The mode is set by the class variable STRING_MODE, which defaults to
        "SUBSCRIPT". E.g., Fe2O3 is transformed to Fe$_{2}$O$_{3}$. Setting STRING_MODE to "SUPERSCRIPT" creates
        superscript, e.g., Fe2+ becomes Fe$^{2+}$. The initial string is obtained from to_pretty_string.

        :return: String for display as in LaTeX with proper superscripts and subscripts.
        """
        str_ = self.to_pretty_string()
        # First we process strings that already have _ and ^ by escaping the relevant parts.
        str_ = re.sub(r"_(\d+)", r"$_{\1}$", str_)
        str_ = re.sub(r"\^([\d\+\-]+)", r"$^{\1}$", str_)

        # A run of digits/signs/periods directly after a letter or parenthesis is a count or a charge.
        trailing_number = r"([A-Za-z\(\)])([\d\+\-\.]+)"
        if self.STRING_MODE == "SUBSCRIPT":
            return re.sub(trailing_number, r"\1$_{\2}$", str_)
        if self.STRING_MODE == "SUPERSCRIPT":
            return re.sub(trailing_number, r"\1$^{\2}$", str_)
        return str_

    def to_html_string(self) -> str:
        """
        Generates a HTML formatted string. This uses the output from to_latex_string to generate a HTML output.

        :return: HTML formatted string, with <sub>, <sup> and overline <span> tags replacing the LaTeX markup.
        """
        str_ = re.sub(r"\$_\{([^}]+)\}\$", r"<sub>\1</sub>", self.to_latex_string())
        str_ = re.sub(r"\$\^\{([^}]+)\}\$", r"<sup>\1</sup>", str_)
        return re.sub(r"\$\\overline\{([^}]+)\}\$", r'<span style="text-decoration:overline">\1</span>', str_)

    def to_unicode_string(self) -> str:
        """
        :return: Unicode string with proper sub and superscripts. Note that this works only with systems where the
            sub and superscripts are pure integers (superscripts may also carry + and - signs); any other
            group, e.g. $_{0.5}$, is left as LaTeX markup.
        """
        str_ = self.to_latex_string()
        # Subscripts first, then superscripts, each converted in a single pass over the string.
        str_ = re.sub(
            r"\$_\{(\d+)\}\$",
            lambda m: "".join(SUBSCRIPT_UNICODE[s] for s in m.group(1)),
            str_,
        )
        return re.sub(
            r"\$\^\{([\d\+\-]+)\}\$",
            lambda m: "".join(SUPERSCRIPT_UNICODE[s] for s in m.group(1)),
            str_,
        )
def str_delimited(results, header=None, delimiter="\t"):
    """
    Given a 2d sequence (e.g. a tuple of tuples), generate a delimited string form.

    >>> results = [["a","b","c"],["d","e","f"],[1,2,3]]
    >>> print(str_delimited(results,delimiter=","))
    a,b,c
    d,e,f
    1,2,3

    Args:
        results: 2d sequence of arbitrary types. Every cell is converted with str().
        header: optional sequence of column titles, emitted as the first line. Cells are converted with
            str() as well, so non-string titles are accepted.
        delimiter: string placed between the cells of a row. Defaults to a tab.

    Returns:
        The rows joined by newlines, each row's cells joined by the delimiter. When a header is given it
        always ends with a newline, even if results is empty.
    """
    body = "\n".join(delimiter.join(str(cell) for cell in row) for row in results)
    if header is None:
        return body
    # Header cells get the same str() treatment as data cells.
    return delimiter.join(str(title) for title in header) + "\n" + body
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def latexify(formula):
    """
    Convert a plain formula into LaTeX with subscripted amounts, e.g. Fe2O3
    becomes Fe$_{2}$O$_{3}$.

    Args:
        formula (str): Input formula.

    Returns:
        The formula with every run of digits/periods that directly follows a
        letter or parenthesis wrapped in a LaTeX subscript.
    """
    amount_after_symbol = re.compile(r"([A-Za-z\(\)])([\d\.]+)")
    return amount_after_symbol.sub(r"\1$_{\2}$", formula)
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def htmlify(formula):
    """
    Convert a plain formula into HTML with subscripted amounts, e.g. Fe2O3
    becomes Fe<sub>2</sub>O<sub>3</sub>.

    :param formula: Input formula string.
    :return: The formula with every run of digits/periods that directly follows
        a letter or parenthesis wrapped in <sub>...</sub>.
    """
    amount_after_symbol = re.compile(r"([A-Za-z\(\)])([\d\.]+)")
    return amount_after_symbol.sub(r"\1<sub>\2</sub>", formula)
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def unicodeify(formula):
    """
    Render a formula with unicode subscripts, e.g. Fe2O3 becomes Fe₂O₃.
    Every digit in the string is replaced by its subscript form.

    :param formula: Input formula string; must not contain a period.
    :return: The formula with all digits swapped for unicode subscripts.
    :raises ValueError: If the formula contains a ".", which has no unicode subscript.
    """
    if "." in formula:
        raise ValueError("No unicode character exists for subscript period.")

    # One pass over the string; characters without a table entry are kept as they are.
    digit_table = str.maketrans(SUBSCRIPT_UNICODE)
    return formula.translate(digit_table)
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def latexify_spacegroup(spacegroup_symbol):
    r"""
    Convert a spacegroup symbol into LaTeX. E.g., P2_1/c becomes P2$_{1}$/c
    and P-1 becomes P$\overline{1}$.

    Args:
        spacegroup_symbol (str): A spacegroup symbol

    Returns:
        The symbol with "_<digits>" turned into LaTeX subscripts and
        "-<digit>" turned into an overlined digit.
    """
    # (pattern, replacement) pairs, applied in this order.
    rewrite_rules = (
        (r"_(\d+)", r"$_{\1}$"),
        (r"-(\d)", r"$\\overline{\1}$"),
    )
    latex_symbol = spacegroup_symbol
    for pattern, replacement in rewrite_rules:
        latex_symbol = re.sub(pattern, replacement, latex_symbol)
    return latex_symbol
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def unicodeify_spacegroup(spacegroup_symbol):
    r"""
    Generates a unicode formatted spacegroup. E.g., P2_1/c (or its LaTeX form
    P2$_{1}$/c) is converted to P2₁/c, and P-1 (or P$\overline{1}$) is converted
    to P1̅, i.e. the digit followed by a combining overline.

    Args:
        spacegroup_symbol (str): A spacegroup symbol, either plain or already
            LaTeX formatted; it is passed through latexify_spacegroup first.

    Returns:
        A unicode spacegroup with proper subscripts and overlines. An empty or
        None symbol yields an empty string.
    """
    if not spacegroup_symbol:
        return ""

    symbol = latexify_spacegroup(spacegroup_symbol)

    # Convert each $_{...}$ group as a whole so that multi-digit subscripts are handled too.
    symbol = re.sub(
        r"\$_\{([0-9]+)\}\$",
        lambda match: "".join(SUBSCRIPT_UNICODE[digit] for digit in match.group(1)),
        symbol,
    )

    overline = "\u0305"  # u"\u0304" (macron) is also an option

    # Strip the remaining LaTeX scaffolding; only the closing brace of \overline{...} is left to mark
    # where the overline belongs.
    symbol = symbol.replace("$\\overline{", "")
    symbol = symbol.replace("$", "")
    symbol = symbol.replace("{", "")
    # overline unicode symbol comes after the character with the overline
    symbol = symbol.replace("}", overline)

    return symbol
@deprecated(
    message="These methods have been deprecated in favor of using the Stringify mix-in class, which provides "
    "to_latex_string, to_unicode_string, etc. They will be removed in v2022."
)
def unicodeify_species(specie_string):
    r"""
    Render a species string with unicode superscripts for the oxidation state.
    Every digit, "+" and "-" in the string is replaced by its superscript form.

    Args:
        specie_string (str): Species string, e.g. O2-

    Returns:
        Species string, e.g. O²⁻. An empty or None input yields an empty string.
    """
    if not specie_string:
        return ""

    # One pass over the string; characters without a table entry are kept as they are.
    superscript_table = str.maketrans(SUPERSCRIPT_UNICODE)
    return specie_string.translate(superscript_table)
def stream_has_colours(stream):
    """
    Tell whether the given stream supports colours. Python cookbook, #475186

    Args:
        stream: Any object; only objects with an isatty() method that returns
            a true value are considered further.

    Returns:
        True if the stream is a TTY and curses reports more than two colours
        for the terminal, False otherwise (including on any curses error).
    """
    # auto color only on TTYs
    attached_to_tty = hasattr(stream, "isatty") and stream.isatty()
    if not attached_to_tty:
        return False

    try:
        import curses

        curses.setupterm()
        colour_count = curses.tigetnum("colors")
        return colour_count > 2
    except Exception:
        # guess false in case of error (e.g. curses is missing or no terminal info is available)
        return False