"""Support for optimizing tensor networks using automatic differentiation to
automatically derive gradients for input to scipy optimizers.
"""
import re
import warnings
import functools
import importlib
from collections.abc import Iterable
import tqdm
import numpy as np
from autoray import to_numpy, astype, get_dtype_name
from .contraction import (
contract_backend,
)
from .tensor_core import (
Tensor,
TensorNetwork,
PTensor,
tags_to_oset,
)
from .array_ops import iscomplex
from ..core import qarray
from ..utils import valmap, ensure_dict
if importlib.util.find_spec("jax") is not None:
_DEFAULT_BACKEND = 'jax'
elif importlib.util.find_spec("tensorflow") is not None:
_DEFAULT_BACKEND = 'tensorflow'
elif importlib.util.find_spec("torch") is not None:
_DEFAULT_BACKEND = 'torch'
else:
_DEFAULT_BACKEND = 'autograd'
_REAL_CONVERSION = {
'float32': 'float32',
'float64': 'float64',
'complex64': 'float32',
'complex128': 'float64',
}
_COMPLEX_CONVERSION = {
'float32': 'complex64',
'float64': 'complex128',
'complex64': 'complex64',
'complex128': 'complex128',
}
def equivalent_real_type(x):
return _REAL_CONVERSION[x.dtype.name]
def equivalent_complex_type(x):
return _COMPLEX_CONVERSION[x.dtype.name]
[docs]class Vectorizer:
"""Object for mapping a sequence of mixed real/complex n-dimensional arrays
to a single numpy vector and back and forth.
Parameters
----------
array : sequence of array
The set of arrays to map into a single real vector.
"""
def __init__(self, arrays):
self.shapes = [x.shape for x in arrays]
self.iscomplexes = [iscomplex(x) for x in arrays]
self.dtypes = [get_dtype_name(x) for x in arrays]
self.sizes = [np.prod(s) for s in self.shapes]
self.d = sum(
(1 + int(cmplx)) * size
for size, cmplx in zip(self.sizes, self.iscomplexes)
)
self.pack(arrays)
[docs] def pack(self, arrays, name='vector'):
"""Take ``arrays`` and pack their values into attribute `.{name}`, by
default `.vector`.
"""
# scipy's optimization routines require real, double data
if not hasattr(self, name):
setattr(self, name, np.empty(self.d, 'float64'))
x = getattr(self, name)
i = 0
for array, size, cmplx in zip(arrays, self.sizes, self.iscomplexes):
if not isinstance(array, np.ndarray):
array = to_numpy(array)
if not cmplx:
x[i:i + size] = array.reshape(-1)
i += size
else:
real_view = array.reshape(-1).view(equivalent_real_type(array))
x[i:i + 2 * size] = real_view
i += 2 * size
return x
[docs] def unpack(self, vector=None):
"""Turn the single, flat ``vector`` into a sequence of arrays.
"""
if vector is None:
vector = self.vector
i = 0
arrays = []
for shape, size, cmplx, dtype in zip(self.shapes, self.sizes,
self.iscomplexes, self.dtypes):
if not cmplx:
array = vector[i:i + size]
array.shape = shape
i += size
else:
array = vector[i:i + 2 * size]
array = array.view(equivalent_complex_type(array))
array.shape = shape
i += 2 * size
if get_dtype_name(array) != dtype:
array = astype(array, dtype)
arrays.append(array)
return arrays
_VARIABLE_TAG = "__VARIABLE{}__"
variable_finder = re.compile(r'__VARIABLE(\d+)__')
def _get_tensor_data(t):
"""Simple function to extract tensor data.
"""
if isinstance(t, PTensor):
data = t.params
else:
data = t.data
# jax doesn't like numpy.ndarray subclasses...
if isinstance(data, qarray):
data = data.A
return data
def _parse_opt_in(tn, tags, shared_tags, to_constant):
"""Parse a tensor network where tensors are assumed to be constant unless
tagged.
"""
tn_ag = tn.copy()
variables = []
# tags where each individual tensor should get a separate variable
individual_tags = tags - shared_tags
# handle tagged tensors that are not shared
for t in tn_ag.select_tensors(individual_tags, 'any'):
# append the raw data but mark the corresponding tensor
# for reinsertion
data = _get_tensor_data(t)
variables.append(data)
t.add_tag(_VARIABLE_TAG.format(len(variables) - 1))
# handle shared tags
for tag in shared_tags:
var_name = _VARIABLE_TAG.format(len(variables))
test_data = None
for t in tn_ag.select_tensors(tag):
data = _get_tensor_data(t)
# detect that this tensor is already variable tagged and skip
# if it is
if any(variable_finder.match(tag) for tag in t.tags):
warnings.warn('TNOptimizer warning, tensor tagged with'
' multiple `tags` or `shared_tags`.')
continue
if test_data is None:
# create variable and store data
variables.append(data)
test_data = data
else:
# check that the shape of the variable's data matches the
# data of this new tensor
if test_data.shape != data.shape:
raise ValueError('TNOptimizer error, a `shared_tags` tag '
'covers tensors with different numbers of'
' params.')
# mark the corresponding tensor for reinsertion
t.add_tag(var_name)
# iterate over tensors which *don't* have any of the given tags
for t in tn_ag.select_tensors(tags, which='!any'):
t.modify(apply=to_constant)
return tn_ag, variables
def _parse_opt_out(tn, constant_tags, to_constant,):
"""Parse a tensor network where tensors are assumed to be variables unless
tagged.
"""
tn_ag = tn.copy()
variables = []
for t in tn_ag:
if t.tags & constant_tags:
t.modify(apply=to_constant)
continue
# append the raw data but mark the corresponding tensor
# for reinsertion
data = _get_tensor_data(t)
variables.append(data)
t.add_tag(_VARIABLE_TAG.format(len(variables) - 1))
return tn_ag, variables
[docs]def parse_network_to_backend(
tn,
to_constant,
tags=None,
shared_tags=None,
constant_tags=None,
):
"""
Parse tensor network to:
- identify the dimension of the optimisation space and the initial
point of the optimisation from the current values in the tensor
network,
- add variable tags to individual tensors so that optimisation vector
values can be efficiently reinserted into the tensor network.
There are two different modes:
- 'opt in' : `tags` (and optionally `shared_tags`) are specified and
only these tensor tags will be optimised over. In this case
`constant_tags` is ignored if it is passed,
- 'opt out' : `tags` is not specified. In this case all tensors will be
optimised over, unless they have one of `constant_tags` tags.
Parameters
----------
tn : TensorNetwork
The initial tensor network to parse.
to_constant : Callable
Function that fixes a tensor as constant.
tags : str, or sequence of str, optional
Set of opt-in tags to optimise.
shared_tags : str, or sequence of str, optional
Subset of opt-in tags to joint optimise i.e. all tensors with tag s in
shared_tags will correspond to the same optimisation variables.
constant_tags : str, or sequence of str, optional
Set of opt-out tags if `tags` not passed.
Returns
-------
tn_ag : TensorNetwork
Tensor network tagged for reinsertion of optimisation variable values.
variables : list
List of variables extracted from ``tn``.
"""
tags = tags_to_oset(tags)
shared_tags = tags_to_oset(shared_tags)
constant_tags = tags_to_oset(constant_tags)
if tags | shared_tags:
# opt_in
if not (tags & shared_tags) == shared_tags:
tags = tags | shared_tags
warnings.warn('TNOptimizer warning, some `shared_tags` are missing'
' from `tags`. Automatically adding these missing'
' `shared_tags` to `tags`.')
if constant_tags:
warnings.warn('TNOptimizer warning, if `tags` or `shared_tags` are'
' specified then `constant_tags` is ignored - '
'consider instead untagging those tensors.')
return _parse_opt_in(tn, tags, shared_tags, to_constant, )
# opt-out
return _parse_opt_out(tn, constant_tags, to_constant, )
def constant_t(t, to_constant):
ag_t = t.copy()
ag_t.modify(apply=to_constant)
return ag_t
[docs]def constant_tn(tn, to_constant):
"""Convert a tensor network's arrays to constants.
"""
ag_tn = tn.copy()
ag_tn.apply_to_arrays(to_constant)
return ag_tn
@functools.lru_cache(1)
def get_autograd():
import autograd
return autograd
class AutoGradHandler:
def __init__(self, device='cpu'):
if device != 'cpu':
raise ValueError("`autograd` currently is only "
"backed by cpu, numpy arrays.")
def to_variable(self, x):
return np.asarray(x)
def to_constant(self, x):
return np.asarray(x)
def setup_fn(self, fn):
autograd = get_autograd()
self._backend_fn = fn
self._value_and_grad = autograd.value_and_grad(fn)
self._hvp = autograd.hessian_vector_product(fn)
def value(self, arrays):
return self._backend_fn(arrays)
def value_and_grad(self, arrays):
loss, grads = self._value_and_grad(arrays)
return loss, [x.conj() for x in grads]
@functools.lru_cache(1)
def get_jax():
import jax
return jax
class JaxHandler:
def __init__(self, jit_fn=True, device=None):
self.jit_fn = jit_fn
self.device = device
def to_variable(self, x):
jax = get_jax()
return jax.numpy.asarray(x)
def to_constant(self, x):
jax = get_jax()
return jax.numpy.asarray(x)
def setup_fn(self, fn):
jax = get_jax()
if self.jit_fn:
self._backend_fn = jax.jit(fn, backend=self.device)
self._value_and_grad = jax.jit(
jax.value_and_grad(fn), backend=self.device)
else:
self._backend_fn = fn
self._value_and_grad = jax.value_and_grad(fn)
self._setup_hessp(fn)
def _setup_hessp(self, fn):
jax = get_jax()
def hvp(primals, tangents):
return jax.jvp(jax.grad(fn), (primals,), (tangents,))[1]
if self.jit_fn:
hvp = jax.jit(hvp, device=self.device)
self._hvp = hvp
def value(self, arrays):
jax_arrays = tuple(map(self.to_constant, arrays))
return to_numpy(self._backend_fn(jax_arrays))
def value_and_grad(self, arrays):
loss, grads = self._value_and_grad(arrays)
return loss, [to_numpy(x.conj()) for x in grads]
def hessp(self, primals, tangents):
jax_arrays = self._hvp(primals, tangents)
return tuple(map(to_numpy, jax_arrays))
@functools.lru_cache(1)
def get_tensorflow():
import tensorflow
return tensorflow
class TensorFlowHandler:
def __init__(
self,
jit_fn=False,
autograph=False,
experimental_compile=False,
device=None,
):
self.jit_fn = jit_fn
self.autograph = autograph
self.experimental_compile = experimental_compile
self.device = device
def to_variable(self, x):
tf = get_tensorflow()
if self.device is None:
return tf.Variable(x)
with tf.device(self.device):
return tf.Variable(x)
def to_constant(self, x):
tf = get_tensorflow()
if self.device is None:
return tf.constant(x)
with tf.device(self.device):
return tf.constant(x)
def setup_fn(self, fn):
tf = get_tensorflow()
if self.jit_fn:
self._backend_fn = tf.function(
fn,
autograph=self.autograph,
experimental_compile=self.experimental_compile)
else:
self._backend_fn = fn
def value(self, arrays):
tf_arrays = tuple(map(self.to_constant, arrays))
return to_numpy(self._backend_fn(tf_arrays))
def value_and_grad(self, arrays):
tf = get_tensorflow()
variables = [self.to_variable(x) for x in arrays]
with tf.GradientTape() as t:
result = self._backend_fn(variables)
tf_grads = t.gradient(result, variables)
grads = [
# unused variables return as None
# NB note different convention for conjugation (i.e. none)
np.zeros_like(arrays[i]) if g is None else to_numpy(g)
for i, g in enumerate(tf_grads)
]
loss = to_numpy(result)
return loss, grads
@functools.lru_cache(1)
def get_torch():
import torch
return torch
class TorchHandler:
def __init__(self, jit_fn=False, device=None):
torch = get_torch()
self.jit_fn = jit_fn
if device is None:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.device = device
def to_variable(self, x):
torch = get_torch()
return torch.tensor(x).to(self.device).requires_grad_()
def to_constant(self, x):
torch = get_torch()
return torch.tensor(x).to(self.device)
def setup_fn(self, fn):
self._fn = fn
self._backend_fn = None
def _setup_backend_fn(self, arrays):
torch = get_torch()
if self.jit_fn:
example_inputs = (tuple(map(self.to_constant, arrays)),)
self._backend_fn = torch.jit.trace(
self._fn, example_inputs=example_inputs)
else:
self._backend_fn = self._fn
def value(self, arrays):
if self._backend_fn is None:
self._setup_backend_fn(arrays)
torch_arrays = tuple(map(self.to_constant, arrays))
return to_numpy(self._backend_fn(torch_arrays))
def value_and_grad(self, arrays):
torch = get_torch()
if self._backend_fn is None:
self._setup_backend_fn(arrays)
variables = [self.to_variable(x) for x in arrays]
result = self._backend_fn(variables)
torch_grads = torch.autograd.grad(result, variables, allow_unused=True)
grads = [
# unused variables return as None
np.zeros_like(arrays[i]) if g is None else to_numpy(g).conj()
for i, g in enumerate(torch_grads)
]
loss = to_numpy(result)
return loss, grads
_BACKEND_HANDLERS = {
'numpy': AutoGradHandler,
'autograd': AutoGradHandler,
'jax': JaxHandler,
'tensorflow': TensorFlowHandler,
'torch': TorchHandler,
}
class MultiLossHandler:
def __init__(self, autodiff_backend, executor=None, **backend_opts):
self.autodiff_backend = autodiff_backend
self.backend_opts = backend_opts
self.executor = executor
# start just with one, as we don't don't know how many functions yet
h0 = _BACKEND_HANDLERS[autodiff_backend](**backend_opts)
self.handlers = [h0]
# ... but we do need access to `to_constant`
self.to_constant = h0.to_constant
def setup_fn(self, funcs):
fn0, *fns = funcs
self.handlers[0].setup_fn(fn0)
for fn in fns:
h = _BACKEND_HANDLERS[self.autodiff_backend](**self.backend_opts)
h.setup_fn(fn)
self.handlers.append(h)
def _value_seq(self, arrays):
return sum(h.value(arrays) for h in self.handlers)
def _value_par_seq(self, arrays):
futures = [self.executor.submit(h.value, arrays)
for h in self.handlers]
return sum(f.result() for f in futures)
def value(self, arrays):
if self.executor is not None:
return self._value_par(arrays)
return self._value_seq(arrays)
def _value_and_grad_seq(self, arrays):
h0, *hs = self.handlers
loss, grads = h0.value_and_grad(arrays)
# need to make arrays writeable for efficient inplace sum
grads = list(map(np.array, grads))
for h in hs:
loss_i, grads_i = h.value_and_grad(arrays)
loss += loss_i
for i, g_i in enumerate(grads_i):
grads[i] += g_i
return loss, grads
def _value_and_grad_par(self, arrays):
futures = [self.executor.submit(h.value_and_grad, arrays)
for h in self.handlers]
results = (f.result() for f in futures)
# get first result
loss, grads = next(results)
grads = list(map(np.array, grads))
# process remaining results
for loss_i, grads_i in results:
loss += loss_i
for i, g_i in enumerate(grads_i):
grads[i] += g_i
return loss, grads
def value_and_grad(self, arrays):
if self.executor is not None:
return self._value_and_grad_par(arrays)
return self._value_and_grad_seq(arrays)
def inject_(arrays, tn):
for t in tn:
for tag in t.tags:
match = variable_finder.match(tag)
if match is not None:
i = int(match.groups(1)[0])
if isinstance(t, PTensor):
t.params = arrays[i]
else:
t.modify(data=arrays[i], left_inds=t.left_inds)
break
[docs]class SGD:
"""Stateful ``scipy.optimize.minimize`` compatible implementation of
stochastic gradient descent with momentum.
Adapted from ``autograd/misc/optimizers.py``.
"""
def __init__(self):
from scipy.optimize import OptimizeResult
self.OptimizeResult = OptimizeResult
self._i = 0
self._velocity = None
def get_velocity(self, x):
if self._velocity is None:
self._velocity = np.zeros_like(x)
return self._velocity
def __call__(self, fun, x0, jac, args=(), learning_rate=0.1, mass=0.9,
maxiter=1000, callback=None, bounds=None, **kwargs):
x = x0
velocity = self.get_velocity(x)
for _ in range(maxiter):
self._i += 1
g = jac(x)
if callback and callback(x):
break
velocity = mass * velocity - (1.0 - mass) * g
x = x + learning_rate * velocity
if bounds is not None:
x = np.clip(x, bounds[:, 0], bounds[:, 1])
# save for restart
self._velocity = velocity
return self.OptimizeResult(
x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True)
[docs]class RMSPROP:
"""Stateful ``scipy.optimize.minimize`` compatible implementation of
root mean squared prop: See Adagrad paper for details.
Adapted from ``autograd/misc/optimizers.py``.
"""
def __init__(self):
from scipy.optimize import OptimizeResult
self.OptimizeResult = OptimizeResult
self._i = 0
self._avg_sq_grad = None
def get_avg_sq_grad(self, x):
if self._avg_sq_grad is None:
self._avg_sq_grad = np.ones_like(x)
return self._avg_sq_grad
def __call__(self, fun, x0, jac, args=(), learning_rate=0.1, gamma=0.9,
eps=1e-8, maxiter=1000, callback=None, bounds=None, **kwargs):
x = x0
avg_sq_grad = self.get_avg_sq_grad(x)
for _ in range(maxiter):
self._i += 1
g = jac(x)
if callback and callback(x):
break
avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
x = x - learning_rate * g / (np.sqrt(avg_sq_grad) + eps)
if bounds is not None:
x = np.clip(x, bounds[:, 0], bounds[:, 1])
# save for restart
self._avg_sq_grad = avg_sq_grad
return self.OptimizeResult(
x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True)
[docs]class ADAM:
"""Stateful ``scipy.optimize.minimize`` compatible implementation of
ADAM - http://arxiv.org/pdf/1412.6980.pdf.
Adapted from ``autograd/misc/optimizers.py``.
"""
def __init__(self):
from scipy.optimize import OptimizeResult
self.OptimizeResult = OptimizeResult
self._i = 0
self._m = None
self._v = None
def get_m(self, x):
if self._m is None:
self._m = np.zeros_like(x)
return self._m
def get_v(self, x):
if self._v is None:
self._v = np.zeros_like(x)
return self._v
def __call__(self, fun, x0, jac, args=(), learning_rate=0.001, beta1=0.9,
beta2=0.999, eps=1e-8, maxiter=1000, callback=None,
bounds=None, **kwargs):
x = x0
m = self.get_m(x)
v = self.get_v(x)
for _ in range(maxiter):
self._i += 1
g = jac(x)
if callback and callback(x):
break
m = (1 - beta1) * g + beta1 * m # first moment estimate.
v = (1 - beta2) * (g**2) + beta2 * v # second moment estimate.
mhat = m / (1 - beta1**(self._i)) # bias correction.
vhat = v / (1 - beta2**(self._i))
x = x - learning_rate * mhat / (np.sqrt(vhat) + eps)
if bounds is not None:
x = np.clip(x, bounds[:, 0], bounds[:, 1])
# save for restart
self._m = m
self._v = v
return self.OptimizeResult(
x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True)
[docs]class NADAM:
"""Stateful ``scipy.optimize.minimize`` compatible implementation of
NADAM - [Dozat - http://cs229.stanford.edu/proj2015/054_report.pdf].
Adapted from ``autograd/misc/optimizers.py``.
"""
def __init__(self):
from scipy.optimize import OptimizeResult
self.OptimizeResult = OptimizeResult
self._i = 0
self._m = None
self._v = None
self._mus = None
def get_m(self, x):
if self._m is None:
self._m = np.zeros_like(x)
return self._m
def get_v(self, x):
if self._v is None:
self._v = np.zeros_like(x)
return self._v
def get_mus(self, beta1):
if self._mus is None:
self._mus = [1, beta1 * (1 - 0.5 * 0.96**0.004)]
return self._mus
def __call__(self, fun, x0, jac, args=(), learning_rate=0.001, beta1=0.9,
beta2=0.999, eps=1e-8, maxiter=1000, callback=None,
bounds=None, **kwargs):
x = x0
m = self.get_m(x)
v = self.get_v(x)
mus = self.get_mus(beta1)
for _ in range(maxiter):
self._i += 1
# this is ``mu[t + 1]`` -> already computed ``mu[t]``
self._mus.append(beta1 * (1 - 0.5 * 0.96**(0.004 * (self._i + 1))))
g = jac(x)
if callback and callback(x):
break
gd = g / (1 - np.prod(self._mus[:-1]))
m = beta1 * m + (1 - beta1) * g
md = m / (1 - np.prod(self._mus))
v = beta2 * v + (1 - beta2) * g**2
vd = v / (1 - beta2**self._i)
mhat = (1 - self._mus[self._i]) * gd + self._mus[self._i + 1] * md
x = x - learning_rate * mhat / (np.sqrt(vd) + eps)
if bounds is not None:
x = np.clip(x, bounds[:, 0], bounds[:, 1])
# save for restart
self._m = m
self._v = v
self._mus = mus
return self.OptimizeResult(
x=x, fun=fun(x), jac=g, nit=self._i, nfev=self._i, success=True)
_STOC_GRAD_METHODS = {
'sgd': SGD,
'rmsprop': RMSPROP,
'adam': ADAM,
'nadam': NADAM,
}
def parse_constant_arg(arg, to_constant):
# check if tensor network supplied
if isinstance(arg, TensorNetwork):
# convert it to constant TN
return constant_tn(arg, to_constant)
if isinstance(arg, Tensor):
return constant_t(arg, to_constant)
if isinstance(arg, dict):
return valmap(to_constant, arg)
if isinstance(arg, list):
return list(map(to_constant, arg))
if isinstance(arg, tuple):
return tuple(map(to_constant, arg))
# assume ``arg`` is a raw array
return to_constant(arg)
[docs]class MakeArrayFn:
"""Class wrapper so picklable.
"""
__name__ = 'MakeArrayFn'
def __init__(self, tn_opt, loss_fn, norm_fn, autodiff_backend):
self.tn_opt = tn_opt
self.loss_fn = loss_fn
self.norm_fn = norm_fn
self.autodiff_backend = autodiff_backend
def __call__(self, arrays):
# copy the TN so norm and loss functions can modify in place
# XXX: make optional for efficiency?
tn_compute = self.tn_opt.copy()
inject_(arrays, tn_compute)
# set backend explicitly as maybe mixing with numpy arrays
with contract_backend(self.autodiff_backend):
return self.loss_fn(self.norm_fn(tn_compute))
def identity_fn(x):
return x
[docs]class TNOptimizer:
"""Globally optimize tensors within a tensor network with respect to any
loss function via automatic differentiation. If parametrized tensors are
used, optimize the parameters rather than the raw arrays.
Parameters
----------
tn : TensorNetwork
The core tensor network structure within which to optimize tensors.
loss_fn : callable or sequence of callable
The function that takes ``tn`` (as well as ``loss_constants`` and
``loss_kwargs``) and returns a single real 'loss' to be minimized.
For Hamiltonians which can be represented as a sum over terms, an
iterable collection of terms (e.g. list) can be given instead. In that
case each term is evaluated independently and the sum taken as loss_fn.
This can reduce the total memory requirements or allow for
parallelization (see ``executor``).
norm_fn : callable, optional
A function to call before ``loss_fn`` that prepares or 'normalizes' the
raw tensor network in some way.
loss_constants : dict, optional
Extra tensor networks, tensors, dicts/list/tuples of arrays, or arrays
which will be supplied to ``loss_fn`` but also converted to the correct
backend array type.
loss_kwargs : dict, optional
Extra options to supply to ``loss_fn`` (unlike ``loss_constants`` these
are assumed to be simple options that don't need conversion).
tags : str, or sequence of str, optional
If supplied, only optimize tensors with any of these tags.
shared_tags : str, or sequence of str, optional
If supplied, each tag in ``shared_tags`` corresponds to a group of
tensors to be optimized together.
constant_tags : str, or sequence of str, optional
If supplied, skip optimizing tensors with any of these tags. This
'opt-out' mode is overridden if either ``tags`` or ``shared_tags`` is
supplied.
loss_target : float, optional
Stop optimizing once this loss value is reached.
optimizer : str, optional
Which ``scipy.optimize.minimize`` optimizer to use (the ``'method'``
kwarg of that function). In addition, ``quimb`` implements a few custom
optimizers compatible with this interface that you can reference by
name - ``{'adam', 'nadam', 'rmsprop', 'sgd'}``.
executor : None or Executor, optional
To be used with term-by-term Hamiltonians. If supplied, this executor
is used to parallelize the evaluation. Otherwise each term is
evaluated in sequence. It should implement the basic
concurrent.futures (PEP 3148) interface.
progbar : bool, optional
Whether to show live progress.
bounds : None or (float, float), optional
Constrain the optimized tensor entries within this range (if the scipy
optimizer supports it).
autodiff_backend : {'jax', 'autograd', 'tensorflow', 'torch'}, optional
Which backend library to use to perform the automatic differentation
(and computation).
backend_opts
Supplied to the backend function compiler and array handler. For
example ``jit_fn=True`` or ``device='cpu'`` .
"""
def __init__(
self,
tn,
loss_fn,
norm_fn=None,
loss_constants=None,
loss_kwargs=None,
tags=None,
shared_tags=None,
constant_tags=None,
loss_target=None,
optimizer='L-BFGS-B',
progbar=True,
bounds=None,
autodiff_backend='AUTO',
executor=None,
**backend_opts
):
self.progbar = progbar
self.tags = tags
self.shared_tags = shared_tags
self.constant_tags = constant_tags
if autodiff_backend.upper() == 'AUTO':
autodiff_backend = _DEFAULT_BACKEND
self._autodiff_backend = autodiff_backend
self._multiloss = isinstance(loss_fn, Iterable)
# the object that handles converting to backend + computing gradient
if self._multiloss:
# special meta-handler if loss function is sequence to sum
backend_opts['executor'] = executor
self.handler = MultiLossHandler(autodiff_backend, **backend_opts)
else:
self.handler = _BACKEND_HANDLERS[autodiff_backend](**backend_opts)
# use identity if no nomalization required
if norm_fn is None:
norm_fn = identity_fn
self.norm_fn = norm_fn
self.reset(tn, loss_target=loss_target)
# convert constant arrays ahead of time to correct backend
self.loss_constants = {
k: parse_constant_arg(v, self.handler.to_constant)
for k, v in ensure_dict(loss_constants).items()
}
self.loss_kwargs = ensure_dict(loss_kwargs)
kws = {**self.loss_constants, **self.loss_kwargs}
# inject these constant options to the loss function(s)
if self._multiloss:
# loss is a sum of independent terms
self.loss_fn = [functools.partial(fn, **kws) for fn in loss_fn]
else:
# loss is all in one
self.loss_fn = functools.partial(loss_fn, **kws)
# first we wrap the function to convert from array args to TN arg
# (i.e. to autodiff library compatible form)
if self._multiloss:
array_fn = [MakeArrayFn(self._tn_opt, fn, self.norm_fn,
autodiff_backend) for fn in self.loss_fn]
else:
array_fn = MakeArrayFn(
self._tn_opt, self.loss_fn, self.norm_fn, autodiff_backend)
# then we pass it to the handler which generates a function that
# computes both the value and gradients (still in array form)
self.handler.setup_fn(array_fn)
# options to do with the minimizer
self.bounds = bounds
self.optimizer = optimizer
def _set_tn(self, tn):
# work out which tensors to optimize and get the underlying data
self._tn_opt, variables = parse_network_to_backend(
tn,
tags=self.tags,
shared_tags=self.shared_tags,
constant_tags=self.constant_tags,
to_constant=self.handler.to_constant
)
# handles storing and packing / unpacking many arrays as a vector
self.vectorizer = Vectorizer(variables)
def _reset_tracking_info(self, loss_target=None):
# tracking info
self.loss = float('inf')
self.loss_best = float('inf')
self.loss_target = loss_target
self.losses = []
self._n = 0
self._pbar = None
[docs] def reset(self, tn=None, clear_info=True, loss_target=None):
"""Reset this optimizer without losing the compiled loss and gradient
functions.
Parameters
----------
tn : TensorNetwork, optional
Set this tensor network as the current state of the optimizer, it
must exactly match the original tensor network.
clear_info : bool, optional
Clear the tracked losses and iterations.
"""
if tn is not None:
self._set_tn(tn)
if clear_info:
self._reset_tracking_info(loss_target=loss_target)
def _maybe_init_pbar(self, n):
if self.progbar:
self._pbar = tqdm.tqdm(total=n)
def _maybe_update_pbar(self):
if self._pbar is not None:
self._pbar.update()
self.loss_best = min(self.loss_best, self.loss)
msg = f"{self.loss:+.12f} [best: {self.loss_best:+.12f}] "
self._pbar.set_description(msg)
def _maybe_close_pbar(self):
if self._pbar is not None:
self._pbar.close()
self._pbar = None
def _check_loss_target(self):
if (self.loss_target is not None) and (self.loss <= self.loss_target):
# for scipy terminating optimizer with callback doesn't work
raise KeyboardInterrupt
[docs] def vectorized_value(self, x):
"""The value of the loss function at vector ``x``.
"""
self.vectorizer.vector[:] = x
arrays = self.vectorizer.unpack()
self.loss = self.handler.value(arrays).item()
self.losses.append(self.loss)
self._n += 1
self._maybe_update_pbar()
self._check_loss_target()
return self.loss
[docs] def vectorized_value_and_grad(self, x):
"""The value and gradient of the loss function at vector ``x``.
"""
self.vectorizer.vector[:] = x
arrays = self.vectorizer.unpack()
result, grads = self.handler.value_and_grad(arrays)
self._n += 1
self.loss = result.item()
self.losses.append(self.loss)
vec_grad = self.vectorizer.pack(grads, 'grad')
self._maybe_update_pbar()
self._check_loss_target()
return self.loss, vec_grad
[docs] def vectorized_hessp(self, x, p):
"""The action of the hessian at point ``x`` on vector ``p``.
"""
primals = self.vectorizer.unpack(x)
tangents = self.vectorizer.unpack(p)
hp_arrays = self.handler.hessp(primals, tangents)
self._n += 1
self.losses.append(self.loss)
self._maybe_update_pbar()
return self.vectorizer.pack(hp_arrays, 'hp')
def __repr__(self):
return (f"<TNOptimizer(d={self.d}, "
f"backend={self._autodiff_backend})>")
@property
def d(self):
return int(self.vectorizer.d)
@property
def nevals(self):
"""The number of gradient evaluations.
"""
return self._n
@property
def optimizer(self):
"""The underlying optimizer that works with the vectorized functions.
"""
return self._optimizer
@optimizer.setter
def optimizer(self, x):
self._optimizer = x
if self.optimizer in _STOC_GRAD_METHODS:
self._method = _STOC_GRAD_METHODS[self.optimizer]()
else:
self._method = self.optimizer
@property
def bounds(self):
return self._bounds
@bounds.setter
def bounds(self, x):
if x is not None:
self._bounds = np.array((x,) * self.vectorizer.d)
else:
self._bounds = None
[docs] def get_tn_opt(self):
"""Extract the optimized tensor network, this is a three part process:
1. inject the current optimized vector into the target tensor
network,
2. run it through ``norm_fn``,
3. drop any tags used to identify variables.
Returns
-------
tn_opt : TensorNetwork
"""
arrays = tuple(map(self.handler.to_constant, self.vectorizer.unpack()))
inject_(arrays, self._tn_opt)
tn = self.norm_fn(self._tn_opt.copy())
tn.drop_tags(t for t in tn.tags if variable_finder.match(t))
for t in tn:
if isinstance(t, PTensor):
t.params = to_numpy(t.params)
else:
t.modify(data=to_numpy(t.data), left_inds=t.left_inds)
return tn
[docs] def optimize(
self,
n,
tol=None,
jac=True,
hessp=False,
**options
):
"""Run the optimizer for ``n`` function evaluations, using
:func:`scipy.optimize.minimize` as the driver for the vectorized
computation. Supplying the gradient and hessian vector product is
controlled by the ``jac`` and ``hessp`` options respectively.
Parameters
----------
n : int
Notionally the maximum number of iterations for the optimizer, note
that depending on the optimizer being used, this may correspond to
number of function evaluations rather than just iterations.
tol : None or float, optional
Tolerance for convergence, note that various more specific
tolerances can usually be supplied to ``options``, depending on
the optimizer being used.
jac : bool, optional
Whether to supply the jacobian, i.e. gradient, of the loss
function.
hessp : bool, optional
Whether to supply the hessian vector product of the loss function.
options
Supplied to :func:`scipy.optimize.minimize`.
Returns
-------
tn_opt : TensorNetwork
"""
from scipy.optimize import minimize
if jac:
fun = self.vectorized_value_and_grad
else:
fun = self.vectorized_value
try:
self._maybe_init_pbar(n)
self.res = minimize(
fun=fun,
jac=jac,
hessp=self.vectorized_hessp if hessp else None,
x0=self.vectorizer.vector,
tol=tol,
bounds=self.bounds,
method=self._method,
options=dict(maxiter=n, **options),
)
self.vectorizer.vector[:] = self.res.x
except KeyboardInterrupt:
pass
finally:
self._maybe_close_pbar()
return self.get_tn_opt()
[docs] def optimize_basinhopping(
self,
n,
nhop,
temperature=1.0,
jac=True,
hessp=False,
**options
):
"""Run the optimizer for using :func:`scipy.optimize.basinhopping`
as the driver for the vectorized computation. This performs ``nhop``
local optimization each with ``n`` iterations.
Parameters
----------
n : int
Number of iterations per local optimization.
nhop : int
Number of local optimizations to hop between.
temperature : float, optional
H
options
Supplied to the inner :func:`scipy.optimize.minimize` call.
Returns
-------
tn_opt : TensorNetwork
"""
from scipy.optimize import basinhopping
if jac:
fun = self.vectorized_value_and_grad
else:
fun = self.vectorized_value
try:
self._maybe_init_pbar(n * nhop)
self.res = basinhopping(
func=fun,
x0=self.vectorizer.vector,
niter=nhop,
minimizer_kwargs=dict(
jac=jac,
hessp=self.vectorized_hessp if hessp else None,
method=self._method,
bounds=self.bounds,
options=dict(maxiter=n, **options)
),
T=temperature,
)
self.vectorizer.vector[:] = self.res.x
except KeyboardInterrupt:
pass
finally:
self._maybe_close_pbar()
return self.get_tn_opt()
[docs] def optimize_nlopt(
self,
n,
ftol_rel=None,
ftol_abs=None,
xtol_rel=None,
xtol_abs=None,
):
"""Run the optimizer for ``n`` function evaluations, using ``nlopt`` as
the backend library to run the optimization. Whether the gradient is
computed depends on which ``optimizer`` is selected, see valid options
at https://nlopt.readthedocs.io/en/latest/NLopt_Algorithms/.
Parameters
----------
n : int
The maximum number of iterations for the optimizer.
ftol_rel : float, optional
Set relative tolerance on function value.
ftol_abs : float, optional
Set absolute tolerance on function value.
xtol_rel : float, optional
Set relative tolerance on optimization parameters.
xtol_abs : float, optional
Set absolute tolerances on optimization parameters.
Returns
-------
tn_opt : TensorNetwork
"""
import nlopt
try:
self._maybe_init_pbar(n)
def f(x, grad):
self.vectorizer.vector[:] = x
arrays = self.vectorizer.unpack()
if grad.size > 0:
result, grads = self.handler.value_and_grad(arrays)
grad[:] = self.vectorizer.pack(grads, 'grad')
else:
result = self.handler.value(arrays)
self._n += 1
self.loss = result.item()
self.losses.append(self.loss)
self._maybe_update_pbar()
return self.loss
opt = nlopt.opt(getattr(nlopt, self.optimizer), self.d)
opt.set_min_objective(f)
opt.set_maxeval(n)
if self.bounds is not None:
opt.set_lower_bounds(self.bounds[:, 0])
opt.set_upper_bounds(self.bounds[:, 1])
if self.loss_target is not None:
opt.set_stopval(self.loss_target)
if ftol_rel is not None:
opt.set_ftol_rel(ftol_rel)
if ftol_abs is not None:
opt.set_ftol_abs(ftol_abs)
if xtol_rel is not None:
opt.set_xtol_rel(xtol_rel)
if xtol_abs is not None:
opt.set_xtol_abs(xtol_abs)
self.vectorizer.vector[:] = opt.optimize(self.vectorizer.vector)
except (KeyboardInterrupt, RuntimeError):
pass
finally:
self._maybe_close_pbar()
return self.get_tn_opt()
[docs] def optimize_ipopt(self, n, tol=None, **options):
"""Run the optimizer for ``n`` function evaluations, using ``ipopt`` as
the backend library to run the optimization via the python package
``cyipopt``.
Parameters
----------
n : int
The maximum number of iterations for the optimizer.
Returns
-------
tn_opt : TensorNetwork
"""
from cyipopt import minimize_ipopt
try:
self._maybe_init_pbar(n)
self.res = minimize_ipopt(
fun=self.vectorized_value_and_grad,
jac=True,
x0=self.vectorizer.vector,
tol=tol,
bounds=self.bounds,
method=self._method,
options=dict(maxiter=n, **options),
)
self.vectorizer.vector[:] = self.res.x
except KeyboardInterrupt:
pass
finally:
self._maybe_close_pbar()
return self.get_tn_opt()
[docs] def optimize_nevergrad(self, n):
"""Run the optimizer for ``n`` function evaluations, using
``nevergrad`` as the backend library to run the optimization. As the
name suggests, the gradient is not required for this method.
Parameters
----------
n : int
The maximum number of iterations for the optimizer.
Returns
-------
tn_opt : TensorNetwork
"""
import nevergrad as ng
opt = getattr(ng.optimizers, self.optimizer)(
parametrization=ng.p.Array(
init=self.vectorizer.vector,
lower=self.bounds[:, 0] if self.bounds is not None else None,
upper=self.bounds[:, 1] if self.bounds is not None else None,
),
budget=n
)
try:
self._maybe_init_pbar(n)
for _ in range(n):
x = opt.ask()
loss = self.vectorized_value(*x.args, **x.kwargs)
opt.tell(x, loss)
if self.loss_target is not None:
if self.loss < self.loss_target:
break
except KeyboardInterrupt:
pass
finally:
self._maybe_close_pbar()
# recommendation = opt.minimize(self.vectorized_value)
recommendation = opt.provide_recommendation()
self.vectorizer.vector[:] = recommendation.value
return self.get_tn_opt()