diff options
author | Edoardo Pasca <edo.paskino@gmail.com> | 2019-11-19 10:00:25 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-19 10:00:25 +0000 |
commit | a1341271589a5bd3dad0def8198c49c37d1c197d (patch) | |
tree | 744476f5e9bb0d3b8219cdc689114a16e4fa5ab8 /Wrappers | |
parent | aa80145534d4fe325fbfbae6569bcb6709006d33 (diff) | |
download | framework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.gz framework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.bz2 framework-a1341271589a5bd3dad0def8198c49c37d1c197d.tar.xz framework-a1341271589a5bd3dad0def8198c49c37d1c197d.zip |
added armijo rule (#390)
* added armijo rule
* added info and kwarg for alpha
* updated Armijo rule
* modified stop criterion to GD
* added kmax to limit the while True loop
* fix raise comment
* change default stopping criterion
* updated test
* added Rosenbrock function and test Gradient descent on that
* removed test code
* added missing import
Diffstat (limited to 'Wrappers')
6 files changed, 254 insertions, 31 deletions
diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py index df8dbf5..408a904 100755 --- a/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py +++ b/Wrappers/Python/ccpi/optimisation/algorithms/Algorithm.py @@ -197,6 +197,10 @@ class Algorithm(object): if verbose: print (self.verbose_output(very_verbose)) break + if verbose: + if self.update_objective_interval != 1: + print (self.verbose_output(very_verbose)) + print ("Stop criterion has been reached.") def verbose_output(self, verbose=False): '''Creates a nice tabulated output''' diff --git a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py index 8f9c958..5e7284a 100755 --- a/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py +++ b/Wrappers/Python/ccpi/optimisation/algorithms/GradientDescent.py @@ -24,7 +24,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals - +import numpy from ccpi.optimisation.algorithms import Algorithm class GradientDescent(Algorithm): @@ -35,7 +35,7 @@ class GradientDescent(Algorithm): ''' - def __init__(self, x_init=None, objective_function=None, rate=None, **kwargs): + def __init__(self, x_init=None, objective_function=None, step_size =None, **kwargs): '''GradientDescent algorithm creator initialisation can be done at creation time if all @@ -43,51 +43,135 @@ class GradientDescent(Algorithm): :param x_init: initial guess :param objective_function: objective function to be minimised - :param rate: step rate + :param step_size: step size for gradient descent iteration + :param alpha: optional parameter to start the backtracking algorithm, default 1e6 + :param beta: optional parameter defining the reduction of step, default 0.5. + It's value can be in (0,1) + :param rtol: optional parameter defining the relative tolerance comparing the + current objective function to 0, default 1e-5, see numpy.isclose + :param atol: optional parameter defining the absolute tolerance comparing the + current objective function to 0, default 1e-8, see numpy.isclose ''' super(GradientDescent, self).__init__(**kwargs) - - if x_init is not None and objective_function is not None and rate is not None: - self.set_up(x_init=x_init, objective_function=objective_function, rate=rate) - - def should_stop(self): - '''stopping criterion, currently only based on number of iterations''' - return self.iteration >= self.max_iteration + self.alpha = kwargs.get('alpha' , 1e6) + self.beta = kwargs.get('beta', 0.5) + self.rtol = kwargs.get('rtol', 1e-5) + self.atol = kwargs.get('atol', 1e-8) + if x_init is not None and objective_function is not None : + self.set_up(x_init=x_init, objective_function=objective_function, step_size=step_size) - def set_up(self, x_init, objective_function, rate): + def set_up(self, x_init, objective_function, step_size): '''initialisation of the algorithm :param x_init: initial guess :param objective_function: objective function to be minimised - :param rate: step rate''' + :param step_size: step size''' print("{} setting up".format(self.__class__.__name__, )) self.x = x_init.copy() self.objective_function = objective_function - self.rate = rate + - self.loss.append(objective_function(x_init)) + if step_size is None: + self.k = 0 + self.update_step_size = True + self.x_armijo = x_init.copy() + # self.rate = self.armijo_rule() * 2 + # print (self.rate) + else: + self.step_size = step_size + self.update_step_size = False + + + self.update_objective() self.iteration = 0 - try: - self.memopt = self.objective_function.memopt - except AttributeError as ae: - self.memopt = False - if self.memopt: - self.x_update = x_init.copy() + self.x_update = x_init.copy() self.configured = True print("{} configured".format(self.__class__.__name__, )) def update(self): '''Single iteration''' - if self.memopt: - self.objective_function.gradient(self.x, out=self.x_update) - self.x_update *= -self.rate - self.x += self.x_update + + self.objective_function.gradient(self.x, out=self.x_update) + + if self.update_step_size: + # the next update and solution are calculated within the armijo_rule + self.step_size = self.armijo_rule() else: - self.x += -self.rate * self.objective_function.gradient(self.x) + self.x_update *= -self.step_size + self.x += self.x_update + + def update_objective(self): self.loss.append(self.objective_function(self.x)) + + def armijo_rule(self): + '''Applies the Armijo rule to calculate the step size (step_size) + + https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080 + + The Armijo rule runs a while loop to find the appropriate step_size by starting + from a very large number (alpha). The step_size is found by dividing alpha by 2 + in an iterative way until a certain criterion is met. To avoid infinite loops, we + add a maximum number of times the while loop is run. + + This rule would allow to reach a minimum step_size of 10^-alpha. + + if + alpha = numpy.power(10,gamma) + delta = 3 + step_size = numpy.power(10, -delta) + with armijo rule we can get to step_size from initial alpha by repeating the while loop k times + where + alpha / 2^k = step_size + 10^gamma / 2^k = 10^-delta + 2^k = 10^(gamma+delta) + k = gamma+delta / log10(2) \\approx 3.3 * (gamma+delta) + + if we would take by default delta = gamma + kmax = numpy.ceil ( 2 * gamma / numpy.log10(2) ) + kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2)) + + ''' + f_x = self.objective_function(self.x) + if not hasattr(self, 'x_update'): + self.x_update = self.objective_function.gradient(self.x) + + while self.k < self.kmax: + # self.x - alpha * self.x_update + self.x_update.multiply(self.alpha, out=self.x_armijo) + self.x.subtract(self.x_armijo, out=self.x_armijo) + + f_x_a = self.objective_function(self.x_armijo) + sqnorm = self.x_update.squared_norm() + if f_x_a - f_x <= - ( self.alpha/2. ) * sqnorm: + self.x.fill(self.x_armijo) + break + else: + self.k += 1. + # we don't want to update kmax + self._alpha *= self.beta + + if self.k == self.kmax: + raise ValueError('Could not find a proper step_size in {} loops. Consider increasing alpha.'.format(self.kmax)) + return self.alpha + + @property + def alpha(self): + return self._alpha + @alpha.setter + def alpha(self, alpha): + self._alpha = alpha + self.kmax = numpy.ceil (2 * numpy.log10(alpha) / numpy.log10(2)) + self.k = 0 + + def should_stop(self): + return self.max_iteration_stop_cryterion() or \ + numpy.isclose(self.get_last_objective(), 0., rtol=self.rtol, + atol=self.atol, equal_nan=False) + + diff --git a/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py new file mode 100755 index 0000000..03abf7a --- /dev/null +++ b/Wrappers/Python/ccpi/optimisation/functions/Rosenbrock.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +#======================================================================== +# Copyright 2019 Science Technology Facilities Council +# Copyright 2019 University of Manchester +# +# This work is part of the Core Imaging Library developed by Science Technology +# Facilities Council and University of Manchester +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0.txt +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#========================================================================= + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import numpy +from ccpi.optimisation.functions import Function +from ccpi.framework import VectorData, VectorGeometry + +class Rosenbrock(Function): + r'''Rosenbrock function + + .. math:: + + F(x,y) = (\alpha - x)^2 + \beta(y-x^2)^2 + + The function has a global minimum at .. math:: (x,y)=(\alpha, \alpha^2) + + ''' + def __init__(self, alpha, beta): + super(Rosenbrock, self).__init__() + + self.alpha = alpha + self.beta = beta + + def __call__(self, x): + if not isinstance(x, VectorData): + raise TypeError('Rosenbrock function works on VectorData only') + vec = x.as_array() + a = (self.alpha - vec[0]) + b = (vec[1] - (vec[0]*vec[0])) + return a * a + self.beta * b * b + + def gradient(self, x, out=None): + r'''Gradient of the Rosenbrock function + + .. math:: + + \nabla f(x,y) = \left[ 2*((x-\alpha) - 2\beta x(y-x^2)) ; 2\beta (y - x^2) \right] + + ''' + if not isinstance(x, VectorData): + raise TypeError('Rosenbrock function works on VectorData only') + + vec = x.as_array() + a = (vec[0] - self.alpha) + b = (vec[1] - (vec[0]*vec[0])) + + res = numpy.empty_like(vec) + res[0] = 2 * ( a - 2 * self.beta * vec[0] * b) + res[1] = 2 * self.beta * b + + if out is not None: + out.fill (res) + else: + return VectorData(res) + diff --git a/Wrappers/Python/ccpi/optimisation/functions/__init__.py b/Wrappers/Python/ccpi/optimisation/functions/__init__.py index 67abeef..d968539 100644 --- a/Wrappers/Python/ccpi/optimisation/functions/__init__.py +++ b/Wrappers/Python/ccpi/optimisation/functions/__init__.py @@ -26,3 +26,4 @@ from .MixedL21Norm import MixedL21Norm from .IndicatorBox import IndicatorBox from .KullbackLeibler import KullbackLeibler from .Norm2Sq import Norm2Sq +from .Rosenbrock import Rosenbrock diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py index db13b97..7a312f9 100755 --- a/Wrappers/Python/test/test_algorithms.py +++ b/Wrappers/Python/test/test_algorithms.py @@ -66,24 +66,73 @@ class TestAlgorithms(unittest.TestCase): identity = Identity(ig) norm2sq = Norm2Sq(identity, b) - rate = 0.3 rate = norm2sq.L / 3. alg = GradientDescent(x_init=x_init, objective_function=norm2sq, - rate=rate) + rate=rate, atol=1e-9, rtol=1e-6) + alg.max_iteration = 1000 + alg.run() + self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array()) + alg = GradientDescent(x_init=x_init, + objective_function=norm2sq, + rate=rate, max_iteration=20, + update_objective_interval=2, + atol=1e-9, rtol=1e-6) alg.max_iteration = 20 + self.assertTrue(alg.max_iteration == 20) + self.assertTrue(alg.update_objective_interval==2) alg.run(20, verbose=True) self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array()) + def test_GradientDescentArmijo(self): + print ("Test GradientDescent") + ig = ImageGeometry(12,13,14) + x_init = ig.allocate() + # b = x_init.copy() + # fill with random numbers + # b.fill(numpy.random.random(x_init.shape)) + b = ig.allocate('random') + identity = Identity(ig) + + norm2sq = Norm2Sq(identity, b) + rate = None + + alg = GradientDescent(x_init=x_init, + objective_function=norm2sq, rate=rate) + alg.max_iteration = 100 + alg.run() + self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array()) alg = GradientDescent(x_init=x_init, objective_function=norm2sq, - rate=rate, max_iteration=20, + max_iteration=20, update_objective_interval=2) - alg.max_iteration = 20 + #alg.max_iteration = 20 self.assertTrue(alg.max_iteration == 20) self.assertTrue(alg.update_objective_interval==2) alg.run(20, verbose=True) self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array()) + def test_GradientDescentArmijo2(self): + from ccpi.optimisation.functions import Rosenbrock + from ccpi.framework import VectorData, VectorGeometry + + f = Rosenbrock (alpha = 1., beta=100.) + vg = VectorGeometry(2) + x = vg.allocate('random_int', seed=2) + # x = vg.allocate('random', seed=1) + x.fill(numpy.asarray([10.,-3.])) + + max_iter = 1000000 + update_interval = 100000 + + alg = GradientDescent(x, f, max_iteration=max_iter, update_objective_interval=update_interval, alpha=1e6) + + alg.run() + + print (alg.get_output().as_array(), alg.step_size, alg.kmax, alg.k) + + numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [1,1], decimal = 1) + numpy.testing.assert_array_almost_equal(alg.get_output().as_array(), [0.982744, 0.965725], decimal = 6) + def test_CGLS(self): print ("Test CGLS") #ig = ImageGeometry(124,153,154) diff --git a/Wrappers/Python/test/test_functions.py b/Wrappers/Python/test/test_functions.py index 550f1de..d295234 100644 --- a/Wrappers/Python/test/test_functions.py +++ b/Wrappers/Python/test/test_functions.py @@ -17,8 +17,8 @@ # limitations under the License. import numpy as np -from ccpi.optimisation.functions import Function, KullbackLeibler -from ccpi.framework import DataContainer, ImageData, ImageGeometry +from ccpi.optimisation.functions import Function, KullbackLeibler, Rosenbrock +from ccpi.framework import DataContainer, ImageData, ImageGeometry , VectorData from ccpi.optimisation.operators import Identity from ccpi.optimisation.operators import BlockOperator from ccpi.framework import BlockDataContainer @@ -371,3 +371,10 @@ class TestFunction(unittest.TestCase): proxc = f.proximal_conjugate(x,1.2) f.proximal_conjugate(x, 1.2, out=out) numpy.testing.assert_array_equal(proxc.as_array(), out.as_array()) + + def test_Rosenbrock(self): + f = Rosenbrock (alpha = 1, beta=100) + x = VectorData(numpy.asarray([1,1])) + assert f(x) == 0. + numpy.testing.assert_array_almost_equal( f.gradient(x).as_array(), numpy.zeros(shape=(2,), dtype=numpy.float32)) + |