cms.optimization.optimize_sd_cg2d_linesearch のソースコード

import csv
import numpy as np
from numpy import sin, cos, tan, pi, exp, sqrt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D
from matplotlib import cm
import time
import sys

# optimization parameters
x0 = [0.0, 0.0]

#algorism = 'sd'
algorism = 'cg'

# for line search '' or 'one', 'simple', 'exact', 'golden', 'armijo'
lsmode = 'simple'

nmaxlsiter = 100
# for lsmode == 'golden'
alphaeps = 1.0e-2
xrange = 0.5
# for lsmode == 'exact'
dump = 0.3
h = 1.0e-3

# for global optimization
eps   = 1.0e-5
nmaxiter = 200
iprintiterval = 1

# target function
#functype = 'ellipsoid'
functype = 'general'

if __name__ == "__main__":
    if len(sys.argv) >= 2:
        x0[0] = float(sys.argv[1])

    if len(sys.argv) >= 3:
        x0[1] = float(sys.argv[2])

    if len(sys.argv) >= 4:
        algorism = sys.argv[3]

    if len(sys.argv) >= 5:
        lsmode = sys.argv[4]

    if len(sys.argv) >= 6:
        functype = sys.argv[5]


if functype == 'ellipsoid':
    alpha = 0.03
else:
    alpha = 0.01

if lsmode == 'golden':
    alpha = 1.0
elif lsmode == 'armijo':
    alpha = 1.0


# differentiation x mesh
h = 0.01

# graph plot parameters
fplot = 1
ngdata = 51
xgmin = -4.0
xgmax =  4.0
ygmin = -4.0
ygmax =  4.0
tsleep = 0.3


[ドキュメント]
def linesearch_armijo(func, x0, sdir, 
            xrange = None, h = None, alpha = None, dump = None, nmaxiter = None, eps = None):
    xi  = 0.5   # 0 < xi < 1
    tau = 0.5   # 0 < tau < 1
    dr2 = np.inner(sdir, sdir)
    f0 = func(x0)
    for i in range(nmaxiter):
        dx = alpha * sdir
        fp = func(x0 + dx)
        fxi = f0 - xi * alpha * dr2
#        print("i={:3d}  alpha={:6.3f}  fp={:6.3f}  fxi={:6.3f}".format(i, alpha, fp, fxi), "   dx=", dx)
        if fp <= fxi or alpha < eps:
            return i, x0 + dx, dx, alpha
        else:
             alpha *= tau
    return -1, x0 + dx, dx, alpha



[ドキュメント]
def linesearch_golden(func, x0, sdir,
            xrange = None, h = None, alpha = None, dump = None, nmaxiter = None, eps = None):
    eta = 0.381966011   # (sqrt(5) - 1) / (sqrt(5) + 1)
    dr2 = np.inner(sdir, sdir)
    dr  = sqrt(dr2)
    x = [0.0, 0.0 + xrange*eta, xrange - xrange*eta, xrange]
    f0 = func(x0)
    f3 = func(x0 + xrange * sdir)
    for i in range(nmaxiter):
        alpha2 = (x[1] + x[2]) / 2.0
        dx = alpha2 * sdir
        if abs(x[0] - x[1]) < eps:
            return i, x0 + dx, dx, alpha2

        f1 = func(x0 + x[1] * sdir)
        f2 = func(x0 + x[2] * sdir)
        if f1 < f2 or (f0 < f1 and f0 < f2 and f0 < f3):
            dxe = x[2] - x[0]
            x = [x[0], x[0] + dxe * eta, x[2] - dxe * eta, x[2]]
            f3 = f2
        else:
            dxe = x[3] - x[1]
            x = [x[1], x[1] + dxe * eta, x[3] - dxe * eta, x[3]]
            f0 = f1
        """
        f = []
        f.append(func(x0 + x[0] * sdir))
        f.append(func(x0 + x[1] * sdir))
        f.append(func(x0 + x[2] * sdir))
        f.append(func(x0 + x[3] * sdir))
        print("i=", i, "  x=", x, "  f=", f)
        """
    return -1, x0 + dx, dx, alpha2



[ドキュメント]
def linesearch_exact(func, x0, sdir,
            xrange = None, h = None, alpha = None, dump = None, nmaxiter = None, eps = None):
    dr2 = np.inner(sdir, sdir)
    dr  = sqrt(dr2)
    for i in range(nmaxiter):
        fm = func(x0 + h * sdir / dr)
        f0 = func(x0)
        fp = func(x0 - h * sdir / dr)
        f1 = (fp - fm) / 2.0 / h
        f2 = (fp - 2.0 * f0 + fm) / h / h
        alpha2 = f1 / (abs(f2) + dump)
#        print("f1 = ", f1, "  f2 = ", f2, "  dr = ", dr)
        dx0 = alpha2 * sdir / dr
        x0 += dx0
        dr0 = sqrt(np.inner(dx0, dx0))
        if abs(alpha2/dr) < eps:
            return i, x0, alpha2 * sdir, alpha2
    return -1, x0, alpha2 * sdir, alpha2




[ドキュメント]
def linesearch_one(func, x0, sdir,
            xrange = None, h = None, alpha = None, dump = None, nmaxiter = None, eps = None):
    return 1, x0 + alpha * sdir, alpha * sdir, alpha



[ドキュメント]
def linesearch_simple(func, x0, sdir,
            xrange = None, h = None, alpha = None, dump = None, nmaxiter = None, eps = None):
    diff = -sdir
    f0 = func(x0)
#    print("x0=", x0, "   f0=", f0)
    xprev = x0.copy()
    x = x0.copy()
    for i in range(1, nmaxiter+1):
        alpha2 = i * alpha
        for j in range(len(x0)):
           x[j] = x0[j] - alpha2 * diff[j]
        f1 = func(x)
#        print("i:{}".format(i), "  x=", x, "  f1=", f1)
        if f1 > f0:
            return i, xprev, -alpha2 * diff, alpha2
        else:
            xprev = x.copy()
            f0 = f1
    return -1, x, -alpha2 * diff, alpha2



[ドキュメント]
def searchdir_sd(x = None, diff1func = None, icg = None, gradfkm = None, dkm = None):
    n = len(x0)
    gradfk = np.empty(n)
    for i in range(0, n):
# gradient of the target function
       gradfk[i] = diff1func(i, x)

    return -gradfk, icg, gradfkm, dkm




[ドキュメント]
def searchdir_cg(x = None, diff1func = None, icg = None, gradfkm = None, dkm = None):
    n = len(x)
    gradfk = np.empty(n)
    for i in range(0, n):
# gradient of the target function
       gradfk[i] = diff1func(i, x)

    if icg == 0:
# search direction
        dk = -gradfk
    else:
        yk = gradfk - gradfkm
        inn1 = np.inner(gradfk, yk)
        inn2 = np.inner(dkm, yk)
# search direction
        dk = -gradfk + inn1 / inn2 * dkm
    gradfkm = gradfk.copy()
    dkm     = dk.copy()
    icg += 1
# reset if the number of conjugate gradients exceeds the dimension of the parameters
    if icg >= n:
        icg = 0
    return dk, icg, gradfkm, dkm



# first derivative of func(x)

[ドキュメント]
def ediff1(i, x):
    if i == 0:
        return 2.0 * x[0]
    if i == 1:
        return 18.0 * x[1]



[ドキュメント]
def gdiff1(i, x):
    if i == 0:
        return - 10.0 - 60.0 * x[0] + 4.5 * x[0]*x[0] + 12.0 * x[0]*x[0]*x[0] \
                + 3.0 * x[1] * x[1]
    if i == 1:
        return 30.0 - 60.0 * x[1] + 12.0 * x[1]*x[1]*x[1] \
                + 6.0 * x[0] * x[1]



[ドキュメント]
def efunc(x):
    return x[0]*x[0] + 9.0*x[1]*x[1]



[ドキュメント]
def gfunc(x):
    return -3.0 - 10.0 * x[0] - 30.0 * x[0]*x[0] + 1.5 * x[0]*x[0]*x[0] + 3.0 * x[0]*x[0]*x[0]*x[0] \
                + 30.0 * x[1] - 30.0 * x[1]*x[1] + 3.0 * x[1]*x[1]*x[1]*x[1] \
                + 3.0 * x[0] * x[1] * x[1]


# global function variables
if functype == 'ellipsoid':
    diff1 = ediff1
    func = efunc
else:
    diff1 = gdiff1
    func = gfunc



[ドキュメント]
def main():
    global plt, ngdata, xgmin, xgmax
    global x0, alpha, dump, eps, nmaxiter, iprintinterval
    global func, diff1

    print("Find minimum point by steepest-descend / conjugate gradient methods")
    print("")

# plot surface graph
    if fplot == 1:
        xgstep = (xgmax - xgmin) / (ngdata - 1)
        ygstep = (ygmax - ygmin) / (ngdata - 1)
        xg = np.empty([ngdata, ngdata])
        yg = np.empty([ngdata, ngdata])
        zg = np.empty([ngdata, ngdata])
        for ix in range(ngdata):
            for iy in range(ngdata):
                xg[ix, iy] = xgmin + ix * xgstep
                yg[ix, iy] = ygmin + iy * ygstep
                zg[ix, iy] = func([xg[ix][iy], yg[ix][iy]])

        fig = plt.figure(figsize = (10, 5))
        ax  = fig.add_subplot(1, 2, 1)
        ax2 = fig.add_subplot(1, 2, 2, projection='3d')
#        ax = fig.add_subplot(111, projection='3d')
#        surf = ax.plot_surface(xg, yg, zg, cmap=cm.coolwarm)
        surf = ax2.plot_wireframe(xg, yg, zg, rstride=2, cstride=2)
#        ax.set_zlim3d(-3.01, 3.01)
#        plt.colorbar(surf, shrink=0.5, aspect=10)
        ax.set_title('contour')
        contour = ax.contourf(xg, yg, zg, levels = 51, cmap = 'Spectral')
#        contour = ax.contour(xg, yg, zg, levels = 51, cmap = 'Spectral')
        solve, = ax.plot([], color = 'blue', marker = 'o', linestyle = '-')
#        solve, = ax.plot([], color = 'blue', marker = 'o', linestyle = '')
        plt.pause(0.1)
#        plt.show()

# calculate initial parameters
    searchdir_func = searchdir_sd
    if algorism == 'cg':
        searchdir_func = searchdir_cg

    ls_func = linesearch_one
    if lsmode == 'simple':
        ls_func = linesearch_simple
    elif lsmode == 'exact':
        ls_func = linesearch_exact
    elif lsmode == 'golden':
        ls_func = linesearch_golden
    elif lsmode == 'armijo':
        ls_func = linesearch_armijo

    n       = len(x0)
    gradfkm = None
    dkm     = None
    f = func(x0)
    print("x0 = ({}, {}): f = {}".format(x0[0], x0[1], f))
    xt = []
    yt = []
    xt.append(x0[0])
    yt.append(x0[1])

# counter of cg steps
    icg = 0
# optimization start
    for iter in range(nmaxiter):
        dk, icg, gradfkm, dkm = searchdir_func(x0, diff1, icg, gradfkm, dkm)

# pass the first derivative of the target function, i.e., the negative value of the search direction dk, -dk
        insiter, x0, dx, alpha2 = ls_func(func, x0, dk, 
                    xrange = xrange, h = h, alpha = alpha, dump = dump, 
                    nmaxiter = nmaxlsiter, eps = alphaeps)
        f = func(x0)
        dxmax = max(abs(dx))
        print("{:4d}({:3d}): x = ({:14.6g}, {:14.6g}) dx = {:14.6g}: f = {:14.6g}".format(
                        iter, insiter, x0[0], x0[1], dxmax, f))

        if fplot == 1:
            xt.append(x0[0])
            yt.append(x0[1])
            solve.set_data(xt, yt)
            plt.pause(0.1)
            time.sleep(tsleep)

        if dxmax < eps:
            print("Converged at x = ", x0, " dx = {}   f = {}".format(dxmax, f))
            break;
    else:
        print("Not converged")

    if fplot == 1:
        print("Press enter to terminate:", end = '')
        ret = input()



if __name__ == "__main__":
    main()