The easiest local minimization [python 3] by 바죠

When the first derivative of the objective function is available, in particular when it can be written down analytically, local minimization can converge very quickly.
When the first derivative is not known, a different class of algorithms must be used.
Without derivative information, local minimization cannot proceed as efficiently, so considerably more computation is required.

scipy also supports computing the derivatives numerically, by finite differences.
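For example, the gradient can be omitted entirely and a gradient-based method such as BFGS will approximate it by finite differences; scipy.optimize.approx_fprime exposes the same finite-difference gradient directly. A minimal sketch with a simple sphere function (my own illustrative choice, not tied to the listings below):

import numpy as np
from scipy.optimize import minimize, approx_fprime

def sphere(x):
    return np.sum(x**2)

x0 = np.array([1.0, -2.0, 0.5])
# finite-difference gradient at x0 (close to the exact gradient 2*x0)
print(approx_fprime(x0, sphere, 1e-8))
# BFGS without an explicit jac: the gradient is approximated numerically
res = minimize(sphere, x0, method='BFGS')
print(res.x, res.fun)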

Local minimization can also serve as a building block of global minimization, for example as a component of a genetic algorithm.
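As one illustration of this idea (a sketch using scipy's basinhopping rather than a genetic algorithm), a global method can repeatedly perturb the current point and launch a full local minimization from each perturbed point:

import numpy as np
from scipy.optimize import basinhopping

def rastrigin1(x):
    # one-dimensional Rastrigin function: many local minima, global minimum at x = 0
    return 10.0 + x[0]**2 - 10.0*np.cos(2.0*np.pi*x[0])

x0 = np.array([3.7])
# every basin-hopping step runs a local minimization (BFGS here)
ret = basinhopping(rastrigin1, x0, minimizer_kwargs={'method': 'BFGS'}, niter=100, seed=1)
print(ret.x, ret.fun)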

---------------------------------------------------------------------------------------------------------------------

import numpy as np
from scipy.optimize import minimize
def rosen(x):
    """The Rosenbrock function"""
    return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
def rosen_der(x):
    """Analytic gradient of the Rosenbrock function"""
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = np.zeros_like(x)
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der
def rosen_hess(x):
    """Analytic Hessian of the Rosenbrock function"""
    x = np.asarray(x)
    H = np.diag(-400*x[:-1],1) - np.diag(400*x[:-1],-1)
    diagonal = np.zeros_like(x)
    diagonal[0] = 1200*x[0]**2-400*x[1]+2
    diagonal[-1] = 200
    diagonal[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]
    H = H + np.diag(diagonal)
    return H
def rosen_hess_p(x, p):
    """Product of the Rosenbrock Hessian with an arbitrary vector p"""
    x = np.asarray(x)
    Hp = np.zeros_like(x)
    Hp[0] = (1200*x[0]**2 - 400*x[1] + 2)*p[0] - 400*x[0]*p[1]
    Hp[1:-1] = -400*x[:-2]*p[:-2]+(202+1200*x[1:-1]**2-400*x[2:])*p[1:-1] \
               -400*x[1:-1]*p[2:]
    Hp[-1] = -400*x[-2]*p[-2] + 200*p[-1]
    return Hp

x0 = np.array([1.3,0.7,0.8,4.9,1.2])
res = minimize(rosen,x0,method='nelder-mead',options={'xatol':1e-8,'disp':True})
print('Nelder-Mead:',res.x)
res = minimize(rosen, x0, method='BFGS', jac=rosen_der,options={'disp': True})
print('BFGS:',res.x)
res = minimize(rosen, x0, method='Newton-CG',jac=rosen_der, hess=rosen_hess,options={'xtol': 1e-8, 'disp': True})
print('Newton-CG with Hessian:',res.x)
res= minimize(rosen, x0, method='Newton-CG',jac=rosen_der, hessp=rosen_hess_p,options={'xtol': 1e-8, 'disp': True})
print('Newton-CG with Hessian product:',res.x)
res = minimize(rosen, x0, method='trust-ncg',jac=rosen_der, hess=rosen_hess,options={'gtol': 1e-8, 'disp': True})
print('trust-ncg with Hessian:',res.x)
res = minimize(rosen, x0, method='trust-ncg',jac=rosen_der, hessp=rosen_hess_p,options={'gtol': 1e-8, 'disp': True})
print('trust-ncg with Hessian product:',res.x)
res = minimize(rosen, x0, method='trust-krylov',jac=rosen_der, hess=rosen_hess,options={'gtol': 1e-8, 'disp': True})
res = minimize(rosen, x0, method='trust-krylov',jac=rosen_der, hessp=rosen_hess_p,options={'gtol': 1e-8, 'disp': True})

import scipy.optimize as optimize 
optimize.show_options(solver='minimize',method='nelder-mead')
print(res.x)
print(res.fun)

---------------------------------------------------------------------------------------------------------------------

import scipy.optimize as optimize
fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
res = optimize.minimize(fun, (2, 0), method='TNC', tol=1e-10)
print(res.x)
# [ 1.          2.49999999]
bnds = ((0.25, 0.75), (0, 2.0))
res = optimize.minimize(fun, (2, 0), method='TNC', bounds=bnds, tol=1e-10)
print(res.x)

[1.  2.5]
[0.75 2.  ]

---------------------------------------------------------------------------------------------------------------------

The easiest Bayesian optimization using machine-learning methods: http://incredible.egloos.com/7479039
Bayesian optimization is the method of choice
when the number of independent variables is not large,
when the derivative of the objective function is not available,
and when each evaluation of the objective function is expensive.
It can also be tried when the objective function is noisy.
It is widely used for hyperparameter optimization; a sketch of that use follows the toy example below.

import scipy.optimize as optimize
from bayes_opt import BayesianOptimization
fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
res = optimize.minimize(fun, (2, 0), method='TNC', tol=1e-10)
print(res.x)
print(res.fun)
def fun(x1,x2):
    # BayesianOptimization maximizes, so the objective is negated here
    return -((x1-1)**2+ (x2-2.5)**2)
BO = BayesianOptimization(fun, {'x1': (0., 2.), 'x2': (0., 3.) }, verbose=2)
BO.maximize(init_points=2, n_iter=20)
for i, res in enumerate(BO.res):
    print("Iteration {}: \n\t{}".format(i, res))
print(BO.max)
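For the hyperparameter optimization mentioned above, a minimal sketch could look like the following (this assumes scikit-learn is available; the breast-cancer dataset and the SVM parameters C and gamma are purely illustrative choices):

from bayes_opt import BayesianOptimization
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

Xd, yd = load_breast_cancer(return_X_y=True)
def cv_score(logC, logGamma):
    # objective to maximize: mean 5-fold cross-validation accuracy of an SVM
    model = SVC(C=10.0**logC, gamma=10.0**logGamma)
    return cross_val_score(model, Xd, yd, cv=5).mean()
BO = BayesianOptimization(cv_score, {'logC': (-3., 3.), 'logGamma': (-6., 0.)}, verbose=2)
BO.maximize(init_points=5, n_iter=20)
print(BO.max)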

---------------------------------------------------------------------------------------------------------------------
#   http://krasserm.github.io/2018/03/21/bayesian-optimization/
#   Modified by In-Ho Lee, KRISS, February 14, 2020.
import numpy as np
from scipy.stats import norm
def expected_improvement(X,X_sample,Y_sample,gpr,xi=0.01):
    '''
    Computes the EI at points X based on existing samples X_sample
    and Y_sample using a Gaussian process surrogate model.
    Args:
        X: Points at which EI shall be computed (m x d).
        X_sample: Sample locations (n x d).
        Y_sample: Sample values (n x 1).
        gpr: A GaussianProcessRegressor fitted to samples.
        xi: Exploitation-exploration trade-off parameter.
    Returns:
        Expected improvements at points X.
    '''
    mu, sigma=gpr.predict(X,return_std=True)
    sigma=sigma+1e-16           # tiny jitter to avoid division by zero below
    mu_sample=gpr.predict(X_sample)
    sigma = sigma.reshape(-1, 1)
#   Needed for noise-based model, otherwise use np.max(Y_sample). See also section 2.4 in [...]
    mu_sample_opt = np.max(mu_sample)
    with np.errstate(divide='warn'):
        imp = mu - mu_sample_opt - xi
        Z = imp / sigma
        ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0
    return ei
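#   The function above implements the standard expected-improvement formula:
#   EI(x) = (mu(x) - f_best - xi) * Phi(Z) + sigma(x) * phi(Z),  Z = (mu(x) - f_best - xi) / sigma(x),
#   where f_best is the best (largest) predicted value at the sampled points and
#   Phi, phi are the standard normal CDF and PDF.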
from scipy.optimize import minimize
def propose_location(acquisition,X_sample,Y_sample,gpr,bounds, n_restarts=25):
    '''
    Proposes the next sampling point by optimizing the acquisition function.
    Args:
        acquisition: Acquisition function.
        X_sample: Sample locations (n x d).
        Y_sample: Sample values (n x 1).
        gpr: A GaussianProcessRegressor fitted to samples.
    Returns:
        Location of the acquisition function maximum.
    '''
    ndim = X_sample.shape[1]
    min_val = 1
    min_x = None
    def min_obj(X):
#   Minimization objective is the negative acquisition function
        return -acquisition(X.reshape(-1,ndim),X_sample,Y_sample,gpr)
#   Find the best optimum by starting from n_restarts different random points.
    for x0 in np.random.uniform(bounds[:, 0],bounds[:, 1],size=(n_restarts,ndim)):
        res = minimize(min_obj,x0=x0,bounds=bounds,method='L-BFGS-B')
        if res.fun < min_val:
            min_val = res.fun
            min_x = res.x
    return min_x.reshape(-1,ndim)

import time
ndim=10
ninit=ndim*2
# search box: every dimension restricted to [-3, 3]
bounds=np.zeros((ndim,2))
bounds[:,0]=-3.00
bounds[:,1]= 3.00
noise = 1e-8
def f(X,noise=noise):
    # objective to be maximized; toggle the if-blocks below to switch test functions
    # (the noise argument is accepted for interface compatibility but not used here)
    y=np.zeros((X.shape[0],))
    ndim = X.shape[1]
    if False:
        # negated Rastrigin function
        for i in range(X.shape[0]):
            y[i]=-10.*ndim
            for j in range(X.shape[1]):
                y[i]=y[i]-X[i,j]**2+10.*np.cos(2.*(np.pi)*X[i,j])
    if True:
        # sum of sines (the active choice)
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                y[i]=y[i]+np.sin(X[i,j])
    if False:
        # negated shifted sphere function
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                y[i]=y[i]-(X[i,j]-float(j))**2
    return y
X_init=np.zeros((ninit,ndim))
for i in range(ninit):
    for j in range(ndim):
        X_init[i,j]=bounds[j,0]+(bounds[j,1]-bounds[j,0])*np.random.random()
Y_init=f(X_init)
for i in range(ninit):
    print(X_init[i,:],Y_init[i])
best_obj=Y_init[0]
best_sol=X_init[0,:]
for i in range(ninit):
    if best_obj < Y_init[i]:
       best_obj=Y_init[i]
       best_sol=X_init[i,:]
       print(best_obj)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel,Matern
#   Gaussian process with Matern kernel as surrogate model
m52=ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
gpr=GaussianProcessRegressor(kernel=m52, alpha=noise**2)
X_sample=X_init
Y_sample=Y_init
n_iter =ndim*20
for i in range(n_iter):
    starty=time.time()
    gpr.fit(X_sample,Y_sample)
    starty=time.time()-starty
#   Obtain next sampling point from the acquisition function (expected_improvement)
    startz=time.time()
    X_next=propose_location(expected_improvement,X_sample,Y_sample,gpr,bounds,n_restarts=ndim*10)
    startz=time.time()-startz
    print(starty,startz,'sec, fit, propose',X_sample.shape[0])
    Y_next=f(X_next,noise)
#   print(X_next.ravel(),Y_next)
    if best_obj < Y_next:
       best_obj=Y_next
       best_sol=X_next
       print(X_next.ravel(),Y_next)
    X_sample=np.vstack((X_sample,X_next))
    Y_sample=np.hstack((Y_sample,Y_next))
print(best_sol.ravel(),best_obj)
import matplotlib.pyplot as plt
plt.rc('font', size=14)          # controls default text sizes
#plt.rc('figure', titlesize=14)  # fontsize of the figure title
#plt.rc('axes', titlesize=14)     # fontsize of the axes title
#plt.rc('axes', labelsize=18)    # fontsize of the x and y labels
#plt.rc('xtick', labelsize=16)    # fontsize of the tick labels
#plt.rc('ytick', labelsize=16)    # fontsize of the tick labels
#plt.rc('legend', fontsize=14)    # legend fontsize
x=np.zeros((Y_sample.shape[0],))
for i in range(Y_sample.shape[0]):
    x[i]=i+1
fig=plt.figure()
plt.plot(x[:ninit], Y_sample[:ninit], marker='+', color='red', linewidth=2, markersize=8, label='Random')
plt.plot(x[ninit:], Y_sample[ninit:], marker='o', color='blue', linewidth=2, markersize=8, label='Gaussian Process')
plt.xlabel('Samples', fontsize=20)
plt.ylabel('Objective function', fontsize=20)
plt.legend()
fig.tight_layout()
fig.savefig('gpsamples.eps')
#plt.show()


---------------------------------------------------------------------------------------------------------------------
import numpy as np
from scipy.optimize import minimize
def rastrigin(x):
#   Rastrigin
    total=10.*len(x)
    for j in range(len(x)):
        total+=x[j]**2-10.*np.cos(2.*np.pi*x[j])
    return total
def styblinski(x):
#   Styblinski-Tang
    total=0.
    for j in range(len(x)):
        total+=(x[j]**4-16.*x[j]**2+5.*x[j])/2.
    return total
def rosenbrock(x):
#  Rosenbrock
    return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)
def rosenbrock_der(x):
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = np.zeros_like(x)
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

#x0 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])   # alternative starting point
x0 = np.array([1.1, 2.2, 3.0, 4.0, 5.0])

res = minimize(rosenbrock,x0,method='nelder-mead', options={'xatol':1e-8,'disp':True})
print('Nelder-Mead:',res.x)
print(res.fun)

#x0 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])   # alternative starting point
x0 = np.array([1.1, 2.2, 3.0, 4.0, 5.0])
res = minimize(rosenbrock, x0, method='BFGS', jac=rosenbrock_der, options={'disp': True})
print('BFGS:',res.x)
print(res.fun)

#x0 = np.array([1.1, 2.2, 3.0, 4.0, 5.0])   # alternative starting point
x0 = np.array([0.0, 0.2, 0.0, 0.0, 0.0])
res = minimize(rastrigin,x0,method='nelder-mead', options={'xatol':1e-8,'disp':True})
print('Nelder-Mead:',res.x)
print(res.fun)

---------------------------------------------------------------------------------------------------------------------
import numpy as np
import os
import os.path
from scipy.optimize import dual_annealing
from scipy.optimize import minimize
from scipy import optimize
def append_new_line(file_name, text_to_append):
    with open(file_name, "a+") as file_object:
        file_object.seek(0)
        data = file_object.read(100)
        if len(data) > 0:
            file_object.write("\n")
        file_object.write(text_to_append)
def append_multiple_lines(file_name, lines_to_append):
    with open(file_name, "a+") as file_object:
        appendEOL = False
        file_object.seek(0)
        data = file_object.read(100)
        if len(data) > 0:
            appendEOL = True
        for line in lines_to_append:
            if appendEOL == True:
                file_object.write("\n")
            else:
                appendEOL = True
            file_object.write(line)

lw = [-15.0] * 10
up = [15.0] * 10
# objective function: assumed here to be the Rastrigin function
# (as in the scipy dual_annealing documentation example)
func = lambda x: np.sum(x*x - 10.0*np.cos(2.0*np.pi*x)) + 10.0*np.size(x)
if True:
   ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234, maxiter=20)
if False:
   ret=optimize.differential_evolution(func, bounds=list(zip(lw, up)), maxiter=10)
if False:
   x0=[ 0. for i in range(10)]
   ret = minimize(func, x0, method='Nelder-Mead', tol=1e-6)
lines=[]
lines.append(str(ret.fun))
for i in range(len(ret.x)):
    lines.append(str(ret.x[i]))
append_multiple_lines('./OUTPUT', lines)

---------------------------------------------------------------------------------------------------------------------
