Actual source code: cg.c
/*$Id$*/
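/* Nonlinear conjugate gradient method for unconstrained minimization.
   Supports the Fletcher-Reeves, Polak-Ribiere, Polak-Ribiere-plus,
   Hestenes-Stiefel, and Dai-Yuan update formulas, with a restart test
   and safeguarded fallbacks to steepest descent steps. */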
#include "cg.h"

#define CG_FletcherReeves    0
#define CG_PolakRibiere      1
#define CG_PolakRibierePlus  2
#define CG_HestenesStiefel   3
#define CG_DaiYuan           4
#define CG_Types             5

static const char *CG_Table[64] = {
  "cg_fr", "cg_pr", "cg_prp", "cg_hs", "cg_dy"
};

#define TAO_ZER_SAFEGUARD 1e-8
#define TAO_INF_SAFEGUARD 1e+8

static int TaoSolve_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X, *Xm1 = cg->X2;
  TaoVec *G = cg->G1, *Gm1 = cg->G2;
  TaoVec *D = cg->D, *W = cg->W;

  TaoTerminateReason reason;

  double f, f_full, fm1, gnorm, gnorm2, gnorm2m1, ginner, gd, gm1d;
  double beta, delta, step = 1.0;

  int iter = 0, status = 0, info;

  TaoFunctionBegin;

  // Get the vectors we will need
  info = TaoGetSolution(tao, &X); CHKERRQ(info);

  // Check convergence criteria
  info = TaoComputeFunctionGradient(tao, X, &f, G); CHKERRQ(info);
  info = G->Norm2(&gnorm); CHKERRQ(info);
  if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
    SETERRQ(1, "User-provided compute function generated Inf or NaN");
  }

  info = TaoMonitor(tao, iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
  if (reason != TAO_CONTINUE_ITERATING) {
    TaoFunctionReturn(0);
  }

  // Have not converged; initialize variables
  info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);
  gnorm2 = gnorm*gnorm;

  // Set initial scaling for the function
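  // The scaling delta = 2|f| / ||g||^2 (2 / ||g||^2 when f == 0) is a common
  // heuristic for the initial step length: it is the step to the minimizer
  // of a quadratic model along the gradient whose minimum value is zero,
  // clamped to the user-settable range [delta_min, delta_max].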
  if (f != 0.0) {
    delta = 2.0 * TaoAbsDouble(f) / gnorm2;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  else {
    delta = 2.0 / gnorm2;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }

  // Set counters for gradient/reset steps
  cg->grad = 0;
  cg->reset = 0;

  while (1) {
    // Save the current function and gradient information
    fm1 = f;
    gnorm2m1 = gnorm2;
    info = Xm1->CopyFrom(X); CHKERRQ(info);
    info = Gm1->CopyFrom(G); CHKERRQ(info);

    info = D->Dot(G, &gd); CHKERRQ(info);
    if ((gd >= 0) || TaoInfOrNaN(gd)) {
      // The direction is not a descent direction or contains Inf or NaN;
      // use the steepest descent direction instead
      ++cg->grad;

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }
      else {
        delta = 2.0 / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // A gradient step cannot contain NaN, so this check is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }
    }

    // Line search along the direction for an improved point
    step = delta;
    info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
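
    // Fallback cascade: if the line search fails, restore the previous
    // point and retry with a rescaled gradient step; if that fails, retry
    // an unscaled gradient step; if that also fails, restore the previous
    // point and record a zero step.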
    if (status) {
      // The line search failed;
      // reset the factors and use a scaled gradient step
      ++cg->reset;

      f = fm1;
      gnorm2 = gnorm2m1;
      info = X->CopyFrom(Xm1); CHKERRQ(info);
      info = G->CopyFrom(Gm1); CHKERRQ(info);

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }
      else {
        delta = 2.0 / gnorm2;
        delta = TaoMax(delta, cg->delta_min);
        delta = TaoMin(delta, cg->delta_max);
      }

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // A gradient step cannot contain NaN, so this check is not needed.
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }

      // This may be incorrect; the line search holds stepmax and stepmin
      // values that should be reset.
      step = delta;
      info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);

      if (status) {
        // The line search failed again;
        // switch to an unscaled gradient step
        f = fm1;
        gnorm2 = gnorm2m1;
        info = X->CopyFrom(Xm1); CHKERRQ(info);
        info = G->CopyFrom(Gm1); CHKERRQ(info);

        delta = 1.0;

        info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

        // A gradient step cannot contain NaN, so this check is not needed.
        // info = D->Norm2(&dnorm); CHKERRQ(info);
        // if (TaoInfOrNaN(dnorm)) {
        //   SETERRQ(1, "Direction generated Not-a-Number");
        // }

        // This may be incorrect; the line search holds stepmax and stepmin
        // values that should be reset.
        step = delta;
        info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
        if (status) {
          // The steepest descent direction did not produce a new point;
          // stop here
          f = fm1;
          gnorm2 = gnorm2m1;
          info = X->CopyFrom(Xm1); CHKERRQ(info);
          info = G->CopyFrom(Gm1); CHKERRQ(info);
          step = 0.0;
        }
      }
    }

    // Check the new function and gradient values for Inf or NaN
    info = G->Norm2(&gnorm); CHKERRQ(info);
    if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
      SETERRQ(1, "User-provided compute function generated Inf or NaN");
    }
    gnorm2 = gnorm*gnorm;

    // Check for termination
    info = TaoMonitor(tao, ++iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
    if (reason != TAO_CONTINUE_ITERATING) {
      break;
    }

    // Check for restart condition
    info = G->Dot(Gm1, &ginner); CHKERRQ(info);
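    // Powell-style restart test: ginner = g_k^T g_{k-1}; when
    // |ginner| >= eta * ||g_k||^2, consecutive gradients are far from
    // orthogonal and the direction is reset to steepest descent.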
    if (fabs(ginner) >= cg->eta * gnorm2) {
      // Gradients far from orthogonal; use steepest descent direction
      beta = 0.0;
    }
    else {
      // Gradients close to orthogonal; use conjugate gradient formula
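      // With y = g_k - g_{k-1} (so g_k^T y = gnorm2 - ginner) and previous
      // direction d (so d^T y = gd - gm1d), the formulas below are:
      //   Fletcher-Reeves:     beta = ||g_k||^2 / ||g_{k-1}||^2
      //   Polak-Ribiere:       beta = g_k^T y / ||g_{k-1}||^2
      //   Polak-Ribiere-plus:  beta = max(g_k^T y / ||g_{k-1}||^2, 0)
      //   Hestenes-Stiefel:    beta = g_k^T y / d^T y
      //   Dai-Yuan:            beta = ||g_k||^2 / d^T y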

      switch(cg->cg_type) {
      case CG_FletcherReeves:
        beta = gnorm2 / gnorm2m1;
        break;

      case CG_PolakRibiere:
        beta = (gnorm2 - ginner) / gnorm2m1;
        break;

      case CG_PolakRibierePlus:
        beta = TaoMax((gnorm2 - ginner) / gnorm2m1, 0.0);
        break;

      case CG_HestenesStiefel:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = (gnorm2 - ginner) / (gd - gm1d);
        break;

      case CG_DaiYuan:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = gnorm2 / (gd - gm1d);
        break;

      default:
        beta = 0.0;
        break;
      }
    }

    // Compute the direction
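    // d_{k+1} = -g_{k+1} + beta * d_k; beta = 0 gives steepest descent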
    info = D->Axpby(-1.0, G, beta); CHKERRQ(info);

    // Update initial steplength choice
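    // Subsequent line searches start from a unit step, still clamped to the
    // user-settable interval [delta_min, delta_max].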
    delta = 1.0;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */

static int TaoSetUp_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X;
  int info;

  TaoFunctionBegin;

  info = TaoGetSolution(tao, &X); CHKERRQ(info);
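
  // Work vectors: X2 holds the previous iterate, G1 and G2 the current and
  // previous gradients, D the search direction, and W is used by the
  // line search.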
  info = X->Clone(&cg->X2); CHKERRQ(info);
  info = X->Clone(&cg->G1); CHKERRQ(info);
  info = X->Clone(&cg->G2); CHKERRQ(info);
  info = X->Clone(&cg->D); CHKERRQ(info);
  info = X->Clone(&cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, cg->G1); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, cg->D); CHKERRQ(info);

  info = TaoCheckFG(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */

static int TaoDestroy_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;

  info = TaoVecDestroy(cg->X2); CHKERRQ(info);
  info = TaoVecDestroy(cg->G1); CHKERRQ(info);
  info = TaoVecDestroy(cg->G2); CHKERRQ(info);
  info = TaoVecDestroy(cg->D); CHKERRQ(info);
  info = TaoVecDestroy(cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, 0); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, 0); CHKERRQ(info);

  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/

static int TaoSetOptions_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoOptionsHead("Nonlinear Conjugate Gradient method for unconstrained optimization"); CHKERRQ(info);

  info = TaoOptionDouble("-tao_cg_eta", "restart tolerance", "", cg->eta, &cg->eta, 0); CHKERRQ(info);
  info = TaoOptionList("-tao_cg_type", "cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_min", "minimum delta value", "", cg->delta_min, &cg->delta_min, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_max", "maximum delta value", "", cg->delta_max, &cg->delta_max, 0); CHKERRQ(info);

  info = TaoLineSearchSetFromOptions(tao); CHKERRQ(info);
  info = TaoOptionsTail(); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/

static int TaoView_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoPrintInt(tao, " Gradient steps: %d\n", cg->grad); CHKERRQ(info);
  info = TaoPrintInt(tao, " Reset steps: %d\n", cg->reset); CHKERRQ(info);
  info = TaoLineSearchView(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/

int TaoCreate_CG(TAO_SOLVER tao)
{
  TAO_CG *cg;
  int info;

  TaoFunctionBegin;

  info = TaoNew(TAO_CG, &cg); CHKERRQ(info);
  info = PetscLogObjectMemory(tao, sizeof(TAO_CG)); CHKERRQ(info);

  info = TaoSetTaoSolveRoutine(tao, TaoSolve_CG, (void *)cg); CHKERRQ(info);
  info = TaoSetTaoSetUpDownRoutines(tao, TaoSetUp_CG, TaoDestroy_CG); CHKERRQ(info);
  info = TaoSetTaoOptionsRoutine(tao, TaoSetOptions_CG); CHKERRQ(info);
  info = TaoSetTaoViewRoutine(tao, TaoView_CG); CHKERRQ(info);

  info = TaoSetMaximumIterates(tao, 2000); CHKERRQ(info);
  info = TaoSetMaximumFunctionEvaluations(tao, 4000); CHKERRQ(info);
  info = TaoSetTolerances(tao, 1e-4, 1e-4, 0, 0); CHKERRQ(info);

  cg->eta = 0.1;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;

  cg->cg_type = CG_PolakRibierePlus;

  // Note: nondefault values should be used for the nonlinear conjugate
  // gradient method.  In particular, gtol should be less than 0.5; the
  // value used in Nocedal and Wright is 0.10.  We nevertheless use the
  // default line search values because they seem to work better.
  info = TaoCreateMoreThuenteLineSearch(tao, 0, 0); CHKERRQ(info);
  TaoFunctionReturn(0);
}