Actual source code: cg.c

/*$Id$*/

#include "cg.h"

#define CG_FletcherReeves       0
#define CG_PolakRibiere         1
#define CG_PolakRibierePlus     2
#define CG_HestenesStiefel      3
#define CG_DaiYuan              4
#define CG_Types                5
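
// With g_k the current gradient, g_{k-1} the previous gradient, and d the
// previous search direction, the multiplier beta computed in TaoSolve_CG is:
//   Fletcher-Reeves (cg_fr):      beta = ||g_k||^2 / ||g_{k-1}||^2
//   Polak-Ribiere (cg_pr):        beta = g_k'(g_k - g_{k-1}) / ||g_{k-1}||^2
//   Polak-Ribiere-plus (cg_prp):  beta = max(g_k'(g_k - g_{k-1}) / ||g_{k-1}||^2, 0)
//   Hestenes-Stiefel (cg_hs):     beta = g_k'(g_k - g_{k-1}) / d'(g_k - g_{k-1})
//   Dai-Yuan (cg_dy):             beta = ||g_k||^2 / d'(g_k - g_{k-1})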

static const char *CG_Table[CG_Types] = {
  "cg_fr", "cg_pr", "cg_prp", "cg_hs", "cg_dy"
};

#define TAO_ZER_SAFEGUARD        1e-8
#define TAO_INF_SAFEGUARD        1e+8

static int TaoSolve_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X, *Xm1 = cg->X2;
  TaoVec *G = cg->G1, *Gm1 = cg->G2;
  TaoVec *D = cg->D, *W = cg->W;

  TaoTerminateReason reason;

  double f, f_full, fm1, gnorm, gnorm2, gnorm2m1, ginner, gd, gm1d;
  double beta, delta, step = 1.0;

  int iter = 0, status = 0, info;

  TaoFunctionBegin;

  // Get the vectors we will need
  info = TaoGetSolution(tao, &X); CHKERRQ(info);

  // Check convergence criteria
  info = TaoComputeFunctionGradient(tao, X, &f, G); CHKERRQ(info);
  info = G->Norm2(&gnorm); CHKERRQ(info);
  if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
    SETERRQ(1, "User-provided compute function generated Inf or NaN");
  }

  info = TaoMonitor(tao, iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
  if (reason != TAO_CONTINUE_ITERATING) {
    TaoFunctionReturn(0);
  }

  // Have not converged; initialize variables
  info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);
  gnorm2 = gnorm*gnorm;

  // Set the initial scaling for the function
  if (f != 0.0) {
    delta = 2.0 * TaoAbsDouble(f) / gnorm2;
  }
  else {
    delta = 2.0 / gnorm2;
  }
  delta = TaoMax(delta, cg->delta_min);
  delta = TaoMin(delta, cg->delta_max);
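  // (Note: delta = 2|f| / ||g||^2 appears to be the classical initial-step
  // heuristic 2(f - f_min) / g'd for a line search along d = -g, with the
  // optimal value f_min taken to be zero; cf. Nocedal and Wright.)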

  // Set counters for gradient and reset steps
  cg->grad = 0;
  cg->reset = 0;

  while (1) {
    // Save the current function and gradient information
    fm1 = f;
    gnorm2m1 = gnorm2;
    info = Xm1->CopyFrom(X); CHKERRQ(info);
    info = Gm1->CopyFrom(G); CHKERRQ(info);

    info = D->Dot(G, &gd); CHKERRQ(info);
    if ((gd >= 0) || TaoInfOrNaN(gd)) {
      // The direction is not a descent direction or contains Inf or NaN;
      // use the steepest descent direction instead
      ++cg->grad;

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
      }
      else {
        delta = 2.0 / gnorm2;
      }
      delta = TaoMax(delta, cg->delta_min);
      delta = TaoMin(delta, cg->delta_max);

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // A pure gradient step cannot contain Inf or NaN, so this test is
      // unnecessary:
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }
    }

    // Search along the direction for an improved point
    step = delta;
    info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
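    // If the line search fails, recovery below proceeds in three stages:
    // (1) restart with a rescaled gradient step, (2) retry with an unscaled
    // gradient step, and (3) give up and terminate with step = 0.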

    if (status) {
      // Line search failed; reset the scaling factor and use a scaled
      // gradient step
      ++cg->reset;

      f = fm1;
      gnorm2 = gnorm2m1;
      info = X->CopyFrom(Xm1); CHKERRQ(info);
      info = G->CopyFrom(Gm1); CHKERRQ(info);

      if (f != 0.0) {
        delta = 2.0 * TaoAbsDouble(f) / gnorm2;
      }
      else {
        delta = 2.0 / gnorm2;
      }
      delta = TaoMax(delta, cg->delta_min);
      delta = TaoMin(delta, cg->delta_max);

      info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

      // A pure gradient step cannot contain Inf or NaN, so this test is
      // unnecessary:
      // info = D->Norm2(&dnorm); CHKERRQ(info);
      // if (TaoInfOrNaN(dnorm)) {
      //   SETERRQ(1, "Direction generated Not-a-Number");
      // }

      // This may be incorrect; the line search has values for stepmax and
      // stepmin that should be reset.
      step = delta;
      info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);

      if (status) {
        // Line search failed again; switch to an unscaled gradient step

        f = fm1;
        gnorm2 = gnorm2m1;
        info = X->CopyFrom(Xm1); CHKERRQ(info);
        info = G->CopyFrom(Gm1); CHKERRQ(info);

        delta = 1.0;

        info = D->ScaleCopyFrom(-1.0, G); CHKERRQ(info);

        // A pure gradient step cannot contain Inf or NaN, so this test is
        // unnecessary:
        // info = D->Norm2(&dnorm); CHKERRQ(info);
        // if (TaoInfOrNaN(dnorm)) {
        //   SETERRQ(1, "Direction generated Not-a-Number");
        // }

        // This may be incorrect; the line search has values for stepmax and
        // stepmin that should be reset.
        step = delta;
        info = TaoLineSearchApply(tao, X, G, D, W, &f, &f_full, &step, &status); CHKERRQ(info);
        if (status) {
          // Even the steepest descent direction did not produce a new
          // iterate; stop here

          f = fm1;
          gnorm2 = gnorm2m1;
          info = X->CopyFrom(Xm1); CHKERRQ(info);
          info = G->CopyFrom(Gm1); CHKERRQ(info);
          step = 0.0;
        }
      }
    }

    // Check that the function and gradient are finite
    info = G->Norm2(&gnorm); CHKERRQ(info);
    if (TaoInfOrNaN(f) || TaoInfOrNaN(gnorm)) {
      SETERRQ(1, "User-provided compute function generated Inf or NaN");
    }
    gnorm2 = gnorm*gnorm;

    // Monitor progress and check for termination
    info = TaoMonitor(tao, ++iter, f, gnorm, 0.0, step, &reason); CHKERRQ(info);
    if (reason != TAO_CONTINUE_ITERATING) {
      break;
    }

    // Check the restart condition
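    // (This is a Powell-style restart test: when successive gradients are
    // far from orthogonal, the conjugacy information is considered stale
    // and the method restarts along the steepest descent direction.)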
    info = G->Dot(Gm1, &ginner); CHKERRQ(info);
    if (fabs(ginner) >= cg->eta * gnorm2) {
      // Gradients are far from orthogonal; use the steepest descent direction
      beta = 0.0;
    }
    else {
      // Gradients are close to orthogonal; use the conjugate gradient formula

      switch(cg->cg_type) {
      case CG_FletcherReeves:
        beta = gnorm2 / gnorm2m1;
        break;

      case CG_PolakRibiere:
        beta = (gnorm2 - ginner) / gnorm2m1;
        break;

      case CG_PolakRibierePlus:
        beta = TaoMax((gnorm2 - ginner) / gnorm2m1, 0.0);
        break;

      case CG_HestenesStiefel:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = (gnorm2 - ginner) / (gd - gm1d);
        break;

      case CG_DaiYuan:
        info = G->Dot(D, &gd); CHKERRQ(info);
        info = Gm1->Dot(D, &gm1d); CHKERRQ(info);
        beta = gnorm2 / (gd - gm1d);
        break;

      default:
        beta = 0.0;
        break;
      }
    }

    // Compute the new direction: d = -g + beta*d
    info = D->Axpby(-1.0, G, beta); CHKERRQ(info);

    // Update the initial steplength choice
    delta = 1.0;
    delta = TaoMax(delta, cg->delta_min);
    delta = TaoMin(delta, cg->delta_max);
  }
  TaoFunctionReturn(0);
}
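
As an aside, the beta update and restart test above are easier to see in isolation. The following standalone sketch is not TAO code: the quadratic problem, the exact line search, and all names in it are illustrative. It applies the same Polak-Ribiere-plus formula and orthogonality restart to a small two-dimensional quadratic:

// Standalone illustration only -- not part of TAO.  Minimal nonlinear CG
// with the Polak-Ribiere-plus update and the restart test used above,
// applied to f(x) = 0.5*x'Ax - b'x with A = [[3,1],[1,2]], b = [1,1].
#include <math.h>
#include <stdio.h>

#define N 2

static double dot(const double a[N], const double b[N]) {
  double s = 0.0;
  for (int i = 0; i < N; ++i) s += a[i]*b[i];
  return s;
}

static void apply_A(const double v[N], double Av[N]) {
  Av[0] = 3.0*v[0] + 1.0*v[1];
  Av[1] = 1.0*v[0] + 2.0*v[1];
}

int main(void) {
  const double b[N] = {1.0, 1.0};
  const double eta = 0.1;            // restart tolerance, as in TAO_CG
  double x[N] = {0.0, 0.0}, g[N], gm1[N], d[N], Ax[N], Ad[N];

  apply_A(x, Ax);                    // g = A*x - b
  for (int i = 0; i < N; ++i) { g[i] = Ax[i] - b[i]; d[i] = -g[i]; }
  double gnorm2 = dot(g, g);

  for (int iter = 0; iter < 50 && gnorm2 > 1e-12; ++iter) {
    // Exact line search for a quadratic: step = -g'd / d'Ad
    apply_A(d, Ad);
    double step = -dot(g, d) / dot(d, Ad);
    for (int i = 0; i < N; ++i) x[i] += step*d[i];

    // Save the old gradient, then recompute
    double gnorm2m1 = gnorm2;
    for (int i = 0; i < N; ++i) gm1[i] = g[i];
    apply_A(x, Ax);
    for (int i = 0; i < N; ++i) g[i] = Ax[i] - b[i];
    gnorm2 = dot(g, g);

    // Restart test: if |g_k'g_{k-1}| >= eta*||g_k||^2, fall back to
    // steepest descent (beta = 0); otherwise use the PR+ formula
    double ginner = dot(g, gm1);
    double beta = 0.0;
    if (fabs(ginner) < eta*gnorm2) {
      beta = (gnorm2 - ginner) / gnorm2m1;
      if (beta < 0.0) beta = 0.0;    // the "plus" safeguard
    }
    for (int i = 0; i < N; ++i) d[i] = -g[i] + beta*d[i];
  }

  printf("solution: x = (%g, %g)\n", x[0], x[1]);  // expect approx (0.2, 0.4)
  return 0;
}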

/* ---------------------------------------------------------- */
static int TaoSetUp_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  TaoVec *X;
  int info;

  TaoFunctionBegin;

  info = TaoGetSolution(tao, &X); CHKERRQ(info);
  info = X->Clone(&cg->X2); CHKERRQ(info);
  info = X->Clone(&cg->G1); CHKERRQ(info);
  info = X->Clone(&cg->G2); CHKERRQ(info);
  info = X->Clone(&cg->D); CHKERRQ(info);
  info = X->Clone(&cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, cg->G1); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, cg->D); CHKERRQ(info);

  info = TaoCheckFG(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/* ---------------------------------------------------------- */
static int TaoDestroy_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;

  info = TaoVecDestroy(cg->X2); CHKERRQ(info);
  info = TaoVecDestroy(cg->G1); CHKERRQ(info);
  info = TaoVecDestroy(cg->G2); CHKERRQ(info);
  info = TaoVecDestroy(cg->D); CHKERRQ(info);
  info = TaoVecDestroy(cg->W); CHKERRQ(info);

  info = TaoSetLagrangianGradientVector(tao, 0); CHKERRQ(info);
  info = TaoSetStepDirectionVector(tao, 0); CHKERRQ(info);

  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/
static int TaoSetOptions_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG *cg = (TAO_CG *)solver;
  int info;

  TaoFunctionBegin;
  info = TaoOptionsHead("Nonlinear Conjugate Gradient method for unconstrained optimization"); CHKERRQ(info);

  info = TaoOptionDouble("-tao_cg_eta", "restart tolerance", "", cg->eta, &cg->eta, 0); CHKERRQ(info);
  info = TaoOptionList("-tao_cg_type", "cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_min", "minimum delta value", "", cg->delta_min, &cg->delta_min, 0); CHKERRQ(info);
  info = TaoOptionDouble("-tao_cg_delta_max", "maximum delta value", "", cg->delta_max, &cg->delta_max, 0); CHKERRQ(info);

  info = TaoLineSearchSetFromOptions(tao); CHKERRQ(info);
  info = TaoOptionsTail(); CHKERRQ(info);
  TaoFunctionReturn(0);
}
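
// For example, the options registered above can be set on the command line
// (option names taken directly from the code; the executable name is
// illustrative):
//   ./myprog -tao_cg_type cg_hs -tao_cg_eta 0.05 -tao_cg_delta_max 50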

/*------------------------------------------------------------*/
static int TaoView_CG(TAO_SOLVER tao, void *solver)
{
  TAO_CG   *cg = (TAO_CG *)solver;
  int      info;

  TaoFunctionBegin;
  info = TaoPrintInt(tao, "  Gradient steps: %d\n", cg->grad); CHKERRQ(info);
  info = TaoPrintInt(tao, "  Reset steps: %d\n", cg->reset); CHKERRQ(info);
  info = TaoLineSearchView(tao); CHKERRQ(info);
  TaoFunctionReturn(0);
}

/*------------------------------------------------------------*/
int TaoCreate_CG(TAO_SOLVER tao)
{
  TAO_CG *cg;
  int info;

  TaoFunctionBegin;

  info = TaoNew(TAO_CG, &cg); CHKERRQ(info);
  info = PetscLogObjectMemory(tao, sizeof(TAO_CG)); CHKERRQ(info);

  info = TaoSetTaoSolveRoutine(tao, TaoSolve_CG, (void *)cg); CHKERRQ(info);
  info = TaoSetTaoSetUpDownRoutines(tao, TaoSetUp_CG, TaoDestroy_CG); CHKERRQ(info);
  info = TaoSetTaoOptionsRoutine(tao, TaoSetOptions_CG); CHKERRQ(info);
  info = TaoSetTaoViewRoutine(tao, TaoView_CG); CHKERRQ(info);

  info = TaoSetMaximumIterates(tao, 2000); CHKERRQ(info);
  info = TaoSetMaximumFunctionEvaluations(tao, 4000); CHKERRQ(info);
  info = TaoSetTolerances(tao, 1e-4, 1e-4, 0, 0); CHKERRQ(info);

  cg->eta = 0.1;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;

  cg->cg_type = CG_PolakRibierePlus;

  // Note: nondefault values should be used for the nonlinear conjugate
  // gradient method.  In particular, gtol should be less than 0.5; the
  // value used in Nocedal and Wright is 0.10.  We use the default values
  // for the linesearch because they seem to work better.
  info = TaoCreateMoreThuenteLineSearch(tao, 0, 0); CHKERRQ(info);
  TaoFunctionReturn(0);
}