OOPS
DRPBlockLanczosMinimizer.h
Go to the documentation of this file.
1 /*
2  * (C) Copyright 2018 UCAR
3  *
4  * This software is licensed under the terms of the Apache Licence Version 2.0
5  * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6  */
7 
8 #ifndef OOPS_ASSIMILATION_DRPBLOCKLANCZOSMINIMIZER_H_
9 #define OOPS_ASSIMILATION_DRPBLOCKLANCZOSMINIMIZER_H_
10 
11 #include <Eigen/Dense>
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <memory>
16 #include <string>
17 #include <vector>
18 
19 #include "eckit/config/Configuration.h"
20 #include "eckit/mpi/Comm.h"
21 
29 #include "oops/mpi/mpi.h"
30 #include "oops/util/dot_product.h"
31 #include "oops/util/formats.h"
32 #include "oops/util/Logger.h"
33 
34 namespace oops {
35 
36 template<typename MODEL, typename OBS> class DRPBlockLanczosMinimizer :
37  public DRMinimizer<MODEL, OBS> {
42  typedef Eigen::VectorXd eigenvec_;
43  typedef Eigen::MatrixXd eigenmat_;
44 
45  public:
46  const std::string classname() const override {return "DRPBlockLanczosMinimizer";}
47  DRPBlockLanczosMinimizer(const eckit::Configuration &, const CostFct_ &);
49 
50  private:
51  double solve(CtrlInc_ &, CtrlInc_ &, CtrlInc_ &, const Bmat_ &, const HtRinvH_ &, const double,
52  const double, const int, const double) override;
53 
54  // other functions:
55  void get_proj(const CtrlInc_ &, const CtrlInc_ &, eigenmat_ &, int &,
56  const eckit::mpi::Comm &, CtrlInc_ &);
57  void apply_proj(CtrlInc_ &, const CtrlInc_ &, const eigenmat_ &, int &,
58  const eckit::mpi::Comm &, CtrlInc_ &);
59  void mqrgs(CtrlInc_ &, CtrlInc_ &, eigenmat_ &, const CtrlInc_ &, int &,
60  const eckit::mpi::Comm &, CtrlInc_ &, CtrlInc_ &);
61  void HtRinvH0(const CtrlInc_ &, CtrlInc_ &, const HtRinvH_ &, int &,
62  const eckit::mpi::Comm &, CtrlInc_ &);
63 
64  // For MPI purposes
65  const int members_;
66  const int ntasks_;
67  const int tasks_per_member_;
68  const int global_task_;
69  const int mymember_;
70  const int local_task_;
71 
72  // For diagnostics
73  eckit::LocalConfiguration diagConf_;
75 };
76 
77 // ===============================================================================================
78 // Block-B-Lanczos algorithm
79 // Primal space
80 // Lanczos algorithm with complete reorthogonalization.
81 // MPI version (storage of partial Krylov base on each processor)
82 
83 template<typename MODEL, typename OBS>
85  const CostFct_ & J)
86  : DRMinimizer<MODEL, OBS>(J), members_(conf.getInt("members")),
87  ntasks_(oops::mpi::world().size()),
88  tasks_per_member_(ntasks_/members_), global_task_(oops::mpi::world().rank()),
89  mymember_(global_task_ / tasks_per_member_), local_task_(global_task_%tasks_per_member_),
90  diagConf_(conf), outerLoop_(0) {}
91 
92 // -----------------------------------------------------------------------------------------------
93 
94 template<typename MODEL, typename OBS>
96  const Bmat_ & B, const HtRinvH_ & HtRinvH,
97  const double costJ0Jb, const double costJ0JoJc,
98  const int maxiter, const double tolerance) {
99  eigenvec_ zerov = Eigen::VectorXd::Zero(members_);
100  eigenmat_ zeromm = Eigen::MatrixXd::Zero(members_, members_);
101 
102  CtrlInc_ ww(xh); // Current w_i
103  CtrlInc_ vv(rr); // Current v_i
104  CtrlInc_ zz(xh); // Current z_i
105 
106  CtrlInc_ temp1(xh); // Current w_i
107  CtrlInc_ temp2(xh); // Current w_i
108 
109  int gestag = 0;
110 
111  std::vector<std::unique_ptr<CtrlInc_>> Zbase; // store the zz during iterative process
112  std::vector<std::unique_ptr<CtrlInc_>> Vbase; // store the vv during iterative process
113 
114  eigenmat_ projsol(members_, members_); // projectors (w,z) for residuals calculation.
115 
116  eigenmat_ alpha(members_, members_);
117  eigenmat_ beta = zeromm;
118  eigenmat_ beta0 = zeromm;
119  eigenmat_ SSLK;
120 
121  std::vector<eigenmat_> ALPHAS; // store the diagonal blocks of the Arnoldi matrix
122  std::vector<eigenmat_> BETAS; // store the underdiagonal blocks of the Arnoldi matrix
123 
124  eigenmat_ ss; // contains the solution
125  eigenvec_ ss_loc = zerov;
126 
127  double norm_iiter = 0;
128  double norm0 = 0;
129  double normReduction = 1;
130  double normReductionIter = 1;
131 
132  int iterTotal = maxiter;
133 
134  bool complexValues = false;
135 
136  eigenvec_ norm_red_loc(maxiter);
137  eigenmat_ norm_red_all(maxiter, members_);
138 
139  eigenvec_ costj_loc(maxiter);
140  eigenmat_ costj_all(maxiter, members_);
141 
142 // -----------------------------------------------------------------------------------------------
143 // Creating the proper communicator (geographic area) for send and receive
144  std::string CommGeoStr = "comm_geo_" + std::to_string(local_task_);
145  char const *CommGeoName = CommGeoStr.c_str();
146  const eckit::mpi::Comm & CommGeo = oops::mpi::world().split(local_task_, CommGeoName);
147  Log::info() << "Geo communicators created, for task 0 of member 0: "
148  << CommGeo.name() << " of size " << CommGeo.size() << std::endl;
149 
150 // -----------------------------------------------------------------------------------------------
151  B.multiply(rr, zz); // z_0 = B * r_0
152  norm0 = sqrt(dot_product(zz, rr));
153 
154  // QR decomposition
155  mqrgs(zz, vv, beta0, rr, gestag, CommGeo, temp1, temp2); // [z_1, v_1, b0] = qr[r_0, v_0]
156 
157  Vbase.emplace_back(std::unique_ptr<CtrlInc_>(new CtrlInc_(vv)));
158  Zbase.emplace_back(std::unique_ptr<CtrlInc_>(new CtrlInc_(zz)));
159 
160  for (int iiter = 0; iiter < maxiter && normReductionIter > tolerance; ++iiter) {
161  Log::info() << "BlockBLanczos starting iteration " << iiter+1 << " for rank: " << mymember_
162  << std::endl;
163 
164  // Hessian application: w_i = v_i + HtRinvH * B*v_i = v_i + HtRinvH * z_i
165  // --> new search directions
166  // HtRinvH.multiply(zz, ww);
167  HtRinvH0(zz, ww, HtRinvH, gestag, CommGeo, temp1);
168 
169  ww += vv;
170 
171  // Orthogonalize ww against previous base vectors
172  for (int jiter = 0; jiter < iiter + 1; ++jiter) {
173  get_proj(ww, *Zbase[jiter], alpha, gestag, CommGeo, temp1);
174  apply_proj(ww, *Vbase[jiter], alpha, gestag, CommGeo, temp1);
175  }
176 
177  B.multiply(ww, zz);
178  get_proj(ww, zz, projsol, gestag, CommGeo, temp1); // projectors for residuals calculation
179 
180  vv = ww;
181  mqrgs(zz, vv, beta, ww, gestag, CommGeo, temp1, temp2);
182 
183  Zbase.emplace_back(std::unique_ptr<CtrlInc_>(new CtrlInc_(zz)));
184  Vbase.emplace_back(std::unique_ptr<CtrlInc_>(new CtrlInc_(vv)));
185  ALPHAS.push_back(alpha);
186  BETAS.push_back(beta);
187 
188  // solve T ss = beta0 * e1
189  blockTriDiagSolve(ALPHAS, BETAS, beta0, ss, complexValues, members_);
190 
191  eigenvec_ ss_loc = (ss.block(iiter*members_, 0, members_, members_)).col(mymember_);
192 
193  eigenvec_ temp = projsol * ss_loc;
194  norm_iiter = sqrt(temp.dot(ss_loc));
195  normReduction = norm_iiter / norm0;
196 
197  const double costJ0 = costJ0Jb + costJ0JoJc;
198  double costJ = costJ0;
199  double costJb = 0;
200  double costJoJc = 0;
201 
202  for (int ll = 0; ll < iiter+1; ++ll) {
203  SSLK = - (ss.block(ll*members_, 0, members_, members_));
204  // Compute the quadratic cost function
205  // J[du_{i}] = J[0] - 0.5 s_{i}^T Z_{i}^T r_{0}
206  // Jb[du_{i}] = 0.5 s_{i}^T V_{i}^T Z_{i} s_{i}
207  temp2.zero();
208  apply_proj(temp2, *Zbase[ll], SSLK, gestag, CommGeo, temp1);
209  costJ -= 0.5 * dot_product(temp2, rr);
210  }
211 
212  Log::info() << "BlockBLanczos end of iteration " << iiter+1 << std::endl;
213  printNormReduction(iiter+1, norm_iiter, normReduction);
214  printQuadraticCostFunction(iiter+1, costJ, costJb, costJoJc);
215 
216  norm_red_loc[iiter] = normReduction;
217  costj_loc[iiter] = costJ;
218 
219  oops::mpi::world().allReduce(normReduction, normReductionIter, eckit::mpi::max());
220  if (normReductionIter < tolerance) {
221  Log::info() << "DRPBlockLanczos: Achieved required reduction in residual norm." << std::endl;
222  iterTotal = iiter+1;
223  }
224  } // main loop iiter
225 
226  oops::mpi::allGather(CommGeo, norm_red_loc, norm_red_all);
227  oops::mpi::allGather(CommGeo, costj_loc, costj_all);
228 
229  xh.zero();
230  xx.zero();
231  eigenmat_ tmp_norm;
232  eigenmat_ tmp_costj;
233  Eigen::IOFormat HeavyFmt(-2, 0, ", ", ";\n");
234  Eigen::IOFormat TestFmt(-1, 0, ", ", ";\n");
235 
236 
237  for (int ll = 0; ll < iterTotal; ++ll) {
238  SSLK = - (ss.block(ll*members_, 0, members_, members_));
239  apply_proj(xh, *Vbase[ll], SSLK, gestag, CommGeo, temp1);
240  apply_proj(xx, *Zbase[ll], SSLK, gestag, CommGeo, temp1);
241  if (outerLoop_ == 0) writeKrylovBasis(diagConf_, *Zbase[ll], ll);
242  tmp_norm = norm_red_all.block(ll, 0, 1, members_);
243  tmp_costj = costj_all.block(ll, 0, 1, members_);
244 
245  Log::info() << " Norm reduction all members (" << std::setw(2) << ll+1 << ") = "
246  << tmp_norm.format(HeavyFmt) << std::endl;
247  Log::info() << " Quadratic cost function all members: J (" << std::setw(2) << ll+1 << ") = "
248  << tmp_costj.format(HeavyFmt) << std::endl;
249  Log::test() << " Norm reduction all members (" << std::setw(2) << ll+1 << ") = "
250  << tmp_norm.format(TestFmt) << std::endl;
251  Log::test() << " Quadratic cost function all members: J (" << std::setw(2) << ll+1 << ") = "
252  << tmp_costj.format(TestFmt) << std::endl;
253  }
254 
255  eckit::mpi::deleteComm(CommGeoName);
256  ++outerLoop_;
257  return normReduction;
258 }
259 
260 // -----------------------------------------------------------------------------------------------
261 // OTHER FUNCTIONS
262 // -----------------------------------------------------------------------------------------------
263 
264 template<typename MODEL, typename OBS>
266  const CtrlInc_ & incr_tosend,
267  eigenmat_ & alpha_mat, int & tag,
268  const eckit::mpi::Comm & comm,
269  CtrlInc_ & incr_rcv) {
270  // Computes mat_proj = Zt.W
271  // with incr_tosend(member_i) the columns of matrix Z
272  // with www(member_i) the columns of matrix W
273  eigenvec_ alpha_loc = Eigen::VectorXd::Zero(members_);
274  int tag_rcv;
275  int tag_send;
276 
277  for (int p = 0; p < mymember_; p++) {
278  tag_rcv = p * members_ + mymember_ + tag * members_ * members_;
279  tag_send = mymember_ * members_ + p + tag * members_ * members_;
280  oops::mpi::send(comm, incr_tosend, p, tag_send);
281  oops::mpi::receive(comm, incr_rcv, p, tag_rcv);
282  alpha_loc(p) = dot_product(incr_rcv, www);
283  }
284 
285  alpha_loc(mymember_) = dot_product(incr_tosend, www);
286 
287  for (int p = mymember_ + 1; p < members_; p++) {
288  tag_rcv = p * members_ + mymember_ + tag * members_ * members_;
289  tag_send = mymember_ * members_ + p + tag * members_ * members_;
290  oops::mpi::receive(comm, incr_rcv, p, tag_rcv);
291  oops::mpi::send(comm, incr_tosend, p, tag_send);
292  alpha_loc(p) = dot_product(incr_rcv, www);
293  }
294  oops::mpi::allGather(comm, alpha_loc, alpha_mat);
295 
296  tag += 2;
297 }
298 
299 // -----------------------------------------------------------------------------------------------
300 
301 template<typename MODEL, typename OBS>
303  const CtrlInc_ & incr_tosend,
304  const eigenmat_ & alpha_mat, int & tag,
305  const eckit::mpi::Comm & comm,
306  CtrlInc_ & incr_rcv) {
307  // Computes W = W - V.alpha
308  // with incr_tochange(member_i) the columns of matrix W
309  // with incr_tosend(member_i) the columns of matrix V
310  eigenvec_ alpha_loc = alpha_mat.col(mymember_);
311  int tag_rcv;
312  int tag_send;
313 
314  for (int p = 0; p < mymember_; p++) {
315  tag_rcv = p * members_ + mymember_ + tag * members_ * members_;
316  tag_send = mymember_ * members_ + p + tag * members_ * members_;
317  oops::mpi::send(comm, incr_tosend, p, tag_send);
318  oops::mpi::receive(comm, incr_rcv, p, tag_rcv);
319  incr_tochange.axpy(-alpha_loc(p), incr_rcv);
320  }
321 
322  incr_tochange.axpy(-alpha_loc(mymember_), incr_tosend);
323 
324  for (int p = mymember_ + 1; p < members_; p++) {
325  tag_rcv = p * members_ + mymember_ + tag * members_ * members_;
326  tag_send = mymember_ * members_ + p + tag * members_ * members_;
327  oops::mpi::receive(comm, incr_rcv, p, tag_rcv);
328  oops::mpi::send(comm, incr_tosend, p, tag_send);
329  incr_tochange.axpy(-alpha_loc(p), incr_rcv);
330  }
331  tag += 2;
332 }
333 
334 // -----------------------------------------------------------------------------------------------
335 
336 template<typename MODEL, typename OBS>
338  eigenmat_ & beta_mat, const CtrlInc_ & www,
339  int & tag,
340  const eckit::mpi::Comm & comm,
341  CtrlInc_ & v_other, CtrlInc_ & z_other) {
342  // QR decomposition: [zzz, vvv, beta_mat] = qr[www, vvv] using Gram-Schmidt
343  eigenvec_ beta_loc = Eigen::VectorXd::Zero(members_);
344  int tag_rcv_v;
345  int tag_rcv_z;
346  int tag_send_v;
347  int tag_send_z;
348 
349  // Orthogonalization
350  for (int p = 0; p < mymember_; p++) {
351  tag_rcv_v = p * members_ + mymember_ + tag * members_ * members_;
352  tag_rcv_z = p * members_ + mymember_ + (tag + 1) * members_ * members_;
353  oops::mpi::receive(comm, v_other, p, tag_rcv_v);
354  oops::mpi::receive(comm, z_other, p, tag_rcv_z);
355  beta_loc(p) = dot_product(www, z_other) / dot_product(v_other, z_other);
356  vvv.axpy(-beta_loc(p), v_other);
357  zzz.axpy(-beta_loc(p), z_other);
358  }
359 
360  for (int p = mymember_ + 1; p < members_; p++) {
361  tag_send_v = mymember_ * members_ + p + tag * members_ * members_;
362  tag_send_z = mymember_ * members_ + p + (tag + 1) * members_ * members_;
363  oops::mpi::send(comm, vvv, p, tag_send_v);
364  oops::mpi::send(comm, zzz, p, tag_send_z);
365  }
366 
367  // Normalization (TODO: needs better writing)
368  beta_loc(mymember_) = sqrt(std::max(dot_product(vvv, zzz), 1e-15));
369 
370  vvv *= (1 / beta_loc(mymember_));
371  zzz *= (1 / beta_loc(mymember_));
372 
373  oops::mpi::allGather(comm, beta_loc, beta_mat);
374 
375  for (int ii = 0; ii < members_; ++ii) {
376  for (int jj = ii + 1; jj < members_; ++jj) {
377  beta_mat(ii, jj) = beta_mat(ii, jj) * beta_mat(ii, ii);
378  }
379  }
380 
381  tag += 2;
382 }
383 
384 // -----------------------------------------------------------------------------------------------
385 
386 template<typename MODEL, typename OBS>
388  const HtRinvH_ & HtRinvH, int & tag,
389  const eckit::mpi::Comm & comm, CtrlInc_ & z_other) {
390 // send z_loc to task 0, process HtRinvH_0 * z_loc and send the result back to original task
391  int tag_send_w;
392  int tag_rcv_w;
393  int tag_send_z;
394  int tag_rcv_z;
395  int dest = 0;
396 
397  if (mymember_ == 0) {
398  for (int ii = 1; ii < members_; ++ii) {
399  tag_rcv_z = members_ * ii + tag * members_ * members_;
400  oops::mpi::receive(comm, z_other, ii, tag_rcv_z);
401  HtRinvH.multiply(z_other, w_out);
402  tag_send_w = members_ * ii + (tag + 1) * members_ * members_;
403  oops::mpi::send(comm, w_out, ii, tag_send_w); // send to task ii
404  }
405  HtRinvH.multiply(z_loc, w_out);
406  } else {
407  tag_send_z = mymember_ * members_ + tag * members_ * members_;
408  oops::mpi::send(comm, z_loc, dest, tag_send_z); // send to task 0
409  tag_rcv_w = mymember_ * members_ + (tag + 1) * members_ * members_;
410  oops::mpi::receive(comm, w_out, dest, tag_rcv_w);
411  }
412  tag += 2;
413 }
414 
415 // -----------------------------------------------------------------------------------------------
416 
417 } // namespace oops
418 
419 #endif // OOPS_ASSIMILATION_DRPBLOCKLANCZOSMINIMIZER_H_
The matrix.
Definition: BMatrix.h:27
void multiply(const CtrlInc_ &x, CtrlInc_ &bx) const
Definition: BMatrix.h:33
void zero()
Linear algebra operators.
void axpy(const double, const ControlIncrement &)
Cost Function.
Definition: CostFunction.h:53
DR (Derber and Rosati) Minimizers.
Definition: DRMinimizer.h:46
void mqrgs(CtrlInc_ &, CtrlInc_ &, eigenmat_ &, const CtrlInc_ &, int &, const eckit::mpi::Comm &, CtrlInc_ &, CtrlInc_ &)
void get_proj(const CtrlInc_ &, const CtrlInc_ &, eigenmat_ &, int &, const eckit::mpi::Comm &, CtrlInc_ &)
ControlIncrement< MODEL, OBS > CtrlInc_
void apply_proj(CtrlInc_ &, const CtrlInc_ &, const eigenmat_ &, int &, const eckit::mpi::Comm &, CtrlInc_ &)
double solve(CtrlInc_ &, CtrlInc_ &, CtrlInc_ &, const Bmat_ &, const HtRinvH_ &, const double, const double, const int, const double) override
void HtRinvH0(const CtrlInc_ &, CtrlInc_ &, const HtRinvH_ &, int &, const eckit::mpi::Comm &, CtrlInc_ &)
const std::string classname() const override
eckit::LocalConfiguration diagConf_
HtRinvHMatrix< MODEL, OBS > HtRinvH_
CostFunction< MODEL, OBS > CostFct_
DRPBlockLanczosMinimizer(const eckit::Configuration &, const CostFct_ &)
void multiply(const CtrlInc_ &dx, CtrlInc_ &dz) const
Definition: HtRinvHMatrix.h:63
void allGather(const eckit::mpi::Comm &comm, const Eigen::VectorXd &sendbuf, Eigen::MatrixXd &recvbuf)
void send(const eckit::mpi::Comm &comm, const SERIALIZABLE &sendobj, const int dest, const int tag)
Extend eckit Comm for Serializable oops objects.
Definition: oops/mpi/mpi.h:44
const eckit::mpi::Comm & world()
Default communicator with all MPI tasks (ie MPI_COMM_WORLD)
Definition: oops/mpi/mpi.cc:84
void receive(const eckit::mpi::Comm &comm, SERIALIZABLE &recvobj, const int source, const int tag)
Definition: oops/mpi/mpi.h:55
The namespace for the main oops code.
void writeKrylovBasis(const eckit::Configuration &config, const ControlIncrement< MODEL, OBS > &dx, const int &loop)
void printQuadraticCostFunction(int iteration, const double &costJ, const double &costJb, const double &costJoJc)
void blockTriDiagSolve(const std::vector< Eigen::MatrixXd > &alphas, const std::vector< Eigen::MatrixXd > &betas, const Eigen::MatrixXd &beta0, Eigen::MatrixXd &ss, bool &complexValues, const int members)
void printNormReduction(int iteration, const double &grad, const double &norm)