Actual source code: dibtdc.c

slepc-3.7.4 2017-05-17
Report Typos and Errors
  1: /*
  2:    BDC - Block-divide and conquer (see description in README file).

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/dsimpl.h>
 25: #include <slepcblaslapack.h>

 27: static PetscErrorCode cutlr_(PetscBLASInt start,PetscBLASInt n,PetscBLASInt blkct, 
 28:         PetscBLASInt *bsizes,PetscBLASInt *ranks,PetscBLASInt *cut,
 29:         PetscBLASInt *lsum,PetscBLASInt *lblks,PetscBLASInt *info)
 30: {
 31: /*  -- Routine written in LAPACK Version 3.0 style -- */
 32: /* *************************************************** */
 33: /*     Written by */
 34: /*     Michael Moldaschl and Wilfried Gansterer */
 35: /*     University of Vienna */
 36: /*     last modification: March 16, 2014 */

 38: /*     Small adaptations of original code written by */
 39: /*     Wilfried Gansterer and Bob Ward, */
 40: /*     Department of Computer Science, University of Tennessee */
 41: /*     see http://dx.doi.org/10.1137/S1064827501399432 */
 42: /* *************************************************** */

 44: /*  Purpose */
 45: /*  ======= */

 47: /*  CUTLR computes the optimal cut in a sequence of BLKCT neighboring */
 48: /*  blocks whose sizes are given by the array BSIZES. */
 49: /*  The sum of all block sizes in the sequence considered is given by N. */
 50: /*  The cut is optimal in the sense that the difference of the sizes of */
 51: /*  the resulting two halves is minimum over all cuts with minimum ranks */
 52: /*  between blocks of the sequence considered. */

 54: /*  Arguments */
 55: /*  ========= */

 57: /*  START  (input) INTEGER */
 58: /*         In the original array KSIZES of the calling routine DIBTDC, */
 59: /*         the position where the sequence considered in this routine starts. */
 60: /*         START >= 1. */

 62: /*  N      (input) INTEGER */
 63: /*         The sum of all the block sizes of the sequence to be cut = */
 64: /*         = sum_{i=1}^{BLKCT} BSIZES( I ). */
 65: /*         N >= 3. */

 67: /*  BLKCT  (input) INTEGER */
 68: /*         The number of blocks in the sequence to be cut. */
 69: /*         BLKCT >= 3. */

 71: /*  BSIZES (input) INTEGER array, dimension (BLKCT) */
 72: /*         The dimensions of the (quadratic) blocks of the sequence to be */
 73: /*         cut. sum_{i=1}^{BLKCT} BSIZES( I ) = N. */

 75: /*  RANKS  (input) INTEGER array, dimension (BLKCT-1) */
 76: /*         The ranks determining the approximations of the off-diagonal */
 77: /*         blocks in the sequence considered. */

 79: /*  CUT    (output) INTEGER */
 80: /*         After the optimum cut has been determined, the position (in the */
 81: /*         overall problem as worked on in DIBTDC !) of the last block in */
 82: /*         the first half of the sequence to be cut. */
 83: /*         START <= CUT <= START+BLKCT-2. */

 85: /*  LSUM   (output) INTEGER */
 86: /*         After the optimum cut has been determined, the sum of the */
 87: /*         block sizes in the first half of the sequence to be cut. */
 88: /*         LSUM < N. */

 90: /*  LBLKS  (output) INTEGER */
 91: /*         After the optimum cut has been determined, the number of the */
 92: /*         blocks in the first half of the sequence to be cut. */
 93: /*         1 <= LBLKS < BLKCT. */

 95: /*  INFO   (output) INTEGER */
 96: /*          = 0:  successful exit. */
 97: /*          < 0:  illegal arguments. */
 98: /*                if INFO = -i, the i-th (input) argument had an illegal */
 99: /*                value. */
100: /*          > 0:  illegal results. */
101: /*                if INFO = i, the i-th (output) argument had an illegal */
102: /*                value. */

104: /*  Further Details */
105: /*  =============== */

107: /*  Based on code written by */
108: /*     Wilfried Gansterer and Bob Ward, */
109: /*     Department of Computer Science, University of Tennessee */

111: /*  ===================================================================== */

113:   PetscBLASInt i, ksk, kchk, ksum, nhalf, deviat, mindev, minrnk, tmpsum;

116:   *info = 0;
117:   *lblks = 1;
118:   *lsum = 1;
119:   *cut = start;

121:   if (start < 1) {
122:     *info = -1;
123:   } else if (n < 3) {
124:     *info = -2;
125:   } else if (blkct < 3) {
126:     *info = -3;
127:   }
128:   if (*info == 0) {
129:     ksum = 0;
130:     kchk = 0;
131:     for (i = 0; i < blkct; ++i) {
132:       ksk = bsizes[i];
133:       ksum += ksk;
134:       if (ksk < 1) kchk = 1;
135:     }
136:     if (ksum != n || kchk == 1) *info = -4;
137:   }
138:   if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in CUTLR",-(*info));

140:   /* determine smallest rank in the range considered */

142:   minrnk = n;
143:   for (i = 0; i < blkct-1; ++i) {
144:     if (ranks[i] < minrnk) minrnk = ranks[i];
145:   }

147:   /* determine best cut among those with smallest rank */

149:   nhalf = n / 2;
150:   tmpsum = 0;
151:   mindev = n;
152:   for (i = 0; i < blkct; ++i) {
153:     tmpsum += bsizes[i];
154:     if (ranks[i] == minrnk) {

156:       /* determine deviation from "optimal" cut NHALF */

158:       deviat = tmpsum - nhalf;
159:       if (deviat<0) deviat = -deviat;

161:       /* compare to best deviation so far */

163:       if (deviat < mindev) {
164:         mindev = deviat;
165:         *cut = start + i;
166:         *lblks = i + 1;
167:         *lsum = tmpsum;
168:       }
169:     }
170:   }

172:   if (*cut < start || *cut >= start + blkct - 1) {
173:     *info = 6;
174:   } else if (*lsum < 1 || *lsum >= n) {
175:     *info = 7;
176:   } else if (*lblks < 1 || *lblks >= blkct) {
177:     *info = 8;
178:   }
179:   return(0);
180: }


183: PetscErrorCode BDC_dibtdc_(const char *jobz,PetscBLASInt n,PetscBLASInt nblks, 
184:         PetscBLASInt *ksizes,PetscReal *d,PetscBLASInt l1d,PetscBLASInt l2d, 
185:         PetscReal *e,PetscBLASInt *rank,PetscBLASInt l1e,PetscBLASInt l2e,
186:         PetscReal tol,PetscReal *ev,PetscReal *z,PetscBLASInt ldz,PetscReal *work, 
187:         PetscBLASInt lwork,PetscBLASInt *iwork,PetscBLASInt liwork,
188:         PetscBLASInt *info,PetscBLASInt jobz_len)
189: {
190: /*  -- Routine written in LAPACK Version 3.0 style -- */
191: /* *************************************************** */
192: /*     Written by */
193: /*     Michael Moldaschl and Wilfried Gansterer */
194: /*     University of Vienna */
195: /*     last modification: March 16, 2014 */

197: /*     Small adaptations of original code written by */
198: /*     Wilfried Gansterer and Bob Ward, */
199: /*     Department of Computer Science, University of Tennessee */
200: /*     see http://dx.doi.org/10.1137/S1064827501399432 */
201: /* *************************************************** */

203: /*  Purpose */
204: /*  ======= */

206: /*  DIBTDC computes all eigenvalues and corresponding eigenvectors of a */
207: /*  symmetric irreducible block tridiagonal matrix with rank RANK matrices */
208: /*  as the subdiagonal blocks using a block divide and conquer method. */

210: /*  Arguments */
211: /*  ========= */

213: /*  JOBZ    (input) CHARACTER*1 */
214: /*          = 'N':  Compute eigenvalues only (not implemented); */
215: /*          = 'D':  Compute eigenvalues and eigenvectors. */
216: /*                  Eigenvectors are accumulated in the */
217: /*                  divide-and-conquer process. */

219: /*  N      (input) INTEGER */
220: /*         The dimension of the symmetric irreducible block tridiagonal */
221: /*         matrix.  N >= 2. */

223: /*  NBLKS  (input) INTEGER, 2 <= NBLKS <= N */
224: /*         The number of diagonal blocks in the matrix. */

226: /*  KSIZES (input) INTEGER array, dimension (NBLKS) */
227: /*         The dimension of the square diagonal blocks from top left */
228: /*         to bottom right.  KSIZES(I) >= 1 for all I, and the sum of */
229: /*         KSIZES(I) for I = 1 to NBLKS has to be equal to N. */

231: /*  D      (input) DOUBLE PRECISION array, dimension (L1D,L2D,NBLKS) */
232: /*         The lower triangular elements of the symmetric diagonal */
233: /*         blocks of the block tridiagonal matrix.  Elements of the top */
234: /*         left diagonal block, which is of dimension KSIZES(1), are */
235: /*         contained in D(*,*,1); the elements of the next diagonal */
236: /*         block, which is of dimension KSIZES(2), are contained in */
237: /*         D(*,*,2); etc. */

239: /*  L1D    (input) INTEGER */
240: /*         The leading dimension of the array D.  L1D >= max(3,KMAX), */
241: /*         where KMAX is the dimension of the largest diagonal block. */

243: /*  L2D    (input) INTEGER */
244: /*         The second dimension of the array D.  L2D >= max(3,KMAX), */
245: /*         where KMAX is as stated in L1D above. */

247: /*  E      (input) DOUBLE PRECISION array, dimension (L1E,L2E,NBLKS-1) */
248: /*         Contains the elements of the scalars (singular values) and */
249: /*         vectors (singular vectors) defining the rank RANK subdiagonal */
250: /*         blocks of the matrix. */
251: /*         E(1:RANK(K),RANK(K)+1,K) holds the RANK(K) scalars, */
252: /*         E(:,1:RANK(K),K) holds the RANK(K) column vectors, and */
253: /*         E(:,RANK(K)+2:2*RANK(K)+1,K) holds the row vectors for the K-th */
254: /*         subdiagonal block. */

256: /*  RANK   (input) INTEGER array, dimension (NBLKS-1). */
257: /*         The ranks of all the subdiagonal blocks contained in the array E. */
258: /*         RANK( K ) <= MIN( KSIZES( K ), KSIZES( K+1 ) ) */

260: /*  L1E    (input) INTEGER */
261: /*         The leading dimension of the array E.  L1E >= max(3,2*KMAX+1), */
262: /*         where KMAX is as stated in L1D above. */

264: /*  L2E    (input) INTEGER */
265: /*         The second dimension of the array E.  L2E >= max(3,2*KMAX+1), */
266: /*         where KMAX is as stated in L1D above. */

268: /*  TOL    (input) DOUBLE PRECISION, TOL <= 1.0D-1 */
269: /*         User specified deflation tolerance for the routine DMERG2. */
270: /*         If ( 1.0D-1 >= TOL >= 20*EPS ) then TOL is used as */
271: /*         the deflation tolerance in DSRTDF. */
272: /*         If ( TOL < 20*EPS ) then the standard deflation tolerance from */
273: /*         LAPACK is used as the deflation tolerance in DSRTDF. */

275: /*  EV     (output) DOUBLE PRECISION array, dimension (N) */
276: /*         If INFO = 0, then EV contains the eigenvalues of the */
277: /*         symmetric block tridiagonal matrix in ascending order. */

279: /*  Z      (input/output) DOUBLE PRECISION array, dimension (LDZ, N) */
280: /*         On entry, Z will be the identity matrix. */
281: /*         On exit, Z contains the eigenvectors of the block tridiagonal */
282: /*         matrix. */

284: /*  LDZ    (input) INTEGER */
285: /*         The leading dimension of the array Z.  LDZ >= max(1,N). */

287: /*  WORK   (workspace) DOUBLE PRECISION array, dimension (LWORK) */

289: /*  LWORK   (input) INTEGER */
290: /*          The dimension of the array WORK. */
291: /*          In order to guarantee correct results in all cases, */
292: /*          LWORK must be at least ( 2*N**2 + 3*N ). In many cases, */
293: /*          less workspace is required. The absolute minimum required is */
294: /*          ( N**2 + 3*N ). */
295: /*          If the workspace provided is not sufficient, the routine will */
296: /*          return a corresponding error code and report how much workspace */
297: /*          was missing (see INFO). */

299: /*  IWORK  (workspace) INTEGER array, dimension (LIWORK) */

301: /*  LIWORK  (input) INTEGER */
302: /*          The dimension of the array IWORK. */
303: /*          LIWORK must be at least ( 5*N + 3 + 4*NBLKS - 4 ): */
304: /*                 5*KMAX+3 for DSYEVD, 5*N for ????, */
305: /*                 4*NBLKS-4 for the preprocessing (merging order) */
306: /*          Summarizing, the minimum integer workspace needed is */
307: /*          MAX( 5*N, 5*KMAX + 3 ) + 4*NBLKS - 4 */

309: /*  INFO   (output) INTEGER */
310: /*          = 0:  successful exit. */
311: /*          < 0, > -99: illegal arguments. */
312: /*                if INFO = -i, the i-th argument had an illegal value. */
313: /*          = -99: error in the preprocessing (call of CUTLR). */
314: /*          < -200: not enough workspace. Space for ABS(INFO + 200) */
315: /*                numbers is required in addition to the workspace provided, */
316: /*                otherwise some eigenvectors will be incorrect. */
317: /*          > 0:  The algorithm failed to compute an eigenvalue while */
318: /*                working on the submatrix lying in rows and columns */
319: /*                INFO/(N+1) through mod(INFO,N+1). */

321: /*  Further Details */
322: /*  =============== */

324: /*  Based on code written by */
325: /*     Wilfried Gansterer and Bob Ward, */
326: /*     Department of Computer Science, University of Tennessee */

328: /*  This routine is comparable to Dlaed0.f from LAPACK. */

330: /*  ===================================================================== */

332: #if defined(SLEPC_MISSING_LAPACK_LACPY) || defined(PETSC_MISSING_LAPACK_SYEV)
334:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LACPY/SYEV - Lapack routine is unavailable");
335: #else
336:   PetscBLASInt   i, j, k, np, rp1, ksk, one=1;
337:   PetscBLASInt   cut, mat1, kchk, kbrk, blks, kmax, icut, size, ksum, lsum;
338:   PetscBLASInt   lblks, rblks, isize, lwmin, ilsum;
339:   PetscBLASInt   start, vstrt, istck1, istck2, istck3, merged;
340:   PetscBLASInt   liwmin, matsiz, startp, istrtp;
341:   PetscReal      rho, done=1.0, dmone=-1.0;

345:   *info = 0;

347:   if (*(unsigned char *)jobz != 'N' && *(unsigned char *)jobz != 'D') {
348:     *info = -1;
349:   } else if (n < 2) {
350:     *info = -2;
351:   } else if (nblks < 2 || nblks > n) {
352:     *info = -3;
353:   }
354:   if (*info == 0) {
355:     ksum = 0;
356:     kmax = 0;
357:     kchk = 0;
358:     for (k = 0; k < nblks; ++k) {
359:       ksk = ksizes[k];
360:       ksum += ksk;
361:       if (ksk > kmax) kmax = ksk;
362:       if (ksk < 1) kchk = 1;
363:     }
364:     lwmin = n*n + n * 3;
365:     liwmin = PetscMax(n * 5,kmax * 5 + 3) + 4*nblks - 4;
366:     if (ksum != n || kchk == 1) {
367:       *info = -4;
368:     } else if (l1d < PetscMax(3,kmax)) {
369:       *info = -6;
370:     } else if (l2d < PetscMax(3,kmax)) {
371:       *info = -7;
372:     } else if (l1e < PetscMax(3,2*kmax + 1)) {
373:       *info = -10;
374:     } else if (l2e < PetscMax(3,2*kmax + 1)) {
375:       *info = -11;
376:     } else if (tol > .1) {
377:       *info = -12;
378:     } else if (ldz < PetscMax(1,n)) {
379:       *info = -15;
380:     } else if (lwork < lwmin) {
381:       *info = -17;
382:     } else if (liwork < liwmin) {
383:       *info = -19;
384:     }
385:   }
386:   if (*info == 0) {
387:     for (k = 0; k < nblks-1; ++k) {
388:       if (rank[k] > PetscMin(ksizes[k],ksizes[k+1]) || rank[k] < 1) *info = -9;
389:     }
390:   }

392:   if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DIBTDC",-(*info));

394: /* **************************************************************************** */

396:   /* ...Preprocessing..................................................... */
397:   /*    Determine the optimal order for merging the subblocks and how much */
398:   /*    workspace will be needed for the merging (determined by the last */
399:   /*    merge). Cutpoints for the merging operations are determined and stored */
400:   /*    in reverse chronological order (starting with the final merging */
401:   /*    operation). */
402:   
403:   /*    integer workspace requirements for the preprocessing: */
404:   /*         4*(NBLKS-1) for merging history */
405:   /*         at most 3*(NBLKS-1) for stack */

407:   start = 1;
408:   size = n;
409:   blks = nblks;
410:   merged = 0;
411:   k = 0;

413:   /* integer workspace used for the stack is not needed any more after the */
414:   /* preprocessing and therefore can use part of the 5*N */
415:   /* integer workspace needed later on in the code */

417:   istck1 = 0;
418:   istck2 = istck1 + nblks;
419:   istck3 = istck2 + nblks;

421:   /* integer workspace used for storing the order of merges starts AFTER */
422:   /* the integer workspace 5*N+3 which is needed later on in the code */
423:   /* (5*KMAX+3 for DSYEVD, 4*N in DMERG2) */

425:   istrtp = n * 5 + 4;
426:   icut = istrtp + nblks - 1;
427:   isize = icut + nblks - 1;
428:   ilsum = isize + nblks - 1;

430: L200:

432:   if (nblks >= 3) {

434:     /* Determine the cut point. Note that in the routine CUTLR it is */
435:     /* chosen such that it yields the best balanced merging operation */
436:     /* among all the rank modifications with minimum rank. */

438:     cutlr_(start, size, blks, &ksizes[start-1], &rank[start-1], &cut, 
439:                   &lsum, &lblks, info);
440:     if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in cutlr, info = %d",*info);

442:   } else {
443:     cut = 1;
444:     lsum = ksizes[0];
445:     lblks = 1;
446:   }

448:   ++merged;
449:   startp = 0;
450:   for (i = 0; i < start-1; ++i) startp += ksizes[i];
451:   iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
452:   iwork[icut + (nblks - 1) - merged-1] = cut;
453:   iwork[isize + (nblks - 1) - merged-1] = size;
454:   iwork[ilsum + (nblks - 1) - merged-1] = lsum;

456:   if (lblks == 2) {

458:     /* one merge in left branch, left branch done */
459:     ++merged;
460:     iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
461:     iwork[icut + (nblks - 1) - merged-1] = start;
462:     iwork[isize + (nblks - 1) - merged-1] = lsum;
463:     iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
464:   }

466:   if (lblks == 1 || lblks == 2) {

468:     /* left branch done, continue on the right side */
469:     start += lblks;
470:     size -= lsum;
471:     blks -= lblks;

473:     if (blks <= 0) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in preprocessing, blks = %d",blks);

475:     if (blks == 2) {

477:       /* one merge in right branch, right branch done */
478:       ++merged;
479:       startp += lsum;
480:       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
481:       iwork[icut + (nblks - 1) - merged-1] = start;
482:       iwork[isize + (nblks - 1) - merged-1] = size;
483:       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start-1];
484:     }

486:     if (blks == 1 || blks == 2) {

488:       /* get the next subproblem from the stack or finished */

490:       if (k >= 1) {

492:         /* something left on the stack */
493:         start = iwork[istck1 + k-1];
494:         size = iwork[istck2 + k-1];
495:         blks = iwork[istck3 + k-1];
496:         --k;
497:         goto L200;
498:       } else {

500:         /* nothing left on the stack */
501:         if (merged != nblks-1) SETERRQ(PETSC_COMM_SELF,1,"ERROR in preprocessing - not enough merges performed");

503:         /* exit preprocessing */

505:       }
506:     } else {

508:       /* BLKS.GE.3, and therefore analyze the right side */

510:       goto L200;
511:     }
512:   } else {

514:     /* LBLKS.GE.3, and therefore check the right side and */
515:     /* put it on the stack if required */

517:     rblks = blks - lblks;
518:     if (rblks >= 3) {
519:       ++k;
520:       iwork[istck1 + k-1] = cut + 1;
521:       iwork[istck2 + k-1] = size - lsum;
522:       iwork[istck3 + k-1] = rblks;
523:     } else if (rblks == 2) {

525:       /* one merge in right branch, right branch done */
526:       /* (note that nothing needs to be done if RBLKS.EQ.1 !) */

528:       ++merged;
529:       startp += lsum;
530:       iwork[istrtp + (nblks - 1) - merged-1] = startp + 1;
531:       iwork[icut + (nblks - 1) - merged-1] = start + lblks;
532:       iwork[isize + (nblks - 1) - merged-1] = size - lsum;
533:       iwork[ilsum + (nblks - 1) - merged-1] = ksizes[start + lblks-1];
534:     }
535:     if (rblks <= 0) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: ERROR in preprocessing - rblks = %d",rblks);

537:     /* continue on the left side */

539:     size = lsum;
540:     blks = lblks;
541:     goto L200;
542:   }

544:   /*  SIZE = IWORK( ISIZE+NBLKS-2 ) */
545:   /*  MAT1 = IWORK( ILSUM+NBLKS-2 ) */
546:   
547:   /* Note: after the dimensions SIZE and MAT1 of the last merging */
548:   /* operation have been determined, an upper bound for the workspace */
549:   /* requirements which is independent of how much deflation occurs in */
550:   /* the last merging operation could be determined as follows */
551:   /* (based on (3.15) and (3.19) from UT-CS-00-447): */
552:   
553:   /*  IF( MAT1.LE.N/2 ) THEN */
554:   /*     WSPREQ = 3*N + 3/2*( SIZE-MAT1 )**2 + N*N/2 + MAT1*MAT1 */
555:   /*  ELSE */
556:   /*     WSPREQ = 3*N + 3/2*MAT1*MAT1 + N*N/2 + ( SIZE-MAT1 )**2 */
557:   /*  END IF */
558:   
559:   /*  IF( LWORK-WSPREQ.LT.0 )THEN */
560:   /*          not enough work space provided */
561:   /*     INFO = -200 - ( WSPREQ-LWORK ) */
562:   /*     RETURN */
563:   /*  END IF */
564:   /*  However, this is not really useful, since the actual check whether */
565:   /*  enough workspace is provided happens in DMERG2.f ! */

567: /* ************************************************************************* */

569:   /* ...Solve subproblems................................... */
570:   
571:   /* Divide the matrix into NBLKS submatrices using rank-r */
572:   /* modifications (cuts) and solve for their eigenvalues and */
573:   /* eigenvectors. Initialize index array to sort eigenvalues. */
574:   
575:   /* first block: ...................................... */
576:   
577:   /*    correction for block 1: D1 - V1 \Sigma1 V1^T */

579:   ksk = ksizes[0];
580:   rp1 = rank[0];

582:   /* initialize the proper part of Z with the diagonal block D1 */
583:   /* (the correction will be made in Z and then the call of DSYEVD will */
584:   /*  overwrite it with the eigenvectors) */

586:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, d, &l1d, z, &ldz));

588:   /* copy D1 into WORK (in order to be able to restore it afterwards) */

590:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, d, &l1d, work, &ksk));

592:   /* copy V1 into the first RANK(1) columns of D1 and then */
593:   /* multiply with \Sigma1 */

595:   for (i = 0; i < rank[0]; ++i) {
596:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1 + i+1)*l1e], &one, &d[i*l1d], &one));
597:     PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + rp1*l1e], &d[i*l1d], &one));
598:   }

600:   /* multiply the first RANK( 1 ) columns of D1 with V1^T and */
601:   /* subtract the result from the proper part of Z (previously */
602:   /* initialized with D1) */

604:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, rank, &dmone,
605:           d, &l1d, &e[(rank[0]+1)*l1e], &l1e, &done, z, &ldz));

607:   /* restore the original D1 from WORK */

609:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, d, &l1d));

611:   /* eigenanalysis of block 1 (using DSYEVD) */

613:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, z, &ldz, ev, work, &lwork, info));
614:   if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block 1, info = %d",*info);

616:   /* EV( 1: ) contains the eigenvalues in ascending order */
617:   /* (they are returned this way by DSYEVD) */

619:   for (i = 0; i < ksk; ++i) iwork[i] = i+1;

621:     /* intermediate blocks: .............................. */

623:     np = ksk;

625:     /* remaining number of blocks */

627:     if (nblks > 2) {
628:       for (k = 1; k < nblks-1; ++k) {

630:       /* correction for block K: */
631:       /* Dk - U(k-1) \Sigma(k-1) U(k-1)^T - Vk \Sigmak Vk^T */

633:       ksk = ksizes[k];
634:       rp1 = rank[k];

636:       /* initialize the proper part of Z with the diagonal block Dk */
637:       /* (the correction will be made in Z and then the call of DSYEVD will */
638:       /*  overwrite it with the eigenvectors) */

640:       PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[k*l1d*l2d], &l1d, &z[np+np*ldz], &ldz));

642:       /* copy Dk into WORK (in order to be able to restore it afterwards) */

644:       PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[k*l1d*l2d], &l1d, work, &ksk));

646:       /* copy U(K-1) into the first RANK(K-1) columns of Dk and then */
647:       /* multiply with \Sigma(K-1) */

649:       for (i = 0; i < rank[k-1]; ++i) {
650:         PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i+(k-1)*l2e)*l1e], &one, &d[(i+k*l2d)*l1d], &one));
651:         PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i+(rank[k-1]+(k-1)*l2e)*l1e], &d[(i+k*l2d)*l1d], &one));
652:       }

654:       /* multiply the first RANK(K-1) columns of Dk with U(k-1)^T and */
655:       /* subtract the result from the proper part of Z (previously */
656:       /* initialized with Dk) */

658:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k-1],
659:                     &dmone, &d[k*l1d*l2d],
660:                     &l1d, &e[(k-1)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));

662:       /* copy Vk into the first RANK(K) columns of Dk and then */
663:       /* multiply with \Sigmak */

665:       for (i = 0; i < rank[k]; ++i) {
666:         PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(rp1+i+1 + k*l2e)*l1e], &one, &d[(i + k*l2d)*l1d], &one));
667:         PetscStackCallBLAS("BLASscal",BLASscal_(&ksk, &e[i + (rp1 + k*l2e)*l1e], &d[(i + k*l2d)*l1d], &one));
668:       }

670:       /* multiply the first RANK(K) columns of Dk with Vk^T and */
671:       /* subtract the result from the proper part of Z (previously */
672:       /* updated with [- U(k-1) \Sigma(k-1) U(k-1)^T] ) */

674:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[k],
675:                     &dmone, &d[k*l1d*l2d], &l1d,
676:                     &e[(rank[k]+1 + k*l2e)*l1e], &l1e, &done, &z[np+np*ldz], &ldz));

678:       /* restore the original Dk from WORK */

680:       PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, &d[k*l1d*l2d], &l1d));

682:       /* eigenanalysis of block K (using dsyevd) */

684:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz],
685:                      &ldz, &ev[np], work, &lwork, info));
686:       if (*info) SETERRQ2(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block %d, info = %d",k,*info);

688:       /* EV( NPP1: ) contains the eigenvalues in ascending order */
689:       /* (they are returned this way by DSYEVD) */

691:       for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;

693:       /* update NP */
694:       np += ksk;
695:     }
696:   }

698:   /* last block: ....................................... */
699:   
700:   /*    correction for block NBLKS: */
701:   /*    D(nblks) - U(nblks-1) \Sigma(nblks-1) U(nblks-1)^T */

703:   ksk = ksizes[nblks-1];

705:   /* initialize the proper part of Z with the diagonal block D(nblks) */
706:   /* (the correction will be made in Z and then the call of DSYEVD will */
707:   /* overwrite it with the eigenvectors) */

709:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[(nblks-1)*l1d*l2d], &l1d, &z[np+np*ldz], &ldz));

711:   /* copy D(nblks) into WORK (in order to be able to restore it afterwards) */

713:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, &d[(nblks-1)*l1d*l2d], &l1d, work, &ksk));

715:   /* copy U(nblks-1) into the first RANK(nblks-1) columns of D(nblks) and then */
716:   /* multiply with \Sigma(nblks-1) */

718:   for (i = 0; i < rank[nblks-2]; ++i) {
719:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&ksk, &e[(i + (nblks-2)*l2e)*l1e],
720:               &one, &d[(i + (nblks-1)*l2d)*l1d], &one));
721:     PetscStackCallBLAS("BLASscal",BLASscal_(&ksk,
722:               &e[i + (rank[nblks-2] + (nblks-2)*l2e)*l1e],
723:               &d[(i + (nblks-1)*l2d)*l1d], &one));
724:   }

726:   /* multiply the first RANK(nblks-1) columns of D(nblks) with U(nblks-1)^T */
727:   /* and subtract the result from the proper part of Z (previously */
728:   /* initialized with D(nblks) ) */

730:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "T", &ksk, &ksk, &rank[nblks - 2],
731:           &dmone, &d[(nblks-1)*l1d*l2d], &l1d,
732:           &e[(nblks-2)*l1e*l2e], &l1e, &done, &z[np+np*ldz], &ldz));

734:   /* restore the original D(nblks) from WORK */

736:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L", &ksk, &ksk, work, &ksk, &d[(nblks-1)*l1d*l2d], &l1d));

738:   /* eigenanalysis of block NBLKS (using dsyevd) */

740:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V", "L", &ksk, &z[np+np*ldz], &ldz, &ev[np], work, &lwork, info));
741:   if (*info) SETERRQ2(PETSC_COMM_SELF,1,"dibtdc: Error in DSYEVD for block %d, info = %d",nblks,*info);

743:   /* EV( NPP1: ) contains the eigenvalues in ascending order */
744:   /* (they are returned this way by DSYEVD) */

746:   for (i = 0; i < ksk; ++i) iwork[np + i] = i+1;

748:   /* note that from here on the entire workspace is available again */


751:   /* Perform all the merging operations. */

753:   vstrt = 0;
754:   for (i = 0; i < nblks-1; ++i) {

756:     /* MATSIZ = total size of the current rank RANK modification problem */

758:     matsiz = iwork[isize + i - 1];
759:     np = iwork[istrtp + i - 1];
760:     kbrk = iwork[icut + i - 1];
761:     mat1 = iwork[ilsum + i - 1];
762:     vstrt += np;

764:     for (j = 0; j < rank[kbrk-1]; ++j) {

766:       /* NOTE: The parameter RHO in DMERG2 is modified in DSRTDF */
767:       /*       (multiplied by 2) ! In order not to change the */
768:       /*       singular value stored in E( :, RANK( KBRK )+1, KBRK ), */
769:       /*       we do not pass on this variable as an argument to DMERG2, */
770:       /*       but we assign a separate variable RHO here which is passed */
771:       /*       on to DMERG2. */
772:       /*       Alternative solution in F90: */
773:       /*       pass E( :,RANK( KBRK )+1,KBRK ) to an INTENT( IN ) parameter */
774:       /*       in DMERG2. */

776:       rho = e[j + (rank[kbrk-1] + (kbrk-1)*l2e)*l1e];

778:       /* eigenvectors are accumulated ( JOBZ.EQ.'D' ) */

780:       BDC_dmerg2_(jobz, j+1, matsiz, &ev[np-1], &z[np-1+(np-1)*ldz], 
781:                     ldz, &iwork[np-1], &rho, &e[(j + (kbrk-1)*l2e)*l1e],
782:                     ksizes[kbrk], &e[(rank[kbrk-1]+j+1 + (kbrk-1)*l2e)*l1e],
783:                     ksizes[kbrk-1], mat1, work, lwork, &iwork[n], tol, info, 1);
784:                     
785:       if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dibtdc: Error in dmerg2, info = %d",*info);
786:     }

788:     /* at this point all RANK( KBRK ) rank-one modifications corresponding */
789:     /* to the current off-diagonal block are finished. */
790:     /* Move on to the next off-diagonal block. */

792:   }

794:   /* Re-merge the eigenvalues/vectors which were deflated at the final */
795:   /* merging step by sorting all eigenvalues and eigenvectors according */
796:   /* to the permutation stored in IWORK. */
797:   
798:   /* copy eigenvalues and eigenvectors in ordered form into WORK */
799:   /* (eigenvalues into WORK( 1:N ), eigenvectors into WORK( N+1:N+1+N^2 ) ) */

801:   for (i = 0; i < n; ++i) {
802:     j = iwork[i];
803:     work[i] = ev[j-1];
804:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &z[(j-1)*ldz], &one, &work[n*(i+1)], &one));
805:   }

807:   /* copy ordered eigenvalues back from WORK( 1:N ) into EV */

809:   PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, work, &one, ev, &one));

811:   /* copy ordered eigenvectors back from WORK( N+1:N+1+N^2 ) into Z */

813:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &n, &work[n], &n, z, &ldz));
814:   return(0);
815: #endif
816: }