1: /*
2: BDC - Block-divide and conquer (see description in README file).
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/dsimpl.h>
25: #include <slepcblaslapack.h>
27: PetscErrorCode BDC_dsrtdf_(PetscBLASInt *k,PetscBLASInt n,PetscBLASInt n1, 28: PetscReal *d,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, 29: PetscReal *rho,PetscReal *z,PetscReal *dlamda,PetscReal *w, 30: PetscReal *q2,PetscBLASInt *indx,PetscBLASInt *indxc,PetscBLASInt *indxp, 31: PetscBLASInt *coltyp,PetscReal reltol,PetscBLASInt *dz,PetscBLASInt *de, 32: PetscBLASInt *info) 33: {
34: /* -- Routine written in LAPACK Version 3.0 style -- */
35: /* *************************************************** */
36: /* Written by */
37: /* Michael Moldaschl and Wilfried Gansterer */
38: /* University of Vienna */
39: /* last modification: March 16, 2014 */
41: /* Small adaptations of original code written by */
42: /* Wilfried Gansterer and Bob Ward, */
43: /* Department of Computer Science, University of Tennessee */
44: /* see http://dx.doi.org/10.1137/S1064827501399432 */
45: /* *************************************************** */
47: /* Purpose */
48: /* ======= */
50: /* DSRTDF merges the two sets of eigenvalues of a rank-one modified */
51: /* diagonal matrix D+ZZ^T together into a single sorted set. Then it tries */
52: /* to deflate the size of the problem. */
53: /* There are two ways in which deflation can occur: when two or more */
54: /* eigenvalues of D are close together or if there is a tiny entry in the */
55: /* Z vector. For each such occurrence the order of the related secular */
56: /* equation problem is reduced by one. */
58: /* Arguments */
59: /* ========= */
61: /* K (output) INTEGER */
62: /* The number of non-deflated eigenvalues, and the order of the */
63: /* related secular equation. 0 <= K <=N. */
65: /* N (input) INTEGER */
66: /* The dimension of the diagonal matrix. N >= 0. */
68: /* N1 (input) INTEGER */
69: /* The location of the last eigenvalue in the leading sub-matrix. */
70: /* min(1,N) <= N1 <= max(1,N). */
72: /* D (input/output) DOUBLE PRECISION array, dimension (N) */
73: /* On entry, D contains the eigenvalues of the two submatrices to */
74: /* be combined. */
75: /* On exit, D contains the trailing (N-K) updated eigenvalues */
76: /* (those which were deflated) sorted into increasing order. */
78: /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
79: /* On entry, Q contains the eigenvectors of two submatrices in */
80: /* the two square blocks with corners at (1,1), (N1,N1) */
81: /* and (N1+1, N1+1), (N,N). */
82: /* On exit, Q contains the trailing (N-K) updated eigenvectors */
83: /* (those which were deflated) in its last N-K columns. */
85: /* LDQ (input) INTEGER */
86: /* The leading dimension of the array Q. LDQ >= max(1,N). */
88: /* INDXQ (input/output) INTEGER array, dimension (N) */
89: /* The permutation which separately sorts the two sub-problems */
90: /* in D into ascending order. Note that elements in the second */
91: /* half of this permutation must first have N1 added to their */
92: /* values. Destroyed on exit. */
94: /* RHO (input/output) DOUBLE PRECISION */
95: /* On entry, the off-diagonal element associated with the rank-1 */
96: /* cut which originally split the two submatrices which are now */
97: /* being recombined. */
98: /* On exit, RHO has been modified to the value required by */
99: /* DLAED3M (made positive and multiplied by two, such that */
100: /* the modification vector has norm one). */
102: /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
103: /* On entry, Z contains the updating vector. */
104: /* On exit, the contents of Z has been destroyed by the updating */
105: /* process. */
107: /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
108: /* A copy of the first K non-deflated eigenvalues which */
109: /* will be used by DLAED3M to form the secular equation. */
111: /* W (output) DOUBLE PRECISION array, dimension (N) */
112: /* The first K values of the final deflation-altered z-vector */
113: /* which will be passed to DLAED3M. */
115: /* Q2 (output) DOUBLE PRECISION array, dimension */
116: /* ( N*N ) (if everything is deflated) or */
117: /* ( N1*(COLTYP(1)+COLTYP(2)) + (N-N1)*(COLTYP(2)+COLTYP(3)) ) */
118: /* (if not everything is deflated) */
119: /* If everything is deflated, then N*N intermediate workspace */
120: /* is needed in Q2. */
121: /* If not everything is deflated, Q2 contains on exit a copy of the */
122: /* first K non-deflated eigenvectors which will be used by */
123: /* DLAED3M in a matrix multiply (DGEMM) to accumulate the new */
124: /* eigenvectors, followed by the N-K deflated eigenvectors. */
126: /* INDX (workspace) INTEGER array, dimension (N) */
127: /* The permutation used to sort the contents of DLAMDA into */
128: /* ascending order. */
130: /* INDXC (output) INTEGER array, dimension (N) */
131: /* The permutation used to arrange the columns of the deflated */
132: /* Q matrix into three groups: columns in the first group contain */
133: /* non-zero elements only at and above N1 (type 1), columns in the */
134: /* second group are dense (type 2), and columns in the third group */
135: /* contain non-zero elements only below N1 (type 3). */
137: /* INDXP (workspace) INTEGER array, dimension (N) */
138: /* The permutation used to place deflated values of D at the end */
139: /* of the array. INDXP(1:K) points to the nondeflated D-values */
140: /* and INDXP(K+1:N) points to the deflated eigenvalues. */
142: /* COLTYP (workspace/output) INTEGER array, dimension (N) */
143: /* During execution, a label which will indicate which of the */
144: /* following types a column in the Q2 matrix is: */
145: /* 1 : non-zero in the upper half only; */
146: /* 2 : dense; */
147: /* 3 : non-zero in the lower half only; */
148: /* 4 : deflated. */
149: /* On exit, COLTYP(i) is the number of columns of type i, */
150: /* for i=1 to 4 only. */
152: /* RELTOL (input) DOUBLE PRECISION */
153: /* User specified deflation tolerance. If RELTOL.LT.20*EPS, then */
154: /* the standard value used in the original LAPACK routines is used. */
156: /* DZ (output) INTEGER, DZ.GE.0 */
157: /* counts the deflation due to a small component in the modification */
158: /* vector. */
160: /* DE (output) INTEGER, DE.GE.0 */
161: /* counts the deflation due to close eigenvalues. */
163: /* INFO (output) INTEGER */
164: /* = 0: successful exit. */
165: /* < 0: if INFO = -i, the i-th argument had an illegal value. */
167: /* Further Details */
168: /* =============== */
170: /* Based on code written by */
171: /* Wilfried Gansterer and Bob Ward, */
172: /* Department of Computer Science, University of Tennessee */
174: /* Based on the design of the LAPACK code DLAED2 with modifications */
175: /* to allow a block divide and conquer scheme */
177: /* DLAED2 was written by Jeff Rutter, Computer Science Division, University */
178: /* of California at Berkeley, USA, and modified by Francoise Tisseur, */
179: /* University of Tennessee. */
181: /* ===================================================================== */
183: #if defined(SLEPC_MISSING_LAPACK_LAMRG) || defined(SLEPC_MISSING_LAPACK_LACPY)
185: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAMRG/LACPY - Lapack routine is unavailable");
186: #else
187: PetscReal c, s, t, eps, tau, tol, dmax, dmone = -1.;
188: PetscBLASInt i, j, i1, k2, n2, ct, nj, pj=0, js, iq1, iq2;
189: PetscBLASInt psm[4], imax, jmax, ctot[4], factmp=1, one=1;
192: *info = 0;
194: if (n < 0) {
195: *info = -2;
196: } else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) {
197: *info = -3;
198: } else if (ldq < PetscMax(1,n)) {
199: *info = -6;
200: }
201: if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DSRTDF",-(*info));
203: /* Initialize deflation counters */
205: *dz = 0;
206: *de = 0;
208: /* **************************************************************************** */
210: /* Quick return if possible */
212: if (n == 0) return(0);
214: /* **************************************************************************** */
216: n2 = n - n1;
218: /* Modify Z so that RHO is positive. */
220: if (*rho < 0.) {
221: PetscStackCallBLAS("BLASscal",BLASscal_(&n2, &dmone, &z[n1], &one));
222: }
224: /* Normalize z so that norm2(z) = 1. Since z is the concatenation of */
225: /* two normalized vectors, norm2(z) = sqrt(2). (NOTE that this holds also */
226: /* here in the approximate block-tridiagonal D&C because the two vectors are */
227: /* singular vectors, but it would not necessarily hold in a D&C for a */
228: /* general banded matrix !) */
230: t = 1. / PetscSqrtReal(2.);
231: PetscStackCallBLAS("BLASscal",BLASscal_(&n, &t, z, &one));
233: /* NOTE: at this point the value of RHO is modified in order to */
234: /* normalize Z: RHO = ABS( norm2(z)**2 * RHO */
235: /* in our case: norm2(z) = sqrt(2), and therefore: */
237: *rho = PetscAbs(*rho * 2.);
239: /* Sort the eigenvalues into increasing order */
241: for (i = n1; i < n; ++i) indxq[i] += n1;
243: /* re-integrate the deflated parts from the last pass */
245: for (i = 0; i < n; ++i) dlamda[i] = d[indxq[i]-1];
246: PetscStackCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, dlamda, &one, &one, indxc));
247: for (i = 0; i < n; ++i) indx[i] = indxq[indxc[i]-1];
248: for (i = 0; i < n; ++i) indxq[i] = 0;
250: /* Calculate the allowable deflation tolerance */
252: /* imax = BLASamax_(&n, z, &one); */
253: imax = 1;
254: dmax = PetscAbsReal(z[0]);
255: for (i=1;i<n;i++) {
256: if (PetscAbsReal(z[i])>dmax) {
257: imax = i+1;
258: dmax = PetscAbsReal(z[i]);
259: }
260: }
261: /* jmax = BLASamax_(&n, d, &one); */
262: jmax = 1;
263: dmax = PetscAbsReal(d[0]);
264: for (i=1;i<n;i++) {
265: if (PetscAbsReal(d[i])>dmax) {
266: jmax = i+1;
267: dmax = PetscAbsReal(d[i]);
268: }
269: }
271: eps = LAPACKlamch_("Epsilon");
272: if (reltol < eps * 20) {
273: /* use the standard deflation tolerance from the original LAPACK code */
274: tol = eps * 8. * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
275: } else {
276: /* otherwise set TOL to the input parameter RELTOL ! */
277: tol = reltol * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
278: }
280: /* If the rank-1 modifier is small enough, no more needs to be done */
281: /* except to reorganize Q so that its columns correspond with the */
282: /* elements in D. */
284: if (*rho * PetscAbs(z[imax-1]) <= tol) {
286: /* complete deflation because of small rank-one modifier */
287: /* NOTE: in this case we need N*N space in the array Q2 ! */
289: *dz = n;
290: *k = 0;
291: iq2 = 1;
292: for (j = 0; j < n; ++j) {
293: i = indx[j];
294: indxq[j] = i;
295: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &q[(i-1)*ldq], &one, &q2[iq2-1], &one));
296: dlamda[j] = d[i-1];
297: iq2 += n;
298: }
299: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &n, q2, &n, q, &ldq));
300: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, dlamda, &one, d, &one));
301: return(0);
302: }
304: /* If there are multiple eigenvalues then the problem deflates. Here */
305: /* the number of equal eigenvalues is found. As each equal */
306: /* eigenvalue is found, an elementary reflector is computed to rotate */
307: /* the corresponding eigensubspace so that the corresponding */
308: /* components of Z are zero in this new basis. */
309: 310: /* initialize the column types */
312: /* first N1 columns are initially (before deflation) of type 1 */
313: for (i = 0; i < n1; ++i) coltyp[i] = 1;
314: /* columns N1+1 to N are initially (before deflation) of type 3 */
315: for (i = n1; i < n; ++i) coltyp[i] = 3;
317: *k = 0;
318: k2 = n + 1;
319: for (j = 0; j < n; ++j) {
320: nj = indx[j]-1;
321: if (*rho * PetscAbs(z[nj]) <= tol) {
323: /* Deflate due to small z component. */
324: ++(*dz);
325: --k2;
327: /* deflated eigenpair, therefore column type 4 */
328: coltyp[nj] = 4;
329: indxp[k2-1] = nj+1;
330: indxq[k2-1] = nj+1;
331: if (j+1 == n) goto L100;
332: } else {
334: /* not deflated */
335: pj = nj;
336: goto L80;
337: }
338: }
339: factmp = 1;
340: L80:341: ++j;
342: nj = indx[j]-1;
343: if (j+1 > n) goto L100;
344: if (*rho * PetscAbs(z[nj]) <= tol) {
346: /* Deflate due to small z component. */
347: ++(*dz);
348: --k2;
349: coltyp[nj] = 4;
350: indxp[k2-1] = nj+1;
351: indxq[k2-1] = nj+1;
352: } else {
354: /* Check if eigenvalues are close enough to allow deflation. */
355: s = z[pj];
356: c = z[nj];
358: /* Find sqrt(a**2+b**2) without overflow or */
359: /* destructive underflow. */
361: tau = LAPACKlapy2_(&c, &s);
362: t = d[nj] - d[pj];
363: c /= tau;
364: s = -s / tau;
365: if (PetscAbs(t * c * s) <= tol) {
367: /* Deflate due to close eigenvalues. */
368: ++(*de);
369: z[nj] = tau;
370: z[pj] = 0.;
371: if (coltyp[nj] != coltyp[pj]) coltyp[nj] = 2;
373: /* if deflation happens across the two eigenvector blocks */
374: /* (eigenvalues corresponding to different blocks), then */
375: /* on column in the eigenvector matrix fills up completely */
376: /* (changes from type 1 or 3 to type 2) */
378: /* eigenpair PJ is deflated, therefore the column type changes */
379: /* to 4 */
381: coltyp[pj] = 4;
382: PetscStackCallBLAS("BLASrot",BLASrot_(&n, &q[pj*ldq], &one, &q[nj*ldq], &one, &c, &s));
383: t = d[pj] * (c * c) + d[nj] * (s * s);
384: d[nj] = d[pj] * (s * s) + d[nj] * (c * c);
385: d[pj] = t;
386: --k2;
387: i = 1;
388: L90:389: if (k2 + i <= n) {
390: if (d[pj] < d[indxp[k2 + i-1]-1]) {
391: indxp[k2 + i - 2] = indxp[k2 + i - 1];
392: indxq[k2 + i - 2] = indxq[k2 + i - 1];
393: indxp[k2 + i - 1] = pj + 1;
394: indxq[k2 + i - 2] = pj + 1;
395: ++i;
396: goto L90;
397: } else {
398: indxp[k2 + i - 2] = pj + 1;
399: indxq[k2 + i - 2] = pj + 1;
400: }
401: } else {
402: indxp[k2 + i - 2] = pj + 1;
403: indxq[k2 + i - 2] = pj + 1;
404: }
405: pj = nj;
406: factmp = -1;
407: } else {
409: /* do not deflate */
410: ++(*k);
411: dlamda[*k-1] = d[pj];
412: w[*k-1] = z[pj];
413: indxp[*k-1] = pj + 1;
414: indxq[*k-1] = pj + 1;
415: factmp = 1;
416: pj = nj;
417: }
418: }
419: goto L80;
420: L100:422: /* Record the last eigenvalue. */
423: ++(*k);
424: dlamda[*k-1] = d[pj];
425: w[*k-1] = z[pj];
426: indxp[*k-1] = pj+1;
427: indxq[*k-1] = (pj+1) * factmp;
429: /* Count up the total number of the various types of columns, then */
430: /* form a permutation which positions the four column types into */
431: /* four uniform groups (although one or more of these groups may be */
432: /* empty). */
434: for (j = 0; j < 4; ++j) ctot[j] = 0;
435: for (j = 0; j < n; ++j) {
436: ct = coltyp[j];
437: ++ctot[ct-1];
438: }
440: /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
441: psm[0] = 1;
442: psm[1] = ctot[0] + 1;
443: psm[2] = psm[1] + ctot[1];
444: psm[3] = psm[2] + ctot[2];
445: *k = n - ctot[3];
447: /* Fill out the INDXC array so that the permutation which it induces */
448: /* will place all type-1 columns first, all type-2 columns next, */
449: /* then all type-3's, and finally all type-4's. */
451: for (j = 0; j < n; ++j) {
452: js = indxp[j];
453: ct = coltyp[js-1];
454: indx[psm[ct - 1]-1] = js;
455: indxc[psm[ct - 1]-1] = j+1;
456: ++psm[ct - 1];
457: }
459: /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
460: /* and Q2 respectively. The eigenvalues/vectors which were not */
461: /* deflated go into the first K slots of DLAMDA and Q2 respectively, */
462: /* while those which were deflated go into the last N - K slots. */
464: i = 0;
465: iq1 = 0;
466: iq2 = (ctot[0] + ctot[1]) * n1;
467: for (j = 0; j < ctot[0]; ++j) {
468: js = indx[i];
469: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
470: z[i] = d[js-1];
471: ++i;
472: iq1 += n1;
473: }
475: for (j = 0; j < ctot[1]; ++j) {
476: js = indx[i];
477: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
478: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
479: z[i] = d[js-1];
480: ++i;
481: iq1 += n1;
482: iq2 += n2;
483: }
485: for (j = 0; j < ctot[2]; ++j) {
486: js = indx[i];
487: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
488: z[i] = d[js-1];
489: ++i;
490: iq2 += n2;
491: }
493: iq1 = iq2;
494: for (j = 0; j < ctot[3]; ++j) {
495: js = indx[i];
496: PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &q[(js-1)*ldq], &one, &q2[iq2], &one));
497: iq2 += n;
498: z[i] = d[js-1];
499: ++i;
500: }
502: /* The deflated eigenvalues and their corresponding vectors go back */
503: /* into the last N - K slots of D and Q respectively. */
505: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &ctot[3], &q2[iq1], &n, &q[*k*ldq], &ldq));
506: i1 = n - *k;
507: PetscStackCallBLAS("BLAScopy",BLAScopy_(&i1, &z[*k], &one, &d[*k], &one));
509: /* Copy CTOT into COLTYP for referencing in DLAED3M. */
511: for (j = 0; j < 4; ++j) coltyp[j] = ctot[j];
512: return(0);
513: #endif
514: }