Actual source code: dqds.c
slepc-3.7.4 2017-05-17
1: /*
2: DQDS-type dense solver for generalized symmetric-indefinite eigenproblem.
3: Based on Matlab code from Carla Ferreira.
5: References:
7: [1] C. Ferreira, B. Parlett, "Real DQDS for the nonsymmetric tridiagonal
8: eigenvalue problem", preprint, 2012.
10: [2] C. Ferreira. The unsymmetric tridiagonal eigenvalue problem. Ph.D
11: Thesis, University of Minho, 2007.
13: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
14: SLEPc - Scalable Library for Eigenvalue Problem Computations
15: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
17: This file is part of SLEPc.
19: SLEPc is free software: you can redistribute it and/or modify it under the
20: terms of version 3 of the GNU Lesser General Public License as published by
21: the Free Software Foundation.
23: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
24: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26: more details.
28: You should have received a copy of the GNU Lesser General Public License
29: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
30: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
31: */
32: #include <slepc/private/dsimpl.h>
33: #include <slepcblaslapack.h>
37: /*
38: INPUT:
39: a ---- diagonal of J
40: b ---- subdiagonal of J;
41: the superdiagonal of J is all 1's
43: OUTPUT:
44: For an eigenvalue lambda of J we have:
45: gl=<real(lambda)<=gr
46: -sigma=<imag(lambda)<=sigma
47: */
48: static PetscErrorCode ScanJ(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *gl,PetscReal *gr,PetscReal *sigma)
49: {
50: PetscInt i;
51: PetscReal b0,b1,rad;
54: /* For original matrix C, C_bal=T+S; T-symmetric and S=skew-symmetric
55: C_bal is the balanced form of C */
56: /* Bounds on the imaginary part of C (Gersgorin bound for S)*/
57: *sigma = 0.0;
58: b0 = 0.0;
59: for (i=0;i<n-1;i++) {
60: if (b[i]<0.0) b1 = PetscSqrtReal(-b[i]);
61: else b1 = 0.0;
62: *sigma = PetscMax(*sigma,b1+b0);
63: b0 = b1;
64: }
65: *sigma = PetscMax(*sigma,b0);
66: /* Gersgorin bounds for T (=Gersgorin bounds on the real part for C) */
67: rad = (b[0]>0.0)?PetscSqrtReal(b[0]):0.0; /* rad = b1+b0, b0 = 0 */
68: *gr = a[0]+rad;
69: *gl = a[0]-rad;
70: b0 = rad;
71: for (i=1;i<n-1;i++) {
72: b1 = (b[i]>0.0)?PetscSqrtReal(b[i]):0.0;
73: rad = b0+b1;
74: *gr = PetscMax(*gr,a[i]+rad);
75: *gl = PetscMin(*gl,a[i]-rad);
76: b0 = b1;
77: }
78: rad = b0;
79: *gr = PetscMax(*gr,a[n-1]+rad);
80: *gl = PetscMin(*gl,a[n-1]-rad);
81: return(0);
82: }
86: /*
87: INPUT:
88: a - vector with the diagonal elements
89: b - vector with the subdiagonal elements
90: gl - Gersgorin left bound (real axis)
91: gr - Gersgorin right bound (real axis)
92: OUTPUT:
93: eigvalue - multiple eigenvalue (if there is an eigenvalue)
94: m - its multiplicity (m=0 if there isn't a multiple eigenvalue)
95: X - matrix of generalized eigenvectors
96: shift
97: dim(work)=5*n+4
98: */
99: static PetscErrorCode Prologue(PetscInt n,PetscReal *a,PetscReal *b,PetscReal gl,PetscReal gr,PetscInt *m,PetscReal *shift,PetscReal *work)
100: {
103: PetscReal mu,tol,*a1,*y,*yp,*x,*xp;
104: PetscInt i,k;
107: *m = 0;
108: mu = 0.0;
109: for (i=0;i<n;i++) mu += a[i];
110: mu /= n;
111: tol = n*PETSC_MACHINE_EPSILON*(gr-gl);
112: a1 = work; /* size n */
113: y = work+n; /* size n+1 */
114: yp = y+n+1; /* size n+1. yp is the derivative of y (p for "prime") */
115: x = yp+n+1; /* size n+1 */
116: xp = x+n+1; /* size n+1 */
117: for (i=0;i<n;i++) a1[i] = mu-a[i];
118: x[0] = 1;
119: xp[0] = 0;
120: x[1] = a1[0];
121: xp[1] = 1;
122: for (i=1;i<n;i++) {
123: x[i+1]=a1[i]*x[i]-b[i-1]*x[i-1];
124: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1];
125: }
126: *shift = mu;
127: if (PetscAbsReal(x[n])<tol) {
128: /* mu is an eigenvalue */
129: *m = *m+1;
130: if (PetscAbsReal(xp[n])<tol) {
131: /* mu is a multiple eigenvalue; Is it the one-point spectrum case? */
132: k = 0;
133: while (PetscAbsReal(xp[n])<tol && k<n-1) {
134: PetscMemcpy(x,y,(n+1)*sizeof(PetscReal));
135: PetscMemcpy(xp,yp,(n+1)*sizeof(PetscReal));
136: x[k] = 0.0;
137: k++;
138: x[k] = 1.0;
139: xp[k] = 0.0;
140: x[k+1] = a1[k] + y[k];
141: xp[k+1] = 1+yp[k];
142: for (i=k+1;i<n;i++) {
143: x[i+1] = a1[i]*x[i]-b[i-1]*x[i-1]+y[i];
144: xp[i+1]=a1[i]*xp[i]+x[i]-b[i-1]*xp[i-1]+yp[i];
145: }
146: *m = *m+1;
147: }
148: }
149: }
150: /*
151: When mu is not an eigenvalue or it it an eigenvalue but it is not the one-point spectrum case, we will always have shift=mu
153: Need to check for overflow!
155: After calling Prologue, eigenComplexdqds and eigen3dqds will test if m==n in which case we have the one-point spectrum case;
156: If m!=0, the only output to be used is the shift returned.
157: */
158: return(0);
159: }
163: static PetscErrorCode LUfac(PetscInt n,PetscReal *a,PetscReal *b,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L,PetscReal *U,PetscInt *fail,PetscReal *work)
164: {
165: PetscInt i;
166: PetscReal *a1;
169: a1 = work;
170: for (i=0;i<n;i++) a1[i] = a[i]-shift;
171: *fail = 0;
172: for (i=0;i<n-1;i++) {
173: U[i] = a1[i];
174: L[i] = b[i]/U[i];
175: a1[i+1] = a1[i+1]-L[i];
176: }
177: U[n-1] = a1[n-1];
179: /* Check if there are NaN values */
180: for (i=0;i<n-1 && !*fail;i++) {
181: if (PetscIsInfOrNanReal(L[i])) *fail=1;
182: if (PetscIsInfOrNanReal(U[i])) *fail=1;
183: }
184: if (!*fail && PetscIsInfOrNanReal(U[n-1])) *fail=1;
186: for (i=0;i<n-1 && !*fail;i++) {
187: if (PetscAbsReal(L[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
188: if (PetscAbsReal(U[i])>tol*norm) *fail = 1;
189: }
190: if (!*fail && PetscAbsReal(U[n-1])>tol*norm) *fail = 1;
191: return(0);
192: }
196: static PetscErrorCode RealDQDS(PetscInt n,PetscReal *L,PetscReal *U,PetscReal shift,PetscReal tol,PetscReal norm,PetscReal *L1,PetscReal *U1,PetscInt *fail)
197: {
198: PetscReal d;
199: PetscInt i;
202: *fail = 0;
203: d = U[0]-shift;
204: for (i=0;i<n-1;i++) {
205: U1[i] = d+L[i];
206: L1[i] = L[i]*(U[i+1]/U1[i]);
207: d = d*(U[i+1]/U1[i])-shift;
208: }
209: U1[n-1]=d;
211: /* The following demands IEEE arithmetic */
212: for (i=0;i<n-1 && !*fail;i++) {
213: if (PetscIsInfOrNanReal(L1[i])) *fail=1;
214: if (PetscIsInfOrNanReal(U1[i])) *fail=1;
215: }
216: if (!*fail && PetscIsInfOrNanReal(U1[n-1])) *fail=1;
217: for (i=0;i<n-1 && !*fail;i++) {
218: if (PetscAbsReal(L1[i])>tol*norm) *fail=1;
219: if (PetscAbsReal(U1[i])>tol*norm) *fail=1;
220: }
221: if (!*fail && PetscAbsReal(U1[n-1])>tol*norm) *fail=1;
222: return(0);
223: }
227: static PetscErrorCode TridqdsZhuang3(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscInt *fail)
228: {
229: PetscReal xl,yl,xr,yr,zr;
230: PetscInt i;
233: *fail = 0;
234: xr = 1.0;
235: yr = e[0];
236: zr = 0.0;
237: /* Step 1 */
238: /* the efect of Z1 */
239: xr = xr*q[0]+yr;
240: /* the inverse of L1 */
241: xl = (q[0]+e[0])*(q[0]+e[0])+q[1]*e[0]-sum*(q[0]+e[0])+prod;
242: yl = -(q[2]*e[1]*q[1]*e[0])/xl;
243: xl = -(q[1]*e[0]*(q[0]+e[0]+q[1]+e[1]-sum))/xl;
244: /* the efect of L1 */
245: q[0] = xr-xl;
246: xr = yr-xl;
247: yr = zr-yl-xl*e[1];
248: /*the inverse of Y1 */
249: xr = xr/q[0];
250: yr = yr/q[0];
251: /*the effect of Y1 inverse */
252: e[0] = xl+yr+xr*q[1];
253: xl = yl+zr+yr*q[2]; /* zr=0 when n=3 */
254: /*the effect of Y1 */
255: xr = 1.0-xr;
256: yr = e[1]-yr;
258: /* STEP n-1 */
260: if (PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e[n-3])>tolDef*PetscAbsReal(q[n-3])) {
261: /* the efect of Zn-1 */
262: xr = xr*q[n-2]+yr;
263: /* the inverse of Ln-1 */
264: xl = -xl/e[n-3];
265: /* the efect of Ln-1 */
266: q[n-2] = xr-xl;
267: xr = yr-xl;
268: /*the inverse of Yn-1 */
269: xr = xr/q[n-2];
270: /*the effect of the inverse of Yn-1 */
271: e[n-2] = xl+xr*q[n-1];
272: /*the effects of Yn-1 */
273: xr = 1.0-xr;
274: /* STEP n */
275: /*the effect of Zn */
276: xr = xr*q[n-1];
277: /*the inverse of Ln=I */
278: /*the effect of Ln */
279: q[n-1] = xr;
280: /* the inverse of Yn-1=I */
282: } else { /* Free deflation */
283: e[n-2] = (e[n-3]+(xr*q[n-2]+yr)+q[n-1])*0.5; /* Sum=trace/2 */
284: q[n-2] = (e[n-3]+q[n-2]*xr)*q[n-1]-xl; /* det */
285: q[n-1] = e[n-2]*e[n-2]-q[n-2];
286: *fail = 2;
287: }
289: /* The following demands IEEE arithmetic */
290: for (i=0;i<n-1 && !*fail;i++) {
291: if (PetscIsInfOrNanReal(e[i])) *fail=1;
292: if (PetscIsInfOrNanReal(q[i])) *fail=1;
293: }
294: if (!*fail && PetscIsInfOrNanReal(q[n-1])) *fail=1;
295: for (i=0;i<n-1 && !*fail;i++) {
296: if (PetscAbsReal(e[i])>tol*norm) *fail=1;
297: if (PetscAbsReal(q[i])>tol*norm) *fail=1;
298: }
299: if (!*fail && PetscAbsReal(q[n-1])>tol*norm) *fail=1;
300: return(0);
301: }
305: static PetscErrorCode TridqdsZhuang(PetscInt n,PetscReal *e,PetscReal *q,PetscReal sum,PetscReal prod,PetscReal tol,PetscReal norm,PetscReal tolDef,PetscReal *e1,PetscReal *q1,PetscInt *fail)
306: {
308: PetscInt i;
309: PetscReal xl,yl,xr,yr,zr;
312: for (i=0;i<n-1;i++) {
313: e1[i] = e[i];
314: q1[i] = q[i];
315: }
316: q1[n-1] = q[n-1];
317: *fail = 0;
318: if (n>3) { /* For n>3 */
319: *fail = 0;
320: xr = 1;
321: yr = e1[0];
322: zr = 0;
323: /* step 1 */
324: /* the efect of Z1 */
325: xr = xr*q1[0]+yr;
326: /* the inverse of L1 */
327: xl = (q1[0]+e1[0])*(q1[0]+e1[0])+q1[1]*e1[0]-sum*(q1[0]+e1[0])+prod;
328: yl = -(q1[2]*e1[1]*q1[1]*e1[0])/xl;
329: xl = -(q1[1]*e1[0]*(q1[0]+e1[0]+q1[1]+e1[1]-sum))/xl;
330: /* the efect of L1 */
331: q1[0] = xr-xl;
332: xr = yr-xl;
333: yr = zr-yl-xl*e1[1];
334: zr = -yl*e1[2];
335: /* the inverse of Y1 */
336: xr = xr/q1[0];
337: yr = yr/q1[0];
338: zr = zr/q1[0];
339: /* the effect of Y1 inverse */
340: e1[0] = xl+yr+xr*q1[1];
341: xl = yl+zr+yr*q1[2];
342: yl = zr*q1[3];
343: /* the effect of Y1 */
344: xr = 1-xr;
345: yr = e1[1]-yr;
346: zr = -zr;
347: /* step i=2,...,n-3 */
348: for (i=1;i<n-3;i++) {
349: /* the efect of Zi */
350: xr = xr*q1[i]+yr;
351: /* the inverse of Li */
352: xl = -xl/e1[i-1];
353: yl = -yl/e1[i-1];
354: /* the efect of Li */
355: q1[i] = xr-xl;
356: xr = yr-xl;
357: yr = zr-yl-xl*e1[i+1];
358: zr = -yl*e1[i+2];
359: /* the inverse of Yi */
360: xr = xr/q1[i];
361: yr = yr/q1[i];
362: zr = zr/q1[i];
363: /* the effect of the inverse of Yi */
364: e1[i] = xl+yr+xr*q1[i+1];
365: xl = yl+zr+yr*q1[i+2];
366: yl = zr*q1[i+3];
367: /* the effects of Yi */
368: xr = 1.0-xr;
369: yr = e1[i+1]-yr;
370: zr = -zr;
371: }
373: /* STEP n-2 zr is no longer needed */
375: /* the efect of Zn-2 */
376: xr = xr*q1[n-3]+yr;
377: /* the inverse of Ln-2 */
378: xl = -xl/e1[n-4];
379: yl = -yl/e1[n-4];
380: /* the efect of Ln-2 */
381: q1[n-3] = xr-xl;
382: xr = yr-xl;
383: yr = zr-yl-xl*e1[n-2];
384: /* the inverse of Yn-2 */
385: xr = xr/q1[n-3];
386: yr = yr/q1[n-3];
387: /* the effect of the inverse of Yn-2 */
388: e1[n-3] = xl+yr+xr*q1[n-2];
389: xl = yl+yr*q1[n-1];
390: /* the effect of Yn-2 */
391: xr = 1.0-xr;
392: yr = e1[n-2]-yr;
394: /* STEP n-1 yl and yr are no longer needed */
395: /* Testing for EARLY DEFLATION */
397: if (PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(xl) || PetscAbsReal(e1[n-3])>tolDef*PetscAbsReal(q1[n-3])) {
398: /* the efect of Zn-1 */
399: xr = xr*q1[n-2]+yr;
400: /* the inverse of Ln-1 */
401: xl = -xl/e1[n-3];
402: /* the efect of Ln-1 */
403: q1[n-2] = xr-xl;
404: xr = yr-xl;
405: /*the inverse of Yn-1 */
406: xr = xr/q1[n-2];
407: /*the effect of the inverse of Yn-1 */
408: e1[n-2] = xl+xr*q1[n-1];
409: /*the effects of Yn-1 */
410: xr = 1.0-xr;
412: /* STEP n; xl no longer needed */
413: /* the effect of Zn */
414: xr = xr*q1[n-1];
415: /* the inverse of Ln = I */
416: /* the effect of Ln */
417: q1[n-1] = xr;
418: /* the inverse of Yn-1=I */
420: } else { /* FREE DEFLATION */
421: e1[n-2] = (e1[n-3]+xr*q1[n-2]+yr+q1[n-1])*0.5; /* sum=trace/2 */
422: q1[n-2] = (e1[n-3]+q1[n-2]*xr)*q1[n-1]-xl; /* det */
423: q1[n-1] = e1[n-2]*e1[n-2]-q1[n-2];
424: *fail = 2;
425: }
427: for (i=0;i<n-1 && !*fail;i++) {
428: if (PetscIsInfOrNanReal(e1[i])) *fail=1;
429: if (PetscIsInfOrNanReal(q1[i])) *fail=1;
430: }
431: if (!*fail && PetscIsInfOrNanReal(q1[n-1])) *fail=1;
432: for (i=0;i<n-1 && !*fail;i++) {
433: if (PetscAbsReal(e1[i])>tol*norm) *fail = 1; /* This demands IEEE arithmetic */
434: if (PetscAbsReal(q1[i])>tol*norm) *fail = 1;
435: }
436: if (!*fail && PetscAbsReal(q1[n-1])>tol*norm) *fail = 1;
438: } else { /* The case n=3 */
439: TridqdsZhuang3(n,e1,q1,sum,prod,tol,norm,tolDef,fail);
440: }
441: return(0);
442: }
446: static PetscErrorCode DSGHIEP_Eigen3DQDS(PetscInt n,PetscReal *a,PetscReal *b,PetscReal *c,PetscScalar *wr,PetscScalar *wi,PetscReal *work)
447: {
448: PetscInt totalIt=0; /* Total Number of Iterations */
449: PetscInt totalFail=0; /* Total number of failures */
450: PetscInt nFail=0; /* Number of failures per transformation */
451: PetscReal tolZero=1.0/16; /* Tolerance for zero shifts */
452: PetscInt maxIt=10*n; /* Maximum number of iterations */
453: PetscInt maxFail=10*n; /* Maximum number of failures allowed per each transformation */
454: PetscReal tolDef=PETSC_MACHINE_EPSILON; /* Tolerance for deflation eps, 10*eps, 100*eps */
455: PetscReal tolGrowth=100000;
457: PetscInt i,k,nwu=0,begin,ind,flag,dim,m,*split,lastSplit;
458: PetscReal norm,gr,gl,sigma,delta,meanEig,*U,*L,*U1,*L1;
459: PetscReal acShift,initialShift,shift=0.0,sum,det,disc,prod,x1,x2;
460: PetscBool test1,test2;
463: dim = n;
464: /* Test if the matrix is unreduced */
465: for (i=0;i<n-1;i++) {
466: if (PetscAbsReal(b[i])==0.0 || PetscAbsReal(c[i])==0.0) SETERRQ(PETSC_COMM_SELF,1,"Initial tridiagonal matrix is not unreduced");
467: }
468: U = work;
469: L = work+n;
470: U1 = work+2*n;
471: L1 = work+3*n;
472: nwu = 4*n;
473: if (wi) {
474: PetscMemzero(wi,n*sizeof(PetscScalar));
475: }
476: /* Normalization - the J form of C */
477: for (i=0;i<n-1;i++) b[i] *= c[i]; /* subdiagonal of the J form */
479: /* Scan matrix J ---- Finding a box of inclusion for the eigenvalues */
480: norm = 0.0;
481: for (i=0;i<n-1;i++) {
482: norm = PetscMax(norm,PetscMax(PetscAbsReal(a[i]),PetscAbsReal(b[i])));
483: }
484: norm = PetscMax(norm,PetscMax(1,PetscAbsReal(a[n-1])));
485: ScanJ(n,a,b,&gl,&gr,&sigma);
486: delta = (gr-gl)/n; /* How much to add to the shift, in case of failure (element growth) */
487: meanEig = 0.0;
488: for (i=0;i<n;i++) meanEig += a[i];
489: meanEig /= n; /* shift = initial shift = mean of eigenvalues */
490: Prologue(n,a,b,gl,gr,&m,&shift,work+nwu);
491: if (m==n) { /* Multiple eigenvalue, we have the one-point spectrum case */
492: for (i=0;i<dim;i++) {
493: wr[i] = shift;
494: if (wi) wi[i] = 0.0;
495: }
496: return(0);
497: }
498: /* Initial LU Factorization */
499: if (delta==0.0) shift=0.0; /* The case when all eigenvalues are pure imaginary */
500: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu); /* flag=1 failure; flag=0 successful transformation*/
501: while (flag==1 && nFail<maxFail) {
502: shift=shift+delta;
503: if (shift>gr || shift<gl) { /* Successive failures */
504: shift=meanEig;
505: delta=-delta;
506: }
507: nFail=nFail+1;
508: LUfac(n,a,b,shift,tolGrowth,norm,L,U,&flag,work+nwu); /* flag=1 failure; flag=0 successful transformation*/
509: }
510: if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached in Initial LU factorization");
511: /* Successful Initial transformation */
512: totalFail = totalFail+nFail;
513: nFail = 0;
514: acShift = 0;
515: initialShift = shift;
516: shift = 0;
517: begin = 0;
518: lastSplit = 0;
519: PetscMalloc1(n,&split);
520: split[lastSplit] = begin;
521: while (begin!=-1) {
522: while (n-begin>2 && totalIt<maxIt) {
523: /* Check for deflation before performing a transformation */
524: test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
525: && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
526: && PetscAbsReal(L[n-2]*U[n])<tolDef*PetscAbsReal(acShift+U[n-1])
527: && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)<tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
528: if (flag==2) { /* Early 2x2 deflation */
529: test2 = PETSC_TRUE;
530: } else {
531: if (n-begin>4) {
532: test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
533: && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
534: } else { /* n-begin+1=3 */
535: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
536: }
537: }
538: while (test2 || test1) {
539: /* 2x2 deflation */
540: if (test2) {
541: if (flag==2) { /* Early deflation */
542: sum = L[n-2];
543: det = U[n-2];
544: disc = U[n-1];
545: flag = 0;
546: } else {
547: sum = (L[n-2]+(U[n-2]+U[n-1]))/2;
548: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
549: det = U[n-2]*U[n-1];
550: }
551: if (disc<=0) {
552: #if !defined(PETSC_USE_COMPLEX)
553: wr[--n] = sum+acShift; if (wi) wi[n] = PetscSqrtReal(-disc);
554: wr[--n] = sum+acShift; if (wi) wi[n] = -PetscSqrtReal(-disc);
555: #else
556: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
557: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
558: #endif
559: } else {
560: if (sum==0.0) {
561: x1 = PetscSqrtReal(disc);
562: x2 = -x1;
563: } else {
564: x1 = ((sum>=0.0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
565: x2 = det/x1;
566: }
567: wr[--n] = x1+acShift;
568: wr[--n] = x2+acShift;
569: }
570: } else { /* test1 -- 1x1 deflation */
571: x1 = U[n-1]+acShift;
572: wr[--n] = x1;
573: }
575: if (n<=begin+2) {
576: break;
577: } else {
578: test1 = (PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-2])
579: && PetscAbsReal(L[n-2])<tolDef*PetscAbsReal(U[n-1]+acShift)
580: && PetscAbsReal(L[n-2]*U[n-1])<tolDef*PetscAbsReal(acShift+U[n-1])
581: && PetscAbsReal(L[n-2])*(PetscAbsReal(U[n-2])+1)< tolDef*PetscAbsReal(acShift+U[n-1]))? PETSC_TRUE: PETSC_FALSE;
582: if (n-begin>4) {
583: test2 = (PetscAbsReal(L[n-3])<tolDef*PetscAbsReal(U[n-3])
584: && PetscAbsReal(L[n-3]*(U[n-4]+L[n-4]))< tolDef*PetscAbsReal(U[n-4]*(U[n-3]+L[n-3])+L[n-4]*L[n-3]))? PETSC_TRUE: PETSC_FALSE;
585: } else { /* n-begin+1=3 */
586: test2 = (PetscAbsReal(L[begin])<tolDef*PetscAbsReal(U[begin]))? PETSC_TRUE: PETSC_FALSE;
587: }
588: }
589: } /* end "WHILE deflations" */
590: /* After deflation */
591: if (n>begin+3) {
592: ind = begin;
593: for (k=n-4;k>=begin+1;k--) {
594: if (PetscAbsReal(L[k])<tolDef*PetscAbsReal(U[k])
595: && PetscAbsReal(L[k]*U[k+1]*(U[k+2]+L[k+2])*(U[k-1]+L[k-1]))<tolDef*PetscAbsReal((U[k-1]*(U[k]+L[k])+L[k-1]*L[k])*(U[k+1]*(U[k+2]+L[k+2])+L[k+1]*L[k+2]))) {
596: ind=k;
597: break;
598: }
599: }
600: if (ind>begin || PetscAbsReal(L[begin]) <tolDef*PetscAbsReal(U[begin])) {
601: lastSplit = lastSplit+1;
602: split[lastSplit] = begin;
603: L[ind] = acShift; /* Use of L[ind] to save acShift */
604: begin = ind+1;
605: }
606: }
608: if (n>begin+2) {
609: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
610: if ((PetscAbsReal(L[n-2])>tolZero) && (PetscAbsReal(L[n-3])>tolZero)) { /* L's are big */
611: shift = 0;
612: RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
613: if (flag) { /* Failure */
614: TridqdsZhuang(n-begin,L+begin,U+begin,0.0,0.0,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
615: shift = 0.0;
616: while (flag==1 && nFail<maxFail) { /* Successive failures */
617: shift = shift+delta;
618: if (shift>gr-acShift || shift<gl-acShift) {
619: shift = meanEig-acShift;
620: delta = -delta;
621: }
622: nFail++;
623: RealDQDS(n-begin,L+begin,U+begin,0,tolGrowth,norm,L1+begin,U1+begin,&flag);
624: }
625: }
626: } else { /* L's are small */
627: if (disc<0) { /* disc <0 Complex case; Francis shift; 3dqds */
628: sum = U[n-2]+L[n-2]+U[n-1];
629: prod = U[n-2]*U[n-1];
630: TridqdsZhuang(n-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
631: shift = 0.0; /* Restoring transformation */
632: while (flag==1 && nFail<maxFail) { /* In case of failure */
633: shift = shift+U[n-1]; /* first time shift=0 */
634: RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
635: nFail++;
636: }
637: } else { /* disc >0 Real case; real Wilkinson shift; dqds */
638: sum = (L[n-2]+U[n-2]+U[n-1])/2;
639: if (sum==0.0) {
640: x1 = PetscSqrtReal(disc);
641: x2 = -x1;
642: } else {
643: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
644: x2 = U[n-2]*U[n-1]/x1;
645: }
646: /* Take the eigenvalue closest to UL(n,n) */
647: if (PetscAbsReal(x1-U[n-1])<PetscAbsReal(x2-U[n-1])) {
648: shift = x1;
649: } else {
650: shift = x2;
651: }
652: RealDQDS(n-begin,L+begin,U+begin,shift,tolGrowth,norm,L1+begin,U1+begin,&flag);
653: /* In case of failure */
654: while (flag==1 && nFail<maxFail) {
655: sum = 2*shift;
656: prod = shift*shift;
657: TridqdsZhuang(n-1-begin,L+begin,U+begin,sum,prod,tolGrowth,norm,tolDef,L1+begin,U1+begin,&flag);
658: /* In case of successive failures */
659: if (shift==0.0) {
660: shift = PetscMin(PetscAbsReal(L[n-2]),PetscAbsReal(L[n-3]))*delta;
661: } else {
662: shift=shift+delta;
663: }
664: if (shift>gr-acShift || shift<gl-acShift) {
665: shift = meanEig-acShift;
666: delta = -delta;
667: }
668: if (!flag) { /* We changed from real dqds to 3dqds */
669: shift=0;
670: }
671: nFail++;
672: }
673: }
674: } /* end "if tolZero" */
675: if (nFail==maxFail) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of failures reached. No convergence in DQDS");
676: /* Successful Transformation; flag==0 */
677: totalIt++;
678: acShift = shift+acShift;
679: for (i=begin;i<n-1;i++) {
680: L[i] = L1[i];
681: U[i] = U1[i];
682: }
683: U[n-1] = U1[n-1];
684: totalFail = totalFail+nFail;
685: nFail = 0;
686: } /* end "if n>begin+1" */
687: } /* end WHILE 1 */
688: if (totalIt>=maxIt) SETERRQ(PETSC_COMM_SELF,1,"Maximun number of iterations reached. No convergence in DQDS");
689: /* END: n=2 or n=1 % n=begin+1 or n=begin */
690: if (n==begin+2) {
691: sum = (L[n-2]+U[n-2]+U[n-1])/2;
692: disc = (L[n-2]*(L[n-2]+2*(U[n-2]+U[n-1]))+(U[n-2]-U[n-1])*(U[n-2]-U[n-1]))/4;
693: if (disc<=0) { /* Complex case */
694: /* Deflation 2 */
695: #if !defined(PETSC_USE_COMPLEX)
696: wr[--n] = sum+acShift; if (wi) wi[n] = PetscSqrtReal(-disc);
697: wr[--n] = sum+acShift; if (wi) wi[n] = -PetscSqrtReal(-disc);
698: #else
699: wr[--n] = sum-PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
700: wr[--n] = sum+PETSC_i*PetscSqrtReal(-disc)+acShift; if (wi) wi[n] = 0.0;
701: #endif
702: } else { /* Real case */
703: if (sum==0.0) {
704: x1 = PetscSqrtReal(disc);
705: x2 = -x1;
706: } else {
707: x1 = ((sum>=0)?1.0:-1.0)*(PetscAbsReal(sum)+PetscSqrtReal(disc));
708: x2 = U[n-2]*U[n-1]/x1;
709: }
710: /* Deflation 2 */
711: wr[--n] = x2+acShift;
712: wr[--n] = x1+acShift;
713: }
714: } else { /* n=1 n=begin */
715: /* deflation 1 */
716: x1 = U[n-1]+acShift;
717: wr[--n] = x1;
718: }
719: switch (n) {
720: case 0:
721: begin = -1;
722: break;
723: case 1:
724: acShift = L[begin-1];
725: begin = split[lastSplit];
726: lastSplit--;
727: break;
728: default : /* n>=2 */
729: acShift = L[begin-1];
730: begin = split[lastSplit];
731: lastSplit--;
732: }
733: }/* While begin~=-1 */
734: for (i=0;i<dim;i++) {
735: wr[i] = wr[i]+initialShift;
736: }
737: PetscFree(split);
738: return(0);
739: }
743: PetscErrorCode DSSolve_GHIEP_DQDS_II(DS ds,PetscScalar *wr,PetscScalar *wi)
744: {
746: PetscInt i,off,ld,nwall,nwu;
747: PetscScalar *A,*B,*Q,*vi;
748: PetscReal *d,*e,*s,*a,*b,*c;
751: #if !defined(PETSC_USE_COMPLEX)
753: #endif
754: ld = ds->ld;
755: off = ds->l + ds->l*ld;
756: A = ds->mat[DS_MAT_A];
757: B = ds->mat[DS_MAT_B];
758: Q = ds->mat[DS_MAT_Q];
759: d = ds->rmat[DS_MAT_T];
760: e = ds->rmat[DS_MAT_T] + ld;
761: s = ds->rmat[DS_MAT_D];
762: /* Quick return if possible */
763: if (ds->n-ds->l == 1) {
764: *(Q+off) = 1;
765: if (!ds->compact) {
766: d[ds->l] = PetscRealPart(A[off]);
767: s[ds->l] = PetscRealPart(B[off]);
768: }
769: wr[ds->l] = d[ds->l]/s[ds->l];
770: if (wi) wi[ds->l] = 0.0;
771: return(0);
772: }
773: nwall = 12*ld+4;
774: DSAllocateWork_Private(ds,0,nwall,0);
775: /* Reduce to pseudotriadiagonal form */
776: DSIntermediate_GHIEP(ds);
778: /* Compute Eigenvalues (DQDS) */
779: /* Form pseudosymmetric tridiagonal */
780: a = ds->rwork;
781: b = a+ld;
782: c = b+ld;
783: nwu = 3*ld;
784: if (ds->compact) {
785: for (i=ds->l;i<ds->n-1;i++) {
786: a[i] = d[i]*s[i];
787: b[i] = e[i]*s[i+1];
788: c[i] = e[i]*s[i];
789: }
790: a[ds->n-1] = d[ds->n-1]*s[ds->n-1];
791: } else {
792: for (i=ds->l;i<ds->n-1;i++) {
793: a[i] = PetscRealPart(A[i+i*ld]*B[i+i*ld]);
794: b[i] = PetscRealPart(A[i+1+i*ld]*s[i+1]);
795: c[i] = PetscRealPart(A[i+(i+1)*ld]*s[i]);
796: }
797: a[ds->n-1] = PetscRealPart(A[ds->n-1+(ds->n-1)*ld]*B[ds->n-1+(ds->n-1)*ld]);
798: }
799: vi = (wi)?wi+ds->l:NULL;
800: DSGHIEP_Eigen3DQDS(ds->n-ds->l,a+ds->l,b+ds->l,c+ds->l,wr+ds->l,vi,ds->rwork+nwu);
802: /* Compute Eigenvectors with Inverse Iteration */
803: DSGHIEPInverseIteration(ds,wr,wi);
805: /* Recover eigenvalues from diagonal */
806: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
807: #if defined(PETSC_USE_COMPLEX)
808: if (wi) {
809: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
810: }
811: #endif
812: return(0);
813: }