Actual source code: dshep.c

slepc-3.7.4 2017-05-17
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.

  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <slepc/private/dsimpl.h>
 23: #include <slepcblaslapack.h>

 27: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 28: {

 32:   DSAllocateMat_Private(ds,DS_MAT_A);
 33:   DSAllocateMat_Private(ds,DS_MAT_Q);
 34:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 35:   PetscFree(ds->perm);
 36:   PetscMalloc1(ld,&ds->perm);
 37:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 38:   return(0);
 39: }

 41: /*   0       l           k                 n-1
 42:     -----------------------------------------
 43:     |*       .           .                  |
 44:     |  *     .           .                  |
 45:     |    *   .           .                  |
 46:     |      * .           .                  |
 47:     |. . . . o           o                  |
 48:     |          o         o                  |
 49:     |            o       o                  |
 50:     |              o     o                  |
 51:     |                o   o                  |
 52:     |                  o o                  |
 53:     |. . . . o o o o o o o x                |
 54:     |                    x x x              |
 55:     |                      x x x            |
 56:     |                        x x x          |
 57:     |                          x x x        |
 58:     |                            x x x      |
 59:     |                              x x x    |
 60:     |                                x x x  |
 61:     |                                  x x x|
 62:     |                                    x x|
 63:     -----------------------------------------
 64: */

 68: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
 69: {
 71:   PetscReal      *T = ds->rmat[DS_MAT_T];
 72:   PetscScalar    *A = ds->mat[DS_MAT_A];
 73:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 76:   if (tocompact) { /* switch from dense (arrow) to compact storage */
 77:     PetscMemzero(T,3*ld*sizeof(PetscReal));
 78:     for (i=0;i<k;i++) {
 79:       T[i] = PetscRealPart(A[i+i*ld]);
 80:       T[i+ld] = PetscRealPart(A[k+i*ld]);
 81:     }
 82:     for (i=k;i<n-1;i++) {
 83:       T[i] = PetscRealPart(A[i+i*ld]);
 84:       T[i+ld] = PetscRealPart(A[i+1+i*ld]);
 85:     }
 86:     T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
 87:     if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
 88:   } else { /* switch from compact (arrow) to dense storage */
 89:     PetscMemzero(A,ld*ld*sizeof(PetscScalar));
 90:     for (i=0;i<k;i++) {
 91:       A[i+i*ld] = T[i];
 92:       A[k+i*ld] = T[i+ld];
 93:       A[i+k*ld] = T[i+ld];
 94:     }
 95:     A[k+k*ld] = T[k];
 96:     for (i=k+1;i<n;i++) {
 97:       A[i+i*ld] = T[i];
 98:       A[i-1+i*ld] = T[i-1+ld];
 99:       A[i+(i-1)*ld] = T[i-1+ld];
100:     }
101:     if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
102:   }
103:   return(0);
104: }

108: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
109: {
110:   PetscErrorCode    ierr;
111:   PetscViewerFormat format;
112:   PetscInt          i,j,r,c,rows;
113:   PetscReal         value;
114:   const char        *methodname[] = {
115:                      "Implicit QR method (_steqr)",
116:                      "Relatively Robust Representations (_stevr)",
117:                      "Divide and Conquer method (_stedc)",
118:                      "Block Divide and Conquer method (dsbtdc)"
119:   };
120:   const int         nmeth=sizeof(methodname)/sizeof(methodname[0]);

123:   PetscViewerGetFormat(viewer,&format);
124:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
125:     if (ds->bs>1) {
126:       PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
127:     }
128:     if (ds->method>=nmeth) {
129:       PetscViewerASCIIPrintf(viewer,"solving the problem with: INVALID METHOD\n");
130:     } else {
131:       PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
132:     }
133:     return(0);
134:   }
135:   if (ds->compact) {
136:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
137:     rows = ds->extrarow? ds->n+1: ds->n;
138:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
139:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
140:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
141:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
142:       for (i=0;i<ds->n;i++) {
143:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
144:       }
145:       for (i=0;i<rows-1;i++) {
146:         r = PetscMax(i+2,ds->k+1);
147:         c = i+1;
148:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",r,c,*(ds->rmat[DS_MAT_T]+ds->ld+i));
149:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
150:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",c,r,*(ds->rmat[DS_MAT_T]+ds->ld+i));
151:         }
152:       }
153:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
154:     } else {
155:       for (i=0;i<rows;i++) {
156:         for (j=0;j<ds->n;j++) {
157:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
158:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
159:           else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
160:           else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
161:           else value = 0.0;
162:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
163:         }
164:         PetscViewerASCIIPrintf(viewer,"\n");
165:       }
166:     }
167:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
168:     PetscViewerFlush(viewer);
169:   } else {
170:     DSViewMat(ds,viewer,DS_MAT_A);
171:   }
172:   if (ds->state>DS_STATE_INTERMEDIATE) {
173:     DSViewMat(ds,viewer,DS_MAT_Q);
174:   }
175:   return(0);
176: }

180: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
181: {
182:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
183:   PetscInt       ld = ds->ld,i;

187:   switch (mat) {
188:     case DS_MAT_X:
189:     case DS_MAT_Y:
190:       if (j) {
191:         if (ds->state>=DS_STATE_CONDENSED) {
192:           PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
193:         } else {
194:           PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
195:           *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
196:         }
197:       } else {
198:         if (ds->state>=DS_STATE_CONDENSED) {
199:           PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
200:         } else {
201:           PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
202:           for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
203:         }
204:       }
205:       if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
206:       break;
207:     case DS_MAT_U:
208:     case DS_MAT_VT:
209:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
210:       break;
211:     default:
212:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
213:   }
214:   return(0);
215: }

219: PetscErrorCode DSNormalize_HEP(DS ds,DSMatType mat,PetscInt col)
220: {
222:   switch (mat) {
223:     case DS_MAT_X:
224:     case DS_MAT_Y:
225:     case DS_MAT_Q:
226:       /* All the matrices resulting from DSVectors and DSSolve are already normalized */
227:       break;
228:     case DS_MAT_U:
229:     case DS_MAT_VT:
230:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
231:       break;
232:     default:
233:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
234:   }
235:   return(0);
236: }

240: /*
241:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

243:                 [ d 0 0 0 e ]
244:                 [ 0 d 0 0 e ]
245:             A = [ 0 0 d 0 e ]
246:                 [ 0 0 0 d e ]
247:                 [ e e e e d ]

249:   to tridiagonal form

251:                 [ d e 0 0 0 ]
252:                 [ e d e 0 0 ]
253:    T = Q'*A*Q = [ 0 e d e 0 ],
254:                 [ 0 0 e d e ]
255:                 [ 0 0 0 e d ]

257:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
258:   perform the reduction, which requires O(n**2) flops. The accumulation
259:   of the orthogonal factor Q, however, requires O(n**3) flops.

261:   Arguments
262:   =========

264:   N       (input) INTEGER
265:           The order of the matrix A.  N >= 0.

267:   D       (input/output) DOUBLE PRECISION array, dimension (N)
268:           On entry, the diagonal entries of the matrix A to be
269:           reduced.
270:           On exit, the diagonal entries of the reduced matrix T.

272:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
273:           On entry, the off-diagonal entries of the matrix A to be
274:           reduced.
275:           On exit, the subdiagonal entries of the reduced matrix T.

277:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
278:           On exit, the orthogonal matrix Q.

280:   LDQ     (input) INTEGER
281:           The leading dimension of the array Q.

283:   Note
284:   ====
285:   Based on Fortran code contributed by Daniel Kressner
286: */
287: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
288: {
289: #if defined(SLEPC_MISSING_LAPACK_LARTG)
291:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
292: #else
293:   PetscBLASInt i,j,j2,one=1;
294:   PetscReal    c,s,p,off,temp;

297:   if (n<=2) return(0);

299:   for (j=0;j<n-2;j++) {

301:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
302:     temp = e[j+1];
303:     PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&e[j],&c,&s,&e[j+1]));
304:     s = -s;

306:     /* Apply rotation to diagonal elements */
307:     temp   = d[j+1];
308:     e[j]   = c*s*(temp-d[j]);
309:     d[j+1] = s*s*d[j] + c*c*temp;
310:     d[j]   = c*c*d[j] + s*s*temp;

312:     /* Apply rotation to Q */
313:     j2 = j+2;
314:     PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

316:     /* Chase newly introduced off-diagonal entry to the top left corner */
317:     for (i=j-1;i>=0;i--) {
318:       off  = -s*e[i];
319:       e[i] = c*e[i];
320:       temp = e[i+1];
321:       PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&off,&c,&s,&e[i+1]));
322:       s = -s;
323:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
324:       p = s*temp;
325:       d[i+1] += p;
326:       d[i] -= p;
327:       e[i] = -e[i] - c*temp;
328:       j2 = j+2;
329:       PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
330:     }
331:   }
332:   return(0);
333: #endif
334: }

338: /*
339:    Reduce to tridiagonal form by means of ArrowTridiag.
340: */
341: static PetscErrorCode DSIntermediate_HEP(DS ds)
342: {
343: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
345:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
346: #else
348:   PetscInt       i;
349:   PetscBLASInt   n1,n2,n3,lwork,info,l,n,ld,off;
350:   PetscScalar    *A,*Q,*work,*tau;
351:   PetscReal      *d,*e;

354:   PetscBLASIntCast(ds->n,&n);
355:   PetscBLASIntCast(ds->l,&l);
356:   PetscBLASIntCast(ds->ld,&ld);
357:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
358:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
359:   n3 = n1+n2;
360:   off = l+l*ld;
361:   A  = ds->mat[DS_MAT_A];
362:   Q  = ds->mat[DS_MAT_Q];
363:   d  = ds->rmat[DS_MAT_T];
364:   e  = ds->rmat[DS_MAT_T]+ld;
365:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
366:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;

368:   if (ds->compact) {

370:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

372:   } else {

374:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

376:     if (ds->state<DS_STATE_INTERMEDIATE) {
377:       DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
378:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
379:       tau  = ds->work;
380:       work = ds->work+ld;
381:       lwork = ld*ld;
382:       PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
383:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSYTRD %d",info);
384:       PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
385:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGTR %d",info);
386:     } else {
387:       /* copy tridiagonal to d,e */
388:       for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
389:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
390:     }
391:   }
392:   return(0);
393: #endif
394: }

398: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
399: {
401:   PetscInt       n,l,i,*perm,ld=ds->ld;
402:   PetscScalar    *A;
403:   PetscReal      *d;

406:   if (!ds->sc) return(0);
407:   n = ds->n;
408:   l = ds->l;
409:   A = ds->mat[DS_MAT_A];
410:   d = ds->rmat[DS_MAT_T];
411:   perm = ds->perm;
412:   if (!rr) {
413:     DSSortEigenvaluesReal_Private(ds,d,perm);
414:   } else {
415:     DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
416:   }
417:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
418:   DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
419:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
420:   if (!ds->compact) {
421:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
422:   }
423:   return(0);
424: }

428: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
429: {
431:   PetscInt       i;
432:   PetscBLASInt   n,ld,incx=1;
433:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;
434:   PetscReal      *e,beta;

437:   PetscBLASIntCast(ds->n,&n);
438:   PetscBLASIntCast(ds->ld,&ld);
439:   A  = ds->mat[DS_MAT_A];
440:   Q  = ds->mat[DS_MAT_Q];
441:   e  = ds->rmat[DS_MAT_T]+ld;

443:   if (ds->compact) {
444:     beta = e[n-1];   /* in compact, we assume all entries are zero except the last one */
445:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
446:     ds->k = n;
447:   } else {
448:     DSAllocateWork_Private(ds,2*ld,0,0);
449:     x = ds->work;
450:     y = ds->work+ld;
451:     for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
452:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
453:     for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
454:     ds->k = n;
455:   }
456:   return(0);
457: }

461: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
462: {
463: #if defined(PETSC_MISSING_LAPACK_STEQR)
465:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
466: #else
468:   PetscInt       i;
469:   PetscBLASInt   n1,n2,n3,info,l,n,ld,off;
470:   PetscScalar    *Q,*A;
471:   PetscReal      *d,*e;

474:   if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
475:   PetscBLASIntCast(ds->n,&n);
476:   PetscBLASIntCast(ds->l,&l);
477:   PetscBLASIntCast(ds->ld,&ld);
478:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
479:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
480:   n3 = n1+n2;
481:   off = l+l*ld;
482:   Q  = ds->mat[DS_MAT_Q];
483:   A  = ds->mat[DS_MAT_A];
484:   d  = ds->rmat[DS_MAT_T];
485:   e  = ds->rmat[DS_MAT_T]+ld;

487:   /* Reduce to tridiagonal form */
488:   DSIntermediate_HEP(ds);

490:   /* Solve the tridiagonal eigenproblem */
491:   for (i=0;i<l;i++) wr[i] = d[i];

493:   DSAllocateWork_Private(ds,0,2*ld,0);
494:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
495:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEQR %d",info);
496:   for (i=l;i<n;i++) wr[i] = d[i];

498:   /* Create diagonal matrix as a result */
499:   if (ds->compact) {
500:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
501:   } else {
502:     for (i=l;i<n;i++) {
503:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
504:     }
505:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
506:   }

508:   /* Set zero wi */
509:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
510:   return(0);
511: #endif
512: }

516: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
517: {
518: #if defined(SLEPC_MISSING_LAPACK_STEVR)
520:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
521: #else
523:   PetscInt       i;
524:   PetscBLASInt   n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
525:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
526:   PetscReal      *d,*e,abstol=0.0,vl,vu;
527: #if defined(PETSC_USE_COMPLEX)
528:   PetscInt       j;
529:   PetscReal      *ritz;
530: #endif

533:   if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
534:   PetscBLASIntCast(ds->n,&n);
535:   PetscBLASIntCast(ds->l,&l);
536:   PetscBLASIntCast(ds->ld,&ld);
537:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
538:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
539:   n3 = n1+n2;
540:   off = l+l*ld;
541:   A  = ds->mat[DS_MAT_A];
542:   Q  = ds->mat[DS_MAT_Q];
543:   d  = ds->rmat[DS_MAT_T];
544:   e  = ds->rmat[DS_MAT_T]+ld;

546:   /* Reduce to tridiagonal form */
547:   DSIntermediate_HEP(ds);

549:   /* Solve the tridiagonal eigenproblem */
550:   for (i=0;i<l;i++) wr[i] = d[i];

552:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
553:     DSAllocateMat_Private(ds,DS_MAT_W);
554:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
555:     W = ds->mat[DS_MAT_W];
556:   }
557: #if defined(PETSC_USE_COMPLEX)
558:   DSAllocateMatReal_Private(ds,DS_MAT_Q);
559: #endif
560:   lwork = 20*ld;
561:   liwork = 10*ld;
562:   DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
563:   isuppz = ds->iwork+liwork;
564: #if defined(PETSC_USE_COMPLEX)
565:   ritz = ds->rwork+lwork;
566:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
567:   for (i=l;i<n;i++) wr[i] = ritz[i];
568: #else
569:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
570: #endif
571:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack DSTEVR %d",info);
572: #if defined(PETSC_USE_COMPLEX)
573:   for (i=l;i<n;i++)
574:     for (j=l;j<n;j++)
575:       Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
576: #endif
577:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
578:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
579:     DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
580:   }
581:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

583:   /* Create diagonal matrix as a result */
584:   if (ds->compact) {
585:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
586:   } else {
587:     for (i=l;i<n;i++) {
588:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
589:     }
590:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
591:   }

593:   /* Set zero wi */
594:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
595:   return(0);
596: #endif
597: }

601: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
602: {
603: #if defined(SLEPC_MISSING_LAPACK_STEDC)
605:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
606: #else
608:   PetscInt       i;
609:   PetscBLASInt   n1,info,l,ld,off,lrwork,liwork;
610:   PetscScalar    *Q,*A;
611:   PetscReal      *d,*e;
612: #if defined(PETSC_USE_COMPLEX)
613:   PetscBLASInt   lwork;
614:   PetscInt       j;
615: #endif

618:   if (ds->bs>1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for bs>1");
619:   PetscBLASIntCast(ds->l,&l);
620:   PetscBLASIntCast(ds->ld,&ld);
621:   PetscBLASIntCast(ds->n-ds->l,&n1);
622:   off = l+l*ld;
623:   Q  = ds->mat[DS_MAT_Q];
624:   A  = ds->mat[DS_MAT_A];
625:   d  = ds->rmat[DS_MAT_T];
626:   e  = ds->rmat[DS_MAT_T]+ld;

628:   /* Reduce to tridiagonal form */
629:   DSIntermediate_HEP(ds);

631:   /* Solve the tridiagonal eigenproblem */
632:   for (i=0;i<l;i++) wr[i] = d[i];

634:   lrwork = 5*n1*n1+3*n1+1;
635:   liwork = 5*n1*n1+6*n1+6;
636: #if !defined(PETSC_USE_COMPLEX)
637:   DSAllocateWork_Private(ds,0,lrwork,liwork);
638:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
639: #else
640:   lwork = ld*ld;
641:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
642:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
643:   /* Fixing Lapack bug*/
644:   for (j=ds->l;j<ds->n;j++)
645:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
646: #endif
647:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEDC %d",info);
648:   for (i=l;i<ds->n;i++) wr[i] = d[i];

650:   /* Create diagonal matrix as a result */
651:   if (ds->compact) {
652:     PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
653:   } else {
654:     for (i=l;i<ds->n;i++) {
655:       PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
656:     }
657:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
658:   }

660:   /* Set zero wi */
661:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
662:   return(0);
663: #endif
664: }

666: #if !defined(PETSC_USE_COMPLEX)
669: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
670: {
672:   PetscBLASInt   i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
673:   PetscScalar    *Q,*A;
674:   PetscReal      *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;

677:   if (ds->l>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This method is not prepared for l>1");
678:   if (ds->compact) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for compact storage");
679:   PetscBLASIntCast(ds->ld,&ld);
680:   PetscBLASIntCast(ds->bs,&bs);
681:   PetscBLASIntCast(ds->n,&n);
682:   nblks = n/bs;
683:   Q  = ds->mat[DS_MAT_Q];
684:   A  = ds->mat[DS_MAT_A];
685:   d  = ds->rmat[DS_MAT_T];
686:   e  = ds->rmat[DS_MAT_T]+ld;
687:   lrwork = 4*n*n+60*n+1;
688:   liwork = 5*n+5*nblks-1;
689:   lde = 2*bs+1;
690:   DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
691:   D      = ds->work;
692:   E      = ds->work+bs*n;
693:   rwork  = ds->rwork;
694:   ksizes = ds->iwork;
695:   iwork  = ds->iwork+nblks;
696:   PetscMemzero(iwork,liwork*sizeof(PetscBLASInt));

698:   /* Copy matrix to block tridiagonal format */
699:   j=0;
700:   for (i=0;i<nblks;i++) {
701:     ksizes[i]=bs;
702:     for (k=0;k<bs;k++)
703:       for (m=0;m<bs;m++)
704:         D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
705:     j = j + bs;
706:   }
707:   j=0;
708:   for (i=0;i<nblks-1;i++) {
709:     for (k=0;k<bs;k++)
710:       for (m=0;m<bs;m++)
711:         E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
712:     j = j + bs;
713:   }

715:   /* Solve the block tridiagonal eigenproblem */
716:   BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
717:            Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
718:   for (i=0;i<ds->n;i++) wr[i] = d[i];

720:   /* Create diagonal matrix as a result */
721:   if (ds->compact) {
722:     PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
723:   } else {
724:     for (i=0;i<ds->n;i++) {
725:       PetscMemzero(A+i*ld,ds->n*sizeof(PetscScalar));
726:     }
727:     for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
728:   }

730:   /* Set zero wi */
731:   if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
732:   return(0);
733: }
734: #endif

738: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
739: {
740:   PetscInt       i,ld=ds->ld,l=ds->l;
741:   PetscScalar    *A;

744:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
745:   A = ds->mat[DS_MAT_A];
746:   if (!ds->compact && ds->extrarow && ds->k==ds->n) {
747:     for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
748:   }
749:   if (ds->extrarow) ds->k = n;
750:   else ds->k = 0;
751:   ds->n = n;
752:   return(0);
753: }

757: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
758: {
759: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
761:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
762: #else
764:   PetscScalar    *work;
765:   PetscReal      *rwork;
766:   PetscBLASInt   *ipiv;
767:   PetscBLASInt   lwork,info,n,ld;
768:   PetscReal      hn,hin;
769:   PetscScalar    *A;

772:   PetscBLASIntCast(ds->n,&n);
773:   PetscBLASIntCast(ds->ld,&ld);
774:   lwork = 8*ld;
775:   DSAllocateWork_Private(ds,lwork,ld,ld);
776:   work  = ds->work;
777:   rwork = ds->rwork;
778:   ipiv  = ds->iwork;
779:   DSSwitchFormat_HEP(ds,PETSC_FALSE);

781:   /* use workspace matrix W to avoid overwriting A */
782:   DSAllocateMat_Private(ds,DS_MAT_W);
783:   A = ds->mat[DS_MAT_W];
784:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

786:   /* norm of A */
787:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

789:   /* norm of inv(A) */
790:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
791:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
792:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
793:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
794:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

796:   *cond = hn*hin;
797:   return(0);
798: #endif
799: }

803: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
804: {
805: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
807:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
808: #else
810:   PetscInt       i,j,k=ds->k;
811:   PetscScalar    *Q,*A,*R,*tau,*work;
812:   PetscBLASInt   ld,n1,n0,lwork,info;

815:   PetscBLASIntCast(ds->ld,&ld);
816:   DSAllocateWork_Private(ds,ld*ld,0,0);
817:   tau = ds->work;
818:   work = ds->work+ld;
819:   PetscBLASIntCast(ld*(ld-1),&lwork);
820:   DSAllocateMat_Private(ds,DS_MAT_W);
821:   A  = ds->mat[DS_MAT_A];
822:   Q  = ds->mat[DS_MAT_Q];
823:   R  = ds->mat[DS_MAT_W];
824:   /* Copy I+alpha*A */
825:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
826:   PetscMemzero(R,ld*ld*sizeof(PetscScalar));
827:   for (i=0;i<k;i++) {
828:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
829:     Q[k+i*ld] = alpha*A[k+i*ld];
830:   }
831:   /* Compute qr */
832:   PetscBLASIntCast(k+1,&n1);
833:   PetscBLASIntCast(k,&n0);
834:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
835:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
836:   /* Copy R from Q */
837:   for (j=0;j<k;j++)
838:     for (i=0;i<=j;i++)
839:       R[i+j*ld] = Q[i+j*ld];
840:   /* Compute orthogonal matrix in Q */
841:   PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
842:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
843:   /* Compute the updated matrix of projected problem */
844:   for (j=0;j<k;j++)
845:     for (i=0;i<k+1;i++)
846:       A[j*ld+i] = Q[i*ld+j];
847:   alpha = -1.0/alpha;
848:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
849:   for (i=0;i<k;i++)
850:     A[ld*i+i]-=alpha;
851:   return(0);
852: #endif
853: }

857: PETSC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
858: {
860:   ds->ops->allocate      = DSAllocate_HEP;
861:   ds->ops->view          = DSView_HEP;
862:   ds->ops->vectors       = DSVectors_HEP;
863:   ds->ops->solve[0]      = DSSolve_HEP_QR;
864:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
865:   ds->ops->solve[2]      = DSSolve_HEP_DC;
866: #if !defined(PETSC_USE_COMPLEX)
867:   ds->ops->solve[3]      = DSSolve_HEP_BDC;
868: #endif
869:   ds->ops->sort          = DSSort_HEP;
870:   ds->ops->truncate      = DSTruncate_HEP;
871:   ds->ops->update        = DSUpdateExtraRow_HEP;
872:   ds->ops->cond          = DSCond_HEP;
873:   ds->ops->transrks      = DSTranslateRKS_HEP;
874:   ds->ops->normalize     = DSNormalize_HEP;
875:   return(0);
876: }