Actual source code: fnutil.c
slepc-3.7.4 2017-05-17
1: /*
2: Utility subroutines common to several impls
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
25: #include <slepcblaslapack.h>
29: /*
30: Compute the square root of an upper quasi-triangular matrix T,
31: using Higham's algorithm (LAA 88, 1987). T is overwritten with sqrtm(T).
32: */
33: PetscErrorCode SlepcMatDenseSqrt(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld)
34: {
35: #if defined(SLEPC_MISSING_LAPACK_TRSYL)
37: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSYL - Lapack routine unavailable");
38: #else
39: PetscScalar one=1.0,mone=-1.0;
40: PetscReal done=1.0;
41: PetscBLASInt i,j,si,sj,r,ione=1,info;
42: #if !defined(PETSC_USE_COMPLEX)
43: PetscReal alpha,theta,mu,mu2;
44: #endif
47: for (j=0;j<n;j++) {
48: #if defined(PETSC_USE_COMPLEX)
49: sj = 1;
50: T[j+j*ld] = PetscSqrtScalar(T[j+j*ld]);
51: #else
52: sj = (j==n-1 || T[j+1+j*ld] == 0.0)? 1: 2;
53: if (sj==1) {
54: if (T[j+j*ld]<0.0) SETERRQ(PETSC_COMM_SELF,1,"Matrix has a real negative eigenvalue, no real primary square root exists");
55: T[j+j*ld] = PetscSqrtReal(T[j+j*ld]);
56: } else {
57: /* square root of 2x2 block */
58: theta = (T[j+j*ld]+T[j+1+(j+1)*ld])/2.0;
59: mu = (T[j+j*ld]-T[j+1+(j+1)*ld])/2.0;
60: mu2 = -mu*mu-T[j+1+j*ld]*T[j+(j+1)*ld];
61: mu = PetscSqrtReal(mu2);
62: if (theta>0.0) alpha = PetscSqrtReal((theta+PetscSqrtReal(theta*theta+mu2))/2.0);
63: else alpha = mu/PetscSqrtReal(2.0*(-theta+PetscSqrtReal(theta*theta+mu2)));
64: T[j+j*ld] /= 2.0*alpha;
65: T[j+1+(j+1)*ld] /= 2.0*alpha;
66: T[j+(j+1)*ld] /= 2.0*alpha;
67: T[j+1+j*ld] /= 2.0*alpha;
68: T[j+j*ld] += alpha-theta/(2.0*alpha);
69: T[j+1+(j+1)*ld] += alpha-theta/(2.0*alpha);
70: }
71: #endif
72: for (i=j-1;i>=0;i--) {
73: #if defined(PETSC_USE_COMPLEX)
74: si = 1;
75: #else
76: si = (i==0 || T[i+(i-1)*ld] == 0.0)? 1: 2;
77: if (si==2) i--;
78: #endif
79: /* solve Sylvester equation of order si x sj */
80: r = j-i-si;
81: if (r) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&si,&sj,&r,&mone,T+i+(i+si)*ld,&ld,T+i+si+j*ld,&ld,&one,T+i+j*ld,&ld));
82: PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,&si,&sj,T+i+i*ld,&ld,T+j+j*ld,&ld,T+i+j*ld,&ld,&done,&info));
83: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSYL %d",info);
84: }
85: if (sj==2) j++;
86: }
87: return(0);
88: #endif
89: }
91: #define BLOCKSIZE 64
95: /*
96: Simplified Schur-Parlett algorithm on an upper quasi-triangular matrix T,
97: particularized for the square root function. T is overwritten with sqrtm(T).
98: If firstonly then only the first column of T will contain relevant values.
99: */
100: PetscErrorCode SlepcSchurParlettSqrt(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
101: {
102: #if defined(SLEPC_MISSING_LAPACK_GEES) || defined(SLEPC_MISSING_LAPACK_TRSYL)
104: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEES/TRSYL - Lapack routines are unavailable");
105: #else
107: PetscBLASInt i,j,k,r,ione=1,sdim,lwork,*s,*p,info,bs=BLOCKSIZE;
108: PetscScalar *wr,*W,*Q,*work,one=1.0,zero=0.0,mone=-1.0;
109: PetscInt m,nblk;
110: PetscReal done=1.0;
111: #if defined(PETSC_USE_COMPLEX)
112: PetscReal *rwork;
113: #else
114: PetscReal *wi;
115: #endif
118: m = n;
119: nblk = (m+bs-1)/bs;
120: lwork = 5*n;
121: k = firstonly? 1: n;
123: /* compute Schur decomposition A*Q = Q*T */
124: #if !defined(PETSC_USE_COMPLEX)
125: PetscMalloc7(m,&wr,m,&wi,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p);
126: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
127: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEES %d",info);
128: #else
129: PetscMalloc7(m,&wr,m,&rwork,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p);
130: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
131: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEES %d",info);
132: #endif
134: /* determine block sizes and positions, to avoid cutting 2x2 blocks */
135: j = 0;
136: p[j] = 0;
137: do {
138: s[j] = PetscMin(bs,n-p[j]);
139: #if !defined(PETSC_USE_COMPLEX)
140: if (p[j]+s[j]!=n && T[p[j]+s[j]+(p[j]+s[j]-1)*ld]!=0.0) s[j]++;
141: #endif
142: if (p[j]+s[j]==n) break;
143: j++;
144: p[j] = p[j-1]+s[j-1];
145: } while (1);
146: nblk = j+1;
148: for (j=0;j<nblk;j++) {
149: /* evaluate f(T_jj) */
150: SlepcMatDenseSqrt(s[j],T+p[j]+p[j]*ld,ld);
151: for (i=j-1;i>=0;i--) {
152: /* solve Sylvester equation for block (i,j) */
153: r = p[j]-p[i]-s[i];
154: if (r) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",s+i,s+j,&r,&mone,T+p[i]+(p[i]+s[i])*ld,&ld,T+p[i]+s[i]+p[j]*ld,&ld,&one,T+p[i]+p[j]*ld,&ld));
155: PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,s+i,s+j,T+p[i]+p[i]*ld,&ld,T+p[j]+p[j]*ld,&ld,T+p[i]+p[j]*ld,&ld,&done,&info));
156: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSYL %d",info);
157: }
158: }
160: /* backtransform B = Q*T*Q' */
161: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,T,&ld,Q,&ld,&zero,W,&ld));
162: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
164: #if !defined(PETSC_USE_COMPLEX)
165: PetscFree7(wr,wi,W,Q,work,s,p);
166: #else
167: PetscFree7(wr,rwork,W,Q,work,s,p);
168: #endif
169: return(0);
170: #endif
171: }