Actual source code: fnexp.c
slepc-3.7.4 2017-05-17
1: /*
2: Exponential function exp(x)
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
25: #include <slepcblaslapack.h>
29: PetscErrorCode FNEvaluateFunction_Exp(FN fn,PetscScalar x,PetscScalar *y)
30: {
32: *y = PetscExpScalar(x);
33: return(0);
34: }
38: PetscErrorCode FNEvaluateDerivative_Exp(FN fn,PetscScalar x,PetscScalar *y)
39: {
41: *y = PetscExpScalar(x);
42: return(0);
43: }
45: #define MAX_PADE 6
46: #define SWAP(a,b,t) {t=a;a=b;b=t;}
50: PetscErrorCode FNEvaluateFunctionMat_Exp(FN fn,Mat A,Mat B)
51: {
52: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
54: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
55: #else
57: PetscBLASInt n,ld,ld2,*ipiv,info,inc=1;
58: PetscInt m,j,k,sexp;
59: PetscBool odd;
60: const PetscInt p=MAX_PADE;
61: PetscReal c[MAX_PADE+1],s,*rwork;
62: PetscScalar scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
63: PetscScalar *Aa,*Ba,*As,*A2,*Q,*P,*W,*aux;
66: MatDenseGetArray(A,&Aa);
67: MatDenseGetArray(B,&Ba);
68: MatGetSize(A,&m,NULL);
69: PetscBLASIntCast(m,&n);
70: ld = n;
71: ld2 = ld*ld;
72: P = Ba;
73: PetscMalloc6(m*m,&Q,m*m,&W,m*m,&As,m*m,&A2,ld,&rwork,ld,&ipiv);
74: PetscMemcpy(As,Aa,ld2*sizeof(PetscScalar));
76: /* Pade' coefficients */
77: c[0] = 1.0;
78: for (k=1;k<=p;k++) c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
80: /* Scaling */
81: s = LAPACKlange_("I",&n,&n,As,&ld,rwork);
82: if (s>0.5) {
83: sexp = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0))+2);
84: scale = PetscPowRealInt(2.0,-sexp);
85: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,As,&inc));
86: } else sexp = 0;
88: /* Horner evaluation */
89: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,As,&ld,As,&ld,&zero,A2,&ld));
90: PetscMemzero(Q,ld2*sizeof(PetscScalar));
91: PetscMemzero(P,ld2*sizeof(PetscScalar));
92: for (j=0;j<n;j++) {
93: Q[j+j*ld] = c[p];
94: P[j+j*ld] = c[p-1];
95: }
97: odd = PETSC_TRUE;
98: for (k=p-1;k>0;k--) {
99: if (odd) {
100: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
101: SWAP(Q,W,aux);
102: for (j=0;j<n;j++) Q[j+j*ld] += c[k-1];
103: odd = PETSC_FALSE;
104: } else {
105: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
106: SWAP(P,W,aux);
107: for (j=0;j<n;j++) P[j+j*ld] += c[k-1];
108: odd = PETSC_TRUE;
109: }
110: }
111: if (odd) {
112: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,As,&ld,&zero,W,&ld));
113: SWAP(Q,W,aux);
114: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
115: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
116: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
117: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
118: for (j=0;j<n;j++) P[j+j*ld] += 1.0;
119: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
120: } else {
121: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,As,&ld,&zero,W,&ld));
122: SWAP(P,W,aux);
123: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
124: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
125: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
126: PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
127: for (j=0;j<n;j++) P[j+j*ld] += 1.0;
128: }
130: for (k=1;k<=sexp;k++) {
131: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
132: PetscMemcpy(P,W,ld2*sizeof(PetscScalar));
133: }
134: if (P!=Ba) { PetscMemcpy(Ba,P,ld2*sizeof(PetscScalar)); }
136: PetscFree6(Q,W,As,A2,rwork,ipiv);
137: MatDenseRestoreArray(A,&Aa);
138: MatDenseRestoreArray(B,&Ba);
139: return(0);
140: #endif
141: }
145: PetscErrorCode FNView_Exp(FN fn,PetscViewer viewer)
146: {
148: PetscBool isascii;
149: char str[50];
152: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
153: if (isascii) {
154: if (fn->beta==(PetscScalar)1.0) {
155: if (fn->alpha==(PetscScalar)1.0) {
156: PetscViewerASCIIPrintf(viewer," Exponential: exp(x)\n");
157: } else {
158: SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
159: PetscViewerASCIIPrintf(viewer," Exponential: exp(%s*x)\n",str);
160: }
161: } else {
162: SlepcSNPrintfScalar(str,50,fn->beta,PETSC_TRUE);
163: if (fn->alpha==(PetscScalar)1.0) {
164: PetscViewerASCIIPrintf(viewer," Exponential: %s*exp(x)\n",str);
165: } else {
166: PetscViewerASCIIPrintf(viewer," Exponential: %s",str);
167: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
168: SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
169: PetscViewerASCIIPrintf(viewer,"*exp(%s*x)\n",str);
170: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
171: }
172: }
173: }
174: return(0);
175: }
179: PETSC_EXTERN PetscErrorCode FNCreate_Exp(FN fn)
180: {
182: fn->ops->evaluatefunction = FNEvaluateFunction_Exp;
183: fn->ops->evaluatederivative = FNEvaluateDerivative_Exp;
184: fn->ops->evaluatefunctionmat = FNEvaluateFunctionMat_Exp;
185: fn->ops->view = FNView_Exp;
186: return(0);
187: }