Actual source code: dot.h

  2: #ifndef DOT
#include "petsc.h"


  7: /* BGL kernels */
  8: #if defined(PETSC_USE_FORTRAN_KERNELS_BGL)
  9: #define fortranxtimesy          fortranxtimesy_bgl
 10: #define fortranmdot4            fortranmdot4_bgl
 11: #define fortranmdot3            fortranmdot3_bgl
 12: #define fortranmdot2            fortranmdot2_bgl
 13: #define fortranmdot1            fortranmdot1_bgl
 14: #define fortrannormsqr          fortrannormsqr_bgl
 15: #define fortransolvebaij4unroll fortransolvebaij4unroll_bgl
 16: #define fortransolvebaij4blas   fortransolvebaij4blas_bgl
 17: #define fortransolvebaij4       fortransolvebaij4_bgl


 20: #endif


 23: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 24: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 25: #define fortranmdot4_      FORTRANMDOT4
 26: #define fortranmdot3_      FORTRANMDOT3
 27: #define fortranmdot2_      FORTRANMDOT2
 28: #define fortranmdot1_      FORTRANMDOT1
 29: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 30: #define fortranmdot4_      fortranmdot4
 31: #define fortranmdot3_      fortranmdot3
 32: #define fortranmdot2_      fortranmdot2
 33: #define fortranmdot1_      fortranmdot1
 34: #endif
 35: EXTERN void fortranmdot4_(void*,void*,void*,void*,void*,PetscInt*,void*,void*,void*,void*);
 36: EXTERN void fortranmdot3_(void*,void*,void*,void*,PetscInt*,void*,void*,void*);
 37: EXTERN void fortranmdot2_(void*,void*,void*,PetscInt*,void*,void*);
 38: EXTERN void fortranmdot1_(void*,void*,PetscInt*,void*);
 39: #endif

 41: #if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
 42: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 43: #define fortrannormsqr_    FORTRANNORMSQR
 44: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 45: #define fortrannormsqr_    fortrannormsqr
 46: #endif
 47: EXTERN void fortrannormsqr_(void*,PetscInt*,void*);
 48: #endif

 50: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
 51: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 52: #define fortranmultcrl_    FORTRANMULTCRL
 53: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 54: #define fortranmultcrl_    fortranmultcrl
 55: #endif
 56: EXTERN void fortranmultcrl_(PetscInt*,PetscInt*,PetscScalar*,PetscScalar*,PetscInt*,PetscScalar*);
 57: #endif

 59: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM)
 60: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 61: #define fortranmultcsrperm_    FORTRANMULTCSRPERM
 62: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 63: #define fortranmultcsrperm_    fortranmultcsrperm
 64: #endif
 65: EXTERN void fortranmultcsrperm_(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscScalar*);
 66: #endif

 68: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ)
 69: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 70: #define fortranmultaij_    FORTRANMULTAIJ
 71: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 72: #define fortranmultaij_    fortranmultaij
 73: #endif
 74: EXTERN void fortranmultaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
 75: #endif

 77: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ)
 78: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 79: #define fortranmulttransposeaddaij_    FORTRANMULTTRANSPOSEADDAIJ
 80: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 81: #define fortranmulttransposeaddaij_    fortranmulttransposeaddaij
 82: #endif
 83: EXTERN void fortranmulttransposeaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
 84: #endif

 86: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ)
 87: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 88: #define fortranmultaddaij_ FORTRANMULTADDAIJ
 89: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 90: #define fortranmultaddaij_ fortranmultaddaij
 91: #endif
 92: EXTERN void fortranmultaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*,void*);
 93: #endif

 95: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
 96: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 97: #define fortransolveaij_   FORTRANSOLVEAIJ
 98: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 99: #define fortransolveaij_   fortransolveaij
100: #endif
101: EXTERN void fortransolveaij_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*);
102: #endif

/*
   Declarations of the four fortranrelaxaij*_ kernels (forward, backward,
   forwardzero, backwardzero), present only when
   PETSC_USE_FORTRAN_KERNEL_RELAXAIJ is defined.

   Each identifier is remapped to its upper-case name under
   PETSC_HAVE_FORTRAN_CAPS, to the name without the trailing underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined, and is left as written
   otherwise.  The two "zero" variants take one more trailing void* argument
   than the other two.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_RELAXAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranrelaxaijforward_      FORTRANRELAXAIJFORWARD
#    define fortranrelaxaijforwardzero_  FORTRANRELAXAIJFORWARDZERO
#    define fortranrelaxaijbackward_     FORTRANRELAXAIJBACKWARD
#    define fortranrelaxaijbackwardzero_ FORTRANRELAXAIJBACKWARDZERO
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranrelaxaijforward_      fortranrelaxaijforward
#    define fortranrelaxaijforwardzero_  fortranrelaxaijforwardzero
#    define fortranrelaxaijbackward_     fortranrelaxaijbackward
#    define fortranrelaxaijbackwardzero_ fortranrelaxaijbackwardzero
#  endif
EXTERN void fortranrelaxaijforward_(PetscInt *, PetscReal *, void *, PetscInt *, PetscInt *, const PetscInt *, void *, void *);
EXTERN void fortranrelaxaijforwardzero_(PetscInt *, PetscReal *, void *, PetscInt *, PetscInt *, const PetscInt *, void *, void *, void *);
EXTERN void fortranrelaxaijbackward_(PetscInt *, PetscReal *, void *, PetscInt *, PetscInt *, const PetscInt *, void *, void *);
EXTERN void fortranrelaxaijbackwardzero_(PetscInt *, PetscReal *, void *, PetscInt *, PetscInt *, const PetscInt *, void *, void *, void *);
#endif

/*
   Declaration of the fortransolvebaij4_ kernel, present only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ is defined.

   The identifier becomes FORTRANSOLVEBAIJ4 under PETSC_HAVE_FORTRAN_CAPS,
   fortransolvebaij4 when PETSC_HAVE_FORTRAN_UNDERSCORE is not defined, and is
   left as written otherwise.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4_ FORTRANSOLVEBAIJ4
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4_ fortransolvebaij4
#  endif
EXTERN void fortransolvebaij4_(PetscInt *, void *, PetscInt *, PetscInt *, PetscInt *, void *, void *, void *);
#endif

/*
   Declaration of the fortransolvebaij4unroll_ kernel, present only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL is defined.

   The identifier becomes FORTRANSOLVEBAIJ4UNROLL under
   PETSC_HAVE_FORTRAN_CAPS, fortransolvebaij4unroll when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined, and is left as written
   otherwise.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4unroll_ FORTRANSOLVEBAIJ4UNROLL
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4unroll_ fortransolvebaij4unroll
#  endif
EXTERN void fortransolvebaij4unroll_(PetscInt *, void *, PetscInt *, PetscInt *, PetscInt *, void *, void *);
#endif

/*
   Declaration of the fortransolvebaij4blas_ kernel, present only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS is defined.

   The identifier becomes FORTRANSOLVEBAIJ4BLAS under PETSC_HAVE_FORTRAN_CAPS,
   fortransolvebaij4blas when PETSC_HAVE_FORTRAN_UNDERSCORE is not defined,
   and is left as written otherwise.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4blas_ FORTRANSOLVEBAIJ4BLAS
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4blas_ fortransolvebaij4blas
#  endif
EXTERN void fortransolvebaij4blas_(PetscInt *, void *, PetscInt *, PetscInt *, PetscInt *, void *, void *, void *);
#endif

/*
   Declaration of the fortranxtimesy_ kernel, present only when
   PETSC_USE_FORTRAN_KERNEL_XTIMESY is defined.

   The identifier becomes FORTRANXTIMESY under PETSC_HAVE_FORTRAN_CAPS,
   fortranxtimesy when PETSC_HAVE_FORTRAN_UNDERSCORE is not defined, and is
   left as written otherwise.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_XTIMESY
#  if defined(PETSC_HAVE_FORTRAN_CAPS)
#    define fortranxtimesy_ FORTRANXTIMESY
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranxtimesy_ fortranxtimesy
#  endif
EXTERN void fortranxtimesy_(void *, void *, void *, PetscInt *);
#endif


160: /* ------------------------------------------------------------------- */


163: #if !defined(PETSC_USE_COMPLEX)

165: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT - adds the dot product of x and y to sum (real scalars, 4-way unrolled).

   sum  - lvalue accumulator; products are added to its current value
   x, y - modifiable pointer lvalues; both are advanced past the elements read
   n    - modifiable integer lvalue holding the element count; it is
          overwritten and is <= 0 on exit for a non-negative count

   The switch consumes the n%4 leading elements by falling through its cases.
   The "n -= 4" inside the switch is intentional: afterwards n is positive
   exactly when at least one full group of four elements remains, which is
   what the while loop tests.

   Every parameter is parenthesized so that lvalue expressions such as *pp or
   *np can be passed safely.  The body stays a bare brace block so call sites
   written with or without a trailing semicolon keep compiling.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x)++ * *(y)++; /* fall through */\
  case 2: (sum) += *(x)++ * *(y)++; /* fall through */\
  case 1: (sum) += *(x)++ * *(y)++;\
          (n) -= 4;                 /* fall through */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(y)[0] + (x)[1]*(y)[1] + (x)[2]*(y)[2] + (x)[3]*(y)[3];\
    (x) += 4; (y) += 4; (n) -= 4;\
  }\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 in one pass (real scalars,
          2-way unrolled).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - modifiable pointer lvalues; all three are advanced past the
                elements read
   n          - modifiable integer lvalue holding the element count; it is
                counted down and is <= 0 on exit for a non-negative count

   An odd count is handled by consuming one element first; the loop then
   works on pairs.  Parameters are parenthesized so lvalue expressions such as
   *pp or *np can be passed safely.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {\
    (sum1) += *(x)   * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
    (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*(y1)[0] + (x)[1]*(y1)[1];\
    (sum2) += (x)[0]*(y2)[0] + (x)[1]*(y2)[1];\
    (x) += 2; (y1) += 2; (y2) += 2; (n) -= 2;\
  }\
}
/*
   SQR - adds the sum of squares of x to sum (real scalars, 4-way unrolled).

   sum - lvalue accumulator; squares are added to its current value
   x   - modifiable pointer lvalue; advanced past the elements read
   n   - modifiable integer lvalue holding the element count; it is
         overwritten and is <= 0 on exit for a non-negative count

   The switch consumes the n%4 leading elements by falling through its cases;
   the "n -= 4" inside it leaves n positive exactly when a full group of four
   elements remains for the while loop.  Parameters are parenthesized so
   lvalue expressions such as *pp or *np can be passed safely.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * *(x); (x)++; /* fall through */\
  case 2: (sum) += *(x) * *(x); (x)++; /* fall through */\
  case 1: (sum) += *(x) * *(x); (x)++;\
          (n) -= 4;                    /* fall through */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(x)[0] + (x)[1]*(x)[1] + (x)[2]*(x)[2] + (x)[3]*(x)[3];\
    (x) += 4; (n) -= 4;\
  }\
}

187: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT - adds the dot product of x and y to sum (real scalars, while loop).

   sum  - lvalue accumulator; products are added to its current value
   x, y - modifiable pointer lvalues; both are advanced past the elements read
   n    - modifiable integer lvalue holding the element count; it is counted
          down and is -1 on exit for a non-negative count

   The loop tests "n-- > 0", so a negative count does no work instead of
   running past the arrays.  Parameters are parenthesized so lvalue
   expressions such as *pp or *np can be passed safely.
*/
#define DOT(sum,x,y,n) {\
  while ((n)-- > 0) {\
    (sum) += *(x)++ * *(y)++;\
  }\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 in one pass (real scalars,
          while loop).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - modifiable pointer lvalues; all three are advanced past the
                elements read
   n          - modifiable integer lvalue holding the element count; it is
                counted down and is -1 on exit for a non-negative count

   The loop tests "n-- > 0", so a negative count does no work.  Parameters are
   parenthesized so lvalue expressions such as *pp or *np can be passed safely.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)-- > 0) {\
    (sum1) += *(x)   * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
  }\
}
/*
   SQR - adds the sum of squares of x to sum (real scalars, while loop).

   sum - lvalue accumulator; squares are added to its current value
   x   - modifiable pointer lvalue; advanced past the elements read
   n   - modifiable integer lvalue holding the element count; it is counted
         down and is -1 on exit for a non-negative count

   The loop tests "n-- > 0", so a negative count does no work.  Parameters are
   parenthesized so lvalue expressions such as *pp or *np can be passed safely.
*/
#define SQR(sum,x,n) {\
  while ((n)-- > 0) {\
    (sum) += *(x) * *(x);\
    (x)++;\
  }\
}

195: #elif defined(PETSC_USE_BLAS_KERNELS)
/*
   DOT - adds the dot product of x and y to sum using BLASdot_ (real scalars).

   sum  - lvalue accumulator; the BLAS result is ADDED to its current value,
          matching the loop-based DOT variants in this header
   x, y - pointers to the first elements; not modified
   n    - element count; any integer expression, not modified

   The count is copied into a local PetscBLASInt so the caller's n need not be
   an addressable variable of that exact type.  Both strides passed to
   BLASdot_ are 1.
*/
#define DOT(sum,x,y,n) {\
  PetscBLASInt dot_one_ = 1;\
  PetscBLASInt dot_n_   = (PetscBLASInt)(n);\
  (sum) += BLASdot_(&dot_n_,(x),&dot_one_,(y),&dot_one_);\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 in one pass (real scalars; this
          variant is a plain indexed loop and makes no BLAS call).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - pointers to the first elements; not modified
   n          - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*(y1)[dot_i_];\
    (sum2) += (x)[dot_i_]*(y2)[dot_i_];\
  }\
}
/*
   SQR - adds the sum of squares of x to sum using BLASdot_ (real scalars).

   sum - lvalue accumulator; the BLAS result is ADDED to its current value,
         matching the loop-based SQR variants in this header
   x   - pointer to the first element; not modified
   n   - element count; any integer expression, not modified

   The count is copied into a local PetscBLASInt so the caller's n need not be
   an addressable variable of that exact type.  x is passed as both vectors
   with stride 1.
*/
#define SQR(sum,x,n) {\
  PetscBLASInt sqr_one_ = 1;\
  PetscBLASInt sqr_n_   = (PetscBLASInt)(n);\
  (sum) += BLASdot_(&sqr_n_,(x),&sqr_one_,(x),&sqr_one_);\
}

203: #else
/*
   DOT - adds the dot product of x and y to sum (real scalars, indexed loop).

   sum  - lvalue accumulator; products are added to its current value
   x, y - pointers to the first elements; not modified
   n    - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define DOT(sum,x,y,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*(y)[dot_i_];\
  }\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 in one pass (real scalars,
          indexed loop).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - pointers to the first elements; not modified
   n          - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*(y1)[dot_i_];\
    (sum2) += (x)[dot_i_]*(y2)[dot_i_];\
  }\
}
/*
   SQR - adds the sum of squares of x to sum (real scalars, indexed loop).

   sum - lvalue accumulator; squares are added to its current value
   x   - pointer to the first element; not modified
   n   - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define SQR(sum,x,n) {\
  PetscInt sqr_i_;\
  for (sqr_i_ = 0; sqr_i_ < (n); sqr_i_++) {\
    (sum) += (x)[sqr_i_]*(x)[sqr_i_];\
  }\
}
210: #endif

212: #else

214: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum (complex scalars, 4-way unrolled).

   sum  - lvalue accumulator; products are added to its current value
   x, y - modifiable pointer lvalues; both are advanced past the elements read
   n    - modifiable integer lvalue holding the element count; it is
          overwritten and is <= 0 on exit for a non-negative count

   The conjugate is applied to the y operand.  The switch consumes the n%4
   leading elements by falling through its cases; the "n -= 4" inside it
   leaves n positive exactly when a full group of four elements remains for
   the while loop.  Parameters are parenthesized so lvalue expressions such as
   *pp or *np can be passed safely.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fall through */\
  case 2: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fall through */\
  case 1: (sum) += *(x) * conj(*(y)); (x)++; (y)++;\
          (n) -= 4;                                 /* fall through */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((y)[0]) + (x)[1]*conj((y)[1])\
           + (x)[2]*conj((y)[2]) + (x)[3]*conj((y)[3]);\
    (x) += 4; (y) += 4; (n) -= 4;\
  }\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
          sum2 in one pass (complex scalars, 2-way unrolled).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - modifiable pointer lvalues; all three are advanced past the
                elements read
   n          - modifiable integer lvalue holding the element count; it is
                counted down and is <= 0 on exit for a non-negative count

   An odd count is handled by consuming one element first.  The pointer
   increments are written as separate statements: the result of conj() is not
   an lvalue, so it cannot carry a post-increment.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++; (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*conj((y1)[0]) + (x)[1]*conj((y1)[1]);\
    (sum2) += (x)[0]*conj((y2)[0]) + (x)[1]*conj((y2)[1]);\
    (x) += 2; (y1) += 2; (y2) += 2; (n) -= 2;\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum (complex scalars, 4-way unrolled).

   sum - lvalue accumulator; products are added to its current value
   x   - modifiable pointer lvalue; advanced past the elements read
   n   - modifiable integer lvalue holding the element count; it is
         overwritten and is <= 0 on exit for a non-negative count

   The switch consumes the n%4 leading elements by falling through its cases;
   the "n -= 4" inside it leaves n positive exactly when a full group of four
   elements remains for the while loop.  Parameters are parenthesized so
   lvalue expressions such as *pp or *np can be passed safely.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(x)); (x)++; /* fall through */\
  case 2: (sum) += *(x) * conj(*(x)); (x)++; /* fall through */\
  case 1: (sum) += *(x) * conj(*(x)); (x)++;\
          (n) -= 4;                          /* fall through */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((x)[0]) + (x)[1]*conj((x)[1])\
           + (x)[2]*conj((x)[2]) + (x)[3]*conj((x)[3]);\
    (x) += 4; (n) -= 4;\
  }\
}

236: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum (complex scalars, while loop).

   sum  - lvalue accumulator; products are added to its current value
   x, y - modifiable pointer lvalues; both are advanced past the elements read
   n    - modifiable integer lvalue holding the element count; it is counted
          down and is -1 on exit for a non-negative count

   Every line of the definition ends in a continuation backslash so the loop
   is part of the macro body.  The loop tests "n-- > 0", so a negative count
   does no work.  Parameters are parenthesized so lvalue expressions such as
   *pp or *np can be passed safely.
*/
#define DOT(sum,x,y,n) {\
  while ((n)-- > 0) {\
    (sum) += *(x) * conj(*(y));\
    (x)++; (y)++;\
  }\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
          sum2 in one pass (complex scalars, while loop).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - modifiable pointer lvalues; all three are advanced past the
                elements read
   n          - modifiable integer lvalue holding the element count; it is
                counted down and is -1 on exit for a non-negative count

   The loop tests "n-- > 0", so a negative count does no work.  Parameters are
   parenthesized so lvalue expressions such as *pp or *np can be passed safely.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)-- > 0) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++;\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum (complex scalars, while loop).

   sum - lvalue accumulator; products are added to its current value
   x   - modifiable pointer lvalue; advanced past the elements read
   n   - modifiable integer lvalue holding the element count; it is counted
         down and is -1 on exit for a non-negative count

   The loop tests "n-- > 0", so a negative count does no work.  Parameters are
   parenthesized so lvalue expressions such as *pp or *np can be passed safely.
*/
#define SQR(sum,x,n) {\
  while ((n)-- > 0) {\
    (sum) += *(x) * conj(*(x));\
    (x)++;\
  }\
}

244: #else
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum (complex scalars, indexed loop).

   sum  - lvalue accumulator; products are added to its current value
   x, y - pointers to the first elements; not modified
   n    - element count; re-evaluated on every loop test, not modified

   The conjugate is applied to the y operand.  Parameters are parenthesized so
   pointer expressions such as v+1 index correctly.
*/
#define DOT(sum,x,y,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*conj((y)[dot_i_]);\
  }\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
          sum2 in one pass (complex scalars, indexed loop).

   sum1, sum2 - lvalue accumulators; products are added to their current values
   x, y1, y2  - pointers to the first elements; not modified
   n          - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*conj((y1)[dot_i_]);\
    (sum2) += (x)[dot_i_]*conj((y2)[dot_i_]);\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum (complex scalars, indexed loop).

   sum - lvalue accumulator; products are added to its current value
   x   - pointer to the first element; not modified
   n   - element count; re-evaluated on every loop test, not modified

   Parameters are parenthesized so pointer expressions such as v+1 index
   correctly.
*/
#define SQR(sum,x,n) {\
  PetscInt sqr_i_;\
  for (sqr_i_ = 0; sqr_i_ < (n); sqr_i_++) {\
    (sum) += (x)[sqr_i_]*conj((x)[sqr_i_]);\
  }\
}
251: #endif

253: #endif

255: #endif