Actual source code: dot.h
2: #ifndef DOT
#include "petsc.h"
/*
   BGL kernels: when PETSC_USE_FORTRAN_KERNELS_BGL is defined, every generic
   kernel name listed here is redirected to its *_bgl counterpart.
*/
#ifdef PETSC_USE_FORTRAN_KERNELS_BGL
#  define fortranxtimesy          fortranxtimesy_bgl
#  define fortrannormsqr          fortrannormsqr_bgl
#  define fortranmdot1            fortranmdot1_bgl
#  define fortranmdot2            fortranmdot2_bgl
#  define fortranmdot3            fortranmdot3_bgl
#  define fortranmdot4            fortranmdot4_bgl
#  define fortransolvebaij4       fortransolvebaij4_bgl
#  define fortransolvebaij4blas   fortransolvebaij4blas_bgl
#  define fortransolvebaij4unroll fortransolvebaij4unroll_bgl
#endif
/*
   Prototypes for the fortranmdot1_ .. fortranmdot4_ kernels, enabled by
   PETSC_USE_FORTRAN_KERNEL_MDOT.  The trailing-underscore names are remapped
   to upper case (PETSC_HAVE_FORTRAN_CAPS) or to the bare lower-case name
   (PETSC_HAVE_FORTRAN_UNDERSCORE not defined); otherwise they are used as is.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MDOT
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmdot1_ FORTRANMDOT1
#    define fortranmdot2_ FORTRANMDOT2
#    define fortranmdot3_ FORTRANMDOT3
#    define fortranmdot4_ FORTRANMDOT4
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmdot1_ fortranmdot1
#    define fortranmdot2_ fortranmdot2
#    define fortranmdot3_ fortranmdot3
#    define fortranmdot4_ fortranmdot4
#  endif
EXTERN void fortranmdot1_(void*,void*,PetscInt*,void*);
EXTERN void fortranmdot2_(void*,void*,void*,PetscInt*,void*,void*);
EXTERN void fortranmdot3_(void*,void*,void*,void*,PetscInt*,void*,void*,void*);
EXTERN void fortranmdot4_(void*,void*,void*,void*,void*,PetscInt*,void*,void*,void*,void*);
#endif
/*
   Prototype for the fortrannormsqr_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_NORM; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_NORM
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortrannormsqr_ FORTRANNORMSQR
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortrannormsqr_ fortrannormsqr
#  endif
EXTERN void fortrannormsqr_(void*,PetscInt*,void*);
#endif
/*
   Prototype for the fortranmultcrl_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_MULTCRL; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MULTCRL
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmultcrl_ FORTRANMULTCRL
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmultcrl_ fortranmultcrl
#  endif
EXTERN void fortranmultcrl_(PetscInt*,PetscInt*,PetscScalar*,PetscScalar*,PetscInt*,PetscScalar*);
#endif
/*
   Prototype for the fortranmultcsrperm_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM; the name is remapped according to
   the Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmultcsrperm_ FORTRANMULTCSRPERM
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmultcsrperm_ fortranmultcsrperm
#  endif
EXTERN void fortranmultcsrperm_(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscScalar*);
#endif
/*
   Prototype for the fortranmultaij_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_MULTAIJ; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MULTAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmultaij_ FORTRANMULTAIJ
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmultaij_ fortranmultaij
#  endif
EXTERN void fortranmultaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
#endif
/*
   Prototype for the fortranmulttransposeaddaij_ kernel.  Note that the
   enabling macro is spelled PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ
   (without "ADD"), while the routine name contains "add".
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmulttransposeaddaij_ FORTRANMULTTRANSPOSEADDAIJ
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmulttransposeaddaij_ fortranmulttransposeaddaij
#  endif
EXTERN void fortranmulttransposeaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
#endif
/*
   Prototype for the fortranmultaddaij_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranmultaddaij_ FORTRANMULTADDAIJ
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranmultaddaij_ fortranmultaddaij
#  endif
EXTERN void fortranmultaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*,void*);
#endif
/*
   Prototype for the fortransolveaij_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolveaij_ FORTRANSOLVEAIJ
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolveaij_ fortransolveaij
#  endif
EXTERN void fortransolveaij_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*);
#endif
/*
   Prototypes for the four fortranrelaxaij* kernels (forward/backward, each
   with a "zero" variant taking one extra argument), enabled by
   PETSC_USE_FORTRAN_KERNEL_RELAXAIJ; the names are remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_RELAXAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortranrelaxaijforward_      FORTRANRELAXAIJFORWARD
#    define fortranrelaxaijforwardzero_  FORTRANRELAXAIJFORWARDZERO
#    define fortranrelaxaijbackward_     FORTRANRELAXAIJBACKWARD
#    define fortranrelaxaijbackwardzero_ FORTRANRELAXAIJBACKWARDZERO
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranrelaxaijforward_      fortranrelaxaijforward
#    define fortranrelaxaijforwardzero_  fortranrelaxaijforwardzero
#    define fortranrelaxaijbackward_     fortranrelaxaijbackward
#    define fortranrelaxaijbackwardzero_ fortranrelaxaijbackwardzero
#  endif
EXTERN void fortranrelaxaijforward_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*);
EXTERN void fortranrelaxaijforwardzero_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*,void*);
EXTERN void fortranrelaxaijbackward_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*);
EXTERN void fortranrelaxaijbackwardzero_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*,void*);
#endif
/*
   Prototype for the fortransolvebaij4_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4_ FORTRANSOLVEBAIJ4
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4_ fortransolvebaij4
#  endif
EXTERN void fortransolvebaij4_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*,void*);
#endif
/*
   Prototype for the fortransolvebaij4unroll_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL; the name is remapped according
   to the Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4unroll_ FORTRANSOLVEBAIJ4UNROLL
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4unroll_ fortransolvebaij4unroll
#  endif
EXTERN void fortransolvebaij4unroll_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*);
#endif
/*
   Prototype for the fortransolvebaij4blas_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS; the name is remapped according to
   the Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4blas_ FORTRANSOLVEBAIJ4BLAS
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4blas_ fortransolvebaij4blas
#  endif
EXTERN void fortransolvebaij4blas_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*,void*);
#endif
/*
   Prototype for the fortranxtimesy_ kernel, enabled by
   PETSC_USE_FORTRAN_KERNEL_XTIMESY; the name is remapped according to the
   Fortran symbol-spelling macros.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_XTIMESY
#  if defined(PETSC_HAVE_FORTRAN_CAPS)
#    define fortranxtimesy_ FORTRANXTIMESY
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranxtimesy_ fortranxtimesy
#  endif
EXTERN void fortranxtimesy_(void*,void*,void*,PetscInt*);
#endif
160: /* ------------------------------------------------------------------- */
163: #if !defined(PETSC_USE_COMPLEX)
165: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   Real-valued kernels, unrolled by hand.

   DOT(sum,x,y,n)             sum  += x[i]*y[i]          for n entries
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*y1[i], sum2 += x[i]*y2[i]
   SQR(sum,x,n)               sum  += x[i]*x[i]

   The results are accumulated into the sum arguments (they are not zeroed).
   x, y, y1, y2 and n must be modifiable lvalues: the pointers are advanced
   past the processed entries and n is counted down (it ends <= 0).
   Every argument is parenthesized so that expressions such as *pp expand
   with the intended precedence.  The macros expand to a braced block.
*/
#define DOT(sum,x,y,n) {\
  /* peel off the (n mod 4) leading entries, then run groups of four */\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x)++ * *(y)++; /* fallthrough */\
  case 2: (sum) += *(x)++ * *(y)++; /* fallthrough */\
  case 1: (sum) += *(x)++ * *(y)++;\
          (n) -= 4;                 /* fallthrough */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]+(x)[3]*(y)[3];\
    (x) += 4; (y) += 4; (n) -= 4;\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  /* handle an odd leading entry, then run pairs */\
  if ((n) & 0x1) {\
    (sum1) += *(x) * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
    (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*(y1)[0]+(x)[1]*(y1)[1];\
    (sum2) += (x)[0]*(y2)[0]+(x)[1]*(y2)[1];\
    (x) += 2; (y1) += 2; (y2) += 2; (n) -= 2;\
  }\
}
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * *(x); (x)++; /* fallthrough */\
  case 2: (sum) += *(x) * *(x); (x)++; /* fallthrough */\
  case 1: (sum) += *(x) * *(x); (x)++;\
          (n) -= 4;                    /* fallthrough */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(x)[0]+(x)[1]*(x)[1]+(x)[2]*(x)[2]+(x)[3]*(x)[3];\
    (x) += 4; (n) -= 4;\
  }\
}
187: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   Real-valued kernels written as simple while loops.

   DOT(sum,x,y,n)             sum  += x[i]*y[i]          for n entries
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*y1[i], sum2 += x[i]*y2[i]
   SQR(sum,x,n)               sum  += x[i]*x[i]

   The results are accumulated into the sum arguments.  x, y, y1, y2 and n
   must be modifiable lvalues: the pointers are advanced and n is counted
   down by the loop test.  Every argument is parenthesized so that
   expressions such as *pp expand with the intended precedence.
*/
#define DOT(sum,x,y,n) {\
  while ((n)--) {\
    (sum) += *(x)++ * *(y)++;\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)--) {\
    (sum1) += *(x) * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
  }\
}
#define SQR(sum,x,n) {\
  while ((n)--) {\
    (sum) += *(x) * *(x);\
    (x)++;\
  }\
}
195: #elif defined(PETSC_USE_BLAS_KERNELS)
/*
   Real-valued kernels that delegate to BLASdot_ with unit strides.

   DOT(sum,x,y,n)             sum  += dot(x,y)   over n entries
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*y1[i], sum2 += x[i]*y2[i]
                              (plain loop, no BLAS call)
   SQR(sum,x,n)               sum  += dot(x,x)   over n entries

   DOT and SQR accumulate into sum, matching DOT2 here and the other kernel
   variants of these macros (they previously overwrote sum).  The length is
   copied into a local PetscBLASInt so that n may be any integer expression;
   NOTE(review): this assumes n fits in PetscBLASInt - confirm for builds
   where PetscInt is wider.  x, y and n are left unchanged.
*/
#define DOT(sum,x,y,n) {\
  PetscBLASInt dot_one_ = 1, dot_len_ = (PetscBLASInt)(n);\
  (sum) += BLASdot_(&dot_len_,(x),&dot_one_,(y),&dot_one_);\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*(y1)[dot_i_];\
    (sum2) += (x)[dot_i_]*(y2)[dot_i_];\
  }\
}
#define SQR(sum,x,n) {\
  PetscBLASInt dot_one_ = 1, dot_len_ = (PetscBLASInt)(n);\
  (sum) += BLASdot_(&dot_len_,(x),&dot_one_,(x),&dot_one_);\
}
203: #else
/*
   Real-valued kernels, default implementation (indexed for loops).

   DOT(sum,x,y,n)             sum  += x[i]*y[i]          for 0 <= i < n
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*y1[i], sum2 += x[i]*y2[i]
   SQR(sum,x,n)               sum  += x[i]*x[i]

   The results are accumulated into the sum arguments; x, y, y1, y2 and n are
   only read.  Every argument is parenthesized so that expressions such as
   a+1 or m-1 can be passed, and the loop index avoids a reserved
   double-underscore name.
*/
#define DOT(sum,x,y,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*(y)[dot_i_];\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*(y1)[dot_i_];\
    (sum2) += (x)[dot_i_]*(y2)[dot_i_];\
  }\
}
#define SQR(sum,x,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*(x)[dot_i_];\
  }\
}
210: #endif
212: #else
214: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   Complex-valued kernels, unrolled by hand.  The second operand of every
   product is conjugated.

   DOT(sum,x,y,n)             sum  += x[i]*conj(y[i])    for n entries
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*conj(y1[i]),
                              sum2 += x[i]*conj(y2[i])
   SQR(sum,x,n)               sum  += x[i]*conj(x[i])

   The results are accumulated into the sum arguments.  x, y, y1, y2 and n
   must be modifiable lvalues: the pointers are advanced past the processed
   entries and n is counted down (it ends <= 0).  Pointer increments are kept
   outside the conj() argument, so each pointer is stepped exactly once per
   entry; DOT2 previously applied ++ to the value returned by conj(), which
   is not an lvalue and did not compile.
*/
#define DOT(sum,x,y,n) {\
  /* peel off the (n mod 4) leading entries, then run groups of four */\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fallthrough */\
  case 2: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fallthrough */\
  case 1: (sum) += *(x) * conj(*(y)); (x)++; (y)++;\
          (n) -= 4;                                 /* fallthrough */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((y)[0])+(x)[1]*conj((y)[1])+(x)[2]*conj((y)[2])+(x)[3]*conj((y)[3]);\
    (x) += 4; (y) += 4; (n) -= 4;\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  /* handle an odd leading entry, then run pairs */\
  if ((n) & 0x1) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++; (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*conj((y1)[0])+(x)[1]*conj((y1)[1]);\
    (sum2) += (x)[0]*conj((y2)[0])+(x)[1]*conj((y2)[1]);\
    (x) += 2; (y1) += 2; (y2) += 2; (n) -= 2;\
  }\
}
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(x)); (x)++; /* fallthrough */\
  case 2: (sum) += *(x) * conj(*(x)); (x)++; /* fallthrough */\
  case 1: (sum) += *(x) * conj(*(x)); (x)++;\
          (n) -= 4;                          /* fallthrough */\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((x)[0])+(x)[1]*conj((x)[1])+(x)[2]*conj((x)[2])+(x)[3]*conj((x)[3]);\
    (x) += 4; (n) -= 4;\
  }\
}
236: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   Complex-valued kernels written as simple while loops.  The second operand
   of every product is conjugated.

   DOT(sum,x,y,n)             sum  += x[i]*conj(y[i])    for n entries
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*conj(y1[i]),
                              sum2 += x[i]*conj(y2[i])
   SQR(sum,x,n)               sum  += x[i]*conj(x[i])

   The results are accumulated into the sum arguments.  x, y, y1, y2 and n
   must be modifiable lvalues: the pointers are advanced and n is counted
   down by the loop test.  The DOT definition line now carries its line
   continuation (without it the macro body was left outside the macro), and
   the pointer increments are kept outside the conj() argument so each
   pointer is stepped exactly once per entry even if conj is a macro.
*/
#define DOT(sum,x,y,n) {\
  while ((n)--) {\
    (sum) += *(x) * conj(*(y));\
    (x)++; (y)++;\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)--) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++;\
  }\
}
#define SQR(sum,x,n) {\
  while ((n)--) {\
    (sum) += *(x) * conj(*(x));\
    (x)++;\
  }\
}
244: #else
/*
   Complex-valued kernels, default implementation (indexed for loops).  The
   second operand of every product is conjugated.

   DOT(sum,x,y,n)             sum  += x[i]*conj(y[i])    for 0 <= i < n
   DOT2(sum1,sum2,x,y1,y2,n)  sum1 += x[i]*conj(y1[i]),
                              sum2 += x[i]*conj(y2[i])
   SQR(sum,x,n)               sum  += x[i]*conj(x[i])

   The results are accumulated into the sum arguments; x, y, y1, y2 and n are
   only read.  Every argument is parenthesized so that expressions such as
   a+1 or m-1 can be passed, and the loop index avoids a reserved
   double-underscore name.
*/
#define DOT(sum,x,y,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*conj((y)[dot_i_]);\
  }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum1) += (x)[dot_i_]*conj((y1)[dot_i_]);\
    (sum2) += (x)[dot_i_]*conj((y2)[dot_i_]);\
  }\
}
#define SQR(sum,x,n) {\
  PetscInt dot_i_;\
  for (dot_i_ = 0; dot_i_ < (n); dot_i_++) {\
    (sum) += (x)[dot_i_]*conj((x)[dot_i_]);\
  }\
}
251: #endif
253: #endif
255: #endif