Actual source code: spops.h
2: #ifndef SPARSEDENSESMAXPY
5: /* take (x,i) into dense vector r; there are nnz entries in (x,i)
6: r(xi) -= alpha * xv */
7: #ifdef PETSC_USE_UNROLL_KERNELS
8: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {PetscInt __noff;\
9: __noff = nnz & 0x3;\
10: switch (__noff) {\
11: case 3: r[xi[2]] -= alpha * xv[2];\
12: case 2: r[xi[1]] -= alpha * xv[1];\
13: case 1: r[xi[0]] -= alpha * xv[0];\
14: nnz -= 4;xi+=__noff;xv+=__noff;\
15: }\
16: while (nnz > 0) {\
17: r[xi[0]] -= alpha * xv[0];r[xi[1]] -= alpha * xv[1];\
18: r[xi[2]] -= alpha * xv[2];r[xi[3]] -= alpha * xv[3];\
19: xi += 4;xv += 4;nnz -= 4;}}/*}*/
21: #elif defined(PETSC_USE_WHILE_KERNELS)
22: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
23: while (nnz-->0) r[*xi++] -= alpha * *xv++;}
25: #elif defined(PETSC_USE_FOR_KERNELS)
26: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
27: PetscInt __i,__j1,__j2; PetscScalar__s1,__s2;\
28: for(__i=0;__i<nnz-1;__i+=2){__j1=xi[__i];__j2=xi[__i+1];__s1=alpha*xv[__i];\
29: __s2=alpha*xv[__i+1];__s1=r[__j1]-__s1;__s2=r[__j2]-__s2;\
30: r[__j1]=__s1;r[__j2]=__s2;}\
31: if (nnz & 0x1) r[xi[__i]] -= alpha * xv[__i];}
33: #else
34: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
35: PetscInt __i;\
36: for(__i=0;__i<nnz;__i++)r[xi[__i]] -= alpha * xv[__i];}
37: #endif
39: /* Form sum -= r[xi] * xv; */
40: #ifdef PETSC_USE_UNROLL_KERNELS
41: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
42: if (nnz > 0) {\
43: switch (nnz & 0x3) {\
44: case 3: sum -= *xv++ * r[*xi++];\
45: case 2: sum -= *xv++ * r[*xi++];\
46: case 1: sum -= *xv++ * r[*xi++];\
47: nnz -= 4;}\
48: while (nnz > 0) {\
49: sum = sum - xv[0] * r[xi[0]] - xv[1] * r[xi[1]] -\
50: xv[2] * r[xi[2]] - xv[3] * r[xi[3]];\
51: xv += 4; xi += 4; nnz -= 4; }}}
53: #elif defined(PETSC_USE_WHILE_KERNELS)
54: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
55: while (nnz--) sum -= *xv++ * r[*xi++];}
57: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
58: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
59: PetscInt __i,__i1,__i2;\
60: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];\
61: sum -= (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}\
62: if (nnz & 0x1) sum -= xv[__i] * r[xi[__i]];}
64: #else
65: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
66: PetscInt __i;\
67: for(__i=0;__i<nnz;__i++) sum -= xv[__i] * r[xi[__i]];}
68: #endif
72: /* Form sum += r[xi] * xv; */
73: #ifdef PETSC_USE_UNROLL_KERNELS
74: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
75: if (nnz > 0) {\
76: switch (nnz & 0x3) {\
77: case 3: sum += *xv++ * r[*xi++];\
78: case 2: sum += *xv++ * r[*xi++];\
79: case 1: sum += *xv++ * r[*xi++];\
80: nnz -= 4;}\
81: while (nnz > 0) {\
82: sum = sum + xv[0] * r[xi[0]] + xv[1] * r[xi[1]] +\
83: xv[2] * r[xi[2]] + xv[3] * r[xi[3]];\
84: xv += 4; xi += 4; nnz -= 4; }}}
86: #elif defined(PETSC_USE_WHILE_KERNELS)
87: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
88: while (nnz--) sum += *xv++ * r[*xi++];}
90: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
91: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
92: PetscInt __i,__i1,__i2;\
93: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];\
94: sum += (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}\
95: if (nnz & 0x1) sum += xv[__i] * r[xi[__i]];}
97: #else
98: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
99: PetscInt __i;\
100: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[xi[__i]];}
101: #endif
103: /* Form sum += r[map[xi]] * xv; */
104: #ifdef PETSC_USE_UNROLL_KERNELS
105: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
106: if (nnz > 0) {\
107: switch (nnz & 0x3) {\
108: case 3: sum += *xv++ * r[map[*xi++]];\
109: case 2: sum += *xv++ * r[map[*xi++]];\
110: case 1: sum += *xv++ * r[map[*xi++]];\
111: nnz -= 4;}\
112: while (nnz > 0) {\
113: sum = sum + xv[0] * r[map[xi[0]]] + xv[1] * r[map[xi[1]]] +\
114: xv[2] * r[map[xi[2]]] + xv[3] * r[map[xi[3]]];\
115: xv += 4; xi += 4; nnz -= 4; }}}
117: #elif defined(PETSC_USE_WHILE_KERNELS)
118: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
119: while (nnz--) sum += *xv++ * r[map[*xi++]];}
121: #else
122: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
123: PetscInt __i;\
124: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[map[xi[__i]]];}
125: #endif
127: /* Gather xv = r[xi] */
128: #ifdef PETSC_USE_UNROLL_KERNELS
129: #define GATHER(xv,xi,r,nz) {PetscInt __noff;\
130: if (nz > 0) {\
131: __noff = nz & 0x3;\
132: switch (nz & 0x3) {\
133: case 3: xv[2] = r[xi[2]];\
134: case 2: xv[1] = r[xi[1]];\
135: case 1: xv[0] = r[xi[0]];\
136: nz -= 4;xv+=__noff;xi+=__noff;}\
137: while (nz > 0) {\
138: xv[0] = r[xi[0]]; xv[1] = r[xi[1]]; xv[2] = r[xi[2]]; xv[3] = r[xi[3]];\
139: xi += 4;xv += 4;nz -= 4;}}}
141: #elif defined(PETSC_USE_WHILE_KERNELS)
142: #define GATHER(xv,xi,r,nz) while (nz--) *xv++ = r[*xi++];
144: #elif defined(PETSC_USE_FOR_KERNELS)
145: #define GATHER(xv,xi,r,nz) {PetscInt __i; PetscScalar__s1,__s2;\
146: for(__i=0;__i<nz-1;__i+=2){__s1=r[xi[__i]];__s2=r[xi[__i+1]];\
147: xv[__i]=__s1;xv[__i+1]=__s2;}if ((nz)&0x1) xv[__i]=r[xi[__i]];}
149: #else
150: #define GATHER(xv,xi,r,nz) {PetscInt __i;for(__i=0;__i<nz;__i++)xv[__i]=r[xi[__i]];}
151: #endif
153: /* Scatter r[xi] = xv */
154: #ifdef PETSC_USE_UNROLL_KERNELS
155: #define SCATTER(xv,xi,r,nz) \
156: if (nz > 0) {\
157: switch (nz & 0x3) {\
158: case 3: r[*xi++] = *xv++;\
159: case 2: r[*xi++] = *xv++;\
160: case 1: r[*xi++] = *xv++;\
161: nz -= 4;}\
162: while (nz > 0) {\
163: r[xi[0]]=xv[0]; r[xi[1]]=xv[1]; r[xi[2]]=xv[2]; r[xi[3]]=xv[3];\
164: xi += 4;xv += 4;nz -= 4;}}
166: #elif defined(PETSC_USE_WHILE_KERNELS)
167: #define SCATTER(xv,xi,r,nz) while (nz--) r[*xi++]=*xv++;
169: #elif defined(PETSC_USE_FOR_KERNELS)
170: #define SCATTER(xv,xi,r,nz) {PetscInt __i; PetscScalar__s1,__s2;\
171: for(__i=0;__i<nz-1;__i+=2){__s1=xv[__i];__s2=xv[__i+1];\
172: r[xi[__i]]=__s1;r[xi[__i+1]]=__s2;}if ((nz)&0x1)r[xi[__i]]=xv[__i];}
174: #else
175: #define SCATTER(xv,xi,r,nz) {PetscInt __i;\
176: for(__i=0;__i<nz;__i++)r[xi[__i]]=xv[__i];}
177: #endif
179: /* Scatter r[xi] = val */
180: #ifdef PETSC_USE_UNROLL_KERNELS
181: #define SCATTERVAL(r,xi,n,val) \
182: switch (n & 0x3) {\
183: case 3: r[*xi++] = val;\
184: case 2: r[*xi++] = val;\
185: case 1: r[*xi++] = val;\
186: n -= 4;}\
187: while (n > 0) {\
188: r[xi[0]]=val; r[xi[1]]=val; r[xi[2]]=val; r[xi[3]]=val;xi += 4;n -= 4;}
190: #elif defined(PETSC_USE_WHILE_KERNELS)
191: #define SCATTERVAL(r,xi,n,val) while (n--) r[*xi++]=val;
193: #else
194: #define SCATTERVAL(r,xi,n,val) {PetscInt __i;for(__i=0;__i<n;__i++)r[xi[__i]]=val;}
195: #endif
197: /* Copy vo[xi] = vi[xi] */
198: #ifdef PETSC_USE_UNROLL_KERNELS
199: #define COPYPERM(vo,xi,vi,n) \
200: switch (n & 0x3) {\
201: case 3: vo[*xi] = vi[*xi];xi++;\
202: case 2: vo[*xi] = vi[*xi];xi++;\
203: case 1: vo[*xi] = vi[*xi];xi++;\
204: n -= 4;}\
205: while (n > 0) {\
206: vo[xi[0]]=vi[xi[0]]; vo[xi[1]]=vi[xi[1]]; vo[xi[2]]=vi[xi[2]]; \
207: vo[xi[3]]=vi[xi[3]];xi += 4;n -= 4;}}
209: #elif defined(PETSC_USE_WHILE_KERNELS)
210: #define COPYPERM(vo,xi,vi,n) while (n--) {vo[*xi]=vi[*xi];xi++;}
212: #else
213: #define COPYPERM(vo,xi,vi,n) {PetscInt __i;\
214: for(__i=0;__i<n;__i++)vo[xi[__i]]=vi[xi[__i]];}
215: #endif
217: /* Scale sparse vector v[xi] *= a */
218: #ifdef PETSC_USE_UNROLL_KERNELS
219: #define SPARSESCALE(v,xi,n,val) {\
220: switch (n & 0x3) {\
221: case 3: v[*xi++] *= val;\
222: case 2: v[*xi++] *= val;\
223: case 1: v[*xi++] *= val;\
224: n -= 4;}\
225: while (n > 0) {\
226: v[xi[0]]*=val;vo[xi[1]]*=val; vo[xi[2]]*=val;vo[xi[3]]*=val;xi += 4;n -= 4;}}}
228: #elif defined(PETSC_USE_WHILE_KERNELS)
229: #define SPARSESCALE(v,xi,n,val) {\
230: while (n--) v[*xi++] *= val;}
232: #else
233: #define SPARSESCALE(v,xi,n,val) {PetscInt __i;\
234: for(__i=0;__i<n;__i++)v[xi[__i]*=val;}
235: #endif
237: /* sparse dot sum = sum(a[xi] * b[xi]) */
238: #ifdef PETSC_USE_UNROLL_KERNELS
239: #define SPARSEDOT(sum,a,b,xi,n) {\
240: switch (n & 0x3) {\
241: case 3: sum += a[*xi] * b[*xi]; xi++;\
242: case 2: sum += a[*xi] * b[*xi]; xi++;\
243: case 1: sum += a[*xi] * b[*xi]; xi++;\
244: n -= 4;}\
245: while (n > 0) {\
246: sum+=a[xi[0]]*b[xi[0]]+a[xi[1]]*b[xi[1]]+a[xi[2]]*b[xi[2]]+a[xi[3]]*b[xi[3]];\
247: xi += 4;n -= 4;}}}
249: #elif defined(PETSC_USE_WHILE_KERNELS)
250: #define SPARSEDOT(sum,a,b,xi,n) {\
251: while (n--) {sum += a[*xi]*b[*xi];xi++;}}
253: #else
254: #define SPARSEDOT(sum,a,b,xi,n) {\
255: PetscInt __i;\
256: for(__i=0;__i<n;__i++)sum+= a[xi[__i]]*b[xi[__i]];}
257: #endif
260: /* Scatter r[xi] += xv */
261: #ifdef PETSC_USE_UNROLL_KERNELS
262: #define SCATTERADD(xv,xi,r,nz) {\
263: if (nz > 0) {\
264: switch (nz & 0x3) {\
265: case 3: r[*xi++] += *xv++;\
266: case 2: r[*xi++] += *xv++;\
267: case 1: r[*xi++] += *xv++;\
268: nz -= 4;}\
269: while (nz > 0) {\
270: r[xi[0]]+=xv[0]; r[xi[1]]+=xv[1]; r[xi[2]]+=xv[2]; r[xi[3]]+=xv[3];\
271: xi += 4;xv += 4;nz -= 4;}}}
273: #elif defined(PETSC_USE_WHILE_KERNELS)
274: #define SCATTERADD(xv,xi,r,nz) {\
275: while (nz--) r[*xi++]+= *xv++;}
277: #elif defined(PETSC_USE_FOR_KERNELS)
278: #define SCATTERADD(xv,xi,r,nz) { PetscScalar__s1,__s2;\
279: PetscInt __i,__i1,__i2;\
280: for(__i=0;__i<nz-1;__i+=2){__i1 = xi[__i]; __i2 = xi[__i+1];\
281: __s1 = r[__i1]; __s2 = r[__i2]; __s1 += xv[__i]; __s2 += xv[__i+1];\
282: r[__i1]=__s1;r[__i2]=__s2;}if ((nz)&0x1)r[xi[__i]]+=xv[__i];}
284: #else
285: #define SCATTERADD(xv,xi,r,nz) {\
286: PetscInt __i;\
287: for(__i=0;__i<nz;__i++) r[xi[__i]]+= xv[__i];}
288: #endif
290: /* Gather xv += r[xi] */
291: #ifdef PETSC_USE_UNROLL_KERNELS
292: #define GATHERADD(xv,xi,r,nz) {\
293: if (nz > 0) {\
294: switch (nz & 0x3) {\
295: case 3: *xv++ += r[*xi++];\
296: case 2: *xv++ += r[*xi++];\
297: case 1: *xv++ += r[*xi++];\
298: nz -= 4;}\
299: while (nz > 0) {\
300: xv[0] += r[xi[0]]; xv[1] += r[xi[1]]; xv[2] += r[xi[2]]; xv[3] += r[xi[3]];\
301: xi += 4;xv += 4;nz -= 4;}}}
303: #elif defined(PETSC_USE_WHILE_KERNELS)
304: #define GATHERADD(xv,xi,r,nz) {\
305: while (nz--) *xv++ += r[*xi++];}
307: #else
308: #define GATHERADD(xv,xi,r,nz) {\
309: PetscInt __i;\
310: for(__i=0;__i<nz;__i++)xv[__i] += r[xi[__i]];}
311: #endif
313: #endif