Actual source code: spops.h

  2: #ifndef SPARSEDENSESMAXPY


  5: /* take (x,i) into dense vector r; there are nnz entries in (x,i)
  6:    r(xi) -= alpha * xv */
  7: #ifdef PETSC_USE_UNROLL_KERNELS
  8: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {PetscInt __noff;\
  9: __noff = nnz & 0x3;\
 10: switch (__noff) {\
 11: case 3: r[xi[2]] -= alpha * xv[2];\
 12: case 2: r[xi[1]] -= alpha * xv[1];\
 13: case 1: r[xi[0]] -= alpha * xv[0];\
 14: nnz -= 4;xi+=__noff;xv+=__noff;\
 15: }\
 16: while (nnz > 0) {\
 17: r[xi[0]] -= alpha * xv[0];r[xi[1]] -= alpha * xv[1];\
 18: r[xi[2]] -= alpha * xv[2];r[xi[3]] -= alpha * xv[3];\
 19: xi  += 4;xv  += 4;nnz -= 4;}}/*}*/

 21: #elif defined(PETSC_USE_WHILE_KERNELS)
 22: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
 23: while (nnz-->0) r[*xi++] -= alpha * *xv++;}

 25: #elif defined(PETSC_USE_FOR_KERNELS)
 26: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
 27:  PetscInt __i,__j1,__j2; PetscScalar__s1,__s2;\
 28: for(__i=0;__i<nnz-1;__i+=2){__j1=xi[__i];__j2=xi[__i+1];__s1=alpha*xv[__i];\
 29: __s2=alpha*xv[__i+1];__s1=r[__j1]-__s1;__s2=r[__j2]-__s2;\
 30: r[__j1]=__s1;r[__j2]=__s2;}\
 31: if (nnz & 0x1) r[xi[__i]] -= alpha * xv[__i];}

 33: #else
 34: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {\
 35: PetscInt __i;\
 36: for(__i=0;__i<nnz;__i++)r[xi[__i]] -= alpha * xv[__i];}
 37: #endif

 39: /* Form sum -= r[xi] * xv; */
 40: #ifdef PETSC_USE_UNROLL_KERNELS
 41: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
 42: if (nnz > 0) {\
 43: switch (nnz & 0x3) {\
 44: case 3: sum -= *xv++ * r[*xi++];\
 45: case 2: sum -= *xv++ * r[*xi++];\
 46: case 1: sum -= *xv++ * r[*xi++];\
 47: nnz -= 4;}\
 48: while (nnz > 0) {\
 49: sum = sum - xv[0] * r[xi[0]] - xv[1] * r[xi[1]] -\
 50:         xv[2] * r[xi[2]] - xv[3] * r[xi[3]];\
 51: xv  += 4; xi += 4; nnz -= 4; }}}

 53: #elif defined(PETSC_USE_WHILE_KERNELS)
 54: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
 55: while (nnz--) sum -= *xv++ * r[*xi++];}

 57: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
 58: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
 59: PetscInt __i,__i1,__i2;\
 60: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];\
 61: sum -= (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}\
 62: if (nnz & 0x1) sum -= xv[__i] * r[xi[__i]];}

 64: #else
 65: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {\
 66: PetscInt __i;\
 67: for(__i=0;__i<nnz;__i++) sum -= xv[__i] * r[xi[__i]];}
 68: #endif



 72: /* Form sum += r[xi] * xv; */
 73: #ifdef PETSC_USE_UNROLL_KERNELS
 74: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
 75: if (nnz > 0) {\
 76: switch (nnz & 0x3) {\
 77: case 3: sum += *xv++ * r[*xi++];\
 78: case 2: sum += *xv++ * r[*xi++];\
 79: case 1: sum += *xv++ * r[*xi++];\
 80: nnz -= 4;}\
 81: while (nnz > 0) {\
 82: sum = sum + xv[0] * r[xi[0]] + xv[1] * r[xi[1]] +\
 83:         xv[2] * r[xi[2]] + xv[3] * r[xi[3]];\
 84: xv  += 4; xi += 4; nnz -= 4; }}}

 86: #elif defined(PETSC_USE_WHILE_KERNELS)
 87: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
 88: while (nnz--) sum += *xv++ * r[*xi++];}

 90: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
 91: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
 92: PetscInt __i,__i1,__i2;\
 93: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];\
 94: sum += (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}\
 95: if (nnz & 0x1) sum += xv[__i] * r[xi[__i]];}

 97: #else
 98: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {\
 99:  PetscInt __i;\
100: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[xi[__i]];}
101: #endif

103: /* Form sum += r[map[xi]] * xv; */
104: #ifdef PETSC_USE_UNROLL_KERNELS
105: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
106: if (nnz > 0) {\
107: switch (nnz & 0x3) {\
108: case 3: sum += *xv++ * r[map[*xi++]];\
109: case 2: sum += *xv++ * r[map[*xi++]];\
110: case 1: sum += *xv++ * r[map[*xi++]];\
111: nnz -= 4;}\
112: while (nnz > 0) {\
113: sum = sum + xv[0] * r[map[xi[0]]] + xv[1] * r[map[xi[1]]] +\
114:         xv[2] * r[map[xi[2]]] + xv[3] * r[map[xi[3]]];\
115: xv  += 4; xi += 4; nnz -= 4; }}}

117: #elif defined(PETSC_USE_WHILE_KERNELS)
118: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
119: while (nnz--) sum += *xv++ * r[map[*xi++]];}

121: #else
122: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {\
123: PetscInt __i;\
124: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[map[xi[__i]]];}
125: #endif

127: /* Gather xv = r[xi] */
128: #ifdef PETSC_USE_UNROLL_KERNELS
129: #define GATHER(xv,xi,r,nz) {PetscInt __noff;\
130: if (nz > 0) {\
131: __noff = nz & 0x3;\
132: switch (nz & 0x3) {\
133: case 3: xv[2] = r[xi[2]];\
134: case 2: xv[1] = r[xi[1]];\
135: case 1: xv[0] = r[xi[0]];\
136: nz -= 4;xv+=__noff;xi+=__noff;}\
137: while (nz > 0) {\
138: xv[0] = r[xi[0]]; xv[1] = r[xi[1]]; xv[2] = r[xi[2]]; xv[3] = r[xi[3]];\
139: xi  += 4;xv  += 4;nz -= 4;}}}

141: #elif defined(PETSC_USE_WHILE_KERNELS)
142: #define GATHER(xv,xi,r,nz) while (nz--) *xv++ = r[*xi++];

144: #elif defined(PETSC_USE_FOR_KERNELS)
145: #define GATHER(xv,xi,r,nz) {PetscInt __i; PetscScalar__s1,__s2;\
146: for(__i=0;__i<nz-1;__i+=2){__s1=r[xi[__i]];__s2=r[xi[__i+1]];\
147: xv[__i]=__s1;xv[__i+1]=__s2;}if ((nz)&0x1) xv[__i]=r[xi[__i]];}

149: #else
150: #define GATHER(xv,xi,r,nz) {PetscInt __i;for(__i=0;__i<nz;__i++)xv[__i]=r[xi[__i]];}
151: #endif

153: /* Scatter r[xi] = xv */
154: #ifdef PETSC_USE_UNROLL_KERNELS
155: #define SCATTER(xv,xi,r,nz) \
156: if (nz > 0) {\
157: switch (nz & 0x3) {\
158: case 3: r[*xi++] = *xv++;\
159: case 2: r[*xi++] = *xv++;\
160: case 1: r[*xi++] = *xv++;\
161: nz -= 4;}\
162: while (nz > 0) {\
163: r[xi[0]]=xv[0]; r[xi[1]]=xv[1]; r[xi[2]]=xv[2]; r[xi[3]]=xv[3];\
164: xi  += 4;xv  += 4;nz -= 4;}}

166: #elif defined(PETSC_USE_WHILE_KERNELS)
167: #define SCATTER(xv,xi,r,nz) while (nz--) r[*xi++]=*xv++;

169: #elif defined(PETSC_USE_FOR_KERNELS)
170: #define SCATTER(xv,xi,r,nz) {PetscInt __i; PetscScalar__s1,__s2;\
171: for(__i=0;__i<nz-1;__i+=2){__s1=xv[__i];__s2=xv[__i+1];\
172: r[xi[__i]]=__s1;r[xi[__i+1]]=__s2;}if ((nz)&0x1)r[xi[__i]]=xv[__i];}

174: #else
175: #define SCATTER(xv,xi,r,nz) {PetscInt __i;\
176: for(__i=0;__i<nz;__i++)r[xi[__i]]=xv[__i];}
177: #endif

179: /* Scatter r[xi] = val */
180: #ifdef PETSC_USE_UNROLL_KERNELS
181: #define SCATTERVAL(r,xi,n,val) \
182: switch (n & 0x3) {\
183: case 3: r[*xi++] = val;\
184: case 2: r[*xi++] = val;\
185: case 1: r[*xi++] = val;\
186: n -= 4;}\
187: while (n > 0) {\
188: r[xi[0]]=val; r[xi[1]]=val; r[xi[2]]=val; r[xi[3]]=val;xi  += 4;n -= 4;}

190: #elif defined(PETSC_USE_WHILE_KERNELS)
191: #define SCATTERVAL(r,xi,n,val) while (n--) r[*xi++]=val;

193: #else
194: #define SCATTERVAL(r,xi,n,val) {PetscInt __i;for(__i=0;__i<n;__i++)r[xi[__i]]=val;}
195: #endif

197: /* Copy vo[xi] = vi[xi] */
198: #ifdef PETSC_USE_UNROLL_KERNELS
199: #define COPYPERM(vo,xi,vi,n) \
200: switch (n & 0x3) {\
201: case 3: vo[*xi] = vi[*xi];xi++;\
202: case 2: vo[*xi] = vi[*xi];xi++;\
203: case 1: vo[*xi] = vi[*xi];xi++;\
204: n -= 4;}\
205: while (n > 0) {\
206: vo[xi[0]]=vi[xi[0]]; vo[xi[1]]=vi[xi[1]]; vo[xi[2]]=vi[xi[2]]; \
207: vo[xi[3]]=vi[xi[3]];xi  += 4;n -= 4;}}

209: #elif defined(PETSC_USE_WHILE_KERNELS)
210: #define COPYPERM(vo,xi,vi,n) while (n--) {vo[*xi]=vi[*xi];xi++;}

212: #else
213: #define COPYPERM(vo,xi,vi,n) {PetscInt __i;\
214: for(__i=0;__i<n;__i++)vo[xi[__i]]=vi[xi[__i]];}
215: #endif

217: /* Scale sparse vector v[xi] *= a */
218: #ifdef PETSC_USE_UNROLL_KERNELS
219: #define SPARSESCALE(v,xi,n,val) {\
220: switch (n & 0x3) {\
221: case 3: v[*xi++] *= val;\
222: case 2: v[*xi++] *= val;\
223: case 1: v[*xi++] *= val;\
224: n -= 4;}\
225: while (n > 0) {\
226: v[xi[0]]*=val;vo[xi[1]]*=val; vo[xi[2]]*=val;vo[xi[3]]*=val;xi  += 4;n -= 4;}}}

228: #elif defined(PETSC_USE_WHILE_KERNELS)
229: #define SPARSESCALE(v,xi,n,val) {\
230: while (n--) v[*xi++] *= val;}

232: #else
233: #define SPARSESCALE(v,xi,n,val) {PetscInt __i;\
234: for(__i=0;__i<n;__i++)v[xi[__i]*=val;}
235: #endif

237: /* sparse dot sum = sum(a[xi] * b[xi]) */
238: #ifdef PETSC_USE_UNROLL_KERNELS
239: #define SPARSEDOT(sum,a,b,xi,n) {\
240: switch (n & 0x3) {\
241: case 3: sum += a[*xi] * b[*xi]; xi++;\
242: case 2: sum += a[*xi] * b[*xi]; xi++;\
243: case 1: sum += a[*xi] * b[*xi]; xi++;\
244: n -= 4;}\
245: while (n > 0) {\
246: sum+=a[xi[0]]*b[xi[0]]+a[xi[1]]*b[xi[1]]+a[xi[2]]*b[xi[2]]+a[xi[3]]*b[xi[3]];\
247: xi  += 4;n -= 4;}}}

249: #elif defined(PETSC_USE_WHILE_KERNELS)
250: #define SPARSEDOT(sum,a,b,xi,n) {\
251: while (n--) {sum += a[*xi]*b[*xi];xi++;}}

253: #else
254: #define SPARSEDOT(sum,a,b,xi,n) {\
255: PetscInt __i;\
256: for(__i=0;__i<n;__i++)sum+= a[xi[__i]]*b[xi[__i]];}
257: #endif


260: /* Scatter r[xi] += xv */
261: #ifdef PETSC_USE_UNROLL_KERNELS
262: #define SCATTERADD(xv,xi,r,nz) {\
263: if (nz > 0) {\
264: switch (nz & 0x3) {\
265: case 3: r[*xi++] += *xv++;\
266: case 2: r[*xi++] += *xv++;\
267: case 1: r[*xi++] += *xv++;\
268: nz -= 4;}\
269: while (nz > 0) {\
270: r[xi[0]]+=xv[0]; r[xi[1]]+=xv[1]; r[xi[2]]+=xv[2]; r[xi[3]]+=xv[3];\
271: xi  += 4;xv  += 4;nz -= 4;}}}

273: #elif defined(PETSC_USE_WHILE_KERNELS)
274: #define SCATTERADD(xv,xi,r,nz) {\
275: while (nz--) r[*xi++]+= *xv++;}

277: #elif defined(PETSC_USE_FOR_KERNELS)
278: #define SCATTERADD(xv,xi,r,nz) { PetscScalar__s1,__s2;\
279:  PetscInt __i,__i1,__i2;\
280: for(__i=0;__i<nz-1;__i+=2){__i1 = xi[__i]; __i2 = xi[__i+1];\
281: __s1 = r[__i1]; __s2 = r[__i2]; __s1 += xv[__i]; __s2 += xv[__i+1];\
282: r[__i1]=__s1;r[__i2]=__s2;}if ((nz)&0x1)r[xi[__i]]+=xv[__i];}

284: #else
285: #define SCATTERADD(xv,xi,r,nz) {\
286: PetscInt __i;\
287: for(__i=0;__i<nz;__i++) r[xi[__i]]+= xv[__i];}
288: #endif

290: /* Gather xv += r[xi] */
291: #ifdef PETSC_USE_UNROLL_KERNELS
292: #define GATHERADD(xv,xi,r,nz) {\
293: if (nz > 0) {\
294: switch (nz & 0x3) {\
295: case 3: *xv++ += r[*xi++];\
296: case 2: *xv++ += r[*xi++];\
297: case 1: *xv++ += r[*xi++];\
298: nz -= 4;}\
299: while (nz > 0) {\
300: xv[0] += r[xi[0]]; xv[1] += r[xi[1]]; xv[2] += r[xi[2]]; xv[3] += r[xi[3]];\
301: xi  += 4;xv  += 4;nz -= 4;}}}

303: #elif defined(PETSC_USE_WHILE_KERNELS)
304: #define GATHERADD(xv,xi,r,nz) {\
305: while (nz--) *xv++ += r[*xi++];}

307: #else
308: #define GATHERADD(xv,xi,r,nz) {\
309: PetscInt __i;\
310: for(__i=0;__i<nz;__i++)xv[__i] += r[xi[__i]];}
311: #endif

313: #endif