Actual source code: Index.c

 2:  #include petsc.h
 3:  #include petscsys.h

  8: 
 11: int main(int argc,char **argv)
 12: {

 15:   PetscInitialize(&argc,&argv,0,0);
 16: 
 17:   test1();
 18:   test2();

 20:   PetscFinalize();
 21:   return(0);
 22: }

 26: int test1(void)
 27: {
 28:   PetscLogDouble  t1,t2;
 29:   double      value;
 30:   int         i,ierr,*z,*zi,intval;
 31:   PetscScalar *x,*y;
 32:   PetscRandom r;

 34:   PetscRandomCreate(PETSC_COMM_SELF,&r);
 35:   PetscRandomSetFromOptions(r);
 36:   PetscMalloc(20000*sizeof(PetscScalar),&x);
 37:   PetscMalloc(20000*sizeof(PetscScalar),&y);

 39:   PetscMalloc(2000*sizeof(int),&z);
 40:   PetscMalloc(2000*sizeof(int),&zi);



 44:   /* Take care of paging effects */
 45:   PetscGetTime(&t1);
 46: 
 47:    /* Form the random set of integers */
 48:   for (i=0; i<2000; i++) {
 49:     PetscRandomGetValue(r,&value);
 50:     intval = (int)(value*20000.0);
 51:     z[i]   = intval;
 52:   }

 54:   for (i=0; i<2000; i++) {
 55:     PetscRandomGetValue(r,&value);
 56:     intval  = (int)(value*20000.0);
 57:     zi[i]   = intval;
 58:   }
 59:   /* fprintf(stdout,"Done setup\n"); */

 61:   BlastCache();

 63:   PetscGetTime(&t1);
 64:   for (i=0; i<2000; i++) {  x[i] = y[i]; }
 65:   PetscGetTime(&t2);
 66:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

 68:   BlastCache();

 70:   PetscGetTime(&t1);
 71:   for (i=0; i<500; i+=4) {
 72:     x[i]   = y[z[i]];
 73:     x[1+i] = y[z[1+i]];
 74:     x[2+i] = y[z[2+i]];
 75:     x[3+i] = y[z[3+i]];
 76:   }
 77:   PetscGetTime(&t2);
 78:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 4",(t2-t1)/2000.0);

 80:   BlastCache();

 82:   PetscGetTime(&t1);CHKERRQ(ierr)
 83:   for (i=0; i<2000; i++) {  x[i] = y[z[i]]; }
 84:   PetscGetTime(&t2);
 85:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

 87:   BlastCache();

 89:   PetscGetTime(&t1);
 90:   for (i=0; i<1000; i+=2) {  x[i] = y[z[i]];  x[1+i] = y[z[1+i]]; }
 91:   PetscGetTime(&t2);
 92:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]] - unroll 2",(t2-t1)/2000.0);

 94:   BlastCache();

 96:   PetscGetTime(&t1);
 97:   for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
 98:   PetscGetTime(&t2);
 99:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

101:   BlastCache();

103:   PetscGetTime(&t1);
104:   for (i=0; i<2000; i++) {  x[z[i]] = y[zi[i]]; }
105:   PetscGetTime(&t2);
106:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);
107: 
108:   PetscMemcpy(x,y,10);
109:   PetscMemcpy(z,zi,10);
110:   PetscFree(z);
111:   PetscFree(zi);
112:   PetscFree(x);
113:   PetscFree(y);
114:   PetscRandomDestroy(r);
115:   return(0);
116: }

120: int test2(void)
121: {
122:   PetscLogDouble   t1,t2;
123:   double       value;
124:   int          i,ierr,z[20000],zi[20000],intval,tmp;
125:   PetscScalar  x[20000],y[20000];
126:   PetscRandom  r;

128:   PetscRandomCreate(PETSC_COMM_SELF,&r);
129:   PetscRandomSetFromOptions(r);

131:   /* Take care of paging effects */
132:   PetscGetTime(&t1);
133: 
134:   for (i=0; i<20000; i++) {
135:     x[i]  = i;
136:     y[i]  = i;
137:     z[i]  = i;
138:     zi[i] = i;
139:   }

141:    /* Form the random set of integers */
142:   for (i=0; i<20000; i++) {
143:     PetscRandomGetValue(r,&value);
144:     intval = (int)(value*20000.0);
145:     tmp    = z[i];
146:     z[i]   = z[intval];
147:     z[intval] = tmp;
148:   }

150:   for (i=0; i<20000; i++) {
151:     PetscRandomGetValue(r,&value);
152:     intval = (int)(value*20000.0);
153:     tmp    = zi[i];
154:     zi[i]  = zi[intval];
155:     zi[intval] = tmp;
156:   }
157:   /* fprintf(stdout,"Done setup\n"); */

159:   /* BlastCache(); */

161:   PetscGetTime(&t1);
162:   for (i=0; i<2000; i++) {  x[i] = y[i]; }
163:   PetscGetTime(&t2);
164:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[i]",(t2-t1)/2000.0);

166:   /* BlastCache(); */

168:   PetscGetTime(&t1);
169:   for (i=0; i<2000; i++) {  y[i] = x[z[i]]; }
170:   PetscGetTime(&t2);
171:   fprintf(stdout,"%-27s : %e sec\n","x[i] = y[idx[i]]",(t2-t1)/2000.0);

173:   /* BlastCache(); */

175:   PetscGetTime(&t1);
176:   for (i=0; i<2000; i++) {  x[z[i]] = y[i]; }
177:   PetscGetTime(&t2);
178:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[i]",(t2-t1)/2000.0);

180:   /* BlastCache(); */

182:   PetscGetTime(&t1);
183:   for (i=0; i<2000; i++) {  y[z[i]] = x[zi[i]]; }
184:   PetscGetTime(&t2);
185:   fprintf(stdout,"%-27s : %e sec\n","x[z[i]] = y[zi[i]]",(t2-t1)/2000.0);


188:   PetscRandomDestroy(r);
189:   return(0);
190: }

194: int BlastCache(void)
195: {
196:   int    i,ierr,n = 1000000;
197:   PetscScalar *x,*y,*z,*a,*b;

199:   PetscMalloc(5*n*sizeof(PetscScalar),&x);
200:   y = x + n;
201:   z = y + n;
202:   a = z + n;
203:   b = a + n;

205:   for (i=0; i<n; i++) {
206:     a[i] = (PetscScalar) i;
207:     y[i] = (PetscScalar) i;
208:     z[i] = (PetscScalar) i;
209:     b[i] = (PetscScalar) i;
210:     x[i] = (PetscScalar) i;
211:   }

213:   for (i=0; i<n; i++) {
214:     a[i] = 3.0*x[i] + 2.0*y[i] + 3.3*z[i] - 25.*b[i];
215:   }
216:   for (i=0; i<n; i++) {
217:     b[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
218:   }
219:   for (i=0; i<n; i++) {
220:     z[i] = 3.0*x[i] + 2.0*y[i] + 3.3*a[i] - 25.*b[i];
221:   }
222:   PetscFree(x);
223:   return(0);
224: }