~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/include/asm-i386/xor.h

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * include/asm-i386/xor.h
  3  *
  4  * Optimized RAID-5 checksumming functions for MMX and SSE.
  5  *
  6  * This program is free software; you can redistribute it and/or modify
  7  * it under the terms of the GNU General Public License as published by
  8  * the Free Software Foundation; either version 2, or (at your option)
  9  * any later version.
 10  *
 11  * You should have received a copy of the GNU General Public License
 12  * (for example /usr/src/linux/COPYING); if not, write to the Free
 13  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 14  */
 15 
 16 /*
 17  * High-speed RAID5 checksumming functions utilizing MMX instructions.
 18  * Copyright (C) 1998 Ingo Molnar.
 19  */
 20 
    /*
     * FPU_SAVE - stash the x87/MMX register state before an MMX XOR loop.
     *
     * Expands inside a function that has a local buffer named fpu_save (the
     * users in this file declare char fpu_save[108]).  When the current task
     * does not have PF_USEDFPU set, "clts" is issued first; the state is then
     * written to the buffer with "fsave", followed by "fwait".
     */
 21 #define FPU_SAVE                                                        \
 22   do {                                                                  \
 23         if (!(current->flags & PF_USEDFPU))                             \
 24                 __asm__ __volatile__ (" clts;\n");                      \
 25         __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0]));    \
 26   } while (0)
 27 
    /*
     * FPU_RESTORE - counterpart of FPU_SAVE.
     *
     * Reloads the state held in the local fpu_save buffer with "frstor".
     * When the current task does not have PF_USEDFPU set, stts() is called
     * afterwards, mirroring the conditional "clts" in FPU_SAVE.
     */
 28 #define FPU_RESTORE                                                     \
 29   do {                                                                  \
 30         __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0]));         \
 31         if (!(current->flags & PF_USEDFPU))                             \
 32                 stts();                                                 \
 33   } while (0)
 34 
    /*
     * Assembly-text builders for the pII MMX loops.  Each macro expands to
     * one instruction line; x is a quadword index (stringified and scaled
     * by 8 bytes) and y is the MMX register number.
     *
     *   LD(x,y)         movq from the buffer addressed by asm operand %1
     *   ST(x,y)         movq back to the buffer addressed by operand %1
     *   XO1 .. XO4      pxor with the buffer addressed by operand %2 .. %5
     */
 35 #define LD(x,y)         "       movq   8*("#x")(%1), %%mm"#y"   ;\n"
 36 #define ST(x,y)         "       movq %%mm"#y",   8*("#x")(%1)   ;\n"
 37 #define XO1(x,y)        "       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
 38 #define XO2(x,y)        "       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
 39 #define XO3(x,y)        "       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
 40 #define XO4(x,y)        "       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
 41 
 42 
    /*
     * xor_pII_mmx_2 - XOR the buffer at p2 into the buffer at p1 with MMX.
     *
     * bytes is the length to process; the loop runs bytes >> 7 times and
     * handles 128 bytes per pass (four BLOCKs of four quadwords each).
     * p1 is both the first source and the destination, p2 the second source.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 128 bytes is not guarded against here.
     */
 43 static void
 44 xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 45 {
 46         unsigned long lines = bytes >> 7;
 47         char fpu_save[108];
 48 
 49         FPU_SAVE;
 50 
            /*
             * The loop counts lines down and advances both pointers in
             * place, so all three are read-write operands, not inputs.
             */
 51         __asm__ __volatile__ (
 52 #undef BLOCK
 53 #define BLOCK(i) \
 54         LD(i,0)                                 \
 55                 LD(i+1,1)                       \
 56                         LD(i+2,2)               \
 57                                 LD(i+3,3)       \
 58         XO1(i,0)                                \
 59         ST(i,0)                                 \
 60                 XO1(i+1,1)                      \
 61                 ST(i+1,1)                       \
 62                         XO1(i+2,2)              \
 63                         ST(i+2,2)               \
 64                                 XO1(i+3,3)      \
 65                                 ST(i+3,3)
 66 
 67         " .align 32                     ;\n"
 68         " 1:                            ;\n"
 69 
 70         BLOCK(0)
 71         BLOCK(4)
 72         BLOCK(8)
 73         BLOCK(12)
 74 
 75         "       addl $128, %1         ;\n"
 76         "       addl $128, %2         ;\n"
 77         "       decl %0               ;\n"
 78         "       jnz 1b                ;\n"
 79         : "+r" (lines),
 80           "+r" (p1), "+r" (p2)
 81         :
 82         : "memory");
 83 
 84         FPU_RESTORE;
 85 }
 86 
    /*
     * xor_pII_mmx_3 - XOR the buffers at p2 and p3 into the buffer at p1
     * with MMX.
     *
     * bytes is the length to process; the loop runs bytes >> 7 times and
     * handles 128 bytes per pass.  p1 is both a source and the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 128 bytes is not guarded against here.
     */
 87 static void
 88 xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 89               unsigned long *p3)
 90 {
 91         unsigned long lines = bytes >> 7;
 92         char fpu_save[108];
 93 
 94         FPU_SAVE;
 95 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
 96         __asm__ __volatile__ (
 97 #undef BLOCK
 98 #define BLOCK(i) \
 99         LD(i,0)                                 \
100                 LD(i+1,1)                       \
101                         LD(i+2,2)               \
102                                 LD(i+3,3)       \
103         XO1(i,0)                                \
104                 XO1(i+1,1)                      \
105                         XO1(i+2,2)              \
106                                 XO1(i+3,3)      \
107         XO2(i,0)                                \
108         ST(i,0)                                 \
109                 XO2(i+1,1)                      \
110                 ST(i+1,1)                       \
111                         XO2(i+2,2)              \
112                         ST(i+2,2)               \
113                                 XO2(i+3,3)      \
114                                 ST(i+3,3)
115 
116         " .align 32                     ;\n"
117         " 1:                            ;\n"
118 
119         BLOCK(0)
120         BLOCK(4)
121         BLOCK(8)
122         BLOCK(12)
123 
124         "       addl $128, %1         ;\n"
125         "       addl $128, %2         ;\n"
126         "       addl $128, %3         ;\n"
127         "       decl %0               ;\n"
128         "       jnz 1b                ;\n"
129         : "+r" (lines),
130           "+r" (p1), "+r" (p2), "+r" (p3)
131         :
132         : "memory");
133 
134         FPU_RESTORE;
135 }
136 
    /*
     * xor_pII_mmx_4 - XOR the buffers at p2, p3 and p4 into the buffer at
     * p1 with MMX.
     *
     * bytes is the length to process; the loop runs bytes >> 7 times and
     * handles 128 bytes per pass.  p1 is both a source and the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 128 bytes is not guarded against here.
     */
137 static void
138 xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
139               unsigned long *p3, unsigned long *p4)
140 {
141         unsigned long lines = bytes >> 7;
142         char fpu_save[108];
143 
144         FPU_SAVE;
145 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
146         __asm__ __volatile__ (
147 #undef BLOCK
148 #define BLOCK(i) \
149         LD(i,0)                                 \
150                 LD(i+1,1)                       \
151                         LD(i+2,2)               \
152                                 LD(i+3,3)       \
153         XO1(i,0)                                \
154                 XO1(i+1,1)                      \
155                         XO1(i+2,2)              \
156                                 XO1(i+3,3)      \
157         XO2(i,0)                                \
158                 XO2(i+1,1)                      \
159                         XO2(i+2,2)              \
160                                 XO2(i+3,3)      \
161         XO3(i,0)                                \
162         ST(i,0)                                 \
163                 XO3(i+1,1)                      \
164                 ST(i+1,1)                       \
165                         XO3(i+2,2)              \
166                         ST(i+2,2)               \
167                                 XO3(i+3,3)      \
168                                 ST(i+3,3)
169 
170         " .align 32                     ;\n"
171         " 1:                            ;\n"
172 
173         BLOCK(0)
174         BLOCK(4)
175         BLOCK(8)
176         BLOCK(12)
177 
178         "       addl $128, %1         ;\n"
179         "       addl $128, %2         ;\n"
180         "       addl $128, %3         ;\n"
181         "       addl $128, %4         ;\n"
182         "       decl %0               ;\n"
183         "       jnz 1b                ;\n"
184         : "+r" (lines),
185           "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
186         :
187         : "memory");
188 
189         FPU_RESTORE;
190 }
191 
    /*
     * xor_pII_mmx_5 - XOR the buffers at p2, p3, p4 and p5 into the buffer
     * at p1 with MMX.
     *
     * bytes is the length to process; the loop runs bytes >> 7 times and
     * handles 128 bytes per pass.  p1 is both a source and the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 128 bytes is not guarded against here.
     */
192 static void
193 xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
194               unsigned long *p3, unsigned long *p4, unsigned long *p5)
195 {
196         unsigned long lines = bytes >> 7;
197         char fpu_save[108];
198 
199         FPU_SAVE;
200 
            /*
             * The loop modifies p4 and p5, but they stay plain inputs below
             * to keep the operand count down (a "+" operand counts twice).
             * NOTE(review): older GCC releases reportedly cap asm statements
             * at ten operands -- confirm before making them read-write too.
             * This empty asm makes the compiler forget what it knows about
             * the two values, so the registers handed to the loop are not
             * shared with any other variable.
             */
            __asm__ ("" : "+r" (p4), "+r" (p5));

            /*
             * lines, p1, p2 and p3 are changed in place by the loop, so they
             * are read-write operands.  lines keeps its "g" constraint and
             * may therefore live in memory.
             */
201         __asm__ __volatile__ (
202 #undef BLOCK
203 #define BLOCK(i) \
204         LD(i,0)                                 \
205                 LD(i+1,1)                       \
206                         LD(i+2,2)               \
207                                 LD(i+3,3)       \
208         XO1(i,0)                                \
209                 XO1(i+1,1)                      \
210                         XO1(i+2,2)              \
211                                 XO1(i+3,3)      \
212         XO2(i,0)                                \
213                 XO2(i+1,1)                      \
214                         XO2(i+2,2)              \
215                                 XO2(i+3,3)      \
216         XO3(i,0)                                \
217                 XO3(i+1,1)                      \
218                         XO3(i+2,2)              \
219                                 XO3(i+3,3)      \
220         XO4(i,0)                                \
221         ST(i,0)                                 \
222                 XO4(i+1,1)                      \
223                 ST(i+1,1)                       \
224                         XO4(i+2,2)              \
225                         ST(i+2,2)               \
226                                 XO4(i+3,3)      \
227                                 ST(i+3,3)
228 
229         " .align 32                     ;\n"
230         " 1:                            ;\n"
231 
232         BLOCK(0)
233         BLOCK(4)
234         BLOCK(8)
235         BLOCK(12)
236 
237         "       addl $128, %1         ;\n"
238         "       addl $128, %2         ;\n"
239         "       addl $128, %3         ;\n"
240         "       addl $128, %4         ;\n"
241         "       addl $128, %5         ;\n"
242         "       decl %0               ;\n"
243         "       jnz 1b                ;\n"
244         : "+g" (lines),
245           "+r" (p1), "+r" (p2), "+r" (p3)
246         : "r" (p4), "r" (p5)
247         : "memory");
248 
            /*
             * p4 and p5 were advanced behind the compiler's back and are
             * dead from here on; mark them clobbered so nothing relies on
             * their old values.
             */
            __asm__ ("" : "=r" (p4), "=r" (p5));

249         FPU_RESTORE;
250 }
251 
252 #undef LD
253 #undef XO1
254 #undef XO2
255 #undef XO3
256 #undef XO4
257 #undef ST
258 #undef BLOCK
259 
    /*
     * xor_p5_mmx_2 - XOR the buffer at p2 into the buffer at p1 with MMX,
     * using a hand-interleaved instruction order.
     *
     * bytes is the length to process; the loop runs bytes >> 6 times and
     * handles 64 bytes per pass through mm0..mm7.  p1 is both the first
     * source and the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 64 bytes is not guarded against here.
     */
260 static void
261 xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
262 {
263         unsigned long lines = bytes >> 6;
264         char fpu_save[108];
265 
266         FPU_SAVE;
267 
            /*
             * The loop counts lines down and advances both pointers in
             * place, so all three are read-write operands, not inputs.
             */
268         __asm__ __volatile__ (
269         " .align 32                  ;\n"
270         " 1:                         ;\n"
271         "       movq   (%1), %%mm0   ;\n"
272         "       movq  8(%1), %%mm1   ;\n"
273         "       pxor   (%2), %%mm0   ;\n"
274         "       movq 16(%1), %%mm2   ;\n"
275         "       movq %%mm0,   (%1)   ;\n"
276         "       pxor  8(%2), %%mm1   ;\n"
277         "       movq 24(%1), %%mm3   ;\n"
278         "       movq %%mm1,  8(%1)   ;\n"
279         "       pxor 16(%2), %%mm2   ;\n"
280         "       movq 32(%1), %%mm4   ;\n"
281         "       movq %%mm2, 16(%1)   ;\n"
282         "       pxor 24(%2), %%mm3   ;\n"
283         "       movq 40(%1), %%mm5   ;\n"
284         "       movq %%mm3, 24(%1)   ;\n"
285         "       pxor 32(%2), %%mm4   ;\n"
286         "       movq 48(%1), %%mm6   ;\n"
287         "       movq %%mm4, 32(%1)   ;\n"
288         "       pxor 40(%2), %%mm5   ;\n"
289         "       movq 56(%1), %%mm7   ;\n"
290         "       movq %%mm5, 40(%1)   ;\n"
291         "       pxor 48(%2), %%mm6   ;\n"
292         "       pxor 56(%2), %%mm7   ;\n"
293         "       movq %%mm6, 48(%1)   ;\n"
294         "       movq %%mm7, 56(%1)   ;\n"
295         
296         "       addl $64, %1         ;\n"
297         "       addl $64, %2         ;\n"
298         "       decl %0              ;\n"
299         "       jnz 1b               ;\n"
300         : "+r" (lines),
301           "+r" (p1), "+r" (p2)
302         :
303         : "memory");
304 
305         FPU_RESTORE;
306 }
307 
    /*
     * xor_p5_mmx_3 - XOR the buffers at p2 and p3 into the buffer at p1
     * with MMX, using a hand-interleaved instruction order.
     *
     * bytes is the length to process; the loop runs bytes >> 6 times and
     * handles 64 bytes per pass through mm0..mm7.  p1 is both a source and
     * the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 64 bytes is not guarded against here.
     */
308 static void
309 xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
310              unsigned long *p3)
311 {
312         unsigned long lines = bytes >> 6;
313         char fpu_save[108];
314 
315         FPU_SAVE;
316 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
317         __asm__ __volatile__ (
318         " .align 32,0x90             ;\n"
319         " 1:                         ;\n"
320         "       movq   (%1), %%mm0   ;\n"
321         "       movq  8(%1), %%mm1   ;\n"
322         "       pxor   (%2), %%mm0   ;\n"
323         "       movq 16(%1), %%mm2   ;\n"
324         "       pxor  8(%2), %%mm1   ;\n"
325         "       pxor   (%3), %%mm0   ;\n"
326         "       pxor 16(%2), %%mm2   ;\n"
327         "       movq %%mm0,   (%1)   ;\n"
328         "       pxor  8(%3), %%mm1   ;\n"
329         "       pxor 16(%3), %%mm2   ;\n"
330         "       movq 24(%1), %%mm3   ;\n"
331         "       movq %%mm1,  8(%1)   ;\n"
332         "       movq 32(%1), %%mm4   ;\n"
333         "       movq 40(%1), %%mm5   ;\n"
334         "       pxor 24(%2), %%mm3   ;\n"
335         "       movq %%mm2, 16(%1)   ;\n"
336         "       pxor 32(%2), %%mm4   ;\n"
337         "       pxor 24(%3), %%mm3   ;\n"
338         "       pxor 40(%2), %%mm5   ;\n"
339         "       movq %%mm3, 24(%1)   ;\n"
340         "       pxor 32(%3), %%mm4   ;\n"
341         "       pxor 40(%3), %%mm5   ;\n"
342         "       movq 48(%1), %%mm6   ;\n"
343         "       movq %%mm4, 32(%1)   ;\n"
344         "       movq 56(%1), %%mm7   ;\n"
345         "       pxor 48(%2), %%mm6   ;\n"
346         "       movq %%mm5, 40(%1)   ;\n"
347         "       pxor 56(%2), %%mm7   ;\n"
348         "       pxor 48(%3), %%mm6   ;\n"
349         "       pxor 56(%3), %%mm7   ;\n"
350         "       movq %%mm6, 48(%1)   ;\n"
351         "       movq %%mm7, 56(%1)   ;\n"
352       
353         "       addl $64, %1         ;\n"
354         "       addl $64, %2         ;\n"
355         "       addl $64, %3         ;\n"
356         "       decl %0              ;\n"
357         "       jnz 1b               ;\n"
358         : "+r" (lines),
359           "+r" (p1), "+r" (p2), "+r" (p3)
360         :
361         : "memory" );
362 
363         FPU_RESTORE;
364 }
365 
    /*
     * xor_p5_mmx_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1
     * with MMX, using a hand-interleaved instruction order.
     *
     * bytes is the length to process; the loop runs bytes >> 6 times and
     * handles 64 bytes per pass through mm0..mm7.  p1 is both a source and
     * the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 64 bytes is not guarded against here.
     */
366 static void
367 xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
368              unsigned long *p3, unsigned long *p4)
369 {
370         unsigned long lines = bytes >> 6;
371         char fpu_save[108];
372 
373         FPU_SAVE;
374 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
375         __asm__ __volatile__ (
376         " .align 32,0x90             ;\n"
377         " 1:                         ;\n"
378         "       movq   (%1), %%mm0   ;\n"
379         "       movq  8(%1), %%mm1   ;\n"
380         "       pxor   (%2), %%mm0   ;\n"
381         "       movq 16(%1), %%mm2   ;\n"
382         "       pxor  8(%2), %%mm1   ;\n"
383         "       pxor   (%3), %%mm0   ;\n"
384         "       pxor 16(%2), %%mm2   ;\n"
385         "       pxor  8(%3), %%mm1   ;\n"
386         "       pxor   (%4), %%mm0   ;\n"
387         "       movq 24(%1), %%mm3   ;\n"
388         "       pxor 16(%3), %%mm2   ;\n"
389         "       pxor  8(%4), %%mm1   ;\n"
390         "       movq %%mm0,   (%1)   ;\n"
391         "       movq 32(%1), %%mm4   ;\n"
392         "       pxor 24(%2), %%mm3   ;\n"
393         "       pxor 16(%4), %%mm2   ;\n"
394         "       movq %%mm1,  8(%1)   ;\n"
395         "       movq 40(%1), %%mm5   ;\n"
396         "       pxor 32(%2), %%mm4   ;\n"
397         "       pxor 24(%3), %%mm3   ;\n"
398         "       movq %%mm2, 16(%1)   ;\n"
399         "       pxor 40(%2), %%mm5   ;\n"
400         "       pxor 32(%3), %%mm4   ;\n"
401         "       pxor 24(%4), %%mm3   ;\n"
402         "       movq %%mm3, 24(%1)   ;\n"
403         "       movq 56(%1), %%mm7   ;\n"
404         "       movq 48(%1), %%mm6   ;\n"
405         "       pxor 40(%3), %%mm5   ;\n"
406         "       pxor 32(%4), %%mm4   ;\n"
407         "       pxor 48(%2), %%mm6   ;\n"
408         "       movq %%mm4, 32(%1)   ;\n"
409         "       pxor 56(%2), %%mm7   ;\n"
410         "       pxor 40(%4), %%mm5   ;\n"
411         "       pxor 48(%3), %%mm6   ;\n"
412         "       pxor 56(%3), %%mm7   ;\n"
413         "       movq %%mm5, 40(%1)   ;\n"
414         "       pxor 48(%4), %%mm6   ;\n"
415         "       pxor 56(%4), %%mm7   ;\n"
416         "       movq %%mm6, 48(%1)   ;\n"
417         "       movq %%mm7, 56(%1)   ;\n"
418       
419         "       addl $64, %1         ;\n"
420         "       addl $64, %2         ;\n"
421         "       addl $64, %3         ;\n"
422         "       addl $64, %4         ;\n"
423         "       decl %0              ;\n"
424         "       jnz 1b               ;\n"
425         : "+r" (lines),
426           "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
427         :
428         : "memory");
429 
430         FPU_RESTORE;
431 }
432 
    /*
     * xor_p5_mmx_5 - XOR the buffers at p2, p3, p4 and p5 into the buffer
     * at p1 with MMX, using a hand-interleaved instruction order.
     *
     * bytes is the length to process; the loop runs bytes >> 6 times and
     * handles 64 bytes per pass through mm0..mm7.  p1 is both a source and
     * the destination.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 64 bytes is not guarded against here.
     */
433 static void
434 xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
435              unsigned long *p3, unsigned long *p4, unsigned long *p5)
436 {
437         unsigned long lines = bytes >> 6;
438         char fpu_save[108];
439 
440         FPU_SAVE;
441 
            /*
             * The loop modifies p4 and p5, but they stay plain inputs below
             * to keep the operand count down (a "+" operand counts twice).
             * NOTE(review): older GCC releases reportedly cap asm statements
             * at ten operands -- confirm before making them read-write too.
             * This empty asm makes the compiler forget what it knows about
             * the two values, so the registers handed to the loop are not
             * shared with any other variable.
             */
            __asm__ ("" : "+r" (p4), "+r" (p5));

            /*
             * lines, p1, p2 and p3 are changed in place by the loop, so they
             * are read-write operands.  lines keeps its "g" constraint and
             * may therefore live in memory.
             */
442         __asm__ __volatile__ (
443         " .align 32,0x90             ;\n"
444         " 1:                         ;\n"
445         "       movq   (%1), %%mm0   ;\n"
446         "       movq  8(%1), %%mm1   ;\n"
447         "       pxor   (%2), %%mm0   ;\n"
448         "       pxor  8(%2), %%mm1   ;\n"
449         "       movq 16(%1), %%mm2   ;\n"
450         "       pxor   (%3), %%mm0   ;\n"
451         "       pxor  8(%3), %%mm1   ;\n"
452         "       pxor 16(%2), %%mm2   ;\n"
453         "       pxor   (%4), %%mm0   ;\n"
454         "       pxor  8(%4), %%mm1   ;\n"
455         "       pxor 16(%3), %%mm2   ;\n"
456         "       movq 24(%1), %%mm3   ;\n"
457         "       pxor   (%5), %%mm0   ;\n"
458         "       pxor  8(%5), %%mm1   ;\n"
459         "       movq %%mm0,   (%1)   ;\n"
460         "       pxor 16(%4), %%mm2   ;\n"
461         "       pxor 24(%2), %%mm3   ;\n"
462         "       movq %%mm1,  8(%1)   ;\n"
463         "       pxor 16(%5), %%mm2   ;\n"
464         "       pxor 24(%3), %%mm3   ;\n"
465         "       movq 32(%1), %%mm4   ;\n"
466         "       movq %%mm2, 16(%1)   ;\n"
467         "       pxor 24(%4), %%mm3   ;\n"
468         "       pxor 32(%2), %%mm4   ;\n"
469         "       movq 40(%1), %%mm5   ;\n"
470         "       pxor 24(%5), %%mm3   ;\n"
471         "       pxor 32(%3), %%mm4   ;\n"
472         "       pxor 40(%2), %%mm5   ;\n"
473         "       movq %%mm3, 24(%1)   ;\n"
474         "       pxor 32(%4), %%mm4   ;\n"
475         "       pxor 40(%3), %%mm5   ;\n"
476         "       movq 48(%1), %%mm6   ;\n"
477         "       movq 56(%1), %%mm7   ;\n"
478         "       pxor 32(%5), %%mm4   ;\n"
479         "       pxor 40(%4), %%mm5   ;\n"
480         "       pxor 48(%2), %%mm6   ;\n"
481         "       pxor 56(%2), %%mm7   ;\n"
482         "       movq %%mm4, 32(%1)   ;\n"
483         "       pxor 48(%3), %%mm6   ;\n"
484         "       pxor 56(%3), %%mm7   ;\n"
485         "       pxor 40(%5), %%mm5   ;\n"
486         "       pxor 48(%4), %%mm6   ;\n"
487         "       pxor 56(%4), %%mm7   ;\n"
488         "       movq %%mm5, 40(%1)   ;\n"
489         "       pxor 48(%5), %%mm6   ;\n"
490         "       pxor 56(%5), %%mm7   ;\n"
491         "       movq %%mm6, 48(%1)   ;\n"
492         "       movq %%mm7, 56(%1)   ;\n"
493       
494         "       addl $64, %1         ;\n"
495         "       addl $64, %2         ;\n"
496         "       addl $64, %3         ;\n"
497         "       addl $64, %4         ;\n"
498         "       addl $64, %5         ;\n"
499         "       decl %0              ;\n"
500         "       jnz 1b               ;\n"
501         : "+g" (lines),
502           "+r" (p1), "+r" (p2), "+r" (p3)
503         : "r" (p4), "r" (p5)
504         : "memory");
505 
            /*
             * p4 and p5 were advanced behind the compiler's back and are
             * dead from here on; mark them clobbered so nothing relies on
             * their old values.
             */
            __asm__ ("" : "=r" (p4), "=r" (p5));

506         FPU_RESTORE;
507 }
508 
    /*
     * Template tying the name "pII_mmx" to the four xor_pII_mmx_* routines,
     * one entry per number of buffers (2 to 5).
     */
509 static struct xor_block_template xor_block_pII_mmx = {
510         .name = "pII_mmx",
511         .do_2 = xor_pII_mmx_2,
512         .do_3 = xor_pII_mmx_3,
513         .do_4 = xor_pII_mmx_4,
514         .do_5 = xor_pII_mmx_5,
515 };
516 
    /*
     * Template tying the name "p5_mmx" to the four xor_p5_mmx_* routines,
     * one entry per number of buffers (2 to 5).
     */
517 static struct xor_block_template xor_block_p5_mmx = {
518         .name = "p5_mmx",
519         .do_2 = xor_p5_mmx_2,
520         .do_3 = xor_p5_mmx_3,
521         .do_4 = xor_p5_mmx_4,
522         .do_5 = xor_p5_mmx_5,
523 };
524 
525 #undef FPU_SAVE
526 #undef FPU_RESTORE
527 
528 /*
529  * Cache avoiding checksumming functions utilizing KNI instructions
530  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
531  */
532 
    /*
     * XMMS_SAVE - prepare for the SSE XOR loops.
     *
     * Expands inside a function that has locals named cr0 and xmm_save (the
     * users in this file declare int cr0 and char xmm_save[16*4]).  The
     * current %cr0 value is copied into cr0, "clts" is issued, and xmm0..xmm3
     * are stored to xmm_save with unaligned moves.
     *
     * cr0 is an earlyclobber output ("=&r"): it is written by the first
     * instruction while the xmm_save address in %1 is still needed, so the
     * two must never be given the same register.
     */
533 #define XMMS_SAVE                               \
534         __asm__ __volatile__ (                  \
535                 "movl %%cr0,%0          ;\n\t"  \
536                 "clts                   ;\n\t"  \
537                 "movups %%xmm0,(%1)     ;\n\t"  \
538                 "movups %%xmm1,0x10(%1) ;\n\t"  \
539                 "movups %%xmm2,0x20(%1) ;\n\t"  \
540                 "movups %%xmm3,0x30(%1) ;\n\t"  \
541                 : "=&r" (cr0)                   \
542                 : "r" (xmm_save)                \
543                 : "memory")
544 
    /*
     * XMMS_RESTORE - counterpart of XMMS_SAVE.
     *
     * Issues "sfence", reloads xmm0..xmm3 from the local xmm_save buffer and
     * finally writes the saved value in the local cr0 back to %cr0.
     */
545 #define XMMS_RESTORE                            \
546         __asm__ __volatile__ (                  \
547                 "sfence                 ;\n\t"  \
548                 "movups (%1),%%xmm0     ;\n\t"  \
549                 "movups 0x10(%1),%%xmm1 ;\n\t"  \
550                 "movups 0x20(%1),%%xmm2 ;\n\t"  \
551                 "movups 0x30(%1),%%xmm3 ;\n\t"  \
552                 "movl   %0,%%cr0        ;\n\t"  \
553                 :                               \
554                 : "r" (cr0), "r" (xmm_save)     \
555                 : "memory")
556 
    /*
     * Assembly-text builders for the SSE loops.  x is an index in units of
     * 16 bytes (OFFS(x) expands to the text "16*(x)") and y is the XMM
     * register number.
     *
     *   PF0(x)          prefetcht0 on the buffer addressed by operand %1
     *   LD(x,y)/ST(x,y) movaps load from / store to the buffer at operand %1
     *   PF1 .. PF5      prefetchnta on the buffer at operand %2 .. %6
     *   XO1 .. XO5      xorps with the buffer at operand %2 .. %6
     *
     * NOTE(review): PF5 and XO5 refer to operand %6, which none of the asm
     * statements visible in this file provides; no BLOCK here uses them.
     */
557 #define OFFS(x)         "16*("#x")"
558 #define PF0(x)          "       prefetcht0  "OFFS(x)"(%1)   ;\n"
559 #define LD(x,y)         "       movaps   "OFFS(x)"(%1), %%xmm"#y"   ;\n"
560 #define ST(x,y)         "       movaps %%xmm"#y",   "OFFS(x)"(%1)   ;\n"
561 #define PF1(x)          "       prefetchnta "OFFS(x)"(%2)   ;\n"
562 #define PF2(x)          "       prefetchnta "OFFS(x)"(%3)   ;\n"
563 #define PF3(x)          "       prefetchnta "OFFS(x)"(%4)   ;\n"
564 #define PF4(x)          "       prefetchnta "OFFS(x)"(%5)   ;\n"
565 #define PF5(x)          "       prefetchnta "OFFS(x)"(%6)   ;\n"
566 #define XO1(x,y)        "       xorps   "OFFS(x)"(%2), %%xmm"#y"   ;\n"
567 #define XO2(x,y)        "       xorps   "OFFS(x)"(%3), %%xmm"#y"   ;\n"
568 #define XO3(x,y)        "       xorps   "OFFS(x)"(%4), %%xmm"#y"   ;\n"
569 #define XO4(x,y)        "       xorps   "OFFS(x)"(%5), %%xmm"#y"   ;\n"
570 #define XO5(x,y)        "       xorps   "OFFS(x)"(%6), %%xmm"#y"   ;\n"
571 
572 
    /*
     * xor_sse_2 - XOR the buffer at p2 into the buffer at p1 with SSE.
     *
     * bytes is the length to process; the loop runs bytes >> 8 times and
     * handles 256 bytes per pass (four BLOCKs of four 16-byte registers).
     * p1 is both the first source and the destination.  Each BLOCK also
     * prefetches the p2 data it is about to use and p1 data further ahead.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 256 bytes is not guarded against here.
     */
573 static void
574 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
575 {
576         unsigned long lines = bytes >> 8;
577         char xmm_save[16*4];
578         int cr0;
579 
580         XMMS_SAVE;
581 
            /*
             * The loop counts lines down and advances both pointers in
             * place, so all three are read-write operands, not inputs.
             */
582         __asm__ __volatile__ (
583 #undef BLOCK
584 #define BLOCK(i) \
585                 LD(i,0)                                 \
586                         LD(i+1,1)                       \
587                 PF1(i)                                  \
588                                 PF1(i+2)                \
589                                 LD(i+2,2)               \
590                                         LD(i+3,3)       \
591                 PF0(i+4)                                \
592                                 PF0(i+6)                \
593                 XO1(i,0)                                \
594                         XO1(i+1,1)                      \
595                                 XO1(i+2,2)              \
596                                         XO1(i+3,3)      \
597                 ST(i,0)                                 \
598                         ST(i+1,1)                       \
599                                 ST(i+2,2)               \
600                                         ST(i+3,3)       \
601 
602 
603                 PF0(0)
604                                 PF0(2)
605 
606         " .align 32                     ;\n"
607         " 1:                            ;\n"
608 
609                 BLOCK(0)
610                 BLOCK(4)
611                 BLOCK(8)
612                 BLOCK(12)
613 
614         "       addl $256, %1           ;\n"
615         "       addl $256, %2           ;\n"
616         "       decl %0                 ;\n"
617         "       jnz 1b                  ;\n"
618         : "+r" (lines),
619           "+r" (p1), "+r" (p2)
620         :
621         : "memory");
622 
623         XMMS_RESTORE;
624 }
625 
    /*
     * xor_sse_3 - XOR the buffers at p2 and p3 into the buffer at p1 with
     * SSE.
     *
     * bytes is the length to process; the loop runs bytes >> 8 times and
     * handles 256 bytes per pass.  p1 is both a source and the destination.
     * Each BLOCK also prefetches the source data it is about to use and p1
     * data further ahead.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 256 bytes is not guarded against here.
     */
626 static void
627 xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
628           unsigned long *p3)
629 {
630         unsigned long lines = bytes >> 8;
631         char xmm_save[16*4];
632         int cr0;
633 
634         XMMS_SAVE;
635 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
636         __asm__ __volatile__ (
637 #undef BLOCK
638 #define BLOCK(i) \
639                 PF1(i)                                  \
640                                 PF1(i+2)                \
641                 LD(i,0)                                 \
642                         LD(i+1,1)                       \
643                                 LD(i+2,2)               \
644                                         LD(i+3,3)       \
645                 PF2(i)                                  \
646                                 PF2(i+2)                \
647                 PF0(i+4)                                \
648                                 PF0(i+6)                \
649                 XO1(i,0)                                \
650                         XO1(i+1,1)                      \
651                                 XO1(i+2,2)              \
652                                         XO1(i+3,3)      \
653                 XO2(i,0)                                \
654                         XO2(i+1,1)                      \
655                                 XO2(i+2,2)              \
656                                         XO2(i+3,3)      \
657                 ST(i,0)                                 \
658                         ST(i+1,1)                       \
659                                 ST(i+2,2)               \
660                                         ST(i+3,3)       \
661 
662 
663                 PF0(0)
664                                 PF0(2)
665 
666         " .align 32                     ;\n"
667         " 1:                            ;\n"
668 
669                 BLOCK(0)
670                 BLOCK(4)
671                 BLOCK(8)
672                 BLOCK(12)
673 
674         "       addl $256, %1           ;\n"
675         "       addl $256, %2           ;\n"
676         "       addl $256, %3           ;\n"
677         "       decl %0                 ;\n"
678         "       jnz 1b                  ;\n"
679         : "+r" (lines),
680           "+r" (p1), "+r"(p2), "+r"(p3)
681         :
682         : "memory" );
683 
684         XMMS_RESTORE;
685 }
686 
    /*
     * xor_sse_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1
     * with SSE.
     *
     * bytes is the length to process; the loop runs bytes >> 8 times and
     * handles 256 bytes per pass.  p1 is both a source and the destination.
     * Each BLOCK also prefetches the source data it is about to use and p1
     * data further ahead.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 256 bytes is not guarded against here.
     */
687 static void
688 xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
689           unsigned long *p3, unsigned long *p4)
690 {
691         unsigned long lines = bytes >> 8;
692         char xmm_save[16*4];
693         int cr0;
694 
695         XMMS_SAVE;
696 
            /*
             * The loop counts lines down and advances every pointer in
             * place, so all of them are read-write operands, not inputs.
             */
697         __asm__ __volatile__ (
698 #undef BLOCK
699 #define BLOCK(i) \
700                 PF1(i)                                  \
701                                 PF1(i+2)                \
702                 LD(i,0)                                 \
703                         LD(i+1,1)                       \
704                                 LD(i+2,2)               \
705                                         LD(i+3,3)       \
706                 PF2(i)                                  \
707                                 PF2(i+2)                \
708                 XO1(i,0)                                \
709                         XO1(i+1,1)                      \
710                                 XO1(i+2,2)              \
711                                         XO1(i+3,3)      \
712                 PF3(i)                                  \
713                                 PF3(i+2)                \
714                 PF0(i+4)                                \
715                                 PF0(i+6)                \
716                 XO2(i,0)                                \
717                         XO2(i+1,1)                      \
718                                 XO2(i+2,2)              \
719                                         XO2(i+3,3)      \
720                 XO3(i,0)                                \
721                         XO3(i+1,1)                      \
722                                 XO3(i+2,2)              \
723                                         XO3(i+3,3)      \
724                 ST(i,0)                                 \
725                         ST(i+1,1)                       \
726                                 ST(i+2,2)               \
727                                         ST(i+3,3)       \
728 
729 
730                 PF0(0)
731                                 PF0(2)
732 
733         " .align 32                     ;\n"
734         " 1:                            ;\n"
735 
736                 BLOCK(0)
737                 BLOCK(4)
738                 BLOCK(8)
739                 BLOCK(12)
740 
741         "       addl $256, %1           ;\n"
742         "       addl $256, %2           ;\n"
743         "       addl $256, %3           ;\n"
744         "       addl $256, %4           ;\n"
745         "       decl %0                 ;\n"
746         "       jnz 1b                  ;\n"
747         : "+r" (lines),
748           "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
749         :
750         : "memory" );
751 
752         XMMS_RESTORE;
753 }
754 
    /*
     * xor_sse_5 - XOR the buffers at p2, p3, p4 and p5 into the buffer at
     * p1 with SSE.
     *
     * bytes is the length to process; the loop runs bytes >> 8 times and
     * handles 256 bytes per pass.  p1 is both a source and the destination.
     * Each BLOCK also prefetches the source data it is about to use and p1
     * data further ahead.
     *
     * The count is only tested after a pass (decl/jnz), so a length below
     * 256 bytes is not guarded against here.
     */
755 static void
756 xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
757           unsigned long *p3, unsigned long *p4, unsigned long *p5)
758 {
759         unsigned long lines = bytes >> 8;
760         char xmm_save[16*4];
761         int cr0;
762 
763         XMMS_SAVE;
764 
            /*
             * The loop modifies p4 and p5, but they stay plain inputs below
             * to keep the operand count down (a "+" operand counts twice).
             * NOTE(review): older GCC releases reportedly cap asm statements
             * at ten operands -- confirm before making them read-write too.
             * This empty asm makes the compiler forget what it knows about
             * the two values, so the registers handed to the loop are not
             * shared with any other variable.
             */
            __asm__ ("" : "+r" (p4), "+r" (p5));

            /*
             * lines, p1, p2 and p3 are changed in place by the loop, so they
             * are read-write operands, not inputs.
             */
765         __asm__ __volatile__ (
766 #undef BLOCK
767 #define BLOCK(i) \
768                 PF1(i)                                  \
769                                 PF1(i+2)                \
770                 LD(i,0)                                 \
771                         LD(i+1,1)                       \
772                                 LD(i+2,2)               \
773                                         LD(i+3,3)       \
774                 PF2(i)                                  \
775                                 PF2(i+2)                \
776                 XO1(i,0)                                \
777                         XO1(i+1,1)                      \
778                                 XO1(i+2,2)              \
779                                         XO1(i+3,3)      \
780                 PF3(i)                                  \
781                                 PF3(i+2)                \
782                 XO2(i,0)                                \
783                         XO2(i+1,1)                      \
784                                 XO2(i+2,2)              \
785                                         XO2(i+3,3)      \
786                 PF4(i)                                  \
787                                 PF4(i+2)                \
788                 PF0(i+4)                                \
789                                 PF0(i+6)                \
790                 XO3(i,0)                                \
791                         XO3(i+1,1)                      \
792                                 XO3(i+2,2)              \
793                                         XO3(i+3,3)      \
794                 XO4(i,0)                                \
795                         XO4(i+1,1)                      \
796                                 XO4(i+2,2)              \
797                                         XO4(i+3,3)      \
798                 ST(i,0)                                 \
799                         ST(i+1,1)                       \
800                                 ST(i+2,2)               \
801                                         ST(i+3,3)       \
802 
803 
804                 PF0(0)
805                                 PF0(2)
806 
807         " .align 32                     ;\n"
808         " 1:                            ;\n"
809 
810                 BLOCK(0)
811                 BLOCK(4)
812                 BLOCK(8)
813                 BLOCK(12)
814 
815         "       addl $256, %1           ;\n"
816         "       addl $256, %2           ;\n"
817         "       addl $256, %3           ;\n"
818         "       addl $256, %4           ;\n"
819         "       addl $256, %5           ;\n"
820         "       decl %0                 ;\n"
821         "       jnz 1b                  ;\n"
822         : "+r" (lines),
823           "+r" (p1), "+r" (p2), "+r" (p3)
824         : "r" (p4), "r" (p5)
825         : "memory");
826 
            /*
             * p4 and p5 were advanced behind the compiler's back and are
             * dead from here on; mark them clobbered so nothing relies on
             * their old values.
             */
            __asm__ ("" : "=r" (p4), "=r" (p5));

827         XMMS_RESTORE;
828 }
829 
    /*
     * Template tying the name "pIII_sse" to the four xor_sse_* routines,
     * one entry per number of buffers (2 to 5).
     */
830 static struct xor_block_template xor_block_pIII_sse = {
831         .name = "pIII_sse",
832         .do_2 = xor_sse_2,
833         .do_3 = xor_sse_3,
834         .do_4 = xor_sse_4,
835         .do_5 = xor_sse_5,
836 };
837 
838 /* Also try the generic routines.  */
839 #include <asm-generic/xor.h>
840 
841 #undef XOR_TRY_TEMPLATES
    /*
     * Hand every candidate template to xor_speed(): the 8regs and 32regs
     * ones unconditionally, the SSE one when cpu_has_xmm is set, and both
     * MMX ones when md_cpu_has_mmx() reports MMX support.
     */
842 #define XOR_TRY_TEMPLATES                               \
843         do {                                            \
844                 xor_speed(&xor_block_8regs);            \
845                 xor_speed(&xor_block_32regs);           \
846                 if (cpu_has_xmm) {                      \
847                         xor_speed(&xor_block_pIII_sse); \
                    }                                       \
848                 if (md_cpu_has_mmx()) {                 \
849                         xor_speed(&xor_block_pII_mmx);  \
850                         xor_speed(&xor_block_p5_mmx);   \
851                 }                                       \
852         } while (0)
853 
854 /* We force the use of the SSE xor block because it can write around L2.
855    We may also be able to load into the L1 only depending on how the cpu
856    deals with a load to a line that is being prefetched.  */
    /* Evaluates to the SSE template when cpu_has_xmm is set and to FASTEST
       otherwise.  FASTEST is parenthesized so that an arbitrary expression
       argument cannot re-associate with the conditional operator.  */
857 #define XOR_SELECT_TEMPLATE(FASTEST) \
858         (cpu_has_xmm ? &xor_block_pIII_sse : (FASTEST))
859 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.