/*
 * include/asm-ia64/xor.h
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


/*
 * Prototypes for the XOR routines whose bodies are written in IA-64
 * assembly further down in this file.
 *
 * As implemented there, the first argument is a length that is shifted
 * right by 3 to obtain the number of 8-byte words to process.  The first
 * pointer is both a source and the destination: each word read from it is
 * XORed with the corresponding word of every other buffer and the result
 * is stored back through it.  The remaining pointers are only read.
 */
extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
		       unsigned long *);
extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
		       unsigned long *, unsigned long *);
extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
		       unsigned long *, unsigned long *, unsigned long *);

25 asm ("
26 .text
27
28 // Assume L2 memory latency of 6 cycles.
29
30 .proc xor_ia64_2
31 xor_ia64_2:
32 .prologue
33 .fframe 0
34 { .mii
35 .save ar.pfs, r31
36 alloc r31 = ar.pfs, 3, 0, 13, 16
37 .save ar.lc, r30
38 mov r30 = ar.lc
39 .save pr, r29
40 mov r29 = pr
41 ;;
42 }
43 .body
44 { .mii
45 mov r8 = in1
46 mov ar.ec = 6 + 2
47 shr in0 = in0, 3
48 ;;
49 }
50 { .mmi
51 adds in0 = -1, in0
52 mov r16 = in1
53 mov r17 = in2
54 ;;
55 }
56 { .mii
57 mov ar.lc = in0
58 mov pr.rot = 1 << 16
59 ;;
60 }
61 .rotr s1[6+1], s2[6+1], d[2]
62 .rotp p[6+2]
63 0: { .mmi
64 (p[0]) ld8.nta s1[0] = [r16], 8
65 (p[0]) ld8.nta s2[0] = [r17], 8
66 (p[6]) xor d[0] = s1[6], s2[6]
67 }
68 { .mfb
69 (p[6+1]) st8.nta [r8] = d[1], 8
70 nop.f 0
71 br.ctop.dptk.few 0b
72 ;;
73 }
74 { .mii
75 mov ar.lc = r30
76 mov pr = r29, -1
77 }
78 { .bbb
79 br.ret.sptk.few rp
80 }
81 .endp xor_ia64_2
82
83 .proc xor_ia64_3
84 xor_ia64_3:
85 .prologue
86 .fframe 0
87 { .mii
88 .save ar.pfs, r31
89 alloc r31 = ar.pfs, 4, 0, 20, 24
90 .save ar.lc, r30
91 mov r30 = ar.lc
92 .save pr, r29
93 mov r29 = pr
94 ;;
95 }
96 .body
97 { .mii
98 mov r8 = in1
99 mov ar.ec = 6 + 2
100 shr in0 = in0, 3
101 ;;
102 }
103 { .mmi
104 adds in0 = -1, in0
105 mov r16 = in1
106 mov r17 = in2
107 ;;
108 }
109 { .mii
110 mov r18 = in3
111 mov ar.lc = in0
112 mov pr.rot = 1 << 16
113 ;;
114 }
115 .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
116 .rotp p[6+2]
117 0: { .mmi
118 (p[0]) ld8.nta s1[0] = [r16], 8
119 (p[0]) ld8.nta s2[0] = [r17], 8
120 (p[6]) xor d[0] = s1[6], s2[6]
121 ;;
122 }
123 { .mmi
124 (p[0]) ld8.nta s3[0] = [r18], 8
125 (p[6+1]) st8.nta [r8] = d[1], 8
126 (p[6]) xor d[0] = d[0], s3[6]
127 }
128 { .bbb
129 br.ctop.dptk.few 0b
130 ;;
131 }
132 { .mii
133 mov ar.lc = r30
134 mov pr = r29, -1
135 }
136 { .bbb
137 br.ret.sptk.few rp
138 }
139 .endp xor_ia64_3
140
141 .proc xor_ia64_4
142 xor_ia64_4:
143 .prologue
144 .fframe 0
145 { .mii
146 .save ar.pfs, r31
147 alloc r31 = ar.pfs, 5, 0, 27, 32
148 .save ar.lc, r30
149 mov r30 = ar.lc
150 .save pr, r29
151 mov r29 = pr
152 ;;
153 }
154 .body
155 { .mii
156 mov r8 = in1
157 mov ar.ec = 6 + 2
158 shr in0 = in0, 3
159 ;;
160 }
161 { .mmi
162 adds in0 = -1, in0
163 mov r16 = in1
164 mov r17 = in2
165 ;;
166 }
167 { .mii
168 mov r18 = in3
169 mov ar.lc = in0
170 mov pr.rot = 1 << 16
171 }
172 { .mfb
173 mov r19 = in4
174 ;;
175 }
176 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
177 .rotp p[6+2]
178 0: { .mmi
179 (p[0]) ld8.nta s1[0] = [r16], 8
180 (p[0]) ld8.nta s2[0] = [r17], 8
181 (p[6]) xor d[0] = s1[6], s2[6]
182 }
183 { .mmi
184 (p[0]) ld8.nta s3[0] = [r18], 8
185 (p[0]) ld8.nta s4[0] = [r19], 8
186 (p[6]) xor r20 = s3[6], s4[6]
187 ;;
188 }
189 { .mib
190 (p[6+1]) st8.nta [r8] = d[1], 8
191 (p[6]) xor d[0] = d[0], r20
192 br.ctop.dptk.few 0b
193 ;;
194 }
195 { .mii
196 mov ar.lc = r30
197 mov pr = r29, -1
198 }
199 { .bbb
200 br.ret.sptk.few rp
201 }
202 .endp xor_ia64_4
203
204 .proc xor_ia64_5
205 xor_ia64_5:
206 .prologue
207 .fframe 0
208 { .mii
209 .save ar.pfs, r31
210 alloc r31 = ar.pfs, 6, 0, 34, 40
211 .save ar.lc, r30
212 mov r30 = ar.lc
213 .save pr, r29
214 mov r29 = pr
215 ;;
216 }
217 .body
218 { .mii
219 mov r8 = in1
220 mov ar.ec = 6 + 2
221 shr in0 = in0, 3
222 ;;
223 }
224 { .mmi
225 adds in0 = -1, in0
226 mov r16 = in1
227 mov r17 = in2
228 ;;
229 }
230 { .mii
231 mov r18 = in3
232 mov ar.lc = in0
233 mov pr.rot = 1 << 16
234 }
235 { .mib
236 mov r19 = in4
237 mov r20 = in5
238 ;;
239 }
240 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
241 .rotp p[6+2]
242 0: { .mmi
243 (p[0]) ld8.nta s1[0] = [r16], 8
244 (p[0]) ld8.nta s2[0] = [r17], 8
245 (p[6]) xor d[0] = s1[6], s2[6]
246 }
247 { .mmi
248 (p[0]) ld8.nta s3[0] = [r18], 8
249 (p[0]) ld8.nta s4[0] = [r19], 8
250 (p[6]) xor r21 = s3[6], s4[6]
251 ;;
252 }
253 { .mmi
254 (p[0]) ld8.nta s5[0] = [r20], 8
255 (p[6+1]) st8.nta [r8] = d[1], 8
256 (p[6]) xor d[0] = d[0], r21
257 ;;
258 }
259 { .mfb
260 (p[6]) xor d[0] = d[0], s5[6]
261 nop.f 0
262 br.ctop.dptk.few 0b
263 ;;
264 }
265 { .mii
266 mov ar.lc = r30
267 mov pr = r29, -1
268 }
269 { .bbb
270 br.ret.sptk.few rp
271 }
272 .endp xor_ia64_5
273 ");
274
275 static struct xor_block_template xor_block_ia64 = {
276 name: "ia64",
277 do_2: xor_ia64_2,
278 do_3: xor_ia64_3,
279 do_4: xor_ia64_4,
280 do_5: xor_ia64_5,
281 };
282
/*
 * Expands to a single xor_speed() call on the IA-64 template.
 * xor_speed() is defined outside this file; presumably it benchmarks the
 * template against the other candidates - confirm in the caller.
 */
#define XOR_TRY_TEMPLATES	xor_speed(&xor_block_ia64)

/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */