cranelift_codegen/
divconst_magic_numbers.rs

1//! Compute "magic numbers" for division-by-constants transformations.
2//!
3//! Math helpers for division by (non-power-of-2) constants. This is based
4//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There
5//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size
6//! makes little difference, but the signed-vs-unsigned aspect has a large
7//! effect. Therefore everything is presented in the order U32 U64 S32 S64
8//! so as to emphasise the similarity of the U32 and U64 cases and the S32
9//! and S64 cases.
10
11// Structures to hold the "magic numbers" computed.
12
13#[derive(PartialEq, Debug)]
14pub struct MU32 {
15    pub mul_by: u32,
16    pub do_add: bool,
17    pub shift_by: i32,
18}
19
20#[derive(PartialEq, Debug)]
21pub struct MU64 {
22    pub mul_by: u64,
23    pub do_add: bool,
24    pub shift_by: i32,
25}
26
27#[derive(PartialEq, Debug)]
28pub struct MS32 {
29    pub mul_by: i32,
30    pub shift_by: i32,
31}
32
33#[derive(PartialEq, Debug)]
34pub struct MS64 {
35    pub mul_by: i64,
36    pub shift_by: i32,
37}
38
39// The actual "magic number" generators follow.
40
41pub fn magic_u32(d: u32) -> MU32 {
42    debug_assert_ne!(d, 0);
43    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
44
45    let mut do_add: bool = false;
46    let mut p: i32 = 31;
47    let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d;
48    let mut q1: u32 = 0x80000000u32 / nc;
49    let mut r1: u32 = 0x80000000u32 - q1 * nc;
50    let mut q2: u32 = 0x7FFFFFFFu32 / d;
51    let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d;
52    loop {
53        p = p + 1;
54        if r1 >= nc - r1 {
55            q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1);
56            r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc);
57        } else {
58            q1 = u32::wrapping_mul(2, q1);
59            r1 = 2 * r1;
60        }
61        if r2 + 1 >= d - r2 {
62            if q2 >= 0x7FFFFFFFu32 {
63                do_add = true;
64            }
65            q2 = 2 * q2 + 1;
66            r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d);
67        } else {
68            if q2 >= 0x80000000u32 {
69                do_add = true;
70            }
71            q2 = u32::wrapping_mul(2, q2);
72            r2 = 2 * r2 + 1;
73        }
74        let delta: u32 = d - 1 - r2;
75        if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) {
76            break;
77        }
78    }
79
80    MU32 {
81        mul_by: q2 + 1,
82        do_add,
83        shift_by: p - 32,
84    }
85}
86
87pub fn magic_u64(d: u64) -> MU64 {
88    debug_assert_ne!(d, 0);
89    debug_assert_ne!(d, 1); // d==1 generates out of range shifts.
90
91    let mut do_add: bool = false;
92    let mut p: i32 = 63;
93    let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d;
94    let mut q1: u64 = 0x8000000000000000u64 / nc;
95    let mut r1: u64 = 0x8000000000000000u64 - q1 * nc;
96    let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d;
97    let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d;
98    loop {
99        p = p + 1;
100        if r1 >= nc - r1 {
101            q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1);
102            r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc);
103        } else {
104            q1 = u64::wrapping_mul(2, q1);
105            r1 = 2 * r1;
106        }
107        if r2 + 1 >= d - r2 {
108            if q2 >= 0x7FFFFFFFFFFFFFFFu64 {
109                do_add = true;
110            }
111            q2 = 2 * q2 + 1;
112            r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d);
113        } else {
114            if q2 >= 0x8000000000000000u64 {
115                do_add = true;
116            }
117            q2 = u64::wrapping_mul(2, q2);
118            r2 = 2 * r2 + 1;
119        }
120        let delta: u64 = d - 1 - r2;
121        if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) {
122            break;
123        }
124    }
125
126    MU64 {
127        mul_by: q2 + 1,
128        do_add,
129        shift_by: p - 64,
130    }
131}
132
133pub fn magic_s32(d: i32) -> MS32 {
134    debug_assert_ne!(d, -1);
135    debug_assert_ne!(d, 0);
136    debug_assert_ne!(d, 1);
137    let two31: u32 = 0x80000000u32;
138    let mut p: i32 = 31;
139    let ad: u32 = i32::wrapping_abs(d) as u32;
140    let t: u32 = two31 + ((d as u32) >> 31);
141    let anc: u32 = u32::wrapping_sub(t - 1, t % ad);
142    let mut q1: u32 = two31 / anc;
143    let mut r1: u32 = two31 - q1 * anc;
144    let mut q2: u32 = two31 / ad;
145    let mut r2: u32 = two31 - q2 * ad;
146    loop {
147        p = p + 1;
148        q1 = 2 * q1;
149        r1 = 2 * r1;
150        if r1 >= anc {
151            q1 = q1 + 1;
152            r1 = r1 - anc;
153        }
154        q2 = 2 * q2;
155        r2 = 2 * r2;
156        if r2 >= ad {
157            q2 = q2 + 1;
158            r2 = r2 - ad;
159        }
160        let delta: u32 = ad - r2;
161        if !(q1 < delta || (q1 == delta && r1 == 0)) {
162            break;
163        }
164    }
165
166    MS32 {
167        mul_by: (if d < 0 {
168            u32::wrapping_neg(q2 + 1)
169        } else {
170            q2 + 1
171        }) as i32,
172        shift_by: p - 32,
173    }
174}
175
176pub fn magic_s64(d: i64) -> MS64 {
177    debug_assert_ne!(d, -1);
178    debug_assert_ne!(d, 0);
179    debug_assert_ne!(d, 1);
180    let two63: u64 = 0x8000000000000000u64;
181    let mut p: i32 = 63;
182    let ad: u64 = i64::wrapping_abs(d) as u64;
183    let t: u64 = two63 + ((d as u64) >> 63);
184    let anc: u64 = u64::wrapping_sub(t - 1, t % ad);
185    let mut q1: u64 = two63 / anc;
186    let mut r1: u64 = two63 - q1 * anc;
187    let mut q2: u64 = two63 / ad;
188    let mut r2: u64 = two63 - q2 * ad;
189    loop {
190        p = p + 1;
191        q1 = 2 * q1;
192        r1 = 2 * r1;
193        if r1 >= anc {
194            q1 = q1 + 1;
195            r1 = r1 - anc;
196        }
197        q2 = 2 * q2;
198        r2 = 2 * r2;
199        if r2 >= ad {
200            q2 = q2 + 1;
201            r2 = r2 - ad;
202        }
203        let delta: u64 = ad - r2;
204        if !(q1 < delta || (q1 == delta && r1 == 0)) {
205            break;
206        }
207    }
208
209    MS64 {
210        mul_by: (if d < 0 {
211            u64::wrapping_neg(q2 + 1)
212        } else {
213            q2 + 1
214        }) as i64,
215        shift_by: p - 64,
216    }
217}
218
219#[cfg(test)]
220mod tests {
221    use super::{magic_s32, magic_s64, magic_u32, magic_u64};
222    use super::{MS32, MS64, MU32, MU64};
223
224    fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 {
225        MU32 {
226            mul_by,
227            do_add,
228            shift_by,
229        }
230    }
231
232    fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 {
233        MU64 {
234            mul_by,
235            do_add,
236            shift_by,
237        }
238    }
239
240    fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 {
241        MS32 { mul_by, shift_by }
242    }
243
244    fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 {
245        MS64 { mul_by, shift_by }
246    }
247
248    #[test]
249    fn test_magic_u32() {
250        assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0));
251        assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1));
252        assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0));
253        assert_eq!(magic_u32(5u32), make_mu32(0xcccccccdu32, false, 2));
254        assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2));
255        assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3));
256        assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1));
257        assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3));
258        assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3));
259        assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3));
260        assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3));
261        assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3));
262        assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9));
263        assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11));
264        assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15));
265        assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0));
266        assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16));
267        assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23));
268        assert_eq!(
269            magic_u32(0xdeadbeefu32),
270            make_mu32(0x93275ab3u32, false, 31)
271        );
272        assert_eq!(
273            magic_u32(0xfffffffdu32),
274            make_mu32(0x40000001u32, false, 30)
275        );
276        assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32));
277        assert_eq!(
278            magic_u32(0xffffffffu32),
279            make_mu32(0x80000001u32, false, 31)
280        );
281    }
282
283    #[test]
284    fn test_magic_u64() {
285        assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0));
286        assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1));
287        assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0));
288        assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2));
289        assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2));
290        assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3));
291        assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3));
292        assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3));
293        assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1));
294        assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3));
295        assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5));
296        assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7));
297        assert_eq!(
298            magic_u64(625u64),
299            make_mu64(0x346dc5d63886594bu64, false, 7)
300        );
301        assert_eq!(
302            magic_u64(1337u64),
303            make_mu64(0xc4119d952866a139u64, false, 10)
304        );
305        assert_eq!(
306            magic_u64(31415927u64),
307            make_mu64(0x116d154b9c3d2f85u64, true, 25)
308        );
309        assert_eq!(
310            magic_u64(0x00000000deadbeefu64),
311            make_mu64(0x93275ab2dfc9094bu64, false, 31)
312        );
313        assert_eq!(
314            magic_u64(0x00000000fffffffdu64),
315            make_mu64(0x8000000180000005u64, false, 31)
316        );
317        assert_eq!(
318            magic_u64(0x00000000fffffffeu64),
319            make_mu64(0x0000000200000005u64, true, 32)
320        );
321        assert_eq!(
322            magic_u64(0x00000000ffffffffu64),
323            make_mu64(0x8000000080000001u64, false, 31)
324        );
325        assert_eq!(
326            magic_u64(0x0000000100000000u64),
327            make_mu64(0x0000000100000000u64, false, 0)
328        );
329        assert_eq!(
330            magic_u64(0x0000000100000001u64),
331            make_mu64(0xffffffff00000001u64, false, 32)
332        );
333        assert_eq!(
334            magic_u64(0x0ddc0ffeebadf00du64),
335            make_mu64(0x2788e9d394b77da1u64, true, 60)
336        );
337        assert_eq!(
338            magic_u64(0xfffffffffffffffdu64),
339            make_mu64(0x4000000000000001u64, false, 62)
340        );
341        assert_eq!(
342            magic_u64(0xfffffffffffffffeu64),
343            make_mu64(0x0000000000000003u64, true, 64)
344        );
345        assert_eq!(
346            magic_u64(0xffffffffffffffffu64),
347            make_mu64(0x8000000000000001u64, false, 63)
348        );
349    }
350
351    #[test]
352    fn test_magic_s32() {
353        assert_eq!(
354            magic_s32(-0x80000000i32),
355            make_ms32(0x7fffffffu32 as i32, 30)
356        );
357        assert_eq!(
358            magic_s32(-0x7FFFFFFFi32),
359            make_ms32(0xbfffffffu32 as i32, 29)
360        );
361        assert_eq!(
362            magic_s32(-0x7FFFFFFEi32),
363            make_ms32(0x7ffffffdu32 as i32, 30)
364        );
365        assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23));
366        assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9));
367        assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7));
368        assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1));
369        assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1));
370        assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0));
371        assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0));
372        assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0));
373        assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1));
374        assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1));
375        assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0));
376        assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2));
377        assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1));
378        assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2));
379        assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1));
380        assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1));
381        assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3));
382        assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3));
383        assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8));
384        assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9));
385        assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23));
386        assert_eq!(
387            magic_s32(0x7ffffffei32),
388            make_ms32(0x80000003u32 as i32, 30)
389        );
390        assert_eq!(
391            magic_s32(0x7fffffffi32),
392            make_ms32(0x40000001u32 as i32, 29)
393        );
394    }
395
396    #[test]
397    fn test_magic_s64() {
398        assert_eq!(
399            magic_s64(-0x8000000000000000i64),
400            make_ms64(0x7fffffffffffffffu64 as i64, 62)
401        );
402        assert_eq!(
403            magic_s64(-0x7FFFFFFFFFFFFFFFi64),
404            make_ms64(0xbfffffffffffffffu64 as i64, 61)
405        );
406        assert_eq!(
407            magic_s64(-0x7FFFFFFFFFFFFFFEi64),
408            make_ms64(0x7ffffffffffffffdu64 as i64, 62)
409        );
410        assert_eq!(
411            magic_s64(-0x0ddC0ffeeBadF00di64),
412            make_ms64(0x6c3b8b1635a4412fu64 as i64, 59)
413        );
414        assert_eq!(
415            magic_s64(-0x100000001i64),
416            make_ms64(0x800000007fffffffu64 as i64, 31)
417        );
418        assert_eq!(
419            magic_s64(-0x100000000i64),
420            make_ms64(0x7fffffffffffffffu64 as i64, 31)
421        );
422        assert_eq!(
423            magic_s64(-0xFFFFFFFFi64),
424            make_ms64(0x7fffffff7fffffffu64 as i64, 31)
425        );
426        assert_eq!(
427            magic_s64(-0xFFFFFFFEi64),
428            make_ms64(0x7ffffffefffffffdu64 as i64, 31)
429        );
430        assert_eq!(
431            magic_s64(-0xFFFFFFFDi64),
432            make_ms64(0x7ffffffe7ffffffbu64 as i64, 31)
433        );
434        assert_eq!(
435            magic_s64(-0xDeadBeefi64),
436            make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31)
437        );
438        assert_eq!(
439            magic_s64(-31415927i64),
440            make_ms64(0x7749755a31e1683du64 as i64, 24)
441        );
442        assert_eq!(
443            magic_s64(-1337i64),
444            make_ms64(0x9df731356bccaf63u64 as i64, 9)
445        );
446        assert_eq!(
447            magic_s64(-256i64),
448            make_ms64(0x7fffffffffffffffu64 as i64, 7)
449        );
450        assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1));
451        assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1));
452        assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0));
453        assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0));
454        assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as i64, 0));
455        assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1));
456        assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1));
457        assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0));
458        assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1));
459        assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0));
460        assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2));
461        assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1));
462        assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1));
463        assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4));
464        assert_eq!(
465            magic_s64(125i64),
466            make_ms64(0x20c49ba5e353f7cfu64 as i64, 4)
467        );
468        assert_eq!(
469            magic_s64(625i64),
470            make_ms64(0x346dc5d63886594bu64 as i64, 7)
471        );
472        assert_eq!(
473            magic_s64(1337i64),
474            make_ms64(0x6208ceca9433509du64 as i64, 9)
475        );
476        assert_eq!(
477            magic_s64(31415927i64),
478            make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24)
479        );
480        assert_eq!(
481            magic_s64(0x00000000deadbeefi64),
482            make_ms64(0x93275ab2dfc9094bu64 as i64, 31)
483        );
484        assert_eq!(
485            magic_s64(0x00000000fffffffdi64),
486            make_ms64(0x8000000180000005u64 as i64, 31)
487        );
488        assert_eq!(
489            magic_s64(0x00000000fffffffei64),
490            make_ms64(0x8000000100000003u64 as i64, 31)
491        );
492        assert_eq!(
493            magic_s64(0x00000000ffffffffi64),
494            make_ms64(0x8000000080000001u64 as i64, 31)
495        );
496        assert_eq!(
497            magic_s64(0x0000000100000000i64),
498            make_ms64(0x8000000000000001u64 as i64, 31)
499        );
500        assert_eq!(
501            magic_s64(0x0000000100000001i64),
502            make_ms64(0x7fffffff80000001u64 as i64, 31)
503        );
504        assert_eq!(
505            magic_s64(0x0ddc0ffeebadf00di64),
506            make_ms64(0x93c474e9ca5bbed1u64 as i64, 59)
507        );
508        assert_eq!(
509            magic_s64(0x7ffffffffffffffdi64),
510            make_ms64(0x2000000000000001u64 as i64, 60)
511        );
512        assert_eq!(
513            magic_s64(0x7ffffffffffffffei64),
514            make_ms64(0x8000000000000003u64 as i64, 62)
515        );
516        assert_eq!(
517            magic_s64(0x7fffffffffffffffi64),
518            make_ms64(0x4000000000000001u64 as i64, 61)
519        );
520    }
521
522    #[test]
523    fn test_magic_generators_dont_panic() {
524        // The point of this is to check that the magic number generators
525        // don't panic with integer wraparounds, especially at boundary cases
526        // for their arguments. The actual results are thrown away, although
527        // we force `total` to be used, so that rustc can't optimise the
528        // entire computation away.
529
530        // Testing UP magic_u32
531        let mut total: u64 = 0;
532        for x in 2..(200 * 1000u32) {
533            let m = magic_u32(x);
534            total = total ^ (m.mul_by as u64);
535            total = total + (m.shift_by as u64);
536            total = total + (if m.do_add { 123 } else { 456 });
537        }
538        assert_eq!(total, 2481999609);
539
540        total = 0;
541        // Testing MIDPOINT magic_u32
542        for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 {
543            let m = magic_u32(x);
544            total = total ^ (m.mul_by as u64);
545            total = total + (m.shift_by as u64);
546            total = total + (if m.do_add { 123 } else { 456 });
547        }
548        assert_eq!(total, 2399809723);
549
550        total = 0;
551        // Testing DOWN magic_u32
552        for x in 0..(200 * 1000u32) {
553            let m = magic_u32(0xFFFF_FFFFu32 - x);
554            total = total ^ (m.mul_by as u64);
555            total = total + (m.shift_by as u64);
556            total = total + (if m.do_add { 123 } else { 456 });
557        }
558        assert_eq!(total, 271138267);
559
560        // Testing UP magic_u64
561        total = 0;
562        for x in 2..(200 * 1000u64) {
563            let m = magic_u64(x);
564            total = total ^ m.mul_by;
565            total = total + (m.shift_by as u64);
566            total = total + (if m.do_add { 123 } else { 456 });
567        }
568        assert_eq!(total, 7430004086976261161);
569
570        total = 0;
571        // Testing MIDPOINT magic_u64
572        for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 {
573            let m = magic_u64(x);
574            total = total ^ m.mul_by;
575            total = total + (m.shift_by as u64);
576            total = total + (if m.do_add { 123 } else { 456 });
577        }
578        assert_eq!(total, 10312117246769520603);
579
580        // Testing DOWN magic_u64
581        total = 0;
582        for x in 0..(200 * 1000u64) {
583            let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x);
584            total = total ^ m.mul_by;
585            total = total + (m.shift_by as u64);
586            total = total + (if m.do_add { 123 } else { 456 });
587        }
588        assert_eq!(total, 1126603594357269734);
589
590        // Testing UP magic_s32
591        total = 0;
592        for x in 0..(200 * 1000i32) {
593            let m = magic_s32(-0x8000_0000i32 + x);
594            total = total ^ (m.mul_by as u64);
595            total = total + (m.shift_by as u64);
596        }
597        assert_eq!(total, 18446744069953376812);
598
599        total = 0;
600        // Testing MIDPOINT magic_s32
601        for x in 0..(200 * 1000i32) {
602            let x2 = -100 * 1000i32 + x;
603            if x2 != -1 && x2 != 0 && x2 != 1 {
604                let m = magic_s32(x2);
605                total = total ^ (m.mul_by as u64);
606                total = total + (m.shift_by as u64);
607            }
608        }
609        assert_eq!(total, 351839350);
610
611        // Testing DOWN magic_s32
612        total = 0;
613        for x in 0..(200 * 1000i32) {
614            let m = magic_s32(0x7FFF_FFFFi32 - x);
615            total = total ^ (m.mul_by as u64);
616            total = total + (m.shift_by as u64);
617        }
618        assert_eq!(total, 18446744072916880714);
619
620        // Testing UP magic_s64
621        total = 0;
622        for x in 0..(200 * 1000i64) {
623            let m = magic_s64(-0x8000_0000_0000_0000i64 + x);
624            total = total ^ (m.mul_by as u64);
625            total = total + (m.shift_by as u64);
626        }
627        assert_eq!(total, 17929885647724831014);
628
629        total = 0;
630        // Testing MIDPOINT magic_s64
631        for x in 0..(200 * 1000i64) {
632            let x2 = -100 * 1000i64 + x;
633            if x2 != -1 && x2 != 0 && x2 != 1 {
634                let m = magic_s64(x2);
635                total = total ^ (m.mul_by as u64);
636                total = total + (m.shift_by as u64);
637            }
638        }
639        assert_eq!(total, 18106042338125661964);
640
641        // Testing DOWN magic_s64
642        total = 0;
643        for x in 0..(200 * 1000i64) {
644            let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x);
645            total = total ^ (m.mul_by as u64);
646            total = total + (m.shift_by as u64);
647        }
648        assert_eq!(total, 563301797155560970);
649    }
650
651    #[test]
652    fn test_magic_generators_give_correct_numbers() {
653        // For a variety of values for both `n` and `d`, compute the magic
654        // numbers for `d`, and in effect interpret them so as to compute
655        // `n / d`.  Check that that equals the value of `n / d` computed
656        // directly by the hardware.  This serves to check that the magic
657        // number generates work properly.  In total, 50,148,000 tests are
658        // done.
659
660        // Some constants
661        const MIN_U32: u32 = 0;
662        const MAX_U32: u32 = 0xFFFF_FFFFu32;
663        const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less
664
665        const MIN_S32: i32 = 0x8000_0000u32 as i32;
666        const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32;
667
668        const MIN_U64: u64 = 0;
669        const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64;
670        const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto
671
672        const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64;
673        const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64;
674
675        // These generate reference results for signed/unsigned 32/64 bit
676        // division, rounding towards zero.
677        fn div_u32(x: u32, y: u32) -> u32 {
678            return x / y;
679        }
680        fn div_s32(x: i32, y: i32) -> i32 {
681            return x / y;
682        }
683        fn div_u64(x: u64, y: u64) -> u64 {
684            return x / y;
685        }
686        fn div_s64(x: i64, y: i64) -> i64 {
687            return x / y;
688        }
689
690        // Returns the high half of a 32 bit unsigned widening multiply.
691        fn mulhw_u32(x: u32, y: u32) -> u32 {
692            let x64: u64 = x as u64;
693            let y64: u64 = y as u64;
694            let r64: u64 = x64 * y64;
695            (r64 >> 32) as u32
696        }
697
698        // Returns the high half of a 32 bit signed widening multiply.
699        fn mulhw_s32(x: i32, y: i32) -> i32 {
700            let x64: i64 = x as i64;
701            let y64: i64 = y as i64;
702            let r64: i64 = x64 * y64;
703            (r64 >> 32) as i32
704        }
705
706        // Returns the high half of a 64 bit unsigned widening multiply.
707        fn mulhw_u64(x: u64, y: u64) -> u64 {
708            let t0: u64 = x & 0xffffffffu64;
709            let t1: u64 = x >> 32;
710            let t2: u64 = y & 0xffffffffu64;
711            let t3: u64 = y >> 32;
712            let t4: u64 = t0 * t2;
713            let t5: u64 = t1 * t2 + (t4 >> 32);
714            let t6: u64 = t5 & 0xffffffffu64;
715            let t7: u64 = t5 >> 32;
716            let t8: u64 = t0 * t3 + t6;
717            let t9: u64 = t1 * t3 + t7 + (t8 >> 32);
718            t9
719        }
720
721        // Returns the high half of a 64 bit signed widening multiply.
722        fn mulhw_s64(x: i64, y: i64) -> i64 {
723            let t0: u64 = x as u64 & 0xffffffffu64;
724            let t1: i64 = x >> 32;
725            let t2: u64 = y as u64 & 0xffffffffu64;
726            let t3: i64 = y >> 32;
727            let t4: u64 = t0 * t2;
728            let t5: i64 = t1 * t2 as i64 + (t4 >> 32) as i64;
729            let t6: u64 = t5 as u64 & 0xffffffffu64;
730            let t7: i64 = t5 >> 32;
731            let t8: i64 = t0 as i64 * t3 + t6 as i64;
732            let t9: i64 = t1 * t3 + t7 + (t8 >> 32);
733            t9
734        }
735
736        // Compute the magic numbers for `d` and then use them to compute and
737        // check `n / d` for around 1000 values of `n`, using unsigned 32-bit
738        // division.
739        fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) {
740            // Advance the numerator (the `n` in `n / d`) so as to test
741            // densely near the range ends (and, in the signed variants, near
742            // zero) but not so densely away from those regions.
743            fn advance_n_u32(x: u32) -> u32 {
744                if x < MIN_U32 + 110 {
745                    return x + 1;
746                }
747                if x < MIN_U32 + 1700 {
748                    return x + 23;
749                }
750                if x < MAX_U32 - 1700 {
751                    let xd: f64 = (x as f64) * 1.06415927;
752                    return if xd >= (MAX_U32 - 1700) as f64 {
753                        MAX_U32 - 1700
754                    } else {
755                        xd as u32
756                    };
757                }
758                if x < MAX_U32 - 110 {
759                    return x + 23;
760                }
761                u32::wrapping_add(x, 1)
762            }
763
764            let magic: MU32 = magic_u32(d);
765            let mut n: u32 = MIN_U32;
766            loop {
767                *n_tests_done += 1;
768                // Compute and check `q = n / d` using `magic`.
769                let mut q: u32 = mulhw_u32(n, magic.mul_by);
770                if magic.do_add {
771                    assert!(magic.shift_by >= 1 && magic.shift_by <= 32);
772                    let mut t: u32 = n - q;
773                    t >>= 1;
774                    t = t + q;
775                    q = t >> (magic.shift_by - 1);
776                } else {
777                    assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
778                    q >>= magic.shift_by;
779                }
780
781                assert_eq!(q, div_u32(n, d));
782
783                n = advance_n_u32(n);
784                if n == MIN_U32 {
785                    break;
786                }
787            }
788        }
789
790        // Compute the magic numbers for `d` and then use them to compute and
791        // check `n / d` for around 1000 values of `n`, using signed 32-bit
792        // division.
793        fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) {
794            // See comment on advance_n_u32 above.
795            fn advance_n_s32(x: i32) -> i32 {
796                if x >= 0 && x <= 29 {
797                    return x + 1;
798                }
799                if x < MIN_S32 + 110 {
800                    return x + 1;
801                }
802                if x < MIN_S32 + 1700 {
803                    return x + 23;
804                }
805                if x < MAX_S32 - 1700 {
806                    let mut xd: f64 = x as f64;
807                    xd = if xd < 0.0 {
808                        xd / 1.06415927
809                    } else {
810                        xd * 1.06415927
811                    };
812                    return if xd >= (MAX_S32 - 1700) as f64 {
813                        MAX_S32 - 1700
814                    } else {
815                        xd as i32
816                    };
817                }
818                if x < MAX_S32 - 110 {
819                    return x + 23;
820                }
821                if x == MAX_S32 {
822                    return MIN_S32;
823                }
824                x + 1
825            }
826
827            let magic: MS32 = magic_s32(d);
828            let mut n: i32 = MIN_S32;
829            loop {
830                *n_tests_done += 1;
831                // Compute and check `q = n / d` using `magic`.
832                let mut q: i32 = mulhw_s32(n, magic.mul_by);
833                if d > 0 && magic.mul_by < 0 {
834                    q = q + n;
835                } else if d < 0 && magic.mul_by > 0 {
836                    q = q - n;
837                }
838                assert!(magic.shift_by >= 0 && magic.shift_by <= 31);
839                q = q >> magic.shift_by;
840                let mut t: u32 = q as u32;
841                t = t >> 31;
842                q = q + (t as i32);
843
844                assert_eq!(q, div_s32(n, d));
845
846                n = advance_n_s32(n);
847                if n == MIN_S32 {
848                    break;
849                }
850            }
851        }
852
853        // Compute the magic numbers for `d` and then use them to compute and
854        // check `n / d` for around 1000 values of `n`, using unsigned 64-bit
855        // division.
856        fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) {
857            // See comment on advance_n_u32 above.
858            fn advance_n_u64(x: u64) -> u64 {
859                if x < MIN_U64 + 110 {
860                    return x + 1;
861                }
862                if x < MIN_U64 + 1700 {
863                    return x + 23;
864                }
865                if x < MAX_U64 - 1700 {
866                    let xd: f64 = (x as f64) * 1.06415927;
867                    return if xd >= (MAX_U64 - 1700) as f64 {
868                        MAX_U64 - 1700
869                    } else {
870                        xd as u64
871                    };
872                }
873                if x < MAX_U64 - 110 {
874                    return x + 23;
875                }
876                u64::wrapping_add(x, 1)
877            }
878
879            let magic: MU64 = magic_u64(d);
880            let mut n: u64 = MIN_U64;
881            loop {
882                *n_tests_done += 1;
883                // Compute and check `q = n / d` using `magic`.
884                let mut q = mulhw_u64(n, magic.mul_by);
885                if magic.do_add {
886                    assert!(magic.shift_by >= 1 && magic.shift_by <= 64);
887                    let mut t: u64 = n - q;
888                    t >>= 1;
889                    t = t + q;
890                    q = t >> (magic.shift_by - 1);
891                } else {
892                    assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
893                    q >>= magic.shift_by;
894                }
895
896                assert_eq!(q, div_u64(n, d));
897
898                n = advance_n_u64(n);
899                if n == MIN_U64 {
900                    break;
901                }
902            }
903        }
904
905        // Compute the magic numbers for `d` and then use them to compute and
906        // check `n / d` for around 1000 values of `n`, using signed 64-bit
907        // division.
908        fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) {
909            // See comment on advance_n_u32 above.
910            fn advance_n_s64(x: i64) -> i64 {
911                if x >= 0 && x <= 29 {
912                    return x + 1;
913                }
914                if x < MIN_S64 + 110 {
915                    return x + 1;
916                }
917                if x < MIN_S64 + 1700 {
918                    return x + 23;
919                }
920                if x < MAX_S64 - 1700 {
921                    let mut xd: f64 = x as f64;
922                    xd = if xd < 0.0 {
923                        xd / 1.06415927
924                    } else {
925                        xd * 1.06415927
926                    };
927                    return if xd >= (MAX_S64 - 1700) as f64 {
928                        MAX_S64 - 1700
929                    } else {
930                        xd as i64
931                    };
932                }
933                if x < MAX_S64 - 110 {
934                    return x + 23;
935                }
936                if x == MAX_S64 {
937                    return MIN_S64;
938                }
939                x + 1
940            }
941
942            let magic: MS64 = magic_s64(d);
943            let mut n: i64 = MIN_S64;
944            loop {
945                *n_tests_done += 1;
946                // Compute and check `q = n / d` using `magic`. */
947                let mut q: i64 = mulhw_s64(n, magic.mul_by);
948                if d > 0 && magic.mul_by < 0 {
949                    q = q + n;
950                } else if d < 0 && magic.mul_by > 0 {
951                    q = q - n;
952                }
953                assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
954                q = q >> magic.shift_by;
955                let mut t: u64 = q as u64;
956                t = t >> 63;
957                q = q + (t as i64);
958
959                assert_eq!(q, div_s64(n, d));
960
961                n = advance_n_s64(n);
962                if n == MIN_S64 {
963                    break;
964                }
965            }
966        }
967
968        // Using all the above support machinery, actually run the tests.
969
970        let mut n_tests_done: i32 = 0;
971
972        // u32 division tests
973        {
974            // 2 .. 3k
975            let mut d: u32 = 2;
976            for _ in 0..3 * 1000 {
977                test_magic_u32_inner(d, &mut n_tests_done);
978                d += 1;
979            }
980
981            // across the midpoint: midpoint - 3k .. midpoint + 3k
982            d = MAX_U32_HALF - 3 * 1000;
983            for _ in 0..2 * 3 * 1000 {
984                test_magic_u32_inner(d, &mut n_tests_done);
985                d += 1;
986            }
987
988            // MAX_U32 - 3k .. MAX_U32 (in reverse order)
989            d = MAX_U32;
990            for _ in 0..3 * 1000 {
991                test_magic_u32_inner(d, &mut n_tests_done);
992                d -= 1;
993            }
994        }
995
996        // s32 division tests
997        {
998            // MIN_S32 .. MIN_S32 + 3k
999            let mut d: i32 = MIN_S32;
1000            for _ in 0..3 * 1000 {
1001                test_magic_s32_inner(d, &mut n_tests_done);
1002                d += 1;
1003            }
1004
1005            // -3k .. -2 (in reverse order)
1006            d = -2;
1007            for _ in 0..3 * 1000 {
1008                test_magic_s32_inner(d, &mut n_tests_done);
1009                d -= 1;
1010            }
1011
1012            // 2 .. 3k
1013            d = 2;
1014            for _ in 0..3 * 1000 {
1015                test_magic_s32_inner(d, &mut n_tests_done);
1016                d += 1;
1017            }
1018
1019            // MAX_S32 - 3k .. MAX_S32 (in reverse order)
1020            d = MAX_S32;
1021            for _ in 0..3 * 1000 {
1022                test_magic_s32_inner(d, &mut n_tests_done);
1023                d -= 1;
1024            }
1025        }
1026
1027        // u64 division tests
1028        {
1029            // 2 .. 3k
1030            let mut d: u64 = 2;
1031            for _ in 0..3 * 1000 {
1032                test_magic_u64_inner(d, &mut n_tests_done);
1033                d += 1;
1034            }
1035
1036            // across the midpoint: midpoint - 3k .. midpoint + 3k
1037            d = MAX_U64_HALF - 3 * 1000;
1038            for _ in 0..2 * 3 * 1000 {
1039                test_magic_u64_inner(d, &mut n_tests_done);
1040                d += 1;
1041            }
1042
1043            // mAX_U64 - 3000 .. mAX_U64 (in reverse order)
1044            d = MAX_U64;
1045            for _ in 0..3 * 1000 {
1046                test_magic_u64_inner(d, &mut n_tests_done);
1047                d -= 1;
1048            }
1049        }
1050
1051        // s64 division tests
1052        {
1053            // MIN_S64 .. MIN_S64 + 3k
1054            let mut d: i64 = MIN_S64;
1055            for _ in 0..3 * 1000 {
1056                test_magic_s64_inner(d, &mut n_tests_done);
1057                d += 1;
1058            }
1059
1060            // -3k .. -2 (in reverse order)
1061            d = -2;
1062            for _ in 0..3 * 1000 {
1063                test_magic_s64_inner(d, &mut n_tests_done);
1064                d -= 1;
1065            }
1066
1067            // 2 .. 3k
1068            d = 2;
1069            for _ in 0..3 * 1000 {
1070                test_magic_s64_inner(d, &mut n_tests_done);
1071                d += 1;
1072            }
1073
1074            // MAX_S64 - 3k .. MAX_S64 (in reverse order)
1075            d = MAX_S64;
1076            for _ in 0..3 * 1000 {
1077                test_magic_s64_inner(d, &mut n_tests_done);
1078                d -= 1;
1079            }
1080        }
1081        assert_eq!(n_tests_done, 50_148_000);
1082    }
1083}