Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | #ifndef _ASM_GENERIC_DIV64_H #define _ASM_GENERIC_DIV64_H /* * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h * * Optimization for constant divisors on 32-bit machines: * Copyright (C) 2006-2015 Nicolas Pitre * * The semantics of do_div() are: * * u32 do_div(u64 *n, u32 base) * { * u32 remainder = *n % base; * *n = *n / base; * return remainder; * } * * NOTE: macro parameter n is evaluated multiple times, * beware of side effects! */ #include <linux/types.h> #include <linux/compiler.h> #if BITS_PER_LONG == 64 # define do_div(n,base) ({ \ u32 __base = (base); \ u32 __rem; \ __rem = ((u64)(n)) % __base; \ (n) = ((u64)(n)) / __base; \ __rem; \ }) #elif BITS_PER_LONG == 32 #include <linux/log2.h> /* * If the divisor happens to be constant, we determine the appropriate * inverse at compile time to turn the division into a few inline * multiplications which ought to be much faster. And yet only if compiling * with a sufficiently recent gcc version to perform proper 64-bit constant * propagation. * * (It is unfortunate that gcc doesn't perform all this internally.) */ #ifndef __div64_const32_is_OK #define __div64_const32_is_OK (__GNUC__ >= 4) #endif #define __div64_const32(n, ___b) \ ({ \ /* \ * Multiplication by reciprocal of b: n / b = n * (p / b) / p \ * \ * We rely on the fact that most of this code gets optimized \ * away at compile time due to constant propagation and only \ * a few multiplication instructions should remain. \ * Hence this monstrous macro (static inline doesn't always \ * do the trick here). \ */ \ u64 ___res, ___x, ___t, ___m, ___n = (n); \ u32 ___p, ___bias; \ \ /* determine MSB of b */ \ ___p = 1 << ilog2(___b); \ \ /* compute m = ((p << 64) + b - 1) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += (((~0ULL % ___b + 1) * ___p) + ___b - 1) / ___b; \ \ /* one less than the dividend with highest result */ \ ___x = ~0ULL / ___b * ___b - 1; \ \ /* test our ___m with res = m * x / (p << 64) */ \ ___res = ((___m & 0xffffffff) * (___x & 0xffffffff)) >> 32; \ ___t = ___res += (___m & 0xffffffff) * (___x >> 32); \ ___res += (___x & 0xffffffff) * (___m >> 32); \ ___t = (___res < ___t) ? (1ULL << 32) : 0; \ ___res = (___res >> 32) + ___t; \ ___res += (___m >> 32) * (___x >> 32); \ ___res /= ___p; \ \ /* Now sanitize and optimize what we've got. */ \ if (~0ULL % (___b / (___b & -___b)) == 0) { \ /* special case, can be simplified to ... */ \ ___n /= (___b & -___b); \ ___m = ~0ULL / (___b / (___b & -___b)); \ ___p = 1; \ ___bias = 1; \ } else if (___res != ___x / ___b) { \ /* \ * We can't get away without a bias to compensate \ * for bit truncation errors. To avoid it we'd need an \ * additional bit to represent m which would overflow \ * a 64-bit variable. \ * \ * Instead we do m = p / b and n / b = (n * m + m) / p. \ */ \ ___bias = 1; \ /* Compute m = (p << 64) / b */ \ ___m = (~0ULL / ___b) * ___p; \ ___m += ((~0ULL % ___b + 1) * ___p) / ___b; \ } else { \ /* \ * Reduce m / p, and try to clear bit 31 of m when \ * possible, otherwise that'll need extra overflow \ * handling later. \ */ \ u32 ___bits = -(___m & -___m); \ ___bits |= ___m >> 32; \ ___bits = (~___bits) << 1; \ /* \ * If ___bits == 0 then setting bit 31 is unavoidable. \ * Simply apply the maximum possible reduction in that \ * case. Otherwise the MSB of ___bits indicates the \ * best reduction we should apply. \ */ \ if (!___bits) { \ ___p /= (___m & -___m); \ ___m /= (___m & -___m); \ } else { \ ___p >>= ilog2(___bits); \ ___m >>= ilog2(___bits); \ } \ /* No bias needed. */ \ ___bias = 0; \ } \ \ /* \ * Now we have a combination of 2 conditions: \ * \ * 1) whether or not we need to apply a bias, and \ * \ * 2) whether or not there might be an overflow in the cross \ * product determined by (___m & ((1 << 63) | (1 << 31))). \ * \ * Select the best way to do (m_bias + m * n) / (1 << 64). \ * From now on there will be actual runtime code generated. \ */ \ ___res = __arch_xprod_64(___m, ___n, ___bias); \ \ ___res /= ___p; \ }) #ifndef __arch_xprod_64 /* * Default C implementation for __arch_xprod_64() * * Prototype: u64 __arch_xprod_64(const u64 m, u64 n, bool bias) * Semantic: retval = ((bias ? m : 0) + m * n) >> 64 * * The product is a 128-bit value, scaled down to 64 bits. * Assuming constant propagation to optimize away unused conditional code. * Architectures may provide their own optimized assembly implementation. */ static inline u64 __arch_xprod_64(const u64 m, u64 n, bool bias) { u32 m_lo = m; u32 m_hi = m >> 32; u32 n_lo = n; u32 n_hi = n >> 32; u64 res, tmp; if (!bias) { res = ((u64)m_lo * n_lo) >> 32; } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { /* there can't be any overflow here */ res = (m + (u64)m_lo * n_lo) >> 32; } else { res = m + (u64)m_lo * n_lo; tmp = (res < m) ? (1ULL << 32) : 0; res = (res >> 32) + tmp; } if (!(m & ((1ULL << 63) | (1ULL << 31)))) { /* there can't be any overflow here */ res += (u64)m_lo * n_hi; res += (u64)m_hi * n_lo; res >>= 32; } else { tmp = res += (u64)m_lo * n_hi; res += (u64)m_hi * n_lo; tmp = (res < tmp) ? (1ULL << 32) : 0; res = (res >> 32) + tmp; } res += (u64)m_hi * n_hi; return res; } #endif #ifndef __div64_32 extern u32 __div64_32(u64 *dividend, u32 divisor); #endif /* The unnecessary pointer compare is there * to check for type safety (n must be 64bit) */ # define do_div(n,base) ({ \ u32 __base = (base); \ u32 __rem; \ (void)(((typeof((n)) *)0) == ((u64 *)0)); \ if (__builtin_constant_p(__base) && \ is_power_of_2(__base)) { \ __rem = (n) & (__base - 1); \ (n) >>= ilog2(__base); \ } else if (__div64_const32_is_OK && \ __builtin_constant_p(__base) && \ __base != 0) { \ u32 __res_lo, __n_lo = (n); \ (n) = __div64_const32(n, __base); \ /* the remainder can be computed with 32-bit regs */ \ __res_lo = (n); \ __rem = __n_lo - __res_lo * __base; \ } else if (likely(((n) >> 32) == 0)) { \ __rem = (u32)(n) % __base; \ (n) = (u32)(n) / __base; \ } else \ __rem = __div64_32(&(n), __base); \ __rem; \ }) #else /* BITS_PER_LONG == ?? */ # error do_div() does not yet support the C64 #endif /* BITS_PER_LONG */ /* Wrapper for do_div(). Doesn't modify dividend and returns * the result, not remainder. */ static inline u64 lldiv(u64 dividend, u32 divisor) { u64 __res = dividend; do_div(__res, divisor); return(__res); } #endif /* _ASM_GENERIC_DIV64_H */ |