/*
 * Routines to emulate some Altivec/VMX instructions, specifically
 * those that can trap when given denormalized operands in Java mode.
 */
| #include <linux/kernel.h> |
| #include <linux/errno.h> |
| #include <linux/sched.h> |
| #include <asm/ptrace.h> |
| #include <asm/processor.h> |
| #include <asm/uaccess.h> |
| |
| /* Functions in vector.S */ |
| extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); |
| extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); |
| extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); |
| extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); |
| extern void vrefp(vector128 *dst, vector128 *src); |
| extern void vrsqrtefp(vector128 *dst, vector128 *src); |
| extern void vexptep(vector128 *dst, vector128 *src); |
| |
/*
 * Significands of 2^(i/8) for i = 0..7, scaled by 2^23 so that bit 23 is
 * the leading one.  Indexed by the top three fraction bits in eexp2().
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/* High 32 bits of the unsigned 32x32 -> 64 bit product a * b. */
static inline unsigned int eexp2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the 32-bit single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN            -> the same NaN with the quiet bit (0x400000) set
 *   x >= 128, +Inf -> +Inf
 *   very negative x, -Inf -> +0
 *   |x| < 2^-23    -> 1.0
 * Results below the normal range are returned as denormals.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract the unbiased exponent of the input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;		/* 1.0 */

	/*
	 * Convert to a signed fixed-point value with 23 fraction bits.
	 * exp <= 7 here, so the 24-bit significand shifted left still
	 * fits in a positive int.
	 */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * The integer part (floor, via arithmetic shift) becomes the
	 * exponent of the result.  The bias used is 126 rather than 127
	 * because bit 23 of mant carries into the exponent field when the
	 * two are added below.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;		/* overflow -> +Inf */
	if (exp < -23)
		return 0;			/* too small even for a denormal */

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using the remaining 20 bits of fraction.
	 * The shift is done on an unsigned value: it deliberately discards
	 * everything above the low 20 bits, which would overflow (or shift
	 * a negative value) if performed on the signed pwr.
	 * 0x172b83ff is approximately (2^(1/8) - 1) * 2^32.
	 */
	frac = eexp2_mulhi((unsigned int)pwr << 12, 0x172b83ff);
	frac = eexp2_mulhi(frac, mant);
	mant += frac;

	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift right by -exp, rounding to nearest */
	exp = -exp;
	mant += 1U << (exp - 1);
	return mant >> exp;
}
| |
/* High 32 bits of the unsigned 32x32 -> 64 bit product a * b. */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the 32-bit single-precision representation of the estimate.
 *
 * Special cases:
 *   NaN      -> the same NaN with the quiet bit (0x400000) set
 *   +/-Inf   -> returned unchanged
 *   +/-0     -> -Inf
 *
 * NOTE(review): for finite non-zero inputs the sign bit of `s' is
 * ignored, i.e. the result is an estimate of log_2(|x|), and -Inf is
 * returned as -Inf.  Confirm against the vlogefp definition whether
 * negative operands should yield a QNaN instead.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	/*
	 * From here on exp holds the running result as a signed
	 * fixed-point number with 23 fraction bits, and mant the
	 * significand normalized so that bit 23 is set.
	 */
	if (exp == 0) {
		/* denormalized; mant is known to be non-zero here */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* multiply instead of left-shifting a negative value */
		exp = -(118 + lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * Peel off factors of 2^0.5, 2^0.25 and 2^0.125 from the
	 * significand, recording each one in the fraction of exp.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
		exp += frac;
	}

	/* convert the fixed-point result to single-precision format */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* exp is non-zero, so the count is well defined */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bit 23 of exp carries into the exponent field */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
| |
/* Bit OR-ed into *vscrp by the conversion helpers when a result saturates */
#define VSCR_SAT	1

/*
 * Convert the single-precision value whose bit pattern is `x', multiplied
 * by 2^scale, to a signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Values that do not fit saturate to 0x7fffffff or
 * 0x80000000 according to sign and set VSCR_SAT in *vscrp; a scaled value
 * of exactly -2^31 returns 0x80000000 without setting the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int biased = (x >> 23) & 0xff;
	const int fraction = x & 0x7fffff;
	int power, magnitude;

	if (biased == 255 && fraction != 0)
		return 0;		/* NaN -> 0 */

	power = biased - 127 + scale;
	if (power < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (power < 31) {
		/* in range: align the 24-bit significand to the units place */
		magnitude = ((fraction | 0x800000) << 7) >> (30 - power);
		return negative ? -magnitude : magnitude;
	}

	/* out of range: flag it, except for the one exact value -2^31 */
	if (x + (scale << 23) != 0xcf000000)
		*vscrp |= VSCR_SAT;
	if (negative)
		return 0x80000000;
	return 0x7fffffff;
}
| |
| static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) |
| { |
| int exp; |
| unsigned int mant; |
| |
| exp = (x >> 23) & 0xff; |
| mant = x & 0x7fffff; |
| if (exp == 255 && mant != 0) |
| return 0; /* NaN -> 0 */ |
| exp = exp - 127 + scale; |
| if (exp < 0) |
| return 0; /* round towards zero */ |
| if (x & 0x80000000) { |
| /* negative => saturate to 0 */ |
| *vscrp |= VSCR_SAT; |
| return 0; |
| } |
| if (exp >= 32) { |
| /* saturate */ |
| *vscrp |= VSCR_SAT; |
| return 0xffffffff; |
| } |
| mant |= 0x800000; |
| mant = (mant << 8) >> (31 - exp); |
| return mant; |
| } |
| |
/*
 * Round to floating integer, towards 0.
 *
 * Takes and returns single-precision bit patterns.  A NaN is returned
 * with its quiet bit set; infinities and values that are already
 * integral are returned unchanged.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits (or Inf) */
	if (e < 0)
		return sign;		/* |x| < 1.0 -> signed zero */

	/* clear the bits below the binary point */
	return x & ~(0x7fffffu >> e);
}
| |
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 *
 * Takes and returns single-precision bit patterns.  A NaN is returned
 * with its quiet bit set; zeroes, infinities and values that are already
 * integral are returned unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 stays +/-0 */
	if (e < 0)
		return (x & 0x80000000) | 0x3f800000;	/* -> +/- 1.0 */

	/*
	 * Bump the magnitude past the next integer unless it already is
	 * one, then drop the fraction.  A carry out of the mantissa just
	 * increments the exponent; it cannot reach the sign bit.
	 */
	below_point = 0x7fffffu >> e;
	return (x + below_point) & ~below_point;
}
| |
/*
 * Round to floating integer, to nearest.
 *
 * Takes and returns single-precision bit patterns.  A NaN is returned
 * with its quiet bit set; infinities and values that are already
 * integral are returned unchanged.  Exact ties (magnitude ending in .5)
 * are rounded away from zero.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int half;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e >= 23)
		return x;		/* no fraction bits (or Inf) */
	if (e < -1)
		return sign;		/* |x| < 0.5 -> +/-0 */
	if (e == -1)
		return sign | 0x3f800000;	/* 0.5 <= |x| < 1.0 -> +/- 1.0 */

	/* add 0.5 to the magnitude, then chop off the fraction bits */
	half = 0x400000u >> e;
	return (x + half) & ~(0x7fffffu >> e);
}
| |
| int |
| emulate_altivec(struct pt_regs *regs) |
| { |
| unsigned int instr, i; |
| unsigned int va, vb, vc, vd; |
| vector128 *vrs; |
| |
| if (get_user(instr, (unsigned int __user *) regs->nip)) |
| return -EFAULT; |
| if ((instr >> 26) != 4) |
| return -EINVAL; /* not an altivec instruction */ |
| vd = (instr >> 21) & 0x1f; |
| va = (instr >> 16) & 0x1f; |
| vb = (instr >> 11) & 0x1f; |
| vc = (instr >> 6) & 0x1f; |
| |
| vrs = current->thread.vr; |
| switch (instr & 0x3f) { |
| case 10: |
| switch (vc) { |
| case 0: /* vaddfp */ |
| vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); |
| break; |
| case 1: /* vsubfp */ |
| vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); |
| break; |
| case 4: /* vrefp */ |
| vrefp(&vrs[vd], &vrs[vb]); |
| break; |
| case 5: /* vrsqrtefp */ |
| vrsqrtefp(&vrs[vd], &vrs[vb]); |
| break; |
| case 6: /* vexptefp */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = eexp2(vrs[vb].u[i]); |
| break; |
| case 7: /* vlogefp */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = elog2(vrs[vb].u[i]); |
| break; |
| case 8: /* vrfin */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = rfin(vrs[vb].u[i]); |
| break; |
| case 9: /* vrfiz */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = rfiz(vrs[vb].u[i]); |
| break; |
| case 10: /* vrfip */ |
| for (i = 0; i < 4; ++i) { |
| u32 x = vrs[vb].u[i]; |
| x = (x & 0x80000000)? rfiz(x): rfii(x); |
| vrs[vd].u[i] = x; |
| } |
| break; |
| case 11: /* vrfim */ |
| for (i = 0; i < 4; ++i) { |
| u32 x = vrs[vb].u[i]; |
| x = (x & 0x80000000)? rfii(x): rfiz(x); |
| vrs[vd].u[i] = x; |
| } |
| break; |
| case 14: /* vctuxs */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, |
| ¤t->thread.vscr.u[3]); |
| break; |
| case 15: /* vctsxs */ |
| for (i = 0; i < 4; ++i) |
| vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, |
| ¤t->thread.vscr.u[3]); |
| break; |
| default: |
| return -EINVAL; |
| } |
| break; |
| case 46: /* vmaddfp */ |
| vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
| break; |
| case 47: /* vnmsubfp */ |
| vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); |
| break; |
| default: |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |