|
|
1.1 ! root 1: /* ! 2: * VIS op helpers ! 3: * ! 4: * Copyright (c) 2003-2005 Fabrice Bellard ! 5: * ! 6: * This library is free software; you can redistribute it and/or ! 7: * modify it under the terms of the GNU Lesser General Public ! 8: * License as published by the Free Software Foundation; either ! 9: * version 2 of the License, or (at your option) any later version. ! 10: * ! 11: * This library is distributed in the hope that it will be useful, ! 12: * but WITHOUT ANY WARRANTY; without even the implied warranty of ! 13: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! 14: * Lesser General Public License for more details. ! 15: * ! 16: * You should have received a copy of the GNU Lesser General Public ! 17: * License along with this library; if not, see <http://www.gnu.org/licenses/>. ! 18: */ ! 19: ! 20: #include "cpu.h" ! 21: #include "helper.h" ! 22: ! 23: /* This function uses non-native bit order */ ! 24: #define GET_FIELD(X, FROM, TO) \ ! 25: ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) ! 26: ! 27: /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ ! 28: #define GET_FIELD_SP(X, FROM, TO) \ ! 29: GET_FIELD(X, 63 - (TO), 63 - (FROM)) ! 30: ! 31: target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) ! 32: { ! 33: return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | ! 34: (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | ! 35: (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | ! 36: (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | ! 37: (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | ! 38: (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | ! 39: (((pixel_addr >> 55) & 1) << 4) | ! 40: (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | ! 41: GET_FIELD_SP(pixel_addr, 11, 12); ! 42: } ! 43: ! 44: #ifdef HOST_WORDS_BIGENDIAN ! 45: #define VIS_B64(n) b[7 - (n)] ! 46: #define VIS_W64(n) w[3 - (n)] ! 47: #define VIS_SW64(n) sw[3 - (n)] ! 48: #define VIS_L64(n) l[1 - (n)] ! 49: #define VIS_B32(n) b[3 - (n)] ! 50: #define VIS_W32(n) w[1 - (n)] ! 51: #else ! 52: #define VIS_B64(n) b[n] ! 53: #define VIS_W64(n) w[n] ! 54: #define VIS_SW64(n) sw[n] ! 55: #define VIS_L64(n) l[n] ! 56: #define VIS_B32(n) b[n] ! 57: #define VIS_W32(n) w[n] ! 58: #endif ! 59: ! 60: typedef union { ! 61: uint8_t b[8]; ! 62: uint16_t w[4]; ! 63: int16_t sw[4]; ! 64: uint32_t l[2]; ! 65: uint64_t ll; ! 66: float64 d; ! 67: } VIS64; ! 68: ! 69: typedef union { ! 70: uint8_t b[4]; ! 71: uint16_t w[2]; ! 72: uint32_t l; ! 73: float32 f; ! 74: } VIS32; ! 75: ! 76: uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) ! 77: { ! 78: VIS64 s, d; ! 79: ! 80: s.ll = src1; ! 81: d.ll = src2; ! 82: ! 83: /* Reverse calculation order to handle overlap */ ! 84: d.VIS_B64(7) = s.VIS_B64(3); ! 85: d.VIS_B64(6) = d.VIS_B64(3); ! 86: d.VIS_B64(5) = s.VIS_B64(2); ! 87: d.VIS_B64(4) = d.VIS_B64(2); ! 88: d.VIS_B64(3) = s.VIS_B64(1); ! 89: d.VIS_B64(2) = d.VIS_B64(1); ! 90: d.VIS_B64(1) = s.VIS_B64(0); ! 91: /* d.VIS_B64(0) = d.VIS_B64(0); */ ! 92: ! 93: return d.ll; ! 94: } ! 95: ! 96: uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) ! 97: { ! 98: VIS64 s, d; ! 99: uint32_t tmp; ! 100: ! 101: s.ll = src1; ! 102: d.ll = src2; ! 103: ! 104: #define PMUL(r) \ ! 105: tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ ! 106: if ((tmp & 0xff) > 0x7f) { \ ! 107: tmp += 0x100; \ ! 108: } \ ! 109: d.VIS_W64(r) = tmp >> 8; ! 110: ! 111: PMUL(0); ! 112: PMUL(1); ! 113: PMUL(2); ! 114: PMUL(3); ! 115: #undef PMUL ! 116: ! 117: return d.ll; ! 118: } ! 119: ! 120: uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) ! 121: { ! 122: VIS64 s, d; ! 123: uint32_t tmp; ! 124: ! 125: s.ll = src1; ! 126: d.ll = src2; ! 127: ! 128: #define PMUL(r) \ ! 129: tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ ! 130: if ((tmp & 0xff) > 0x7f) { \ ! 131: tmp += 0x100; \ ! 132: } \ ! 133: d.VIS_W64(r) = tmp >> 8; ! 134: ! 135: PMUL(0); ! 136: PMUL(1); ! 137: PMUL(2); ! 138: PMUL(3); ! 139: #undef PMUL ! 140: ! 141: return d.ll; ! 142: } ! 143: ! 144: uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) ! 145: { ! 146: VIS64 s, d; ! 147: uint32_t tmp; ! 148: ! 149: s.ll = src1; ! 150: d.ll = src2; ! 151: ! 152: #define PMUL(r) \ ! 153: tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ ! 154: if ((tmp & 0xff) > 0x7f) { \ ! 155: tmp += 0x100; \ ! 156: } \ ! 157: d.VIS_W64(r) = tmp >> 8; ! 158: ! 159: PMUL(0); ! 160: PMUL(1); ! 161: PMUL(2); ! 162: PMUL(3); ! 163: #undef PMUL ! 164: ! 165: return d.ll; ! 166: } ! 167: ! 168: uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) ! 169: { ! 170: VIS64 s, d; ! 171: uint32_t tmp; ! 172: ! 173: s.ll = src1; ! 174: d.ll = src2; ! 175: ! 176: #define PMUL(r) \ ! 177: tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ ! 178: if ((tmp & 0xff) > 0x7f) { \ ! 179: tmp += 0x100; \ ! 180: } \ ! 181: d.VIS_W64(r) = tmp >> 8; ! 182: ! 183: PMUL(0); ! 184: PMUL(1); ! 185: PMUL(2); ! 186: PMUL(3); ! 187: #undef PMUL ! 188: ! 189: return d.ll; ! 190: } ! 191: ! 192: uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) ! 193: { ! 194: VIS64 s, d; ! 195: uint32_t tmp; ! 196: ! 197: s.ll = src1; ! 198: d.ll = src2; ! 199: ! 200: #define PMUL(r) \ ! 201: tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ ! 202: if ((tmp & 0xff) > 0x7f) { \ ! 203: tmp += 0x100; \ ! 204: } \ ! 205: d.VIS_W64(r) = tmp >> 8; ! 206: ! 207: PMUL(0); ! 208: PMUL(1); ! 209: PMUL(2); ! 210: PMUL(3); ! 211: #undef PMUL ! 212: ! 213: return d.ll; ! 214: } ! 215: ! 216: uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) ! 217: { ! 218: VIS64 s, d; ! 219: uint32_t tmp; ! 220: ! 221: s.ll = src1; ! 222: d.ll = src2; ! 223: ! 224: #define PMUL(r) \ ! 225: tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ ! 226: if ((tmp & 0xff) > 0x7f) { \ ! 227: tmp += 0x100; \ ! 228: } \ ! 229: d.VIS_L64(r) = tmp; ! 230: ! 231: /* Reverse calculation order to handle overlap */ ! 232: PMUL(1); ! 233: PMUL(0); ! 234: #undef PMUL ! 235: ! 236: return d.ll; ! 237: } ! 238: ! 239: uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) ! 240: { ! 241: VIS64 s, d; ! 242: uint32_t tmp; ! 243: ! 244: s.ll = src1; ! 245: d.ll = src2; ! 246: ! 247: #define PMUL(r) \ ! 248: tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ ! 249: if ((tmp & 0xff) > 0x7f) { \ ! 250: tmp += 0x100; \ ! 251: } \ ! 252: d.VIS_L64(r) = tmp; ! 253: ! 254: /* Reverse calculation order to handle overlap */ ! 255: PMUL(1); ! 256: PMUL(0); ! 257: #undef PMUL ! 258: ! 259: return d.ll; ! 260: } ! 261: ! 262: uint64_t helper_fexpand(uint64_t src1, uint64_t src2) ! 263: { ! 264: VIS32 s; ! 265: VIS64 d; ! 266: ! 267: s.l = (uint32_t)src1; ! 268: d.ll = src2; ! 269: d.VIS_W64(0) = s.VIS_B32(0) << 4; ! 270: d.VIS_W64(1) = s.VIS_B32(1) << 4; ! 271: d.VIS_W64(2) = s.VIS_B32(2) << 4; ! 272: d.VIS_W64(3) = s.VIS_B32(3) << 4; ! 273: ! 274: return d.ll; ! 275: } ! 276: ! 277: #define VIS_HELPER(name, F) \ ! 278: uint64_t name##16(uint64_t src1, uint64_t src2) \ ! 279: { \ ! 280: VIS64 s, d; \ ! 281: \ ! 282: s.ll = src1; \ ! 283: d.ll = src2; \ ! 284: \ ! 285: d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \ ! 286: d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \ ! 287: d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \ ! 288: d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \ ! 289: \ ! 290: return d.ll; \ ! 291: } \ ! 292: \ ! 293: uint32_t name##16s(uint32_t src1, uint32_t src2) \ ! 294: { \ ! 295: VIS32 s, d; \ ! 296: \ ! 297: s.l = src1; \ ! 298: d.l = src2; \ ! 299: \ ! 300: d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \ ! 301: d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \ ! 302: \ ! 303: return d.l; \ ! 304: } \ ! 305: \ ! 306: uint64_t name##32(uint64_t src1, uint64_t src2) \ ! 307: { \ ! 308: VIS64 s, d; \ ! 309: \ ! 310: s.ll = src1; \ ! 311: d.ll = src2; \ ! 312: \ ! 313: d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \ ! 314: d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \ ! 315: \ ! 316: return d.ll; \ ! 317: } \ ! 318: \ ! 319: uint32_t name##32s(uint32_t src1, uint32_t src2) \ ! 320: { \ ! 321: VIS32 s, d; \ ! 322: \ ! 323: s.l = src1; \ ! 324: d.l = src2; \ ! 325: \ ! 326: d.l = F(d.l, s.l); \ ! 327: \ ! 328: return d.l; \ ! 329: } ! 330: ! 331: #define FADD(a, b) ((a) + (b)) ! 332: #define FSUB(a, b) ((a) - (b)) ! 333: VIS_HELPER(helper_fpadd, FADD) ! 334: VIS_HELPER(helper_fpsub, FSUB) ! 335: ! 336: #define VIS_CMPHELPER(name, F) \ ! 337: uint64_t name##16(uint64_t src1, uint64_t src2) \ ! 338: { \ ! 339: VIS64 s, d; \ ! 340: \ ! 341: s.ll = src1; \ ! 342: d.ll = src2; \ ! 343: \ ! 344: d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ ! 345: d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ ! 346: d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ ! 347: d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ ! 348: d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ ! 349: \ ! 350: return d.ll; \ ! 351: } \ ! 352: \ ! 353: uint64_t name##32(uint64_t src1, uint64_t src2) \ ! 354: { \ ! 355: VIS64 s, d; \ ! 356: \ ! 357: s.ll = src1; \ ! 358: d.ll = src2; \ ! 359: \ ! 360: d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ ! 361: d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ ! 362: d.VIS_L64(1) = 0; \ ! 363: \ ! 364: return d.ll; \ ! 365: } ! 366: ! 367: #define FCMPGT(a, b) ((a) > (b)) ! 368: #define FCMPEQ(a, b) ((a) == (b)) ! 369: #define FCMPLE(a, b) ((a) <= (b)) ! 370: #define FCMPNE(a, b) ((a) != (b)) ! 371: ! 372: VIS_CMPHELPER(helper_fcmpgt, FCMPGT) ! 373: VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) ! 374: VIS_CMPHELPER(helper_fcmple, FCMPLE) ! 375: VIS_CMPHELPER(helper_fcmpne, FCMPNE) ! 376: ! 377: uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) ! 378: { ! 379: int i; ! 380: for (i = 0; i < 8; i++) { ! 381: int s1, s2; ! 382: ! 383: s1 = (src1 >> (56 - (i * 8))) & 0xff; ! 384: s2 = (src2 >> (56 - (i * 8))) & 0xff; ! 385: ! 386: /* Absolute value of difference. */ ! 387: s1 -= s2; ! 388: if (s1 < 0) { ! 389: s1 = -s1; ! 390: } ! 391: ! 392: sum += s1; ! 393: } ! 394: ! 395: return sum; ! 396: } ! 397: ! 398: uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2) ! 399: { ! 400: int scale = (gsr >> 3) & 0xf; ! 401: uint32_t ret = 0; ! 402: int byte; ! 403: ! 404: for (byte = 0; byte < 4; byte++) { ! 405: uint32_t val; ! 406: int16_t src = rs2 >> (byte * 16); ! 407: int32_t scaled = src << scale; ! 408: int32_t from_fixed = scaled >> 7; ! 409: ! 410: val = (from_fixed < 0 ? 0 : ! 411: from_fixed > 255 ? 255 : from_fixed); ! 412: ! 413: ret |= val << (8 * byte); ! 414: } ! 415: ! 416: return ret; ! 417: } ! 418: ! 419: uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2) ! 420: { ! 421: int scale = (gsr >> 3) & 0x1f; ! 422: uint64_t ret = 0; ! 423: int word; ! 424: ! 425: ret = (rs1 << 8) & ~(0x000000ff000000ffULL); ! 426: for (word = 0; word < 2; word++) { ! 427: uint64_t val; ! 428: int32_t src = rs2 >> (word * 32); ! 429: int64_t scaled = (int64_t)src << scale; ! 430: int64_t from_fixed = scaled >> 23; ! 431: ! 432: val = (from_fixed < 0 ? 0 : ! 433: (from_fixed > 255) ? 255 : from_fixed); ! 434: ! 435: ret |= val << (32 * word); ! 436: } ! 437: ! 438: return ret; ! 439: } ! 440: ! 441: uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) ! 442: { ! 443: int scale = (gsr >> 3) & 0x1f; ! 444: uint32_t ret = 0; ! 445: int word; ! 446: ! 447: for (word = 0; word < 2; word++) { ! 448: uint32_t val; ! 449: int32_t src = rs2 >> (word * 32); ! 450: int64_t scaled = src << scale; ! 451: int64_t from_fixed = scaled >> 16; ! 452: ! 453: val = (from_fixed < -32768 ? -32768 : ! 454: from_fixed > 32767 ? 32767 : from_fixed); ! 455: ! 456: ret |= (val & 0xffff) << (word * 16); ! 457: } ! 458: ! 459: return ret; ! 460: } ! 461: ! 462: uint64 helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) ! 463: { ! 464: union { ! 465: uint64_t ll[2]; ! 466: uint8_t b[16]; ! 467: } s; ! 468: VIS64 r; ! 469: uint32_t i, mask, host; ! 470: ! 471: /* Set up S such that we can index across all of the bytes. */ ! 472: #ifdef HOST_WORDS_BIGENDIAN ! 473: s.ll[0] = src1; ! 474: s.ll[1] = src2; ! 475: host = 0; ! 476: #else ! 477: s.ll[1] = src1; ! 478: s.ll[0] = src2; ! 479: host = 15; ! 480: #endif ! 481: mask = gsr >> 32; ! 482: ! 483: for (i = 0; i < 8; ++i) { ! 484: unsigned e = (mask >> (28 - i*4)) & 0xf; ! 485: r.VIS_B64(i) = s.b[e ^ host]; ! 486: } ! 487: ! 488: return r.ll; ! 489: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.