/*
 * NOTE(review): this span comes from a line-numbered listing. The leading
 * integers (55, 64, 65, ...) are listing line numbers, and the gaps between
 * them mean the array declarations and closing braces that belong around
 * these rows are not visible here.
 *
 * X8(x) expands to its argument repeated eight times, comma-separated.
 *
 * Below are four groups of 4 rows x 8 hex constants, 32 values per group.
 * If the elements are 16-bit words (TODO confirm against the missing
 * declarations) each group is 64 bytes, which matches how iMTX_MULT reads a
 * table: four pmaddwd operands at byte offsets 0, 16, 32 and 48.
 * Presumably the groups are iTab1, iTab2, iTab3 and iTab4 in that order, as
 * named at the iMTX_MULT call sites -- confirm before relying on it.
 */
/* group 1 of 4 (presumably iTab1) */
55 #define X8(x) x, x, x, x, x, x, x, x 64 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
65 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
66 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
67 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
/* group 2 of 4 (presumably iTab2) */
71 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
72 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
73 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
74 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
/* group 3 of 4 (presumably iTab3) */
78 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
79 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
80 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
81 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
/* group 4 of 4 (presumably iTab4) */
85 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
86 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
87 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
88 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
/*
 * Rounder rows: four rows, each holding one decimal constant repeated four
 * times. The call sites add a row with paddd at walkenIdctRounders + k * 16,
 * i.e. one 16-byte row per rounder, which suggests four 32-bit elements per
 * row -- TODO confirm against the declaration, which is not visible in this
 * listing (the leading 92..95 are listing line numbers).
 *
 * NOTE(review): the call sites index rows 0 through 5, but only four rows
 * are visible here; the remaining rows are presumably cut from the listing.
 */
92 65536, 65536, 65536, 65536,
93 3597, 3597, 3597, 3597,
94 2260, 2260, 2260, 2260,
95 1203, 1203, 1203, 1203,
/*
 * Macro building blocks and inline-asm body of the SSE2 IDCT.
 *
 * NOTE(review): this is a line-numbered listing with whitespace collapsed;
 * the embedded integers (101, 102, ...) are listing line numbers. Numbers
 * are skipped in several places, so some source lines are missing:
 *   - JZ and JNZ show only their testl line, no jump;
 *   - ROW0/ROW2/ROW4/ROW6, XMMS, MOV_32_ONLY and TAN1/TAN3 are defined twice
 *     with no visible conditional selecting between the two sets;
 *   - the second set has no visible ROW0, the first no visible REGn/SREG2;
 *   - the function header and the opener of the asm statement are absent.
 *
 * Register and operand names
 *   ROW1/ROW3/ROW5/ROW7 are xmm6/xmm4/xmm5/xmm7.
 *   First set:  ROW0/2/4/6 are xmm8..xmm11, XMMS is xmm12, TAN3/TAN1 are
 *               xmm13/xmm14, the EVEN helpers alias the ODD ones, and
 *               MOV_32_ONLY is the # character (presumably so the rest of
 *               that asm line is treated as a comment -- confirm).
 *   Second set: ROW2/4/6 are the memory operands n*16(%0), REG0/REG2 are
 *               xmm4, REG4/REG6 are xmm6, SREG2 is xmm7, XMMS and TAN1 are
 *               xmm2, TAN3 is xmm0, MOV_32_ONLY is a real movdqa,
 *               CLEAR_EVEN expands to nothing, and PUT_EVEN shuffles xmm2 in
 *               place and then stores it to dst.
 *
 * Helper macros
 *   CLEAR_ODD(r)  pxor r,r, which zeroes r.
 *   PUT_ODD(dst)  pshufhw $0x1B of xmm2 into dst.
 *   ROUND(x)      the start of a paddd instruction with x as source; the
 *                 user (iMTX_MULT) appends the destination operand.
 *   TEST_ONE_ROW / TEST_TWO_ROWS
 *                 OR the two 8-byte halves of a row, add mm0 with unsigned
 *                 byte saturation, and extract the per-byte top bits into
 *                 reg. mm0 is loaded from m127 at the start of the asm body;
 *                 presumably reg ends up non-zero exactly when the row has a
 *                 non-zero byte -- confirm the value of m127. The clear*
 *                 parameters are not used in the visible lines.
 *   iMTX_MULT(src, table, rounder, put)
 *                 loads the row at src, multiplies shuffled copies of it
 *                 against table at byte offsets 0, 16, 32 and 48 with
 *                 pmaddwd, applies rounder to xmm0, forms the sum and the
 *                 difference of the two partial results, shifts both right
 *                 by 11 and packs them with signed saturation into xmm2.
 *                 The put parameter is not used in the visible lines.
 *   iLLM_PASS(dct) / iLLM_PASS_SPARSE(dct)
 *                 saturating add/sub and pmulhw network over the row
 *                 registers using the tan1, tan2, tan3 and sqrt2 constants;
 *                 each of the eight results is shifted right by 6 (psraw)
 *                 and stored to n*16(dct), n = 0..7. The SPARSE variant does
 *                 not reference ROW4, ROW6, REG4 or REG6.
 *
 * Asm body (starts at the movq of m127 near the end of the macro text)
 *   Rows 0, 1 and 2 are run through iMTX_MULT with iTab1, iTab2, iTab3 and
 *   rounder rows 0, 1, 2; the later steps are annotated inline below.
 */
101 #define ROW1 "%%xmm6" 102 #define ROW3 "%%xmm4" 103 #define ROW5 "%%xmm5" 104 #define ROW7 "%%xmm7" 106 #define CLEAR_ODD(r) "pxor "r","r" \n\t" 107 #define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t" 111 # define ROW0 "%%xmm8" 113 # define ROW2 "%%xmm9" 115 # define ROW4 "%%xmm10" 117 # define ROW6 "%%xmm11" 119 # define CLEAR_EVEN(r) CLEAR_ODD(r) 120 # define PUT_EVEN(dst) PUT_ODD(dst) 121 # define XMMS "%%xmm12" 122 # define MOV_32_ONLY "#" 124 # define TAN3 "%%xmm13" 125 # define TAN1 "%%xmm14" 130 # define REG0 "%%xmm4" 131 # define ROW2 "2*16(%0)" 132 # define REG2 "%%xmm4" 133 # define ROW4 "4*16(%0)" 134 # define REG4 "%%xmm6" 135 # define ROW6 "6*16(%0)" 136 # define REG6 "%%xmm6" 137 # define CLEAR_EVEN(r) 138 # define PUT_EVEN(dst) \ 139 "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \ 140 "movdqa %%xmm2, "dst" \n\t" 141 # define XMMS "%%xmm2" 142 # define MOV_32_ONLY "movdqa " 143 # define SREG2 "%%xmm7" 144 # define TAN3 "%%xmm0" 145 # define TAN1 "%%xmm2" 149 #define ROUND(x) "paddd "MANGLE(x) 151 #define JZ(reg, to) \ 152 "testl "reg","reg" \n\t" \ 155 #define JNZ(reg, to) \ 156 "testl "reg","reg" \n\t" \ 159 #define TEST_ONE_ROW(src, reg, clear) \ 161 "movq "src", %%mm1 \n\t" \ 162 "por 8+"src", %%mm1 \n\t" \ 163 "paddusb %%mm0, %%mm1 \n\t" \ 164 "pmovmskb %%mm1, "reg" \n\t" 166 #define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \ 169 "movq "row1", %%mm1 \n\t" \ 170 "por 8+"row1", %%mm1 \n\t" \ 171 "movq "row2", %%mm2 \n\t" \ 172 "por 8+"row2", %%mm2 \n\t" \ 173 "paddusb %%mm0, %%mm1 \n\t" \ 174 "paddusb %%mm0, %%mm2 \n\t" \ 175 "pmovmskb %%mm1, "reg1" \n\t" \ 176 "pmovmskb %%mm2, "reg2" \n\t" 179 #define iMTX_MULT(src, table, rounder, put) \ 180 "movdqa "src", %%xmm3 \n\t" \ 181 "movdqa %%xmm3, %%xmm0 \n\t" \ 182 "pshufd $0x11, %%xmm3, %%xmm1 \n\t" \ 183 "punpcklqdq %%xmm0, %%xmm0 \n\t" \ 184 "pmaddwd "table", %%xmm0 \n\t" \ 185 "pmaddwd 16+"table", %%xmm1 \n\t" \ 186 "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" \ 187 "punpckhqdq %%xmm3, %%xmm3 
\n\t" \ 188 "pmaddwd 32+"table", %%xmm2 \n\t" \ 189 "pmaddwd 48+"table", %%xmm3 \n\t" \ 190 "paddd %%xmm1, %%xmm0 \n\t" \ 191 "paddd %%xmm3, %%xmm2 \n\t" \ 192 rounder", %%xmm0 \n\t" \ 193 "movdqa %%xmm2, %%xmm3 \n\t" \ 194 "paddd %%xmm0, %%xmm2 \n\t" \ 195 "psubd %%xmm3, %%xmm0 \n\t" \ 196 "psrad $11, %%xmm2 \n\t" \ 197 "psrad $11, %%xmm0 \n\t" \ 198 "packssdw %%xmm0, %%xmm2 \n\t" \ 203 "movdqa "MANGLE(tan3)", "TAN3" \n\t" \ 204 "movdqa "MANGLE(tan1)", "TAN1" \n\t" \ 206 #define iLLM_PASS(dct) \ 208 "movdqa "TAN3", %%xmm1 \n\t" \ 209 "movdqa "TAN1", %%xmm3 \n\t" \ 210 "pmulhw %%xmm4, "TAN3" \n\t" \ 211 "pmulhw %%xmm5, %%xmm1 \n\t" \ 212 "paddsw %%xmm4, "TAN3" \n\t" \ 213 "paddsw %%xmm5, %%xmm1 \n\t" \ 214 "psubsw %%xmm5, "TAN3" \n\t" \ 215 "paddsw %%xmm4, %%xmm1 \n\t" \ 216 "pmulhw %%xmm7, %%xmm3 \n\t" \ 217 "pmulhw %%xmm6, "TAN1" \n\t" \ 218 "paddsw %%xmm6, %%xmm3 \n\t" \ 219 "psubsw %%xmm7, "TAN1" \n\t" \ 220 "movdqa %%xmm3, %%xmm7 \n\t" \ 221 "movdqa "TAN1", %%xmm6 \n\t" \ 222 "psubsw %%xmm1, %%xmm3 \n\t" \ 223 "psubsw "TAN3", "TAN1" \n\t" \ 224 "paddsw %%xmm7, %%xmm1 \n\t" \ 225 "paddsw %%xmm6, "TAN3" \n\t" \ 226 "movdqa %%xmm3, %%xmm6 \n\t" \ 227 "psubsw "TAN3", %%xmm3 \n\t" \ 228 "paddsw %%xmm6, "TAN3" \n\t" \ 229 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ 230 "pmulhw %%xmm4, %%xmm3 \n\t" \ 231 "pmulhw %%xmm4, "TAN3" \n\t" \ 232 "paddsw "TAN3", "TAN3" \n\t" \ 233 "paddsw %%xmm3, %%xmm3 \n\t" \ 234 "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \ 235 MOV_32_ONLY ROW2", "REG2" \n\t" \ 236 MOV_32_ONLY ROW6", "REG6" \n\t" \ 237 "movdqa %%xmm7, %%xmm5 \n\t" \ 238 "pmulhw "REG6", %%xmm7 \n\t" \ 239 "pmulhw "REG2", %%xmm5 \n\t" \ 240 "paddsw "REG2", %%xmm7 \n\t" \ 241 "psubsw "REG6", %%xmm5 \n\t" \ 242 MOV_32_ONLY ROW0", "REG0" \n\t" \ 243 MOV_32_ONLY ROW4", "REG4" \n\t" \ 244 MOV_32_ONLY" "TAN1", (%0) \n\t" \ 245 "movdqa "REG0", "XMMS" \n\t" \ 246 "psubsw "REG4", "REG0" \n\t" \ 247 "paddsw "XMMS", "REG4" \n\t" \ 248 "movdqa "REG4", "XMMS" \n\t" \ 249 "psubsw %%xmm7, "REG4" 
\n\t" \ 250 "paddsw "XMMS", %%xmm7 \n\t" \ 251 "movdqa "REG0", "XMMS" \n\t" \ 252 "psubsw %%xmm5, "REG0" \n\t" \ 253 "paddsw "XMMS", %%xmm5 \n\t" \ 254 "movdqa %%xmm5, "XMMS" \n\t" \ 255 "psubsw "TAN3", %%xmm5 \n\t" \ 256 "paddsw "XMMS", "TAN3" \n\t" \ 257 "movdqa "REG0", "XMMS" \n\t" \ 258 "psubsw %%xmm3, "REG0" \n\t" \ 259 "paddsw "XMMS", %%xmm3 \n\t" \ 260 MOV_32_ONLY" (%0), "TAN1" \n\t" \ 261 "psraw $6, %%xmm5 \n\t" \ 262 "psraw $6, "REG0" \n\t" \ 263 "psraw $6, "TAN3" \n\t" \ 264 "psraw $6, %%xmm3 \n\t" \ 265 "movdqa "TAN3", 1*16("dct") \n\t" \ 266 "movdqa %%xmm3, 2*16("dct") \n\t" \ 267 "movdqa "REG0", 5*16("dct") \n\t" \ 268 "movdqa %%xmm5, 6*16("dct") \n\t" \ 269 "movdqa %%xmm7, %%xmm0 \n\t" \ 270 "movdqa "REG4", %%xmm4 \n\t" \ 271 "psubsw %%xmm1, %%xmm7 \n\t" \ 272 "psubsw "TAN1", "REG4" \n\t" \ 273 "paddsw %%xmm0, %%xmm1 \n\t" \ 274 "paddsw %%xmm4, "TAN1" \n\t" \ 275 "psraw $6, %%xmm1 \n\t" \ 276 "psraw $6, %%xmm7 \n\t" \ 277 "psraw $6, "TAN1" \n\t" \ 278 "psraw $6, "REG4" \n\t" \ 279 "movdqa %%xmm1, ("dct") \n\t" \ 280 "movdqa "TAN1", 3*16("dct") \n\t" \ 281 "movdqa "REG4", 4*16("dct") \n\t" \ 282 "movdqa %%xmm7, 7*16("dct") \n\t" 285 #define iLLM_PASS_SPARSE(dct) \ 286 "pmulhw %%xmm4, "TAN3" \n\t" \ 287 "paddsw %%xmm4, "TAN3" \n\t" \ 288 "movdqa %%xmm6, %%xmm3 \n\t" \ 289 "pmulhw %%xmm6, "TAN1" \n\t" \ 290 "movdqa %%xmm4, %%xmm1 \n\t" \ 291 "psubsw %%xmm1, %%xmm3 \n\t" \ 292 "paddsw %%xmm6, %%xmm1 \n\t" \ 293 "movdqa "TAN1", %%xmm6 \n\t" \ 294 "psubsw "TAN3", "TAN1" \n\t" \ 295 "paddsw %%xmm6, "TAN3" \n\t" \ 296 "movdqa %%xmm3, %%xmm6 \n\t" \ 297 "psubsw "TAN3", %%xmm3 \n\t" \ 298 "paddsw %%xmm6, "TAN3" \n\t" \ 299 "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \ 300 "pmulhw %%xmm4, %%xmm3 \n\t" \ 301 "pmulhw %%xmm4, "TAN3" \n\t" \ 302 "paddsw "TAN3", "TAN3" \n\t" \ 303 "paddsw %%xmm3, %%xmm3 \n\t" \ 304 "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \ 305 MOV_32_ONLY ROW2", "SREG2" \n\t" \ 306 "pmulhw "SREG2", %%xmm5 \n\t" \ 307 MOV_32_ONLY ROW0", "REG0" \n\t" \ 308 
"movdqa "REG0", %%xmm6 \n\t" \ 309 "psubsw "SREG2", %%xmm6 \n\t" \ 310 "paddsw "REG0", "SREG2" \n\t" \ 311 MOV_32_ONLY" "TAN1", (%0) \n\t" \ 312 "movdqa "REG0", "XMMS" \n\t" \ 313 "psubsw %%xmm5, "REG0" \n\t" \ 314 "paddsw "XMMS", %%xmm5 \n\t" \ 315 "movdqa %%xmm5, "XMMS" \n\t" \ 316 "psubsw "TAN3", %%xmm5 \n\t" \ 317 "paddsw "XMMS", "TAN3" \n\t" \ 318 "movdqa "REG0", "XMMS" \n\t" \ 319 "psubsw %%xmm3, "REG0" \n\t" \ 320 "paddsw "XMMS", %%xmm3 \n\t" \ 321 MOV_32_ONLY" (%0), "TAN1" \n\t" \ 322 "psraw $6, %%xmm5 \n\t" \ 323 "psraw $6, "REG0" \n\t" \ 324 "psraw $6, "TAN3" \n\t" \ 325 "psraw $6, %%xmm3 \n\t" \ 326 "movdqa "TAN3", 1*16("dct") \n\t" \ 327 "movdqa %%xmm3, 2*16("dct") \n\t" \ 328 "movdqa "REG0", 5*16("dct") \n\t" \ 329 "movdqa %%xmm5, 6*16("dct") \n\t" \ 330 "movdqa "SREG2", %%xmm0 \n\t" \ 331 "movdqa %%xmm6, %%xmm4 \n\t" \ 332 "psubsw %%xmm1, "SREG2" \n\t" \ 333 "psubsw "TAN1", %%xmm6 \n\t" \ 334 "paddsw %%xmm0, %%xmm1 \n\t" \ 335 "paddsw %%xmm4, "TAN1" \n\t" \ 336 "psraw $6, %%xmm1 \n\t" \ 337 "psraw $6, "SREG2" \n\t" \ 338 "psraw $6, "TAN1" \n\t" \ 339 "psraw $6, %%xmm6 \n\t" \ 340 "movdqa %%xmm1, ("dct") \n\t" \ 341 "movdqa "TAN1", 3*16("dct") \n\t" \ 342 "movdqa %%xmm6, 4*16("dct") \n\t" \ 343 "movdqa "SREG2", 7*16("dct") \n\t" 348 "movq "MANGLE (m127)
", %%mm0 \n\t" 349 iMTX_MULT(
"(%0)",
MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))
350 iMTX_MULT(
"1*16(%0)",
MANGLE(iTab2), ROUND(walkenIdctRounders + 1 * 16), PUT_ODD(ROW1))
351 iMTX_MULT(
"2*16(%0)",
MANGLE(iTab3), ROUND(walkenIdctRounders + 2 * 16), PUT_EVEN(ROW2))
/* Test rows 3 and 4; the byte masks land in eax and ecx. */
353 TEST_TWO_ROWS(
"3*16(%0)",
"4*16(%0)",
"%%eax",
"%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
/* Row 3: iTab4 with rounder row 3. */
355 iMTX_MULT("3*16(%0)",
MANGLE(iTab4), ROUND(walkenIdctRounders + 3 * 16), PUT_ODD(ROW3))
/*
 * Test rows 5 and 6 (eax, edx) and row 7 (esi). eax is reused here.
 * NOTE(review): listing lines 354 and 356, between the tests and the row-3
 * transform, are not visible, so any branching on these masks cannot be seen.
 */
357 TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
358 TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
/*
 * Sparse column pass, operating in place on the block at %0.
 * NOTE(review): listing lines 359-364 and 366-367 are missing, so the
 * condition under which this pass is taken is not visible here.
 */
365 iLLM_PASS_SPARSE("%0")
/*
 * Rows 4..7 use iTab1, iTab4, iTab3, iTab2. Row 4 passes the # character as
 * its rounder instead of a ROUND() expression; presumably that turns the
 * rounder line inside iMTX_MULT into an assembler comment -- confirm.
 */
368 iMTX_MULT("4*16(%0)",
MANGLE(iTab1), "
#", PUT_EVEN(ROW4)) 370 iMTX_MULT(
"5*16(%0)",
MANGLE(iTab4), ROUND(walkenIdctRounders + 4 * 16), PUT_ODD(ROW5))
373 iMTX_MULT(
"6*16(%0)",
MANGLE(iTab3), ROUND(walkenIdctRounders + 5 * 16), PUT_EVEN(ROW6))
/* NOTE(review): rows 6 and 7 both use rounder row 5 -- confirm intended. */
376 iMTX_MULT(
"7*16(%0)",
MANGLE(iTab2), ROUND(walkenIdctRounders + 5 * 16), PUT_ODD(ROW7))
/*
 * Clobber list: xmm4-xmm7, xmm12-xmm14, eax, ecx, edx, esi and memory.
 * The two dangling close-parens end macro invocations whose opening lines
 * (listing lines 384 and 387, presumably XMM_CLOBBERS) are not visible, so
 * further clobbered registers may be listed there.
 */
385 "%xmm4",
"%xmm5",
"%xmm6",
"%xmm7", )
388 "%xmm12",
"%xmm13",
"%xmm14", )
390 "%eax",
"%ecx",
"%edx",
"%esi",
"memory");
memory handling functions
#define DECLARE_ASM_CONST(n, t, v)
void ff_xvid_idct_sse2_add(uint8_t *dest, int line_size, short *block)
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
common internal API header
void ff_xvid_idct_sse2_put(uint8_t *dest, int line_size, short *block)
#define XMM_CLOBBERS(...)
void ff_xvid_idct_sse2(short *block)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
header for Xvid IDCT functions