     13036,  13036,  13036,  13036, // tan(1 * pi / 16) * (1 << 16)
     27146,  27146,  27146,  27146, // tan(2 * pi / 16) * (1 << 16)
    -21746, -21746, -21746, -21746, // (tan(3 * pi / 16) - 1) * (1 << 16)
     23170,  23170,  23170,  23170  // cos(4 * pi / 16) * (1 << 15)
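/*
 * Illustrative sketch (not part of the original file): a standalone program
 * that reproduces the four constant rows above from their trigonometric
 * definitions. The tg_3_16 row appears to be stored with 1 subtracted so the
 * pmulhw + paddsw pair in DCT_8_INV_COL below can rebuild the full product.
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
    const double pi = 3.14159265358979323846;
    printf("%ld\n", lrint(tan(1 * pi / 16) * 65536));       /*  13036 */
    printf("%ld\n", lrint(tan(2 * pi / 16) * 65536));       /*  27146 */
    printf("%ld\n", lrint((tan(3 * pi / 16) - 1) * 65536)); /* -21746 */
    printf("%ld\n", lrint(cos(4 * pi / 16) * 32768));       /*  23170 */
    return 0;
}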
    // Table for rows 0,4 - constants are multiplied by cos_4_16
     16384,  16384,  16384, -16384, // movq-> w06 w04 w02 w00
     21407,   8867,   8867, -21407, //        w07 w05 w03 w01
     16384, -16384,  16384,  16384, //        w14 w12 w10 w08
     -8867,  21407, -21407,  -8867, //        w15 w13 w11 w09
     22725,  12873,  19266, -22725, //        w22 w20 w18 w16
     19266,   4520,  -4520, -12873, //        w23 w21 w19 w17
     12873,   4520,   4520,  19266, //        w30 w28 w26 w24
    -22725,  19266, -12873, -22725, //        w31 w29 w27 w25

    // Table for rows 1,7 - constants are multiplied by cos_1_16
    // (same word layout as above)
     22725,  22725,  22725, -22725,
     29692,  12299,  12299, -29692,
     22725, -22725,  22725,  22725,
    -12299,  29692, -29692, -12299,
     31521,  17855,  26722, -31521,
     26722,   6270,  -6270, -17855,
     17855,   6270,   6270,  26722,
    -31521,  26722, -17855, -31521,

    // Table for rows 2,6 - constants are multiplied by cos_2_16
     21407,  21407,  21407, -21407,
     27969,  11585,  11585, -27969,
     21407, -21407,  21407,  21407,
    -11585,  27969, -27969, -11585,
     29692,  16819,  25172, -29692,
     25172,   5906,  -5906, -16819,
     16819,   5906,   5906,  25172,
    -29692,  25172, -16819, -29692,

    // Table for rows 3,5 - constants are multiplied by cos_3_16
     19266,  19266,  19266, -19266,
     25172,  10426,  10426, -25172,
     19266, -19266,  19266,  19266,
    -10426,  25172, -25172, -10426,
     26722,  15137,  22654, -26722,
     22654,   5315,  -5315, -15137,
     15137,   5315,   5315,  22654,
    -26722,  22654, -15137, -26722,
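/*
 * Illustrative sketch (an assumption, not quoted from the source): the entry
 * magnitudes of all four row tables above appear to be
 * round(cn(j) * cn(i) * 2^15), where cn(k) = cos(k * pi / 16) with the DC
 * term cn(0) replaced by the 1/sqrt(2) normalization cos(4 * pi / 16), and
 * the tables for rows {0,4}, {1,7}, {2,6}, {3,5} are scaled by cn(4), cn(1),
 * cn(2), cn(3) respectively. Signs and word order follow the pmaddwd layout
 * noted in the comments above.
 */
#include <math.h>
#include <stdio.h>

static double cn(int k)
{
    const double pi = 3.14159265358979323846;
    return cos((k ? k : 4) * pi / 16.0); /* cn(0) = cos(4*pi/16) = 1/sqrt(2) */
}

int main(void)
{
    const int scale[4] = { 4, 1, 2, 3 };     /* per-table scale index */
    for (int t = 0; t < 4; t++)
        for (int j = 0; j < 8; j++)
            printf("%6ld%s", lrint(cn(j) * cn(scale[t]) * 32768.0),
                   j < 7 ? ", " : "\n");     /* first line: 16384, 22725, ... */
    return 0;
}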
    // Table for rows 0,4 - constants are multiplied by cos_4_16
     16384,  21407,  16384,   8867, // movq-> w05 w04 w01 w00
     16384,   8867, -16384, -21407, //        w07 w06 w03 w02
     16384,  -8867,  16384, -21407, //        w13 w12 w09 w08
    -16384,  21407,  16384,  -8867, //        w15 w14 w11 w10
     22725,  19266,  19266,  -4520, //        w21 w20 w17 w16
     12873,   4520, -22725, -12873, //        w23 w22 w19 w18
     12873, -22725,   4520, -12873, //        w29 w28 w25 w24
      4520,  19266,  19266, -22725, //        w31 w30 w27 w26

    // Table for rows 1,7 - constants are multiplied by cos_1_16
    // (same word layout as above)
     22725,  29692,  22725,  12299,
     22725,  12299, -22725, -29692,
     22725, -12299,  22725, -29692,
    -22725,  29692,  22725, -12299,
     31521,  26722,  26722,  -6270,
     17855,   6270, -31521, -17855,
     17855, -31521,   6270, -17855,
      6270,  26722,  26722, -31521,

    // Table for rows 2,6 - constants are multiplied by cos_2_16
     21407,  27969,  21407,  11585,
     21407,  11585, -21407, -27969,
     21407, -11585,  21407, -27969,
    -21407,  27969,  21407, -11585,
     29692,  25172,  25172,  -5906,
     16819,   5906, -29692, -16819,
     16819, -29692,   5906, -16819,
      5906,  25172,  25172, -29692,

    // Table for rows 3,5 - constants are multiplied by cos_3_16
     19266,  25172,  19266,  10426,
     19266,  10426, -19266, -25172,
     19266, -10426,  19266, -25172,
    -19266,  25172,  19266, -10426,
     26722,  22654,  22654,  -5315,
     15137,   5315, -26722, -15137,
     15137, -26722,   5315, -15137,
      5315,  22654,  22654, -26722,
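/*
 * The XMM row macro below replaces the MMX punpck sequence with MMXEXT
 * pshufw, which can splat the even or odd input words in one instruction
 * ($0x88 and $0xdd), and reorders the final words with $0xb1; this is why
 * the two coefficient tables above use different word layouts. A scalar
 * model of pshufw's immediate-selector semantics (illustrative sketch, not
 * part of the original file):
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t pshufw(uint64_t src, int imm8)
{
    uint64_t dst = 0;
    for (int i = 0; i < 4; i++) {
        int sel = (imm8 >> (2 * i)) & 3;           /* source word index */
        uint64_t w = (src >> (16 * sel)) & 0xffff; /* pick that word    */
        dst |= w << (16 * i);
    }
    return dst;
}

int main(void)
{
    uint64_t x = 0x0004000300020001ULL;  /* words x3 x2 x1 x0 */
    /* $0x88 duplicates the even words, $0xdd the odd ones: */
    printf("%016llx\n", (unsigned long long) pshufw(x, 0x88)); /* 0003000100030001 */
    printf("%016llx\n", (unsigned long long) pshufw(x, 0xdd)); /* 0004000200040002 */
    return 0;
}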
/*
 * DCT_8_INV_ROW_MMX(INP, OUT, TABLE, ROUNDER)
 * Inverse DCT of one 8x16-bit row: interleaves the input words with
 * punpck*, multiplies them against the pmaddwd-ordered TABLE, adds
 * ROUNDER, shifts down by 11 bits and packs the result back to OUT.
 */
#define DCT_8_INV_ROW_MMX(A1, A2, A3, A4)   \
    "movq      "#A1",    %%mm0       \n\t"  \
    "movq      8+"#A1",  %%mm1       \n\t"  \
    "movq      %%mm0,    %%mm2       \n\t"  \
    "movq      "#A3",    %%mm3       \n\t"  \
    "punpcklwd %%mm1,    %%mm0       \n\t"  \
    "movq      %%mm0,    %%mm5       \n\t"  \
    "punpckldq %%mm0,    %%mm0       \n\t"  \
    "movq      8+"#A3",  %%mm4       \n\t"  \
    "punpckhwd %%mm1,    %%mm2       \n\t"  \
    "pmaddwd   %%mm0,    %%mm3       \n\t"  \
    "movq      %%mm2,    %%mm6       \n\t"  \
    "movq      32+"#A3", %%mm1       \n\t"  \
    "punpckldq %%mm2,    %%mm2       \n\t"  \
    "pmaddwd   %%mm2,    %%mm4       \n\t"  \
    "punpckhdq %%mm5,    %%mm5       \n\t"  \
    "pmaddwd   16+"#A3", %%mm0       \n\t"  \
    "punpckhdq %%mm6,    %%mm6       \n\t"  \
    "movq      40+"#A3", %%mm7       \n\t"  \
    "pmaddwd   %%mm5,    %%mm1       \n\t"  \
    "paddd     "#A4",    %%mm3       \n\t"  \
    "pmaddwd   %%mm6,    %%mm7       \n\t"  \
    "pmaddwd   24+"#A3", %%mm2       \n\t"  \
    "paddd     %%mm4,    %%mm3       \n\t"  \
    "pmaddwd   48+"#A3", %%mm5       \n\t"  \
    "movq      %%mm3,    %%mm4       \n\t"  \
    "pmaddwd   56+"#A3", %%mm6       \n\t"  \
    "paddd     %%mm7,    %%mm1       \n\t"  \
    "paddd     "#A4",    %%mm0       \n\t"  \
    "psubd     %%mm1,    %%mm3       \n\t"  \
    "psrad     $11,      %%mm3       \n\t"  \
    "paddd     %%mm4,    %%mm1       \n\t"  \
    "paddd     %%mm2,    %%mm0       \n\t"  \
    "psrad     $11,      %%mm1       \n\t"  \
    "paddd     %%mm6,    %%mm5       \n\t"  \
    "movq      %%mm0,    %%mm4       \n\t"  \
    "paddd     %%mm5,    %%mm0       \n\t"  \
    "psubd     %%mm5,    %%mm4       \n\t"  \
    "psrad     $11,      %%mm0       \n\t"  \
    "psrad     $11,      %%mm4       \n\t"  \
    "packssdw  %%mm0,    %%mm1       \n\t"  \
    "packssdw  %%mm3,    %%mm4       \n\t"  \
    "movq      %%mm4,    %%mm7       \n\t"  \
    "psrld     $16,      %%mm4       \n\t"  \
    "pslld     $16,      %%mm7       \n\t"  \
    "movq      %%mm1,    "#A2"       \n\t"  \
    "por       %%mm4,    %%mm7       \n\t"  \
    "movq      %%mm7,    8+"#A2"     \n\t"

/*
 * DCT_8_INV_ROW_XMM(INP, OUT, TABLE, ROUNDER)
 * Same row transform, using MMXEXT pshufw to splat the even/odd input
 * words instead of the punpck sequence.
 */
#define DCT_8_INV_ROW_XMM(A1, A2, A3, A4)   \
    "movq      "#A1",    %%mm0       \n\t"  \
    "movq      8+"#A1",  %%mm1       \n\t"  \
    "movq      %%mm0,    %%mm2       \n\t"  \
    "movq      "#A3",    %%mm3       \n\t"  \
    "pshufw    $0x88,    %%mm0, %%mm0 \n\t" \
    "movq      8+"#A3",  %%mm4       \n\t"  \
    "movq      %%mm1,    %%mm5       \n\t"  \
    "pmaddwd   %%mm0,    %%mm3       \n\t"  \
    "movq      32+"#A3", %%mm6       \n\t"  \
    "pshufw    $0x88,    %%mm1, %%mm1 \n\t" \
    "pmaddwd   %%mm1,    %%mm4       \n\t"  \
    "movq      40+"#A3", %%mm7       \n\t"  \
    "pshufw    $0xdd,    %%mm2, %%mm2 \n\t" \
    "pmaddwd   %%mm2,    %%mm6       \n\t"  \
    "pshufw    $0xdd,    %%mm5, %%mm5 \n\t" \
    "pmaddwd   %%mm5,    %%mm7       \n\t"  \
    "paddd     "#A4",    %%mm3       \n\t"  \
    "pmaddwd   16+"#A3", %%mm0       \n\t"  \
    "paddd     %%mm4,    %%mm3       \n\t"  \
    "pmaddwd   24+"#A3", %%mm1       \n\t"  \
    "movq      %%mm3,    %%mm4       \n\t"  \
    "pmaddwd   48+"#A3", %%mm2       \n\t"  \
    "paddd     %%mm7,    %%mm6       \n\t"  \
    "pmaddwd   56+"#A3", %%mm5       \n\t"  \
    "paddd     %%mm6,    %%mm3       \n\t"  \
    "paddd     "#A4",    %%mm0       \n\t"  \
    "psrad     $11,      %%mm3       \n\t"  \
    "paddd     %%mm1,    %%mm0       \n\t"  \
    "psubd     %%mm6,    %%mm4       \n\t"  \
    "movq      %%mm0,    %%mm7       \n\t"  \
    "paddd     %%mm5,    %%mm2       \n\t"  \
    "paddd     %%mm2,    %%mm0       \n\t"  \
    "psrad     $11,      %%mm4       \n\t"  \
    "psubd     %%mm2,    %%mm7       \n\t"  \
    "psrad     $11,      %%mm0       \n\t"  \
    "psrad     $11,      %%mm7       \n\t"  \
    "packssdw  %%mm0,    %%mm3       \n\t"  \
    "packssdw  %%mm4,    %%mm7       \n\t"  \
    "movq      %%mm3,    "#A2"       \n\t"  \
    "pshufw    $0xb1,    %%mm7, %%mm7 \n\t" \
    "movq      %%mm7,    8+"#A2"     \n\t"

/*
 * DCT_8_INV_COL(A1, A2)
 * Inverse DCT of four 16-bit columns at once (the macro is invoked twice
 * to cover all eight): the tangent/cosine constants are loaded from the
 * tg_1_16 table (asm operand %3) and applied with pmulhw; results are
 * shifted down by 6 bits.
 */
#define DCT_8_INV_COL(A1, A2)               \
    "movq      2*8(%3),    %%mm0     \n\t"  \
    "movq      16*3+"#A1", %%mm3     \n\t"  \
    "movq      %%mm0,      %%mm1     \n\t"  \
    "movq      16*5+"#A1", %%mm5     \n\t"  \
    "pmulhw    %%mm3,      %%mm0     \n\t"  \
    "movq      (%3),       %%mm4     \n\t"  \
    "pmulhw    %%mm5,      %%mm1     \n\t"  \
    "movq      16*7+"#A1", %%mm7     \n\t"  \
    "movq      %%mm4,      %%mm2     \n\t"  \
    "movq      16*1+"#A1", %%mm6     \n\t"  \
    "pmulhw    %%mm7,      %%mm4     \n\t"  \
    "paddsw    %%mm3,      %%mm0     \n\t"  \
    "pmulhw    %%mm6,      %%mm2     \n\t"  \
    "paddsw    %%mm3,      %%mm1     \n\t"  \
    "psubsw    %%mm5,      %%mm0     \n\t"  \
    "movq      3*8(%3),    %%mm3     \n\t"  \
    "paddsw    %%mm5,      %%mm1     \n\t"  \
    "paddsw    %%mm6,      %%mm4     \n\t"  \
    "psubsw    %%mm7,      %%mm2     \n\t"  \
    "movq      %%mm4,      %%mm5     \n\t"  \
    "movq      %%mm2,      %%mm6     \n\t"  \
    "paddsw    %%mm1,      %%mm5     \n\t"  \
    "psubsw    %%mm0,      %%mm6     \n\t"  \
    "psubsw    %%mm1,      %%mm4     \n\t"  \
    "paddsw    %%mm0,      %%mm2     \n\t"  \
    "movq      1*8(%3),    %%mm7     \n\t"  \
    "movq      %%mm4,      %%mm1     \n\t"  \
    "movq      %%mm5,      3*16+"#A2" \n\t" \
    "paddsw    %%mm2,      %%mm1     \n\t"  \
    "movq      %%mm6,      5*16+"#A2" \n\t" \
    "psubsw    %%mm2,      %%mm4     \n\t"  \
    "movq      2*16+"#A1", %%mm5     \n\t"  \
    "movq      %%mm7,      %%mm0     \n\t"  \
    "movq      6*16+"#A1", %%mm6     \n\t"  \
    "pmulhw    %%mm5,      %%mm0     \n\t"  \
    "pmulhw    %%mm6,      %%mm7     \n\t"  \
    "pmulhw    %%mm3,      %%mm1     \n\t"  \
    "movq      0*16+"#A1", %%mm2     \n\t"  \
    "pmulhw    %%mm3,      %%mm4     \n\t"  \
    "psubsw    %%mm6,      %%mm0     \n\t"  \
    "movq      %%mm2,      %%mm3     \n\t"  \
    "movq      4*16+"#A1", %%mm6     \n\t"  \
    "paddsw    %%mm5,      %%mm7     \n\t"  \
    "paddsw    %%mm6,      %%mm2     \n\t"  \
    "psubsw    %%mm6,      %%mm3     \n\t"  \
    "movq      %%mm2,      %%mm5     \n\t"  \
    "movq      %%mm3,      %%mm6     \n\t"  \
    "psubsw    %%mm7,      %%mm2     \n\t"  \
    "paddsw    %%mm0,      %%mm3     \n\t"  \
    "paddsw    %%mm1,      %%mm1     \n\t"  \
    "paddsw    %%mm4,      %%mm4     \n\t"  \
    "paddsw    %%mm7,      %%mm5     \n\t"  \
    "psubsw    %%mm0,      %%mm6     \n\t"  \
    "movq      %%mm3,      %%mm7     \n\t"  \
    "movq      %%mm6,      %%mm0     \n\t"  \
    "paddsw    %%mm1,      %%mm3     \n\t"  \
    "paddsw    %%mm4,      %%mm6     \n\t"  \
    "psraw     $6,         %%mm3     \n\t"  \
    "psubsw    %%mm1,      %%mm7     \n\t"  \
    "psraw     $6,         %%mm6     \n\t"  \
    "psubsw    %%mm4,      %%mm0     \n\t"  \
    "movq      3*16+"#A2", %%mm1     \n\t"  \
    "psraw     $6,         %%mm7     \n\t"  \
    "movq      %%mm5,      %%mm4     \n\t"  \
    "psraw     $6,         %%mm0     \n\t"  \
    "movq      %%mm3,      1*16+"#A2" \n\t" \
    "paddsw    %%mm1,      %%mm5     \n\t"  \
    "movq      %%mm6,      2*16+"#A2" \n\t" \
    "psubsw    %%mm1,      %%mm4     \n\t"  \
    "movq      5*16+"#A2", %%mm3     \n\t"  \
    "psraw     $6,         %%mm5     \n\t"  \
    "movq      %%mm2,      %%mm6     \n\t"  \
    "psraw     $6,         %%mm4     \n\t"  \
    "movq      %%mm0,      5*16+"#A2" \n\t" \
    "paddsw    %%mm3,      %%mm2     \n\t"  \
    "movq      %%mm7,      6*16+"#A2" \n\t" \
    "psubsw    %%mm3,      %%mm6     \n\t"  \
    "movq      %%mm5,      0*16+"#A2" \n\t" \
    "psraw     $6,         %%mm2     \n\t"  \
    "movq      %%mm4,      7*16+"#A2" \n\t" \
    "psraw     $6,         %%mm6     \n\t"  \
    "movq      %%mm2,      3*16+"#A2" \n\t" \
    "movq      %%mm6,      4*16+"#A2" \n\t"

void ff_xvid_idct_mmx(short *block)
{
    __asm__ volatile (
        // rows: one call per row; rows 0/4, 1/7, 2/6 and 3/5 share a table
        DCT_8_INV_ROW_MMX(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
        DCT_8_INV_ROW_MMX(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
        DCT_8_INV_ROW_MMX(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
        DCT_8_INV_ROW_MMX(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
        DCT_8_INV_ROW_MMX(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
        DCT_8_INV_ROW_MMX(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
        DCT_8_INV_ROW_MMX(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
        DCT_8_INV_ROW_MMX(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
        // columns: left and right 4-column halves of the 8x8 block
        DCT_8_INV_COL(0(%0), 0(%0))
        DCT_8_INV_COL(8(%0), 8(%0))
        :: "r" (block), "r" (rounder_0), "r" (tab_i_04_mmx), "r" (tg_1_16));
}
#if HAVE_MMXEXT_INLINE

void ff_xvid_idct_mmxext(short *block)
{
    __asm__ volatile (
        // rows: same row/table pairing as the MMX version
        DCT_8_INV_ROW_XMM(0 * 16(%0), 0 * 16(%0), 64 * 0(%2), 8 * 0(%1))
        DCT_8_INV_ROW_XMM(1 * 16(%0), 1 * 16(%0), 64 * 1(%2), 8 * 1(%1))
        DCT_8_INV_ROW_XMM(2 * 16(%0), 2 * 16(%0), 64 * 2(%2), 8 * 2(%1))
        DCT_8_INV_ROW_XMM(3 * 16(%0), 3 * 16(%0), 64 * 3(%2), 8 * 3(%1))
        DCT_8_INV_ROW_XMM(4 * 16(%0), 4 * 16(%0), 64 * 0(%2), 8 * 4(%1))
        DCT_8_INV_ROW_XMM(5 * 16(%0), 5 * 16(%0), 64 * 3(%2), 8 * 5(%1))
        DCT_8_INV_ROW_XMM(6 * 16(%0), 6 * 16(%0), 64 * 2(%2), 8 * 6(%1))
        DCT_8_INV_ROW_XMM(7 * 16(%0), 7 * 16(%0), 64 * 1(%2), 8 * 7(%1))
        // columns: left and right 4-column halves of the 8x8 block
        DCT_8_INV_COL(0(%0), 0(%0))
        DCT_8_INV_COL(8(%0), 8(%0))
        :: "r" (block), "r" (rounder_0), "r" (tab_i_04_xmm), "r" (tg_1_16));
}

#endif /* HAVE_MMXEXT_INLINE */
/* Public prototypes, from the header for Xvid IDCT functions: */
void ff_xvid_idct_mmx(short *block);
void ff_xvid_idct_mmx_put(uint8_t *dest, int line_size, int16_t *block);
void ff_xvid_idct_mmx_add(uint8_t *dest, int line_size, int16_t *block);
void ff_xvid_idct_mmxext(short *block);
void ff_xvid_idct_mmxext_put(uint8_t *dest, int line_size, int16_t *block);
void ff_xvid_idct_mmxext_add(uint8_t *dest, int line_size, int16_t *block);

/* Clamped block-to-pixel copy/add helpers used by the put/add variants: */
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);

/* From libavutil's memory handling functions (part of the libavcodec
 * external API headers): declares variable v of type t with n-byte
 * alignment, as required by the aligned movq loads of the tables above. */
#define DECLARE_ALIGNED(n, t, v)
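/*
 * Minimal sketch (an assumption, not quoted from the source): the put/add
 * variants declared above are naturally thin wrappers that run the in-place
 * IDCT and then hand the result to the clamped copy/add helpers.
 */
#include <stdint.h>

void ff_xvid_idct_mmx_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct_mmx(block);                           /* IDCT in place     */
    ff_put_pixels_clamped_mmx(block, dest, line_size); /* clamp to 0..255   */
}

void ff_xvid_idct_mmx_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct_mmx(block);                           /* IDCT in place     */
    ff_add_pixels_clamped_mmx(block, dest, line_size); /* add to prediction */
}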