44 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 45 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 46 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 47 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 48 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 49 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 50 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 51 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 54 #define COL_SHIFT 20 // 6 90 int16_t *
const temp= (int16_t*)align_tmp;
93 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 94 "movq " #src0 ", %%mm0 \n\t" \ 95 "movq " #src4 ", %%mm1 \n\t" \ 96 "movq " #src1 ", %%mm2 \n\t" \ 97 "movq " #src5 ", %%mm3 \n\t" \ 98 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 99 "pand %%mm0, %%mm4 \n\t"\ 100 "por %%mm1, %%mm4 \n\t"\ 101 "por %%mm2, %%mm4 \n\t"\ 102 "por %%mm3, %%mm4 \n\t"\ 103 "packssdw %%mm4,%%mm4 \n\t"\ 104 "movd %%mm4, %%eax \n\t"\ 105 "orl %%eax, %%eax \n\t"\ 107 "movq 16(%2), %%mm4 \n\t" \ 108 "pmaddwd %%mm0, %%mm4 \n\t" \ 109 "movq 24(%2), %%mm5 \n\t" \ 110 "pmaddwd %%mm5, %%mm0 \n\t" \ 111 "movq 32(%2), %%mm5 \n\t" \ 112 "pmaddwd %%mm1, %%mm5 \n\t" \ 113 "movq 40(%2), %%mm6 \n\t" \ 114 "pmaddwd %%mm6, %%mm1 \n\t" \ 115 "movq 48(%2), %%mm7 \n\t" \ 116 "pmaddwd %%mm2, %%mm7 \n\t" \ 117 #rounder ", %%mm4 \n\t"\ 118 "movq %%mm4, %%mm6 \n\t" \ 119 "paddd %%mm5, %%mm4 \n\t" \ 120 "psubd %%mm5, %%mm6 \n\t" \ 121 "movq 56(%2), %%mm5 \n\t" \ 122 "pmaddwd %%mm3, %%mm5 \n\t" \ 123 #rounder ", %%mm0 \n\t"\ 124 "paddd %%mm0, %%mm1 \n\t" \ 125 "paddd %%mm0, %%mm0 \n\t" \ 126 "psubd %%mm1, %%mm0 \n\t" \ 127 "pmaddwd 64(%2), %%mm2 \n\t" \ 128 "paddd %%mm5, %%mm7 \n\t" \ 129 "movq 72(%2), %%mm5 \n\t" \ 130 "pmaddwd %%mm3, %%mm5 \n\t" \ 131 "paddd %%mm4, %%mm7 \n\t" \ 132 "paddd %%mm4, %%mm4 \n\t" \ 133 "psubd %%mm7, %%mm4 \n\t" \ 134 "paddd %%mm2, %%mm5 \n\t" \ 135 "psrad $" #shift ", %%mm7 \n\t"\ 136 "psrad $" #shift ", %%mm4 \n\t"\ 137 "movq %%mm1, %%mm2 \n\t" \ 138 "paddd %%mm5, %%mm1 \n\t" \ 139 "psubd %%mm5, %%mm2 \n\t" \ 140 "psrad $" #shift ", %%mm1 \n\t"\ 141 "psrad $" #shift ", %%mm2 \n\t"\ 142 "packssdw %%mm1, %%mm7 \n\t" \ 143 "packssdw %%mm4, %%mm2 \n\t" \ 144 "movq %%mm7, " #dst " \n\t"\ 145 "movq " #src1 ", %%mm1 \n\t" \ 146 "movq 80(%2), %%mm4 \n\t" \ 147 "movq %%mm2, 24+" #dst " \n\t"\ 148 "pmaddwd %%mm1, %%mm4 \n\t" \ 149 "movq 88(%2), %%mm7 \n\t" \ 150 "pmaddwd 96(%2), %%mm1 \n\t" \ 151 "pmaddwd %%mm3, %%mm7 \n\t" \ 152 "movq %%mm0, %%mm2 \n\t" \ 153 "pmaddwd 104(%2), %%mm3 \n\t" \ 154 "paddd %%mm7, %%mm4 \n\t" \ 155 "paddd %%mm4, %%mm2 \n\t" \ 156 "psubd %%mm4, %%mm0 \n\t" \ 157 "psrad $" #shift ", %%mm2 \n\t"\ 158 "psrad $" #shift ", %%mm0 \n\t"\ 159 "movq %%mm6, %%mm4 \n\t" \ 160 "paddd %%mm1, %%mm3 \n\t" \ 161 "paddd %%mm3, %%mm6 \n\t" \ 162 "psubd %%mm3, %%mm4 \n\t" \ 163 "psrad $" #shift ", %%mm6 \n\t"\ 164 "packssdw %%mm6, %%mm2 \n\t" \ 165 "movq %%mm2, 8+" #dst " \n\t"\ 166 "psrad $" #shift ", %%mm4 \n\t"\ 167 "packssdw %%mm0, %%mm4 \n\t" \ 168 "movq %%mm4, 16+" #dst " \n\t"\ 171 "pslld $16, %%mm0 \n\t"\ 172 "paddd "MANGLE(d40000)", %%mm0 \n\t"\ 173 "psrad $13, %%mm0 \n\t"\ 174 "packssdw %%mm0, %%mm0 \n\t"\ 175 "movq %%mm0, " #dst " \n\t"\ 176 "movq %%mm0, 8+" #dst " \n\t"\ 177 "movq %%mm0, 16+" #dst " \n\t"\ 178 "movq %%mm0, 24+" #dst " \n\t"\ 181 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \ 182 "movq " #src0 ", %%mm0 \n\t" \ 183 "movq " #src4 ", %%mm1 \n\t" \ 184 "movq " #src1 ", %%mm2 \n\t" \ 185 "movq " #src5 ", %%mm3 \n\t" \ 186 "movq %%mm0, %%mm4 \n\t"\ 187 "por %%mm1, %%mm4 \n\t"\ 188 "por %%mm2, %%mm4 \n\t"\ 189 "por %%mm3, %%mm4 \n\t"\ 190 "packssdw %%mm4,%%mm4 \n\t"\ 191 "movd %%mm4, %%eax \n\t"\ 192 "orl %%eax, %%eax \n\t"\ 194 "movq 16(%2), %%mm4 \n\t" \ 195 "pmaddwd %%mm0, %%mm4 \n\t" \ 196 "movq 24(%2), %%mm5 \n\t" \ 197 "pmaddwd %%mm5, %%mm0 \n\t" \ 198 "movq 32(%2), %%mm5 \n\t" \ 199 "pmaddwd %%mm1, %%mm5 \n\t" \ 200 "movq 40(%2), %%mm6 \n\t" \ 201 "pmaddwd %%mm6, %%mm1 \n\t" \ 202 "movq 48(%2), %%mm7 \n\t" \ 203 "pmaddwd %%mm2, %%mm7 \n\t" \ 204 #rounder ", %%mm4 \n\t"\ 205 "movq %%mm4, %%mm6 \n\t" \ 206 "paddd %%mm5, %%mm4 \n\t" \ 207 "psubd %%mm5, %%mm6 \n\t" \ 208 "movq 56(%2), %%mm5 \n\t" \ 209 "pmaddwd %%mm3, %%mm5 \n\t" \ 210 #rounder ", %%mm0 \n\t"\ 211 "paddd %%mm0, %%mm1 \n\t" \ 212 "paddd %%mm0, %%mm0 \n\t" \ 213 "psubd %%mm1, %%mm0 \n\t" \ 214 "pmaddwd 64(%2), %%mm2 \n\t" \ 215 "paddd %%mm5, %%mm7 \n\t" \ 216 "movq 72(%2), %%mm5 \n\t" \ 217 "pmaddwd %%mm3, %%mm5 \n\t" \ 218 "paddd %%mm4, %%mm7 \n\t" \ 219 "paddd %%mm4, %%mm4 \n\t" \ 220 "psubd %%mm7, %%mm4 \n\t" \ 221 "paddd %%mm2, %%mm5 \n\t" \ 222 "psrad $" #shift ", %%mm7 \n\t"\ 223 "psrad $" #shift ", %%mm4 \n\t"\ 224 "movq %%mm1, %%mm2 \n\t" \ 225 "paddd %%mm5, %%mm1 \n\t" \ 226 "psubd %%mm5, %%mm2 \n\t" \ 227 "psrad $" #shift ", %%mm1 \n\t"\ 228 "psrad $" #shift ", %%mm2 \n\t"\ 229 "packssdw %%mm1, %%mm7 \n\t" \ 230 "packssdw %%mm4, %%mm2 \n\t" \ 231 "movq %%mm7, " #dst " \n\t"\ 232 "movq " #src1 ", %%mm1 \n\t" \ 233 "movq 80(%2), %%mm4 \n\t" \ 234 "movq %%mm2, 24+" #dst " \n\t"\ 235 "pmaddwd %%mm1, %%mm4 \n\t" \ 236 "movq 88(%2), %%mm7 \n\t" \ 237 "pmaddwd 96(%2), %%mm1 \n\t" \ 238 "pmaddwd %%mm3, %%mm7 \n\t" \ 239 "movq %%mm0, %%mm2 \n\t" \ 240 "pmaddwd 104(%2), %%mm3 \n\t" \ 241 "paddd %%mm7, %%mm4 \n\t" \ 242 "paddd %%mm4, %%mm2 \n\t" \ 243 "psubd %%mm4, %%mm0 \n\t" \ 244 "psrad $" #shift ", %%mm2 \n\t"\ 245 "psrad $" #shift ", %%mm0 \n\t"\ 246 "movq %%mm6, %%mm4 \n\t" \ 247 "paddd %%mm1, %%mm3 \n\t" \ 248 "paddd %%mm3, %%mm6 \n\t" \ 249 "psubd %%mm3, %%mm4 \n\t" \ 250 "psrad $" #shift ", %%mm6 \n\t"\ 251 "packssdw %%mm6, %%mm2 \n\t" \ 252 "movq %%mm2, 8+" #dst " \n\t"\ 253 "psrad $" #shift ", %%mm4 \n\t"\ 254 "packssdw %%mm0, %%mm4 \n\t" \ 255 "movq %%mm4, 16+" #dst " \n\t"\ 257 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 258 "movq " #src0 ", %%mm0 \n\t" \ 259 "movq " #src4 ", %%mm1 \n\t" \ 260 "movq " #src1 ", %%mm2 \n\t" \ 261 "movq " #src5 ", %%mm3 \n\t" \ 262 "movq 16(%2), %%mm4 \n\t" \ 263 "pmaddwd %%mm0, %%mm4 \n\t" \ 264 "movq 24(%2), %%mm5 \n\t" \ 265 "pmaddwd %%mm5, %%mm0 \n\t" \ 266 "movq 32(%2), %%mm5 \n\t" \ 267 "pmaddwd %%mm1, %%mm5 \n\t" \ 268 "movq 40(%2), %%mm6 \n\t" \ 269 "pmaddwd %%mm6, %%mm1 \n\t" \ 270 "movq 48(%2), %%mm7 \n\t" \ 271 "pmaddwd %%mm2, %%mm7 \n\t" \ 272 #rounder ", %%mm4 \n\t"\ 273 "movq %%mm4, %%mm6 \n\t" \ 274 "paddd %%mm5, %%mm4 \n\t" \ 275 "psubd %%mm5, %%mm6 \n\t" \ 276 "movq 56(%2), %%mm5 \n\t" \ 277 "pmaddwd %%mm3, %%mm5 \n\t" \ 278 #rounder ", %%mm0 \n\t"\ 279 "paddd %%mm0, %%mm1 \n\t" \ 280 "paddd %%mm0, %%mm0 \n\t" \ 281 "psubd %%mm1, %%mm0 \n\t" \ 282 "pmaddwd 64(%2), %%mm2 \n\t" \ 283 "paddd %%mm5, %%mm7 \n\t" \ 284 "movq 72(%2), %%mm5 \n\t" \ 285 "pmaddwd %%mm3, %%mm5 \n\t" \ 286 "paddd %%mm4, %%mm7 \n\t" \ 287 "paddd %%mm4, %%mm4 \n\t" \ 288 "psubd %%mm7, %%mm4 \n\t" \ 289 "paddd %%mm2, %%mm5 \n\t" \ 290 "psrad $" #shift ", %%mm7 \n\t"\ 291 "psrad $" #shift ", %%mm4 \n\t"\ 292 "movq %%mm1, %%mm2 \n\t" \ 293 "paddd %%mm5, %%mm1 \n\t" \ 294 "psubd %%mm5, %%mm2 \n\t" \ 295 "psrad $" #shift ", %%mm1 \n\t"\ 296 "psrad $" #shift ", %%mm2 \n\t"\ 297 "packssdw %%mm1, %%mm7 \n\t" \ 298 "packssdw %%mm4, %%mm2 \n\t" \ 299 "movq %%mm7, " #dst " \n\t"\ 300 "movq " #src1 ", %%mm1 \n\t" \ 301 "movq 80(%2), %%mm4 \n\t" \ 302 "movq %%mm2, 24+" #dst " \n\t"\ 303 "pmaddwd %%mm1, %%mm4 \n\t" \ 304 "movq 88(%2), %%mm7 \n\t" \ 305 "pmaddwd 96(%2), %%mm1 \n\t" \ 306 "pmaddwd %%mm3, %%mm7 \n\t" \ 307 "movq %%mm0, %%mm2 \n\t" \ 308 "pmaddwd 104(%2), %%mm3 \n\t" \ 309 "paddd %%mm7, %%mm4 \n\t" \ 310 "paddd %%mm4, %%mm2 \n\t" \ 311 "psubd %%mm4, %%mm0 \n\t" \ 312 "psrad $" #shift ", %%mm2 \n\t"\ 313 "psrad $" #shift ", %%mm0 \n\t"\ 314 "movq %%mm6, %%mm4 \n\t" \ 315 "paddd %%mm1, %%mm3 \n\t" \ 316 "paddd %%mm3, %%mm6 \n\t" \ 317 "psubd %%mm3, %%mm4 \n\t" \ 318 "psrad $" #shift ", %%mm6 \n\t"\ 319 "packssdw %%mm6, %%mm2 \n\t" \ 320 "movq %%mm2, 8+" #dst " \n\t"\ 321 "psrad $" #shift ", %%mm4 \n\t"\ 322 "packssdw %%mm0, %%mm4 \n\t" \ 323 "movq %%mm4, 16+" #dst " \n\t"\ 326 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
327 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
328 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
329 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
332 #define IDCT(src0, src4, src1, src5, dst, shift) \ 333 "movq " #src0 ", %%mm0 \n\t" \ 334 "movq " #src4 ", %%mm1 \n\t" \ 335 "movq " #src1 ", %%mm2 \n\t" \ 336 "movq " #src5 ", %%mm3 \n\t" \ 337 "movq 16(%2), %%mm4 \n\t" \ 338 "pmaddwd %%mm0, %%mm4 \n\t" \ 339 "movq 24(%2), %%mm5 \n\t" \ 340 "pmaddwd %%mm5, %%mm0 \n\t" \ 341 "movq 32(%2), %%mm5 \n\t" \ 342 "pmaddwd %%mm1, %%mm5 \n\t" \ 343 "movq 40(%2), %%mm6 \n\t" \ 344 "pmaddwd %%mm6, %%mm1 \n\t" \ 345 "movq %%mm4, %%mm6 \n\t" \ 346 "movq 48(%2), %%mm7 \n\t" \ 347 "pmaddwd %%mm2, %%mm7 \n\t" \ 348 "paddd %%mm5, %%mm4 \n\t" \ 349 "psubd %%mm5, %%mm6 \n\t" \ 350 "movq %%mm0, %%mm5 \n\t" \ 351 "paddd %%mm1, %%mm0 \n\t" \ 352 "psubd %%mm1, %%mm5 \n\t" \ 353 "movq 56(%2), %%mm1 \n\t" \ 354 "pmaddwd %%mm3, %%mm1 \n\t" \ 355 "pmaddwd 64(%2), %%mm2 \n\t" \ 356 "paddd %%mm1, %%mm7 \n\t" \ 357 "movq 72(%2), %%mm1 \n\t" \ 358 "pmaddwd %%mm3, %%mm1 \n\t" \ 359 "paddd %%mm4, %%mm7 \n\t" \ 360 "paddd %%mm4, %%mm4 \n\t" \ 361 "psubd %%mm7, %%mm4 \n\t" \ 362 "paddd %%mm2, %%mm1 \n\t" \ 363 "psrad $" #shift ", %%mm7 \n\t"\ 364 "psrad $" #shift ", %%mm4 \n\t"\ 365 "movq %%mm0, %%mm2 \n\t" \ 366 "paddd %%mm1, %%mm0 \n\t" \ 367 "psubd %%mm1, %%mm2 \n\t" \ 368 "psrad $" #shift ", %%mm0 \n\t"\ 369 "psrad $" #shift ", %%mm2 \n\t"\ 370 "packssdw %%mm7, %%mm7 \n\t" \ 371 "movd %%mm7, " #dst " \n\t"\ 372 "packssdw %%mm0, %%mm0 \n\t" \ 373 "movd %%mm0, 16+" #dst " \n\t"\ 374 "packssdw %%mm2, %%mm2 \n\t" \ 375 "movd %%mm2, 96+" #dst " \n\t"\ 376 "packssdw %%mm4, %%mm4 \n\t" \ 377 "movd %%mm4, 112+" #dst " \n\t"\ 378 "movq " #src1 ", %%mm0 \n\t" \ 379 "movq 80(%2), %%mm4 \n\t" \ 380 "pmaddwd %%mm0, %%mm4 \n\t" \ 381 "movq 88(%2), %%mm7 \n\t" \ 382 "pmaddwd 96(%2), %%mm0 \n\t" \ 383 "pmaddwd %%mm3, %%mm7 \n\t" \ 384 "movq %%mm5, %%mm2 \n\t" \ 385 "pmaddwd 104(%2), %%mm3 \n\t" \ 386 "paddd %%mm7, %%mm4 \n\t" \ 387 "paddd %%mm4, %%mm2 \n\t" \ 388 "psubd %%mm4, %%mm5 \n\t" \ 389 "psrad $" #shift ", %%mm2 \n\t"\ 390 "psrad $" #shift ", %%mm5 \n\t"\ 391 "movq %%mm6, %%mm4 \n\t" \ 392 "paddd %%mm0, %%mm3 \n\t" \ 393 "paddd %%mm3, %%mm6 \n\t" \ 394 "psubd %%mm3, %%mm4 \n\t" \ 395 "psrad $" #shift ", %%mm6 \n\t"\ 396 "psrad $" #shift ", %%mm4 \n\t"\ 397 "packssdw %%mm2, %%mm2 \n\t" \ 398 "packssdw %%mm6, %%mm6 \n\t" \ 399 "movd %%mm2, 32+" #dst " \n\t"\ 400 "packssdw %%mm4, %%mm4 \n\t" \ 401 "packssdw %%mm5, %%mm5 \n\t" \ 402 "movd %%mm6, 48+" #dst " \n\t"\ 403 "movd %%mm4, 64+" #dst " \n\t"\ 404 "movd %%mm5, 80+" #dst " \n\t" 408 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
409 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
410 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
411 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
416 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
417 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
420 #define IDCT(src0, src4, src1, src5, dst, shift) \ 421 "movq " #src0 ", %%mm0 \n\t" \ 422 "movq " #src4 ", %%mm1 \n\t" \ 423 "movq " #src5 ", %%mm3 \n\t" \ 424 "movq 16(%2), %%mm4 \n\t" \ 425 "pmaddwd %%mm0, %%mm4 \n\t" \ 426 "movq 24(%2), %%mm5 \n\t" \ 427 "pmaddwd %%mm5, %%mm0 \n\t" \ 428 "movq 32(%2), %%mm5 \n\t" \ 429 "pmaddwd %%mm1, %%mm5 \n\t" \ 430 "movq 40(%2), %%mm6 \n\t" \ 431 "pmaddwd %%mm6, %%mm1 \n\t" \ 432 "movq %%mm4, %%mm6 \n\t" \ 433 "paddd %%mm5, %%mm4 \n\t" \ 434 "psubd %%mm5, %%mm6 \n\t" \ 435 "movq %%mm0, %%mm5 \n\t" \ 436 "paddd %%mm1, %%mm0 \n\t" \ 437 "psubd %%mm1, %%mm5 \n\t" \ 438 "movq 56(%2), %%mm1 \n\t" \ 439 "pmaddwd %%mm3, %%mm1 \n\t" \ 440 "movq 72(%2), %%mm7 \n\t" \ 441 "pmaddwd %%mm3, %%mm7 \n\t" \ 442 "paddd %%mm4, %%mm1 \n\t" \ 443 "paddd %%mm4, %%mm4 \n\t" \ 444 "psubd %%mm1, %%mm4 \n\t" \ 445 "psrad $" #shift ", %%mm1 \n\t"\ 446 "psrad $" #shift ", %%mm4 \n\t"\ 447 "movq %%mm0, %%mm2 \n\t" \ 448 "paddd %%mm7, %%mm0 \n\t" \ 449 "psubd %%mm7, %%mm2 \n\t" \ 450 "psrad $" #shift ", %%mm0 \n\t"\ 451 "psrad $" #shift ", %%mm2 \n\t"\ 452 "packssdw %%mm1, %%mm1 \n\t" \ 453 "movd %%mm1, " #dst " \n\t"\ 454 "packssdw %%mm0, %%mm0 \n\t" \ 455 "movd %%mm0, 16+" #dst " \n\t"\ 456 "packssdw %%mm2, %%mm2 \n\t" \ 457 "movd %%mm2, 96+" #dst " \n\t"\ 458 "packssdw %%mm4, %%mm4 \n\t" \ 459 "movd %%mm4, 112+" #dst " \n\t"\ 460 "movq 88(%2), %%mm1 \n\t" \ 461 "pmaddwd %%mm3, %%mm1 \n\t" \ 462 "movq %%mm5, %%mm2 \n\t" \ 463 "pmaddwd 104(%2), %%mm3 \n\t" \ 464 "paddd %%mm1, %%mm2 \n\t" \ 465 "psubd %%mm1, %%mm5 \n\t" \ 466 "psrad $" #shift ", %%mm2 \n\t"\ 467 "psrad $" #shift ", %%mm5 \n\t"\ 468 "movq %%mm6, %%mm1 \n\t" \ 469 "paddd %%mm3, %%mm6 \n\t" \ 470 "psubd %%mm3, %%mm1 \n\t" \ 471 "psrad $" #shift ", %%mm6 \n\t"\ 472 "psrad $" #shift ", %%mm1 \n\t"\ 473 "packssdw %%mm2, %%mm2 \n\t" \ 474 "packssdw %%mm6, %%mm6 \n\t" \ 475 "movd %%mm2, 32+" #dst " \n\t"\ 476 "packssdw %%mm1, %%mm1 \n\t" \ 477 "packssdw %%mm5, %%mm5 \n\t" \ 478 "movd %%mm6, 48+" #dst " \n\t"\ 479 "movd %%mm1, 64+" #dst " \n\t"\ 480 "movd %%mm5, 80+" #dst " \n\t" 483 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
484 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
485 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
486 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
491 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
494 #define IDCT(src0, src4, src1, src5, dst, shift) \ 495 "movq " #src0 ", %%mm0 \n\t" \ 496 "movq " #src5 ", %%mm3 \n\t" \ 497 "movq 16(%2), %%mm4 \n\t" \ 498 "pmaddwd %%mm0, %%mm4 \n\t" \ 499 "movq 24(%2), %%mm5 \n\t" \ 500 "pmaddwd %%mm5, %%mm0 \n\t" \ 501 "movq %%mm4, %%mm6 \n\t" \ 502 "movq %%mm0, %%mm5 \n\t" \ 503 "movq 56(%2), %%mm1 \n\t" \ 504 "pmaddwd %%mm3, %%mm1 \n\t" \ 505 "movq 72(%2), %%mm7 \n\t" \ 506 "pmaddwd %%mm3, %%mm7 \n\t" \ 507 "paddd %%mm4, %%mm1 \n\t" \ 508 "paddd %%mm4, %%mm4 \n\t" \ 509 "psubd %%mm1, %%mm4 \n\t" \ 510 "psrad $" #shift ", %%mm1 \n\t"\ 511 "psrad $" #shift ", %%mm4 \n\t"\ 512 "movq %%mm0, %%mm2 \n\t" \ 513 "paddd %%mm7, %%mm0 \n\t" \ 514 "psubd %%mm7, %%mm2 \n\t" \ 515 "psrad $" #shift ", %%mm0 \n\t"\ 516 "psrad $" #shift ", %%mm2 \n\t"\ 517 "packssdw %%mm1, %%mm1 \n\t" \ 518 "movd %%mm1, " #dst " \n\t"\ 519 "packssdw %%mm0, %%mm0 \n\t" \ 520 "movd %%mm0, 16+" #dst " \n\t"\ 521 "packssdw %%mm2, %%mm2 \n\t" \ 522 "movd %%mm2, 96+" #dst " \n\t"\ 523 "packssdw %%mm4, %%mm4 \n\t" \ 524 "movd %%mm4, 112+" #dst " \n\t"\ 525 "movq 88(%2), %%mm1 \n\t" \ 526 "pmaddwd %%mm3, %%mm1 \n\t" \ 527 "movq %%mm5, %%mm2 \n\t" \ 528 "pmaddwd 104(%2), %%mm3 \n\t" \ 529 "paddd %%mm1, %%mm2 \n\t" \ 530 "psubd %%mm1, %%mm5 \n\t" \ 531 "psrad $" #shift ", %%mm2 \n\t"\ 532 "psrad $" #shift ", %%mm5 \n\t"\ 533 "movq %%mm6, %%mm1 \n\t" \ 534 "paddd %%mm3, %%mm6 \n\t" \ 535 "psubd %%mm3, %%mm1 \n\t" \ 536 "psrad $" #shift ", %%mm6 \n\t"\ 537 "psrad $" #shift ", %%mm1 \n\t"\ 538 "packssdw %%mm2, %%mm2 \n\t" \ 539 "packssdw %%mm6, %%mm6 \n\t" \ 540 "movd %%mm2, 32+" #dst " \n\t"\ 541 "packssdw %%mm1, %%mm1 \n\t" \ 542 "packssdw %%mm5, %%mm5 \n\t" \ 543 "movd %%mm6, 48+" #dst " \n\t"\ 544 "movd %%mm1, 64+" #dst " \n\t"\ 545 "movd %%mm5, 80+" #dst " \n\t" 549 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
550 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
551 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
552 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
557 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
560 #define IDCT(src0, src4, src1, src5, dst, shift) \ 561 "movq " #src0 ", %%mm0 \n\t" \ 562 "movq " #src1 ", %%mm2 \n\t" \ 563 "movq " #src5 ", %%mm3 \n\t" \ 564 "movq 16(%2), %%mm4 \n\t" \ 565 "pmaddwd %%mm0, %%mm4 \n\t" \ 566 "movq 24(%2), %%mm5 \n\t" \ 567 "pmaddwd %%mm5, %%mm0 \n\t" \ 568 "movq %%mm4, %%mm6 \n\t" \ 569 "movq 48(%2), %%mm7 \n\t" \ 570 "pmaddwd %%mm2, %%mm7 \n\t" \ 571 "movq %%mm0, %%mm5 \n\t" \ 572 "movq 56(%2), %%mm1 \n\t" \ 573 "pmaddwd %%mm3, %%mm1 \n\t" \ 574 "pmaddwd 64(%2), %%mm2 \n\t" \ 575 "paddd %%mm1, %%mm7 \n\t" \ 576 "movq 72(%2), %%mm1 \n\t" \ 577 "pmaddwd %%mm3, %%mm1 \n\t" \ 578 "paddd %%mm4, %%mm7 \n\t" \ 579 "paddd %%mm4, %%mm4 \n\t" \ 580 "psubd %%mm7, %%mm4 \n\t" \ 581 "paddd %%mm2, %%mm1 \n\t" \ 582 "psrad $" #shift ", %%mm7 \n\t"\ 583 "psrad $" #shift ", %%mm4 \n\t"\ 584 "movq %%mm0, %%mm2 \n\t" \ 585 "paddd %%mm1, %%mm0 \n\t" \ 586 "psubd %%mm1, %%mm2 \n\t" \ 587 "psrad $" #shift ", %%mm0 \n\t"\ 588 "psrad $" #shift ", %%mm2 \n\t"\ 589 "packssdw %%mm7, %%mm7 \n\t" \ 590 "movd %%mm7, " #dst " \n\t"\ 591 "packssdw %%mm0, %%mm0 \n\t" \ 592 "movd %%mm0, 16+" #dst " \n\t"\ 593 "packssdw %%mm2, %%mm2 \n\t" \ 594 "movd %%mm2, 96+" #dst " \n\t"\ 595 "packssdw %%mm4, %%mm4 \n\t" \ 596 "movd %%mm4, 112+" #dst " \n\t"\ 597 "movq " #src1 ", %%mm0 \n\t" \ 598 "movq 80(%2), %%mm4 \n\t" \ 599 "pmaddwd %%mm0, %%mm4 \n\t" \ 600 "movq 88(%2), %%mm7 \n\t" \ 601 "pmaddwd 96(%2), %%mm0 \n\t" \ 602 "pmaddwd %%mm3, %%mm7 \n\t" \ 603 "movq %%mm5, %%mm2 \n\t" \ 604 "pmaddwd 104(%2), %%mm3 \n\t" \ 605 "paddd %%mm7, %%mm4 \n\t" \ 606 "paddd %%mm4, %%mm2 \n\t" \ 607 "psubd %%mm4, %%mm5 \n\t" \ 608 "psrad $" #shift ", %%mm2 \n\t"\ 609 "psrad $" #shift ", %%mm5 \n\t"\ 610 "movq %%mm6, %%mm4 \n\t" \ 611 "paddd %%mm0, %%mm3 \n\t" \ 612 "paddd %%mm3, %%mm6 \n\t" \ 613 "psubd %%mm3, %%mm4 \n\t" \ 614 "psrad $" #shift ", %%mm6 \n\t"\ 615 "psrad $" #shift ", %%mm4 \n\t"\ 616 "packssdw %%mm2, %%mm2 \n\t" \ 617 "packssdw %%mm6, %%mm6 \n\t" \ 618 "movd %%mm2, 32+" #dst " \n\t"\ 619 "packssdw %%mm4, %%mm4 \n\t" \ 620 "packssdw %%mm5, %%mm5 \n\t" \ 621 "movd %%mm6, 48+" #dst " \n\t"\ 622 "movd %%mm4, 64+" #dst " \n\t"\ 623 "movd %%mm5, 80+" #dst " \n\t" 626 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
627 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
628 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
629 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
635 #define IDCT(src0, src4, src1, src5, dst, shift) \ 636 "movq " #src0 ", %%mm0 \n\t" \ 637 "movq " #src1 ", %%mm2 \n\t" \ 638 "movq 16(%2), %%mm4 \n\t" \ 639 "pmaddwd %%mm0, %%mm4 \n\t" \ 640 "movq 24(%2), %%mm5 \n\t" \ 641 "pmaddwd %%mm5, %%mm0 \n\t" \ 642 "movq %%mm4, %%mm6 \n\t" \ 643 "movq 48(%2), %%mm7 \n\t" \ 644 "pmaddwd %%mm2, %%mm7 \n\t" \ 645 "movq %%mm0, %%mm5 \n\t" \ 646 "movq 64(%2), %%mm3 \n\t"\ 647 "pmaddwd %%mm2, %%mm3 \n\t" \ 648 "paddd %%mm4, %%mm7 \n\t" \ 649 "paddd %%mm4, %%mm4 \n\t" \ 650 "psubd %%mm7, %%mm4 \n\t" \ 651 "psrad $" #shift ", %%mm7 \n\t"\ 652 "psrad $" #shift ", %%mm4 \n\t"\ 653 "movq %%mm0, %%mm1 \n\t" \ 654 "paddd %%mm3, %%mm0 \n\t" \ 655 "psubd %%mm3, %%mm1 \n\t" \ 656 "psrad $" #shift ", %%mm0 \n\t"\ 657 "psrad $" #shift ", %%mm1 \n\t"\ 658 "packssdw %%mm7, %%mm7 \n\t" \ 659 "movd %%mm7, " #dst " \n\t"\ 660 "packssdw %%mm0, %%mm0 \n\t" \ 661 "movd %%mm0, 16+" #dst " \n\t"\ 662 "packssdw %%mm1, %%mm1 \n\t" \ 663 "movd %%mm1, 96+" #dst " \n\t"\ 664 "packssdw %%mm4, %%mm4 \n\t" \ 665 "movd %%mm4, 112+" #dst " \n\t"\ 666 "movq 80(%2), %%mm4 \n\t" \ 667 "pmaddwd %%mm2, %%mm4 \n\t" \ 668 "pmaddwd 96(%2), %%mm2 \n\t" \ 669 "movq %%mm5, %%mm1 \n\t" \ 670 "paddd %%mm4, %%mm1 \n\t" \ 671 "psubd %%mm4, %%mm5 \n\t" \ 672 "psrad $" #shift ", %%mm1 \n\t"\ 673 "psrad $" #shift ", %%mm5 \n\t"\ 674 "movq %%mm6, %%mm4 \n\t" \ 675 "paddd %%mm2, %%mm6 \n\t" \ 676 "psubd %%mm2, %%mm4 \n\t" \ 677 "psrad $" #shift ", %%mm6 \n\t"\ 678 "psrad $" #shift ", %%mm4 \n\t"\ 679 "packssdw %%mm1, %%mm1 \n\t" \ 680 "packssdw %%mm6, %%mm6 \n\t" \ 681 "movd %%mm1, 32+" #dst " \n\t"\ 682 "packssdw %%mm4, %%mm4 \n\t" \ 683 "packssdw %%mm5, %%mm5 \n\t" \ 684 "movd %%mm6, 48+" #dst " \n\t"\ 685 "movd %%mm4, 64+" #dst " \n\t"\ 686 "movd %%mm5, 80+" #dst " \n\t" 690 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
691 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
692 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
693 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
699 #define IDCT(src0, src4, src1, src5, dst, shift) \ 700 "movq " #src0 ", %%mm0 \n\t" \ 701 "movq " #src4 ", %%mm1 \n\t" \ 702 "movq 16(%2), %%mm4 \n\t" \ 703 "pmaddwd %%mm0, %%mm4 \n\t" \ 704 "movq 24(%2), %%mm5 \n\t" \ 705 "pmaddwd %%mm5, %%mm0 \n\t" \ 706 "movq 32(%2), %%mm5 \n\t" \ 707 "pmaddwd %%mm1, %%mm5 \n\t" \ 708 "movq 40(%2), %%mm6 \n\t" \ 709 "pmaddwd %%mm6, %%mm1 \n\t" \ 710 "movq %%mm4, %%mm6 \n\t" \ 711 "paddd %%mm5, %%mm4 \n\t" \ 712 "psubd %%mm5, %%mm6 \n\t" \ 713 "movq %%mm0, %%mm5 \n\t" \ 714 "paddd %%mm1, %%mm0 \n\t" \ 715 "psubd %%mm1, %%mm5 \n\t" \ 716 "movq 8+" #src0 ", %%mm2 \n\t" \ 717 "movq 8+" #src4 ", %%mm3 \n\t" \ 718 "movq 16(%2), %%mm1 \n\t" \ 719 "pmaddwd %%mm2, %%mm1 \n\t" \ 720 "movq 24(%2), %%mm7 \n\t" \ 721 "pmaddwd %%mm7, %%mm2 \n\t" \ 722 "movq 32(%2), %%mm7 \n\t" \ 723 "pmaddwd %%mm3, %%mm7 \n\t" \ 724 "pmaddwd 40(%2), %%mm3 \n\t" \ 725 "paddd %%mm1, %%mm7 \n\t" \ 726 "paddd %%mm1, %%mm1 \n\t" \ 727 "psubd %%mm7, %%mm1 \n\t" \ 728 "paddd %%mm2, %%mm3 \n\t" \ 729 "paddd %%mm2, %%mm2 \n\t" \ 730 "psubd %%mm3, %%mm2 \n\t" \ 731 "psrad $" #shift ", %%mm4 \n\t"\ 732 "psrad $" #shift ", %%mm7 \n\t"\ 733 "psrad $" #shift ", %%mm3 \n\t"\ 734 "packssdw %%mm7, %%mm4 \n\t" \ 735 "movq %%mm4, " #dst " \n\t"\ 736 "psrad $" #shift ", %%mm0 \n\t"\ 737 "packssdw %%mm3, %%mm0 \n\t" \ 738 "movq %%mm0, 16+" #dst " \n\t"\ 739 "movq %%mm0, 96+" #dst " \n\t"\ 740 "movq %%mm4, 112+" #dst " \n\t"\ 741 "psrad $" #shift ", %%mm5 \n\t"\ 742 "psrad $" #shift ", %%mm6 \n\t"\ 743 "psrad $" #shift ", %%mm2 \n\t"\ 744 "packssdw %%mm2, %%mm5 \n\t" \ 745 "movq %%mm5, 32+" #dst " \n\t"\ 746 "psrad $" #shift ", %%mm1 \n\t"\ 747 "packssdw %%mm1, %%mm6 \n\t" \ 748 "movq %%mm6, 48+" #dst " \n\t"\ 749 "movq %%mm6, 64+" #dst " \n\t"\ 750 "movq %%mm5, 80+" #dst " \n\t" 754 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
756 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
764 #define IDCT(src0, src4, src1, src5, dst, shift) \ 765 "movq " #src0 ", %%mm0 \n\t" \ 766 "movq " #src4 ", %%mm1 \n\t" \ 767 "movq " #src1 ", %%mm2 \n\t" \ 768 "movq 16(%2), %%mm4 \n\t" \ 769 "pmaddwd %%mm0, %%mm4 \n\t" \ 770 "movq 24(%2), %%mm5 \n\t" \ 771 "pmaddwd %%mm5, %%mm0 \n\t" \ 772 "movq 32(%2), %%mm5 \n\t" \ 773 "pmaddwd %%mm1, %%mm5 \n\t" \ 774 "movq 40(%2), %%mm6 \n\t" \ 775 "pmaddwd %%mm6, %%mm1 \n\t" \ 776 "movq %%mm4, %%mm6 \n\t" \ 777 "movq 48(%2), %%mm7 \n\t" \ 778 "pmaddwd %%mm2, %%mm7 \n\t" \ 779 "paddd %%mm5, %%mm4 \n\t" \ 780 "psubd %%mm5, %%mm6 \n\t" \ 781 "movq %%mm0, %%mm5 \n\t" \ 782 "paddd %%mm1, %%mm0 \n\t" \ 783 "psubd %%mm1, %%mm5 \n\t" \ 784 "movq 64(%2), %%mm1 \n\t"\ 785 "pmaddwd %%mm2, %%mm1 \n\t" \ 786 "paddd %%mm4, %%mm7 \n\t" \ 787 "paddd %%mm4, %%mm4 \n\t" \ 788 "psubd %%mm7, %%mm4 \n\t" \ 789 "psrad $" #shift ", %%mm7 \n\t"\ 790 "psrad $" #shift ", %%mm4 \n\t"\ 791 "movq %%mm0, %%mm3 \n\t" \ 792 "paddd %%mm1, %%mm0 \n\t" \ 793 "psubd %%mm1, %%mm3 \n\t" \ 794 "psrad $" #shift ", %%mm0 \n\t"\ 795 "psrad $" #shift ", %%mm3 \n\t"\ 796 "packssdw %%mm7, %%mm7 \n\t" \ 797 "movd %%mm7, " #dst " \n\t"\ 798 "packssdw %%mm0, %%mm0 \n\t" \ 799 "movd %%mm0, 16+" #dst " \n\t"\ 800 "packssdw %%mm3, %%mm3 \n\t" \ 801 "movd %%mm3, 96+" #dst " \n\t"\ 802 "packssdw %%mm4, %%mm4 \n\t" \ 803 "movd %%mm4, 112+" #dst " \n\t"\ 804 "movq 80(%2), %%mm4 \n\t" \ 805 "pmaddwd %%mm2, %%mm4 \n\t" \ 806 "pmaddwd 96(%2), %%mm2 \n\t" \ 807 "movq %%mm5, %%mm3 \n\t" \ 808 "paddd %%mm4, %%mm3 \n\t" \ 809 "psubd %%mm4, %%mm5 \n\t" \ 810 "psrad $" #shift ", %%mm3 \n\t"\ 811 "psrad $" #shift ", %%mm5 \n\t"\ 812 "movq %%mm6, %%mm4 \n\t" \ 813 "paddd %%mm2, %%mm6 \n\t" \ 814 "psubd %%mm2, %%mm4 \n\t" \ 815 "psrad $" #shift ", %%mm6 \n\t"\ 816 "packssdw %%mm3, %%mm3 \n\t" \ 817 "movd %%mm3, 32+" #dst " \n\t"\ 818 "psrad $" #shift ", %%mm4 \n\t"\ 819 "packssdw %%mm6, %%mm6 \n\t" \ 820 "movd %%mm6, 48+" #dst " \n\t"\ 821 "packssdw %%mm4, %%mm4 \n\t" \ 822 "packssdw %%mm5, %%mm5 \n\t" \ 823 "movd %%mm4, 64+" #dst " \n\t"\ 824 "movd %%mm5, 80+" #dst " \n\t" 828 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
829 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
830 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
831 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
838 #define IDCT(src0, src4, src1, src5, dst, shift) \ 839 "movq " #src0 ", %%mm0 \n\t" \ 840 "movq 16(%2), %%mm4 \n\t" \ 841 "pmaddwd %%mm0, %%mm4 \n\t" \ 842 "movq 24(%2), %%mm5 \n\t" \ 843 "pmaddwd %%mm5, %%mm0 \n\t" \ 844 "psrad $" #shift ", %%mm4 \n\t"\ 845 "psrad $" #shift ", %%mm0 \n\t"\ 846 "movq 8+" #src0 ", %%mm2 \n\t" \ 847 "movq 16(%2), %%mm1 \n\t" \ 848 "pmaddwd %%mm2, %%mm1 \n\t" \ 849 "movq 24(%2), %%mm7 \n\t" \ 850 "pmaddwd %%mm7, %%mm2 \n\t" \ 851 "movq 32(%2), %%mm7 \n\t" \ 852 "psrad $" #shift ", %%mm1 \n\t"\ 853 "packssdw %%mm1, %%mm4 \n\t" \ 854 "movq %%mm4, " #dst " \n\t"\ 855 "psrad $" #shift ", %%mm2 \n\t"\ 856 "packssdw %%mm2, %%mm0 \n\t" \ 857 "movq %%mm0, 16+" #dst " \n\t"\ 858 "movq %%mm0, 96+" #dst " \n\t"\ 859 "movq %%mm4, 112+" #dst " \n\t"\ 860 "movq %%mm0, 32+" #dst " \n\t"\ 861 "movq %%mm4, 48+" #dst " \n\t"\ 862 "movq %%mm4, 64+" #dst " \n\t"\ 863 "movq %%mm0, 80+" #dst " \n\t" 866 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
868 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
895 :: "
r" (block), "
r" (temp), "
r" (coeffs)
void(* ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)
memory handling functions
#define DECLARE_ALIGNED(n, t, v)
#define DECLARE_ASM_CONST(n, t, v)
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
common internal API header
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_simple_idct_mmx(int16_t *block)
void(* ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size)