/*
 * Helper macros for the x86 HEVC DSP glue code.
 *
 * NOTE(review): this listing carries the original file's line numbers inline
 * and some original lines appear to be missing (e.g. the braces around the
 * QPEL/EPEL wrapper bodies and the #else/#endif between the two
 * QPEL_FUNC_HV / EPEL_FUNC_HV definitions) -- confirm against the pristine
 * file before compiling.
 *
 * Prototype generators (all build the symbol name by token pasting):
 *   LFC_FUNC(DIR, DEPTH, OPT)   declares
 *       ff_hevc_<DIR>_loop_filter_chroma_<DEPTH>_<OPT>(pix, stride, tc, no_p, no_q).
 *   LFL_FUNC(DIR, DEPTH, OPT)   declares the luma counterpart, which takes an
 *       additional "int beta" argument before tc.
 *   LFC_FUNCS(type, depth)      expands LFC_FUNC for h and v with OPT = sse2.
 *   LFL_FUNCS(type, depth)      expands LFL_FUNC for h and v with OPT = ssse3.
 *       The "type" parameter is not used in either visible expansion.
 *   idct_dc_proto(size, bitd, opt) declares
 *       ff_hevc_idct_<size>_dc_add_<bitd>_<opt>(dst, coeffs, stride).
 *   IDCT_FUNCS(W, opt)          declares ff_hevc_idct_<W>_dc_8_<opt> and
 *       ff_hevc_idct_<W>_dc_10_<opt>, each taking only "int16_t *coeffs".
 *   GET_PIXELS(width, depth, cf) declares ff_hevc_get_pixels_<width>_<depth>_<cf>.
 *   INTERP_HV_FUNC(width, cf)   declares ff_hevc_qpel_hv_<width>_<cf> and
 *       ff_hevc_epel_hv_<width>_<cf>; both take an int16_t source.
 *   QPEL_FUNCS / EPEL_FUNCS     declare the _h_ and _v_ functions for one
 *       width/depth and then expand QPEL_FUNC_HV / EPEL_FUNC_HV.
 *   PUT_PRED(width, depth, cf_uw, cf_w) declares the unweighted, unweighted-avg,
 *       weighted and weighted-avg prediction functions; the first two use the
 *       cf_uw suffix, the last two use cf_w.
 *
 * Two-pass wrappers (only when ARCH_X86_64 && HAVE_AVX_EXTERNAL; an empty
 * definition of the same macro is also present):
 *   QPEL_FUNC_HV defines static hevc_qpel_hv_<width>_<depth>_<cf_hv>. It sets
 *       stride = FFALIGN(width + 7, 8), runs the _h_ function from
 *       src - 3 * srcstride for height + 7 rows into mcbuffer (destination
 *       stride 2 * stride), then calls ff_hevc_qpel_hv_<width>_<cf_hv> reading
 *       from mcbuffer + 3 * stride.
 *   EPEL_FUNC_HV is the same shape with FFALIGN(width + 3, 8), src - srcstride,
 *       height + 3 rows, and the second pass reading from mcbuffer + stride.
 *   NOTE(review): the extra rows presumably cover the vertical filter support
 *       of the second pass -- confirm against the assembly.
 *
 * Table fillers (all write through a context pointer named "c"):
 *   SET_LUMA_FUNCS   assigns tabname[0..7] for widths 4, 8, 12, 16, 24, 32, 48, 64.
 *   SET_CHROMA_FUNCS assigns tabname[1], [3], [4], [5], [6], [7] for widths
 *       4, 8, 12, 16, 24, 32; indices 0 and 2 are not written by this macro.
 *   SET_QPEL_FUNCS / SET_EPEL_FUNCS apply the above to put_hevc_qpel[v][h] and
 *       put_hevc_epel[v][h] respectively.
 */
29 #define LFC_FUNC(DIR, DEPTH, OPT) \ 30 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q); 32 #define LFL_FUNC(DIR, DEPTH, OPT) \ 33 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q); 35 #define LFC_FUNCS(type, depth) \ 36 LFC_FUNC(h, depth, sse2) \ 37 LFC_FUNC(v, depth, sse2) 39 #define LFL_FUNCS(type, depth) \ 40 LFL_FUNC(h, depth, ssse3) \ 41 LFL_FUNC(v, depth, ssse3) 48 #define idct_dc_proto(size, bitd, opt) \ 49 void ff_hevc_idct_ ## size ## _dc_add_ ## bitd ## _ ## opt(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) 69 #define IDCT_FUNCS(W, opt) \ 70 void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \ 71 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs) 81 #define GET_PIXELS(width, depth, cf) \ 82 void ff_hevc_get_pixels_ ## width ## _ ## depth ## _ ## cf(int16_t *dst, ptrdiff_t dststride, \ 83 uint8_t *src, ptrdiff_t srcstride, \ 84 int height, int mx, int my, int16_t *mcbuffer); 105 #define INTERP_HV_FUNC(width, cf) \ 106 void ff_hevc_qpel_hv_ ## width ## _ ## cf(int16_t *dst, ptrdiff_t dststride, \ 107 int16_t *src, ptrdiff_t srcstride, \ 108 int height, int mx, int my, int16_t *mcbuffer); \ 109 void ff_hevc_epel_hv_ ## width ## _ ## cf(int16_t *dst, ptrdiff_t dststride, \ 110 int16_t *src, ptrdiff_t srcstride, \ 111 int height, int mx, int my, int16_t *mcbuffer); 122 #if ARCH_X86_64 && HAVE_AVX_EXTERNAL 123 #define QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) \ 124 static void hevc_qpel_hv_ ## width ## _ ## depth ## _ ## cf_hv(int16_t *dst, ptrdiff_t dststride, \ 125 uint8_t *src, ptrdiff_t srcstride, \ 126 int height, int mx, int my, int16_t *mcbuffer) \ 128 const ptrdiff_t stride = FFALIGN(width + 7, 8); \ 129 ff_hevc_qpel_h_ ## width ## _ ## depth ## _ ## cf_h(mcbuffer, 2 * stride, src - 3 * srcstride, srcstride, \ 130 height + 7, mx, my, mcbuffer); \ 
131 ff_hevc_qpel_hv_ ## width ## _ ## cf_hv(dst, dststride, mcbuffer + 3 * stride, 2 * stride, \ 132 height, mx, my, mcbuffer); \ 135 #define QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) 138 #define QPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv) \ 139 void ff_hevc_qpel_h_ ## width ## _ ## depth ## _ ## cf_h(int16_t *dst, ptrdiff_t dststride, \ 140 uint8_t *src, ptrdiff_t srcstride, \ 141 int height, int mx, int my, int16_t *mcbuffer); \ 142 void ff_hevc_qpel_v_ ## width ## _ ## depth ## _ ## cf_v(int16_t *dst, ptrdiff_t dststride, \ 143 uint8_t *src, ptrdiff_t srcstride, \ 144 int height, int mx, int my, int16_t *mcbuffer); \ 145 QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) 165 #if ARCH_X86_64 && HAVE_AVX_EXTERNAL 166 #define EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) \ 167 static void hevc_epel_hv_ ## width ## _ ## depth ## _ ## cf_hv(int16_t *dst, ptrdiff_t dststride, \ 168 uint8_t *src, ptrdiff_t srcstride, \ 169 int height, int mx, int my, int16_t *mcbuffer) \ 171 const ptrdiff_t stride = FFALIGN(width + 3, 8); \ 172 ff_hevc_epel_h_ ## width ## _ ## depth ## _ ## cf_h(mcbuffer, 2 * stride, src - srcstride, srcstride, \ 173 height + 3, mx, my, mcbuffer); \ 174 ff_hevc_epel_hv_ ## width ## _ ## cf_hv(dst, dststride, mcbuffer + stride, 2 * stride, \ 175 height, mx, my, mcbuffer); \ 178 #define EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) 181 #define EPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv) \ 182 void ff_hevc_epel_h_ ## width ## _ ## depth ## _ ## cf_h(int16_t *dst, ptrdiff_t dststride, \ 183 uint8_t *src, ptrdiff_t srcstride, \ 184 int height, int mx, int my, int16_t *mcbuffer); \ 185 void ff_hevc_epel_v_ ## width ## _ ## depth ## _ ## cf_v(int16_t *dst, ptrdiff_t dststride, \ 186 uint8_t *src, ptrdiff_t srcstride, \ 187 int height, int mx, int my, int16_t *mcbuffer); \ 188 EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv) 204 #define PUT_PRED(width, depth, cf_uw, cf_w) \ 205 void ff_hevc_put_unweighted_pred_ ## width ## _ ## depth ## _ ## cf_uw(uint8_t *dst, 
ptrdiff_t dststride, \ 206 int16_t *src, ptrdiff_t srcstride, \ 208 void ff_hevc_put_unweighted_pred_avg_ ## width ## _ ## depth ## _ ## cf_uw(uint8_t *dst, ptrdiff_t dststride, \ 209 int16_t *src1, int16_t *src2, \ 210 ptrdiff_t srcstride, int height); \ 211 void ff_hevc_put_weighted_pred_ ## width ## _ ## depth ## _ ## cf_w(uint8_t denom, int16_t weight, int16_t offset, \ 212 uint8_t *dst, ptrdiff_t dststride, \ 213 int16_t *src, ptrdiff_t srcstride, \ 215 void ff_hevc_put_weighted_pred_avg_ ## width ## _ ## depth ## _ ## cf_w(uint8_t denom, int16_t weight0, int16_t weight1, \ 216 int16_t offset0, int16_t offset1, \ 217 uint8_t *dst, ptrdiff_t dststride, \ 218 int16_t *src0, int16_t *src1, ptrdiff_t srcstride, \ 243 #define SET_LUMA_FUNCS(tabname, funcname, depth, cf) \ 244 c->tabname[0] = funcname ## _4_ ## depth ## _ ## cf; \ 245 c->tabname[1] = funcname ## _8_ ## depth ## _ ## cf; \ 246 c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \ 247 c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \ 248 c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \ 249 c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \ 250 c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \ 251 c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf; 253 #define SET_CHROMA_FUNCS(tabname, funcname, depth, cf) \ 254 c->tabname[1] = funcname ## _4_ ## depth ## _ ## cf; \ 255 c->tabname[3] = funcname ## _8_ ## depth ## _ ## cf; \ 256 c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \ 257 c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \ 258 c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \ 259 c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf; 261 #define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS (put_hevc_qpel[v][h], name, depth, cf) 262 #define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf) 264 if (bit_depth == 8) {
/*
 * NOTE(review): fragment of an initializer body that installs x86 function
 * pointers into the context "c" according to bit_depth. The enclosing
 * function header, any CPU-feature guards and several closing braces are not
 * visible in this listing -- confirm against the pristine file.
 */
/* 8-bit: DC-only inverse transform entries with the mmxext suffix. */
266 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
267 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
/* 8-bit: chroma deblocking filters with the sse2 suffix. */
270 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
271 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
/*
 * idct_dc[1] is assigned a second time here, replacing the mmxext entry set
 * above. NOTE(review): presumably this sits under a separate feature guard
 * that is not visible in this listing -- confirm.
 */
273 c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
274 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
275 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
/* 8-bit: unweighted chroma prediction tables filled via SET_CHROMA_FUNCS. */
281 SET_CHROMA_FUNCS(put_unweighted_pred_chroma, ff_hevc_put_unweighted_pred, 8, sse2);
282 SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
291 }
/* 10-bit: same set of assignments as the 8-bit branch, using the _10_ symbols. */
else if (bit_depth == 10) {
293 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
294 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
297 c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
298 c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
/* As in the 8-bit branch, idct_dc[1] is overwritten with the sse2 entry. */
300 c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
301 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
302 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
309 SET_CHROMA_FUNCS(put_unweighted_pred_chroma, ff_hevc_put_unweighted_pred, 10, sse2);
310 SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);
/*
 * Second bit_depth dispatch: luma deblocking filters (ssse3 suffix), weighted
 * chroma prediction (sse4 suffix) and, when HAVE_AVX_EXTERNAL is set, avx2
 * entries for idct_dc[2] and idct_dc[3].
 */
315 if (bit_depth == 8) {
317 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
318 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
325 SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
/* idct_dc[2] and idct_dc[3] are reassigned to avx2 entries here. */
329 #if HAVE_AVX_EXTERNAL 335 c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
336 c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
338 }
else if (bit_depth == 10) {
340 c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
341 c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
345 SET_CHROMA_FUNCS(weighted_pred_chroma, ff_hevc_put_weighted_pred, 10, sse4);
347 SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
/* 10-bit counterpart of the avx2 idct_dc[2] / idct_dc[3] reassignment. */
350 #if HAVE_AVX_EXTERNAL 360 c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
361 c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
#define QPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv)
#define EPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv)
#define idct_dc_proto(size, bitd, opt)
#define SET_EPEL_FUNCS(v, h, depth, cf, name)
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
#define PUT_PRED(width, depth, cf_uw, cf_w)
#define LFC_FUNCS(type, depth)
#define EXTERNAL_SSE4(flags)
#define LFL_FUNCS(type, depth)
#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)
static av_always_inline void FUNC() weighted_pred(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, uint8_t *_dst, ptrdiff_t _dststride, int16_t *src, ptrdiff_t srcstride, int width, int height)
#define EXTERNAL_SSE2(flags)
static av_always_inline void FUNC() put_unweighted_pred(uint8_t *_dst, ptrdiff_t _dststride, int16_t *src, ptrdiff_t srcstride, int width, int height)
static av_always_inline void FUNC() weighted_pred_avg(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag, int16_t ol0Flag, int16_t ol1Flag, uint8_t *_dst, ptrdiff_t _dststride, int16_t *src1, int16_t *src2, ptrdiff_t srcstride, int width, int height)
#define IDCT_FUNCS(W, opt)
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
#define EXTERNAL_SSSE3(flags)
#define INTERP_HV_FUNC(width, cf)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static av_always_inline void FUNC() put_unweighted_pred_avg(uint8_t *_dst, ptrdiff_t _dststride, int16_t *src1, int16_t *src2, ptrdiff_t srcstride, int width, int height)
#define EXTERNAL_AVX2(flags)
#define EXTERNAL_MMXEXT(flags)
#define GET_PIXELS(width, depth, cf)
#define EXTERNAL_AVX(flags)
#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)
#define SET_QPEL_FUNCS(v, h, depth, cf, name)