Libav
pixblockdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Brian Foley
3  * Copyright (c) 2002 Dieter Shirley
4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of Libav.
7  *
8  * Libav is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * Libav is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with Libav; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
27 
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
35 
36 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
37 
38 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39  int line_size)
40 {
41  int i;
42  vec_u8 perm = vec_lvsl(0, pixels);
43  const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
44 
45  for (i = 0; i < 8; i++) {
46  /* Read potentially unaligned pixels.
47  * We're reading 16 pixels, and actually only want 8,
48  * but we simply ignore the extras. */
49  vec_u8 pixl = vec_ld(0, pixels);
50  vec_u8 pixr = vec_ld(7, pixels);
51  vec_u8 bytes = vec_perm(pixl, pixr, perm);
52 
53  // Convert the bytes into shorts.
54  vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
55 
56  // Save the data to the block, we assume the block is 16-byte aligned.
57  vec_st(shorts, i * 16, (vec_s16 *)block);
58 
59  pixels += line_size;
60  }
61 }
62 
63 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
64  const uint8_t *s2, int stride)
65 {
66  int i;
67  vec_u8 perm1 = vec_lvsl(0, s1);
68  vec_u8 perm2 = vec_lvsl(0, s2);
69  const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
70  vec_s16 shorts1, shorts2;
71 
72  for (i = 0; i < 4; i++) {
73  /* Read potentially unaligned pixels.
74  * We're reading 16 pixels, and actually only want 8,
75  * but we simply ignore the extras. */
76  vec_u8 pixl = vec_ld(0, s1);
77  vec_u8 pixr = vec_ld(15, s1);
78  vec_u8 bytes = vec_perm(pixl, pixr, perm1);
79 
80  // Convert the bytes into shorts.
81  shorts1 = (vec_s16)vec_mergeh(zero, bytes);
82 
83  // Do the same for the second block of pixels.
84  pixl = vec_ld(0, s2);
85  pixr = vec_ld(15, s2);
86  bytes = vec_perm(pixl, pixr, perm2);
87 
88  // Convert the bytes into shorts.
89  shorts2 = (vec_s16)vec_mergeh(zero, bytes);
90 
91  // Do the subtraction.
92  shorts1 = vec_sub(shorts1, shorts2);
93 
94  // Save the data to the block, we assume the block is 16-byte aligned.
95  vec_st(shorts1, 0, (vec_s16 *)block);
96 
97  s1 += stride;
98  s2 += stride;
99  block += 8;
100 
101  /* The code below is a copy of the code above...
102  * This is a manual unroll. */
103 
104  /* Read potentially unaligned pixels.
105  * We're reading 16 pixels, and actually only want 8,
106  * but we simply ignore the extras. */
107  pixl = vec_ld(0, s1);
108  pixr = vec_ld(15, s1);
109  bytes = vec_perm(pixl, pixr, perm1);
110 
111  // Convert the bytes into shorts.
112  shorts1 = (vec_s16)vec_mergeh(zero, bytes);
113 
114  // Do the same for the second block of pixels.
115  pixl = vec_ld(0, s2);
116  pixr = vec_ld(15, s2);
117  bytes = vec_perm(pixl, pixr, perm2);
118 
119  // Convert the bytes into shorts.
120  shorts2 = (vec_s16)vec_mergeh(zero, bytes);
121 
122  // Do the subtraction.
123  shorts1 = vec_sub(shorts1, shorts2);
124 
125  // Save the data to the block, we assume the block is 16-byte aligned.
126  vec_st(shorts1, 0, (vec_s16 *)block);
127 
128  s1 += stride;
129  s2 += stride;
130  block += 8;
131  }
132 }
133 
134 #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
135 
136 #if HAVE_VSX
137 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
138  int line_size)
139 {
140  int i;
141  for (i = 0; i < 8; i++) {
142  vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
143 
144  vec_vsx_st(shorts, i * 16, block);
145 
146  pixels += line_size;
147  }
148 }
149 
150 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
151  const uint8_t *s2, int stride)
152 {
153  int i;
154  vec_s16 shorts1, shorts2;
155  for (i = 0; i < 8; i++) {
156  shorts1 = vsx_ld_u8_s16(0, s1);
157  shorts2 = vsx_ld_u8_s16(0, s2);
158 
159  shorts1 = vec_sub(shorts1, shorts2);
160 
161  vec_vsx_st(shorts1, 0, block);
162 
163  s1 += stride;
164  s2 += stride;
165  block += 8;
166  }
167 }
168 #endif /* HAVE_VSX */
169 
171  AVCodecContext *avctx,
172  unsigned high_bit_depth)
173 {
174 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
176  return;
177 
178  c->diff_pixels = diff_pixels_altivec;
179 
180  if (!high_bit_depth) {
181  c->get_pixels = get_pixels_altivec;
182  }
183 #endif /* HAVE_ALTIVEC */
184 
185 #if HAVE_VSX
186  if (!PPC_VSX(av_get_cpu_flags()))
187  return;
188 
189  c->diff_pixels = diff_pixels_vsx;
190 
191  if (!high_bit_depth)
192  c->get_pixels = get_pixels_vsx;
193 #endif /* HAVE_VSX */
194 }
#define PPC_VSX(flags)
Definition: cpu.h:27
int stride
Definition: mace.c:144
Macro definitions for various function/variable attributes.
static int16_t block[64]
Definition: dct.c:97
uint8_t
#define av_cold
Definition: attributes.h:66
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: pixblockdsp.c:170
#define vec_s16
Definition: types_altivec.h:30
void(* diff_pixels)(int16_t *block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: pixblockdsp.h:30
void(* get_pixels)(int16_t *block, const uint8_t *pixels, int line_size)
Definition: pixblockdsp.h:27
#define PPC_ALTIVEC(flags)
Definition: cpu.h:26
#define vec_u8
Definition: types_altivec.h:27
Libavcodec external API header.
main external API structure.
Definition: avcodec.h:1409
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:47
Contains misc utility macros and inline functions.
#define restrict
Definition: config.h:8