Libav
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
30 #include "avcodec.h"
31 #include "internal.h"
32 #include "g722.h"
33 #include "libavutil/common.h"
34 
/* Number of trellis steps accumulated before the best path is frozen and
 * written out; it also scales the size of the per-band path buffers. */
#define FREEZE_INTERVAL 128

/* Upper bound for avctx->frame_size. The exact value is arbitrary; it only
 * exists because insanely large frame sizes lead to strange problems. */
#define MAX_FRAME_SIZE 32768

/* Permitted range for avctx->trellis. Clipping to this range prevents data
 * type overflows and undefined behavior; larger values would be insanely
 * slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
45 
47 {
48  G722Context *c = avctx->priv_data;
49  int i;
50  for (i = 0; i < 2; i++) {
51  av_freep(&c->paths[i]);
52  av_freep(&c->node_buf[i]);
53  av_freep(&c->nodep_buf[i]);
54  }
55  return 0;
56 }
57 
59 {
60  G722Context *c = avctx->priv_data;
61  int ret;
62 
63  if (avctx->channels != 1) {
64  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
65  return AVERROR_INVALIDDATA;
66  }
67 
68  c->band[0].scale_factor = 8;
69  c->band[1].scale_factor = 2;
70  c->prev_samples_pos = 22;
71 
72  if (avctx->trellis) {
73  int frontier = 1 << avctx->trellis;
74  int max_paths = frontier * FREEZE_INTERVAL;
75  int i;
76  for (i = 0; i < 2; i++) {
77  c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
78  c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
79  c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
80  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
81  ret = AVERROR(ENOMEM);
82  goto error;
83  }
84  }
85  }
86 
87  if (avctx->frame_size) {
88  /* validate frame size */
89  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
90  int new_frame_size;
91 
92  if (avctx->frame_size == 1)
93  new_frame_size = 2;
94  else if (avctx->frame_size > MAX_FRAME_SIZE)
95  new_frame_size = MAX_FRAME_SIZE;
96  else
97  new_frame_size = avctx->frame_size - 1;
98 
99  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
100  "allowed. Using %d instead of %d\n", new_frame_size,
101  avctx->frame_size);
102  avctx->frame_size = new_frame_size;
103  }
104  } else {
105  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
106  a common packet size for VoIP applications */
107  avctx->frame_size = 320;
108  }
109  avctx->initial_padding = 22;
110 
111  if (avctx->trellis) {
112  /* validate trellis */
113  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
114  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
115  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
116  "allowed. Using %d instead of %d\n", new_trellis,
117  avctx->trellis);
118  avctx->trellis = new_trellis;
119  }
120  }
121 
122  ff_g722dsp_init(&c->dsp);
123 
124  return 0;
125 error:
126  g722_encode_close(avctx);
127  return ret;
128 }
129 
/* Decision thresholds used by encode_low(). Only the first 29 entries are
 * given explicitly; the remaining entries of the 33-element array are
 * zero-initialized. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
136 
137 static inline void filter_samples(G722Context *c, const int16_t *samples,
138  int *xlow, int *xhigh)
139 {
140  int xout[2];
141  c->prev_samples[c->prev_samples_pos++] = samples[0];
142  c->prev_samples[c->prev_samples_pos++] = samples[1];
143  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
144  *xlow = xout[0] + xout[1] >> 14;
145  *xhigh = xout[0] - xout[1] >> 14;
147  memmove(c->prev_samples,
148  c->prev_samples + c->prev_samples_pos - 22,
149  22 * sizeof(c->prev_samples[0]));
150  c->prev_samples_pos = 22;
151  }
152 }
153 
154 static inline int encode_high(const struct G722Band *state, int xhigh)
155 {
156  int diff = av_clip_int16(xhigh - state->s_predictor);
157  int pred = 141 * state->scale_factor >> 8;
158  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
159  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
160 }
161 
162 static inline int encode_low(const struct G722Band* state, int xlow)
163 {
164  int diff = av_clip_int16(xlow - state->s_predictor);
165  /* = diff >= 0 ? diff : -(diff + 1) */
166  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
167  int i = 0;
168  limit = limit + 1 << 10;
169  if (limit > low_quant[8] * state->scale_factor)
170  i = 9;
171  while (i < 29 && limit > low_quant[i] * state->scale_factor)
172  i++;
173  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
174 }
175 
176 static void g722_encode_trellis(G722Context *c, int trellis,
177  uint8_t *dst, int nb_samples,
178  const int16_t *samples)
179 {
180  int i, j, k;
181  int frontier = 1 << trellis;
182  struct TrellisNode **nodes[2];
183  struct TrellisNode **nodes_next[2];
184  int pathn[2] = {0, 0}, froze = -1;
185  struct TrellisPath *p[2];
186 
187  for (i = 0; i < 2; i++) {
188  nodes[i] = c->nodep_buf[i];
189  nodes_next[i] = c->nodep_buf[i] + frontier;
190  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
191  nodes[i][0] = c->node_buf[i] + frontier;
192  nodes[i][0]->ssd = 0;
193  nodes[i][0]->path = 0;
194  nodes[i][0]->state = c->band[i];
195  }
196 
197  for (i = 0; i < nb_samples >> 1; i++) {
198  int xlow, xhigh;
199  struct TrellisNode *next[2];
200  int heap_pos[2] = {0, 0};
201 
202  for (j = 0; j < 2; j++) {
203  next[j] = c->node_buf[j] + frontier*(i & 1);
204  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
205  }
206 
207  filter_samples(c, &samples[2*i], &xlow, &xhigh);
208 
209  for (j = 0; j < frontier && nodes[0][j]; j++) {
210  /* Only k >> 2 affects the future adaptive state, therefore testing
211  * small steps that don't change k >> 2 is useless, the original
212  * value from encode_low is better than them. Since we step k
213  * in steps of 4, make sure range is a multiple of 4, so that
214  * we don't miss the original value from encode_low. */
215  int range = j < frontier/2 ? 4 : 0;
216  struct TrellisNode *cur_node = nodes[0][j];
217 
218  int ilow = encode_low(&cur_node->state, xlow);
219 
220  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
221  int decoded, dec_diff, pos;
222  uint32_t ssd;
223  struct TrellisNode* node;
224 
225  if (k < 0)
226  continue;
227 
228  decoded = av_clip_intp2((cur_node->state.scale_factor *
229  ff_g722_low_inv_quant6[k] >> 10)
230  + cur_node->state.s_predictor, 14);
231  dec_diff = xlow - decoded;
232 
233 #define STORE_NODE(index, UPDATE, VALUE)\
234  ssd = cur_node->ssd + dec_diff*dec_diff;\
235  /* Check for wraparound. Using 64 bit ssd counters would \
236  * be simpler, but is slower on x86 32 bit. */\
237  if (ssd < cur_node->ssd)\
238  continue;\
239  if (heap_pos[index] < frontier) {\
240  pos = heap_pos[index]++;\
241  assert(pathn[index] < FREEZE_INTERVAL * frontier);\
242  node = nodes_next[index][pos] = next[index]++;\
243  node->path = pathn[index]++;\
244  } else {\
245  /* Try to replace one of the leaf nodes with the new \
246  * one, but not always testing the same leaf position */\
247  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
248  if (ssd >= nodes_next[index][pos]->ssd)\
249  continue;\
250  heap_pos[index]++;\
251  node = nodes_next[index][pos];\
252  }\
253  node->ssd = ssd;\
254  node->state = cur_node->state;\
255  UPDATE;\
256  c->paths[index][node->path].value = VALUE;\
257  c->paths[index][node->path].prev = cur_node->path;\
258  /* Sift the newly inserted node up in the heap to restore \
259  * the heap property */\
260  while (pos > 0) {\
261  int parent = (pos - 1) >> 1;\
262  if (nodes_next[index][parent]->ssd <= ssd)\
263  break;\
264  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
265  nodes_next[index][pos]);\
266  pos = parent;\
267  }
268  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
269  }
270  }
271 
272  for (j = 0; j < frontier && nodes[1][j]; j++) {
273  int ihigh;
274  struct TrellisNode *cur_node = nodes[1][j];
275 
276  /* We don't try to get any initial guess for ihigh via
277  * encode_high - since there's only 4 possible values, test
278  * them all. Testing all of these gives a much, much larger
279  * gain than testing a larger range around ilow. */
280  for (ihigh = 0; ihigh < 4; ihigh++) {
281  int dhigh, decoded, dec_diff, pos;
282  uint32_t ssd;
283  struct TrellisNode* node;
284 
285  dhigh = cur_node->state.scale_factor *
286  ff_g722_high_inv_quant[ihigh] >> 10;
287  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
288  dec_diff = xhigh - decoded;
289 
290  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
291  }
292  }
293 
294  for (j = 0; j < 2; j++) {
295  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
296 
297  if (nodes[j][0]->ssd > (1 << 16)) {
298  for (k = 1; k < frontier && nodes[j][k]; k++)
299  nodes[j][k]->ssd -= nodes[j][0]->ssd;
300  nodes[j][0]->ssd = 0;
301  }
302  }
303 
304  if (i == froze + FREEZE_INTERVAL) {
305  p[0] = &c->paths[0][nodes[0][0]->path];
306  p[1] = &c->paths[1][nodes[1][0]->path];
307  for (j = i; j > froze; j--) {
308  dst[j] = p[1]->value << 6 | p[0]->value;
309  p[0] = &c->paths[0][p[0]->prev];
310  p[1] = &c->paths[1][p[1]->prev];
311  }
312  froze = i;
313  pathn[0] = pathn[1] = 0;
314  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
315  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
316  }
317  }
318 
319  p[0] = &c->paths[0][nodes[0][0]->path];
320  p[1] = &c->paths[1][nodes[1][0]->path];
321  for (j = i; j > froze; j--) {
322  dst[j] = p[1]->value << 6 | p[0]->value;
323  p[0] = &c->paths[0][p[0]->prev];
324  p[1] = &c->paths[1][p[1]->prev];
325  }
326  c->band[0] = nodes[0][0]->state;
327  c->band[1] = nodes[1][0]->state;
328 }
329 
331  const int16_t *samples)
332 {
333  int xlow, xhigh, ilow, ihigh;
334  filter_samples(c, samples, &xlow, &xhigh);
335  ihigh = encode_high(&c->band[1], xhigh);
336  ilow = encode_low (&c->band[0], xlow);
338  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
339  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
340  *dst = ihigh << 6 | ilow;
341 }
342 
344  uint8_t *dst, int nb_samples,
345  const int16_t *samples)
346 {
347  int i;
348  for (i = 0; i < nb_samples; i += 2)
349  encode_byte(c, dst++, &samples[i]);
350 }
351 
352 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
353  const AVFrame *frame, int *got_packet_ptr)
354 {
355  G722Context *c = avctx->priv_data;
356  const int16_t *samples = (const int16_t *)frame->data[0];
357  int nb_samples, out_size, ret;
358 
359  out_size = (frame->nb_samples + 1) / 2;
360  if ((ret = ff_alloc_packet(avpkt, out_size))) {
361  av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
362  return ret;
363  }
364 
365  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
366 
367  if (avctx->trellis)
368  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
369  else
370  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
371 
372  /* handle last frame with odd frame_size */
373  if (nb_samples < frame->nb_samples) {
374  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
375  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
376  }
377 
378  if (frame->pts != AV_NOPTS_VALUE)
379  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
380  *got_packet_ptr = 1;
381  return 0;
382 }
383 
385  .name = "g722",
386  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
387  .type = AVMEDIA_TYPE_AUDIO,
389  .priv_data_size = sizeof(G722Context),
391  .close = g722_encode_close,
392  .encode2 = g722_encode_frame,
393  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
394  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
396 };
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:54
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:45
This structure describes decoded (raw) audio or video data.
Definition: frame.h:140
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:130
av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (%s)\, len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt), use_generic ? ac->func_descr_generic :ac->func_descr)
struct G722Context::TrellisPath * paths[2]
int out_size
Definition: movenc.c:55
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:137
#define MIN_TRELLIS
Definition: g722enc.c:43
AVCodec.
Definition: avcodec.h:3120
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:343
void av_freep(void *arg)
Free a memory block which has been allocated with av_malloc(z)() or av_realloc() and set the pointer ...
Definition: mem.c:202
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
Definition: g722dsp.h:27
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:154
uint8_t
#define av_cold
Definition: attributes.h:66
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:211
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:58
uint8_t * data
Definition: avcodec.h:1346
uint32_t ssd
Definition: adpcmenc.c:44
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:124
struct G722Context::TrellisNode * node_buf[2]
#define AVERROR(e)
Definition: error.h:43
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:148
int initial_padding
Audio only.
Definition: avcodec.h:3054
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:37
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:384
const char * name
Name of the codec implementation.
Definition: avcodec.h:3127
#define FREEZE_INTERVAL
Definition: g722enc.c:35
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
struct G722Context::G722Band band[2]
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:868
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:176
#define MAX_FRAME_SIZE
Definition: g722enc.c:39
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:46
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:162
int ff_alloc_packet(AVPacket *avpkt, int size)
Check AVPacket size and/or allocate data.
Definition: utils.c:1211
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
static const float pred[4]
Definition: siprdata.h:259
G722DSPContext dsp
Definition: g722.h:66
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2172
static const int16_t low_quant[33]
Definition: g722enc.c:130
Libavcodec external API header.
AVSampleFormat
Audio Sample Formats.
Definition: samplefmt.h:60
main external API structure.
Definition: avcodec.h:1409
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:146
static struct @174 state
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:63
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:38
static av_cold int init(AVCodecParserContext *s)
Definition: h264_parser.c:582
int trellis
trellis RD quantization
Definition: avcodec.h:2486
void * priv_data
Definition: avcodec.h:1451
#define STORE_NODE(index, UPDATE, VALUE)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
int channels
number of audio channels
Definition: avcodec.h:2153
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:330
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:700
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:352
#define av_always_inline
Definition: attributes.h:40
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:202
#define MAX_TRELLIS
Definition: g722enc.c:44
#define FFSWAP(type, a, b)
Definition: common.h:69
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
This structure stores compressed data.
Definition: avcodec.h:1323
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:52
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:184
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:211
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1339
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:235