/*
*
* This file is part of libmpeg3
*
* LibMPEG3
* Author: Adam Williams <[email protected]>
* Page: heroine.linuxbox.com
* Page: http://www.smalltalkconsulting.com/html/mpeg3source.html (for Squeak)
*
LibMPEG3 was originally licenced under GPL. It was relicensed by
the author under the LGPL and the Squeak license on Nov 1st, 2000
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also licensed under the Squeak license.
http://www.squeak.org/license.html
*/
/* Changed Sept 15th by John M McIntosh to support Macintosh & Squeak
*/
#include "mpeg3private.h"
#include "mpeg3video.h"
#include "mpeg3videoprotos.h"
#include "slice.h"
#include <stdlib.h>
#ifdef HAVE_MMX
static unsigned long long MMX_128 = 0x80008000800080LL;
#endif
int mpeg3_new_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
{
#ifdef USE_PTHREADS
pthread_mutexattr_t mutex_attr;
#endif
slice_buffer->data = (unsigned char *) memoryAllocate(1,1024);
slice_buffer->buffer_size = 0;
slice_buffer->buffer_allocation = 1024;
slice_buffer->current_position = 0;
slice_buffer->bits_size = 0;
slice_buffer->bits = 0;
slice_buffer->done = 0;
#ifdef USE_PTHREADS
pthread_mutexattr_init(&mutex_attr);
pthread_mutex_init(&(slice_buffer->completion_lock), &mutex_attr);
#endif
return 0;
}
int mpeg3_delete_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
{
memoryFree(slice_buffer->data);
#ifdef USE_PTHREADS
pthread_mutex_destroy(&(slice_buffer->completion_lock));
#endif
return 0;
}
int mpeg3_expand_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
{
int i;
unsigned char *new_buffer = (unsigned char *) memoryAllocate(1,slice_buffer->buffer_allocation * 2);
for(i = 0; i < slice_buffer->buffer_size; i++)
new_buffer[i] = slice_buffer->data[i];
memoryFree(slice_buffer->data);
slice_buffer->data = new_buffer;
slice_buffer->buffer_allocation *= 2;
return 0;
}
/* limit coefficients to -2048..2047 */
/* move/add 8x8-Block from block[comp] to refframe */
static inline int mpeg3video_addblock(mpeg3_slice_t *slice,
mpeg3video_t *video,
int comp,
int bx,
int by,
int dct_type,
int addflag)
{
int cc, i, iincr;
unsigned char *rfp;
short *bp;
int spar = slice->sparse[comp];
/* color component index */
cc = (comp < 4) ? 0 : (comp & 1) + 1;
if(cc == 0)
{
/* luminance */
if(video->pict_struct == FRAME_PICTURE)
{
if(dct_type)
{
/* field DCT coding */
rfp = video->newframe[0] +
video->coded_picture_width * (by + ((comp & 2) >> 1)) + bx + ((comp & 1) << 3);
iincr = (video->coded_picture_width << 1);
}
else
{
/* frame DCT coding */
rfp = video->newframe[0] +
video->coded_picture_width * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
iincr = video->coded_picture_width;
}
}
else
{
/* field picture */
rfp = video->newframe[0] +
(video->coded_picture_width << 1) * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
iincr = (video->coded_picture_width << 1);
}
}
else
{
/* chrominance */
/* scale coordinates */
if(video->chroma_format != CHROMA444) bx >>= 1;
if(video->chroma_format == CHROMA420) by >>= 1;
if(video->pict_struct == FRAME_PICTURE)
{
if(dct_type && (video->chroma_format != CHROMA420))
{
/* field DCT coding */
rfp = video->newframe[cc]
+ video->chrom_width * (by + ((comp & 2) >> 1)) + bx + (comp & 8);
iincr = (video->chrom_width << 1);
}
else
{
/* frame DCT coding */
rfp = video->newframe[cc]
+ video->chrom_width * (by + ((comp & 2) << 2)) + bx + (comp & 8);
iincr = video->chrom_width;
}
}
else
{
/* field picture */
rfp = video->newframe[cc]
+ (video->chrom_width << 1) * (by + ((comp & 2) << 2)) + bx + (comp & 8);
iincr = (video->chrom_width << 1);
}
}
bp = slice->block[comp];
if(addflag)
{
#ifdef HAVE_MMX
if(video->have_mmx)
{
if(spar)
{
__asm__ __volatile__(
"movq (%2), %%mm6\n" /* 4 blockvals */
"pxor %%mm4, %%mm4\n"
"punpcklwd %%mm6, %%mm6\n"
"punpcklwd %%mm6, %%mm6\n"
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 rindex1 */
"movq %%mm0, %%mm2\n"
"punpcklbw %%mm4, %%mm0\n"
"punpckhbw %%mm4, %%mm2\n"
"paddw %%mm6, %%mm0\n"
"paddw %%mm6, %%mm2\n"
"packuswb %%mm2, %%mm0\n"
"movq %%mm0, (%1)\n"
"leal (%1, %3), %1\n"
"loop 1b\n"
: /* scr dest */
: "c" (8),"r" (rfp), "r" (bp), "r" (iincr)
);
}
else
{
__asm__ __volatile__(
"pxor %%mm4, %%mm4\n"
".align 8\n"
"1:"
"movq (%2), %%mm0\n" /* 8 rfp 0 1 2 3 4 5 6 7*/
"movq (%1), %%mm6\n" /* 4 blockvals 0 1 2 3 */
"movq %%mm0, %%mm2\n"
"movq 8(%1), %%mm5\n" /* 4 blockvals 0 1 2 3 */
"punpcklbw %%mm4, %%mm0\n" /* 0 2 4 6 */
"punpckhbw %%mm4, %%mm2\n" /* 1 3 5 7 */
"paddw %%mm6, %%mm0\n"
"paddw %%mm5, %%mm2\n"
"packuswb %%mm2, %%mm0\n"
"addl $16, %1\n"
"movq %%mm0, (%2)\n"
"leal (%2,%3), %2\n"
"loop 1b\n"
: /* scr dest */
: "c" (8),"r" (bp), "r" (rfp), "r" (iincr)
);
}
}
else
#endif
for(i = 0; i < 8; i++)
{
rfp[0] = CLIP(bp[0] + rfp[0]);
rfp[1] = CLIP(bp[1] + rfp[1]);
rfp[2] = CLIP(bp[2] + rfp[2]);
rfp[3] = CLIP(bp[3] + rfp[3]);
rfp[4] = CLIP(bp[4] + rfp[4]);
rfp[5] = CLIP(bp[5] + rfp[5]);
rfp[6] = CLIP(bp[6] + rfp[6]);
rfp[7] = CLIP(bp[7] + rfp[7]);
rfp += iincr;
bp += 8;
}
}
else
{
#ifdef HAVE_MMX
if(video->have_mmx)
{
if(spar)
{
__asm__ __volatile__(
"movd (%2), %%mm0\n" /* " 0 0 0 v1" */
"punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */
"punpcklwd %%mm0, %%mm0\n"
"paddw _MMX_128, %%mm0\n"
"packuswb %%mm0, %%mm0\n"
"leal (%0,%1,2), %%eax\n"
"movq %%mm0, (%0, %1)\n"
"movq %%mm0, (%%eax)\n"
"leal (%%eax,%1,2), %0\n"
"movq %%mm0, (%%eax, %1)\n"
"movq %%mm0, (%0)\n"
"leal (%0,%1,2), %%eax\n"
"movq %%mm0, (%0, %1)\n"
"movq %%mm0, (%%eax)\n"
"movq %%mm0, (%%eax, %1)\n"
:
: "D" (rfp), "c" (iincr), "b" (bp)
: "eax");
}
else
{
__asm__ __volatile__(
"movq _MMX_128,%%mm4\n"
".align 8\n"
"1:"
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
"paddw %%mm4, %%mm0\n"
"movq 16(%1), %%mm2\n"
"paddw %%mm4, %%mm1\n"
"movq 24(%1), %%mm3\n"
"paddw %%mm4, %%mm2\n"
"packuswb %%mm1, %%mm0\n"
"paddw %%mm4, %%mm3\n"
"addl $32, %1\n"
"packuswb %%mm3, %%mm2\n"
"movq %%mm0, (%2)\n"
"movq %%mm2, (%2,%3)\n"
"leal (%2,%3,2), %2\n"
"loop 1b\n"
:
: "c" (4), "r" (bp), "r" (rfp), "r" (iincr)
);
}
}
else
#endif
for(i = 0; i < 8; i++)
{
rfp[0] = CLIP(bp[0] + 128);
rfp[1] = CLIP(bp[1] + 128);
rfp[2] = CLIP(bp[2] + 128);
rfp[3] = CLIP(bp[3] + 128);
rfp[4] = CLIP(bp[4] + 128);
rfp[5] = CLIP(bp[5] + 128);
rfp[6] = CLIP(bp[6] + 128);
rfp[7] = CLIP(bp[7] + 128);
rfp+= iincr;
bp += 8;
}
}
return 0;
}
int mpeg3_decode_slice(mpeg3_slice_t *slice)
{
mpeg3video_t *video = (mpeg3video_t *) slice->video;
int comp;
int mb_type, cbp, motion_type = 0, dct_type;
int macroblock_address, mba_inc, mba_max;
int slice_vert_pos_ext;
unsigned int code;
int bx, by;
int dc_dct_pred[3];
int mv_count, mv_format, mvscale;
int pmv[2][2][2], mv_field_sel[2][2];
int dmv, dmvector[2];
int qs;
int stwtype, stwclass;
int snr_cbp;
int i;
mpeg3_slice_buffer_t *slice_buffer = slice->slice_buffer;
/* number of macroblocks per picture */
mba_max = video->mb_width * video->mb_height;
/* field picture has half as many macroblocks as frame */
if(video->pict_struct != FRAME_PICTURE)
mba_max >>= 1;
/* macroblock address */
macroblock_address = 0;
/* first macroblock in slice is not skipped */
mba_inc = 0;
slice->fault = 0;
code = mpeg3slice_getbits(slice_buffer, 32);
/* decode slice header (may change quant_scale) */
slice_vert_pos_ext = mpeg3video_getslicehdr(slice, video);
/* reset all DC coefficient and motion vector predictors */
dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
for(i = 0;
slice_buffer->current_position < slice_buffer->buffer_size;
i++)
{
if(mba_inc == 0)
{
/* Done */
if(!mpeg3slice_showbits(slice_buffer, 23)) return 0;
/* decode macroblock address increment */
mba_inc = mpeg3video_get_macroblock_address(slice);
if(slice->fault) return 1;
if(i == 0)
{
/* Get the macroblock_address */
macroblock_address = ((slice_vert_pos_ext << 7) + (code & 255) - 1) * video->mb_width + mba_inc - 1;
/* first macroblock in slice: not skipped */
mba_inc = 1;
}
}
if(slice->fault) return 1;
if(macroblock_address >= mba_max)
{
/* mba_inc points beyond picture dimensions */
/*fprintf(stderr, "mpeg3_decode_slice: too many macroblocks in picture\n"); */
return 1;
}
/* not skipped */
if(mba_inc == 1)
{
mpeg3video_macroblock_modes(slice,
video,
&mb_type,
&stwtype,
&stwclass,
&motion_type,
&mv_count,
&mv_format,
&dmv,
&mvscale,
&dct_type);
if(slice->fault) return 1;
if(mb_type & MB_QUANT)
{
qs = mpeg3slice_getbits(slice_buffer, 5);
if(video->mpeg2)
slice->quant_scale = video->qscale_type ? mpeg3_non_linear_mquant_table[qs] : (qs << 1);
else
slice->quant_scale = qs;
if(video->scalable_mode == SC_DP)
/* make sure quant_scale is valid */
slice->quant_scale = slice->quant_scale;
}
/* motion vectors */
/* decode forward motion vectors */
if((mb_type & MB_FORWARD) || ((mb_type & MB_INTRA) && video->conceal_mv))
{
if(video->mpeg2)
mpeg3video_motion_vectors(slice,
video,
pmv,
dmvector,
mv_field_sel,
0,
mv_count,
mv_format,
video->h_forw_r_size,
video->v_forw_r_size,
dmv,
mvscale);
else
mpeg3video_motion_vector(slice,
video,
pmv[0][0],
dmvector,
video->forw_r_size,
video->forw_r_size,
0,
0,
video->full_forw);
}
if(slice->fault) return 1;
/* decode backward motion vectors */
if(mb_type & MB_BACKWARD)
{
if(video->mpeg2)
mpeg3video_motion_vectors(slice,
video,
pmv,
dmvector,
mv_field_sel,
1,
mv_count,
mv_format,
video->h_back_r_size,
video->v_back_r_size,
0,
mvscale);
else
mpeg3video_motion_vector(slice,
video,
pmv[0][1],
dmvector,
video->back_r_size,
video->back_r_size,
0,
0,
video->full_back);
}
if(slice->fault) return 1;
/* remove marker_bit */
if((mb_type & MB_INTRA) && video->conceal_mv)
mpeg3slice_flushbit(slice_buffer);
/* macroblock_pattern */
if(mb_type & MB_PATTERN)
{
cbp = mpeg3video_get_cbp(slice);
if(video->chroma_format == CHROMA422)
{
/* coded_block_pattern_1 */
cbp = (cbp << 2) | mpeg3slice_getbits2(slice_buffer);
}
else
if(video->chroma_format == CHROMA444)
{
/* coded_block_pattern_2 */
cbp = (cbp << 6) | mpeg3slice_getbits(slice_buffer, 6);
}
}
else
cbp = (mb_type & MB_INTRA) ? ((1 << video->blk_cnt) - 1) : 0;
if(slice->fault) return 1;
/* decode blocks */
mpeg3video_clearblock(slice, 0, video->blk_cnt);
for(comp = 0; comp < video->blk_cnt; comp++)
{
if(cbp & (1 << (video->blk_cnt - comp - 1)))
{
if(mb_type & MB_INTRA)
{
if(video->mpeg2)
mpeg3video_getmpg2intrablock(slice, video, comp, dc_dct_pred);
else
mpeg3video_getintrablock(slice, video, comp, dc_dct_pred);
}
else
{
if(video->mpeg2)
mpeg3video_getmpg2interblock(slice, video, comp);
else
mpeg3video_getinterblock(slice, video, comp);
}
if(slice->fault) return 1;
}
}
/* reset intra_dc predictors */
if(!(mb_type & MB_INTRA))
dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
/* reset motion vector predictors */
if((mb_type & MB_INTRA) && !video->conceal_mv)
{
/* intra mb without concealment motion vectors */
pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
}
if((video->pict_type == P_TYPE) && !(mb_type & (MB_FORWARD | MB_INTRA)))
{
/* non-intra mb without forward mv in a P picture */
pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
/* derive motion_type */
if(video->pict_struct == FRAME_PICTURE)
motion_type = MC_FRAME;
else
{
motion_type = MC_FIELD;
/* predict from field of same parity */
mv_field_sel[0][0] = (video->pict_struct == BOTTOM_FIELD);
}
}
if(stwclass == 4)
{
/* purely spatially predicted macroblock */
pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
}
}
else
{
/* mba_inc!=1: skipped macroblock */
mpeg3video_clearblock(slice, 0, video->blk_cnt);
/* reset intra_dc predictors */
dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
/* reset motion vector predictors */
if(video->pict_type == P_TYPE)
pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
/* derive motion_type */
if(video->pict_struct == FRAME_PICTURE)
motion_type = MC_FRAME;
else
{
motion_type = MC_FIELD;
/* predict from field of same parity */
mv_field_sel[0][0] = mv_field_sel[0][1] = (video->pict_struct == BOTTOM_FIELD);
}
/* skipped I are spatial-only predicted, */
/* skipped P and B are temporal-only predicted */
stwtype = (video->pict_type == I_TYPE) ? 8 : 0;
/* clear MB_INTRA */
mb_type &= ~MB_INTRA;
/* no block data */
cbp = 0;
}
snr_cbp = 0;
/* pixel coordinates of top left corner of current macroblock */
bx = 16 * (macroblock_address % video->mb_width);
by = 16 * (macroblock_address / video->mb_width);
/* motion compensation */
if(!(mb_type & MB_INTRA))
mpeg3video_reconstruct(video,
bx,
by,
mb_type,
motion_type,
pmv,
mv_field_sel,
dmvector,
stwtype);
/* copy or add block data into picture */
for(comp = 0; comp < video->blk_cnt; comp++)
{
if((cbp | snr_cbp) & (1 << (video->blk_cnt - 1 - comp)))
{
#ifdef HAVE_MMX
if(video->have_mmx)
IDCT_mmx(slice->block[comp]);
else
#endif
mpeg3video_idct_conversion(slice->block[comp]);
mpeg3video_addblock(slice,
video,
comp,
bx,
by,
dct_type,
(mb_type & MB_INTRA) == 0);
}
}
/* advance to next macroblock */
macroblock_address++;
mba_inc--;
}
return 0;
}
void mpeg3_slice_loop(mpeg3_slice_t *slice)
{
mpeg3video_t *video = (mpeg3video_t *) slice->video;
int result = 1;
while(!slice->done)
{
#ifdef USE_PTHREADS
pthread_mutex_lock(&(slice->input_lock));
#endif
if(!slice->done)
{
/* Get a buffer to decode */
result = 1;
#ifdef USE_PTHREADS
pthread_mutex_lock(&(video->slice_lock));
#endif
if(slice->buffer_step > 0)
{
while(slice->current_buffer <= slice->last_buffer)
{
if(!video->slice_buffers[slice->current_buffer].done &&
slice->current_buffer <= slice->last_buffer)
{
result = 0;
break;
}
slice->current_buffer += slice->buffer_step;
}
}
else
{
while(slice->current_buffer >= slice->last_buffer)
{
if(!video->slice_buffers[slice->current_buffer].done &&
slice->current_buffer >= slice->last_buffer)
{
result = 0;
break;
}
slice->current_buffer += slice->buffer_step;
}
}
/* Got one */
if(!result && slice->current_buffer >= 0 && slice->current_buffer < video->total_slice_buffers)
{
slice->slice_buffer = &(video->slice_buffers[slice->current_buffer]);
slice->slice_buffer->done = 1;
#ifdef USE_PTHREADS
pthread_mutex_unlock(&(video->slice_lock));
pthread_mutex_unlock(&(slice->input_lock));
#endif
mpeg3_decode_slice(slice);
#ifdef USE_PTHREADS
pthread_mutex_unlock(&(slice->slice_buffer->completion_lock));
#endif
}
else {
#ifdef USE_PTHREADS
pthread_mutex_unlock(&(video->slice_lock));
#endif
}
}
#ifdef USE_PTHREADS
pthread_mutex_unlock(&(slice->output_lock));
#else
if ((slice->current_buffer > slice->last_buffer) || (slice->last_buffer == 0))
break;
#endif
}
}
int mpeg3_new_slice_decoder(void *video, mpeg3_slice_t *slice)
{
#ifdef USE_PTHREADS
pthread_attr_t attr;
struct sched_param param;
pthread_mutexattr_t mutex_attr;
#endif
slice->video = video;
slice->done = 0;
#ifdef USE_PTHREADS
pthread_mutexattr_init(&mutex_attr);
pthread_mutex_init(&(slice->input_lock), &mutex_attr);
pthread_mutex_lock(&(slice->input_lock));
pthread_mutex_init(&(slice->output_lock), &mutex_attr);
pthread_mutex_lock(&(slice->output_lock));
pthread_attr_init(&attr);
pthread_create(&(slice->tid), &attr, (void*)mpeg3_slice_loop, slice);
#endif
return 0;
}
int mpeg3_delete_slice_decoder(mpeg3_slice_t *slice)
{
slice->done = 1;
#ifdef USE_PTHREADS
pthread_mutex_unlock(&(slice->input_lock));
pthread_join(slice->tid, 0);
pthread_mutex_destroy(&(slice->input_lock));
pthread_mutex_destroy(&(slice->output_lock));
#endif
return 0;
}
|