/* Copyright (C) 2000-2003 Constantin Kaplinsky.  All Rights Reserved.
 * Copyright (C) 2004-2005 Cendio AB. All rights reserved.
 * Copyright (C) 2011 D. R. Commander.  All Rights Reserved.
 * Copyright 2014 Pierre Ossman for Cendio AB
 * Copyright (C) 2020-2024 m-privacy GmbH.  All Rights Reserved.
 * 
 * This is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this software; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 */

#include <rfb/MPDecompressor.h>
#include <rdr/Exception.h>
#include <rfb/Rect.h>
#include <rfb/PixelFormat.h>
#include <rfb/LogWriter.h>

#include <stdio.h>
#include <wels/codec_api.h>

#include <stdio.h>
extern "C" {
#include <jpeglib.h>
}
#include <jerror.h>
#include <setjmp.h>
#include <zstd.h>

#if !defined(__APPLE__)
#include <jxl/decode.h>
#ifndef WIN32
#include <jxl/resizable_parallel_runner.h>
#endif
#endif

using namespace rfb;

static rfb::LogWriter vlog("MPDecompressor");

unsigned MPDecompressor::compressionCount[MP_COMPRESSION_MAX+1];

struct MP_JPEG_ERROR_MGR {
  struct jpeg_error_mgr pub;
  jmp_buf jmpBuffer;
  char lastError[JMSG_LENGTH_MAX];
};
static void
JpegErrorExit(j_common_ptr jpegDinfo)
{
  MP_JPEG_ERROR_MGR *jpegErr = (MP_JPEG_ERROR_MGR *)jpegDinfo->err;

  (*jpegDinfo->err->output_message)(jpegDinfo);
  longjmp(jpegErr->jmpBuffer, 1);
}
static void
JpegOutputMessage(j_common_ptr jpegDinfo)
{
  MP_JPEG_ERROR_MGR *jpegErr = (MP_JPEG_ERROR_MGR *)jpegDinfo->err;

  (*jpegDinfo->err->format_message)(jpegDinfo, jpegErr->lastError);
}
struct MP_JPEG_SRC_MGR {
  struct jpeg_source_mgr pub;
  MPDecompressor *instance;
};
static void
JpegNoOp(j_decompress_ptr jpegDinfo)
{
}
static boolean
JpegFillInputBuffer(j_decompress_ptr jpegDinfo)
{
  ERREXIT(jpegDinfo, JERR_BUFFER_SIZE);
  return TRUE;
}
static void
JpegSkipInputData(j_decompress_ptr jpegDinfo, long num_bytes)
{
  MP_JPEG_SRC_MGR *jpegSrc = (MP_JPEG_SRC_MGR *)jpegDinfo->src;

  if (num_bytes < 0 || (size_t)num_bytes > jpegSrc->pub.bytes_in_buffer) {
    ERREXIT(jpegDinfo, JERR_BUFFER_SIZE);
  } else {
    jpegSrc->pub.next_input_byte += (size_t) num_bytes;
    jpegSrc->pub.bytes_in_buffer -= (size_t) num_bytes;
  }
}

/* convert videoFormatI420: IYUV 4:2:0, 12 Bit/pixel, planar: 1(Y) + 1/4(U) + 1/4(V)
 * to RGBA
 * see https://www.fourcc.org/pixel-format/yuv-i420/
 * formula derived from https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering
 */
static inline rdr::U8 clip(int value)
{
	if (value < 0)
		return 0;
	if (value > 255)
		return 255;
	return value;
}
static inline void calcRGBAPixel(rdr::U8 *pixel, rdr::U8 Y, rdr::U8 U, rdr::U8 V)
{
	const int c = Y - 16;
	const int d = U - 128;
	const int e = V - 128;

	pixel[0] = clip((298 * c + 409 * e + 128) >> 8);
	pixel[1] = clip((298 * c - 100 * d - 208 * e + 128) >> 8);
	pixel[2] = clip((298 * c + 517 * d + 128) >> 8);
	pixel[3] = 0;
}
static void i420ToRgba(rdr::U8 *target, const int targetStride, const int width, const int height, const rdr::U8 *srcY, const rdr::U8 *srcU, const rdr::U8 *srcV, const int srcStrideY, const int srcStrideUV)
{
	const rdr::U8 * posY = srcY;
	const rdr::U8 * posU = srcU;
	const rdr::U8 * posV = srcV;
	rdr::U8 * targetLine = target;
	rdr::U8 * posT;
	int x, y;

	for(y = 0; y < height; y++ ) {
		posT = targetLine;
		for(x = 0; x < width; x++ ) {
			calcRGBAPixel(posT, posY[x], posU[x/2], posV[x/2]);
			posT += 4;
		}
		targetLine += targetStride;
		posY += srcStrideY;
		if (y & 1) {
			posU += srcStrideUV;
			posV += srcStrideUV;
		}
	}
}

MPDecompressor::MPDecompressor(size_t threadNum_)
{
	threadNum = threadNum_;
	vlog.debug("initializing for Decoder thread %lu", threadNum);

#pragma GCC ivdep
	for (int i = MP_COMPRESSION_MIN; i <= MP_COMPRESSION_MAX; i++) {
		compressionCountLocal[i] = 0;
		totalBytes[i] = 0;
		totalCBytes[i] = 0;
		minCompressed[i] = 100000;
		maxCompressed[i] = 0;
	}

#if !defined(__APPLE__)
	jpegxlDecoder = JxlDecoderCreate(nullptr);
#ifndef WIN32
	jpegxlRunner = JxlResizableParallelRunnerCreate(nullptr);
#endif
#endif
}

MPDecompressor::~MPDecompressor()
{
#if !defined(__APPLE__)
#ifndef WIN32
	JxlResizableParallelRunnerDestroy(jpegxlRunner);
#endif
	JxlDecoderDestroy(jpegxlDecoder);
#endif

	if (vlog.getLevel() >= 100) {
		unsigned compressionCountSum = 0;
		rdr::U64 totalBytesSum = 0;
		rdr::U64 totalCBytesSum = 0;
		int minCompressedTotal = 100000;
		int maxCompressedTotal = 0;
#pragma GCC ivdep
		for (int i = MP_COMPRESSION_MIN; i <= MP_COMPRESSION_MAX; i++) {
			if (compressionCountLocal[i] > 0 && totalBytes[i] > 0) {
				compressionCountSum += compressionCountLocal[i];
				vlog.debug("thread %lu: compression %u %s: %u calls decompressed %llu Bytes from %llu Bytes with ratio %llu%%, average rect size %llu uncompressed and %llu compressed, min/max compressed size %u/%u", threadNum, i, mpCompressionName(i), compressionCountLocal[i], totalBytes[i], totalCBytes[i], 100 * totalCBytes[i] / totalBytes[i], totalBytes[i]/compressionCountLocal[i], totalCBytes[i]/compressionCountLocal[i], minCompressed[i], maxCompressed[i]);
				totalBytesSum += totalBytes[i];
				totalCBytesSum += totalCBytes[i];
				if (minCompressed[i] < minCompressedTotal)
					minCompressedTotal = minCompressed[i];
				if (maxCompressed[i] > maxCompressedTotal)
					maxCompressedTotal = maxCompressed[i];
			}
		}
		if (compressionCountSum > 0 && totalBytesSum > 0 && totalCBytesSum > 0)
			vlog.debug("thread %lu: %u calls decompressed %llu Bytes from %llu Bytes with ratio %llu%%, average rect size %llu uncompressed and %llu compressed, min/max compressed size %u/%u", threadNum, compressionCountSum, totalBytesSum, totalCBytesSum, 100 * totalCBytesSum / totalBytesSum, totalBytesSum/compressionCountSum, totalCBytesSum/compressionCountSum, minCompressedTotal, maxCompressedTotal);
	}
}

void MPDecompressor::decompress(const rdr::U8 *mpBuf, int mpBufLen,
	rdr::U8 *buf, int stride, const Rect& r, const PixelFormat& pf)
{
	const rdr::U8 *srcBuf = mpBuf;
	int srcBufLen = mpBufLen;

	int w = r.width();
	int h = r.height();

	ISVCDecoder *welsSvcDecoderP = NULL;

	bool is888;
	rdr::U8 *transBuf = buf;
	const int targetBufPixelsize = 4;
	int mpPixelsize;
	int mpStride;
	rdr::U8 * data;
	rdr::U8 mpCompression;
	int mpWidth;
	int mpHeight;

	if (stride == 0)
		stride = w;
	is888 = pf.is888();

	/* read header */
	mpCompression = *srcBuf;
	srcBuf++;
	srcBufLen--;
	/* flags: highest 4 Bits for mpLevel */
	const int mpLevel = *srcBuf >> 4;
	srcBuf++;
	srcBufLen--;

	compressionCount[mpCompression]++;
	compressionCountLocal[mpCompression]++;
	totalBytes[mpCompression] += w * h * targetBufPixelsize;
	totalCBytes[mpCompression] += srcBufLen;
	if (srcBufLen < minCompressed[mpCompression])
		minCompressed[mpCompression] = srcBufLen;
	if (srcBufLen > maxCompressed[mpCompression])
		maxCompressed[mpCompression] = srcBufLen;

	/* solid? */
	if (mpCompression == MP_COMPRESSION_SOLID) {
		if (is888) {
			rdr::U8 * line;

			vlog.verbose2("SOLID direct: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, raw size: %u, compressed: %u, colour RGBA: %#02X%02X%02X%02X", r.tl.x, r.tl.y, w, h, targetBufPixelsize, stride, h * w * targetBufPixelsize, srcBufLen, *srcBuf, *(srcBuf+1), *(srcBuf+2), *(srcBuf+3));
			line = buf;
#pragma GCC ivdep
			for (int y = 0; y < h ; y++) {
#pragma GCC ivdep
				for (int x = 0; x < w ; x++) {
					memcpy(line + x * targetBufPixelsize, srcBuf, targetBufPixelsize);
				}
				line += stride * targetBufPixelsize;
			}
		} else {
			static const PixelFormat pfRGBX(32, 24, false, true, 255, 255, 255, 0, 8, 16);

			vlog.verbose2("SOLID buffered: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, raw size: %u, compressed: %u, colour RGBA: %#02X%02X%02X%02X", r.tl.x, r.tl.y, w, h, targetBufPixelsize, stride, h * w * targetBufPixelsize, srcBufLen, *srcBuf, *(srcBuf+1), *(srcBuf+2), *(srcBuf+3));
			data = (rdr::U8 *) malloc(w * h * targetBufPixelsize);
			if (!data) {
				vlog.error("SOLID: failed to alloc memory for RGBA data");
				return;
			}
#pragma GCC ivdep
			for (int i = 0; i < w * h ; i++) {
				memcpy(data + i * targetBufPixelsize, srcBuf, targetBufPixelsize);
			}
			pf.bufferFromBuffer(buf, pfRGBX, data, w, h, stride, w);
			free(data);
		}
		return;
	}

	/* process body */
	if (mpCompression == MP_COMPRESSION_RAW) {
		data = (rdr::U8 *) srcBuf;
		mpWidth = w;
		mpHeight = h;
		mpPixelsize = 4;
		mpStride = mpWidth * mpPixelsize;
	} else if (mpCompression == MP_COMPRESSION_JPEG) {
		struct jpeg_decompress_struct *jpegDinfo;
		struct MP_JPEG_ERROR_MGR *jpegErr;
		struct MP_JPEG_SRC_MGR *jpegSrc;
		JSAMPROW *jpegRowPointer = NULL;
#ifdef JCS_EXTENSIONS
		bool direct = false;
#endif

		mpWidth = w;
		mpHeight = h;
#ifdef JCS_EXTENSIONS
		if (is888)
			direct = true;
		mpPixelsize = 4;
#else
		mpPixelsize = 3;
#endif
		mpStride = mpWidth * mpPixelsize;
//		vlog.verbose2("JPEG: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);

		jpegDinfo = new jpeg_decompress_struct;
		jpegErr = new struct MP_JPEG_ERROR_MGR;
		jpegDinfo->err = jpeg_std_error(&jpegErr->pub);
		snprintf(jpegErr->lastError, JMSG_LENGTH_MAX, "No error");
		jpegErr->pub.error_exit = JpegErrorExit;
		jpegErr->pub.output_message = JpegOutputMessage;
		if(setjmp(jpegErr->jmpBuffer)) {
			// this will execute if libjpeg has an error
			throw rdr::Exception("%s", jpegErr->lastError);
		}
		jpeg_create_decompress(jpegDinfo);
		jpegSrc = new struct MP_JPEG_SRC_MGR;
		jpegSrc->pub.init_source = JpegNoOp;
		jpegSrc->pub.fill_input_buffer = JpegFillInputBuffer;
		jpegSrc->pub.skip_input_data = JpegSkipInputData;
		jpegSrc->pub.resync_to_restart = jpeg_resync_to_restart;
		jpegSrc->pub.term_source = JpegNoOp;
		jpegSrc->instance = this;
		jpegDinfo->src = (struct jpeg_source_mgr *)jpegSrc;
		if(setjmp(jpegErr->jmpBuffer)) {
			// this will execute if libjpeg has an error
			jpeg_abort_decompress(jpegDinfo);
			if (jpegRowPointer) delete[] jpegRowPointer;
			throw rdr::Exception("%s", jpegErr->lastError);
		}
		jpegSrc->pub.next_input_byte = (const JOCTET *) srcBuf;
		jpegSrc->pub.bytes_in_buffer = srcBufLen;
		jpeg_read_header(jpegDinfo, TRUE);
#ifdef JCS_EXTENSIONS
		jpegDinfo->out_color_space = JCS_EXT_RGBX;
#else
		jpegDinfo->out_color_space = JCS_RGB;
#endif
		jpegRowPointer = new JSAMPROW[mpHeight];
#ifdef JCS_EXTENSIONS
		if (direct) {
			vlog.verbose2("direct: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
			mpStride = stride * targetBufPixelsize;
			data = buf;
		} else {
#endif
			data = (rdr::U8 *) malloc(mpHeight * mpStride);
			if (!data) {
				vlog.error("JPEG: failed to alloc memory for RGBA data");
				delete[] jpegRowPointer;
				return;
			}
#ifdef JCS_EXTENSIONS
		}
#endif
#pragma GCC ivdep
		for (int y = 0; y < mpHeight; y++)
			jpegRowPointer[y] = (JSAMPROW) data + y * mpStride;
		jpeg_start_decompress(jpegDinfo);
		if (   jpegDinfo->output_width != (unsigned) mpWidth
		    || jpegDinfo->output_height != (unsigned) mpHeight
		    || jpegDinfo->output_components != mpPixelsize) {
			jpeg_abort_decompress(jpegDinfo);
			if (jpegRowPointer) delete[] jpegRowPointer;
			vlog.debug("rect size mismatch: expected %ux%ux%u, got MP image with %ux%ux%u", mpWidth, mpHeight, mpPixelsize, jpegDinfo->output_width, jpegDinfo->output_height, jpegDinfo->output_components);
			return;
		}
		while (jpegDinfo->output_scanline < jpegDinfo->output_height) {
			jpeg_read_scanlines(jpegDinfo, &jpegRowPointer[jpegDinfo->output_scanline],
					jpegDinfo->output_height - jpegDinfo->output_scanline);
		}
		jpeg_finish_decompress(jpegDinfo);
		delete[] jpegRowPointer;
		if(setjmp(jpegErr->jmpBuffer)) {
			// this will execute if libjpeg has an error
			return;
		}
		jpeg_destroy_decompress(jpegDinfo);
		delete jpegErr;
		delete jpegSrc;
		delete jpegDinfo;
#ifdef JCS_EXTENSIONS
		if (direct)
			return;
#endif
#if !defined(__APPLE__)
	} else if (mpCompression == MP_COMPRESSION_JPEGXL) {
		JxlDecoderStatus jpegxlStatus;
		bool direct = false;
		mpPixelsize = 4;
		mpHeight = h;
		mpWidth = w;
		mpStride = mpWidth * mpPixelsize;

		JxlDecoderReset(jpegxlDecoder);
		jpegxlStatus = JxlDecoderSubscribeEvents(jpegxlDecoder, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);
		if (JXL_DEC_SUCCESS != jpegxlStatus) {
			vlog.error("JPEGXL: JxlDecoderSubscribeEvents() failed with status %i", jpegxlStatus);
			return;
		}
#ifndef WIN32
		jpegxlStatus = JxlDecoderSetParallelRunner(jpegxlDecoder, JxlResizableParallelRunner, jpegxlRunner);
		if (JXL_DEC_SUCCESS != jpegxlStatus) {
			vlog.error("JPEGXL: JxlDecoderSetParallelRunner() failed with status %i", jpegxlStatus);
			return;
		}
#endif
		JxlDecoderSetInput(jpegxlDecoder, srcBuf, srcBufLen);
		JxlDecoderCloseInput(jpegxlDecoder);
		if (is888) {
			direct = true;
			data = buf;
			vlog.verbose2("JPEGXL: direct: thread: %lu, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", threadNum, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);
		} else {
			data = NULL;
			vlog.verbose2("JPEGXL: thread: %lu, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", threadNum, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);
		}
		bool doLoop = true;
		while (doLoop) {
			jpegxlStatus = JxlDecoderProcessInput(jpegxlDecoder);
			switch (jpegxlStatus) {
				case JXL_DEC_BASIC_INFO:
					JxlBasicInfo info;
					jpegxlStatus = JxlDecoderGetBasicInfo(jpegxlDecoder, &info);
					if (JXL_DEC_SUCCESS != jpegxlStatus) {
						vlog.error("JPEGXL: JxlDecoderGetBasicInfo() failed");
						if (data && data != buf)
							free(data);
						return;
					}
					mpWidth = info.xsize;
					mpHeight = info.ysize;
					if (   (mpWidth != w)
					    || (mpHeight != h)
					   ) {
						vlog.error("JPEGXL: rect size mismatch: expected %ux%u, got MP image with %ux%u", w, h, mpWidth, mpHeight);
						if (data && data != buf)
							free(data);
						return;
					}
//					vlog.verbose2("JPEGXL: got info: xsize %u, ysize %u, bits_per_sample %u, exponent_bits_per_sample %u, alpha_bits %u, alpha_exponent_bits %u, num_color_channels %u, num_extra_channels %u, uses_original_profile %u",
//						info.xsize, info.ysize, info.bits_per_sample, info.exponent_bits_per_sample, info.alpha_bits, info.alpha_exponent_bits, info.num_color_channels, info.num_extra_channels, info.uses_original_profile);
#ifndef WIN32
					JxlResizableParallelRunnerSetThreads(jpegxlRunner, JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
#endif
					continue;
				case JXL_DEC_NEED_IMAGE_OUT_BUFFER:
					size_t buffer_size;
					JxlPixelFormat format;

					if (direct) {
						if (data != buf) {
							free(data);
							vlog.error("JPEGXL: data already allocated, repeated JXL_DEC_NEED_IMAGE_OUT_BUFFER!");
							return;
						}
						mpStride = stride * targetBufPixelsize;
						/* data align in bytes, 0 = not needed */
						format = {(uint32_t) targetBufPixelsize, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, (size_t) mpStride};
					} else {
						data = (rdr::U8 *) malloc(mpHeight * mpStride);
						if (!data) {
							vlog.error("JPEGXL: failed to alloc memory for RGBA data");
							return;
						}
						/* data align in bytes, 0 = not needed */
						format = {(uint32_t) targetBufPixelsize, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
					}
					jpegxlStatus = JxlDecoderImageOutBufferSize(jpegxlDecoder, &format, &buffer_size);
					if (JXL_DEC_SUCCESS != jpegxlStatus) {
						vlog.error("JPEGXL: JxlDecoderImageOutBufferSize() failed");
						if (data != buf)
							free(data);
						return;
					}
					if (buffer_size != (size_t) mpHeight * mpStride) {
						vlog.error("JPEGXL: image size mismatch: got size %lu, wanted size %u", buffer_size, mpHeight * mpStride);
						if (data != buf)
							free(data);
						return;
					}
					jpegxlStatus = JxlDecoderSetImageOutBuffer(jpegxlDecoder, &format, data, buffer_size);
					if (JXL_DEC_SUCCESS != jpegxlStatus) {
						vlog.error("JPEGXL: JxlDecoderSetImageOutBuffer() failed");
						if (data != buf)
							free(data);
						return;
					}
					continue;
				case JXL_DEC_FULL_IMAGE:
				case JXL_DEC_SUCCESS:
					doLoop = false;
					continue;
				case JXL_DEC_ERROR:
					vlog.error("JPEGXL: JxlDecoderProcessInput() failed with error");
					if (data && data != buf)
						free(data);
					return;
				case JXL_DEC_NEED_MORE_INPUT:
					vlog.error("JPEGXL: JxlDecoderProcessInput() wants more input, but got all input");
					if (data && data != buf)
						free(data);
					return;
				default:
					vlog.error("JPEGXL: JxlDecoderSetImageOutBuffer() failed");
					if (data && data != buf)
						free(data);
					return;
			}
		}
		if (direct)
			return;
#endif
	} else if (mpCompression == MP_COMPRESSION_H264) {
		SBufferInfo welsDstBufInfo;
		int rv;
		unsigned char *welsData[3] = {NULL, NULL, NULL};
		SDecodingParam welsParam;

		welsParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC;
		welsParam.bParseOnly = false;

		memset(&welsDstBufInfo, 0, sizeof(SBufferInfo));
		rv = WelsCreateDecoder(&welsSvcDecoderP);
		if (rv != cmResultSuccess) {
			vlog.error("WelsCreateDecoder() failed with error: %i", rv);
			return;
		}
		rv = welsSvcDecoderP->Initialize(&welsParam);
		if (rv != cmResultSuccess) {
			vlog.error("welsSvcDecoderP->Initialize() failed with error: %i", rv);
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
		rv = welsSvcDecoderP->DecodeFrameNoDelay(srcBuf, srcBufLen, welsData, &welsDstBufInfo);
		if (rv != cmResultSuccess) {
			vlog.error("welsSvcDecoderP->DecodeFrameNoDelay() failed with error: %i", rv);
			welsSvcDecoderP->Uninitialize();
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
		if (welsDstBufInfo.iBufferStatus <= 0) {
			vlog.error("decoded H264 image without data");
			welsSvcDecoderP->Uninitialize();
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
		mpWidth = welsDstBufInfo.UsrData.sSystemBuffer.iWidth;
		mpHeight = welsDstBufInfo.UsrData.sSystemBuffer.iHeight;

		/* sanity checks */
		if (   (mpWidth != w && mpWidth != w + 1)
		    || (mpHeight != h && mpHeight != h + 1)
		   ) {
			vlog.debug("H264: rect size mismatch: expected %u-%u x %u-%u, got MP image with %ux%u", w, w + 1, h, h + 1, mpWidth, mpHeight);
			welsSvcDecoderP->Uninitialize();
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
		mpPixelsize = 4;
		mpStride = mpWidth * mpPixelsize;
		if (is888) {
			/* direct */
			vlog.verbose2("direct: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
			i420ToRgba(buf, stride * targetBufPixelsize, w, h, welsData[0], welsData[1], welsData[2], welsDstBufInfo.UsrData.sSystemBuffer.iStride[0], welsDstBufInfo.UsrData.sSystemBuffer.iStride[1]);
			welsSvcDecoderP->Uninitialize();
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
		data = (rdr::U8 *) malloc(mpHeight * mpStride);
		if (!data) {
			vlog.error("H264: failed to alloc memory for RGBA data");
			welsSvcDecoderP->Uninitialize();
			WelsDestroyDecoder(welsSvcDecoderP);
			return;
		}
//		vlog.verbose2("H264/Wels: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, iFormat: %u, iWidthYUV: %u, iHeightYUV: %u, strideY: %u, strideUV: %u, raw size: %u, compressed: %u", r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, welsDstBufInfo.UsrData.sSystemBuffer.iFormat, welsDstBufInfo.UsrData.sSystemBuffer.iWidth, welsDstBufInfo.UsrData.sSystemBuffer.iHeight, welsDstBufInfo.UsrData.sSystemBuffer.iStride[0], welsDstBufInfo.UsrData.sSystemBuffer.iStride[1], h * w * mpPixelsize, srcBufLen);
		i420ToRgba(data, mpStride, mpWidth, mpHeight, welsData[0], welsData[1], welsData[2], welsDstBufInfo.UsrData.sSystemBuffer.iStride[0], welsDstBufInfo.UsrData.sSystemBuffer.iStride[1]);
	} else if (mpCompression == MP_COMPRESSION_RAWI420) {
		int expectedDataSize;
		const rdr::U8 * srcBufU;
		const rdr::U8 * srcBufV;

		mpWidth = w;
		mpHeight = h;
		mpPixelsize = 4;
		mpStride = mpWidth * mpPixelsize;
		/* U and V each need 1/4 of pixels plus extra column for
		 * uneven mpWidth and extra line for uneven mpHeight
		 */
		expectedDataSize = mpWidth * mpHeight * 3 / 2 + 2 * ((mpWidth & 1) * mpHeight + (mpHeight & 1) * mpWidth);
//		vlog.verbose2("RAWI420: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);
		if (srcBufLen != expectedDataSize) {
			vlog.error("RAWI420: buffer size mismatch: expected %u, got %u", expectedDataSize, srcBufLen);
			return;
		}
		srcBufU = srcBuf + mpWidth * mpHeight;
		srcBufV = srcBufU + mpWidth * mpHeight / 4 + (mpWidth & 1) * mpHeight + (mpHeight & 1) * mpWidth;
		if (is888) {
			/* direct */
			vlog.verbose2("direct: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
			i420ToRgba(buf, stride * targetBufPixelsize, mpWidth, mpHeight, srcBuf, srcBufU, srcBufV, mpWidth, (mpWidth + 1) / 2);
			return;
		}
		data = (rdr::U8 *) malloc(mpHeight * mpStride);
		if (!data) {
			vlog.error("RAWI420: failed to alloc memory for RGBA data");
			return;
		}
		i420ToRgba(data, mpStride, mpWidth, mpHeight, srcBuf, srcBufU, srcBufV, mpWidth, (mpWidth + 1) / 2);
	} else if (mpCompression == MP_COMPRESSION_ZSTD) {
		int dataSize;

		mpWidth = w;
		mpHeight = h;
		mpPixelsize = 4;
		mpStride = mpWidth * mpPixelsize;
//		vlog.verbose2("ZSTD: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);
		dataSize = ZSTD_getFrameContentSize(srcBuf, srcBufLen);
		if (dataSize != mpStride * mpHeight) {
			vlog.error("ZSTD: data size mismatch: expected %u, got %i", mpStride * mpHeight, dataSize);
			return;
		}
		data = (rdr::U8 *) malloc(dataSize);
		if (!data) {
			vlog.error("ZSTD: failed to alloc memory for RGBA data");
			return;
		}
		dataSize = ZSTD_decompress(data, dataSize, srcBuf, srcBufLen);
		if (dataSize < 0) {
			vlog.error("ZSTD: ZSTD_decompress() failed with error %i", dataSize);
			free(data);
			return;
		}
//		vlog.verbose("ZSTD: compressed: %u, decompressed: %u, ratio: %u%%", srcBufLen, dataSize, 100 * srcBufLen / dataSize);
	} else if (mpCompression == MP_COMPRESSION_I420ZSTD) {
		int expectedUncompSize;
		rdr::U8 * uncompBuf;
		int uncompBufSize;
		const rdr::U8 * uncompBufU;
		const rdr::U8 * uncompBufV;

		mpWidth = w;
		mpHeight = h;
		mpPixelsize = 4;
		mpStride = mpWidth * mpPixelsize;
//		vlog.verbose2("I420+ZSTD: x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpWidth: %u, mpHeight: %u, mpStride: %u, raw size: %u, compressed: %u", r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpWidth, mpHeight, mpStride, h * w * mpPixelsize, srcBufLen);
		expectedUncompSize = mpWidth * mpHeight * 3 / 2 + 2 * ((mpWidth & 1) * mpHeight + (mpHeight & 1) * mpWidth);
		uncompBufSize = ZSTD_getFrameContentSize(srcBuf, srcBufLen);
		if (uncompBufSize != expectedUncompSize) {
			vlog.error("I420ZSTD: uncompressed data size mismatch: expected %u, got %i", expectedUncompSize, uncompBufSize);
			return;
		}
		uncompBuf = (rdr::U8 *) malloc(uncompBufSize);
		if (!uncompBuf) {
			vlog.error("I420+ZSTD: failed to alloc memory for uncompressed data");
			return;
		}
		uncompBufSize = ZSTD_decompress(uncompBuf, uncompBufSize, srcBuf, srcBufLen);
		if (uncompBufSize < 0) {
			vlog.error("I420+ZSTD: ZSTD_decompress() failed with error %i", uncompBufSize);
			return;
		}
		uncompBufU = uncompBuf + mpWidth * mpHeight;
		uncompBufV = uncompBufU + mpWidth * mpHeight / 4 + (mpWidth & 1) * mpHeight + (mpHeight & 1) * mpWidth;
		if (is888) {
			/* direct */
			vlog.verbose2("direct: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
			i420ToRgba(buf, stride * targetBufPixelsize, mpWidth, mpHeight, uncompBuf, uncompBufU, uncompBufV, mpWidth, (mpWidth + 1) / 2);
			free(uncompBuf);
			return;
		}
		data = (rdr::U8 *) malloc(mpStride * mpHeight);
		if (!data) {
			vlog.error("I420+ZSTD: failed to alloc memory for RGBA data");
			free(uncompBuf);
			return;
		}
		i420ToRgba(data, mpStride, mpWidth, mpHeight, uncompBuf, uncompBufU, uncompBufV, mpWidth, (mpWidth + 1) / 2);
		free(uncompBuf);
	} else {
		vlog.error("invalid MP compression: %u, mpLevel: %u,w %u, h %u", mpCompression, mpLevel, w, h);
		return;
	}
	if (!is888) {
		vlog.verbose("not is888, use transBuf: w: %u, h: %u, stride: %u, mpWidth: %u, mpStride: %u", w, h, stride, mpWidth, mpStride);
		transBuf = (rdr::U8 *) malloc(w * h * targetBufPixelsize);
		if (!transBuf) {
			vlog.error("failed to alloc transBuf");
			if (mpCompression == MP_COMPRESSION_RAWI420)
				free(data);
			else if (mpCompression == MP_COMPRESSION_JPEG) {
#ifdef JCS_EXTENSIONS
				if (data != buf)
#endif
					free(data);
#if !defined(__APPLE__)
			} else if (mpCompression == MP_COMPRESSION_JPEGXL) {
				if (data != buf)
					free(data);
#endif
			} else if (mpCompression == MP_COMPRESSION_H264)
				free(data);
			return;
		}
	}
	if(stride != w || mpStride != w * targetBufPixelsize) {
		const int srcoffset = mpStride;
		const int targetoffset = stride * targetBufPixelsize;
		int y;

		if (mpPixelsize == targetBufPixelsize) {
			const int linesize = (w <= mpWidth ? w * targetBufPixelsize : mpWidth * targetBufPixelsize);

			vlog.verbose2("line: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
#pragma GCC ivdep
			for (y = 0; y < h ; y++) {
				memcpy(transBuf + y * targetoffset, data + y * srcoffset, linesize);
			}
		} else {
			const rdr::U8 * srcline = data;
			rdr::U8 * targetline = transBuf;
			int x;

			vlog.verbose2("pixel: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
#pragma GCC ivdep
			for (y = 0; y < h ; y++) {
#pragma GCC ivdep
				for (x = 0; x < w ; x++) {
					memcpy(targetline + x * targetBufPixelsize, srcline + x * mpPixelsize, mpPixelsize);
					targetline[x * targetBufPixelsize + mpPixelsize] = 0;
				}
				targetline += targetoffset;
				srcline += srcoffset;
			}
		}
	} else {
		vlog.verbose2("copy: compression: %u (%s), mpLevel: %u, x: %u, y: %u, w: %u, h: %u, mpPixelsize: %u, stride: %u, mpStride: %u, raw size: %u, compressed: %u", mpCompression, mpCompressionName(mpCompression), mpLevel, r.tl.x, r.tl.y, w, h, mpPixelsize, stride, mpStride, h * w * mpPixelsize, srcBufLen);
		if (transBuf != data)
			memcpy(transBuf, data, w * h * targetBufPixelsize);
	}

	if (mpCompression == MP_COMPRESSION_RAWI420) {
		free(data);
	} else if (mpCompression == MP_COMPRESSION_JPEG) {
#ifdef JCS_EXTENSIONS
		if (data != buf)
#endif
			free(data);
#if !defined(__APPLE__)
	} else if (mpCompression == MP_COMPRESSION_JPEGXL) {
		if (data != buf)
			free(data);
#endif
	} else if (mpCompression == MP_COMPRESSION_H264) {
		free(data);
		welsSvcDecoderP->Uninitialize();
		WelsDestroyDecoder(welsSvcDecoderP);
	} else if (mpCompression == MP_COMPRESSION_ZSTD) {
		free(data);
	} else if (mpCompression == MP_COMPRESSION_I420ZSTD) {
		free(data);
	}
	if (!is888) {
		if (mpPixelsize == targetBufPixelsize) {
			static const PixelFormat pfRGBX(32, 24, false, true, 255, 255, 255, 0, 8, 16);

			pf.bufferFromBuffer(buf, pfRGBX, transBuf, w, h, stride, w);
		} else {
			pf.bufferFromRGB(buf, transBuf, w, stride, h);
		}
		free(transBuf);
	}
}
