From 3e9bbf514f6652c8b2ecb73aebba876d7d488df1 Mon Sep 17 00:00:00 2001 From: James Deng Date: Mon, 15 Apr 2024 11:42:57 +0800 Subject: [PATCH] Update for v1.0beta3.1 --- fs/cpio/dracut.conf | 5 +- package/dracut/busybox-init-module-setup.sh | 3 +- package/dracut/dracut.mk | 2 - package/dracut/rescue-module-setup.sh | 19 - .../0006-update-version-fix-some-bugs.patch | 703 + ...c-v-h264_stcodec-i-xxx.mp4-output.yu.patch | 38 + ...-support-yuv420p-output-pixel-format.patch | 125 + .../0009-stcodec-support-video-encode.patch | 592 + ...010-ffmpeg-optimize-stcodecenc-logic.patch | 327 + .../0011-fix-bug-video-transcoding-blur.patch | 53 + ...port-YUV420P-and-fix-some-small-bugs.patch | 76 + .../0001-support-spacemit-plugin.patch | 7727 ++++ ....-fix-spacemitsrc-pressure-test-fail.patch | 3767 ++ ...y-mjpeg-use-dec-parse-interface-bb11.patch | 406 + .../0004-0.0.61mpp-bb12-bb13.patch | 474 + package/gstreamer1/gst1-plugins-bad/Config.in | 14 + .../gst1-plugins-bad/gst1-plugins-bad.mk | 25 +- package/haveged/{S21haveged => S09haveged} | 0 package/haveged/haveged.mk | 4 +- ...RVV-optimized-jpeg-turbo-gcc-version.patch | 16216 +++++++ .../jpeg-turbo/0002-fix-thumbnail-bug.patch | 162 + package/jpeg-turbo/jpeg-turbo.mk | 5 +- package/jpeg/Config.in | 1 + .../0009-RVV-optimized-chacha20.patch | 312 + ...pile-err-which-has-no-header-file-ri.patch | 27 + package/libopenssl/libopenssl.mk | 4 + .../libpng/0002-RVV-optimized-libpng.patch | 1359 + package/libpng/libpng.mk | 5 + ...r-when-open-wayland-can-not-find-way.patch | 27 + package/sdl2/sdl2.mk | 7 +- .../zlib-ng/0004-RVV-optimized-zlib-ng.patch | 37255 ++++++++++++++++ package/zlib-ng/zlib-ng.mk | 4 + 32 files changed, 69712 insertions(+), 32 deletions(-) delete mode 100755 package/dracut/rescue-module-setup.sh create mode 100644 package/ffmpeg/0006-update-version-fix-some-bugs.patch create mode 100644 package/ffmpeg/0007-optimize-ffmpeg-c-v-h264_stcodec-i-xxx.mp4-output.yu.patch create mode 100644 
package/ffmpeg/0008-stcodecdec-support-yuv420p-output-pixel-format.patch create mode 100644 package/ffmpeg/0009-stcodec-support-video-encode.patch create mode 100644 package/ffmpeg/0010-ffmpeg-optimize-stcodecenc-logic.patch create mode 100644 package/ffmpeg/0011-fix-bug-video-transcoding-blur.patch create mode 100644 package/ffmpeg/0012-stcodecenc-support-YUV420P-and-fix-some-small-bugs.patch create mode 100644 package/gstreamer1/gst1-plugins-bad/0001-support-spacemit-plugin.patch create mode 100644 package/gstreamer1/gst1-plugins-bad/0002-1.-fix-spacemitsrc-pressure-test-fail.patch create mode 100644 package/gstreamer1/gst1-plugins-bad/0003-only-mjpeg-use-dec-parse-interface-bb11.patch create mode 100644 package/gstreamer1/gst1-plugins-bad/0004-0.0.61mpp-bb12-bb13.patch rename package/haveged/{S21haveged => S09haveged} (100%) create mode 100644 package/jpeg-turbo/0001-RVV-optimized-jpeg-turbo-gcc-version.patch create mode 100644 package/jpeg-turbo/0002-fix-thumbnail-bug.patch create mode 100644 package/libopenssl/0009-RVV-optimized-chacha20.patch create mode 100644 package/libopenssl/0010-fix-x86-host-compile-err-which-has-no-header-file-ri.patch create mode 100644 package/libpng/0002-RVV-optimized-libpng.patch create mode 100644 package/sdl2/0001-fix-compile-error-when-open-wayland-can-not-find-way.patch create mode 100644 package/zlib-ng/0004-RVV-optimized-zlib-ng.patch diff --git a/fs/cpio/dracut.conf b/fs/cpio/dracut.conf index 77f3ad4e..2337736c 100644 --- a/fs/cpio/dracut.conf +++ b/fs/cpio/dracut.conf @@ -15,9 +15,7 @@ do_strip=no # Dracut modules needed add_dracutmodules+=" \ -busybox-init \ -busybox \ -rescue \ +busybox-init " # Modules to ignore @@ -26,6 +24,7 @@ bash \ biosdevname \ btrfs \ bluetooth \ +busybox \ caps \ cifs \ crypt \ diff --git a/package/dracut/busybox-init-module-setup.sh b/package/dracut/busybox-init-module-setup.sh index b9880e02..d6ea9435 100644 --- a/package/dracut/busybox-init-module-setup.sh +++ 
b/package/dracut/busybox-init-module-setup.sh @@ -55,6 +55,5 @@ install() { /etc/group \ /etc/passwd \ /etc/shadow \ - /etc/hostname \ - /lib/firmware/esos.elf + /etc/hostname } diff --git a/package/dracut/dracut.mk b/package/dracut/dracut.mk index 52a4f2fa..3eb9bcc4 100644 --- a/package/dracut/dracut.mk +++ b/package/dracut/dracut.mk @@ -43,8 +43,6 @@ ifeq ($(BR2_INIT_BUSYBOX),y) define HOST_DRACUT_POST_INSTALL_BUSYBOX_INIT_MODULE $(INSTALL) -D -m 0755 package/dracut/busybox-init-module-setup.sh \ $(HOST_DIR)/lib/dracut/modules.d/05busybox-init/module-setup.sh - $(INSTALL) -D -m 0755 package/dracut/rescue-module-setup.sh \ - $(HOST_DIR)/lib/dracut/modules.d/03rescue/module-setup.sh endef HOST_DRACUT_POST_INSTALL_HOOKS += HOST_DRACUT_POST_INSTALL_BUSYBOX_INIT_MODULE endif diff --git a/package/dracut/rescue-module-setup.sh b/package/dracut/rescue-module-setup.sh deleted file mode 100755 index 1eda52ac..00000000 --- a/package/dracut/rescue-module-setup.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# called by dracut -check() { - # do not add this module by default - return 255 -} - -# called by dracut -depends() { - return 0 -} - -# called by dracut -install() { - inst_multiple -o ps grep more cat rm strace free showmount \ - ping netstat rpcinfo vi scp ping6 ssh \ - fsck fsck.ext2 fsck.ext4 fsck.ext3 fsck.ext4dev fsck.f2fs fsck.vfat e2fsck resize2fs mount -} diff --git a/package/ffmpeg/0006-update-version-fix-some-bugs.patch b/package/ffmpeg/0006-update-version-fix-some-bugs.patch new file mode 100644 index 00000000..c5907433 --- /dev/null +++ b/package/ffmpeg/0006-update-version-fix-some-bugs.patch @@ -0,0 +1,703 @@ +From 573266fb7c6d3747556269aadb66905cab788fcd Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Tue, 12 Mar 2024 08:56:22 +0800 +Subject: [PATCH] update version: fix some bugs + +--- + libavcodec/stcodecdec.c | 477 +++++++++++++++++++++++++++++----------- + 1 file changed, 346 insertions(+), 131 deletions(-) + +diff --git a/libavcodec/stcodecdec.c 
b/libavcodec/stcodecdec.c +index 763e1b2..1d618b9 100755 +--- a/libavcodec/stcodecdec.c ++++ b/libavcodec/stcodecdec.c +@@ -39,7 +39,8 @@ + #include "libavutil/log.h" + #include "vdec.h" + +-#define PACKET_SIZE (2 * 1024 * 1024) ++#define FREESLOTS_THRESHOLD 4 ++#define DROPFRAME_NUM_AFTER_FLUSH 1 + + typedef struct { + MppVdecCtx* pVdecCtx; +@@ -49,6 +50,11 @@ typedef struct { + enum AVPixelFormat ePixFmt; + + char eos_reached; ++ char no_pts; ++ char first_packet; ++ int64_t duration; ++ int64_t use_dts; ++ char flushed; + + AVBufferRef* frames_ref; + AVBufferRef* device_ref; +@@ -76,17 +82,78 @@ static MppCodingType stcodec_get_codingtype(AVCodecContext* avctx) { + return CODING_VP9; + case AV_CODEC_ID_MJPEG: + return CODING_MJPEG; ++ case AV_CODEC_ID_VC1: ++ return CODING_VC1; ++ case AV_CODEC_ID_MPEG2VIDEO: ++ return CODING_MPEG2; ++ case AV_CODEC_ID_MPEG4: ++ return CODING_MPEG4; ++ case AV_CODEC_ID_AVS: ++ return CODING_AVS; ++ case AV_CODEC_ID_AVS2: ++ return CODING_AVS2; + default: + return CODING_UNKNOWN; + } + } + ++static MppProfileType stcodec_get_profiletype(AVCodecContext* avctx) { ++ av_log(avctx, AV_LOG_DEBUG, "profile = %d\n", avctx->profile); ++ if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO) { ++ switch (avctx->profile) { ++ case FF_PROFILE_MPEG2_422: ++ return PROFILE_MPEG2_422; ++ case FF_PROFILE_MPEG2_HIGH: ++ return PROFILE_MPEG2_HIGH; ++ case FF_PROFILE_MPEG2_SNR_SCALABLE: ++ return PROFILE_MPEG2_SNR_SCALABLE; ++ case FF_PROFILE_MPEG2_MAIN: ++ return PROFILE_MPEG2_MAIN; ++ case FF_PROFILE_MPEG2_SIMPLE: ++ return PROFILE_MPEG2_SIMPLE; ++ default: ++ return PROFILE_UNKNOWN; ++ } ++ } else if (avctx->codec_id == AV_CODEC_ID_VC1) { ++ switch (avctx->profile) { ++ case FF_PROFILE_VC1_SIMPLE: ++ return PROFILE_VC1_SIMPLE; ++ case FF_PROFILE_VC1_MAIN: ++ return PROFILE_VC1_MAIN; ++ case FF_PROFILE_VC1_COMPLEX: ++ return PROFILE_VC1_COMPLEX; ++ case FF_PROFILE_VC1_ADVANCED: ++ return PROFILE_VC1_ADVANCED; ++ default: ++ return PROFILE_UNKNOWN; ++ } 
++ } else if (avctx->codec_id == AV_CODEC_ID_MJPEG) { ++ switch (avctx->profile) { ++ case FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT: ++ return PROFILE_MJPEG_HUFFMAN_BASELINE_DCT; ++ case FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT: ++ return PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT; ++ case FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT: ++ return PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT; ++ case FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS: ++ return PROFILE_MJPEG_HUFFMAN_LOSSLESS; ++ case FF_PROFILE_MJPEG_JPEG_LS: ++ return PROFILE_MJPEG_JPEG_LS; ++ default: ++ return PROFILE_UNKNOWN; ++ } ++ } else { ++ return PROFILE_UNKNOWN; ++ } ++} ++ + static int get_stride(int width, int align) { + return (width + align - 1) & (~(align - 1)); + } + + static int stcodec_send_data_to_decoder(AVCodecContext* avctx, uint8_t* buffer, +- int size, int64_t pts) { ++ int size, int64_t pts, int64_t dts, ++ int64_t duration) { + STCODECDecodeContext* st_context = avctx->priv_data; + STCODECDecoder* decoder = (STCODECDecoder*)st_context->decoder_ref->data; + int ret = 0; +@@ -102,7 +169,14 @@ static int stcodec_send_data_to_decoder(AVCodecContext* avctx, uint8_t* buffer, + + PACKET_SetDataPointer(decoder->pPacket, buffer); + PACKET_SetLength(decoder->pPacket, size); +- PACKET_SetPts(decoder->pPacket, pts); ++ if (pts != AV_NOPTS_VALUE) { ++ PACKET_SetPts(decoder->pPacket, pts); ++ } else if (dts != AV_NOPTS_VALUE) { ++ PACKET_SetPts(decoder->pPacket, dts); ++ decoder->use_dts = 1; ++ } else { ++ decoder->no_pts = 1; ++ } + PACKET_SetEos(decoder->pPacket, 0); + av_log(avctx, AV_LOG_DEBUG, "input pts : %ld\n", pts); + } +@@ -135,12 +209,24 @@ static int stcodec_close_decoder(AVCodecContext* avctx) { + + static void stcodec_release_stcodec_decoder(void* opaque, uint8_t* data) { + STCODECDecoder* decoder = (STCODECDecoder*)data; ++ ++ if (decoder->pPacket) { ++ // PACKET_Free(decoder->pPacket); ++ PACKET_Destory(decoder->pPacket); ++ decoder->pPacket = NULL; ++ } ++ ++ if (decoder->pFrame) { ++ 
FRAME_Destory(decoder->pFrame); ++ decoder->pFrame = NULL; ++ } ++ + if (decoder->pVdecCtx) { +- av_log(NULL, AV_LOG_ERROR, "stcodec release decoder\n"); +- VDEC_ResetChannel(decoder->pVdecCtx); ++ // VDEC_ResetChannel(decoder->pVdecCtx); + VDEC_DestoryChannel(decoder->pVdecCtx); + decoder->pVdecCtx = NULL; + } ++ + av_buffer_unref(&decoder->frames_ref); + av_buffer_unref(&decoder->device_ref); + av_free(decoder); +@@ -151,9 +237,10 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + STCODECDecoder* decoder = NULL; + MppCodingType codectype = CODING_UNKNOWN; + int ret; +-/* +- if (avctx->width > 4096 || avctx->height > 2160 || avctx->width <= 640 || +- avctx->height <= 480) { ++ ++ if ((avctx->width > 4096 || avctx->height > 2304 || avctx->width <= 640 || ++ avctx->height <= 480) && ++ (avctx->width != 0 || avctx->height != 0)) { + av_log(avctx, AV_LOG_ERROR, + "STCODEC Decoder do not support the size (%d x %d), too big or too " + "small!\n", +@@ -161,9 +248,9 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + ret = AVERROR_UNKNOWN; + goto fail; + } +-*/ ++ + avctx->pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); +- av_log(avctx, AV_LOG_ERROR, "------------------------ Use pixel format %d\n", avctx->pix_fmt); ++ av_log(avctx, AV_LOG_ERROR, "Use pixel format %d\n", avctx->pix_fmt); + + // create a decoder and a ref to it + decoder = av_mallocz(sizeof(STCODECDecoder)); +@@ -206,6 +293,7 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + + // set para + decoder->pVdecCtx->stVdecPara.eCodingType = codectype; ++ decoder->pVdecCtx->stVdecPara.nProfile = stcodec_get_profiletype(avctx); + decoder->pVdecCtx->stVdecPara.bInputBlockModeEnable = MPP_FALSE; + decoder->pVdecCtx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; + decoder->pVdecCtx->stVdecPara.nWidth = avctx->width; +@@ -213,7 +301,12 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + decoder->pVdecCtx->stVdecPara.nStride = get_stride(avctx->width, 8); + 
decoder->pVdecCtx->stVdecPara.eOutputPixelFormat = PIXEL_FORMAT_NV12; + decoder->pVdecCtx->eCodecType = CODEC_V4L2_LINLONV5V7; +- decoder->pVdecCtx->stVdecPara.nScale = 1; ++ if (avctx->width >= 3840 || avctx->height >= 2160) { ++ av_log(avctx, AV_LOG_ERROR, "4K video, downscale!\n"); ++ decoder->pVdecCtx->stVdecPara.nScale = 2; ++ } else { ++ decoder->pVdecCtx->stVdecPara.nScale = 1; ++ } + decoder->pVdecCtx->stVdecPara.nHorizonScaleDownRatio = 1; + decoder->pVdecCtx->stVdecPara.nVerticalScaleDownRatio = 1; + decoder->pVdecCtx->stVdecPara.nRotateDegree = 0; +@@ -225,37 +318,38 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + if (ret) { + av_log(avctx, AV_LOG_ERROR, + "Failed to initialize STCODEC VDEV (ret = %d).\n", ret); +- VDEC_DestoryChannel(decoder->pVdecCtx); +- av_free(decoder); ++ // VDEC_DestoryChannel(decoder->pVdecCtx); ++ // av_free(decoder); + ret = AVERROR_UNKNOWN; + goto fail; + } + +- av_log(avctx, AV_LOG_ERROR, "init 1.\n"); + // mpp packet init + decoder->pPacket = PACKET_Create(); + if (!decoder->pPacket) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialize STCODEC packet.\n"); +- VDEC_DestoryChannel(decoder->pVdecCtx); +- av_free(decoder); ++ // VDEC_DestoryChannel(decoder->pVdecCtx); ++ // av_free(decoder); + ret = AVERROR_UNKNOWN; + goto fail; + } +- PACKET_Alloc(decoder->pPacket, PACKET_SIZE); + +- av_log(avctx, AV_LOG_ERROR, "init 2.\n"); + // mpp frame init + decoder->pFrame = FRAME_Create(); + if (!decoder->pFrame) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialize STCODEC frame.\n"); + PACKET_Destory(decoder->pPacket); +- VDEC_DestoryChannel(decoder->pVdecCtx); +- av_free(decoder); ++ // VDEC_DestoryChannel(decoder->pVdecCtx); ++ // av_free(decoder); + ret = AVERROR_UNKNOWN; + goto fail; + } + +- av_log(avctx, AV_LOG_ERROR, "init 3.\n"); ++ decoder->first_packet = 1; ++ decoder->duration = 0; ++ decoder->use_dts = 0; ++ decoder->flushed = 0; ++ + av_log(avctx, AV_LOG_DEBUG, "STCODEC decoder initialized successfully.\n"); 
+ + decoder->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); +@@ -264,37 +358,32 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + ret = AVERROR(ENOMEM); + goto fail; + } +- av_log(avctx, AV_LOG_ERROR, "init 4.\n"); + ret = av_hwdevice_ctx_init(decoder->device_ref); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to av_hwdevice_ctx_init\n"); + goto fail; + } +- av_log(avctx, AV_LOG_ERROR, "init 5.\n"); +- +- decoder->frames_ref = av_hwframe_ctx_alloc(decoder->device_ref); +- if (!decoder->frames_ref) { +- av_log(avctx, AV_LOG_ERROR, "Failed to av_hwframe_ctx_alloc\n"); +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- av_log(avctx, AV_LOG_ERROR, "init 6.\n"); +- +- AVHWFramesContext* hwframes; +- hwframes = (AVHWFramesContext*)decoder->frames_ref->data; +- hwframes->format = AV_PIX_FMT_DRM_PRIME; +- hwframes->sw_format = AV_PIX_FMT_NV12; +- hwframes->width = avctx->width; +- hwframes->height = avctx->height; +- ret = av_hwframe_ctx_init(decoder->frames_ref); +- if (ret < 0) goto fail; ++ /* ++ decoder->frames_ref = av_hwframe_ctx_alloc(decoder->device_ref); ++ if (!decoder->frames_ref) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to av_hwframe_ctx_alloc\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } + ++ AVHWFramesContext* hwframes; ++ hwframes = (AVHWFramesContext*)decoder->frames_ref->data; ++ hwframes->format = AV_PIX_FMT_DRM_PRIME; ++ hwframes->sw_format = AV_PIX_FMT_NV12; ++ hwframes->width = avctx->width; ++ hwframes->height = avctx->height; ++ ret = av_hwframe_ctx_init(decoder->frames_ref); ++ if (ret < 0) goto fail; ++ */ + av_log(avctx, AV_LOG_DEBUG, "Initialized successfully.\n"); +- av_log(avctx, AV_LOG_ERROR, "init 7.\n"); + return 0; + + fail: +- av_log(avctx, AV_LOG_ERROR, "init 8.\n"); + av_log(avctx, AV_LOG_ERROR, + "Failed to initialize STCODEC decoder, please check!\n"); + stcodec_close_decoder(avctx); +@@ -305,23 +394,57 @@ static int stcodec_send_packet(AVCodecContext* avctx, const AVPacket* avpkt) { + 
STCODECDecodeContext* st_context = avctx->priv_data; + STCODECDecoder* decoder = (STCODECDecoder*)st_context->decoder_ref->data; + int ret; +- av_log(avctx, AV_LOG_DEBUG, "start send packet, pts(%ld)\n", avpkt->pts); ++ av_log(avctx, AV_LOG_DEBUG, ++ "start send packet, pts(%ld) dts(%ld) duration(%ld)\n", avpkt->pts, ++ avpkt->dts, avpkt->duration); + + // handle EOF + if (!avpkt->size) { + av_log(avctx, AV_LOG_ERROR, "Get EOS from parser!\n"); + decoder->eos_reached = 1; + // write a NULL data to decoder to inform it the EOS. +- ret = stcodec_send_data_to_decoder(avctx, NULL, 0, 0); ++ ret = stcodec_send_data_to_decoder(avctx, NULL, 0, 0, 0, 0); + if (ret) + av_log(avctx, AV_LOG_ERROR, "Failed to send EOS to decoder (ret = %d)\n", + ret); + return ret; + } + ++ // on first packet, send extradata ++ if (decoder->first_packet) { ++ if (avctx->extradata_size) { ++ if (avctx->codec_id == AV_CODEC_ID_H264) { ++ ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, ++ avctx->extradata_size, avpkt->pts, ++ avpkt->dts, avpkt->duration); ++ } else if (avctx->codec_id == AV_CODEC_ID_HEVC) { ++ ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, ++ avctx->extradata_size, avpkt->pts, ++ avpkt->dts, avpkt->duration); ++ } else if (avctx->codec_id == AV_CODEC_ID_MJPEG) { ++ // do nothing. ++ } else if (avctx->codec_id == AV_CODEC_ID_MPEG4 || ++ avctx->codec_id == AV_CODEC_ID_VC1) { ++ ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, ++ avctx->extradata_size, avpkt->pts, ++ avpkt->dts, avpkt->duration); ++ if (ret) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to write extradata to decoder (ret = %d)\n", ret); ++ return ret; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, ++ "OK! 
Write extradata to decoder (size = %d)\n", ++ avctx->extradata_size); ++ } ++ } ++ } ++ decoder->first_packet = 0; ++ } ++ + // now send packet +- ret = +- stcodec_send_data_to_decoder(avctx, avpkt->data, avpkt->size, avpkt->pts); ++ ret = stcodec_send_data_to_decoder(avctx, avpkt->data, avpkt->size, ++ avpkt->pts, avpkt->dts, avpkt->duration); + if (ret) { + av_log(avctx, AV_LOG_DEBUG, + "Failed to write data to decoder (code = %d (%s))\n", ret, +@@ -351,6 +474,7 @@ static void stcodec_release_frame(void* opaque, uint8_t* data) { + AV_PIX_FMT_DRM_PRIME) { + av_free(desc); + } ++ FRAME_Destory(framecontext->pFrame); + av_buffer_unref(&framecontext->decoder_ref); + av_buffer_unref(&framecontextref); + } +@@ -373,15 +497,15 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + // we get the available input queue num in decoder + VDEC_GetParam(decoder->pVdecCtx, &(decoder->pVdecPara)); + freeslots = decoder->pVdecPara->nInputQueueLeftNum; +- av_log(avctx, AV_LOG_ERROR, "--------- Input queue left %d seat!!\n", freeslots); ++ av_log(avctx, AV_LOG_DEBUG, "Input queue left %d seat!!\n", freeslots); + +- if (freeslots > 0) { ++ while (freeslots > 0) { + ret = ff_decode_get_packet(avctx, &pkt); + if (ret < 0 && ret != AVERROR_EOF) { +- av_log(avctx, AV_LOG_ERROR, ++ av_log(avctx, AV_LOG_DEBUG, + "Failed to ff_decode_get_packet (ret = %d (%s))\n", ret, + av_err2str(ret)); +- return ret; ++ goto fail; + } + + ret = stcodec_send_packet(avctx, &pkt); +@@ -391,114 +515,200 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + av_log(avctx, AV_LOG_ERROR, + "Failed to send packet to decoder (code = %d (%s))\n", ret, + av_err2str(ret)); +- return ret; ++ goto fail; + } +- } + +- // make sure we keep decoder full +- VDEC_GetParam(decoder->pVdecCtx, &(decoder->pVdecPara)); +- freeslots = decoder->pVdecPara->nInputQueueLeftNum; +- av_log(avctx, AV_LOG_ERROR, "========== Input queue left %d seat!!\n", freeslots); +- if (freeslots > 0) 
return AVERROR(EAGAIN); ++ // make sure we keep decoder full ++ VDEC_GetParam(decoder->pVdecCtx, &(decoder->pVdecPara)); ++ freeslots = decoder->pVdecPara->nInputQueueLeftNum; ++ av_log(avctx, AV_LOG_DEBUG, "Input queue left %d seat!!\n", freeslots); ++ } + } + +- ret = VDEC_RequestOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ do { ++ ret = ++ VDEC_RequestOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ if (ret == MPP_CODER_NULL_DATA) { ++ av_log(avctx, AV_LOG_ERROR, "null data, return\n"); ++ VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ } ++ } while (ret == MPP_CODER_NO_DATA || ret == MPP_CODER_NULL_DATA); + av_log(avctx, AV_LOG_DEBUG, "Request frame (ret = %d)\n", ret); + + if (ret == MPP_CODER_EOS && mppframe == NULL) { + av_log(avctx, AV_LOG_ERROR, "EOS 1!\n"); +- return AVERROR_EOF; ++ ret = AVERROR_EOF; ++ goto fail; + } + + if (ret == MPP_CODER_EOS || + FRAME_GetEos(mppframe) == 1 /* || decoder->eos_reached*/) { + av_log(avctx, AV_LOG_ERROR, "EOS 2!\n"); + VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); +- return AVERROR_EOF; ++ ret = AVERROR_EOF; ++ goto fail; + } + +- if (!ret) { +- av_log(NULL, AV_LOG_DEBUG, "stcodec request a frame\n"); ++ if (ret == MPP_ERROR_FRAME) { ++ av_log(avctx, AV_LOG_ERROR, "error frame, need discard\n"); ++ VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ ret = AVERROR(EAGAIN); ++ goto fail; ++ } + +- // setup general frame fields +- frame->format = avctx->pix_fmt; +- frame->width = avctx->width; +- frame->height = avctx->height; +- // frame->pts = FRAME_GetPts(decoder->pFrame); +- frame->interlaced_frame = 0; +- frame->top_field_first = 0; +- +- framecontextref = av_buffer_allocz(sizeof(*framecontext)); +- if (!framecontextref) { +- av_log(avctx, AV_LOG_ERROR, "Failed to alloc AvBufferRef\n"); +- ret = AVERROR(ENOMEM); +- return ret; +- } ++ if (ret == MPP_RESOLUTION_CHANGED) { ++ av_log(avctx, AV_LOG_ERROR, ++ 
"resolution changed!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); ++ AVHWFramesContext* hwframes; + +- // MPP decoder needs to be closed only when all frames have been released. +- framecontext = (STCODECFrameContext*)framecontextref->data; +- framecontext->decoder_ref = av_buffer_ref(st_context->decoder_ref); +- framecontext->pFrame = mppframe; ++ av_log(avctx, AV_LOG_DEBUG, "Decoder noticed an info change (%dx%d)\n", ++ decoder->pVdecCtx->stVdecPara.nWidth, ++ decoder->pVdecCtx->stVdecPara.nHeight); + +- if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { +- desc = av_mallocz(sizeof(AVDRMFrameDescriptor)); +- if (!desc) { +- ret = AVERROR(ENOMEM); +- return ret; +- } ++ avctx->width = decoder->pVdecCtx->stVdecPara.nWidth; ++ avctx->height = decoder->pVdecCtx->stVdecPara.nHeight; + +- desc->nb_objects = 1; +- desc->objects[0].fd = FRAME_GetFD(mppframe, 0); +- desc->objects[0].size = frame->width * frame->height * 3 / 2; +- av_log(avctx, AV_LOG_DEBUG, "fd=%d size=%ld\n", desc->objects[0].fd, +- desc->objects[0].size); +- desc->nb_layers = 1; +- layer = &desc->layers[0]; +- layer->format = DRM_FORMAT_NV12; +- layer->nb_planes = 2; +- +- layer->planes[0].object_index = 0; +- layer->planes[0].offset = 0; +- layer->planes[0].pitch = frame->width; +- +- layer->planes[1].object_index = 0; +- layer->planes[1].offset = frame->width * frame->height; +- layer->planes[1].pitch = frame->width; +- +- frame->data[0] = (uint8_t*)desc; +- frame->buf[0] = +- av_buffer_create((uint8_t*)desc, sizeof(*desc), stcodec_release_frame, +- framecontextref, AV_BUFFER_FLAG_READONLY); +- } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { +- frame->linesize[0] = get_stride(avctx->width, 8); +- frame->linesize[1] = get_stride(avctx->width, 8); +- frame->data[0] = FRAME_GetDataPointer(mppframe, 0); +- frame->data[1] = frame->data[0] + frame->width * frame->height; +- frame->buf[0] = av_buffer_create( +- (uint8_t*)(frame->data[0]), sizeof(frame->data[0]), +- stcodec_release_frame, framecontextref, 
AV_BUFFER_FLAG_READONLY); +- } ++ av_buffer_unref(&decoder->frames_ref); + +- if (!frame->buf[0]) { ++ decoder->frames_ref = av_hwframe_ctx_alloc(decoder->device_ref); ++ if (!decoder->frames_ref) { + av_log(avctx, AV_LOG_ERROR, +- "Failed to create AVDRMFrameDescriptor ref\n"); ++ "av_hwframe_ctx_alloc failed, please check\n"); + ret = AVERROR(ENOMEM); +- return ret; ++ goto fail; + } + +- frame->hw_frames_ctx = av_buffer_ref(decoder->frames_ref); +- if (!frame->hw_frames_ctx) { +- av_log(avctx, AV_LOG_ERROR, "Failed to create hw_frames_ctx\n"); +- ret = AVERROR(ENOMEM); +- return ret; ++ hwframes = (AVHWFramesContext*)decoder->frames_ref->data; ++ hwframes->format = AV_PIX_FMT_DRM_PRIME; ++ hwframes->sw_format = AV_PIX_FMT_NV12; ++ hwframes->width = avctx->width; ++ hwframes->height = avctx->height; ++ ret = av_hwframe_ctx_init(decoder->frames_ref); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed, please check\n"); ++ goto fail; + } + ++ // here decoder is fully initialized, we need to feed it again with data ++ ret = AVERROR(EAGAIN); ++ goto fail; ++ } ++ ++ if (!ret) { ++ av_log(NULL, AV_LOG_DEBUG, "stcodec request a frame\n"); ++ if (!decoder->flushed) { ++ // setup general frame fields ++ frame->format = avctx->pix_fmt; ++ frame->width = avctx->width; ++ frame->height = avctx->height; ++ if (!decoder->no_pts) { ++ if (!decoder->use_dts) { ++ frame->pts = FRAME_GetPts(mppframe); ++ } else { ++ frame->pkt_dts = FRAME_GetPts(mppframe); ++ } ++ } else { ++ } ++ av_log(avctx, AV_LOG_DEBUG, "frame pts:%ld pkt_dts:%ld\n", frame->pts, ++ frame->pkt_dts); ++ frame->interlaced_frame = 0; ++ frame->top_field_first = 0; ++ ++ framecontextref = av_buffer_allocz(sizeof(*framecontext)); ++ if (!framecontextref) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to alloc AvBufferRef\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ // MPP decoder needs to be closed only when all frames have been released. 
++ framecontext = (STCODECFrameContext*)framecontextref->data; ++ framecontext->decoder_ref = av_buffer_ref(st_context->decoder_ref); ++ framecontext->pFrame = mppframe; ++ ++ if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { ++ desc = av_mallocz(sizeof(AVDRMFrameDescriptor)); ++ if (!desc) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ desc->nb_objects = 1; ++ desc->objects[0].fd = FRAME_GetFD(mppframe, 0); ++ desc->objects[0].size = frame->width * frame->height * 3 / 2; ++ av_log(avctx, AV_LOG_DEBUG, "fd=%d size=%ld\n", desc->objects[0].fd, ++ desc->objects[0].size); ++ desc->nb_layers = 1; ++ layer = &desc->layers[0]; ++ layer->format = DRM_FORMAT_NV12; ++ layer->nb_planes = 2; ++ ++ layer->planes[0].object_index = 0; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = frame->width; ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = frame->width * frame->height; ++ layer->planes[1].pitch = frame->width; ++ ++ frame->data[0] = (uint8_t*)desc; ++ frame->buf[0] = av_buffer_create((uint8_t*)desc, sizeof(*desc), ++ stcodec_release_frame, framecontextref, ++ AV_BUFFER_FLAG_READONLY); ++ } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { ++ frame->linesize[0] = get_stride(avctx->width, 8); ++ frame->linesize[1] = get_stride(avctx->width, 8); ++ frame->data[0] = FRAME_GetDataPointer(mppframe, 0); ++ frame->data[1] = frame->data[0] + frame->width * frame->height; ++ frame->buf[0] = av_buffer_create( ++ (uint8_t*)(frame->data[0]), sizeof(frame->data[0]), ++ stcodec_release_frame, framecontextref, AV_BUFFER_FLAG_READONLY); ++ } ++ ++ if (!frame->buf[0]) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to create AVDRMFrameDescriptor ref\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ frame->hw_frames_ctx = av_buffer_ref(decoder->frames_ref); ++ if (!frame->hw_frames_ctx) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create hw_frames_ctx\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ } else if (decoder->flushed < DROPFRAME_NUM_AFTER_FLUSH) { 
++ VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ av_log(avctx, AV_LOG_ERROR, "drop some frames after flush, num=%d\n", ++ decoder->flushed); ++ decoder->flushed++; ++ ret = AVERROR(EAGAIN); ++ goto fail; ++ } else if (decoder->flushed == DROPFRAME_NUM_AFTER_FLUSH) { ++ VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); ++ ret = AVERROR(EAGAIN); ++ av_log(avctx, AV_LOG_ERROR, ++ "drop some frames after flush finish, num=%d\n", decoder->flushed); ++ decoder->flushed = 0; ++ goto fail; ++ } + } else { +- // return AVERROR_UNKNOWN; +- return AVERROR(EAGAIN); ++ av_log(avctx, AV_LOG_ERROR, "should not be here, ret = %d\n", ret); ++ ret = AVERROR(EAGAIN); ++ goto fail; + } + + return 0; // return AVERROR(EAGAIN); ++ ++fail: ++ if (mppframe) FRAME_Destory(mppframe); ++ if (framecontext && framecontext->pFrame) FRAME_Destory(framecontext->pFrame); ++ if (framecontext) av_buffer_unref(&framecontext->decoder_ref); ++ if (framecontextref) av_buffer_unref(&framecontextref); ++ if (desc) av_free(desc); ++ ++ return ret; + } + + static void stcodec_flush(AVCodecContext* avctx) { +@@ -507,13 +717,18 @@ static void stcodec_flush(AVCodecContext* avctx) { + int ret = -1; + + av_log(avctx, AV_LOG_ERROR, "Flush.\n"); +-/* +- ret = VDEC_ResetChannel(decoder->pVdecCtx); ++ ++ ret = VDEC_Flush(decoder->pVdecCtx); + if (ret) +- av_log(avctx, AV_LOG_ERROR, "Failed to reset VDEC Channel (code = %d)\n", ++ av_log(avctx, AV_LOG_ERROR, "Failed to flush VDEC Channel (code = %d)\n", + ret); +-*/ ++ + decoder->eos_reached = 0; ++ decoder->no_pts = 0; ++ decoder->first_packet = 1; ++ decoder->duration = 0; ++ decoder->use_dts = 0; ++ decoder->flushed = 1; + } + + static const AVCodecHWConfigInternal* const stcodec_hw_configs[] = { +-- +2.25.1 + diff --git a/package/ffmpeg/0007-optimize-ffmpeg-c-v-h264_stcodec-i-xxx.mp4-output.yu.patch b/package/ffmpeg/0007-optimize-ffmpeg-c-v-h264_stcodec-i-xxx.mp4-output.yu.patch new file mode 100644 index 
00000000..5ee4d5fc --- /dev/null +++ b/package/ffmpeg/0007-optimize-ffmpeg-c-v-h264_stcodec-i-xxx.mp4-output.yu.patch @@ -0,0 +1,38 @@ +From 6eb52b2c24bbc15ea991d40c7c7772f642881ae8 Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Wed, 20 Mar 2024 14:38:47 +0800 +Subject: [PATCH] optimize: ffmpeg -c:v h264_stcodec -i xxx.mp4 output.yuv + +--- + libavcodec/stcodecdec.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/libavcodec/stcodecdec.c b/libavcodec/stcodecdec.c +index 1d618b9..cf88096 100755 +--- a/libavcodec/stcodecdec.c ++++ b/libavcodec/stcodecdec.c +@@ -532,7 +532,7 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + av_log(avctx, AV_LOG_ERROR, "null data, return\n"); + VDEC_ReturnOutputFrame(decoder->pVdecCtx, FRAME_GetBaseData(mppframe)); + } +- } while (ret == MPP_CODER_NO_DATA || ret == MPP_CODER_NULL_DATA); ++ } while (ret == MPP_CODER_NULL_DATA); + av_log(avctx, AV_LOG_DEBUG, "Request frame (ret = %d)\n", ret); + + if (ret == MPP_CODER_EOS && mppframe == NULL) { +@@ -556,6 +556,12 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + goto fail; + } + ++ if (ret == MPP_CODER_NO_DATA) { ++ av_log(avctx, AV_LOG_ERROR, "no data, return\n"); ++ ret = AVERROR(EAGAIN); ++ goto fail; ++ } ++ + if (ret == MPP_RESOLUTION_CHANGED) { + av_log(avctx, AV_LOG_ERROR, + "resolution changed!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"); +-- +2.25.1 + diff --git a/package/ffmpeg/0008-stcodecdec-support-yuv420p-output-pixel-format.patch b/package/ffmpeg/0008-stcodecdec-support-yuv420p-output-pixel-format.patch new file mode 100644 index 00000000..cce70800 --- /dev/null +++ b/package/ffmpeg/0008-stcodecdec-support-yuv420p-output-pixel-format.patch @@ -0,0 +1,125 @@ +From 217da28a3f91ae8e360d2da836519001488d143b Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Fri, 29 Mar 2024 09:39:01 +0800 +Subject: [PATCH] stcodecdec support yuv420p output pixel format + +--- + libavcodec/stcodecdec.c | 54 
++++++++++++++++++++++++++--------------- + 1 file changed, 34 insertions(+), 20 deletions(-) + +diff --git a/libavcodec/stcodecdec.c b/libavcodec/stcodecdec.c +index cf88096..66b1305 100755 +--- a/libavcodec/stcodecdec.c ++++ b/libavcodec/stcodecdec.c +@@ -70,6 +70,19 @@ typedef struct { + AVBufferRef* decoder_ref; + } STCODECFrameContext; + ++static MppPixelFormat stcodec_get_pixelformat(AVCodecContext* avctx) { ++ switch (avctx->pix_fmt) { ++ case AV_PIX_FMT_NV12: ++ return PIXEL_FORMAT_NV12; ++ case AV_PIX_FMT_YUV420P: ++ return PIXEL_FORMAT_I420; ++ case AV_PIX_FMT_DRM_PRIME: ++ return PIXEL_FORMAT_NV12; ++ default: ++ return PIXEL_FORMAT_UNKNOWN; ++ } ++} ++ + static MppCodingType stcodec_get_codingtype(AVCodecContext* avctx) { + switch (avctx->codec_id) { + case AV_CODEC_ID_H264: +@@ -299,7 +312,7 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + decoder->pVdecCtx->stVdecPara.nWidth = avctx->width; + decoder->pVdecCtx->stVdecPara.nHeight = avctx->height; + decoder->pVdecCtx->stVdecPara.nStride = get_stride(avctx->width, 8); +- decoder->pVdecCtx->stVdecPara.eOutputPixelFormat = PIXEL_FORMAT_NV12; ++ decoder->pVdecCtx->stVdecPara.eOutputPixelFormat = stcodec_get_pixelformat(avctx); + decoder->pVdecCtx->eCodecType = CODEC_V4L2_LINLONV5V7; + if (avctx->width >= 3840 || avctx->height >= 2160) { + av_log(avctx, AV_LOG_ERROR, "4K video, downscale!\n"); +@@ -413,18 +426,10 @@ static int stcodec_send_packet(AVCodecContext* avctx, const AVPacket* avpkt) { + // on first packet, send extradata + if (decoder->first_packet) { + if (avctx->extradata_size) { +- if (avctx->codec_id == AV_CODEC_ID_H264) { +- ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, +- avctx->extradata_size, avpkt->pts, +- avpkt->dts, avpkt->duration); +- } else if (avctx->codec_id == AV_CODEC_ID_HEVC) { +- ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, +- avctx->extradata_size, avpkt->pts, +- avpkt->dts, avpkt->duration); +- } else if (avctx->codec_id == 
AV_CODEC_ID_MJPEG) { +- // do nothing. +- } else if (avctx->codec_id == AV_CODEC_ID_MPEG4 || +- avctx->codec_id == AV_CODEC_ID_VC1) { ++ if (avctx->codec_id == AV_CODEC_ID_H264 || ++ avctx->codec_id == AV_CODEC_ID_HEVC || ++ avctx->codec_id == AV_CODEC_ID_MPEG4 || ++ avctx->codec_id == AV_CODEC_ID_VC1) { + ret = stcodec_send_data_to_decoder(avctx, avctx->extradata, + avctx->extradata_size, avpkt->pts, + avpkt->dts, avpkt->duration); +@@ -499,7 +504,7 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + freeslots = decoder->pVdecPara->nInputQueueLeftNum; + av_log(avctx, AV_LOG_DEBUG, "Input queue left %d seat!!\n", freeslots); + +- while (freeslots > 0) { ++ if (freeslots > 0) { + ret = ff_decode_get_packet(avctx, &pkt); + if (ret < 0 && ret != AVERROR_EOF) { + av_log(avctx, AV_LOG_DEBUG, +@@ -517,11 +522,6 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + av_err2str(ret)); + goto fail; + } +- +- // make sure we keep decoder full +- VDEC_GetParam(decoder->pVdecCtx, &(decoder->pVdecPara)); +- freeslots = decoder->pVdecPara->nInputQueueLeftNum; +- av_log(avctx, AV_LOG_DEBUG, "Input queue left %d seat!!\n", freeslots); + } + } + +@@ -666,6 +666,17 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + frame->linesize[1] = get_stride(avctx->width, 8); + frame->data[0] = FRAME_GetDataPointer(mppframe, 0); + frame->data[1] = frame->data[0] + frame->width * frame->height; ++ frame->buf[0] = av_buffer_create( ++ (uint8_t*)(frame->data[0]), sizeof(frame->data[0]), ++ stcodec_release_frame, framecontextref, AV_BUFFER_FLAG_READONLY); ++ } else if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) { ++ frame->linesize[0] = get_stride(avctx->width, 8); ++ frame->linesize[1] = get_stride(avctx->width, 8) / 2; ++ frame->linesize[2] = get_stride(avctx->width, 8) / 2; ++ frame->data[0] = FRAME_GetDataPointer(mppframe, 0); ++ frame->data[1] = FRAME_GetDataPointer(mppframe, 1); ++ frame->data[2] = 
FRAME_GetDataPointer(mppframe, 2); ++ + frame->buf[0] = av_buffer_create( + (uint8_t*)(frame->data[0]), sizeof(frame->data[0]), + stcodec_release_frame, framecontextref, AV_BUFFER_FLAG_READONLY); +@@ -763,7 +774,7 @@ static const AVCodecHWConfigInternal* const stcodec_hw_configs[] = { + AV_CODEC_CAP_HARDWARE, \ + .pix_fmts = \ + (const enum AVPixelFormat[]){AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NV12, \ +- AV_PIX_FMT_NONE}, \ ++ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}, \ + .hw_configs = stcodec_hw_configs, \ + .bsfs = BSFS, \ + .wrapper_name = "stcodec", \ +@@ -771,4 +782,7 @@ static const AVCodecHWConfigInternal* const stcodec_hw_configs[] = { + + STCODEC_DEC(h264, AV_CODEC_ID_H264, "h264_mp4toannexb") + STCODEC_DEC(hevc, AV_CODEC_ID_HEVC, "hevc_mp4toannexb") ++STCODEC_DEC(vp8, AV_CODEC_ID_VP8, NULL) ++STCODEC_DEC(vp9, AV_CODEC_ID_VP9, NULL) + STCODEC_DEC(mjpeg, AV_CODEC_ID_MJPEG, NULL) ++STCODEC_DEC(mpeg4, AV_CODEC_ID_MPEG4, NULL) +\ No newline at end of file +-- +2.25.1 + diff --git a/package/ffmpeg/0009-stcodec-support-video-encode.patch b/package/ffmpeg/0009-stcodec-support-video-encode.patch new file mode 100644 index 00000000..3c23b6e0 --- /dev/null +++ b/package/ffmpeg/0009-stcodec-support-video-encode.patch @@ -0,0 +1,592 @@ +From 7757b33d16cc70e2f5a182cad8f9f9ee375874b2 Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Fri, 29 Mar 2024 10:45:11 +0800 +Subject: [PATCH] stcodec support video encode + +--- + configure | 7 + + libavcodec/Makefile | 7 + + libavcodec/allcodecs.c | 7 + + libavcodec/stcodecenc.c | 417 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 438 insertions(+) + create mode 100755 libavcodec/stcodecenc.c + +diff --git a/configure b/configure +index 473d30e..c14c5c1 100755 +--- a/configure ++++ b/configure +@@ -3095,6 +3095,7 @@ h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m" + h264_v4l2m2m_decoder_select="h264_mp4toannexb_bsf" + h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m" + h264_stcodec_decoder_deps="stcodec" 
++h264_stcodec_encoder_deps="stcodec" + hevc_amf_encoder_deps="amf" + hevc_cuvid_decoder_deps="cuvid" + hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf" +@@ -3113,12 +3114,15 @@ hevc_v4l2m2m_decoder_deps="v4l2_m2m hevc_v4l2_m2m" + hevc_v4l2m2m_decoder_select="hevc_mp4toannexb_bsf" + hevc_v4l2m2m_encoder_deps="v4l2_m2m hevc_v4l2_m2m" + hevc_stcodec_decoder_deps="stcodec" ++hevc_stcodec_encoder_deps="stcodec" + mjpeg_cuvid_decoder_deps="cuvid" + mjpeg_qsv_decoder_select="qsvdec" + mjpeg_qsv_encoder_deps="libmfx" + mjpeg_qsv_encoder_select="qsvenc" + mjpeg_vaapi_encoder_deps="VAEncPictureParameterBufferJPEG" + mjpeg_vaapi_encoder_select="cbs_jpeg jpegtables vaapi_encode" ++mjpeg_stcodec_decoder_deps="stcodec" ++mjpeg_stcodec_encoder_deps="stcodec" + mp3_mf_encoder_deps="mediafoundation" + mpeg1_cuvid_decoder_deps="cuvid" + mpeg1_v4l2m2m_decoder_deps="v4l2_m2m mpeg1_v4l2_m2m" +@@ -3137,6 +3141,7 @@ mpeg4_mmal_decoder_deps="mmal" + mpeg4_omx_encoder_deps="omx" + mpeg4_v4l2m2m_decoder_deps="v4l2_m2m mpeg4_v4l2_m2m" + mpeg4_v4l2m2m_encoder_deps="v4l2_m2m mpeg4_v4l2_m2m" ++mpeg4_stcodec_decoder_deps="stcodec" + msmpeg4_crystalhd_decoder_select="crystalhd" + nvenc_h264_encoder_select="h264_nvenc_encoder" + nvenc_hevc_encoder_select="hevc_nvenc_encoder" +@@ -3153,6 +3158,7 @@ vp8_vaapi_encoder_deps="VAEncPictureParameterBufferVP8" + vp8_vaapi_encoder_select="vaapi_encode" + vp8_v4l2m2m_decoder_deps="v4l2_m2m vp8_v4l2_m2m" + vp8_v4l2m2m_encoder_deps="v4l2_m2m vp8_v4l2_m2m" ++vp8_stcodec_decoder_deps="stcodec" + vp9_cuvid_decoder_deps="cuvid" + vp9_mediacodec_decoder_deps="mediacodec" + vp9_qsv_decoder_select="qsvdec" +@@ -3162,6 +3168,7 @@ vp9_vaapi_encoder_select="vaapi_encode" + vp9_qsv_encoder_deps="libmfx MFX_CODEC_VP9" + vp9_qsv_encoder_select="qsvenc" + vp9_v4l2m2m_decoder_deps="v4l2_m2m vp9_v4l2_m2m" ++vp9_stcodec_decoder_deps="stcodec" + wmv3_crystalhd_decoder_select="crystalhd" + av1_qsv_decoder_select="qsvdec" + +diff --git a/libavcodec/Makefile 
b/libavcodec/Makefile +index ff56ed6..e8e18f3 100644 +--- a/libavcodec/Makefile ++++ b/libavcodec/Makefile +@@ -386,6 +386,7 @@ OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o + OBJS-$(CONFIG_H264_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_H264_V4L2M2M_ENCODER) += v4l2_m2m_enc.o + OBJS-$(CONFIG_H264_STCODEC_DECODER) += stcodecdec.o ++OBJS-$(CONFIG_H264_STCODEC_ENCODER) += stcodecenc.o + OBJS-$(CONFIG_HAP_DECODER) += hapdec.o hap.o + OBJS-$(CONFIG_HAP_ENCODER) += hapenc.o hap.o + OBJS-$(CONFIG_HCA_DECODER) += hcadec.o +@@ -407,6 +408,7 @@ OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_encode_h265.o h265_profile_level + OBJS-$(CONFIG_HEVC_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_HEVC_V4L2M2M_ENCODER) += v4l2_m2m_enc.o + OBJS-$(CONFIG_HEVC_STCODEC_DECODER) += stcodecdec.o ++OBJS-$(CONFIG_HEVC_STCODEC_ENCODER) += stcodecenc.o + OBJS-$(CONFIG_HNM4_VIDEO_DECODER) += hnm4video.o + OBJS-$(CONFIG_HQ_HQA_DECODER) += hq_hqa.o hq_hqadata.o hq_hqadsp.o \ + canopus.o +@@ -461,6 +463,8 @@ OBJS-$(CONFIG_MJPEGB_DECODER) += mjpegbdec.o + OBJS-$(CONFIG_MJPEG_CUVID_DECODER) += cuviddec.o + OBJS-$(CONFIG_MJPEG_QSV_ENCODER) += qsvenc_jpeg.o + OBJS-$(CONFIG_MJPEG_VAAPI_ENCODER) += vaapi_encode_mjpeg.o ++OBJS-$(CONFIG_MJPEG_STCODEC_DECODER) += stcodecdec.o ++OBJS-$(CONFIG_MJPEG_STCODEC_ENCODER) += stcodecenc.o + OBJS-$(CONFIG_MLP_DECODER) += mlpdec.o mlpdsp.o + OBJS-$(CONFIG_MLP_ENCODER) += mlpenc.o mlp.o + OBJS-$(CONFIG_MMVIDEO_DECODER) += mmvideo.o +@@ -506,6 +510,7 @@ OBJS-$(CONFIG_MPEG4_MEDIACODEC_DECODER) += mediacodecdec.o + OBJS-$(CONFIG_MPEG4_OMX_ENCODER) += omx.o + OBJS-$(CONFIG_MPEG4_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_MPEG4_V4L2M2M_ENCODER) += v4l2_m2m_enc.o ++OBJS-$(CONFIG_MPEG4_STCODEC_DECODER) += stcodecdec.o + OBJS-$(CONFIG_MPL2_DECODER) += mpl2dec.o ass.o + OBJS-$(CONFIG_MSA1_DECODER) += mss3.o + OBJS-$(CONFIG_MSCC_DECODER) += mscc.o +@@ -722,6 +727,7 @@ OBJS-$(CONFIG_VP8_RKMPP_DECODER) += rkmppdec.o + 
OBJS-$(CONFIG_VP8_VAAPI_ENCODER) += vaapi_encode_vp8.o + OBJS-$(CONFIG_VP8_V4L2M2M_DECODER) += v4l2_m2m_dec.o + OBJS-$(CONFIG_VP8_V4L2M2M_ENCODER) += v4l2_m2m_enc.o ++OBJS-$(CONFIG_VP8_STCODEC_DECODER) += stcodecdec.o + OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9data.o vp9dsp.o vp9lpf.o vp9recon.o \ + vp9block.o vp9prob.o vp9mvs.o vp56rac.o \ + vp9dsp_8bpp.o vp9dsp_10bpp.o vp9dsp_12bpp.o +@@ -732,6 +738,7 @@ OBJS-$(CONFIG_VP9_VAAPI_ENCODER) += vaapi_encode_vp9.o + OBJS-$(CONFIG_VP9_QSV_ENCODER) += qsvenc_vp9.o + OBJS-$(CONFIG_VPLAYER_DECODER) += textdec.o ass.o + OBJS-$(CONFIG_VP9_V4L2M2M_DECODER) += v4l2_m2m_dec.o ++OBJS-$(CONFIG_VP9_STCODEC_DECODER) += stcodecdec.o + OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o + OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o wavpackdata.o dsd.o + OBJS-$(CONFIG_WAVPACK_ENCODER) += wavpackdata.o wavpackenc.o +diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c +index 9981106..79bf666 100644 +--- a/libavcodec/allcodecs.c ++++ b/libavcodec/allcodecs.c +@@ -149,12 +149,14 @@ extern AVCodec ff_h264_mmal_decoder; + extern AVCodec ff_h264_qsv_decoder; + extern AVCodec ff_h264_rkmpp_decoder; + extern AVCodec ff_h264_stcodec_decoder; ++extern AVCodec ff_h264_stcodec_encoder; + extern AVCodec ff_hap_encoder; + extern AVCodec ff_hap_decoder; + extern AVCodec ff_hevc_decoder; + extern AVCodec ff_hevc_qsv_decoder; + extern AVCodec ff_hevc_rkmpp_decoder; + extern AVCodec ff_hevc_stcodec_decoder; ++extern AVCodec ff_hevc_stcodec_encoder; + extern AVCodec ff_hevc_v4l2m2m_decoder; + extern AVCodec ff_hnm4_video_decoder; + extern AVCodec ff_hq_hqa_decoder; +@@ -191,6 +193,8 @@ extern AVCodec ff_mimic_decoder; + extern AVCodec ff_mjpeg_encoder; + extern AVCodec ff_mjpeg_decoder; + extern AVCodec ff_mjpegb_decoder; ++extern AVCodec ff_mjpeg_stcodec_decoder; ++extern AVCodec ff_mjpeg_stcodec_encoder; + extern AVCodec ff_mmvideo_decoder; + extern AVCodec ff_mobiclip_decoder; + extern AVCodec ff_motionpixels_decoder; +@@ -203,6 +207,7 @@ extern AVCodec 
ff_mpeg4_decoder; + extern AVCodec ff_mpeg4_crystalhd_decoder; + extern AVCodec ff_mpeg4_v4l2m2m_decoder; + extern AVCodec ff_mpeg4_mmal_decoder; ++extern AVCodec ff_mpeg4_stcodec_decoder; + extern AVCodec ff_mpegvideo_decoder; + extern AVCodec ff_mpeg1_v4l2m2m_decoder; + extern AVCodec ff_mpeg2_mmal_decoder; +@@ -361,9 +366,11 @@ extern AVCodec ff_vp7_decoder; + extern AVCodec ff_vp8_decoder; + extern AVCodec ff_vp8_rkmpp_decoder; + extern AVCodec ff_vp8_v4l2m2m_decoder; ++extern AVCodec ff_vp8_stcodec_decoder; + extern AVCodec ff_vp9_decoder; + extern AVCodec ff_vp9_rkmpp_decoder; + extern AVCodec ff_vp9_v4l2m2m_decoder; ++extern AVCodec ff_vp9_stcodec_decoder; + extern AVCodec ff_vqa_decoder; + extern AVCodec ff_webp_decoder; + extern AVCodec ff_wcmv_decoder; +diff --git a/libavcodec/stcodecenc.c b/libavcodec/stcodecenc.c +new file mode 100755 +index 0000000..eeaa41a +--- /dev/null ++++ b/libavcodec/stcodecenc.c +@@ -0,0 +1,417 @@ ++/* ++ * Spacemit MPP Video Encoder ++ * Copyright 2022-2023 SPACEMIT. All rights reserved. ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "avcodec.h" ++#include "internal.h" ++#include "encode.h" ++#include "h264_parse.h" ++#include "hevc_parse.h" ++#include "hwconfig.h" ++#include "libavutil/buffer.h" ++#include "libavutil/common.h" ++#include "libavutil/frame.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/log.h" ++#include "venc.h" ++ ++#define PACKET_SIZE (2 * 1024 * 1024) ++ ++typedef struct { ++ MppVencCtx* pVencCtx; ++ MppPacket* pPacket; ++ MppFrame* pFrame; ++ ++ char first_packet; ++ char eos_reached; ++ ++ AVBufferRef* frames_ref; ++ AVBufferRef* device_ref; ++} STCODECEncoder; ++ ++typedef struct { ++ AVClass* av_class; ++ AVBufferRef* encoder_ref; ++} STCODECEncodeContext; ++ ++typedef struct { ++ MppFrame* pFrame; ++ AVBufferRef* encoder_ref; ++} STCODECFrameContext; ++ ++static MppCodingType stcodec_get_codingtype(AVCodecContext* avctx) { ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_H264: ++ return CODING_H264; ++ case AV_CODEC_ID_HEVC: ++ return CODING_H265; ++ case AV_CODEC_ID_VP8: ++ return CODING_VP8; ++ case AV_CODEC_ID_VP9: ++ return CODING_VP9; ++ case AV_CODEC_ID_MJPEG: ++ return CODING_MJPEG; ++ default: ++ return CODING_UNKNOWN; ++ } ++} ++ ++static int get_stride(int width, int align) { ++ return (width + align - 1) & (~(align - 1)); ++} ++ ++static int stcodec_send_frame(AVCodecContext* avctx, const AVFrame* frame) { ++ STCODECEncodeContext* st_context = avctx->priv_data; ++ STCODECEncoder* encoder = (STCODECEncoder*)st_context->encoder_ref->data; ++ int ret = -1; ++ ++ if (frame != NULL) { ++ av_log(avctx, 
AV_LOG_ERROR, "@@@@@@@@ format:%d fd:%d %p %p\n", ++ frame->format, ++ ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].fd, ++ frame->data[0], frame->data[1]); ++ FRAME_SetEos(encoder->pFrame, 0); ++ if (frame->format == AV_PIX_FMT_NV12) { ++ FRAME_SetDataUsedNum(encoder->pFrame, 2); ++ FRAME_SetDataPointer(encoder->pFrame, 0, frame->data[0]); ++ FRAME_SetDataPointer(encoder->pFrame, 1, frame->data[1]); ++ // FRAME_SetDataPointer(encoder->pFrame, 2, frame->data[2]); ++ } else if (frame->format == AV_PIX_FMT_DRM_PRIME) { ++ void* vaddr = ++ mmap(NULL, ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].size, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].fd, 0); ++ FRAME_SetDataUsedNum(encoder->pFrame, 2); ++ FRAME_SetDataPointer(encoder->pFrame, 0, (U8*)vaddr); ++ FRAME_SetDataPointer(encoder->pFrame, 1, ++ ((U8*)vaddr) + frame->width * frame->height); ++ // FRAME_SetDataPointer(encoder->pFrame, 2, ++ // ((U8*)vaddr) + frame->width * frame->height * 5 / ++ // 4); ++ } else { ++ } ++ ret = VENC_Encode(encoder->pVencCtx, FRAME_GetBaseData(encoder->pFrame)); ++ } else { ++ FRAME_SetEos(encoder->pFrame, 1); ++ av_log(avctx, AV_LOG_ERROR, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ bigbig %d\n", ++ FRAME_GetEos(encoder->pFrame)); ++ // FRAME_SetDataUsedNum(encoder->pFrame, 0); ++ ret = VENC_Encode(encoder->pVencCtx, FRAME_GetBaseData(encoder->pFrame)); ++ } ++ ++ return ret; ++} ++ ++static int stcodec_receive_packet(AVCodecContext* avctx, AVPacket* avpkt) { ++ STCODECEncodeContext* st_context = avctx->priv_data; ++ STCODECEncoder* encoder = (STCODECEncoder*)st_context->encoder_ref->data; ++ int ret = -1; ++ AVFrame* frame = av_frame_alloc(); ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 0.\n"); ++ // if (!frame->buf[0]) { ++ ret = ff_encode_get_frame(avctx, frame); ++ ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 0. 
RET = %d\n", ++ ret); ++ if (ret < 0 && ret != AVERROR_EOF) return ret; ++ ++ if (ret == AVERROR_EOF) { ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 get eos\n"); ++ frame = NULL; ++ } ++ //} ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 1.\n"); ++ ret = stcodec_send_frame(avctx, frame); ++ if (ret != AVERROR(EAGAIN)) av_frame_unref(frame); ++ ++ if (ret < 0 && ret != AVERROR(EAGAIN)) return ret; ++ ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 2.\n"); ++ // return ff_v4l2_context_dequeue_packet(capture, avpkt); ++haha: ++ ret = VENC_RequestOutputStreamBuffer(encoder->pVencCtx, ++ PACKET_GetBaseData(encoder->pPacket)); ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 2. ret = %d\n", ++ ret); ++ if (ret == MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 3.\n"); ++ avpkt->size = PACKET_GetLength(encoder->pPacket); ++ av_log(avctx, AV_LOG_DEBUG, "1111111111111111111111111111111 4. %d=%d %p\n", ++ PACKET_GetLength(encoder->pPacket), avpkt->size, ++ PACKET_GetDataPointer(encoder->pPacket)); ++ av_new_packet(avpkt, avpkt->size); ++ memcpy(avpkt->data, PACKET_GetDataPointer(encoder->pPacket), avpkt->size); ++ av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 5.\n"); ++ VENC_ReturnOutputStreamBuffer(encoder->pVencCtx, ++ PACKET_GetBaseData(encoder->pPacket)); ++ } else if (ret == MPP_CODER_NO_DATA) { ++ av_log(avctx, AV_LOG_ERROR, "get no data.\n"); ++ // return AVERROR(EAGAIN); ++ goto haha; ++ } else if (ret == MPP_CODER_EOS) { ++ av_log(avctx, AV_LOG_ERROR, "get EOS.\n"); ++ return AVERROR_EOF; ++ } else { ++ av_log(avctx, AV_LOG_ERROR, "get ???. 
%d\n", ret); ++ } ++ ++ return 0; ++} ++ ++static av_cold int stcodec_close_encoder(AVCodecContext* avctx) { ++ STCODECEncodeContext* st_context = avctx->priv_data; ++ av_log(NULL, AV_LOG_ERROR, "stcodec close encoder\n"); ++ av_buffer_unref(&st_context->encoder_ref); ++ return 0; ++} ++ ++static void stcodec_release_encoder(void* opaque, uint8_t* data) { ++ STCODECEncoder* encoder = (STCODECEncoder*)data; ++ if (encoder->pVencCtx) { ++ av_log(NULL, AV_LOG_ERROR, "stcodec release encoder\n"); ++ VENC_ResetChannel(encoder->pVencCtx); ++ VENC_DestoryChannel(encoder->pVencCtx); ++ encoder->pVencCtx = NULL; ++ } ++ av_buffer_unref(&encoder->frames_ref); ++ av_buffer_unref(&encoder->device_ref); ++ av_free(encoder); ++} ++ ++static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { ++ STCODECEncodeContext* st_context = avctx->priv_data; ++ STCODECEncoder* encoder = NULL; ++ ++ MppCodingType codectype = CODING_UNKNOWN; ++ int ret; ++ ++ if (avctx->width > 4096 || avctx->height > 2160 || avctx->width <= 640 || ++ avctx->height <= 480) { ++ av_log(avctx, AV_LOG_ERROR, ++ "STCODEC-ENC do not support the size, too big or too small!\n"); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ // avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ // avctx->pix_fmt = AV_PIX_FMT_NV12; ++ // avctx->pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); ++ av_log(avctx, AV_LOG_ERROR, "======================== Use pixel format %d\n", ++ avctx->pix_fmt); ++ ++ // create a encoder and a ref to it ++ encoder = av_mallocz(sizeof(STCODECEncoder)); ++ if (!encoder) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to av_mallocz STCODECEncoder encoder\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ st_context->encoder_ref = ++ av_buffer_create((uint8_t*)encoder, sizeof(*encoder), ++ stcodec_release_encoder, NULL, AV_BUFFER_FLAG_READONLY); ++ if (!st_context->encoder_ref) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create ref of STCODECEncoder!\n"); ++ av_free(encoder); ++ ret = 
AVERROR(ENOMEM); ++ goto fail; ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "Initializing STCODEC encoder.\n"); ++ ++ codectype = stcodec_get_codingtype(avctx); ++ if (codectype == CODING_UNKNOWN) { ++ av_log(avctx, AV_LOG_ERROR, "Unknown codec type (%d).\n", avctx->codec_id); ++ av_free(encoder); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ /* ++ ret = mpp_check_support_format(MPP_CTX_DEC, codectype); ++ if (ret != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, "Codec type (%d) unsupported by MPP\n", ++ avctx->codec_id); ret = AVERROR_UNKNOWN; goto fail; ++ } ++ */ ++ ++ // Create the MPP context ++ encoder->pVencCtx = VENC_CreateChannel(); ++ if (!encoder->pVencCtx) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create STCODEC VENC channel.\n"); ++ av_free(encoder); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ // set para ++ encoder->pVencCtx->stVencPara.eCodingType = codectype; ++ encoder->pVencCtx->stVencPara.nWidth = avctx->width; ++ encoder->pVencCtx->stVencPara.nHeight = avctx->height; ++ encoder->pVencCtx->stVencPara.nStride = get_stride(avctx->width, 8); ++ encoder->pVencCtx->stVencPara.PixelFormat = PIXEL_FORMAT_NV12; ++ encoder->pVencCtx->eCodecType = CODEC_V4L2_LINLONV5V7; ++ encoder->pVencCtx->stVencPara.nBitrate = 5000000; ++ encoder->pVencCtx->stVencPara.nFrameRate = 30; ++ av_log(avctx, AV_LOG_ERROR, "(widthxheight = %d x %d).\n", avctx->width, ++ avctx->height); ++ ++ // venc init ++ ret = VENC_Init(encoder->pVencCtx); ++ if (ret != MPP_OK) { ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to initialize STCODEC VENC (ret = %d).\n", ret); ++ VENC_DestoryChannel(encoder->pVencCtx); ++ av_free(encoder); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ // mpp packet init ++ encoder->pPacket = PACKET_Create(); ++ if (!encoder->pPacket) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to initialize STCODEC packet.\n"); ++ VENC_DestoryChannel(encoder->pVencCtx); ++ av_free(encoder); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ PACKET_Alloc(encoder->pPacket, 
PACKET_SIZE); ++ ++ // mpp frame init ++ encoder->pFrame = FRAME_Create(); ++ if (!encoder->pFrame) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to initialize STCODEC frame.\n"); ++ PACKET_Destory(encoder->pPacket); ++ VENC_DestoryChannel(encoder->pVencCtx); ++ av_free(encoder); ++ ret = AVERROR_UNKNOWN; ++ goto fail; ++ } ++ ++ VENC_SetParam(encoder->pVencCtx, &(encoder->pVencCtx->stVencPara)); ++ ++ /* ++ // make decode calls blocking with a timeout ++ paramS32 = MPP_POLL_BLOCK; ++ ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK, ++ ¶mS32); ++ ++ paramS64 = RECEIVE_FRAME_TIMEOUT; ++ ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK_TIMEOUT, ++ ¶mS64); ++ ++ ret = mpp_buffer_group_get_internal(&decoder->frame_group, ++ MPP_BUFFER_TYPE_ION); ++ ret = decoder->mpi->control(decoder->ctx, MPP_DEC_SET_EXT_BUF_GROUP, ++ decoder->frame_group); ++ ++ ret = mpp_buffer_group_limit_config(decoder->frame_group, 0, ++ FRAMEGROUP_MAX_FRAMES); ++ */ ++ // decoder->first_packet = 1; ++ ++ av_log(avctx, AV_LOG_DEBUG, "STCODEC encoder initialized successfully.\n"); ++ /* ++ encoder->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); ++ if (!encoder->device_ref) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to av_hwdevice_ctx_alloc\n"); ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ ret = av_hwdevice_ctx_init(encoder->device_ref); ++ if (ret < 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to av_hwdevice_ctx_init\n"); ++ goto fail; ++ } ++ ++ AVHWFramesContext* hwframes; ++ avctx->width = avctx->width; ++ avctx->height = avctx->height; ++ encoder->frames_ref = av_hwframe_ctx_alloc(encoder->device_ref); ++ if (!encoder->frames_ref) { ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } ++ hwframes = (AVHWFramesContext*)encoder->frames_ref->data; ++ hwframes->format = AV_PIX_FMT_YUV420P;//AV_PIX_FMT_DRM_PRIME; ++ hwframes->sw_format = AV_PIX_FMT_YUV420P; ++ hwframes->width = avctx->width; ++ hwframes->height = avctx->height; ++ ret = 
av_hwframe_ctx_init(encoder->frames_ref); ++ if (ret < 0) goto fail; ++ */ ++ av_log(avctx, AV_LOG_ERROR, "Initialized successfully.\n"); ++ return 0; ++ ++fail: ++ av_log(avctx, AV_LOG_ERROR, ++ "Failed to initialize STCODEC encoder, please check!\n"); ++ stcodec_close_encoder(avctx); ++ return ret; ++} ++ ++static const AVCodecHWConfigInternal* const stcodec_hw_configs[] = { ++ HW_CONFIG_INTERNAL(DRM_PRIME), NULL}; ++ ++#define STCODEC_ENC_CLASS(NAME) \ ++ static const AVClass stcodec_##NAME##_enc_class = { \ ++ .class_name = "stcodec_" #NAME "_enc", \ ++ .version = LIBAVUTIL_VERSION_INT, \ ++ }; ++ ++#define STCODEC_ENC(NAME, ID) \ ++ STCODEC_ENC_CLASS(NAME) \ ++ AVCodec ff_##NAME##_stcodec_encoder = { \ ++ .name = #NAME "_stcodec", \ ++ .long_name = NULL_IF_CONFIG_SMALL(#NAME " (stcodec encoder)"), \ ++ .type = AVMEDIA_TYPE_VIDEO, \ ++ .id = ID, \ ++ .priv_data_size = sizeof(STCODECEncodeContext), \ ++ .priv_class = &stcodec_##NAME##_enc_class, \ ++ .init = stcodec_init_encoder, \ ++ .receive_packet = stcodec_receive_packet, \ ++ .close = stcodec_close_encoder, \ ++ .capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY, \ ++ .wrapper_name = "stcodec", \ ++ .pix_fmts = \ ++ (const enum AVPixelFormat[]){AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NV12, \ ++ AV_PIX_FMT_NONE}, \ ++ .hw_configs = stcodec_hw_configs, \ ++ }; ++ ++STCODEC_ENC(h264, AV_CODEC_ID_H264) ++STCODEC_ENC(hevc, AV_CODEC_ID_HEVC) ++STCODEC_ENC(mjpeg, AV_CODEC_ID_MJPEG) +-- +2.25.1 + diff --git a/package/ffmpeg/0010-ffmpeg-optimize-stcodecenc-logic.patch b/package/ffmpeg/0010-ffmpeg-optimize-stcodecenc-logic.patch new file mode 100644 index 00000000..be0ccde4 --- /dev/null +++ b/package/ffmpeg/0010-ffmpeg-optimize-stcodecenc-logic.patch @@ -0,0 +1,327 @@ +From 23c0a7e3fb8f22afe6513e5d72c61302f04901ad Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Sun, 7 Apr 2024 15:34:13 +0800 +Subject: [PATCH] ffmpeg: optimize stcodecenc logic + +--- + libavcodec/stcodecenc.c | 221 
+++++++++++++++------------------------- + 1 file changed, 81 insertions(+), 140 deletions(-) + +diff --git a/libavcodec/stcodecenc.c b/libavcodec/stcodecenc.c +index eeaa41a..30025e2 100755 +--- a/libavcodec/stcodecenc.c ++++ b/libavcodec/stcodecenc.c +@@ -53,12 +53,6 @@ typedef struct { + MppVencCtx* pVencCtx; + MppPacket* pPacket; + MppFrame* pFrame; +- +- char first_packet; +- char eos_reached; +- +- AVBufferRef* frames_ref; +- AVBufferRef* device_ref; + } STCODECEncoder; + + typedef struct { +@@ -66,11 +60,6 @@ typedef struct { + AVBufferRef* encoder_ref; + } STCODECEncodeContext; + +-typedef struct { +- MppFrame* pFrame; +- AVBufferRef* encoder_ref; +-} STCODECFrameContext; +- + static MppCodingType stcodec_get_codingtype(AVCodecContext* avctx) { + switch (avctx->codec_id) { + case AV_CODEC_ID_H264: +@@ -92,22 +81,35 @@ static int get_stride(int width, int align) { + return (width + align - 1) & (~(align - 1)); + } + +-static int stcodec_send_frame(AVCodecContext* avctx, const AVFrame* frame) { ++static int stcodec_receive_packet(AVCodecContext* avctx, AVPacket* avpkt) { + STCODECEncodeContext* st_context = avctx->priv_data; + STCODECEncoder* encoder = (STCODECEncoder*)st_context->encoder_ref->data; + int ret = -1; ++ int index = -1; ++ AVFrame* frame = av_frame_alloc(); ++ ++ ret = ff_encode_get_frame(avctx, frame); ++ if (ret < 0 && ret != AVERROR_EOF) { ++ av_log(avctx, AV_LOG_DEBUG, "ff_encode_get_frame fail, return\n"); ++ return ret; ++ } ++ ++ if (ret == AVERROR_EOF) { ++ av_log(avctx, AV_LOG_ERROR, "ff_encode_get_frame get EOS\n"); ++ frame = NULL; ++ } + + if (frame != NULL) { +- av_log(avctx, AV_LOG_ERROR, "@@@@@@@@ format:%d fd:%d %p %p\n", +- frame->format, +- ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].fd, +- frame->data[0], frame->data[1]); + FRAME_SetEos(encoder->pFrame, 0); + if (frame->format == AV_PIX_FMT_NV12) { + FRAME_SetDataUsedNum(encoder->pFrame, 2); + FRAME_SetDataPointer(encoder->pFrame, 0, frame->data[0]); + 
FRAME_SetDataPointer(encoder->pFrame, 1, frame->data[1]); +- // FRAME_SetDataPointer(encoder->pFrame, 2, frame->data[2]); ++ } else if (frame->format == AV_PIX_FMT_YUV420P) { ++ FRAME_SetDataUsedNum(encoder->pFrame, 3); ++ FRAME_SetDataPointer(encoder->pFrame, 0, frame->data[0]); ++ FRAME_SetDataPointer(encoder->pFrame, 1, frame->data[1]); ++ FRAME_SetDataPointer(encoder->pFrame, 2, frame->data[3]); + } else if (frame->format == AV_PIX_FMT_DRM_PRIME) { + void* vaddr = + mmap(NULL, ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].size, +@@ -117,74 +119,61 @@ static int stcodec_send_frame(AVCodecContext* avctx, const AVFrame* frame) { + FRAME_SetDataPointer(encoder->pFrame, 0, (U8*)vaddr); + FRAME_SetDataPointer(encoder->pFrame, 1, + ((U8*)vaddr) + frame->width * frame->height); +- // FRAME_SetDataPointer(encoder->pFrame, 2, +- // ((U8*)vaddr) + frame->width * frame->height * 5 / +- // 4); + } else { ++ av_log(avctx, AV_LOG_ERROR, "format(%d) not support!", frame->format); + } +- ret = VENC_Encode(encoder->pVencCtx, FRAME_GetBaseData(encoder->pFrame)); +- } else { +- FRAME_SetEos(encoder->pFrame, 1); +- av_log(avctx, AV_LOG_ERROR, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ bigbig %d\n", +- FRAME_GetEos(encoder->pFrame)); +- // FRAME_SetDataUsedNum(encoder->pFrame, 0); +- ret = VENC_Encode(encoder->pVencCtx, FRAME_GetBaseData(encoder->pFrame)); +- } + +- return ret; +-} ++ ret = VENC_SendInputFrame(encoder->pVencCtx, ++ FRAME_GetBaseData(encoder->pFrame)); ++ do { ++ ret = VENC_GetOutputStreamBuffer(encoder->pVencCtx, ++ PACKET_GetBaseData(encoder->pPacket)); ++ if (ret == MPP_OK) { ++ avpkt->size = PACKET_GetLength(encoder->pPacket); ++ av_new_packet(avpkt, avpkt->size); ++ memcpy(avpkt->data, PACKET_GetDataPointer(encoder->pPacket), ++ avpkt->size); ++ } ++ } while (ret != MPP_OK); + +-static int stcodec_receive_packet(AVCodecContext* avctx, AVPacket* avpkt) { +- STCODECEncodeContext* st_context = avctx->priv_data; +- STCODECEncoder* encoder = 
(STCODECEncoder*)st_context->encoder_ref->data; +- int ret = -1; +- AVFrame* frame = av_frame_alloc(); +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 0.\n"); +- // if (!frame->buf[0]) { +- ret = ff_encode_get_frame(avctx, frame); ++ index = -1; ++ do { ++ index = VENC_ReturnInputFrame(encoder->pVencCtx, NULL); ++ if (index >= 0) { ++ av_log(avctx, AV_LOG_DEBUG, "a frame return"); ++ } ++ } while (index == -1); ++ } else { ++ FRAME_SetEos(encoder->pFrame, FRAME_EOS_WITHOUT_DATA); ++ ret = VENC_SendInputFrame(encoder->pVencCtx, ++ FRAME_GetBaseData(encoder->pFrame)); + +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 0. RET = %d\n", +- ret); +- if (ret < 0 && ret != AVERROR_EOF) return ret; ++ do { ++ ret = VENC_GetOutputStreamBuffer(encoder->pVencCtx, ++ PACKET_GetBaseData(encoder->pPacket)); ++ if (ret == MPP_OK || ret == MPP_CODER_EOS) { ++ avpkt->size = PACKET_GetLength(encoder->pPacket); ++ av_new_packet(avpkt, avpkt->size); ++ memcpy(avpkt->data, PACKET_GetDataPointer(encoder->pPacket), ++ avpkt->size); ++ } ++ } while (ret != MPP_OK && ret != MPP_CODER_EOS); + +- if (ret == AVERROR_EOF) { +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 get eos\n"); +- frame = NULL; ++ index = -1; ++ do { ++ index = VENC_ReturnInputFrame(encoder->pVencCtx, NULL); ++ if (index >= 0) { ++ av_log(avctx, AV_LOG_DEBUG, "a frame return"); ++ } ++ } while (index != -1); + } +- //} +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 1.\n"); +- ret = stcodec_send_frame(avctx, frame); +- if (ret != AVERROR(EAGAIN)) av_frame_unref(frame); + +- if (ret < 0 && ret != AVERROR(EAGAIN)) return ret; ++ usleep(1000); ++ if (ret != AVERROR(EAGAIN)) av_frame_unref(frame); + +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 2.\n"); +- // return ff_v4l2_context_dequeue_packet(capture, avpkt); +-haha: +- ret = VENC_RequestOutputStreamBuffer(encoder->pVencCtx, +- PACKET_GetBaseData(encoder->pPacket)); +- av_log(avctx, 
AV_LOG_ERROR, "1111111111111111111111111111111 2. ret = %d\n", +- ret); +- if (ret == MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 3.\n"); +- avpkt->size = PACKET_GetLength(encoder->pPacket); +- av_log(avctx, AV_LOG_DEBUG, "1111111111111111111111111111111 4. %d=%d %p\n", +- PACKET_GetLength(encoder->pPacket), avpkt->size, +- PACKET_GetDataPointer(encoder->pPacket)); +- av_new_packet(avpkt, avpkt->size); +- memcpy(avpkt->data, PACKET_GetDataPointer(encoder->pPacket), avpkt->size); +- av_log(avctx, AV_LOG_ERROR, "1111111111111111111111111111111 5.\n"); +- VENC_ReturnOutputStreamBuffer(encoder->pVencCtx, +- PACKET_GetBaseData(encoder->pPacket)); +- } else if (ret == MPP_CODER_NO_DATA) { +- av_log(avctx, AV_LOG_ERROR, "get no data.\n"); +- // return AVERROR(EAGAIN); +- goto haha; +- } else if (ret == MPP_CODER_EOS) { +- av_log(avctx, AV_LOG_ERROR, "get EOS.\n"); ++ if (ret == MPP_CODER_EOS) { ++ av_log(avctx, AV_LOG_ERROR, "get final EOS.\n"); + return AVERROR_EOF; +- } else { +- av_log(avctx, AV_LOG_ERROR, "get ???. 
%d\n", ret); + } + + return 0; +@@ -199,14 +188,25 @@ static av_cold int stcodec_close_encoder(AVCodecContext* avctx) { + + static void stcodec_release_encoder(void* opaque, uint8_t* data) { + STCODECEncoder* encoder = (STCODECEncoder*)data; ++ ++ if (encoder->pPacket) { ++ // PACKET_Free(decoder->pPacket); ++ PACKET_Destory(encoder->pPacket); ++ encoder->pPacket = NULL; ++ } ++ ++ if (encoder->pFrame) { ++ FRAME_Destory(encoder->pFrame); ++ encoder->pFrame = NULL; ++ } ++ + if (encoder->pVencCtx) { + av_log(NULL, AV_LOG_ERROR, "stcodec release encoder\n"); + VENC_ResetChannel(encoder->pVencCtx); + VENC_DestoryChannel(encoder->pVencCtx); + encoder->pVencCtx = NULL; + } +- av_buffer_unref(&encoder->frames_ref); +- av_buffer_unref(&encoder->device_ref); ++ + av_free(encoder); + } + +@@ -217,8 +217,8 @@ static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { + MppCodingType codectype = CODING_UNKNOWN; + int ret; + +- if (avctx->width > 4096 || avctx->height > 2160 || avctx->width <= 640 || +- avctx->height <= 480) { ++ if (avctx->width > 4096 || avctx->height > 4096 || avctx->width < 640 || ++ avctx->height < 360) { + av_log(avctx, AV_LOG_ERROR, + "STCODEC-ENC do not support the size, too big or too small!\n"); + ret = AVERROR_UNKNOWN; +@@ -259,13 +259,6 @@ static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { + ret = AVERROR_UNKNOWN; + goto fail; + } +- /* +- ret = mpp_check_support_format(MPP_CTX_DEC, codectype); +- if (ret != MPP_OK) { +- av_log(avctx, AV_LOG_ERROR, "Codec type (%d) unsupported by MPP\n", +- avctx->codec_id); ret = AVERROR_UNKNOWN; goto fail; +- } +- */ + + // Create the MPP context + encoder->pVencCtx = VENC_CreateChannel(); +@@ -282,9 +275,9 @@ static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { + encoder->pVencCtx->stVencPara.nHeight = avctx->height; + encoder->pVencCtx->stVencPara.nStride = get_stride(avctx->width, 8); + encoder->pVencCtx->stVencPara.PixelFormat = PIXEL_FORMAT_NV12; ++ 
encoder->pVencCtx->stVencPara.eFrameBufferType = ++ MPP_FRAME_BUFFERTYPE_NORMAL_EXTERNAL; + encoder->pVencCtx->eCodecType = CODEC_V4L2_LINLONV5V7; +- encoder->pVencCtx->stVencPara.nBitrate = 5000000; +- encoder->pVencCtx->stVencPara.nFrameRate = 30; + av_log(avctx, AV_LOG_ERROR, "(widthxheight = %d x %d).\n", avctx->width, + avctx->height); + +@@ -321,58 +314,6 @@ static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { + goto fail; + } + +- VENC_SetParam(encoder->pVencCtx, &(encoder->pVencCtx->stVencPara)); +- +- /* +- // make decode calls blocking with a timeout +- paramS32 = MPP_POLL_BLOCK; +- ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK, +- ¶mS32); +- +- paramS64 = RECEIVE_FRAME_TIMEOUT; +- ret = decoder->mpi->control(decoder->ctx, MPP_SET_OUTPUT_BLOCK_TIMEOUT, +- ¶mS64); +- +- ret = mpp_buffer_group_get_internal(&decoder->frame_group, +- MPP_BUFFER_TYPE_ION); +- ret = decoder->mpi->control(decoder->ctx, MPP_DEC_SET_EXT_BUF_GROUP, +- decoder->frame_group); +- +- ret = mpp_buffer_group_limit_config(decoder->frame_group, 0, +- FRAMEGROUP_MAX_FRAMES); +- */ +- // decoder->first_packet = 1; +- +- av_log(avctx, AV_LOG_DEBUG, "STCODEC encoder initialized successfully.\n"); +- /* +- encoder->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); +- if (!encoder->device_ref) { +- av_log(avctx, AV_LOG_ERROR, "Failed to av_hwdevice_ctx_alloc\n"); +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- ret = av_hwdevice_ctx_init(encoder->device_ref); +- if (ret < 0) { +- av_log(avctx, AV_LOG_ERROR, "Failed to av_hwdevice_ctx_init\n"); +- goto fail; +- } +- +- AVHWFramesContext* hwframes; +- avctx->width = avctx->width; +- avctx->height = avctx->height; +- encoder->frames_ref = av_hwframe_ctx_alloc(encoder->device_ref); +- if (!encoder->frames_ref) { +- ret = AVERROR(ENOMEM); +- goto fail; +- } +- hwframes = (AVHWFramesContext*)encoder->frames_ref->data; +- hwframes->format = AV_PIX_FMT_YUV420P;//AV_PIX_FMT_DRM_PRIME; +- hwframes->sw_format = 
AV_PIX_FMT_YUV420P; +- hwframes->width = avctx->width; +- hwframes->height = avctx->height; +- ret = av_hwframe_ctx_init(encoder->frames_ref); +- if (ret < 0) goto fail; +- */ + av_log(avctx, AV_LOG_ERROR, "Initialized successfully.\n"); + return 0; + +-- +2.25.1 + diff --git a/package/ffmpeg/0011-fix-bug-video-transcoding-blur.patch b/package/ffmpeg/0011-fix-bug-video-transcoding-blur.patch new file mode 100644 index 00000000..1b25cdf8 --- /dev/null +++ b/package/ffmpeg/0011-fix-bug-video-transcoding-blur.patch @@ -0,0 +1,53 @@ +From 4fd5362fcef8f8d7c10dd1624c40d29880a03324 Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Sun, 7 Apr 2024 19:46:55 +0800 +Subject: [PATCH] fix bug: video transcoding blur + +--- + libavcodec/stcodecdec.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/libavcodec/stcodecdec.c b/libavcodec/stcodecdec.c +index 66b1305..6acb89f 100755 +--- a/libavcodec/stcodecdec.c ++++ b/libavcodec/stcodecdec.c +@@ -251,8 +251,8 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + MppCodingType codectype = CODING_UNKNOWN; + int ret; + +- if ((avctx->width > 4096 || avctx->height > 2304 || avctx->width <= 640 || +- avctx->height <= 480) && ++ if ((avctx->width > 4096 || avctx->height > 4096 || avctx->width < 640 || ++ avctx->height < 320) && + (avctx->width != 0 || avctx->height != 0)) { + av_log(avctx, AV_LOG_ERROR, + "STCODEC Decoder do not support the size (%d x %d), too big or too " +@@ -312,7 +312,15 @@ static int stcodec_init_decoder(AVCodecContext* avctx) { + decoder->pVdecCtx->stVdecPara.nWidth = avctx->width; + decoder->pVdecCtx->stVdecPara.nHeight = avctx->height; + decoder->pVdecCtx->stVdecPara.nStride = get_stride(avctx->width, 8); +- decoder->pVdecCtx->stVdecPara.eOutputPixelFormat = stcodec_get_pixelformat(avctx); ++ decoder->pVdecCtx->stVdecPara.eOutputPixelFormat = ++ stcodec_get_pixelformat(avctx); ++ if (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { ++ 
decoder->pVdecCtx->stVdecPara.eFrameBufferType = ++ MPP_FRAME_BUFFERTYPE_DMABUF_INTERNAL; ++ } else { ++ decoder->pVdecCtx->stVdecPara.eFrameBufferType = ++ MPP_FRAME_BUFFERTYPE_NORMAL_INTERNAL; ++ } + decoder->pVdecCtx->eCodecType = CODEC_V4L2_LINLONV5V7; + if (avctx->width >= 3840 || avctx->height >= 2160) { + av_log(avctx, AV_LOG_ERROR, "4K video, downscale!\n"); +@@ -665,7 +673,7 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + frame->linesize[0] = get_stride(avctx->width, 8); + frame->linesize[1] = get_stride(avctx->width, 8); + frame->data[0] = FRAME_GetDataPointer(mppframe, 0); +- frame->data[1] = frame->data[0] + frame->width * frame->height; ++ frame->data[1] = FRAME_GetDataPointer(mppframe, 1); + frame->buf[0] = av_buffer_create( + (uint8_t*)(frame->data[0]), sizeof(frame->data[0]), + stcodec_release_frame, framecontextref, AV_BUFFER_FLAG_READONLY); +-- +2.25.1 + diff --git a/package/ffmpeg/0012-stcodecenc-support-YUV420P-and-fix-some-small-bugs.patch b/package/ffmpeg/0012-stcodecenc-support-YUV420P-and-fix-some-small-bugs.patch new file mode 100644 index 00000000..b497d139 --- /dev/null +++ b/package/ffmpeg/0012-stcodecenc-support-YUV420P-and-fix-some-small-bugs.patch @@ -0,0 +1,76 @@ +From 65302ef6df4dd3a8228e0347163aa6d91e82991c Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Mon, 8 Apr 2024 15:41:53 +0800 +Subject: [PATCH] stcodecenc support YUV420P and fix some small bugs + +--- + libavcodec/stcodecdec.c | 1 + + libavcodec/stcodecenc.c | 19 ++++++++++++++++--- + 2 files changed, 17 insertions(+), 3 deletions(-) + +diff --git a/libavcodec/stcodecdec.c b/libavcodec/stcodecdec.c +index 6acb89f..0300554 100755 +--- a/libavcodec/stcodecdec.c ++++ b/libavcodec/stcodecdec.c +@@ -566,6 +566,7 @@ static int stcodec_receive_frame(AVCodecContext* avctx, AVFrame* frame) { + + if (ret == MPP_CODER_NO_DATA) { + av_log(avctx, AV_LOG_ERROR, "no data, return\n"); ++ usleep(5000); + ret = AVERROR(EAGAIN); + goto fail; + } +diff --git 
a/libavcodec/stcodecenc.c b/libavcodec/stcodecenc.c +index 30025e2..c976dec 100755 +--- a/libavcodec/stcodecenc.c ++++ b/libavcodec/stcodecenc.c +@@ -60,6 +60,19 @@ typedef struct { + AVBufferRef* encoder_ref; + } STCODECEncodeContext; + ++static MppPixelFormat stcodec_get_pixelformat(AVCodecContext* avctx) { ++ switch (avctx->pix_fmt) { ++ case AV_PIX_FMT_NV12: ++ return PIXEL_FORMAT_NV12; ++ case AV_PIX_FMT_YUV420P: ++ return PIXEL_FORMAT_I420; ++ case AV_PIX_FMT_DRM_PRIME: ++ return PIXEL_FORMAT_NV12; ++ default: ++ return PIXEL_FORMAT_UNKNOWN; ++ } ++} ++ + static MppCodingType stcodec_get_codingtype(AVCodecContext* avctx) { + switch (avctx->codec_id) { + case AV_CODEC_ID_H264: +@@ -109,7 +122,7 @@ static int stcodec_receive_packet(AVCodecContext* avctx, AVPacket* avpkt) { + FRAME_SetDataUsedNum(encoder->pFrame, 3); + FRAME_SetDataPointer(encoder->pFrame, 0, frame->data[0]); + FRAME_SetDataPointer(encoder->pFrame, 1, frame->data[1]); +- FRAME_SetDataPointer(encoder->pFrame, 2, frame->data[3]); ++ FRAME_SetDataPointer(encoder->pFrame, 2, frame->data[2]); + } else if (frame->format == AV_PIX_FMT_DRM_PRIME) { + void* vaddr = + mmap(NULL, ((AVDRMFrameDescriptor*)(frame->data[0]))->objects[0].size, +@@ -274,7 +287,7 @@ static av_cold int stcodec_init_encoder(AVCodecContext* avctx) { + encoder->pVencCtx->stVencPara.nWidth = avctx->width; + encoder->pVencCtx->stVencPara.nHeight = avctx->height; + encoder->pVencCtx->stVencPara.nStride = get_stride(avctx->width, 8); +- encoder->pVencCtx->stVencPara.PixelFormat = PIXEL_FORMAT_NV12; ++ encoder->pVencCtx->stVencPara.PixelFormat = stcodec_get_pixelformat(avctx); + encoder->pVencCtx->stVencPara.eFrameBufferType = + MPP_FRAME_BUFFERTYPE_NORMAL_EXTERNAL; + encoder->pVencCtx->eCodecType = CODEC_V4L2_LINLONV5V7; +@@ -349,7 +362,7 @@ static const AVCodecHWConfigInternal* const stcodec_hw_configs[] = { + .wrapper_name = "stcodec", \ + .pix_fmts = \ + (const enum AVPixelFormat[]){AV_PIX_FMT_DRM_PRIME, AV_PIX_FMT_NV12, \ +- 
AV_PIX_FMT_NONE}, \ ++ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}, \ + .hw_configs = stcodec_hw_configs, \ + }; + +-- +2.25.1 + diff --git a/package/gstreamer1/gst1-plugins-bad/0001-support-spacemit-plugin.patch b/package/gstreamer1/gst1-plugins-bad/0001-support-spacemit-plugin.patch new file mode 100644 index 00000000..e811f8b6 --- /dev/null +++ b/package/gstreamer1/gst1-plugins-bad/0001-support-spacemit-plugin.patch @@ -0,0 +1,7727 @@ +From f386f8e5bdba56ff630ab433af0d7440d4cb1f33 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=BB=8E=E5=BF=97=E8=8D=A3?= +Date: Thu, 7 Mar 2024 16:46:06 +0800 +Subject: [PATCH] support spacemit plugin + +--- + ext/meson.build | 1 + + ext/spacemit/meson.build | 2 + + .../spacemitcodec/gstspacemitallocator.c | 450 ++++++ + .../spacemitcodec/gstspacemitallocator.h | 129 ++ + .../spacemitcodec/gstspacemitbufferpool.c | 344 ++++ + .../spacemitcodec/gstspacemitbufferpool.h | 72 + + ext/spacemit/spacemitcodec/gstspacemitdec.c | 1371 ++++++++++++++++ + ext/spacemit/spacemitcodec/gstspacemitdec.h | 98 ++ + .../spacemitcodec/gstspacemitdec_bak.c | 1394 +++++++++++++++++ + ext/spacemit/spacemitcodec/gstspacemitenc.c | 862 ++++++++++ + ext/spacemit/spacemitcodec/gstspacemitenc.h | 105 ++ + .../spacemitcodec/gstspacemitenc_bak.c | 1022 ++++++++++++ + .../spacemitcodec/gstspacemitplugin.c | 29 + + .../spacemitcodec/gstspmdmabufallocator.c | 310 ++++ + .../spacemitcodec/gstspmdmabufallocator.h | 123 ++ + ext/spacemit/spacemitcodec/meson.build | 22 + + ext/spacemit/spacemitsrc/gstspacemitsrc.c | 807 ++++++++++ + ext/spacemit/spacemitsrc/gstspacemitsrc.h | 99 ++ + ext/spacemit/spacemitsrc/gstspmsrcallocator.c | 143 ++ + ext/spacemit/spacemitsrc/gstspmsrcallocator.h | 131 ++ + ext/spacemit/spacemitsrc/meson.build | 19 + + meson_options.txt | 1 + + 22 files changed, 7534 insertions(+) + create mode 100755 ext/spacemit/meson.build + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitallocator.c + create mode 100755 
ext/spacemit/spacemitcodec/gstspacemitallocator.h + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitbufferpool.c + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitbufferpool.h + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitdec.c + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitdec.h + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitdec_bak.c + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitenc.c + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitenc.h + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitenc_bak.c + create mode 100755 ext/spacemit/spacemitcodec/gstspacemitplugin.c + create mode 100755 ext/spacemit/spacemitcodec/gstspmdmabufallocator.c + create mode 100755 ext/spacemit/spacemitcodec/gstspmdmabufallocator.h + create mode 100755 ext/spacemit/spacemitcodec/meson.build + create mode 100755 ext/spacemit/spacemitsrc/gstspacemitsrc.c + create mode 100755 ext/spacemit/spacemitsrc/gstspacemitsrc.h + create mode 100755 ext/spacemit/spacemitsrc/gstspmsrcallocator.c + create mode 100755 ext/spacemit/spacemitsrc/gstspmsrcallocator.h + create mode 100755 ext/spacemit/spacemitsrc/meson.build + +diff --git a/ext/meson.build b/ext/meson.build +index 17195f8..12838d3 100644 +--- a/ext/meson.build ++++ b/ext/meson.build +@@ -73,3 +73,4 @@ subdir('wpe') + subdir('x265') + subdir('zxing') + subdir('zbar') ++subdir('spacemit') +diff --git a/ext/spacemit/meson.build b/ext/spacemit/meson.build +new file mode 100755 +index 0000000..32e7164 +--- /dev/null ++++ b/ext/spacemit/meson.build +@@ -0,0 +1,2 @@ ++subdir('spacemitcodec') ++subdir('spacemitsrc') +\ No newline at end of file +diff --git a/ext/spacemit/spacemitcodec/gstspacemitallocator.c b/ext/spacemit/spacemitcodec/gstspacemitallocator.c +new file mode 100755 +index 0000000..abb2b84 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitallocator.c +@@ -0,0 +1,450 @@ ++/* GStreamer ++ * ++ * Copyright (C) 2016 Igalia ++ * ++ * Authors: ++ * 
Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "gstspacemitallocator.h" ++ ++#define GST_CAT_DEFAULT spacemitallocator_debug_category ++GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT); ++ ++#define GST_SPACEMIT_MEMORY_TYPE "SpaceMitMemory" ++ ++#define parent_class gst_spacemit_allocator_parent_class ++ ++G_DEFINE_TYPE_WITH_CODE (GstSpaceMitAllocator, gst_spacemit_allocator, ++ GST_TYPE_ALLOCATOR, ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spacemitallocator", 0, ++ "SPACEMIT allocator")); ++ ++GQuark ++gst_spm_dmabuf_memory_quark (void) ++{ ++ static GQuark quark = 0; ++ ++ if (quark == 0) ++ quark = g_quark_from_static_string ("GstSpmDmabufMemory"); ++ ++ return quark; ++} ++GQuark ++gst_spacemit_allocator_quark (void) ++{ ++ static GQuark quark = 0; ++ ++ if (quark == 0) ++ quark = g_quark_from_static_string ("GstSpaceMitAllocator"); ++ ++ return quark; ++} ++ ++gboolean ++gst_is_spacemit_memory (GstMemory * base_mem) ++{ ++ return gst_memory_is_type (base_mem, GST_SPACEMIT_MEMORY_TYPE); ++} ++ ++static void ++gst_spacemit_allocator_free (GstAllocator * allocator, GstMemory * base_mem) ++{ ++// GstSpaceMitAllocator *alloc = 
GST_SPACEMIT_ALLOCATOR (allocator); ++ GstSpaceMitMemory *mem = (GstSpaceMitMemory *) base_mem; ++ GST_DEBUG ("ZRong ------------------- spacemit free mem"); ++ ++ g_warn_if_fail (!mem->acquired); ++ ++ if (mem->foreign_mem) ++ gst_memory_unref (mem->foreign_mem); ++ else ++ GST_ERROR ("ZRong err ------------------- spacemit free"); ++ ++// g_slice_free (GstSpaceMitMemory, mem); ++ ++// GST_ALLOCATOR_CLASS (parent_class)->free (allocator, mem); ++} ++ ++static void ++gst_unref_spacemit_mem (gpointer key, gpointer value, gpointer user_data) ++{ ++ GstMemory *base_mem = (GstMemory *)value; ++ GstSpaceMitMemory *mem; ++ ++ if (GST_IS_SPACEMIT_ALLOCATOR (base_mem->allocator)) ++ mem = (GstSpaceMitMemory *) base_mem; ++ else ++ mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (base_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ ++ if (value) { ++ GST_DEBUG ("allocator finalize, mem:%p, base_mem:%p, %d", mem, base_mem, base_mem->mini_object.refcount); ++ gst_memory_unref(base_mem); ++ } else { ++ GST_ERROR ("allocator finalize err, base_mem:%p, %d", base_mem, base_mem->mini_object.refcount); ++ } ++ g_slice_free (GstSpaceMitMemory, mem); ++} ++ ++static void ++gst_spacemit_allocator_finalize (GObject * obj) ++{ ++ GstSpaceMitAllocator *alloc = GST_SPACEMIT_ALLOCATOR (obj); ++ GST_DEBUG_OBJECT (alloc, "ZRong ------------------- spacemit allocator finalize"); ++ ++ G_OBJECT_CLASS (parent_class)->finalize (obj); ++} ++/* installed as the GstMiniObject::dispose function of the acquired GstMemory */ ++static gboolean ++gst_spacemit_allocator_memory_dispose (GstMemory * base_mem) ++{ ++ GstSpaceMitMemory *mem; ++ GstSpaceMitAllocator *allocator; ++ gint fd = -1; ++ ++ if (GST_IS_SPACEMIT_ALLOCATOR (base_mem->allocator)) { ++ mem = (GstSpaceMitMemory *) base_mem; ++ allocator = (GstSpaceMitAllocator *)base_mem->allocator; ++ fd = -1; ++ } else { ++ mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (base_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ allocator = gst_mini_object_get_qdata 
(GST_MINI_OBJECT (base_mem), ++ GST_SPACEMIT_ALLOCATOR_QUARK); ++ fd = gst_fd_memory_get_fd(base_mem); ++ } ++ ++ base_mem->size = mem->memsize; ++ ++ if (mem->acquired) ++ { ++ VDEC_ReturnOutputFrame(mem->ctx, FRAME_GetBaseData(mem->mppframe)); ++ FRAME_Destory(mem->mppframe); ++ mem->ctx = NULL; ++ mem->mppframe = NULL; ++ ++ /* keep the memory alive */ ++ gst_memory_ref (base_mem); ++ GST_DEBUG ("memory dispose success, mem %p fd:(%d, %d), id:%d, base_mem:%p, ref:%d", ++ mem, mem->fd, fd, mem->mppframe_id, base_mem, base_mem->mini_object.refcount); ++ ++ mem->acquired = FALSE; ++ ++ return FALSE; ++ } ++ GST_DEBUG ("memory dispose222 success, mem %p fd:(%d, %d), id:%d, base_mem:%p, ref:%d", ++ mem, mem->fd, fd, mem->mppframe_id, base_mem, base_mem->mini_object.refcount); ++ ++ return TRUE; ++} ++ ++static GstSpaceMitMemory * ++gst_spacemit_memory_new (GstSpaceMitAllocator * allocator, gsize maxsize, ++ GstMemoryFlags flags, GstMemory * parent, gssize offset, gssize size) ++{ ++ GstSpaceMitMemory *mem; ++ gint align; ++ ++ /* GStreamer uses a bitmask for the alignment while ++ * OMX uses the alignment itself. 
So we have to convert ++ * here */ ++ align = 0; ++ ++ if (size == -1) { ++ size = maxsize - offset; ++ } ++ ++ mem = g_slice_new0 (GstSpaceMitMemory); ++ gst_memory_init (GST_MEMORY_CAST (mem), flags, (GstAllocator *) allocator, ++ parent, maxsize, align, offset, size); ++ ++ mem->memsize = size; ++ mem->acquired = FALSE; ++ ++ return mem; ++} ++ ++static inline void ++install_mem_dispose (GstMemory * base_mem) ++{ ++ GST_MINI_OBJECT_CAST (base_mem)->dispose = ++ (GstMiniObjectDisposeFunction) gst_spacemit_allocator_memory_dispose; ++} ++ ++GstMemory * ++gst_spacemit_allocator_alloc (GstSpaceMitAllocator * allocator, gsize size) ++{ ++ GstMemory *ret_mem; ++ GstSpaceMitMemory *mem; ++ guint8 *data; ++ ++ GST_DEBUG_OBJECT (allocator, "allocator alloc paras, size: %d", size); ++ ++ if (allocator->mode == GST_SPM_MEMORY_TYPE_SYSTEM) { ++ mem = gst_spacemit_memory_new (allocator, size, 0, NULL, 0, size); ++ install_mem_dispose (GST_MEMORY_CAST (mem)); ++ } else { ++ mem = g_slice_new0 (GstSpaceMitMemory); ++ mem->memsize = size; ++ mem->acquired = FALSE; ++ mem->fd = allocator->dmabuf_fd; ++ ++ mem->foreign_mem = ++ gst_fd_allocator_alloc (allocator->foreign_allocator, allocator->dmabuf_fd, size, GST_FD_MEMORY_FLAG_DONT_CLOSE); ++ gst_mini_object_set_qdata (GST_MINI_OBJECT (mem->foreign_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK, mem, NULL); ++ gst_mini_object_set_qdata (GST_MINI_OBJECT (mem->foreign_mem), ++ GST_SPACEMIT_ALLOCATOR_QUARK, allocator, NULL); ++ install_mem_dispose (mem->foreign_mem); ++ } ++ ++ ret_mem = mem->foreign_mem ? 
mem->foreign_mem : (GstMemory *) mem; ++ ++ GST_DEBUG_OBJECT (allocator, "allocator success alloc mem:%p, return mem:%p", mem, ret_mem); ++ ++ return ret_mem; ++} ++ ++GstFlowReturn ++gst_spacemit_allocator_acquire (GstAllocator * base_allocator, GstMemory ** memory) ++{ ++ GstMemory *base_mem; ++ GstSpaceMitMemory *mem; ++ GstSpaceMitAllocator *allocator = GST_SPACEMIT_ALLOCATOR (base_allocator); ++ ++ base_mem = (GstMemory *) g_hash_table_lookup (allocator->memories, GINT_TO_POINTER(allocator->mppframe_id)); ++ if (!base_mem) { ++ base_mem = gst_spacemit_allocator_alloc (allocator, allocator->info.size); ++ GST_DEBUG_OBJECT (allocator, "insert id%d fd%d to memories hash", allocator->mppframe_id, allocator->dmabuf_fd); ++ g_hash_table_insert(allocator->memories, GINT_TO_POINTER(allocator->mppframe_id), base_mem); ++ } ++ if (GST_IS_SPACEMIT_ALLOCATOR (base_mem->allocator)) ++ mem = (GstSpaceMitMemory *) base_mem; ++ else ++ mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (base_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ mem->acquired = TRUE; ++ mem->mppframe_id = allocator->mppframe_id; ++ *memory = base_mem; ++ ++ GST_DEBUG_OBJECT (allocator, "acquire mem %p size:%d, fd:%d, id:%d, base_mem:%p", mem, (*memory)->size, mem->fd, mem->mppframe_id, base_mem); ++ return GST_FLOW_OK; ++} ++ ++gboolean ++gst_spacemit_allocator_configure(GstAllocator * base_allocator, GstVideoInfo * info, gboolean use_dmabuf) ++{ ++ GstSpaceMitAllocator *allocator = GST_SPACEMIT_ALLOCATOR (base_allocator); ++ if (!info) { ++ GST_ERROR_OBJECT (allocator, "error, get a NULL info to allocator!"); ++ return FALSE; ++ } ++ allocator->info = *info; ++ ++ if (use_dmabuf) { ++ allocator->foreign_allocator = gst_dmabuf_allocator_new (); ++ allocator->mode = GST_SPM_MEMORY_TYPE_DMABUF; ++ GST_DEBUG_OBJECT (allocator, "create a dmabuf allocator!"); ++ } else { ++ allocator->mode = GST_SPM_MEMORY_TYPE_SYSTEM; ++ } ++ ++ allocator->memories = g_hash_table_new (g_direct_hash, g_direct_equal); ++ 
allocator->mppframe_id = -1; ++ allocator->active = FALSE; ++ allocator->mem_back = TRUE; ++ g_mutex_init (&allocator->lock); ++ ++ return TRUE; ++} ++ ++static void ++gst_check_mem_status (gpointer key, gpointer value, gpointer user_data) ++{ ++ GstMemory *base_mem = (GstMemory *)value; ++ GstSpaceMitAllocator *allocator = (GstSpaceMitAllocator *)user_data; ++ GstSpaceMitMemory *mem; ++ ++ if (GST_IS_SPACEMIT_ALLOCATOR (base_mem->allocator)) ++ mem = (GstSpaceMitMemory *) base_mem; ++ else ++ mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (base_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ ++ if (mem->acquired == TRUE) { ++ GST_DEBUG_OBJECT (allocator, "check, mem%d had acquired, mem:%p, base_mem:%p, ref:%d", ++ mem->mppframe_id, mem, base_mem, base_mem->mini_object.refcount); ++ allocator->mem_back = FALSE; ++ } ++ GST_DEBUG_OBJECT (allocator, "check mem status: %d, %d, %d", mem->mppframe_id, mem->acquired, allocator->mem_back); ++ ++} ++ ++gboolean ++gst_spacemit_allocator_wait_inactive (GstAllocator * base_allocator) ++{ ++ GstSpaceMitAllocator *allocator = GST_SPACEMIT_ALLOCATOR (base_allocator); ++ ++ while (TRUE) { ++ allocator->mem_back = TRUE; ++ g_hash_table_foreach (allocator->memories, gst_check_mem_status, allocator); ++ ++ GST_DEBUG_OBJECT (allocator, "alloctor check mem all back is: %d", allocator->mem_back); ++ break; ++ if (allocator->mem_back) { ++ break; ++ } else { ++ GST_DEBUG_OBJECT (allocator, "wait for spacemit mem inactive"); ++ g_usleep(5 * 1000); ++ } ++ } ++} ++ ++void gst_spacemitdec_mem_hash_reinit (GstAllocator * base_allocator) ++{ ++ GstSpaceMitAllocator *allocator = GST_SPACEMIT_ALLOCATOR (base_allocator); ++ ++ g_hash_table_foreach (allocator->memories, gst_unref_spacemit_mem, NULL); ++ g_hash_table_remove_all (allocator->memories); ++} ++ ++gboolean ++gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean active) ++{ ++ gboolean changed = FALSE; ++ ++ g_mutex_lock (&allocator->lock); ++ ++ if 
(allocator->active != active) ++ changed = TRUE; ++ ++ GST_DEBUG_OBJECT (allocator, "allocator set active/deactice paras, cur:%d, set:%d", allocator->active, active); ++ ++ if (changed) { ++ if (active) { ++ ++ } else { ++ gst_spacemit_allocator_wait_inactive(allocator); ++ g_hash_table_foreach (allocator->memories, gst_unref_spacemit_mem, NULL); ++ g_hash_table_remove_all (allocator->memories); ++ if (allocator->foreign_allocator) { ++ GST_DEBUG_OBJECT (allocator, "ZRong ------------------- allocator unref(%d %d) (%d %d)", ++ GST_OBJECT_REFCOUNT_VALUE(allocator->foreign_allocator), GST_OBJECT_REFCOUNT(allocator->foreign_allocator), ++ GST_OBJECT_REFCOUNT_VALUE(allocator), GST_OBJECT_REFCOUNT(allocator)); ++ g_object_unref (allocator->foreign_allocator); ++ allocator->foreign_allocator = NULL; ++ } ++ } ++ } ++ ++ allocator->active = active; ++ g_mutex_unlock (&allocator->lock); ++ GST_DEBUG_OBJECT (allocator, "allocator set active/deactice finish"); ++ ++ return changed; ++} ++ ++GstAllocator * ++gst_spacemit_allocator_new (void) ++{ ++ GstSpaceMitAllocator *allocator; ++ ++ allocator = g_object_new (GST_TYPE_SPACEMIT_ALLOCATOR, NULL); ++ ++ allocator->id = 666; ++ ++ return GST_ALLOCATOR_CAST (allocator); ++} ++ ++static gpointer ++gst_spacemit_memory_map (GstMemory * base_mem, gsize maxsize, GstMapFlags flags) ++{ ++ GstSpaceMitMemory *mem = (GstSpaceMitMemory *) base_mem; ++ GST_DEBUG ("ZRong ------------------- af memory_map (%d, %d)", maxsize, mem->mppframe_id); ++ ++ return FRAME_GetDataPointer(mem->mppframe, 0); ++} ++static void ++gst_spacemit_memory_unmap (GstMemory * base_mem) ++{ ++} ++ ++void ++gst_spacemit_allocator_get_info (GstAllocator * base_allocator, gint32 id, gint32 fd) ++{ ++ GstSpaceMitAllocator *allocator = GST_SPACEMIT_ALLOCATOR (base_allocator); ++ ++ if (id < 0) ++ GST_ERROR_OBJECT (allocator, "error, get a invaild id: %d !", id); ++ ++ if (fd < 0 && allocator->mode == GST_SPM_MEMORY_TYPE_DMABUF) { ++ GST_ERROR_OBJECT (allocator, "error, 
get a invaild fd: %d in dmabuf mode!", fd); ++ } ++ ++ allocator->mppframe_id = id; ++ allocator->dmabuf_fd = fd; ++} ++void ++gst_spacemit_set_mem (GstMemory * base_mem, MppFrame *mppframe, MppVdecCtx *ctx) ++{ ++ GstSpaceMitMemory *mem; ++ ++ if (GST_IS_SPACEMIT_ALLOCATOR (base_mem->allocator)) ++ mem = (GstSpaceMitMemory *) base_mem; ++ else ++ mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (base_mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ ++ mem->mppframe = mppframe; ++ mem->ctx = ctx; ++} ++ ++static void ++gst_spacemit_allocator_class_init (GstSpaceMitAllocatorClass * klass) ++{ ++ GObjectClass *gobject_class; ++ GstAllocatorClass *allocator_class; ++ ++ allocator_class = GST_ALLOCATOR_CLASS (klass); ++ gobject_class = G_OBJECT_CLASS (klass); ++ ++ allocator_class->alloc = NULL; ++ allocator_class->free = gst_spacemit_allocator_free; ++ ++ gobject_class->finalize = gst_spacemit_allocator_finalize; ++} ++ ++static void ++gst_spacemit_allocator_init (GstSpaceMitAllocator * allocator) ++{ ++ GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator); ++ ++ alloc->mem_type = GST_SPACEMIT_MEMORY_TYPE; ++ alloc->mem_map = gst_spacemit_memory_map; ++ alloc->mem_unmap = gst_spacemit_memory_unmap; ++ ++ GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); ++} ++ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitallocator.h b/ext/spacemit/spacemitcodec/gstspacemitallocator.h +new file mode 100755 +index 0000000..b4a0b9f +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitallocator.h +@@ -0,0 +1,129 @@ ++/* GStreamer ++ * ++ * Copyright (C) 2016 Igalia ++ * ++ * Authors: ++ * Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++#ifndef __GST_SPACEMIT_ALLOCATOR_H__ ++#define __GST_SPACEMIT_ALLOCATOR_H__ ++ ++#include ++#include ++#include ++#include ++ ++G_BEGIN_DECLS ++ ++/* ---------------------------------------------------------------------*/ ++/* GstSpmSystemoMemory */ ++/* ---------------------------------------------------------------------*/ ++ ++#define GST_TYPE_SPACEMIT_ALLOCATOR \ ++ (gst_spacemit_allocator_get_type()) ++#define GST_IS_SPACEMIT_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SPACEMIT_ALLOCATOR)) ++#define GST_IS_SPACEMIT_ALLOCATOR_CLASS(klass) \ ++ (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_SPACEMIT_ALLOCATOR)) ++#define GST_SPACEMIT_ALLOCATOR_GET_CLASS(obj) \ ++ (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_SPACEMIT_ALLOCATOR, GstSpaceMitAllocatorClass)) ++#define GST_SPACEMIT_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SPACEMIT_ALLOCATOR, GstSpaceMitAllocator)) ++#define GST_SPACEMIT_ALLOCATOR_CLASS(klass) \ ++ (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SPACEMIT_ALLOCATOR, GstSpaceMitAllocatorClass)) ++ ++typedef struct _GstSpaceMitAllocator GstSpaceMitAllocator; ++typedef struct _GstSpaceMitAllocatorClass GstSpaceMitAllocatorClass; ++typedef struct _GstSpaceMitMemory GstSpaceMitMemory; ++ ++typedef enum _GstSpmMemoryType ++{ ++ GST_SPM_MEMORY_TYPE_SYSTEM, ++ GST_SPM_MEMORY_TYPE_DMABUF, ++} GstSpmMemoryType; ++ ++struct _GstSpaceMitMemory ++{ ++ GstMemory parent; ++ ++ gboolean acquired; ++ MppFrame *mppframe; ++ 
gpointer ptr; ++ ++ MppVdecCtx *ctx; ++ guint32 memsize; ++ gint32 mppframe_id; ++ GstMemory *foreign_mem; ++ ++ guint32 fd; ++ /* the original dispose function of foreign_mem */ ++ GstMiniObjectDisposeFunction foreign_dispose; ++}; ++ ++struct _GstSpaceMitAllocator ++{ ++ GstAllocator parent; ++ gboolean active; ++ GMutex lock; ++ ++ GHashTable *memories; ++ gint32 mppframe_id; ++ gint32 dmabuf_fd; ++ guint32 id; ++ GstVideoInfo info; ++ GstAllocator *foreign_allocator; ++ GstSpmMemoryType mode; ++ gboolean mem_back; ++}; ++ ++struct _GstSpaceMitAllocatorClass { ++ GstAllocatorClass parent_class; ++}; ++GQuark gst_spm_dmabuf_memory_quark (void); ++GQuark gst_spacemit_allocator_quark (void); ++ ++#define GST_SPM_DMABUF_MEMORY_QUARK gst_spm_dmabuf_memory_quark () ++#define GST_SPACEMIT_ALLOCATOR_QUARK gst_spacemit_allocator_quark () ++ ++gboolean gst_is_spacemit_memory (GstMemory *mem); ++GstAllocator * ++gst_spacemit_allocator_new (void); ++GstMemory * ++gst_spacemit_allocator_alloc (GstSpaceMitAllocator * allocator, gsize size); ++GstFlowReturn ++gst_spacemit_allocator_acquire (GstAllocator * base_allocator, GstMemory ** memory); ++gboolean ++gst_spacemit_allocator_configure(GstAllocator * base_allocator, GstVideoInfo * info, gboolean use_dmabuf); ++gboolean ++gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean active); ++ ++void ++gst_spacemit_allocator_get_info (GstAllocator * base_allocator, gint32 id, gint32 fd); ++void ++gst_spacemit_set_mem (GstMemory * base_mem, MppFrame *mppframe, MppVdecCtx *ctx); ++gboolean ++gst_spacemit_allocator_wait_inactive (GstAllocator * base_allocator); ++ ++GType gst_spacemit_allocator_get_type (void) G_GNUC_CONST; ++ ++G_END_DECLS ++ ++#endif /* __GST_SPACEMIT_ALLOCATOR_H__ */ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c +new file mode 100755 +index 0000000..cd186ab +--- /dev/null ++++ 
b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c +@@ -0,0 +1,344 @@ ++/* GStreamer ++ * Copyright (C) <2005> Julien Moutte ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++/* Debugging category */ ++#include ++#include "gstspacemitbufferpool.h" ++#include "gstspacemitallocator.h" ++ ++/* Helper functions */ ++#include ++#include ++#include ++#include ++ ++#define GST_CAT_DEFAULT gst_spacemitbufferpool_debug_category ++GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT); ++ ++/* bufferpool */ ++static void gst_spacemit_buffer_pool_finalize (GObject * object); ++#define gst_spacemit_buffer_pool_parent_class parent_class ++ ++G_DEFINE_TYPE_WITH_CODE (GstSpaceMitBufferPool, gst_spacemit_buffer_pool, ++ GST_TYPE_BUFFER_POOL, ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "SpaceMitbufferpool", 0, ++ "spacemit buffer pool")); ++ ++static gboolean ++gst_spacemit_buffer_pool_start (GstBufferPool * base_pool) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL (base_pool); ++ gboolean ret; ++ ++ gst_spacemit_allocator_set_active (GST_SPACEMIT_ALLOCATOR (pool->allocator), TRUE); /* activate the allocator before the parent pool starts */ ++ ++ ret = GST_BUFFER_POOL_CLASS 
(gst_spacemit_buffer_pool_parent_class)->start (base_pool); ++ ++ GST_DEBUG ("ZRong ------------------------af pool start.(%d)", ret); /* logged after the call: ret holds the parent start() result here */ ++ ++ return ret; ++} ++ ++static gboolean ++gst_spacemit_buffer_pool_stop (GstBufferPool * base_pool) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL_CAST (base_pool); ++ ++ GST_DEBUG_OBJECT (pool, "deactivating spacemit allocator"); ++ gst_spacemit_allocator_set_active (GST_SPACEMIT_ALLOCATOR (pool->allocator), FALSE); ++ ++ if (pool->caps) ++ gst_caps_unref (pool->caps); ++ pool->caps = NULL; ++ ++// pool->add_videometa = FALSE; ++// pool->deactivated = TRUE; ++ ++ return GST_BUFFER_POOL_CLASS (parent_class)->stop (base_pool); ++} ++ ++static const gchar ** ++gst_spacemit_buffer_pool_get_options (GstBufferPool * base_pool) ++{ ++ static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, ++ NULL ++ }; ++ return options; ++} ++ ++static inline GstSpmMemoryType ++_spm_get_memory_type (GstStructure * config) ++{ ++ gboolean dmabuf; ++ ++ dmabuf = gst_buffer_pool_config_has_option (config, ++ GST_BUFFER_POOL_OPTION_SPM_USE_DMABUF); ++ ++ if (dmabuf) ++ return GST_SPM_MEMORY_TYPE_DMABUF; ++ else ++ return GST_SPM_MEMORY_TYPE_SYSTEM; ++} ++ ++static gboolean ++gst_spacemit_buffer_pool_set_config (GstBufferPool * base_pool, GstStructure * config) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL_CAST (base_pool); ++ GstVideoInfo info; ++ GstCaps *caps; ++ guint size, min_buffers, max_buffers; ++ GstAllocator *allocator; ++ GstAllocationParams params; ++ GstStructure *fake_config; ++ gboolean ret; ++ ++ GST_DEBUG ("ZRong -------------------- in spacemit bufferpool set_config"); ++ ++ if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers, ++ &max_buffers)) ++ goto wrong_config; ++ ++ if (caps == NULL) ++ goto no_caps; ++ ++ /* now parse the caps from the config */ ++ if (!gst_video_info_from_caps (&info, caps)) ++ goto wrong_caps; ++ ++ if (!gst_buffer_pool_config_get_allocator (config, &allocator, &params)) ++ goto wrong_config; ++ ++ /* enable metadata based on config of the 
pool */ ++ pool->add_videometa = ++ gst_buffer_pool_config_has_option (config, ++ GST_BUFFER_POOL_OPTION_VIDEO_META); ++ ++ if (pool->caps) ++ gst_caps_unref (pool->caps); ++ pool->caps = gst_caps_ref (caps); ++ ++ gst_buffer_pool_config_set_params (config, caps, size, min_buffers, max_buffers); ++ pool->memory_type = _spm_get_memory_type (config); ++ ++ /* give a fake config to the parent default_set_config() with size == 0 ++ * this prevents default_release_buffer() from free'ing the buffers, since ++ * we release them with no memory */ ++ fake_config = gst_structure_copy (config); ++ gst_buffer_pool_config_set_params (fake_config, caps, 0, min_buffers, max_buffers); ++ ++ if (pool->allocator) ++ gst_object_unref (pool->allocator); ++ ++ pool->allocator = gst_object_ref (allocator); ++ pool->info = info; ++ pool->params = params; ++ ++ ret = GST_BUFFER_POOL_CLASS (parent_class)->set_config (base_pool, fake_config); ++ gst_structure_free (fake_config); ++ ++ return ret; ++ ++/* ERRORS */ ++wrong_config: ++ { ++ GST_WARNING_OBJECT (pool, "invalid config"); ++ return FALSE; ++ } ++no_caps: ++ { ++ GST_WARNING_OBJECT (pool, "no caps in config"); ++ return FALSE; ++ } ++ ++wrong_caps: ++ { ++ GST_WARNING_OBJECT (pool, ++ "failed getting geometry from caps %" GST_PTR_FORMAT, caps); ++ return FALSE; ++ } ++} ++static void ++gst_spacemit_buffer_pool_reset_buffer (GstBufferPool * base_pool, GstBuffer * buffer) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL_CAST (base_pool); ++ guint n; ++ guint size; ++ ++ n = gst_buffer_n_memory (buffer); ++ size = gst_buffer_get_size (buffer); ++ ++ GST_DEBUG ("ZRong omx ------------------------ reset buffer.(%d)", size); ++ ++ if (G_UNLIKELY (n != 1)) { ++ GST_ERROR_OBJECT (pool, "Released buffer does not have 1 memory... " ++ "(n = %u) something went terribly wrong", n); ++ } ++ ++ /* rip the memory out of the buffer; ++ * we like to keep them separate in this pool. 
++ * if this was the last ref count of the memory, it will be returned ++ * to the allocator, otherwise it will be returned later */ ++ gst_buffer_remove_all_memory (buffer); ++ ++ /* reset before removing the TAG_MEMORY flag so that the parent impl ++ * doesn't try to restore the original buffer size */ ++ GST_BUFFER_POOL_CLASS (parent_class)->reset_buffer ++ (base_pool, buffer); ++ n = gst_buffer_n_memory (buffer); ++ size = gst_buffer_get_size (buffer); ++ ++ GST_DEBUG ("ZRong omx ------------------------ reset buffer.(%d %d)", n, size); ++ ++ /* pretend nothing happened to the memory to avoid discarding the buffer */ ++ GST_MINI_OBJECT_FLAG_UNSET (buffer, GST_BUFFER_FLAG_TAG_MEMORY); ++} ++ ++static GstFlowReturn ++gst_spacemit_buffer_pool_alloc_buffer (GstBufferPool * base_pool, GstBuffer ** buffer, ++ GstBufferPoolAcquireParams * params) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL_CAST (base_pool); ++ GstVideoInfo *info; ++ GstBuffer *buf; ++ GstMemory *mem; ++ ++ info = &pool->info; ++ GST_DEBUG_OBJECT (pool, "ZRong -------------------- in spacemit bufferpool alloc (%d)", info->size); ++ buf = gst_buffer_new (); ++ ++ if (pool->add_videometa) { ++ GST_DEBUG_OBJECT (pool, "adding GstVideoMeta"); ++ GstVideoMeta *meta; ++ ++ meta = gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE, ++ GST_VIDEO_INFO_FORMAT (info), ++ GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), ++ GST_VIDEO_INFO_N_PLANES (info), info->offset, info->stride); ++ //gst_video_meta_set_alignment (meta, 0); ++ } ++ ++ *buffer = buf; ++ GST_DEBUG ("ZRong -------------------- out spacemit bufferpool alloc"); ++ ++ return GST_FLOW_OK; ++} ++static GstFlowReturn ++gst_spacemit_buffer_pool_acquire_buffer (GstBufferPool * base_pool, ++ GstBuffer ** buffer, GstBufferPoolAcquireParams * params) ++{ ++ GstFlowReturn ret; ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL (base_pool); ++ GstMemory *mem; ++ ++ GST_DEBUG ("ZRong -------------------- in 
acquire_buffer"); ++ ++ ret = GST_BUFFER_POOL_CLASS (parent_class)->acquire_buffer ++ (base_pool, buffer, params); ++ if (ret != GST_FLOW_OK) ++ return ret; ++ ++// if (pool->memory_type == GST_SPM_MEMORY_TYPE_SYSTEM) ++ GST_DEBUG ("ZRong ----------- in acquire_buffer, bf allocator_acquire"); ++ ++ ret = gst_spacemit_allocator_acquire (pool->allocator, &mem); ++// else ++// ret = gst_spm_dmabuf_allocator_acquire (pool->allocator, &mem); ++ ++ if (ret != GST_FLOW_OK) ++ return ret; ++ ++ gst_buffer_append_memory (*buffer, mem); ++ ++ return ret; ++} ++ ++static void ++gst_spacemit_buffer_pool_release_buffer (GstBufferPool * base_pool, GstBuffer * buffer) ++{ ++ guint n; ++ gsize size, len; ++ ++ n = gst_buffer_n_memory (buffer); ++ len = n; ++ size = gst_buffer_get_size (buffer); ++ GST_DEBUG ("ZRong ------------------------ release buffer.(%u, %" G_GSIZE_FORMAT ", %" G_GSIZE_FORMAT ")", n, len, size); /* n is guint, len and size are gsize */ ++ ++ GST_BUFFER_POOL_CLASS (parent_class)->release_buffer(base_pool, buffer); ++} ++GstBufferPool * ++gst_spacemit_buffer_pool_new (void) ++{ ++ GstSpaceMitBufferPool *pool; ++ ++ pool = g_object_new (GST_TYPE_SPACEMIT_BUFFER_POOL, NULL); ++ ++ GST_DEBUG_OBJECT (pool, "ZRong new spacemit buffer pool %p", pool); ++ ++ return GST_BUFFER_POOL (pool); ++} ++ ++static void ++gst_spacemit_buffer_pool_finalize (GObject * object) ++{ ++ GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL (object); ++ GST_DEBUG ("ZRong ------------------------af pool finalize."); ++ ++ if (pool->allocator) { ++ GST_DEBUG ("ZRong ------------------- af pool finalize (%d %d)", ++ GST_OBJECT_REFCOUNT_VALUE(pool->allocator), GST_OBJECT_REFCOUNT(pool->allocator)); ++ ++ gst_object_unref (pool->allocator); ++ } ++ pool->allocator = NULL; ++ ++ if (pool->caps) ++ gst_caps_unref (pool->caps); ++ pool->caps = NULL; ++ ++ G_OBJECT_CLASS (parent_class)->finalize (object); ++} ++ ++static void ++gst_spacemit_buffer_pool_class_init (GstSpaceMitBufferPoolClass * klass) ++{ ++ GObjectClass *gobject_class = (GObjectClass *) klass; ++ 
GstBufferPoolClass *gstbufferpool_class = (GstBufferPoolClass *) klass; ++ ++ gobject_class->finalize = gst_spacemit_buffer_pool_finalize; ++ ++ gstbufferpool_class->start = gst_spacemit_buffer_pool_start; ++ gstbufferpool_class->stop = gst_spacemit_buffer_pool_stop; ++ gstbufferpool_class->get_options = gst_spacemit_buffer_pool_get_options; ++ gstbufferpool_class->set_config = gst_spacemit_buffer_pool_set_config; ++ gstbufferpool_class->alloc_buffer = gst_spacemit_buffer_pool_alloc_buffer; ++ gstbufferpool_class->reset_buffer = gst_spacemit_buffer_pool_reset_buffer; ++ ++ gstbufferpool_class->acquire_buffer = gst_spacemit_buffer_pool_acquire_buffer; ++ gstbufferpool_class->release_buffer = gst_spacemit_buffer_pool_release_buffer; ++} ++ ++static void ++gst_spacemit_buffer_pool_init (GstSpaceMitBufferPool * pool) ++{ ++} ++ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitbufferpool.h b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.h +new file mode 100755 +index 0000000..1b40b68 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.h +@@ -0,0 +1,72 @@ ++/* GStreamer ++ * Copyright (C) <2005> Julien Moutte ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ */ ++ ++#ifndef __GST_SPACEMITPOOL_H__ ++#define __GST_SPACEMITPOOL_H__ ++ ++#include ++#include ++#include ++#include ++ ++#include "gstspacemitallocator.h" ++ ++G_BEGIN_DECLS ++ ++typedef struct _GstSpaceMitBufferPool GstSpaceMitBufferPool; ++typedef struct _GstSpaceMitBufferPoolClass GstSpaceMitBufferPoolClass; ++ ++/* buffer pool functions */ ++#define GST_TYPE_SPACEMIT_BUFFER_POOL (gst_spacemit_buffer_pool_get_type()) ++#define GST_IS_SPACEMIT_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SPACEMIT_BUFFER_POOL)) ++#define GST_SPACEMIT_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SPACEMIT_BUFFER_POOL, GstSpaceMitBufferPool)) ++#define GST_SPACEMIT_BUFFER_POOL_CAST(obj) ((GstSpaceMitBufferPool*)(obj)) ++ ++struct _GstSpaceMitBufferPool ++{ ++ GstVideoBufferPool parent; ++ ++ GstAllocator *allocator; ++ ++ GstCaps *caps; ++ GstVideoInfo info; ++ GstVideoAlignment align; ++ guint padded_width; ++ guint padded_height; ++ gboolean add_videometa; ++ gboolean add_metavideo; ++ gboolean need_alignment; ++ GstAllocationParams params; ++ ++ GstSpmMemoryType memory_type; ++}; ++ ++struct _GstSpaceMitBufferPoolClass ++{ ++ GstVideoBufferPoolClass parent_class; ++}; ++#define GST_BUFFER_POOL_OPTION_SPM_USE_DMABUF "GstBufferPoolOptionSpmUseDMABuf" ++ ++GType gst_spacemit_buffer_pool_get_type (void); ++ ++GstBufferPool * gst_spacemit_buffer_pool_new (void); ++ ++G_END_DECLS ++ ++#endif /* __GST_SPACEMITPOOL_H__ */ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.c b/ext/spacemit/spacemitcodec/gstspacemitdec.c +new file mode 100755 +index 0000000..d053eff +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.c +@@ -0,0 +1,1371 @@ ++/* GStreamer ++ * Copyright (C) 2022 FIXME ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) 
any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin Street, Suite 500, ++ * Boston, MA 02110-1335, USA. ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++#include ++#include ++ ++#include ++#include ++#include ++#include "gstspacemitdec.h" ++ ++GST_DEBUG_CATEGORY_STATIC (gst_spacemitdec_debug_category); ++#define GST_CAT_DEFAULT gst_spacemitdec_debug_category ++ ++#define EOS_FARME_NUM 3 ++#define MAX_POLL_TIME 0xffffffff ++ ++/* prototypes */ ++static void gst_spacemitdec_set_property (GObject * object, ++ guint property_id, const GValue * value, GParamSpec * pspec); ++static void gst_spacemitdec_get_property (GObject * object, ++ guint property_id, GValue * value, GParamSpec * pspec); ++static void gst_spacemitdec_finalize (GObject * object); ++static gboolean gst_spacemitdec_close(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_start(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_stop(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_set_format(GstVideoDecoder *decoder, GstVideoCodecState *state); ++static gboolean gst_spacemitdec_reset(GstVideoDecoder *decoder, gboolean hard); ++static gboolean gst_spacemitdec_flush(GstVideoDecoder * decoder); ++static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder); ++static GstFlowReturn gst_spacemitdec_handle_frame(GstVideoDecoder *decoder, GstVideoCodecFrame *frame); ++static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query); ++static GstStateChangeReturn gst_spacemitdec_dec_change_state (GstElement * element, 
GstStateChange transition); ++ ++/* pad templates */ ++static GstStaticPadTemplate gst_spacemitdec_sink_template = ++GST_STATIC_PAD_TEMPLATE ("sink", ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ("video/x-h264, stream-format=(string)byte-stream, alignment=(string)au, " ++ "profile=(string){ constrained-baseline, baseline, main, high }," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/mpeg," ++ "mpegversion = (int) { 1, 2, 4 }," ++ "parsed = (boolean) true," ++ "systemstream = (boolean) false" ++ ";" ++ "image/jpeg" ";" "video/x-vp8" ";" "video/x-vp9" ++ )); ++ ++static GstStaticPadTemplate gst_spacemitdec_src_template = ++ GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (SPM_DEC_CAPS_MAKE ("{" SPM_DEC_FORMATS "}") ";") ++ ); ++ ++#define parent_class gst_spacemitdec_parent_class ++/* class initialization */ ++G_DEFINE_TYPE(GstSpacemitDec, gst_spacemitdec, GST_TYPE_VIDEO_DECODER); ++enum ++{ ++ PROP_0, ++ PROP_CODING_WIDTH, ++ PROP_CODING_HIGHT, ++ PROP_CLOSE_DMABUF, ++ PROP_CODE_TYPE, ++ PROP_CODE_YUV_FORMAT, ++ PROP_SAVE_DEC_IMAGE, ++ PROP_DOWNSCALE, ++ N_PROPERTIES ++}; ++ ++static gboolean ++_gst_caps_has_feature (const GstCaps * caps, const gchar * feature) ++{ ++ guint i; ++ ++ for (i = 0; i < gst_caps_get_size (caps); i++) { ++ GstCapsFeatures *const features = gst_caps_get_features (caps, i); ++ /* Skip ANY features, we need an exact match for correct evaluation */ ++ if (gst_caps_features_is_any (features)) ++ continue; ++ if (gst_caps_features_contains (features, feature)) ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static gboolean ++srcpad_can_dmabuf (GstSpacemitDec * thiz) ++{ ++ gboolean ret = FALSE; ++ GstCaps *caps, *out_caps; ++ GstPad *srcpad; ++ ++ srcpad = GST_VIDEO_DECODER_SRC_PAD (thiz); ++ caps = 
gst_pad_get_pad_template_caps (srcpad); ++ ++ out_caps = gst_pad_peer_query_caps (srcpad, caps); ++ if (!out_caps) ++ goto done; ++ ++ if (gst_caps_is_any (out_caps) || gst_caps_is_empty (out_caps) ++ || out_caps == caps) ++ goto done; ++ ++ GST_DEBUG_OBJECT (thiz, "get src peer caps caps %" GST_PTR_FORMAT, out_caps); ++ if (_gst_caps_has_feature (out_caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) ++ ret = TRUE; ++ ++done: ++ if (caps) ++ gst_caps_unref (caps); ++ if (out_caps) ++ gst_caps_unref (out_caps); ++ return ret; ++} ++ ++static gboolean gst_spacemitdec_close(GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ ++ if (thiz->input_state) ++ { ++ gst_video_codec_state_unref (thiz->input_state); ++ thiz->input_state = NULL; ++ } ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close"); ++ if (thiz->pool) { ++ gst_buffer_pool_set_active (thiz->pool, FALSE); ++// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); ++ gst_object_unref (thiz->pool); ++ thiz->pool = NULL; ++ } ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close222"); ++ ++ /* Clear the handles after destroying them: start() destroys any non-NULL ctx, so a stale pointer would be destroyed twice */ ++ if (thiz->mpppacket) { PACKET_Destory (thiz->mpppacket); thiz->mpppacket = NULL; } ++ if (thiz->ctx) { VDEC_DestoryChannel (thiz->ctx); thiz->ctx = NULL; } ++ ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish close"); ++ ++ return TRUE; ++} ++ ++ ++static void gst_spacemitdec_class_init(GstSpacemitDecClass * klass) ++{ ++ GstVideoDecoderClass *video_decoder_class = GST_VIDEO_DECODER_CLASS (klass); ++ GstElementClass *element_class = GST_ELEMENT_CLASS (klass); ++ GObjectClass *gobject_class = G_OBJECT_CLASS (klass); ++ ++ gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_sink_template); ++ gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_src_template); ++ ++ gst_element_class_set_static_metadata(GST_ELEMENT_CLASS (klass), ++ "Spacemit video decoder", 
"Decoder/Video", "Spacemit video decoder", ++ "ZRong, zhirong.li@spacemit.com"); ++ ++ gobject_class->set_property = gst_spacemitdec_set_property; ++ gobject_class->get_property = gst_spacemitdec_get_property; ++ gobject_class->finalize = gst_spacemitdec_finalize; ++ ++ video_decoder_class->close = GST_DEBUG_FUNCPTR(gst_spacemitdec_close); ++ video_decoder_class->start = GST_DEBUG_FUNCPTR(gst_spacemitdec_start); ++ video_decoder_class->stop = GST_DEBUG_FUNCPTR(gst_spacemitdec_stop); ++ video_decoder_class->set_format = GST_DEBUG_FUNCPTR(gst_spacemitdec_set_format); ++// video_decoder_class->reset = GST_DEBUG_FUNCPTR(gst_spacemitdec_reset); ++ video_decoder_class->flush = GST_DEBUG_FUNCPTR (gst_spacemitdec_flush); ++ video_decoder_class->finish = GST_DEBUG_FUNCPTR(gst_spacemitdec_finish); ++ video_decoder_class->handle_frame = GST_DEBUG_FUNCPTR(gst_spacemitdec_handle_frame); ++ video_decoder_class->decide_allocation = GST_DEBUG_FUNCPTR(gst_spacemitdec_decide_allocation); ++ ++// element_class->change_state = GST_DEBUG_FUNCPTR (gst_spacemitdec_dec_change_state); ++ ++ /* define properties */ ++ g_object_class_install_property (gobject_class, PROP_CODE_TYPE, ++ g_param_spec_uint ("code-type", "code type", ++ "Codec selection to work", ++ CODEC_OPENH264, 1023, CODEC_SFOMX, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_DOWNSCALE, ++ g_param_spec_uint ("downscale", "downscale", ++ "downscale in 4k", ++ 1, 3, 1, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, ++ g_param_spec_uint ("code-yuv-format", "code yuv format", ++ "Decode the generated yuv format", ++ PIXEL_FORMAT_UNKNOWN, PIXEL_FORMAT_MAX - 1, PIXEL_FORMAT_I420, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CLOSE_DMABUF, ++ g_param_spec_boolean ("close-dmabuf", "close dmabuf", 
"Close Dmabuf feature", FALSE, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ ++ g_object_class_install_property (gobject_class, PROP_SAVE_DEC_IMAGE, ++ g_param_spec_boolean ("save-dec-images", "save dec images", ++ "save dec image to /tmp/", FALSE, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++} ++ ++static void gst_spacemitdec_init (GstSpacemitDec * thiz) ++{ ++ GstVideoDecoder *decoder = GST_VIDEO_DECODER (thiz); ++ ++ thiz->pool = NULL; ++ thiz->ctx = NULL; ++ thiz->width = 1280; ++ thiz->height = 720; ++ thiz->eCodecType = CODEC_V4L2_LINLONV5V7; ++ thiz->eCodingType = CODING_H264; ++ thiz->downscale = 1; ++ thiz->use_dmabuf = TRUE; ++ thiz->eOutputPixelFormat = PIXEL_FORMAT_NV12; ++ thiz->dec_nonblock = TRUE; ++ thiz->req_nonblock = TRUE; ++ thiz->wait_reschange = FALSE; ++ thiz->save_dec = FALSE; ++ ++ gst_video_decoder_set_packetized (decoder, TRUE); ++ gst_video_decoder_set_needs_format (decoder, TRUE); ++} ++void ++gst_spacemitdec_set_property (GObject * object, guint property_id, ++ const GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_CODE_TYPE: ++ thiz->eCodecType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ thiz->eOutputPixelFormat = g_value_get_uint (value); ++ break; ++ case PROP_CLOSE_DMABUF: ++ thiz->use_dmabuf = !g_value_get_boolean (value); /* close-dmabuf=TRUE disables dmabuf, FALSE enables it */ ++ break; ++ case PROP_SAVE_DEC_IMAGE: ++ thiz->save_dec = g_value_get_boolean (value); ++ break; ++ case PROP_DOWNSCALE: ++ thiz->downscale = g_value_get_uint (value); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitdec_get_property (GObject * object, guint property_id, ++ GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong 
------------------- get_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_CODE_TYPE: ++ g_value_set_uint (value, thiz->eCodecType); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ g_value_set_uint (value, thiz->eOutputPixelFormat); ++ break; ++ case PROP_CLOSE_DMABUF: ++ g_value_set_boolean (value, !thiz->use_dmabuf); /* the property is the inverse of use_dmabuf */ ++ break; ++ case PROP_SAVE_DEC_IMAGE: ++ g_value_set_boolean (value, thiz->save_dec); ++ break; ++ case PROP_DOWNSCALE: ++ g_value_set_uint (value, thiz->downscale); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitdec_finalize (GObject * object) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "finalize"); ++ ++ /* clean up object here */ ++ if (thiz->input_state) ++ gst_video_codec_state_unref (thiz->input_state); ++ thiz->input_state = NULL; ++ ++ G_OBJECT_CLASS (gst_spacemitdec_parent_class)->finalize (object); ++} ++ ++static gboolean gst_spacemitdec_start (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ ++ GST_DEBUG_OBJECT (thiz, "in start !"); ++ ++ if (thiz->ctx) ++ { ++ VDEC_DestoryChannel (thiz->ctx); ++ thiz->ctx = NULL; ++ } ++ thiz->ctx = VDEC_CreateChannel (); ++ if (!thiz->ctx) { GST_ERROR_OBJECT (thiz, "can not create mpp decoder channel, please check !"); return FALSE; } ++ thiz->ctx->eCodecType = thiz->eCodecType; ++ ++ thiz->mpppacket = PACKET_Create (); ++ if (!thiz->mpppacket) { ++ GST_ERROR_OBJECT (thiz, "can not alloc for mpp structure, please check !"); ++ return FALSE; ++ } ++ ++ thiz->mppframe = NULL; ++ thiz->at_eos = FALSE; ++ thiz->downstream_flow_ret = GST_FLOW_OK; ++ thiz->cur_frame_number = -1; ++ thiz->initialized = FALSE; ++ if (thiz->save_dec) ++ thiz->fb = fopen("/tmp/spacemitdec_out.yuv", "wb+"); ++ ++ GST_DEBUG_OBJECT (thiz, "finish start ! 
(%d)", thiz->save_dec); ++ ++ return TRUE; ++} ++ ++static gboolean gst_spacemitdec_stop(GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ /* Key the close on the handle itself (save_dec may have changed, fopen may have failed) and forget it afterwards */ ++ if (thiz->fb) { fclose (thiz->fb); thiz->fb = NULL; } ++ ++ if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop"); ++ return TRUE; ++ } ++ ++ gst_pad_stop_task (decoder->srcpad); ++ GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop222"); ++ ++ return TRUE; ++} ++static MppCodingType ++gst_change_mpp_ecoding_type (GstStructure * s) ++{ ++ if (gst_structure_has_name (s, "video/x-h264")) ++ return CODING_H264; ++ ++ if (gst_structure_has_name (s, "video/x-h265")) ++ return CODING_H265; ++ ++ if (gst_structure_has_name (s, "image/jpeg")) ++ return CODING_MJPEG; ++ ++ if (gst_structure_has_name (s, "video/x-vp8")) ++ return CODING_VP8; ++ ++ if (gst_structure_has_name (s, "video/x-vp9")) ++ return CODING_VP9; ++ ++ return CODING_UNKNOWN; ++} ++static gboolean ++gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) ++{ ++ GstStructure *structure; ++ gboolean ret = TRUE; ++ ++ if (thiz->initialized) ++ return TRUE; ++ ++ structure = gst_caps_get_structure (thiz->input_state->caps, 0); ++ thiz->eCodingType = gst_change_mpp_ecoding_type (structure); ++ if(thiz->eCodingType == CODING_UNKNOWN) { ++ GST_ERROR_OBJECT(thiz, "no support this eCodingType"); ++ return FALSE; ++ } ++ ++ if (thiz->ctx->eCodecType == CODEC_SFOMX || ++ thiz->ctx->eCodecType == CODEC_OPENH264 || ++ thiz->ctx->eCodecType == CODEC_FAKEDEC || ++ thiz->ctx->eCodecType == CODEC_V4L2_LINLONV5V7) { ++ thiz->width = thiz->input_state->info.width; ++ thiz->height = thiz->input_state->info.height; ++ thiz->ctx->stVdecPara.eCodingType = thiz->eCodingType; ++ thiz->ctx->stVdecPara.nWidth = thiz->width; ++ thiz->ctx->stVdecPara.nHeight = thiz->height; ++ thiz->ctx->stVdecPara.eOutputPixelFormat = thiz->eOutputPixelFormat; ++ 
thiz->ctx->eCodecType = thiz->eCodecType; //set property ++ thiz->ctx->stVdecPara.nScale = thiz->downscale; ++ thiz->ctx->stVdecPara.nHorizonScaleDownRatio = 1; ++ thiz->ctx->stVdecPara.nVerticalScaleDownRatio = 1; ++ thiz->ctx->stVdecPara.nRotateDegree = 0; ++ thiz->ctx->stVdecPara.bThumbnailMode = 0; ++ thiz->ctx->stVdecPara.bIsInterlaced = MPP_FALSE; ++ ++ thiz->ctx->stVdecPara.bInputBlockModeEnable = MPP_TRUE; ++ thiz->ctx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; ++ GST_DEBUG_OBJECT (thiz, "spacemitdec set eCodecType is %d, downscale:%u", thiz->ctx->eCodecType, thiz->downscale); ++ ++ ret = VDEC_Init (thiz->ctx); ++ if (ret) { ++ GST_ERROR_OBJECT (thiz, "mpp VDEC_Init error, please check !"); ++ return FALSE; ++ } ++ ++ if (thiz->eCodecType == CODEC_V4L2_LINLONV5V7) ++ thiz->wait_reschange = TRUE; ++ } else { ++ GST_ERROR_OBJECT (thiz, "no supprot this eCodecType: %d", thiz->ctx->eCodecType); ++ return FALSE; ++ } ++ ++ thiz->initialized = TRUE; ++ return TRUE; ++} ++ ++static gboolean gst_spacemitdec_set_format(GstVideoDecoder * decoder, GstVideoCodecState * state) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ GstStructure *structure; ++ gboolean ret = TRUE; ++ ++ GST_DEBUG_OBJECT(thiz, "input caps: %" GST_PTR_FORMAT, state->caps); ++ ++ if(thiz->input_state) ++ { ++ gst_video_codec_state_unref(thiz->input_state); ++ thiz->input_state = NULL; ++ } ++ ++ thiz->input_state = gst_video_codec_state_ref (state); ++ if (!gst_spacemitdec_init_decoder(thiz)) ++ return FALSE; ++ ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- set_format (%d, %d, %" G_GSIZE_FORMAT "), (%d, %d, %" G_GSIZE_FORMAT ")", /* info.size is gsize */ ++ thiz->input_state->info.width, thiz->input_state->info.height, thiz->input_state->info.size, ++ state->info.width, state->info.height, state->info.size); ++ GST_DEBUG_OBJECT (thiz, "ZRong ----------------------- set format finish, %u, %s", ++ GST_VIDEO_INFO_FORMAT (&state->info), gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&state->info))); ++ ++ return TRUE; ++} 
++ ++static gboolean gst_spacemitdec_reset(GstVideoDecoder * decoder, gboolean hard) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ ++ GST_ERROR_OBJECT (thiz, "ZRong ------------------------- finish reset!"); ++ ++ return TRUE; ++} ++static gboolean ++gst_spacemitdec_flush (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong -------------------- flushing start, (%d)", thiz->downstream_flow_ret); ++ ++ thiz->flushing = TRUE; ++ ++ if (thiz->downstream_flow_ret == GST_FLOW_EOS) { ++ gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator); ++ ++ if (thiz->eCodecType != CODEC_V4L2_LINLONV5V7) ++ VDEC_Flush(thiz->ctx); ++ else ++ VDEC_ResetChannel(thiz->ctx); ++ } ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ ++ /* Wait for task thread to pause */ ++ GstTask *task = decoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(thiz, "finish FLUSH test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(thiz, "finish FLUSH test5"); ++ //GST_OBJECT_UNLOCK (task); ++ gst_pad_stop_task (decoder->srcpad); ++ } ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ thiz->flushing = FALSE; ++ thiz->downstream_flow_ret = GST_FLOW_OK; ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong -------------------- flushing stop"); ++ ++ return TRUE; ++} ++ ++static int mpp_format_change_to_gst(MppPixelFormat eOutputPixelFormat) ++{ ++ GstVideoFormat format; ++ ++ switch(eOutputPixelFormat){ ++ case PIXEL_FORMAT_I420: ++ format = GST_VIDEO_FORMAT_I420; ++ break; ++ case PIXEL_FORMAT_NV21: ++ format = GST_VIDEO_FORMAT_NV21; ++ break; ++ case PIXEL_FORMAT_NV12: ++ format = GST_VIDEO_FORMAT_NV12; ++ break; ++ default: ++ format = GST_VIDEO_FORMAT_UNKNOWN; ++ break; ++ } ++ return format; ++} ++ ++static GstFlowReturn ++save_decoder_images_for_test (GstVideoDecoder * decoder, 
GstBuffer *outbuf) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ ++ GstVideoCodecState *state; ++ GstVideoFrame video_frame; ++ gint dst_width[GST_VIDEO_MAX_PLANES] = { 0, }; ++ gint dst_height[GST_VIDEO_MAX_PLANES] = { 0, }; ++ guint p, h; ++ const guint8 *data; ++ guint8 *dst; ++ GstVideoFormat format; ++ GstVideoInfo *vinfo = NULL; ++ ++ state = gst_video_decoder_get_output_state (decoder); ++ if (state == NULL) { ++ GST_ERROR_OBJECT (thiz, "err! get output state fail!"); ++ return GST_FLOW_ERROR; ++ } ++ ++ if (!gst_video_frame_map (&video_frame, &state->info, outbuf, ++ GST_MAP_WRITE)) { ++ GST_ERROR_OBJECT (thiz, "Cannot map output buffer!"); ++ gst_video_codec_state_unref (state); ++ return GST_FLOW_ERROR; ++ } ++ ++ format = mpp_format_change_to_gst(thiz->eOutputPixelFormat); ++ ++ vinfo = &state->info; ++ dst_height[0] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo); ++ ++ switch (format) { ++ case GST_VIDEO_FORMAT_I420: ++ dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; ++ dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ dst_width[2] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; ++ dst_height[2] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ GST_DEBUG_OBJECT (thiz, "I420 format, size: %d, %d, %d, vinfo stride: %d, %d, %d, planes:%d", ++ dst_width[0], dst_width[1], dst_width[2], ++ GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 0), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 1), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 2), ++ GST_VIDEO_INFO_N_PLANES (vinfo)); ++ break; ++ case GST_VIDEO_FORMAT_NV12: ++ case GST_VIDEO_FORMAT_NV21: ++ dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ GST_DEBUG_OBJECT (thiz, "NV12/NV21 format, size:%d, %d", dst_width[0], dst_width[1]); ++ break; ++ default: ++ g_assert_not_reached (); ++ break; ++ } ++ ++ for (p = 0; p < GST_VIDEO_INFO_N_PLANES (vinfo); p++) { ++ data = 
(U8*)FRAME_GetDataPointer(thiz->mppframe, p); ++// dst = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, p); ++ ++ for (h = 0; h < dst_height[p]; h++) { ++ fwrite(data, 1, dst_width[p], thiz->fb); ++// memcpy (dst, data, dst_width[p]); ++// dst += GST_VIDEO_INFO_PLANE_STRIDE (vinfo, p); ++ data += dst_width[p]; ++ } ++ } ++ ++ gst_video_codec_state_unref (state); ++ gst_video_frame_unmap (&video_frame); ++ ++ return GST_FLOW_OK; ++} ++ ++static GstFlowReturn ++gst_spacemitdec_fill_output_buffer (GstVideoDecoder * decoder, ++ GstBuffer ** output_buffer) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ GstFlowReturn flow_status = GST_FLOW_OK; ++ GstBuffer *outbuf; ++ GstBufferPoolAcquireParams params = { 0, }; ++ GstMemory *mem; ++ gint32 id = -1; ++ gint32 fd = -1; ++ ++ id = FRAME_GetID(thiz->mppframe); ++ if (thiz->use_dmabuf) { ++ fd = FRAME_GetFD(thiz->mppframe, 0); ++ gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, id, fd); ++ } else { ++ gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, id, -1); ++ } ++ GST_DEBUG_OBJECT (thiz, "get mppframe id:%d fd:%d", id, fd); ++ ++ flow_status = gst_buffer_pool_acquire_buffer (thiz->pool, &outbuf, ¶ms); ++ if (flow_status != GST_FLOW_OK) { ++ GST_ERROR_OBJECT (thiz, ++ "can not acquire output buffer from pool: %" GST_PTR_FORMAT, thiz->pool); ++ goto done; ++ } ++ ++ mem = gst_buffer_peek_memory (outbuf, 0); ++ gst_spacemit_set_mem (mem, thiz->mppframe, thiz->ctx); ++ *output_buffer = outbuf; ++ GST_DEBUG_OBJECT (thiz, "finish buffer %p fill, mem:%p", outbuf, mem); ++ ++ if (thiz->save_dec) ++ flow_status = save_decoder_images_for_test (decoder, outbuf); ++ ++done: ++ thiz->mppframe = NULL; ++ return flow_status; ++} ++ ++static gint32 gst_spacemitdec_request_frame (GstSpacemitDec *thiz) ++{ ++ MppFrame *mppframe = NULL; ++ gint32 ret; ++ static guint count = 0; ++ ++ mppframe = FRAME_Create(); ++ if (!mppframe) { ++ GST_ERROR_OBJECT (thiz, 
"can not alloc for mpp structure, please check!"); ++ return FALSE; ++ } ++ ++ do { ++ ret = VDEC_RequestOutputFrame (thiz->ctx, FRAME_GetBaseData(mppframe)); ++ if (ret != MPP_CODER_NO_DATA) { ++ break; ++ } else { ++ if (count >= 2 && count < MAX_POLL_TIME) { ++ g_usleep(500); ++ } else if (count >= MAX_POLL_TIME) { ++ GST_ERROR_OBJECT (thiz, "try max times: %u, fail return(%d)", MAX_POLL_TIME, ret); ++ ret = MPP_POLL_FAILED; ++ break; ++ } ++ } ++ count++; ++ } while (thiz->req_nonblock); ++ ++ count = 0; ++ thiz->mppframe = mppframe; ++ ++ return ret; ++} ++ ++static void ++gst_spacemitdec_loop (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ GstVideoCodecFrame * frame; ++ GstFlowReturn flow_status; ++ GstBuffer *outbuf = NULL; ++ int ret; ++ ++ if (G_UNLIKELY(thiz->flushing)) ++ goto flushing; ++ ++ ret = gst_spacemitdec_request_frame (thiz); ++ if (ret == MPP_CODER_EOS) { ++ goto finish_work; ++ } else if (ret == MPP_POLL_FAILED) { ++ thiz->downstream_flow_ret = GST_FLOW_ERROR; ++ FRAME_Destory(thiz->mppframe); ++ goto done; ++ } else if (ret == MPP_RESOLUTION_CHANGED) { ++ if (thiz->width != thiz->ctx->stVdecPara.nWidth && ++ thiz->height != thiz->ctx->stVdecPara.nHeight) { ++ GST_DEBUG_OBJECT (thiz, "resolution change from (%u, %u) to (%u, %u), need renegotiate", ++ thiz->width, thiz->height, ++ thiz->ctx->stVdecPara.nWidth, thiz->ctx->stVdecPara.nHeight); ++ } else { ++ GST_DEBUG_OBJECT (thiz, "resolution change from (%u, %u) to (%u, %u), size is same", ++ thiz->width, thiz->height, ++ thiz->ctx->stVdecPara.nWidth, thiz->ctx->stVdecPara.nHeight); ++ thiz->wait_reschange = FALSE; ++ FRAME_Destory(thiz->mppframe); ++ goto no_frame; ++ } ++ } ++ ++ if (thiz->wait_reschange) { ++ VDEC_ReturnOutputFrame(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); ++ FRAME_Destory(thiz->mppframe); ++ goto no_frame; ++ } ++ ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong 555, %d", ret); ++ ++ frame = gst_video_decoder_get_oldest_frame (decoder); ++ 
if (frame == NULL) { ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong 333, null"); ++ flow_status = gst_spacemitdec_fill_output_buffer (decoder, &outbuf); ++ if (flow_status != GST_FLOW_OK) ++ goto fill_buffer_err; ++ ++ GST_DEBUG_OBJECT (thiz, "zrong ----- push buf %p of size %" G_GSIZE_FORMAT ", " ++ "PTS %" GST_TIME_FORMAT ", dur %" GST_TIME_FORMAT, outbuf, ++ gst_buffer_get_size (outbuf), ++ GST_TIME_ARGS (GST_BUFFER_PTS (outbuf)), ++ GST_TIME_ARGS (GST_BUFFER_DURATION (outbuf))); ++ flow_status = gst_pad_push (GST_VIDEO_DECODER_SRC_PAD (decoder), outbuf); ++ ++ } else { ++ GST_DEBUG_OBJECT (thiz, "get oldest frame %p, snd:%u, input buffer pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT ", mppframe pts %" GST_TIME_FORMAT, ++ frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->input_buffer)), ++ GST_TIME_ARGS (GST_BUFFER_DTS (frame->input_buffer)), GST_TIME_ARGS (FRAME_GetPts(thiz->mppframe))); ++ ++ //frame->pts = FRAME_GetPts(thiz->mppframe); ++ frame->pts = GST_CLOCK_TIME_NONE; ++ frame->dts = FRAME_GetPts(thiz->mppframe); ++ flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); ++ if (flow_status != GST_FLOW_OK) ++ goto fill_buffer_err; ++ ++ const gchar *user_clk_choice; ++ ++ user_clk_choice = g_getenv ("GST_CLK_CHOICE"); ++ if (user_clk_choice && g_strstr_len (user_clk_choice, 1, "Y")) { ++ frame->pts = GST_CLOCK_TIME_NONE; ++ frame->dts = GST_CLOCK_TIME_NONE; ++ } ++ ++ GST_BUFFER_PTS (frame->output_buffer) = frame->pts; ++ GST_BUFFER_DTS (frame->output_buffer) = frame->dts; ++ ++ GST_DEBUG_OBJECT (thiz, "finish frame %p, snd:%u, output buffer pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT ", frame pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT, ++ frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->output_buffer)), ++ GST_TIME_ARGS (GST_BUFFER_DTS (frame->output_buffer)), GST_TIME_ARGS (frame->pts), ++ GST_TIME_ARGS (frame->dts)); ++ flow_status = gst_video_decoder_finish_frame(decoder, 
frame);
++  }
++
++  if (flow_status != GST_FLOW_OK)
++    goto sending_state;
++
++  thiz->downstream_flow_ret = flow_status;
++
++done:
++  if (thiz->downstream_flow_ret != GST_FLOW_OK) {
++    GST_INFO_OBJECT (thiz,
++        "pause task in dec loop (%d)!", thiz->downstream_flow_ret);
++
++    gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (thiz));
++  }
++  return;
++
++flushing:
++{
++  thiz->downstream_flow_ret = GST_FLOW_FLUSHING;
++
++  while (1) {
++    frame = gst_video_decoder_get_oldest_frame (decoder);
++    if(frame == NULL)
++      break;
++    gst_video_decoder_release_frame (decoder, frame);
++  }
++  GST_INFO_OBJECT (thiz, "flushing spacemit decoder");
++
++  goto done;
++}
++
++no_frame:
++{
++  thiz->downstream_flow_ret = GST_FLOW_OK;
++  goto done;
++}
++
++fill_buffer_err:
++{
++  GST_ERROR_OBJECT (thiz,
++      "fill buffer err in dec loop, flow status: %d!", flow_status);
++  thiz->downstream_flow_ret = flow_status;
++  gst_video_codec_frame_unref (frame);
++  goto done;
++}
++
++finish_work:
++{
++  GST_DEBUG_OBJECT (thiz,
++      "get eos, finished work and paused task!");
++  thiz->downstream_flow_ret = GST_FLOW_EOS;
++  VDEC_ReturnOutputFrame(thiz->ctx, FRAME_GetBaseData(thiz->mppframe));
++  FRAME_Destory(thiz->mppframe);
++
++  goto done;
++
++}
++
++sending_state:
++{
++  thiz->downstream_flow_ret = flow_status;
++  if (flow_status == GST_FLOW_EOS) {
++    GST_DEBUG_OBJECT (thiz,
++        "get eos, finished work!");
++  } else if (flow_status == GST_FLOW_ERROR) {
++    GST_ERROR_OBJECT (thiz,
++        "send error and paused task!");
++  } else if (flow_status == GST_FLOW_FLUSHING) {
++    thiz->flushing = TRUE;
++    GST_DEBUG_OBJECT (thiz,
++        "get GST_FLOW_FLUSHING from finish frame!");
++
++    goto flushing;
++  } else {
++    GST_ERROR_OBJECT (thiz,
++        "get an unsupport flow status return after finish frame!");
++  }
++  goto done;
++}
++
++}
++/*
++ * Replace the base-class buffer pool with a freshly created spacemit pool.
++ *
++ * The caps and min/max buffer counts are read from the pool returned by
++ * gst_video_decoder_get_buffer_pool(); a new spacemit pool is configured with
++ * those caps (at least 8 buffers), activated and stored in thiz->pool, and
++ * the base-class pool is then deactivated.
++ *
++ * Returns TRUE on success. Returns FALSE when no caps can be obtained or the
++ * new pool cannot be configured/activated; in the latter case thiz->pool is
++ * reset to NULL.
++ *
++ * NOTE(review): a pool already stored in thiz->pool is overwritten without
++ * being released, as before - confirm callers never reach this twice.
++ */
++static gboolean
++gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder)
++{
++  GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder);
++  GstBufferPool *pool;
++  guint min = 0, max = 0;
++  GstStructure *config;
++  GstCaps *caps = NULL;
++  GstVideoInfo vinfo;
++  gboolean ret = FALSE;
++
++  GST_ERROR_OBJECT (thiz, "@@@ ZRong ------------------------- start pool_set_active!");
++
++  pool = gst_video_decoder_get_buffer_pool (GST_VIDEO_DECODER (thiz));
++  if (pool) {
++    config = gst_buffer_pool_get_config (pool);
++    if (!gst_buffer_pool_config_get_params (config, &caps, NULL, &min, &max)) {
++      GST_ERROR_OBJECT (thiz, "can't get buffer pool params");
++      gst_structure_free (config);
++      /* nothing was reffed yet, so make sure cleanup does not unref it */
++      caps = NULL;
++      goto out;
++    }
++    /* the caps belong to the config structure; take our own reference
++     * before the structure is freed so they stay valid below */
++    if (caps)
++      gst_caps_ref (caps);
++    gst_structure_free (config);
++  }
++
++  if (!caps) {
++    GST_ERROR_OBJECT (thiz, "can't get pool caps params");
++    goto out;
++  }
++
++  thiz->pool = gst_spacemit_buffer_pool_new ();
++  config = gst_buffer_pool_get_config (thiz->pool);
++  gst_buffer_pool_config_add_option (config,
++      GST_BUFFER_POOL_OPTION_VIDEO_META);
++
++  gst_video_info_init (&vinfo);
++  gst_video_info_from_caps (&vinfo, caps);
++  min = MAX (min, 8);
++
++  gst_buffer_pool_config_set_params (config, caps,
++      vinfo.size, min, max);
++  if (!gst_buffer_pool_set_config (thiz->pool, config)) {
++    GST_ERROR_OBJECT (thiz, "failed to set config on spacemit pool");
++    goto drop_pool;
++  }
++  if (!gst_buffer_pool_set_active (thiz->pool, TRUE)) {
++    GST_ERROR_OBJECT (thiz, "failed to activate spacemit pool");
++    goto drop_pool;
++  }
++  GST_DEBUG_OBJECT (thiz,
++      "use spacemit bufferpool: %" GST_PTR_FORMAT, thiz->pool);
++
++  /* pool is non-NULL here: caps can only have been set through it */
++  if(!gst_buffer_pool_set_active (pool, FALSE))
++    GST_ERROR_OBJECT (thiz, "failed to set acitve false on pool %" GST_PTR_FORMAT, pool);
++  ret = TRUE;
++  goto out;
++
++drop_pool:
++  gst_object_unref (thiz->pool);
++  thiz->pool = NULL;
++
++out:
++  /* release the references taken above on every exit path */
++  if (caps)
++    gst_caps_unref (caps);
++  if (pool)
++    gst_object_unref (pool);
++  return ret;
++}
++
++static gboolean
++gst_spacemitdec_set_src_caps (GstSpacemitDec * thiz)
++{
++  GstVideoCodecState *output_state;
++  GstVideoInfo *vinfo;
++  GstVideoFormat format;
++  guint width, height;
++ ++ width = thiz->ctx->stVdecPara.nWidth; ++ height = thiz->ctx->stVdecPara.nHeight; ++ ++ format = mpp_format_change_to_gst(thiz->eOutputPixelFormat); ++ if (format == GST_VIDEO_FORMAT_UNKNOWN) { ++ GST_ERROR_OBJECT (thiz, "failed to find a valid video format"); ++ return FALSE; ++ } ++ ++ output_state = ++ gst_video_decoder_set_output_state (GST_VIDEO_DECODER (thiz), ++ format, width, height, thiz->input_state); ++ vinfo = &output_state->info; ++ output_state->caps = gst_video_info_to_caps (vinfo); ++ ++ const gchar *user_dmabuf_choice; ++ ++ user_dmabuf_choice = g_getenv ("GST_DMABUF_CHOICE"); ++ if (user_dmabuf_choice && g_strstr_len (user_dmabuf_choice, 1, "N")) { ++ GST_DEBUG("GST_DMABUF_CHOICE is N, set use_dmabuf = false"); ++ thiz->use_dmabuf = FALSE; ++ } else { ++ GST_DEBUG("GST_DMABUF_CHOICE is Y, set use_dmabuf = true"); ++ thiz->use_dmabuf = TRUE; ++ } ++ ++ if (thiz->use_dmabuf && srcpad_can_dmabuf (thiz)) { ++ gst_caps_set_features (output_state->caps, 0, ++ gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_DMABUF, NULL)); ++ GST_INFO_OBJECT (thiz, "set DMABUF feature to spacemitdec src cap %" GST_PTR_FORMAT, output_state->caps); ++ } ++ ++ thiz->width = width; ++ thiz->height = height; ++ ++ return TRUE; ++} ++ ++static gint32 gst_spacemitdec_vdec_decode (GstSpacemitDec * thiz, GstMapInfo *map_info, gint64 pts) ++{ ++ gint32 ret; ++ static guint count = 0; ++ ++ if (map_info == NULL) { ++ PACKET_SetEos(thiz->mpppacket, TRUE); ++ PACKET_SetLength(thiz->mpppacket, 0); ++ } else { ++ PACKET_SetDataPointer(thiz->mpppacket, map_info->data); ++ PACKET_SetLength(thiz->mpppacket, map_info->size); ++ PACKET_SetEos(thiz->mpppacket, FALSE); ++ if (pts != GST_CLOCK_TIME_NONE) ++ PACKET_SetPts(thiz->mpppacket, pts); ++ } ++ ++ do { ++ ret = VDEC_Decode(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ if (ret != MPP_POLL_FAILED) { ++ break; ++ } else { ++ if (count >= 2 && count < MAX_POLL_TIME) { ++ g_usleep(500); ++ } else if (count >= MAX_POLL_TIME) { ++ 
GST_ERROR_OBJECT (thiz, "try max times: %u, fail return(%d)", MAX_POLL_TIME, ret); ++ ret = MPP_POLL_FAILED; ++ break; ++ } ++ } ++ count++; ++ } while (thiz->dec_nonblock); ++ ++ count = 0; ++ ++ return ret; ++} ++ ++static GstFlowReturn ++gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, ++ GstVideoCodecFrame * frame) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ GstMapInfo map_info; ++ GstFlowReturn flow_status; ++ guint actual_width, actual_height; ++ GstFlowReturn ret; ++ ++ if (G_UNLIKELY (thiz->flushing)) ++ goto flushing; ++ ++ if (G_UNLIKELY(thiz->downstream_flow_ret != GST_FLOW_OK)) ++ goto downstream_err; ++ ++ if (G_LIKELY(frame)) { ++ thiz->cur_frame_number = frame->system_frame_number; ++ GST_DEBUG_OBJECT (thiz, "handle frame %p, snb:%u, input buffer pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT, ++ frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->input_buffer)), ++ GST_TIME_ARGS (GST_BUFFER_DTS (frame->input_buffer))); ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ if (!gst_buffer_map (frame->input_buffer, &map_info, GST_MAP_READ)) ++ goto map_err; ++ ++ ret = gst_spacemitdec_vdec_decode (thiz, &map_info, GST_BUFFER_DTS (frame->input_buffer)); ++ ++ gst_buffer_unmap (frame->input_buffer, &map_info); ++ ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ if (ret) { ++ GST_ERROR_OBJECT (thiz, "VDEC_Decode return error! 
(%d)", ret); ++ ret = gst_video_decoder_drop_frame (decoder, frame); ++ goto out; ++ } ++ gst_video_codec_frame_unref (frame); ++ } else { ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ ret = gst_spacemitdec_vdec_decode (thiz, NULL, GST_CLOCK_TIME_NONE); ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- in handle else, %d!", ret); ++ thiz->cur_frame_number = -1; ++ ret = GST_FLOW_EOS; ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ goto out; ++ } ++ actual_width = thiz->ctx->stVdecPara.nWidth; ++ actual_height = thiz->ctx->stVdecPara.nHeight; ++ ++ if (!gst_pad_has_current_caps (GST_VIDEO_DECODER_SRC_PAD (thiz)) ++ || actual_width != thiz->width ++ || actual_height != thiz->height) { ++ ++ if (!gst_spacemitdec_set_src_caps (thiz)) ++ goto not_negotiated_err; ++ ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- bf negotiate, %d, %d, %d", ++ actual_width, actual_height, thiz->eOutputPixelFormat); ++ ++ if (!gst_video_decoder_negotiate (decoder)) ++ goto not_negotiated_err; ++ ++ if (!gst_buffer_pool_set_active (thiz->pool, TRUE)) ++ goto acitve_fail; ++ } ++ ++ if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong --------------------- start dec thread"); ++ gst_pad_start_task (decoder->srcpad, ++ (GstTaskFunction) gst_spacemitdec_loop, decoder, NULL); ++ } ++ ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------ finish handle, %d", thiz->downstream_flow_ret); ++ ret = thiz->downstream_flow_ret; ++ ++out: ++ if (thiz->downstream_flow_ret == GST_FLOW_FLUSHING) ++ ret = GST_FLOW_FLUSHING; ++ return ret; ++ ++out_clked: ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ return ret; ++ ++not_negotiated_err: ++{ ++ GST_ERROR_OBJECT (thiz, ++ "Failed to negotiate with downstream elements"); ++ ret = GST_FLOW_NOT_NEGOTIATED; ++ goto out; ++} ++ ++acitve_fail: ++{ ++ GST_ERROR_OBJECT (thiz, "acitve spacemit pool fail!"); ++ gst_object_unref (thiz->pool); ++ ret = 
GST_FLOW_ERROR; ++ goto out; ++} ++ ++map_err: ++{ ++ GST_ERROR_OBJECT (thiz, "Cannot map input buffer!"); ++ gst_video_codec_frame_unref (frame); ++ ret = GST_FLOW_ERROR; ++ goto out_clked; ++} ++ ++downstream_err: ++{ ++ GST_ERROR_OBJECT (thiz, "Downstream returned %s", ++ gst_flow_get_name (thiz->downstream_flow_ret)); ++ ret = thiz->downstream_flow_ret; ++ goto out; ++} ++flushing: ++{ ++ GST_WARNING_OBJECT (thiz, "flushing"); ++ ret = GST_FLOW_FLUSHING; ++ gst_video_decoder_release_frame (decoder, frame); ++ goto out; ++} ++} ++ ++static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ GstFlowReturn flow_status; ++ ++ GST_DEBUG_OBJECT(thiz, "finish"); ++ ++ /* Decoder not negotiated yet */ ++ if (thiz->width == 0) ++ return GST_FLOW_OK; ++ GST_DEBUG_OBJECT(thiz, "finish test1"); ++ ++ /* Drain all pending frames */ ++ //while ((gst_spacemitdec_handle_frame (decoder, NULL)) == GST_FLOW_OK); ++ flow_status = gst_spacemitdec_handle_frame (decoder, NULL); ++ if (flow_status != GST_FLOW_EOS) { ++ GST_ERROR_OBJECT(thiz, "stream no in eos, after setting null packet! 
(%d)", flow_status); ++ return GST_FLOW_ERROR; ++ } ++ ++ thiz->at_eos = TRUE; ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ ++ GST_DEBUG_OBJECT(thiz, "finish test222"); ++ gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator); ++ ++ /* Wait for task thread to pause */ ++ GstTask *task = decoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(thiz, "finish test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(thiz, "finish test5"); ++ //GST_OBJECT_UNLOCK (task); ++ } ++ gst_pad_stop_task (decoder->srcpad); ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ GST_DEBUG_OBJECT(thiz, "finish test2"); ++ ++ return GST_FLOW_OK; ++} ++ ++static GstBufferPool * ++gst_spacemitdec_create_buffer_pool (GstSpacemitDec * thiz, GstVideoInfo * info, ++ guint num_buffers) ++{ ++ GstBufferPool *pool = NULL; ++ GstAllocator *allocator = NULL; ++ GstStructure *config; ++ GstCaps *caps = NULL; ++ GstVideoAlignment align; ++ ++ pool = gst_spacemit_buffer_pool_new (); ++ if (!pool) ++ goto error_no_pool; ++ ++ allocator = gst_spacemit_allocator_new (); ++ if (!allocator) ++ goto error_no_allocator; ++ ++ gst_spacemit_allocator_configure(allocator, info, thiz->use_dmabuf); ++ ++ caps = gst_video_info_to_caps (info); ++ ++ config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); ++ gst_buffer_pool_config_set_params (config, caps, ++ GST_VIDEO_INFO_SIZE (info), num_buffers, num_buffers); ++ gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); ++ gst_buffer_pool_config_add_option (config, ++ GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); ++ gst_caps_unref (caps); ++ ++ if (thiz->use_dmabuf) ++ gst_buffer_pool_config_add_option (config, ++ GST_BUFFER_POOL_OPTION_SPM_USE_DMABUF); ++ ++ gst_buffer_pool_config_set_video_alignment (config, &align); ++ gst_buffer_pool_config_set_allocator (config, allocator, 
NULL); ++ gst_object_unref (allocator); ++ ++ if (!gst_buffer_pool_set_config (pool, config)) ++ goto error_pool_config; ++ ++ return pool; ++ ++error_no_pool: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to create spacemitdec bufferpool"); ++ return NULL; ++ } ++error_no_allocator: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to create allocator"); ++ gst_object_unref (pool); ++ return NULL; ++ } ++error_pool_config: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to set config"); ++ gst_object_unref (pool); ++ gst_object_unref (allocator); ++ return NULL; ++ } ++} ++ ++static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder * decoder, GstQuery * query) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ GstBufferPool *pool; ++ guint size, min, max, i; ++ GstStructure *pool_config; ++ GstCaps *caps = NULL; ++ GstVideoInfo vinfo; ++ GstVideoCodecState *output_state; ++ ++ GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- in decide_allocation!"); ++ ++ if (!GST_VIDEO_DECODER_CLASS(gst_spacemitdec_parent_class)->decide_allocation(decoder, query)) ++ return FALSE; ++ ++ /* Get the buffer pool config decided on by the base class. The base ++ class ensures that there will always be at least a 0th pool in ++ the query. */ ++ gst_query_parse_nth_allocation_pool(query, 0, &pool, &size, &min, &max); ++ pool_config = gst_buffer_pool_get_config(pool); ++ gst_buffer_pool_config_get_params (pool_config, &caps, &size, ++ &min, &max); ++ ++ GST_DEBUG_OBJECT (thiz, "get pool caps: %" GST_PTR_FORMAT, caps); ++ if (_gst_caps_has_feature (caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) { ++ GST_INFO_OBJECT (thiz, "the spacemit decoder uses DMABuf memory"); ++ thiz->use_dmabuf = TRUE; ++ } else { ++ thiz->use_dmabuf = FALSE; ++ } ++ ++ /* Decoder always use its own pool. 
*/ ++ if (!thiz->pool) { ++ output_state = gst_video_decoder_get_output_state (GST_VIDEO_DECODER (thiz)); ++ ++ gst_clear_object (&thiz->pool); ++ GST_INFO_OBJECT (thiz, "create new spacemit bufferpool"); ++ thiz->pool = ++ gst_spacemitdec_create_buffer_pool(thiz, &output_state->info, (4 > min) ? 4 : min); ++ gst_video_codec_state_unref (output_state); ++ if (!thiz->pool) { ++ GST_ERROR_OBJECT (thiz, "failed to create new pool"); ++ goto failed_to_create_pool; ++ } ++ } ++ GST_DEBUG_OBJECT (thiz, ++ "upstream provides the pool is: %" GST_PTR_FORMAT, pool); ++ ++ /* If downstream supports video meta and video alignment, ++ * we can replace with our own spacemit bufferpool and use it ++ */ ++#if 0 ++ if (gst_buffer_pool_has_option (pool, ++ GST_BUFFER_POOL_OPTION_VIDEO_META)) { ++ GstStructure *config; ++ GstAllocator *allocator; ++ ++ /* Remove downstream's pool */ ++ gst_structure_free (pool_config); ++ gst_object_unref (pool); ++ ++ pool = gst_object_ref (spacemitdec->pool); ++ /* Set the allocator of new spacemitdec bufferpool */ ++ config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); ++ ++ if (gst_buffer_pool_config_get_allocator (config, &allocator, NULL)) ++ gst_query_set_nth_allocation_param (query, 0, allocator, NULL); ++ gst_structure_free (config); ++ ++ gst_query_set_nth_allocation_pool (query, 0, pool, size, min, ++ max); ++ } else { ++ goto no_support; ++ } ++#endif ++ if (pool) ++ gst_object_unref (pool); ++ ++ return TRUE; ++ ++failed_to_create_pool: ++ GST_ERROR_OBJECT (thiz, "failed to set buffer pool config"); ++ if (pool) ++ gst_object_unref (pool); ++ return FALSE; ++ ++no_support: ++ GST_ERROR_OBJECT (thiz, ++ "error! 
upstream provides the strange pool: %" GST_PTR_FORMAT, pool); ++ if (pool) ++ gst_object_unref (pool); ++ return FALSE; ++} ++ ++static GstStateChangeReturn ++gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition) ++{ ++ GstVideoDecoder *decoder = GST_VIDEO_DECODER (element); ++ GST_DEBUG("ZRong ------------------ in change state, %x", transition); ++ ++ if (transition == GST_STATE_CHANGE_PAUSED_TO_READY) { ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) ++ return TRUE; ++ ++ GST_DEBUG_OBJECT (decoder, "stopping decoding thread"); ++ gst_pad_stop_task (decoder->srcpad); ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ } ++ ++ return GST_ELEMENT_CLASS (parent_class)->change_state (element, transition); ++} ++ ++gboolean ++gst_spacemitdec_register (GstPlugin * plugin, guint rank) ++{ ++ GST_DEBUG_CATEGORY_INIT (gst_spacemitdec_debug_category, "spacemitdec", 0, ++ "debug category for spacemitdec element"); ++ ++ return gst_element_register (plugin, "spacemitdec", rank, ++ GST_TYPE_SPACEMITDEC); ++} +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.h b/ext/spacemit/spacemitcodec/gstspacemitdec.h +new file mode 100755 +index 0000000..99cfd31 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.h +@@ -0,0 +1,98 @@ ++/* GStreamer ++ * Copyright (C) 2022 FIXME ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ */ ++ ++#ifndef _GST_SPACEMITDEC_H_ ++#define _GST_SPACEMITDEC_H_ ++ ++#include ++#include ++#include "gstspacemitallocator.h" ++#include "gstspacemitbufferpool.h" ++ ++#include ++ ++G_BEGIN_DECLS ++ ++#define GST_TYPE_SPACEMITDEC (gst_spacemitdec_get_type()) ++#define GST_SPACEMITDEC(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_SPACEMITDEC,GstSpacemitDec)) ++#define GST_SPACEMITDEC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_SPACEMITDEC,GstSpacemitDecClass)) ++#define GST_IS_SPACEMITDEC(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_SPACEMITDEC)) ++#define GST_IS_SPACEMITDEC_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_SPACEMITDEC)) ++ ++typedef struct _GstSpacemitDec GstSpacemitDec; ++typedef struct _GstSpacemitDecClass GstSpacemitDecClass; ++ ++struct _GstSpacemitDec ++{ ++ GstVideoDecoder base_spacemitdec; ++ ++ MppVdecCtx *ctx; ++ MppVdecPara *para; ++ MppPacket *mpppacket; ++ MppFrame *mppframe; ++ ++ guint width; ++ guint height; ++ guint eCodecType; ++ guint eCodingType; ++ MppPixelFormat eOutputPixelFormat; ++ guint downscale; ++ ++ GstVideoCodecState *input_state; ++ GstFlowReturn downstream_flow_ret; ++ gboolean at_eos; ++ guint32 cur_frame_number; /* ED */ ++ GstVideoInfo out_info; ++ /* stop handling new frame when flushing */ ++ gboolean flushing; ++ gboolean use_dmabuf; ++ gboolean initialized; ++ gboolean dec_nonblock; ++ gboolean req_nonblock; ++ gboolean save_dec; ++ gboolean wait_reschange; ++ GstBufferPool *pool; ++ FILE *fb; ++}; ++ ++struct _GstSpacemitDecClass ++{ ++ GstVideoDecoderClass base_spacemitdec_class; ++}; ++ ++#ifndef GST_CAPS_FEATURE_MEMORY_DMABUF ++#define GST_CAPS_FEATURE_MEMORY_DMABUF "memory:DMABuf" ++#endif ++ ++#define SPM_DEC_OUT_FORMATS "I420, NV21, NV12" ++#define 
SPM_DEC_FORMATS SPM_DEC_OUT_FORMATS ++//#define SPM_DEC_CAPS_MAKE(fmts) \ ++// GST_VIDEO_CAPS_MAKE (fmts) ";" ++#define SPM_DEC_CAPS_MAKE(fmts) \ ++ GST_VIDEO_CAPS_MAKE (fmts) ";" \ ++ GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_DMABUF, fmts) ++ ++GType gst_spacemitdec_get_type(void); ++ ++gboolean ++gst_spacemitdec_register (GstPlugin * plugin, guint rank); ++ ++G_END_DECLS ++ ++#endif +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c b/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c +new file mode 100755 +index 0000000..f81ee21 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c +@@ -0,0 +1,1394 @@ ++/* GStreamer ++ * Copyright (C) 2022 FIXME ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin Street, Suite 500, ++ * Boston, MA 02110-1335, USA. ++ */ ++/** ++ * SECTION:element-gstvideoprocess ++ * ++ * The videoprocess element does FIXME stuff. ++ * ++ * ++ * Example launch line ++ * |[ ++ * gst-launch-1.0 -v fakesrc ! videoprocess ! FIXME ! fakesink ++ * ]| ++ * FIXME Describe what the pipeline does. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++#include ++#include ++ ++#include ++#include ++#include ++#include "gstspacemitdec.h" ++ ++//#define MULTI_THREAD_DOWNSTREAM_POOL_TEST ++//#define SINGLE_THREAD_DOWNSTREAM_POOL_TEST ++ ++GST_DEBUG_CATEGORY_STATIC (gst_spacemitdec_debug_category); ++#define GST_CAT_DEFAULT gst_spacemitdec_debug_category ++ ++/* prototypes */ ++static void gst_spacemitdec_set_property (GObject * object, ++ guint property_id, const GValue * value, GParamSpec * pspec); ++static void gst_spacemitdec_get_property (GObject * object, ++ guint property_id, GValue * value, GParamSpec * pspec); ++static void gst_spacemitdec_finalize (GObject * object); ++static gboolean gst_spacemitdec_close(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_start(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_stop(GstVideoDecoder *decoder); ++static gboolean gst_spacemitdec_set_format(GstVideoDecoder *decoder, GstVideoCodecState *state); ++static gboolean gst_spacemitdec_reset(GstVideoDecoder *decoder, gboolean hard); ++static gboolean gst_spacemitdec_flush(GstVideoDecoder * decoder); ++static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder); ++static GstFlowReturn gst_spacemitdec_handle_frame(GstVideoDecoder *decoder, GstVideoCodecFrame *frame); ++static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query); ++static GstStateChangeReturn gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition); ++ ++/* pad templates */ ++static GstStaticPadTemplate gst_spacemitdec_sink_template = ++GST_STATIC_PAD_TEMPLATE ("sink", ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ("video/x-h264, stream-format=(string)byte-stream, alignment=(string)au, " ++ "profile=(string){ constrained-baseline, baseline, main, high }," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ 
"alignment = (string)au," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ )); ++ ++static GstStaticPadTemplate gst_spacemitdec_src_template = ++ GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (SPM_DEC_CAPS_MAKE ("{" SPM_DEC_FORMATS "}") ";") ++ ); ++ ++#define parent_class gst_spacemitdec_parent_class ++/* class initialization */ ++G_DEFINE_TYPE(GstSpacemitDec, gst_spacemitdec, GST_TYPE_VIDEO_DECODER); ++enum ++{ ++ PROP_0, ++ PROP_CODING_WIDTH, ++ PROP_CODING_HIGHT, ++ PROP_CODING_TYPE, ++ PROP_CODE_TYPE, ++ PROP_CODE_YUV_FORMAT, ++ N_PROPERTIES ++}; ++ ++static gboolean ++_gst_caps_has_feature (const GstCaps * caps, const gchar * feature) ++{ ++ guint i; ++ ++ for (i = 0; i < gst_caps_get_size (caps); i++) { ++ GstCapsFeatures *const features = gst_caps_get_features (caps, i); ++ /* Skip ANY features, we need an exact match for correct evaluation */ ++ if (gst_caps_features_is_any (features)) ++ continue; ++ if (gst_caps_features_contains (features, feature)) ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static gboolean ++srcpad_can_dmabuf (GstSpacemitDec * thiz) ++{ ++ gboolean ret = FALSE; ++ GstCaps *caps, *out_caps; ++ GstPad *srcpad; ++ ++ srcpad = GST_VIDEO_DECODER_SRC_PAD (thiz); ++ caps = gst_pad_get_pad_template_caps (srcpad); ++ ++ out_caps = gst_pad_peer_query_caps (srcpad, caps); ++ if (!out_caps) ++ goto done; ++ ++ if (gst_caps_is_any (out_caps) || gst_caps_is_empty (out_caps) ++ || out_caps == caps) ++ goto done; ++ ++ if (_gst_caps_has_feature (out_caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) ++ ret = TRUE; ++ ++done: ++ if (caps) ++ gst_caps_unref (caps); ++ if (out_caps) ++ gst_caps_unref (out_caps); ++ return ret; ++} ++ ++static gboolean gst_spacemitdec_close(GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ ++ if (thiz->input_state) ++ { ++ gst_video_codec_state_unref (thiz->input_state); ++ thiz->input_state = NULL; ++ } ++ GST_DEBUG_OBJECT (decoder, "ZRong 
--------------- spacemitdec start close"); ++ if (thiz->pool) { ++ gst_buffer_pool_set_active (thiz->pool, FALSE); ++// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); ++ gst_object_unref (spacemitdec->pool); ++ thiz->pool = NULL; ++ } ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close222"); ++ ++// FRAME_Destory(spacemitdec->mppframe); ++ PACKET_Destory (thiz->mpppacket); ++ VDEC_DestoryChannel (thiz->ctx); ++ ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish close"); ++ ++ return TRUE; ++} ++ ++ ++static void gst_spacemitdec_class_init(GstSpacemitDecClass * klass) ++{ ++ GstVideoDecoderClass *video_decoder_class = GST_VIDEO_DECODER_CLASS (klass); ++ GstElementClass *element_class = GST_ELEMENT_CLASS (klass); ++ GObjectClass *gobject_class = G_OBJECT_CLASS (klass); ++ ++ gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_sink_template); ++ gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_src_template); ++ ++ gst_element_class_set_static_metadata(GST_ELEMENT_CLASS (klass), ++ "Spacemit video decoder", "Decoder/Video", "Spacemit video decoder", ++ "ZRong, zhirong.li@spacemit.com"); ++ ++ gobject_class->set_property = gst_spacemitdec_set_property; ++ gobject_class->get_property = gst_spacemitdec_get_property; ++ gobject_class->finalize = gst_spacemitdec_finalize; ++ ++ video_decoder_class->close = GST_DEBUG_FUNCPTR(gst_spacemitdec_close); ++ video_decoder_class->start = GST_DEBUG_FUNCPTR(gst_spacemitdec_start); ++ video_decoder_class->stop = GST_DEBUG_FUNCPTR(gst_spacemitdec_stop); ++ video_decoder_class->set_format = GST_DEBUG_FUNCPTR(gst_spacemitdec_set_format); ++ video_decoder_class->reset = GST_DEBUG_FUNCPTR(gst_spacemitdec_reset); ++ video_decoder_class->flush = GST_DEBUG_FUNCPTR (gst_spacemitdec_flush); ++ video_decoder_class->finish = GST_DEBUG_FUNCPTR(gst_spacemitdec_finish); ++ 
video_decoder_class->handle_frame = GST_DEBUG_FUNCPTR(gst_spacemitdec_handle_frame); ++ video_decoder_class->decide_allocation = GST_DEBUG_FUNCPTR(gst_spacemitdec_decide_allocation); ++ ++// element_class->change_state = GST_DEBUG_FUNCPTR (gst_spacemitdec_dec_change_state); ++ ++ /* define properties */ ++ g_object_class_install_property (gobject_class, PROP_CODING_TYPE, ++ g_param_spec_uint ("coding-type", "coding type", ++ "Format to decode", ++ CODING_H264, CODING_FWHT, CODING_H264, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_TYPE, ++ g_param_spec_uint ("code-type", "code type", ++ "Codec selection to work", ++ CODEC_OPENH264, 1023, CODEC_SFOMX, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_WIDTH, ++ g_param_spec_uint ("coding-width", "coding width", ++ "image width to decode", ++ 0, 3840, 1280, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_HIGHT, ++ g_param_spec_uint ("coding-hight", "coding hight", ++ "image hight to decode", ++ 0, 2160, 720, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, ++ g_param_spec_uint ("code-yuv-format", "code yuv format", ++ "Decode the generated yuv format", ++ PIXEL_FORMAT_DEFAULT, PIXEL_FORMAT_UNKNOWN-1, PIXEL_FORMAT_I420, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ ++} ++ ++static void gst_spacemitdec_init (GstSpacemitDec * thiz) ++{ ++ GstVideoDecoder *decoder = GST_VIDEO_DECODER (thiz); ++ ++ thiz->pool = NULL; ++ thiz->ctx = NULL; ++ thiz->width = 1280; ++ thiz->height = 720; ++ thiz->eCodecType = CODEC_SFOMX; ++ thiz->eCodingType = CODING_H264; ++ thiz->eOutputPixelFormat = PIXEL_FORMAT_I420; ++ ++ gst_video_decoder_set_packetized (decoder, TRUE); ++ 
gst_video_decoder_set_needs_format (decoder, TRUE); ++} ++void ++gst_spacemitdec_set_property (GObject * object, guint property_id, ++ const GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_CODING_WIDTH: ++ thiz->width = g_value_get_uint (value); ++ break; ++ case PROP_CODING_HIGHT: ++ thiz->height = g_value_get_uint (value); ++ break; ++ case PROP_CODING_TYPE: ++ thiz->eCodingType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_TYPE: ++ thiz->eCodecType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ thiz->eOutputPixelFormat = g_value_get_uint (value); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitdec_get_property (GObject * object, guint property_id, ++ GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- get_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_CODING_WIDTH: ++ g_value_set_uint (value, thiz->width); ++ break; ++ case PROP_CODING_HIGHT: ++ g_value_set_uint (value, thiz->height); ++ break; ++ case PROP_CODING_TYPE: ++ g_value_set_uint (value, thiz->eCodingType); ++ break; ++ case PROP_CODE_TYPE: ++ g_value_set_uint (value, thiz->eCodecType); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ g_value_set_uint (value, thiz->eOutputPixelFormat); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitdec_finalize (GObject * object) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "finalize"); ++ ++ /* clean up object here */ ++ ++ if (thiz->input_state) { ++ gst_video_codec_state_unref (thiz->input_state); ++ } ++ thiz->input_state = NULL; ++ ++ 
G_OBJECT_CLASS (gst_spacemitdec_parent_class)->finalize (object); ++} ++ ++static FILE *fbbb1; ++static gboolean gst_spacemitdec_start (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); ++ int ret = 0; ++ ++ GST_ERROR_OBJECT (thiz, "ZRong ----------------- in start !"); ++ ++ if (thiz->ctx) ++ { ++ VDEC_DestoryChannel (thiz->ctx); ++ thiz->ctx = NULL; ++ } ++ thiz->ctx = VDEC_CreateChannel (); ++ thiz->ctx->eCodecType = thiz->eCodecType; ++ ++ thiz->mpppacket = PACKET_Create (); ++ if (!thiz->mpppacket) ++ goto alloc_err; ++ ++ thiz->mppframe = NULL; ++ thiz->at_eos = FALSE; ++ thiz->downstream_flow_ret = GST_FLOW_OK; ++ thiz->cur_frame_number = -1; ++ thiz->use_dmabuf = FALSE; ++ thiz->initialized = FALSE; ++ ++ GST_ERROR_OBJECT (thiz, "ZRong ------------------------- finish start !"); ++ fbbb1 = fopen("/tmp/out.yuv", "ab+"); ++ ++ return TRUE; ++ ++alloc_err: ++ GST_ERROR_OBJECT (spacemitdec, "can not alloc for mpp structure, please check !"); ++ return FALSE; ++ ++init_err: ++ GST_ERROR_OBJECT (spacemitdec, "Mpp vdec init error, please check !"); ++ return FALSE; ++ ++} ++ ++static gboolean gst_spacemitdec_stop(GstVideoDecoder *decoder) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ GstSpaceMitAllocator * allocator; ++ fclose(fbbb1); ++ ++#if 0 ++ if (spacemitdec->input_state) ++ { ++ gst_video_codec_state_unref (spacemitdec->input_state); ++ spacemitdec->input_state = NULL; ++ } ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start stop"); ++ ++// FRAME_Destory(spacemitdec->mppframe); ++ PACKET_Destory (spacemitdec->mpppacket); ++ VDEC_DestoryChannel (spacemitdec->ctx); ++ ++ if (spacemitdec->pool) { ++// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); ++ gst_object_unref (spacemitdec->pool); ++ spacemitdec->pool = NULL; ++ } ++#endif ++ if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ GST_DEBUG_OBJECT (decoder, 
"ZRong --------------- spacemitdec finish stop"); ++ ++ return TRUE; ++ } ++ ++ gst_pad_stop_task (decoder->srcpad); ++ GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish stop222"); ++ ++ return TRUE; ++} ++static MppCodingType ++gst_spacemit_get_mpp_video_type (GstStructure * s) ++{ ++ if (gst_structure_has_name (s, "video/x-h264")) ++ return CODING_H264; ++ ++ if (gst_structure_has_name (s, "video/x-h265")) ++ return CODING_H265; ++ ++ return CODING_UNKNOWN; ++} ++static gboolean ++gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) ++{ ++ GstStructure *structure; ++ gboolean ret = TRUE; ++ ++ if (thiz->initialized) ++ return TRUE; ++ ++ structure = gst_caps_get_structure (thiz->input_state->caps, 0); ++ thiz->eCodingType = gst_spacemit_get_mpp_video_type (structure); ++ if(thiz->eCodingType == CODING_UNKNOWN) { ++ GST_ERROR_OBJECT(thiz, "mpp no support this eCodingType"); ++ return FALSE; ++ } ++ ++ if (thiz->ctx->eCodecType == CODEC_SFOMX || ++ thiz->ctx->eCodecType == CODEC_OPENH264 || ++ thiz->ctx->eCodecType == CODEC_FAKEDEC) { ++ thiz->width = thiz->input_state->info.width; ++ thiz->height = thiz->input_state->info.height; ++ thiz->ctx->stVdecPara.eCodingType = thiz->eCodingType; ++ thiz->ctx->stVdecPara.nWidth = thiz->width; ++ thiz->ctx->stVdecPara.nHeight = thiz->height; ++ thiz->ctx->stVdecPara.eOutputPixelFormat = thiz->eOutputPixelFormat; ++ thiz->ctx->eCodecType = thiz->eCodecType; ++ thiz->ctx->stVdecPara.bInputBlockModeEnable = MPP_TRUE; ++ thiz->ctx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; ++ GST_DEBUG_OBJECT (thiz, "spacemitdec set eCodecType is %d", thiz->ctx->eCodecType); ++ ++ ret = VDEC_Init (thiz->ctx); ++ if (ret) { ++ GST_ERROR_OBJECT (thiz, "Mpp vdec init error, please check !"); ++ return FALSE; ++ } ++ } else { ++ GST_ERROR_OBJECT (thiz, "No supprot this type(%d) to handle frame", thiz->ctx->eCodecType); ++ } ++ ++ thiz->initialized = TRUE; ++ return TRUE; ++} ++ ++/* function: ++ * Tell subclasses to input 
stream data format ++ * called time: ++ * When the upstream element sends GST_EVENT_CAPS, ++ * call in gst_video_decoder_setcaps. ++ * need to modify................................. ++ */ ++static gboolean gst_spacemitdec_set_format(GstVideoDecoder *decoder, GstVideoCodecState *state) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ GstStructure *structure; ++ gboolean ret = TRUE; ++ ++ GST_DEBUG_OBJECT(spacemitdec, "input caps: %" GST_PTR_FORMAT, state->caps); ++ ++ if(spacemitdec->input_state) ++ { ++ gst_video_codec_state_unref(spacemitdec->input_state); ++ spacemitdec->input_state = NULL; ++ } ++ ++ spacemitdec->input_state = gst_video_codec_state_ref (state); ++ if (!gst_spacemitdec_init_decoder(spacemitdec)) ++ return FALSE; ++ ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- finish set_format, %d, %d, %d, %d, %d, %d, %d, %d!", ++ spacemitdec->input_state->info.width, spacemitdec->input_state->info.height, spacemitdec->input_state->info.size, ++ state->info.width, state->info.height, state->info.size, ++ PACKET_GetBaseData(spacemitdec->mpppacket)->nWidth, PACKET_GetBaseData(spacemitdec->mpppacket)->nHeight); ++ GstVideoFormat fmt; ++ fmt = GST_VIDEO_INFO_FORMAT (&state->info); ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ----------------------- set format finish, %u, %s", fmt, gst_video_format_to_string (fmt)); ++ ++ return TRUE; ++} ++ ++static gboolean gst_spacemitdec_reset(GstVideoDecoder *decoder, gboolean hard) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------------- finish reset!"); ++ ++ return TRUE; ++} ++static gboolean ++gst_spacemitdec_flush (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ ++ GST_DEBUG_OBJECT (spacemitdec, "ZRong -------------------- flushing decoder start, (%d)", spacemitdec->downstream_flow_ret); ++ ++ spacemitdec->flushing = TRUE; ++ ++ if (spacemitdec->downstream_flow_ret 
== GST_FLOW_EOS) ++ gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); ++ ++ VDEC_Flush(spacemitdec->ctx); ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ ++ /* Wait for task thread to pause */ ++ GstTask *task = decoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(spacemitdec, "finish FLUSH test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(spacemitdec, "finish FLUSH test5"); ++ //GST_OBJECT_UNLOCK (task); ++ gst_pad_stop_task (decoder->srcpad); ++ } ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ spacemitdec->flushing = FALSE; ++ spacemitdec->downstream_flow_ret = GST_FLOW_OK; ++ ++ GST_DEBUG_OBJECT (spacemitdec, "ZRong -------------------- flushing decoder stop"); ++ ++ return TRUE; ++} ++ ++static int gst_mpp_format_change(MppPixelFormat eOutputPixelFormat) ++{ ++ GstVideoFormat format; ++ ++ switch(eOutputPixelFormat){ ++ case PIXEL_FORMAT_I420: ++ format = GST_VIDEO_FORMAT_I420; ++ break; ++ case PIXEL_FORMAT_NV21: ++ format = GST_VIDEO_FORMAT_NV21; ++ break; ++ case PIXEL_FORMAT_NV12: ++ format = GST_VIDEO_FORMAT_NV12; ++ break; ++ default: ++ format = GST_VIDEO_FORMAT_UNKNOWN; ++ break; ++ } ++ return format; ++} ++ ++static GstFlowReturn ++gst_spacemitdec_fill_output_buffer (GstVideoDecoder * decoder, ++ GstBuffer **output_buffer) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); ++ GstVideoInfo *vinfo = NULL; ++ GstVideoFormat format; ++ const guint8 *data; ++ guint8 *dst; ++ guint h; ++ gint dst_width[GST_VIDEO_MAX_PLANES] = { 0, }; ++ gint dst_height[GST_VIDEO_MAX_PLANES] = { 0, }; ++ guint p; ++ GstVideoFrame video_frame; ++ GstVideoCodecState *state; ++ GstFlowReturn flow_status = GST_FLOW_OK; ++ GstBuffer *outbuf; ++ GstBufferPoolAcquireParams params = { 0, }; ++ GstMemory *mem; ++ GstSpaceMitMemory *spm_mem; ++ gint32 id; ++ gint32 fd; ++ ++ id = 
FRAME_GetID(spacemitdec->mppframe); ++ if (spacemitdec->use_dmabuf) { ++ fd = FRAME_GetFD(spacemitdec->mppframe); ++ gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator, id, fd); ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong get info (%d %d)", id, fd); ++ ++ } else { ++ gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator, id, -1); ++ } ++ ++ flow_status = gst_buffer_pool_acquire_buffer (spacemitdec->pool, &outbuf, ¶ms); ++ if (flow_status != GST_FLOW_OK) { ++ goto alloc_err; ++ } ++ ++#if !defined(MULTI_THREAD_DOWNSTREAM_POOL_TEST) && !defined(SINGLE_THREAD_DOWNSTREAM_POOL_TEST) ++ mem = gst_buffer_peek_memory (outbuf, 0); ++ gst_spacemit_set_mem (mem, spacemitdec->mppframe, spacemitdec->ctx); ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 444"); ++ *output_buffer = outbuf; ++#else ++ ++ if (gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED) { ++ flow_status = gst_buffer_pool_acquire_buffer (spacemitdec->pool, &outbuf, ¶ms); ++ if (flow_status != GST_FLOW_OK) ++ goto alloc_err; ++ *output_buffer = outbuf; //mult thread, get outbuf from acquire ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong AAAA "); ++ } else { ++ outbuf = *output_buffer; //single thread, get outbuf from outside ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong BBBB "); ++ } ++ ++ state = gst_video_decoder_get_output_state (decoder); ++ if (state == NULL) ++ goto negotiated_err; ++ ++ if (!gst_video_frame_map (&video_frame, &state->info, outbuf, ++ GST_MAP_WRITE)) ++ goto map_err; ++ ++ format = gst_mpp_format_change(spacemitdec->eOutputPixelFormat); ++ ++ vinfo = &state->info; ++ dst_height[0] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo); ++ ++ switch (format) { ++ case GST_VIDEO_FORMAT_I420: ++ dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; ++ dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ dst_width[2] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; ++ dst_height[2] = 
GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in I420 dec_fill_buffer,(%d, %d, %d) (%d, %d, %d)", ++ dst_width[0], dst_width[1], dst_width[2], ++ GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 0), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 1), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 2)); ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in I420 dec_fill_buffer,(%d, %d, %d) %d", ++ dst_height[0], dst_height[1], dst_height[2], GST_VIDEO_INFO_N_PLANES (vinfo)); ++ ++ break; ++ case GST_VIDEO_FORMAT_NV12: ++ case GST_VIDEO_FORMAT_NV21: ++ dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo); ++ dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in NV12/NV21 dec_fill_buffer,(%d, %d) ", ++ dst_width[0], dst_width[1]); ++ GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in NV12/NV21 dec_fill_buffer,(%d, %d,)", ++ dst_height[0], dst_height[1]); ++ ++ break; ++ default: ++ g_assert_not_reached (); ++ break; ++ } ++ ++ for (p = 0; p < GST_VIDEO_INFO_N_PLANES (vinfo); p++) { ++ data = (U8*)FRAME_GetDataPointer(spacemitdec->mppframe, p); ++ dst = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, p); ++ ++ for (h = 0; h < dst_height[p]; h++) { ++// fwrite(data, 1, dst_width[p], fbbb1); ++ memcpy (dst, data, dst_width[p]); ++ dst += GST_VIDEO_INFO_PLANE_STRIDE (vinfo, p); ++ data += dst_width[p]; ++ } ++ } ++ ++ gst_video_codec_state_unref (state); ++ gst_video_frame_unmap (&video_frame); ++#endif ++ ++done: ++ spacemitdec->mppframe = NULL; ++ return flow_status; ++ ++alloc_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, ++ "an output buffer could not be allocated"); ++ goto done; ++} ++negotiated_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, ++ "Not yet negotiate with downstream elements!"); ++ flow_status = GST_FLOW_NOT_NEGOTIATED; ++ goto done; ++} ++map_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, "Cannot map output buffer!"); ++ 
gst_video_codec_state_unref (state); ++ flow_status = GST_FLOW_ERROR; ++ goto done; ++} ++ ++} ++ ++static void ++gst_st_mpp_dec_loop (GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); ++ GstVideoCodecFrame * frame; ++ GstFlowReturn flow_status; ++ GstVideoFrame video_frame; ++ guint8 *p; ++ guint row_stride, component_width, component_height, row; ++ U8 *tmp_pdata[3]; ++ int pnum, i, size[3]; ++ int ret; ++ MppFrame *mppframe = NULL; ++ ++ if (G_UNLIKELY(spacemitdec->flushing)) { ++ goto flushing; ++ } ++ ++ ret = VDEC_RequestOutputFrame_2 (spacemitdec->ctx, (MppData **)&mppframe); ++ if (ret == MPP_CODER_EOS) { ++ goto finish_work; ++ } else if (ret == MPP_CODER_NO_DATA || mppframe == NULL) { ++ goto no_mppframe; ++ } ++ ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 555, %d", ret); ++ ++ spacemitdec->mppframe = mppframe; ++ frame = gst_video_decoder_get_oldest_frame (decoder); ++ ++ if (frame == NULL) { ++ GstBuffer *outbuf = NULL; ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, null"); ++ flow_status = gst_spacemitdec_fill_output_buffer (decoder, &outbuf); ++ if (flow_status != GST_FLOW_OK) { ++ goto fill_buffer_err; ++ } ++ GST_DEBUG_OBJECT (spacemitdec, "zrong ---------------- push buf of size %" G_GSIZE_FORMAT ", " ++ "PTS %" GST_TIME_FORMAT ", dur %" GST_TIME_FORMAT, ++ gst_buffer_get_size (outbuf), ++ GST_TIME_ARGS (GST_BUFFER_PTS (outbuf)), ++ GST_TIME_ARGS (GST_BUFFER_DURATION (outbuf))); ++ flow_status = gst_pad_push (GST_VIDEO_DECODER_SRC_PAD (decoder), outbuf); ++ ++ } else { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, %d", frame->ref_count); ++ flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); ++ if (flow_status != GST_FLOW_OK) { ++ goto fill_buffer_err; ++ } ++ struct timeval tv1 = {0}; ++ struct timeval tv2 = {0}; ++ ++ gettimeofday(&tv1, NULL); ++ ++ flow_status = gst_video_decoder_finish_frame(decoder, frame); ++ gettimeofday(&tv2, NULL); ++ ++ GST_ERROR_OBJECT 
(spacemitdec, "ZRong -------------- spacemit handle %ld ", ++ tv2.tv_sec * 1000000 + tv2.tv_usec - (tv1.tv_sec * 1000000 + tv1.tv_usec)); ++ } ++ ++ if (flow_status != GST_FLOW_OK) ++ goto sending_state; ++ ++ spacemitdec->downstream_flow_ret = flow_status; ++#ifdef MULTI_THREAD_DOWNSTREAM_POOL_TEST ++ VDEC_ReturnOutputFrame(spacemitdec->ctx, FRAME_GetBaseData(spacemitdec->mppframe)); ++#endif ++ ++done: ++ if (spacemitdec->downstream_flow_ret != GST_FLOW_OK) { ++ GST_INFO_OBJECT (spacemitdec, ++ "pause task in dec loop (%d)!", spacemitdec->downstream_flow_ret); ++ ++ gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (spacemitdec)); ++ } ++ return; ++ ++flushing: ++{ ++ spacemitdec->downstream_flow_ret = GST_FLOW_FLUSHING; ++ ++ while (1) { ++ frame = gst_video_decoder_get_oldest_frame (decoder); ++ if(frame == NULL) ++ break; ++ gst_video_decoder_release_frame (decoder, frame); ++ } ++ GST_INFO_OBJECT (spacemitdec, "flushing spacemit decoder"); ++ ++ goto done; ++} ++ ++no_mppframe: ++{ ++ //GST_LOG_OBJECT (spacemitdec, ++ // "No out mppframes to request!"); ++ spacemitdec->downstream_flow_ret = GST_FLOW_OK; ++ goto done; ++} ++ ++fill_buffer_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, ++ "fill buffer err in dec loop, flow status: %d!", flow_status); ++ spacemitdec->downstream_flow_ret = flow_status; ++ gst_video_codec_frame_unref (frame); ++ goto done; ++} ++ ++finish_work: ++{ ++ GST_DEBUG_OBJECT (spacemitdec, ++ "Get eos, Finished work and paused task!"); ++ spacemitdec->downstream_flow_ret = GST_FLOW_EOS; ++ ++ goto done; ++ ++} ++fream_null: ++{ ++ GST_ERROR_OBJECT (spacemitdec, "get oldest frame fail!"); ++ spacemitdec->downstream_flow_ret = GST_FLOW_ERROR; ++ goto done; ++} ++ ++sending_state: ++{ ++ spacemitdec->downstream_flow_ret = flow_status; ++ if (flow_status == GST_FLOW_EOS) { ++ GST_DEBUG_OBJECT (spacemitdec, ++ "Get eos, Finished work!"); ++ } else if (flow_status == GST_FLOW_ERROR) { ++ GST_ERROR_OBJECT (spacemitdec, ++ "send error and paused task!"); 
++ } else if (flow_status == GST_FLOW_FLUSHING) { ++ spacemitdec->flushing = TRUE; ++ GST_DEBUG_OBJECT (spacemitdec, ++ "Get GST_FLOW_FLUSHING from finish frame!"); ++ ++ goto flushing; ++ } else { ++ GST_ERROR_OBJECT (spacemitdec, ++ "Get an unsupport flow status return after finish frame!"); ++ } ++ goto done; ++} ++ ++} ++static gboolean ++gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); ++ GST_ERROR_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in spacemitdec pool_set_active!"); ++ GstVideoCodecState *state; ++ GstBufferPool *pool; ++ guint size, min, max, i; ++ GstStructure *config; ++ GstCaps *caps = NULL; ++ GstVideoInfo vinfo; ++/* ++ i = 0; ++ while (i < gst_query_get_n_allocation_pools (query)) { ++ gst_query_parse_nth_allocation_pool (query, i, &pool, NULL, NULL, NULL); ++ i++; ++ if (pool) { ++ GST_DEBUG_OBJECT (spacemitdec, ++ "upstream provides pool: %" GST_PTR_FORMAT, pool); ++ gst_object_unref (pool); ++ } ++ } ++ ++ gst_query_parse_allocation (query, &caps, NULL); ++*/ ++ ++ pool = gst_video_decoder_get_buffer_pool (GST_VIDEO_DECODER (spacemitdec)); ++ ++ if (pool) { ++ config = gst_buffer_pool_get_config (pool); ++ if (!gst_buffer_pool_config_get_params (config, &caps, NULL, &min, &max)) { ++ GST_ERROR_OBJECT (spacemitdec, "Can't get buffer pool params"); ++ gst_structure_free (config); ++ return FALSE; ++ } ++ gst_structure_free (config); ++ } ++ ++ if (caps) { ++ spacemitdec->pool = gst_spacemit_buffer_pool_new (); ++ config = gst_buffer_pool_get_config (spacemitdec->pool); ++ gst_buffer_pool_config_add_option (config, ++ GST_BUFFER_POOL_OPTION_VIDEO_META); ++ ++ gst_video_info_init (&vinfo); ++ gst_video_info_from_caps (&vinfo, caps); ++ min = MAX (min, 8); ++ ++ gst_buffer_pool_config_set_params (config, caps, ++ vinfo.size, min, max); ++ if (!gst_buffer_pool_set_config (spacemitdec->pool, config)) { ++ GST_ERROR_OBJECT (spacemitdec, "Failed to set config on 
spacemit pool"); ++ gst_object_unref (spacemitdec->pool); ++ spacemitdec->pool = NULL; ++ return FALSE; ++ } ++ if (!gst_buffer_pool_set_active (spacemitdec->pool, TRUE)) { ++ GST_ERROR_OBJECT (spacemitdec, "Failed to activate internal pool"); ++ gst_object_unref (spacemitdec->pool); ++ spacemitdec->pool = NULL; ++ return FALSE; ++ } ++ GST_DEBUG_OBJECT (spacemitdec, ++ "use spacemit bufferpool: %" GST_PTR_FORMAT, spacemitdec->pool); ++ ++ pool = gst_video_decoder_get_buffer_pool (decoder); ++ if(!gst_buffer_pool_set_active (pool, FALSE)) ++ GST_ERROR_OBJECT (spacemitdec, "Failed to set acitve false on pool %" GST_PTR_FORMAT, pool); ++ gst_object_unref (pool); ++ gst_caps_unref (caps); ++ } else { ++ GST_ERROR_OBJECT (spacemitdec, "Can't get pool caps params"); ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitdec_set_src_caps (GstSpacemitDec * thiz) ++{ ++ GstVideoCodecState *output_state; ++ GstVideoInfo *vinfo; ++ GstVideoFormat format; ++ guint width, height; ++ ++ width = thiz->ctx->stVdecPara.nWidth; ++ height = thiz->ctx->stVdecPara.nHeight; ++ ++ format = gst_mpp_format_change(thiz->eOutputPixelFormat); ++ if (format == GST_VIDEO_FORMAT_UNKNOWN) { ++ GST_ERROR_OBJECT (thiz, "Failed to find a valid video format"); ++ return FALSE; ++ } ++ ++ output_state = ++ gst_video_decoder_set_output_state (GST_VIDEO_DECODER (thiz), ++ format, width, height, thiz->input_state); ++ vinfo = &output_state->info; ++ output_state->caps = gst_video_info_to_caps (vinfo); ++ ++ if (srcpad_can_dmabuf (thiz)) { ++ gst_caps_set_features (output_state->caps, 0, ++ gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_DMABUF, NULL)); ++ GST_DEBUG_OBJECT (thiz, "set DMABUF feature to spacemitdec src cap %" GST_PTR_FORMAT, output_state->caps); ++ } ++ ++ thiz->width = width; ++ thiz->height = height; ++ ++ return TRUE; ++} ++ ++/* function: ++ * Receive the data stream of upstream for decoder ++ */ ++static GstFlowReturn ++gst_spacemitdec_handle_frame 
(GstVideoDecoder * decoder, ++ GstVideoCodecFrame * frame) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); ++ GstMapInfo map_info; ++ GstClockTime pts; ++ GstFlowReturn flow_status; ++ GstVideoFrame video_frame; ++ guint actual_width, actual_height; ++ guint8 *p; ++ guint component_width, component_height; ++ GstFlowReturn ret; ++ GstVideoFormat format; ++ GstVideoInfo *vinfo; ++ ++ if (G_UNLIKELY (spacemitdec->flushing)) ++ goto flushing; ++ ++#ifndef SINGLE_THREAD_DOWNSTREAM_POOL_TEST ++ if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- starting decoding thread"); ++ ++ gst_pad_start_task (decoder->srcpad, ++ (GstTaskFunction) gst_st_mpp_dec_loop, decoder, NULL); ++ } ++#endif ++ if (G_UNLIKELY(spacemitdec->downstream_flow_ret != GST_FLOW_OK)) ++ goto downstream_err; ++ ++ if (G_LIKELY(frame)) { ++ spacemitdec->cur_frame_number = frame->system_frame_number; ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in handle frame!"); ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ if (!gst_buffer_map (frame->input_buffer, &map_info, GST_MAP_READ)) ++ goto map_err; ++ ++ PACKET_SetDataPointer(spacemitdec->mpppacket, map_info.data); ++ PACKET_SetLength(spacemitdec->mpppacket, map_info.size); ++ MppData * tmp = PACKET_GetBaseData(spacemitdec->mpppacket); ++ tmp->bEos = 0; ++ ++ ret = VDEC_Decode(spacemitdec->ctx, PACKET_GetBaseData(spacemitdec->mpppacket)); ++ ++ gst_buffer_unmap (frame->input_buffer, &map_info); ++ ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ if (ret) { ++ GST_ERROR_OBJECT (spacemitdec, "VDEC_Decode return error! 
(%d)", ret); ++ ret = gst_video_decoder_drop_frame (decoder, frame); ++ goto out; ++ } ++ gst_video_codec_frame_unref (frame); ++ } else { ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ int count=3; ++ while(count>0) ++ { ++ MppData * tmp = PACKET_GetBaseData(spacemitdec->mpppacket); ++ tmp->bEos = 1; ++ PACKET_SetLength(spacemitdec->mpppacket, 0); ++ ret = VDEC_Decode(spacemitdec->ctx, PACKET_GetBaseData(spacemitdec->mpppacket)); ++ ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in handle else, %d!", ret); ++ count--; ++ } ++ spacemitdec->cur_frame_number = -1; ++ ret = GST_FLOW_EOS; ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ goto out; ++ } ++ actual_width = spacemitdec->ctx->stVdecPara.nWidth; ++ actual_height = spacemitdec->ctx->stVdecPara.nHeight; ++ ++ if (!gst_pad_has_current_caps (GST_VIDEO_DECODER_SRC_PAD (spacemitdec)) ++ || actual_width != spacemitdec->width ++ || actual_height != spacemitdec->height) { ++ ++ if (!gst_spacemitdec_set_src_caps (spacemitdec)) ++ goto not_negotiated_err; ++ ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- bf decoder_negotiate, %d, %d, (%d %d)!", actual_width, actual_height, format, spacemitdec->eOutputPixelFormat); ++ ++ if (!gst_video_decoder_negotiate (decoder)) ++ goto not_negotiated_err; ++ ++#if !defined(MULTI_THREAD_DOWNSTREAM_POOL_TEST) && !defined(SINGLE_THREAD_DOWNSTREAM_POOL_TEST) ++ if (!gst_buffer_pool_set_active (spacemitdec->pool, TRUE)) ++ goto acitve_fail; ++ ++ // if (!gst_spacemitdec_pool_set_active(decoder)) ++// goto acitve_fail; ++#endif ++ } ++ ++#ifdef SINGLE_THREAD_DOWNSTREAM_POOL_TEST ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 555, %d", ret); ++ MppFrame *mppframe = NULL; ++ ++ frame = gst_video_decoder_get_oldest_frame (decoder); ++ if (frame == NULL) { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); ++ ret = GST_FLOW_OK; ++ goto out; ++ } else { ++ flow_status = gst_video_decoder_allocate_output_frame (decoder, frame); 
++ if (flow_status != GST_FLOW_OK) { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); ++ ret = GST_FLOW_OK; ++ goto out; ++ } ++ ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, %d, %d", frame->ref_count, flow_status); ++ ++ int rett; ++ rett = VDEC_RequestOutputFrame_2 (spacemitdec->ctx, (MppData **)&mppframe); ++ if (rett == MPP_CODER_NO_DATA || mppframe == NULL) { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); ++ ret = GST_FLOW_OK; ++ goto out; ++ } ++ ++ spacemitdec->mppframe = mppframe; ++ flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); ++ if (flow_status != GST_FLOW_OK) { ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); ++ ret = GST_FLOW_OK; ++ goto out; ++ } ++ VDEC_ReturnOutputFrame(spacemitdec->ctx, FRAME_GetBaseData(spacemitdec->mppframe)); ++ ++ flow_status = gst_video_decoder_finish_frame(decoder, frame); ++ spacemitdec->downstream_flow_ret = flow_status; ++ } ++#endif ++ GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- af current_caps, %d", spacemitdec->downstream_flow_ret); ++ ret = spacemitdec->downstream_flow_ret; ++ ++out: ++ if (spacemitdec->downstream_flow_ret == GST_FLOW_FLUSHING) ++ ret = GST_FLOW_FLUSHING; ++ return ret; ++ ++out_clked: ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ return ret; ++ ++not_negotiated_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, ++ "Failed to negotiate with downstream elements"); ++ ret = GST_FLOW_NOT_NEGOTIATED; ++ goto out; ++} ++ ++acitve_fail: ++{ ++ GST_ERROR_OBJECT (spacemitdec, "acitve spacemit pool fail!"); ++ gst_object_unref (spacemitdec->pool); ++ ret = GST_FLOW_ERROR; ++ goto out; ++} ++ ++map_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, "Cannot map input buffer!"); ++ gst_video_codec_frame_unref (frame); ++ ret = GST_FLOW_ERROR; ++ goto out_clked; ++} ++ ++downstream_err: ++{ ++ GST_ERROR_OBJECT (spacemitdec, "Downstream returned %s", ++ gst_flow_get_name 
(spacemitdec->downstream_flow_ret)); ++ ret = spacemitdec->downstream_flow_ret; ++ goto out; ++} ++flushing: ++{ ++ GST_WARNING_OBJECT (spacemitdec, "flushing"); ++ ret = GST_FLOW_FLUSHING; ++ gst_video_decoder_release_frame (decoder, frame); ++ goto out; ++} ++} ++/* function: ++ * Receive event GST_EVENT_SEGMENT_DONE,GST_EVENT_EOS or reverse playback, ++ * ask the subclass to process the remaining data in the decoder. After this ++ * operation, the subclass can refuse to decode the new data ++ * called time: ++ * in gst_video_decoder_drain_out、gst_video_decoder_flush_parse ++ * need to modify................................. ++ */ ++static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ ++ GST_DEBUG_OBJECT(spacemitdec, "finish"); ++ ++ /* Decoder not negotiated yet */ ++ if (spacemitdec->width == 0) ++ return GST_FLOW_OK; ++ GST_DEBUG_OBJECT(spacemitdec, "finish test1"); ++ ++ /* Drain all pending frames */ ++ while ((gst_spacemitdec_handle_frame (decoder, NULL)) == GST_FLOW_OK); ++ ++ GST_DEBUG_OBJECT(spacemitdec, "finish test3"); ++ spacemitdec->at_eos = TRUE; ++ ++ GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); ++ ++ /* Wait for task thread to pause */ ++ GstTask *task = decoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(spacemitdec, "finish test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(spacemitdec, "finish test5"); ++ //GST_OBJECT_UNLOCK (task); ++ } ++ gst_pad_stop_task (decoder->srcpad); ++ GST_VIDEO_DECODER_STREAM_LOCK (decoder); ++ ++ GST_DEBUG_OBJECT(spacemitdec, "finish test2"); ++ ++ return GST_FLOW_OK; ++} ++ ++static GstBufferPool * ++gst_spacemitdec_create_buffer_pool (GstSpacemitDec * thiz, GstVideoInfo * info, ++ guint num_buffers) ++{ ++ GstBufferPool *pool = NULL; ++ GstAllocator *allocator = NULL; ++ GstStructure *config; ++ GstCaps *caps = 
NULL; ++ GstVideoAlignment align; ++ ++ pool = gst_spacemit_buffer_pool_new (); ++ if (!pool) ++ goto error_no_pool; ++ ++ allocator = gst_spacemit_allocator_new (); ++ if (!allocator) ++ goto error_no_allocator; ++ ++ gst_spacemit_allocator_configure(allocator, info, thiz->use_dmabuf); ++ ++ caps = gst_video_info_to_caps (info); ++ ++ config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); ++ gst_buffer_pool_config_set_params (config, caps, ++ GST_VIDEO_INFO_SIZE (info), num_buffers, num_buffers); ++ gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); ++ gst_buffer_pool_config_add_option (config, ++ GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); ++ gst_caps_unref (caps); ++ ++ if (thiz->use_dmabuf) ++ gst_buffer_pool_config_add_option (config, ++ GST_BUFFER_POOL_OPTION_SPM_USE_DMABUF); ++ ++ gst_buffer_pool_config_set_video_alignment (config, &align); ++ gst_buffer_pool_config_set_allocator (config, allocator, NULL); ++ gst_object_unref (allocator); ++ ++ if (!gst_buffer_pool_set_config (pool, config)) ++ goto error_pool_config; ++ ++ return pool; ++ ++error_no_pool: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to create spacemitdec bufferpool"); ++ return NULL; ++ } ++error_no_allocator: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to create allocator"); ++ gst_object_unref (pool); ++ return NULL; ++ } ++error_pool_config: ++ { ++ GST_ERROR_OBJECT (thiz, "failed to set config"); ++ gst_object_unref (pool); ++ gst_object_unref (allocator); ++ return NULL; ++ } ++} ++ ++/* function: ++ * Set the parameters of the allocator that allocates the output buffer. ++ * The incoming query parameter contains information about the allocator ++ * of downstream components. 
The default implementation of the parent class ++ * needs to be called in the subclass implementation ++ */ ++static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query) ++{ ++ GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); ++ GST_ERROR_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in spacemitdec decide_allocation!"); ++ GstBufferPool *pool; ++ guint size, min, max, i; ++ GstStructure *pool_config; ++ GstCaps *caps = NULL; ++ GstVideoInfo vinfo; ++ ++ if (!GST_VIDEO_DECODER_CLASS(gst_spacemitdec_parent_class)->decide_allocation(decoder, query)) ++ return FALSE; ++ ++ /* Get the buffer pool config decided on by the base class. The base ++ class ensures that there will always be at least a 0th pool in ++ the query. */ ++ gst_query_parse_nth_allocation_pool(query, 0, &pool, &size, &min, &max); ++ pool_config = gst_buffer_pool_get_config(pool); ++ gst_buffer_pool_config_get_params (pool_config, &caps, &size, ++ &min, &max); ++ ++ GST_DEBUG_OBJECT (decoder, "get pool caps: %" GST_PTR_FORMAT, caps); ++ if (_gst_caps_has_feature (caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) { ++ GST_INFO_OBJECT (decoder, "This spacemit decoder uses DMABuf memory"); ++ spacemitdec->use_dmabuf = TRUE; ++ } ++ ++ /* Decoder always use its own pool. */ ++ if (!spacemitdec->pool) { ++ GstVideoCodecState *output_state = ++ gst_video_decoder_get_output_state (GST_VIDEO_DECODER (spacemitdec)); ++ gst_clear_object (&spacemitdec->pool); ++ GST_INFO_OBJECT (decoder, "create new spacemitdec bufferpool"); ++ spacemitdec->pool = ++ gst_spacemitdec_create_buffer_pool(spacemitdec, &output_state->info, (4 > min) ? 
4 : min); ++ gst_video_codec_state_unref (output_state); ++ if (!spacemitdec->pool) { ++ GST_ERROR_OBJECT (decoder, "failed to create new pool"); ++ goto failed_to_create_pool; ++ } ++ } ++ GST_DEBUG_OBJECT (spacemitdec, ++ "upstream provides the pool is: %" GST_PTR_FORMAT, pool); ++ ++ /* If downstream supports video meta and video alignment, ++ * we can replace with our own spacemit bufferpool and use it ++ */ ++#if 0 ++ if (gst_buffer_pool_has_option (pool, ++ GST_BUFFER_POOL_OPTION_VIDEO_META)) { ++ GstStructure *config; ++ GstAllocator *allocator; ++ ++ /* Remove downstream's pool */ ++ gst_structure_free (pool_config); ++ gst_object_unref (pool); ++ ++ pool = gst_object_ref (spacemitdec->pool); ++ /* Set the allocator of new spacemitdec bufferpool */ ++ config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); ++ ++ if (gst_buffer_pool_config_get_allocator (config, &allocator, NULL)) ++ gst_query_set_nth_allocation_param (query, 0, allocator, NULL); ++ gst_structure_free (config); ++ ++ gst_query_set_nth_allocation_pool (query, 0, pool, size, min, ++ max); ++ } else { ++ goto no_support; ++ } ++#endif ++ if (pool) ++ gst_object_unref (pool); ++ ++ return TRUE; ++ ++failed_to_create_pool: ++ GST_ERROR_OBJECT (decoder, "failed to set buffer pool config"); ++ if (pool) ++ gst_object_unref (pool); ++ return FALSE; ++ ++no_support: ++ GST_ERROR_OBJECT (spacemitdec, ++ "error! 
upstream provides the strange pool: %" GST_PTR_FORMAT, pool);
++  if (pool)
++    gst_object_unref (pool);
++  return FALSE;
++}
++/* change_state handler: on PAUSED->READY stop a running srcpad task, then always chain up to the parent class. */
++static GstStateChangeReturn
++gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition)
++{
++  GstVideoDecoder *decoder = GST_VIDEO_DECODER (element);
++  GST_DEBUG("ZRong ------------------ in spacemitdec change state, %x", transition);
++
++  if (transition == GST_STATE_CHANGE_PAUSED_TO_READY) {
++    GST_VIDEO_DECODER_STREAM_LOCK (decoder);
++    /* NOTE(review): finish() drops this lock before stopping the task; confirm holding it here cannot block the loop. */
++    if (gst_pad_get_task_state (decoder->srcpad) == GST_TASK_STARTED) {
++      GST_DEBUG_OBJECT (decoder, "stopping decoding thread");
++      gst_pad_stop_task (decoder->srcpad);
++    }
++    GST_VIDEO_DECODER_STREAM_UNLOCK (decoder);  /* released on every path; the old early return kept it locked */
++  }
++
++  return GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
++}
++
++gboolean
++gst_spacemitdec_register (GstPlugin * plugin, guint rank)
++{
++  GST_DEBUG_CATEGORY_INIT (gst_spacemitdec_debug_category, "spacemitdec", 0,
++      "debug category for spacemitdec element");
++
++  return gst_element_register (plugin, "spacemitdec", rank,
++      GST_TYPE_SPACEMITDEC);
++}
++
+diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc.c b/ext/spacemit/spacemitcodec/gstspacemitenc.c
+new file mode 100755
+index 0000000..c1f2085
+--- /dev/null
++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.c
+@@ -0,0 +1,862 @@
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++#include
++
++#include "gstspacemitenc.h"
++
++#include
++#include
++#include
++#include
++#include
++#define SPM_PENDING_MAX 5 /* Max number of MPP pending frame */
++
++GST_DEBUG_CATEGORY_STATIC (gst_spacemitenc_debug_category);
++#define GST_CAT_DEFAULT gst_spacemitenc_debug_category
++
++/* prototypes */
++static void gst_spacemitenc_set_property (GObject * object,
++    guint property_id, const GValue * value, GParamSpec * pspec);
++static void gst_spacemitenc_get_property (GObject * object,
++    guint property_id, GValue * value, GParamSpec * pspec);
++static void 
gst_spacemitenc_finalize (GObject * object); ++static gboolean gst_spacemitenc_start (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_stop (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_set_format (GstVideoEncoder * encoder, ++ GstVideoCodecState * state); ++static GstFlowReturn gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, ++ GstVideoCodecFrame * frame); ++static GstFlowReturn gst_spacemitenc_finish (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, ++ GstQuery * query); ++ ++#define DEFAULT_BITRATE (128000) ++#define DEFAULT_GOP_SIZE (90) ++#define DEFAULT_MAX_SLICE_SIZE (1500000) ++#define START_FRAMERATE 30 ++#define DEFAULT_MULTI_THREAD 0 ++#define DEFAULT_ENABLE_DENOISE FALSE ++#define DEFAULT_ENABLE_FRAME_SKIP FALSE ++#define DEFAULT_BACKGROUND_DETECTION TRUE ++#define DEFAULT_ADAPTIVE_QUANTIZATION TRUE ++#define DEFAULT_SCENE_CHANGE_DETECTION TRUE ++#define DEFAULT_SLICE_MODE GST_SPACEMIT_SLICE_MODE_N_SLICES ++#define DEFAULT_NUM_SLICES 1 ++#define DEFAULT_QP_MIN 0 ++#define DEFAULT_QP_MAX 51 ++ ++#define GST_SPM_ENC_EVENT_MUTEX(encoder) (&GST_SPACEMITENC (encoder)->event_mutex) ++#define GST_SPM_ENC_EVENT_COND(encoder) (&GST_SPACEMITENC (encoder)->event_cond) ++ ++#define GST_SPM_ENC_BROADCAST(encoder) \ ++ g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ g_cond_broadcast (GST_SPM_ENC_EVENT_COND (encoder)); \ ++ g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); ++ ++#define GST_SPM_ENC_WAIT(encoder, condition) \ ++ g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ while (!(condition)) \ ++ g_cond_wait (GST_SPM_ENC_EVENT_COND (encoder), \ ++ GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); ++ ++enum ++{ ++ PROP_0, ++ PROP_BITRATE, ++ PROP_MAX_BITRATE, ++ PROP_GOP_SIZE, ++ PROP_MAX_SLICE_SIZE, ++ PROP_MULTI_THREAD, ++ PROP_ENABLE_DENOISE, ++ PROP_ENABLE_FRAME_SKIP, ++ PROP_BACKGROUND_DETECTION, ++ 
PROP_ADAPTIVE_QUANTIZATION, ++ PROP_SCENE_CHANGE_DETECTION, ++ PROP_NUM_SLICES, ++ PROP_QP_MIN, ++ PROP_QP_MAX, ++ PROP_CODING_WIDTH, ++ PROP_CODING_HIGHT, ++ PROP_CODING_TYPE, ++ PROP_CODE_TYPE, ++ PROP_CODE_YUV_FORMAT, ++ N_PROPERTIES ++}; ++ ++/* pad templates */ ++ ++static GstStaticPadTemplate gst_spacemitenc_sink_template = ++GST_STATIC_PAD_TEMPLATE ("sink", ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{I420, NV12, NV21}")) ++ ); ++ ++static GstStaticPadTemplate gst_spacemitenc_src_template = ++GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ( ++ "video/x-h264," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [1,MAX], " "height=(int) [1,MAX]" ++ )); ++ ++/* class initialization */ ++G_DEFINE_TYPE(GstSpacemitEnc, gst_spacemitenc, GST_TYPE_VIDEO_ENCODER); ++ ++static void ++gst_spacemitenc_class_init (GstSpacemitEncClass * klass) ++{ ++ GObjectClass *gobject_class = G_OBJECT_CLASS (klass); ++ GstVideoEncoderClass *video_encoder_class = GST_VIDEO_ENCODER_CLASS (klass); ++ ++ gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), ++ &gst_spacemitenc_src_template); ++ gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), ++ &gst_spacemitenc_sink_template); ++ ++ gst_element_class_set_static_metadata (GST_ELEMENT_CLASS (klass), ++ "Spacemit video encoder", "Encoder/Video", "Spacemit video encoder", ++ "ZRong, zhirong.li@spacemit.com"); ++ ++ gobject_class->set_property = gst_spacemitenc_set_property; ++ gobject_class->get_property = gst_spacemitenc_get_property; ++ gobject_class->finalize = gst_spacemitenc_finalize; ++ video_encoder_class->start = GST_DEBUG_FUNCPTR (gst_spacemitenc_start); ++ video_encoder_class->stop = GST_DEBUG_FUNCPTR (gst_spacemitenc_stop); ++ video_encoder_class->set_format = ++ GST_DEBUG_FUNCPTR (gst_spacemitenc_set_format); ++ video_encoder_class->handle_frame = ++ GST_DEBUG_FUNCPTR 
(gst_spacemitenc_handle_frame); ++ video_encoder_class->propose_allocation = ++ GST_DEBUG_FUNCPTR (gst_spacemitenc_propose_allocation); ++ video_encoder_class->finish = GST_DEBUG_FUNCPTR (gst_spacemitenc_finish); ++ ++ /* define properties */ ++ g_object_class_install_property (gobject_class, PROP_CODING_TYPE, ++ g_param_spec_uint ("coding-type", "coding type", ++ "Format to encode", ++ CODING_H264, CODING_FWHT, CODING_H264, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_TYPE, ++ g_param_spec_uint ("code-type", "code type", ++ "Codec selection to work", ++ CODEC_OPENH264, CODEC_V4L2, CODEC_SFOMX, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_WIDTH, ++ g_param_spec_uint ("coding-width", "coding width", ++ "image width to encode", ++ 0, 3840, 1280, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_HIGHT, ++ g_param_spec_uint ("code-hight", "code hight", ++ "image hight to encode", ++ 0, 2160, 720, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, ++ g_param_spec_uint ("code-yuv-format", "code yuv format", ++ "ENcode the yuv format", ++ PIXEL_FORMAT_UNKNOWN, PIXEL_FORMAT_MAX - 1, PIXEL_FORMAT_I420, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++} ++ ++static void ++gst_spacemitenc_init (GstSpacemitEnc * thiz) ++{ ++ thiz->width = 1280; ++ thiz->height = 720; ++ thiz->eCodecType = CODEC_SFOMX; ++ thiz->eCodingType = CODING_H264; ++ thiz->PixelFormat = PIXEL_FORMAT_I420; ++ thiz->gop_size = DEFAULT_GOP_SIZE; ++ thiz->multi_thread = DEFAULT_MULTI_THREAD; ++ thiz->max_slice_size = DEFAULT_MAX_SLICE_SIZE; ++ thiz->bitrate = DEFAULT_BITRATE; ++ thiz->qp_min = DEFAULT_QP_MIN; ++ thiz->qp_max = DEFAULT_QP_MAX; ++ thiz->framerate = 
START_FRAMERATE; ++ thiz->input_state = NULL; ++ thiz->time_per_frame = GST_SECOND / thiz->framerate; ++ thiz->frame_count = 0; ++ thiz->previous_timestamp = 0; ++ thiz->enable_denoise = DEFAULT_ENABLE_DENOISE; ++ thiz->enable_frame_skip = DEFAULT_ENABLE_FRAME_SKIP; ++ thiz->background_detection = DEFAULT_BACKGROUND_DETECTION; ++ thiz->adaptive_quantization = DEFAULT_ADAPTIVE_QUANTIZATION; ++ thiz->scene_change_detection = DEFAULT_SCENE_CHANGE_DETECTION; ++ thiz->slice_mode = DEFAULT_SLICE_MODE; ++ thiz->num_slices = DEFAULT_NUM_SLICES; ++ thiz->bitrate_changed = FALSE; ++ thiz->max_bitrate_changed = FALSE; ++ ++ thiz->ctx = NULL; ++ thiz->para = NULL; ++ thiz->mppframe = NULL; ++ thiz->mpppacket = NULL; ++} ++ ++void ++gst_spacemitenc_set_property (GObject * object, guint property_id, ++ const GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_BITRATE: ++ GST_OBJECT_LOCK (thiz); ++ if (thiz->bitrate != g_value_get_uint (value)) { ++ thiz->bitrate = g_value_get_uint (value); ++ thiz->bitrate_changed = TRUE; ++ } ++ GST_OBJECT_UNLOCK (thiz); ++ break; ++ ++ case PROP_MAX_BITRATE: ++ GST_OBJECT_LOCK (thiz); ++ if (thiz->max_bitrate != g_value_get_uint (value)) { ++ thiz->max_bitrate = g_value_get_uint (value); ++ thiz->max_bitrate_changed = TRUE; ++ } ++ GST_OBJECT_UNLOCK (thiz); ++ break; ++ ++ case PROP_QP_MIN: ++ thiz->qp_min = g_value_get_uint (value); ++ break; ++ ++ case PROP_QP_MAX: ++ thiz->qp_max = g_value_get_uint (value); ++ break; ++ ++ case PROP_MULTI_THREAD: ++ thiz->multi_thread = g_value_get_uint (value); ++ break; ++ case PROP_ENABLE_DENOISE: ++ thiz->enable_denoise = g_value_get_boolean (value); ++ break; ++ ++ case PROP_ENABLE_FRAME_SKIP: ++ thiz->enable_frame_skip = g_value_get_boolean (value); ++ break; ++ case PROP_GOP_SIZE: ++ thiz->gop_size = g_value_get_uint (value); ++ 
GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", thiz->gop_size); ++ break; ++ ++ case PROP_MAX_SLICE_SIZE: ++ thiz->max_slice_size = g_value_get_uint (value); ++ break; ++ case PROP_BACKGROUND_DETECTION: ++ thiz->background_detection = g_value_get_boolean (value); ++ break; ++ ++ case PROP_ADAPTIVE_QUANTIZATION: ++ thiz->adaptive_quantization = g_value_get_boolean (value); ++ break; ++ ++ case PROP_SCENE_CHANGE_DETECTION: ++ thiz->scene_change_detection = g_value_get_boolean (value); ++ break; ++ case PROP_NUM_SLICES: ++ thiz->num_slices = g_value_get_uint (value); ++ break; ++ case PROP_CODING_WIDTH: ++ thiz->width = g_value_get_uint (value); ++ break; ++ case PROP_CODING_HIGHT: ++ thiz->height = g_value_get_uint (value); ++ break; ++ case PROP_CODING_TYPE: ++ thiz->eCodingType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_TYPE: ++ thiz->eCodecType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ thiz->PixelFormat = g_value_get_uint (value); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitenc_get_property (GObject * object, guint property_id, ++ GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- get_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_BITRATE: ++ g_value_set_uint (value, thiz->bitrate); ++ break; ++ ++ case PROP_MAX_BITRATE: ++ g_value_set_uint (value, thiz->max_bitrate); ++ break; ++ ++ case PROP_QP_MIN: ++ g_value_set_uint (value, thiz->qp_min); ++ break; ++ ++ case PROP_QP_MAX: ++ g_value_set_uint (value, thiz->qp_max); ++ break; ++ ++ case PROP_ENABLE_DENOISE: ++ g_value_set_boolean (value, thiz->enable_denoise); ++ break; ++ ++ case PROP_ENABLE_FRAME_SKIP: ++ g_value_set_boolean (value, thiz->enable_frame_skip); ++ break; ++ ++ case PROP_MULTI_THREAD: ++ g_value_set_uint (value, 
thiz->multi_thread); ++ break; ++ ++ case PROP_GOP_SIZE: ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------------- gop_size: %d", thiz->gop_size); ++ g_value_set_uint (value, thiz->gop_size); ++ break; ++ case PROP_MAX_SLICE_SIZE: ++ g_value_set_uint (value, thiz->max_slice_size); ++ break; ++ case PROP_BACKGROUND_DETECTION: ++ g_value_set_boolean (value, thiz->background_detection); ++ break; ++ case PROP_ADAPTIVE_QUANTIZATION: ++ g_value_set_boolean (value, thiz->adaptive_quantization); ++ break; ++ case PROP_SCENE_CHANGE_DETECTION: ++ g_value_set_boolean (value, thiz->scene_change_detection); ++ break; ++ case PROP_NUM_SLICES: ++ g_value_set_uint (value, thiz->num_slices); ++ break; ++ case PROP_CODING_WIDTH: ++ g_value_set_uint (value, thiz->width); ++ break; ++ case PROP_CODING_HIGHT: ++ g_value_set_uint (value, thiz->height); ++ break; ++ case PROP_CODING_TYPE: ++ g_value_set_uint (value, thiz->eCodingType); ++ break; ++ case PROP_CODE_TYPE: ++ g_value_set_uint (value, thiz->eCodecType); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ g_value_set_uint (value, thiz->PixelFormat); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitenc_finalize (GObject * object) ++{ ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (thiz, "finalize"); ++ ++ /* clean up object here */ ++ if (thiz->input_state) ++ gst_video_codec_state_unref (thiz->input_state); ++ ++ thiz->input_state = NULL; ++ ++ G_OBJECT_CLASS (gst_spacemitenc_parent_class)->finalize (object); ++} ++FILE *fbbb; ++ ++static gboolean ++gst_spacemitenc_start (GstVideoEncoder * encoder) ++{ ++ int ret = 0; ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); ++ GST_DEBUG_OBJECT (thiz, "start"); ++ ++ if(thiz->ctx) ++ { ++ VENC_DestoryChannel(thiz->ctx); ++ thiz->ctx = NULL; ++ } ++ thiz->ctx = VENC_CreateChannel(); ++ thiz->ctx->eCodecType = thiz->eCodecType; ++ ++ thiz->mpppacket = PACKET_Create (); ++ if 
(!thiz->mpppacket)
++    goto alloc_err;
++
++  thiz->mppframe = FRAME_Create ();
++  if (!thiz->mppframe)
++    goto alloc_err;
++
++  GST_ERROR_OBJECT (thiz, "ZRong ------------------------- finish start!!!");
++  g_mutex_init (&thiz->event_mutex);
++  g_cond_init (&thiz->event_cond);
++  thiz->pending_frames = 0;
++  fbbb = fopen("/tmp/out.yuv", "ab+");
++
++  return TRUE;
++
++alloc_err:
++  GST_ERROR_OBJECT (thiz, "can not alloc for mpp structure, please check !");
++  return FALSE;
++}
++/* stop handler: stops the srcpad task if it is running, then releases what start() set up. Always returns TRUE. */
++static gboolean
++gst_spacemitenc_stop (GstVideoEncoder * encoder)
++{
++  GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder);
++
++  GST_ERROR_OBJECT (thiz, "xxxxxxxxxxxxxx stop start");
++  /* Stop the output loop first: it dereferences thiz->ctx and thiz->mpppacket, which are freed below. */
++  if (gst_pad_get_task_state (encoder->srcpad) == GST_TASK_STARTED) {
++    GST_DEBUG_OBJECT (thiz, "spacemit_enc_stop called");
++    gst_pad_stop_task (encoder->srcpad);
++  }
++  if (fbbb) {                           /* the fopen() in start() is unchecked and may have returned NULL */
++    fclose (fbbb);
++    fbbb = NULL;
++  }
++  if (thiz->ctx) {                      /* destroyed once; the old code called destroy again on the NULLed pointer */
++    VENC_DestoryChannel (thiz->ctx);
++    thiz->ctx = NULL;
++  }
++  FRAME_Destory (thiz->mppframe);
++  PACKET_Destory (thiz->mpppacket);
++  thiz->mppframe = NULL;
++  thiz->mpppacket = NULL;
++
++  if (thiz->input_state)
++    gst_video_codec_state_unref (thiz->input_state);
++  thiz->input_state = NULL;
++  /* Pairs with g_mutex_init/g_cond_init in start(); previously skipped whenever the task was not running. */
++  g_cond_clear (&thiz->event_cond);
++  g_mutex_clear (&thiz->event_mutex);
++  return TRUE;
++}
++static void
++gst_spacemitenc_loop (GstVideoEncoder * encoder)
++{
++  GstVideoCodecFrame * frame = NULL;
++  GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder);
++  int ret;
++  GstFlowReturn flow_status;
++  GstMapInfo map;
++  static int lenght = 0;
++
++  GST_SPM_ENC_WAIT (encoder, thiz->pending_frames || thiz->at_eos);
++  GST_VIDEO_ENCODER_STREAM_LOCK (encoder);
++
++  ret = VENC_RequestOutputStreamBuffer(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket));
++  if (ret == MPP_CODER_NO_DATA)
++    goto sf_no_frame;
++  else if (ret == MPP_CODER_EOS)
++    goto finish_work;
++
++  frame = gst_video_encoder_get_oldest_frame (encoder);
++  if (!frame) {
++    GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish last 
frame"); ++ thiz->downstream_flow_ret = GST_FLOW_EOS; ++ GstBuffer *buffer; ++ buffer = ++ gst_buffer_new_wrapped (g_memdup (PACKET_GetDataPointer(thiz->mpppacket), ++ PACKET_GetLength(thiz->mpppacket)), PACKET_GetLength(thiz->mpppacket)); ++ flow_status = gst_pad_push (GST_VIDEO_ENCODER_SRC_PAD (thiz), buffer); ++ GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish last frame %d", flow_status); ++ ++ VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ goto done; ++ } ++ thiz->pending_frames--; ++ GST_SPM_ENC_BROADCAST (encoder); ++ ++ //if (videoFrameTypeIDR == frame_info.eFrameType) { ++ // GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ //} else { ++ GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); ++ //} ++ frame->output_buffer = ++ gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); ++ gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); ++ ++ lenght += PACKET_GetLength(thiz->mpppacket); ++ GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish one frame, %d, %d", lenght, PACKET_GetLength(thiz->mpppacket)); ++ memcpy (map.data, PACKET_GetDataPointer(thiz->mpppacket), PACKET_GetLength(thiz->mpppacket)); ++ ++ gst_buffer_unmap (frame->output_buffer, &map); ++ VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ ++ thiz->downstream_flow_ret = gst_video_encoder_finish_frame (encoder, frame); ++done: ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ return; ++ ++sf_no_frame: ++{ ++ GST_LOG_OBJECT (thiz, ++ "No out frame to request%d!", thiz->pending_frames); ++ thiz->downstream_flow_ret = GST_FLOW_OK; ++ goto done; ++} ++finish_work: ++{ ++ GST_DEBUG_OBJECT (thiz, ++ "Get eos, Finished work and paused task!"); ++ gst_pad_pause_task (GST_VIDEO_ENCODER_SRC_PAD (thiz)); ++ thiz->downstream_flow_ret = GST_FLOW_EOS; ++ goto done; ++} ++ ++} ++ ++static MppPixelFormat gst_change_to_mpp_format(GstVideoFormat format) ++{ ++ MppPixelFormat pixel; ++ ++ switch 
(format) { ++ case GST_VIDEO_FORMAT_I420: ++ pixel = PIXEL_FORMAT_I420; ++ break; ++ case GST_VIDEO_FORMAT_NV21: ++ pixel = PIXEL_FORMAT_NV21; ++ break; ++ case GST_VIDEO_FORMAT_NV12: ++ pixel = PIXEL_FORMAT_NV12; ++ break; ++ default: ++ pixel = PIXEL_FORMAT_UNKNOWN; ++ break; ++ } ++ return pixel; ++} ++ ++static MppCodingType ++gst_change_mpp_ecoding_type (GstStructure * s) ++{ ++ if (gst_structure_has_name (s, "video/x-h264")) ++ return CODING_H264; ++ ++ if (gst_structure_has_name (s, "video/x-h265")) ++ return CODING_H265; ++ ++ return CODING_UNKNOWN; ++} ++static gboolean ++gst_spacemitenc_init_encoder (GstSpacemitEnc * thiz, ++ GstVideoCodecState * state, GstCaps *caps) ++{ ++ guint width, height, fps_n, fps_d; ++ int stride0, stride1, stride2; ++ GstStructure *structure; ++ GstVideoFormat fmt; ++ gboolean ret = TRUE; ++ ++ width = GST_VIDEO_INFO_WIDTH (&state->info); ++ height = GST_VIDEO_INFO_HEIGHT (&state->info); ++ fps_n = GST_VIDEO_INFO_FPS_N (&state->info); ++ fps_d = GST_VIDEO_INFO_FPS_D (&state->info); ++ stride0 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 0); ++ stride1 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 1); ++ stride2 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 2); ++ fmt = GST_VIDEO_INFO_FORMAT (&state->info); ++ ++ structure = gst_caps_get_structure (caps, 0); ++ thiz->eCodingType = gst_change_mpp_ecoding_type (structure); ++ if (thiz->eCodingType == CODING_UNKNOWN) { ++ GST_ERROR_OBJECT(thiz, "no support this eCodingType"); ++ return FALSE; ++ } ++ thiz->PixelFormat = gst_change_to_mpp_format(fmt); ++ if(thiz->PixelFormat == PIXEL_FORMAT_UNKNOWN) { ++ GST_ERROR_OBJECT(thiz, "no support this PixelFormat output"); ++ return FALSE; ++ } ++ GST_ERROR_OBJECT (thiz, "ZRong -------------------- init_encoder %d, %d (%d %d) (%d, %d)", ++ width, height, fps_n, fps_d, thiz->eCodingType, thiz->PixelFormat); ++ ++ thiz->ctx->stVencPara.eCodingType = thiz->eCodingType; ++ thiz->ctx->stVencPara.nWidth = width; ++ thiz->ctx->stVencPara.nHeight = 
height;
++  thiz->ctx->stVencPara.PixelFormat = thiz->PixelFormat;
++  thiz->ctx->stVencPara.nBitrate = 5000000;
++  thiz->ctx->stVencPara.nFrameRate = fps_n;
++  thiz->ctx->stVencPara.nStride = width;
++
++  ret = VENC_Init(thiz->ctx);
++  if (ret)
++    goto init_err;
++
++  VENC_SetParam(thiz->ctx, &(thiz->ctx->stVencPara));
++
++  GST_ERROR_OBJECT (thiz, "ZRong ----------------------- set format %u, (%d %d %d) %s",
++      fmt, stride0, stride1, stride2, gst_video_format_to_string (fmt));
++
++  return TRUE;
++
++init_err:
++  GST_ERROR_OBJECT (thiz, "Mpp vnec init error, please check !");
++  return FALSE;
++}
++
++/* set_format handler: stores the input state, initialises the encoder from the peer caps, sets the output state, starts the task. */
++static gboolean
++gst_spacemitenc_set_format (GstVideoEncoder * encoder,
++    GstVideoCodecState * state)
++{
++  GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder);
++  GstVideoCodecState *output_state;
++  GstCaps *outcaps;
++  GstCaps *caps;
++
++  GST_DEBUG_OBJECT (thiz, "ZRong ------------------------- start set format: %"
++      GST_PTR_FORMAT, state->caps);     /* replaces gst_caps_to_string(), whose result was never freed */
++  if (thiz->input_state)
++    gst_video_codec_state_unref (thiz->input_state);
++  thiz->frame_count = 0;
++  thiz->input_state = gst_video_codec_state_ref (state);
++
++  /* The template getter already returns an owned ref; make_writable consumes it, where gst_caps_copy() leaked it. */
++  outcaps = gst_caps_make_writable (gst_static_pad_template_get_caps (&gst_spacemitenc_src_template));
++  caps = gst_pad_peer_query_caps (encoder->srcpad, outcaps);
++  GST_DEBUG_OBJECT (thiz, "Returning caps %" GST_PTR_FORMAT, caps);
++  if (gst_caps_is_empty (caps) || !gst_spacemitenc_init_encoder (thiz, state, caps)) {
++    gst_caps_unref (caps);
++    gst_caps_unref (outcaps);
++    return FALSE;
++  }
++  gst_caps_unref (caps);
++
++  output_state = gst_video_encoder_set_output_state (encoder, outcaps, state);  /* takes ownership of outcaps */
++  if (!output_state)
++    return FALSE;
++  gst_video_codec_state_unref (output_state);   /* the returned state is a new reference */
++  if (!gst_video_encoder_negotiate (encoder))
++    return FALSE;
++
++  if (gst_pad_get_task_state (encoder->srcpad) != GST_TASK_STARTED) {
++    GST_DEBUG_OBJECT (thiz, "ZRong ------------------------- start enc thread");
++    gst_pad_start_task (encoder->srcpad, (GstTaskFunction) gst_spacemitenc_loop, encoder, NULL);
++  }
++  return TRUE;
++}
++
++static gboolean
++gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, GstQuery * query)
++{
++  gst_query_add_allocation_meta 
(query, GST_VIDEO_META_API_TYPE, NULL); ++ ++ return ++ GST_VIDEO_ENCODER_CLASS ++ (gst_spacemitenc_parent_class)->propose_allocation (encoder, query); ++} ++ ++static GstFlowReturn ++gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, ++ GstVideoCodecFrame * frame) ++{ ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); ++ GstVideoFrame video_frame; ++ gboolean force_keyframe; ++ gint ret; ++ gfloat fps; ++ gint i, j; ++ gsize buf_length = 0; ++ GstFlowReturn flow_ret; ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------- start handle frame"); ++ ++ if (thiz->downstream_flow_ret != GST_FLOW_OK) { ++ goto downstream_err; ++ } ++ ++ GST_OBJECT_LOCK (thiz); ++ if (thiz->bitrate_changed || thiz->max_bitrate_changed) { ++ GST_WARNING_OBJECT (thiz, ++ "Error need to support this cases"); ++ } ++ GST_OBJECT_UNLOCK (thiz); ++ ++ if (frame) { ++ thiz->frame_count++; ++ gst_video_frame_map (&video_frame, &thiz->input_state->info, ++ frame->input_buffer, GST_MAP_READ); ++ if (thiz->PixelFormat == PIXEL_FORMAT_I420) { ++ FRAME_SetDataUsedNum(thiz->mppframe, 3); ++ FRAME_SetDataPointer(thiz->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); ++ FRAME_SetDataPointer(thiz->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); ++ FRAME_SetDataPointer(thiz->mppframe, 2, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 2)); ++ } else { ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------- be save %s, %d, (%d, %d, %d)", ++ gst_video_format_to_string(GST_VIDEO_FRAME_FORMAT(&video_frame)), ++ GST_VIDEO_FRAME_N_PLANES(&video_frame), GST_VIDEO_FRAME_SIZE(&video_frame), ++ GST_VIDEO_FRAME_WIDTH(&video_frame), GST_VIDEO_FRAME_HEIGHT(&video_frame)); ++ ++ FRAME_SetDataUsedNum(thiz->mppframe, 2); ++ FRAME_SetDataPointer(thiz->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); ++ FRAME_SetDataPointer(thiz->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); ++#if 0 ++ //SF_OMX_BUF_INFO *pBufInfo = pOMXBuffer->pOutputPortPrivate; ++ //LOG(SF_LOG_INFO, "%p %d %p\r\n", 
pOMXBuffer->pBuffer, pOMXBuffer->nFilledLen, pBufInfo->remap_vaddr); ++ ++ fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 0), 1, 1280*720, fbbb); ++ fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 1), 1, 1280*720/2, fbbb); ++ ++#endif ++ ++ } ++ } else { ++ FRAME_SetEos(thiz->mppframe, 1); ++ } ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ ret = VENC_Encode(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ if (ret != 0) { ++ GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, ++ ("Could not encode frame"), ("Spacemit returned %d", ret)); ++ flow_ret = GST_FLOW_ERROR; ++ if (frame) ++ goto release_frame; ++ else ++ goto done; ++ } ++ if (!frame) ++ goto flow_eos; ++ ++ /* Avoid holding too much frames */ ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ GST_SPM_ENC_WAIT (encoder, ++ thiz->pending_frames < SPM_PENDING_MAX); ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ thiz->pending_frames++; ++ GST_SPM_ENC_BROADCAST (encoder); ++ ++ flow_ret = thiz->downstream_flow_ret; ++ GST_DEBUG_OBJECT (thiz, "ZRong ------------- handle frame start: %d, %d", thiz->pending_frames, thiz->frame_count); ++ ++ if (frame) ++ goto release_frame; ++ ++done: ++ return flow_ret; ++downstream_err: ++{ ++ GST_ERROR_OBJECT (thiz, "Downstream returned %s", ++ gst_flow_get_name (thiz->downstream_flow_ret)); ++ flow_ret = thiz->downstream_flow_ret; ++ goto done; ++} ++release_frame: ++{ ++ gst_video_frame_unmap (&video_frame); ++ gst_video_codec_frame_unref (frame); ++ goto done; ++} ++flow_eos: ++{ ++ GST_DEBUG_OBJECT (thiz, "Get an eos, exit handle"); ++ flow_ret = GST_FLOW_EOS; ++ goto done; ++} ++#if 0 ++ /* FIXME: spacemit has no way for us to get a connection ++ * between the input and output frames, we just have to ++ * guess based on the input */ ++ frame = gst_video_encoder_get_oldest_frame (encoder); ++ if (!frame) { ++ GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, ++ ("Could not encode frame"), ("thiz returned %d", ret)); ++ gst_video_codec_frame_unref 
(frame); ++ return GST_FLOW_ERROR; ++ } ++ ret = VENC_RequestOutputStreamBuffer(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ if (ret) { ++ GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, ++ ("Could not encode frame"), ("thiz returned %d", ret)); ++ gst_video_codec_frame_unref (frame); ++ return GST_FLOW_ERROR; ++ } ++ ++ if (videoFrameTypeIDR == frame_info.eFrameType) { ++ GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ } else { ++ GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); ++ } ++ ++ frame->output_buffer = ++ gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); ++ gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); ++ ++ memcpy (map.data, PACKET_GetDataPointer(thiz->mpppacket), PACKET_GetLength(thiz->mpppacket)); ++ ++ gst_buffer_unmap (frame->output_buffer, &map); ++ VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ ++ GstFlowReturn flow_status = gst_video_encoder_finish_frame (encoder, frame); ++ ++ GST_ERROR_OBJECT (thiz, "ZRong ----------------------- handle frame finish"); ++ ++ return flow_status; ++#endif ++} ++ ++static GstFlowReturn ++gst_spacemitenc_finish (GstVideoEncoder * encoder) ++{ ++ GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); ++ ++ if (thiz->frame_count == 0) ++ return GST_FLOW_OK; ++ ++ /* Drain encoder */ ++ while ((gst_spacemitenc_handle_frame (encoder, NULL)) == GST_FLOW_OK); ++ ++ GST_DEBUG_OBJECT(thiz, "finish test3"); ++ thiz->at_eos = TRUE; ++ ++ GST_SPM_ENC_BROADCAST (encoder); ++ ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ /* Wait for task thread to pause */ ++ GstTask *task = encoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(thiz, "finish test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(thiz, "finish test5"); ++ //GST_OBJECT_UNLOCK (task); ++ } ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ ++ GST_DEBUG_OBJECT(thiz, 
"finish test2"); ++ ++ return GST_FLOW_OK; ++} ++ ++gboolean ++gst_spacemitenc_register (GstPlugin * plugin, guint rank) ++{ ++ GST_DEBUG_CATEGORY_INIT (gst_spacemitenc_debug_category, "spacemitenc", 0, ++ "debug category for spacemitenc element"); ++ ++ return gst_element_register (plugin, "spacemitenc", rank, ++ GST_TYPE_SPACEMITENC); ++} ++ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc.h b/ext/spacemit/spacemitcodec/gstspacemitenc.h +new file mode 100755 +index 0000000..ef19791 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.h +@@ -0,0 +1,105 @@ ++ ++/* ++ * GstSpacemitEnc ++*/ ++ ++#ifndef _GST_SPACEMITENC_H_ ++#define _GST_SPACEMITENC_H_ ++ ++#include ++#include ++ ++//#include ++//#include ++//#include ++//#include ++ ++#include ++ ++G_BEGIN_DECLS ++ ++typedef enum _GstSpacemitencDeblockingMode ++{ ++ GST_SPACEMIT_DEBLOCKING_ON = 0, ++ GST_SPACEMIT_DEBLOCKING_OFF = 1, ++ GST_SPACEMIT_DEBLOCKING_NOT_SLICE_BOUNDARIES = 2 ++} GstSpacemitencDeblockingMode; ++ ++typedef enum ++{ ++ GST_SPACEMIT_SLICE_MODE_N_SLICES = 1, /* SM_FIXEDSLCNUM_SLICE */ ++ GST_SPACEMIT_SLICE_MODE_AUTO = 5 /* former SM_AUTO_SLICE */ ++} GstSpacemitEncSliceMode; ++ ++#define GST_TYPE_SPACEMITENC (gst_spacemitenc_get_type()) ++#define GST_SPACEMITENC(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_SPACEMITENC,GstSpacemitEnc)) ++#define GST_SPACEMITENC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_SPACEMITENC,GstSpacemitEncClass)) ++#define GST_IS_SPACEMITENC(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_SPACEMITENC)) ++#define GST_IS_SPACEMITENC_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_SPACEMITENC)) ++ ++//#define GST_MPP_ENC(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj), \ ++// GST_TYPE_SPACEMITENC, GstSpacemitEnc)) ++ ++typedef struct _GstSpacemitEnc GstSpacemitEnc; ++typedef struct _GstSpacemitEncClass GstSpacemitEncClass; ++ ++struct _GstSpacemitEnc ++{ ++ GstVideoEncoder base_spacemitenc; ++ ++ /*< private >*/ ++ //ISVCEncoder *encoder; ++// 
EUsageType usage_type; ++ guint gop_size; ++// RC_MODES rate_control; ++ guint max_slice_size; ++ guint bitrate; ++ guint max_bitrate; ++ guint qp_min; ++ guint qp_max; ++ guint framerate; ++ guint multi_thread; ++ gboolean enable_denoise; ++ gboolean enable_frame_skip; ++ GstVideoCodecState *input_state; ++ guint64 time_per_frame; ++ guint64 frame_count; ++ guint64 previous_timestamp; ++ GstSpacemitencDeblockingMode deblocking_mode; ++ gboolean background_detection; ++ gboolean adaptive_quantization; ++ gboolean scene_change_detection; ++ GstSpacemitEncSliceMode slice_mode; ++ guint num_slices; ++// ECOMPLEXITY_MODE complexity; ++ gboolean bitrate_changed; ++ gboolean max_bitrate_changed; ++ ++// GMutex mutex; ++ guint width; ++ guint height; ++ guint eCodecType; ++ guint eCodingType; ++ GMutex event_mutex; ++ GCond event_cond; ++ MppPixelFormat PixelFormat; ++ GstFlowReturn downstream_flow_ret; ++ guint pending_frames; ++ gboolean at_eos; ++ MppVencCtx *ctx; ++ MppVencPara *para; ++ MppPacket *mpppacket; ++ MppFrame *mppframe; ++}; ++ ++struct _GstSpacemitEncClass ++{ ++ GstVideoEncoderClass base_spacemitenc_class; ++}; ++ ++GType gst_spacemitenc_get_type(void); ++gboolean ++gst_spacemitenc_register (GstPlugin * plugin, guint rank); ++ ++G_END_DECLS ++#endif +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c b/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c +new file mode 100755 +index 0000000..dff86f9 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c +@@ -0,0 +1,1022 @@ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++#include ++ ++#include "gstspacemitenc.h" ++ ++#include ++#include ++#include ++#include ++#include ++#define SPM_PENDING_MAX 8 /* Max number of MPP pending frame */ ++ ++GST_DEBUG_CATEGORY_STATIC (gst_spacemitenc_debug_category); ++#define GST_CAT_DEFAULT gst_spacemitenc_debug_category ++ ++/* FIXME: we should not really directly use the enums from the spacemit API ++ * here, since it might change or be 
removed */ ++/* ++#define GST_TYPE_USAGE_TYPE (gst_spacemitenc_usage_type_get_type ()) ++static GType ++gst_spacemitenc_usage_type_get_type (void) ++{ ++ static GType usage_type = 0; ++ ++ if (!usage_type) { ++ static const GEnumValue usage_types[] = { ++ {CAMERA_VIDEO_REAL_TIME, "video from camera", "camera"}, ++ {SCREEN_CONTENT_REAL_TIME, "screen content", "screen"}, ++ {0, NULL, NULL}, ++ }; ++ ++ usage_type = g_enum_register_static ("EUsageType", usage_types); ++ } ++ ++ return usage_type; ++} ++*/ ++/* ++#define GST_TYPE_RC_MODES (gst_spacemitenc_rc_modes_get_type ()) ++static GType ++gst_spacemitenc_rc_modes_get_type (void) ++{ ++ static GType rc_modes_type = 0; ++ ++ if (!rc_modes_type) { ++ static const GEnumValue rc_modes_types[] = { ++ {RC_QUALITY_MODE, "Quality mode", "quality"}, ++ {RC_BITRATE_MODE, "Bitrate mode", "bitrate"}, ++ {RC_BUFFERBASED_MODE, "No bitrate control, just using buffer status", ++ "buffer"}, ++ {RC_OFF_MODE, "Rate control off mode", "off"}, ++ {0, NULL, NULL}, ++ }; ++ ++ rc_modes_type = g_enum_register_static ("RC_MODES", rc_modes_types); ++ } ++ ++ return rc_modes_type; ++} ++*/ ++/* ++#define GST_TYPE_SPACEMITENC_DEBLOCKING_MODE (gst_spacemitenc_deblocking_mode_get_type ()) ++static GType ++gst_spacemitenc_deblocking_mode_get_type (void) ++{ ++ static const GEnumValue types[] = { ++ {GST_SPACEMIT_DEBLOCKING_ON, "Deblocking on", "on"}, ++ {GST_SPACEMIT_DEBLOCKING_OFF, "Deblocking off", "off"}, ++ {GST_SPACEMIT_DEBLOCKING_NOT_SLICE_BOUNDARIES, ++ "Deblocking on, except for slice boundaries", "not-slice-boundaries"}, ++ {0, NULL, NULL}, ++ }; ++ static gsize id = 0; ++ ++ if (g_once_init_enter (&id)) { ++ GType _id = g_enum_register_static ("GstSpacemitencDeblockingModes", types); ++ g_once_init_leave (&id, _id); ++ } ++ ++ return (GType) id; ++} ++*/ ++/* ++#define GST_TYPE_SPACEMITENC_SLICE_MODE (gst_spacemitenc_slice_mode_get_type ()) ++static GType ++gst_spacemitenc_slice_mode_get_type (void) ++{ ++ static const GEnumValue 
types[] = { ++ {GST_SPACEMIT_SLICE_MODE_N_SLICES, "Fixed number of slices", "n-slices"}, ++ {GST_SPACEMIT_SLICE_MODE_AUTO, ++ "Number of slices equal to number of threads", "auto"}, ++ {0, NULL, NULL}, ++ }; ++ static gsize id = 0; ++ ++ if (g_once_init_enter (&id)) { ++ GType _id = g_enum_register_static ("GstSpacemitEncSliceModes", types); ++ g_once_init_leave (&id, _id); ++ } ++ ++ return (GType) id; ++} ++*/ ++/* ++#define GST_TYPE_SPACEMITENC_COMPLEXITY (gst_spacemitenc_complexity_get_type ()) ++static GType ++gst_spacemitenc_complexity_get_type (void) ++{ ++ static const GEnumValue types[] = { ++ {LOW_COMPLEXITY, "Low complexity / high speed encoding", "low"}, ++ {MEDIUM_COMPLEXITY, "Medium complexity / medium speed encoding", "medium"}, ++ {HIGH_COMPLEXITY, "High complexity / low speed encoding", "high"}, ++ {0, NULL, NULL}, ++ }; ++ static gsize id = 0; ++ ++ if (g_once_init_enter (&id)) { ++ GType _id = g_enum_register_static ("GstSpacemitencComplexity", types); ++ g_once_init_leave (&id, _id); ++ } ++ ++ return (GType) id; ++} ++*/ ++/* prototypes */ ++ ++static void gst_spacemitenc_set_property (GObject * object, ++ guint property_id, const GValue * value, GParamSpec * pspec); ++static void gst_spacemitenc_get_property (GObject * object, ++ guint property_id, GValue * value, GParamSpec * pspec); ++static void gst_spacemitenc_finalize (GObject * object); ++static gboolean gst_spacemitenc_start (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_stop (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_set_format (GstVideoEncoder * encoder, ++ GstVideoCodecState * state); ++static GstFlowReturn gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, ++ GstVideoCodecFrame * frame); ++static GstFlowReturn gst_spacemitenc_finish (GstVideoEncoder * encoder); ++static gboolean gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, ++ GstQuery * query); ++ ++#define DEFAULT_BITRATE (128000) ++//#define DEFAULT_MAX_BITRATE 
(UNSPECIFIED_BIT_RATE) ++#define DEFAULT_GOP_SIZE (90) ++#define DEFAULT_MAX_SLICE_SIZE (1500000) ++#define START_FRAMERATE 30 ++//#define DEFAULT_USAGE_TYPE CAMERA_VIDEO_REAL_TIME ++//#define DEFAULT_RATE_CONTROL RC_QUALITY_MODE ++#define DEFAULT_MULTI_THREAD 0 ++#define DEFAULT_ENABLE_DENOISE FALSE ++#define DEFAULT_ENABLE_FRAME_SKIP FALSE ++//#define DEFAULT_DEBLOCKING_MODE GST_SPACEMIT_DEBLOCKING_ON ++#define DEFAULT_BACKGROUND_DETECTION TRUE ++#define DEFAULT_ADAPTIVE_QUANTIZATION TRUE ++#define DEFAULT_SCENE_CHANGE_DETECTION TRUE ++#define DEFAULT_SLICE_MODE GST_SPACEMIT_SLICE_MODE_N_SLICES ++#define DEFAULT_NUM_SLICES 1 ++//#define DEFAULT_COMPLEXITY MEDIUM_COMPLEXITY ++#define DEFAULT_QP_MIN 0 ++#define DEFAULT_QP_MAX 51 ++ ++#define GST_SPM_ENC_EVENT_MUTEX(encoder) (&GST_SPACEMITENC (encoder)->event_mutex) ++#define GST_SPM_ENC_EVENT_COND(encoder) (&GST_SPACEMITENC (encoder)->event_cond) ++ ++#define GST_SPM_ENC_BROADCAST(encoder) \ ++ g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ g_cond_broadcast (GST_SPM_ENC_EVENT_COND (encoder)); \ ++ g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); ++ ++#define GST_SPM_ENC_WAIT(encoder, condition) \ ++ g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ while (!(condition)) \ ++ g_cond_wait (GST_SPM_ENC_EVENT_COND (encoder), \ ++ GST_SPM_ENC_EVENT_MUTEX (encoder)); \ ++ g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); ++ ++enum ++{ ++ PROP_0, ++ //PROP_USAGE_TYPE, ++ PROP_BITRATE, ++ PROP_MAX_BITRATE, ++ PROP_GOP_SIZE, ++ PROP_MAX_SLICE_SIZE, ++ //PROP_RATE_CONTROL, ++ PROP_MULTI_THREAD, ++ PROP_ENABLE_DENOISE, ++ PROP_ENABLE_FRAME_SKIP, ++ //PROP_DEBLOCKING_MODE, ++ PROP_BACKGROUND_DETECTION, ++ PROP_ADAPTIVE_QUANTIZATION, ++ PROP_SCENE_CHANGE_DETECTION, ++ //PROP_SLICE_MODE, ++ PROP_NUM_SLICES, ++ //PROP_COMPLEXITY, ++ PROP_QP_MIN, ++ PROP_QP_MAX, ++ PROP_CODING_WIDTH, ++ PROP_CODING_HIGHT, ++ PROP_CODING_TYPE, ++ PROP_CODE_TYPE, ++ PROP_CODE_YUV_FORMAT, ++ N_PROPERTIES ++}; ++ ++/* pad templates */ 
++ ++static GstStaticPadTemplate gst_spacemitenc_sink_template = ++GST_STATIC_PAD_TEMPLATE ("sink", ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{I420, NV12, NV21}")) ++ ); ++#if 0 ++static GstStaticPadTemplate gst_spacemitenc_src_template = ++GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ("video/x-h264, stream-format=(string)\"byte-stream\", alignment=(string)\"au\", profile=(string)\"baseline\"" ++ ";" ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [1,MAX], " "height=(int) [1,MAX]" ++ )); ++#else ++static GstStaticPadTemplate gst_spacemitenc_src_template = ++GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ( ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [1,MAX], " "height=(int) [1,MAX]" ++ )); ++ ++#endif ++/* class initialization */ ++ ++G_DEFINE_TYPE_WITH_CODE (GstSpacemitEnc, gst_spacemitenc, ++ GST_TYPE_VIDEO_ENCODER, ++ G_IMPLEMENT_INTERFACE (GST_TYPE_PRESET, NULL); ++ GST_DEBUG_CATEGORY_INIT (gst_spacemitenc_debug_category, "spacemitenc", 0, ++ "debug category for spacemitenc element")); ++ ++static void ++gst_spacemitenc_class_init (GstSpacemitEncClass * klass) ++{ ++ GObjectClass *gobject_class = G_OBJECT_CLASS (klass); ++ GstVideoEncoderClass *video_encoder_class = GST_VIDEO_ENCODER_CLASS (klass); ++ ++ /* Setting up pads and setting metadata should be moved to ++ base_class_init if you intend to subclass this class. 
*/ ++ gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), ++ &gst_spacemitenc_src_template); ++ gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), ++ &gst_spacemitenc_sink_template); ++ ++ gst_element_class_set_static_metadata (GST_ELEMENT_CLASS (klass), ++ "Spacemit video encoder", "Encoder/Video", "Spacemit video encoder", ++ "David, qiang.fu@spacemit.com"); ++ ++ gobject_class->set_property = gst_spacemitenc_set_property; ++ gobject_class->get_property = gst_spacemitenc_get_property; ++ gobject_class->finalize = gst_spacemitenc_finalize; ++ video_encoder_class->start = GST_DEBUG_FUNCPTR (gst_spacemitenc_start); ++ video_encoder_class->stop = GST_DEBUG_FUNCPTR (gst_spacemitenc_stop); ++ video_encoder_class->set_format = ++ GST_DEBUG_FUNCPTR (gst_spacemitenc_set_format); ++ video_encoder_class->handle_frame = ++ GST_DEBUG_FUNCPTR (gst_spacemitenc_handle_frame); ++ video_encoder_class->propose_allocation = ++ GST_DEBUG_FUNCPTR (gst_spacemitenc_propose_allocation); ++ video_encoder_class->finish = GST_DEBUG_FUNCPTR (gst_spacemitenc_finish); ++ ++ /* define properties */ ++ g_object_class_install_property (gobject_class, PROP_GOP_SIZE, ++ g_param_spec_uint ("gop-size", "GOP size", ++ "Number of frames between intra frames", ++ 0, G_MAXUINT, DEFAULT_GOP_SIZE, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_TYPE, ++ g_param_spec_uint ("coding-type", "coding type", ++ "Format to encode", ++ CODING_H264, CODING_FWHT, CODING_H264, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_TYPE, ++ g_param_spec_uint ("code-type", "code type", ++ "Codec selection to work", ++ CODEC_OPENH264, CODEC_V4L2, CODEC_SFOMX, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_WIDTH, ++ g_param_spec_uint ("coding-width", "coding 
width", ++ "image width to encode", ++ 0, 3840, 1280, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODING_HIGHT, ++ g_param_spec_uint ("code-hight", "code hight", ++ "image hight to encode", ++ 0, 2160, 720, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, ++ g_param_spec_uint ("code-yuv-format", "code yuv format", ++ "ENcode the yuv format", ++ PIXEL_FORMAT_DEFAULT, PIXEL_FORMAT_UNKNOWN-1, PIXEL_FORMAT_I420, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++} ++ ++static void ++gst_spacemitenc_init (GstSpacemitEnc * spacemitenc) ++{ ++ spacemitenc->width = 1280; ++ spacemitenc->height = 720; ++ spacemitenc->eCodecType = CODEC_SFOMX; ++ spacemitenc->eCodingType = CODING_H264; ++ spacemitenc->PixelFormat = PIXEL_FORMAT_I420; ++ spacemitenc->gop_size = DEFAULT_GOP_SIZE; ++// spacemitenc->usage_type = DEFAULT_USAGE_TYPE; ++// spacemitenc->rate_control = DEFAULT_RATE_CONTROL; ++ spacemitenc->multi_thread = DEFAULT_MULTI_THREAD; ++ spacemitenc->max_slice_size = DEFAULT_MAX_SLICE_SIZE; ++ spacemitenc->bitrate = DEFAULT_BITRATE; ++// spacemitenc->max_bitrate = DEFAULT_MAX_BITRATE; ++ spacemitenc->qp_min = DEFAULT_QP_MIN; ++ spacemitenc->qp_max = DEFAULT_QP_MAX; ++ spacemitenc->framerate = START_FRAMERATE; ++ spacemitenc->input_state = NULL; ++ spacemitenc->time_per_frame = GST_SECOND / spacemitenc->framerate; ++ spacemitenc->frame_count = 0; ++ spacemitenc->previous_timestamp = 0; ++ spacemitenc->enable_denoise = DEFAULT_ENABLE_DENOISE; ++ spacemitenc->enable_frame_skip = DEFAULT_ENABLE_FRAME_SKIP; ++// spacemitenc->deblocking_mode = DEFAULT_DEBLOCKING_MODE; ++ spacemitenc->background_detection = DEFAULT_BACKGROUND_DETECTION; ++ spacemitenc->adaptive_quantization = DEFAULT_ADAPTIVE_QUANTIZATION; ++ spacemitenc->scene_change_detection = DEFAULT_SCENE_CHANGE_DETECTION; ++ spacemitenc->slice_mode 
= DEFAULT_SLICE_MODE; ++ spacemitenc->num_slices = DEFAULT_NUM_SLICES; ++ //spacemitenc->encoder = NULL; ++// spacemitenc->complexity = DEFAULT_COMPLEXITY; ++ spacemitenc->bitrate_changed = FALSE; ++ spacemitenc->max_bitrate_changed = FALSE; ++ ++ spacemitenc->ctx = NULL; ++ spacemitenc->para = NULL; ++ spacemitenc->mppframe = NULL; ++ spacemitenc->mpppacket = NULL; ++ ++// gst_spacemitenc_set_usage_type (spacemitenc, CAMERA_VIDEO_REAL_TIME); ++// gst_spacemitenc_set_rate_control (spacemitenc, RC_QUALITY_MODE); ++} ++ ++void ++gst_spacemitenc_set_property (GObject * object, guint property_id, ++ const GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- set_property: %d", property_id); ++ ++ switch (property_id) { ++ case PROP_BITRATE: ++ GST_OBJECT_LOCK (spacemitenc); ++ if (spacemitenc->bitrate != g_value_get_uint (value)) { ++ spacemitenc->bitrate = g_value_get_uint (value); ++ spacemitenc->bitrate_changed = TRUE; ++ } ++ GST_OBJECT_UNLOCK (spacemitenc); ++ break; ++ ++ case PROP_MAX_BITRATE: ++ GST_OBJECT_LOCK (spacemitenc); ++ if (spacemitenc->max_bitrate != g_value_get_uint (value)) { ++ spacemitenc->max_bitrate = g_value_get_uint (value); ++ spacemitenc->max_bitrate_changed = TRUE; ++ } ++ GST_OBJECT_UNLOCK (spacemitenc); ++ break; ++ ++ case PROP_QP_MIN: ++ spacemitenc->qp_min = g_value_get_uint (value); ++ break; ++ ++ case PROP_QP_MAX: ++ spacemitenc->qp_max = g_value_get_uint (value); ++ break; ++ ++ case PROP_MULTI_THREAD: ++ spacemitenc->multi_thread = g_value_get_uint (value); ++ break; ++/* ++ case PROP_USAGE_TYPE: ++ gst_spacemitenc_set_usage_type (spacemitenc, g_value_get_enum (value)); ++ break; ++*/ ++ case PROP_ENABLE_DENOISE: ++ spacemitenc->enable_denoise = g_value_get_boolean (value); ++ break; ++ ++ case PROP_ENABLE_FRAME_SKIP: ++ spacemitenc->enable_frame_skip = g_value_get_boolean (value); ++ break; ++/* ++ case PROP_RATE_CONTROL: 
++ gst_spacemitenc_set_rate_control (spacemitenc, g_value_get_enum (value)); ++ break; ++*/ ++ case PROP_GOP_SIZE: ++ spacemitenc->gop_size = g_value_get_uint (value); ++ GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- set_property: %d", spacemitenc->gop_size); ++ break; ++ ++ case PROP_MAX_SLICE_SIZE: ++ spacemitenc->max_slice_size = g_value_get_uint (value); ++ break; ++/* ++ case PROP_DEBLOCKING_MODE: ++ spacemitenc->deblocking_mode = ++ (GstSpacemitencDeblockingMode) g_value_get_enum (value); ++ break; ++*/ ++ case PROP_BACKGROUND_DETECTION: ++ spacemitenc->background_detection = g_value_get_boolean (value); ++ break; ++ ++ case PROP_ADAPTIVE_QUANTIZATION: ++ spacemitenc->adaptive_quantization = g_value_get_boolean (value); ++ break; ++ ++ case PROP_SCENE_CHANGE_DETECTION: ++ spacemitenc->scene_change_detection = g_value_get_boolean (value); ++ break; ++/* ++ case PROP_SLICE_MODE: ++ spacemitenc->slice_mode = ++ (GstSpacemitEncSliceMode) g_value_get_enum (value); ++ break; ++*/ ++ case PROP_NUM_SLICES: ++ spacemitenc->num_slices = g_value_get_uint (value); ++ break; ++ case PROP_CODING_WIDTH: ++ spacemitenc->width = g_value_get_uint (value); ++ break; ++ case PROP_CODING_HIGHT: ++ spacemitenc->height = g_value_get_uint (value); ++ break; ++ case PROP_CODING_TYPE: ++ spacemitenc->eCodingType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_TYPE: ++ spacemitenc->eCodecType = g_value_get_uint (value); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ spacemitenc->PixelFormat = g_value_get_uint (value); ++ break; ++ ++/* ++ case PROP_COMPLEXITY: ++ spacemitenc->complexity = (ECOMPLEXITY_MODE) g_value_get_enum (value); ++ break; ++*/ ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitenc_get_property (GObject * object, guint property_id, ++ GValue * value, GParamSpec * pspec) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (spacemitenc, "ZRong 
------------------- get_property: %d", property_id); ++ ++ switch (property_id) { ++ /* ++ case PROP_USAGE_TYPE: ++ g_value_set_enum (value, spacemitenc->usage_type); ++ break; ++ */ ++ /* ++ case PROP_RATE_CONTROL: ++ g_value_set_enum (value, spacemitenc->rate_control); ++ break; ++ */ ++ case PROP_BITRATE: ++ g_value_set_uint (value, spacemitenc->bitrate); ++ break; ++ ++ case PROP_MAX_BITRATE: ++ g_value_set_uint (value, spacemitenc->max_bitrate); ++ break; ++ ++ case PROP_QP_MIN: ++ g_value_set_uint (value, spacemitenc->qp_min); ++ break; ++ ++ case PROP_QP_MAX: ++ g_value_set_uint (value, spacemitenc->qp_max); ++ break; ++ ++ case PROP_ENABLE_DENOISE: ++ g_value_set_boolean (value, spacemitenc->enable_denoise); ++ break; ++ ++ case PROP_ENABLE_FRAME_SKIP: ++ g_value_set_boolean (value, spacemitenc->enable_frame_skip); ++ break; ++ ++ case PROP_MULTI_THREAD: ++ g_value_set_uint (value, spacemitenc->multi_thread); ++ break; ++ ++ case PROP_GOP_SIZE: ++ GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- gop_size: %d", spacemitenc->gop_size); ++ g_value_set_uint (value, spacemitenc->gop_size); ++ break; ++ ++ case PROP_MAX_SLICE_SIZE: ++ g_value_set_uint (value, spacemitenc->max_slice_size); ++ break; ++ /* ++ case PROP_DEBLOCKING_MODE: ++ g_value_set_enum (value, spacemitenc->deblocking_mode); ++ break; ++ */ ++ case PROP_BACKGROUND_DETECTION: ++ g_value_set_boolean (value, spacemitenc->background_detection); ++ break; ++ ++ case PROP_ADAPTIVE_QUANTIZATION: ++ g_value_set_boolean (value, spacemitenc->adaptive_quantization); ++ break; ++ ++ case PROP_SCENE_CHANGE_DETECTION: ++ g_value_set_boolean (value, spacemitenc->scene_change_detection); ++ break; ++ /* ++ case PROP_SLICE_MODE: ++ g_value_set_enum (value, spacemitenc->slice_mode); ++ break; ++ */ ++ case PROP_NUM_SLICES: ++ g_value_set_uint (value, spacemitenc->num_slices); ++ break; ++ case PROP_CODING_WIDTH: ++ g_value_set_uint (value, spacemitenc->width); ++ break; ++ case PROP_CODING_HIGHT: ++ 
g_value_set_uint (value, spacemitenc->height); ++ break; ++ case PROP_CODING_TYPE: ++ g_value_set_uint (value, spacemitenc->eCodingType); ++ break; ++ case PROP_CODE_TYPE: ++ g_value_set_uint (value, spacemitenc->eCodecType); ++ break; ++ case PROP_CODE_YUV_FORMAT: ++ g_value_set_uint (value, spacemitenc->PixelFormat); ++ break; ++ ++ /* ++ case PROP_COMPLEXITY: ++ g_value_set_enum (value, spacemitenc->complexity); ++ break; ++ */ ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitenc_finalize (GObject * object) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); ++ ++ GST_DEBUG_OBJECT (spacemitenc, "finalize"); ++ ++ /* clean up object here */ ++ ++ if (spacemitenc->input_state) { ++ gst_video_codec_state_unref (spacemitenc->input_state); ++ } ++ spacemitenc->input_state = NULL; ++ ++ G_OBJECT_CLASS (gst_spacemitenc_parent_class)->finalize (object); ++} ++FILE *fbbb; ++ ++ ++static gboolean ++gst_spacemitenc_start (GstVideoEncoder * encoder) ++{ ++ int ret = 0; ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ GST_DEBUG_OBJECT (spacemitenc, "start"); ++ ++ if(spacemitenc->ctx) ++ { ++ VENC_DestoryChannel(spacemitenc->ctx); ++ spacemitenc->ctx = NULL; ++ } ++ spacemitenc->ctx = VENC_CreateChannel(); ++ ++ spacemitenc->ctx->stVencPara.eCodingType = spacemitenc->eCodingType; ++ spacemitenc->ctx->stVencPara.nWidth = spacemitenc->width; ++ spacemitenc->ctx->stVencPara.nHeight = spacemitenc->height; ++ spacemitenc->ctx->stVencPara.PixelFormat = spacemitenc->PixelFormat; ++ spacemitenc->ctx->eCodecType = spacemitenc->eCodecType; ++ ++ ret = VENC_Init(spacemitenc->ctx); ++ if (ret) ++ goto init_err; ++ ++ spacemitenc->mpppacket = PACKET_Create (); ++ if (!spacemitenc->mpppacket) ++ goto alloc_err; ++ ++ spacemitenc->mppframe = FRAME_Create (); ++ if (!spacemitenc->mppframe) ++ goto alloc_err; ++// FRAME_Alloc(spacemitenc->mppframe, 1, 1280, 720); ++ ++ GST_ERROR_OBJECT 
(spacemitenc, "ZRong ------------------------- finish start!!!"); ++ g_mutex_init (&spacemitenc->event_mutex); ++ g_cond_init (&spacemitenc->event_cond); ++ spacemitenc->pending_frames = 0; ++ fbbb = fopen("/tmp/out.yuv", "ab+"); ++ ++ return TRUE; ++ ++alloc_err: ++ GST_ERROR_OBJECT (spacemitenc, "can not alloc for mpp structure, please check !"); ++ return FALSE; ++ ++init_err: ++ GST_ERROR_OBJECT (spacemitenc, "Mpp vnec init error, please check !"); ++ return FALSE; ++} ++ ++static gboolean ++gst_spacemitenc_stop (GstVideoEncoder * encoder) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ ++ GST_ERROR_OBJECT (spacemitenc, "xxxxxxxxxxxxxx stop start"); ++ fclose(fbbb); ++ ++ if(spacemitenc->ctx) ++ { ++ VENC_DestoryChannel(spacemitenc->ctx); ++ spacemitenc->ctx = NULL; ++ } ++ FRAME_Destory(spacemitenc->mppframe); ++ PACKET_Destory(spacemitenc->mpppacket); ++ VENC_DestoryChannel(spacemitenc->ctx); ++ ++ if (spacemitenc->input_state) { ++ gst_video_codec_state_unref (spacemitenc->input_state); ++ } ++ spacemitenc->input_state = NULL; ++ if (!(gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED)) ++ return TRUE; ++ ++ GST_DEBUG_OBJECT (spacemitenc, "spacemit_enc_stop called"); ++ ++ gst_pad_stop_task (encoder->srcpad); ++ g_cond_clear (&spacemitenc->event_cond); ++ g_mutex_clear (&spacemitenc->event_mutex); ++ ++ return TRUE; ++} ++static void ++gst_st_mpp_enc_loop (GstVideoEncoder * encoder) ++{ ++ GstVideoCodecFrame * frame = NULL; ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ int ret; ++ GstFlowReturn flow_status; ++ GstMapInfo map; ++ static int lenght = 0; ++ ++ GST_SPM_ENC_WAIT (encoder, spacemitenc->pending_frames >= 4 || spacemitenc->at_eos); ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ ++ ret = VENC_RequestOutputStreamBuffer(spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); ++ if (ret == MPP_CODER_NO_DATA) ++ goto sf_no_frame; ++ else if (ret == MPP_CODER_EOS) ++ goto finish_work; ++ ++ frame = 
gst_video_encoder_get_oldest_frame (encoder); ++ if (!frame) { ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish last frame"); ++ spacemitenc->downstream_flow_ret = GST_FLOW_EOS; ++ GstBuffer *buffer; ++ buffer = ++ gst_buffer_new_wrapped (g_memdup (PACKET_GetDataPointer(spacemitenc->mpppacket), ++ PACKET_GetLength(spacemitenc->mpppacket)), PACKET_GetLength(spacemitenc->mpppacket)); ++ flow_status = gst_pad_push (GST_VIDEO_ENCODER_SRC_PAD (spacemitenc), buffer); ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish last frame %d", flow_status); ++ ++ VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); ++ goto done; ++ } ++ spacemitenc->pending_frames--; ++ GST_SPM_ENC_BROADCAST (encoder); ++ ++ //if (videoFrameTypeIDR == frame_info.eFrameType) { ++ // GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ //} else { ++ GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); ++ //} ++ frame->output_buffer = ++ gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(spacemitenc->mpppacket)); ++ gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); ++ ++ lenght += PACKET_GetLength(spacemitenc->mpppacket); ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish one frame, %d, %d", lenght, PACKET_GetLength(spacemitenc->mpppacket)); ++ memcpy (map.data, PACKET_GetDataPointer(spacemitenc->mpppacket), PACKET_GetLength(spacemitenc->mpppacket)); ++ ++ gst_buffer_unmap (frame->output_buffer, &map); ++ VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); ++ ++ spacemitenc->downstream_flow_ret = gst_video_encoder_finish_frame (encoder, frame); ++done: ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ return; ++ ++sf_no_frame: ++{ ++ GST_LOG_OBJECT (spacemitenc, ++ "No out frame to request%d!", spacemitenc->pending_frames); ++ spacemitenc->downstream_flow_ret = GST_FLOW_OK; ++ goto done; ++} ++finish_work: ++{ ++ GST_DEBUG_OBJECT (spacemitenc, ++ "Get 
eos, Finished work and paused task!"); ++ gst_pad_pause_task (GST_VIDEO_ENCODER_SRC_PAD (spacemitenc)); ++ spacemitenc->downstream_flow_ret = GST_FLOW_EOS; ++ goto done; ++} ++ ++} ++ ++ ++static gboolean ++gst_spacemitenc_set_format (GstVideoEncoder * encoder, ++ GstVideoCodecState * state) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ gchar *debug_caps; ++ guint width, height, fps_n, fps_d; ++ guint n_slices = 1; ++ gint ret; ++ GstCaps *outcaps; ++ GstVideoCodecState *output_state; ++ spacemitenc->frame_count = 0; ++ int stride0, stride1, stride2; ++ GstVideoFormat fmt; ++ ++ debug_caps = gst_caps_to_string (state->caps); ++ GST_DEBUG_OBJECT (spacemitenc, "gst_e26d4_enc_set_format called, caps: %s", ++ debug_caps); ++ g_free (debug_caps); ++ ++ if (spacemitenc->input_state) { ++ gst_video_codec_state_unref (spacemitenc->input_state); ++ } ++ spacemitenc->input_state = gst_video_codec_state_ref (state); ++ ++ width = GST_VIDEO_INFO_WIDTH (&state->info); ++ height = GST_VIDEO_INFO_HEIGHT (&state->info); ++ fps_n = GST_VIDEO_INFO_FPS_N (&state->info); ++ fps_d = GST_VIDEO_INFO_FPS_D (&state->info); ++ stride0 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 0); ++ stride1 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 1); ++ stride2 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 2); ++ fmt = GST_VIDEO_INFO_FORMAT (&state->info); ++ ++ spacemitenc->para = (MppVencPara*)malloc(sizeof(MppVencPara)); ++ memset(spacemitenc->para, 0, sizeof(MppVencPara)); ++ spacemitenc->para->nBitrate = 5000000; ++ spacemitenc->para->nFrameRate = 30; ++ spacemitenc->para->nHeight = spacemitenc->height; ++ spacemitenc->para ->nWidth = spacemitenc->width; ++ spacemitenc->para->nStride = spacemitenc->width; ++ VENC_SetParam(spacemitenc->ctx, spacemitenc->para); ++ ++ outcaps = ++ gst_caps_copy (gst_static_pad_template_get_caps ++ (&gst_spacemitenc_src_template)); ++ ++ output_state = gst_video_encoder_set_output_state (encoder, outcaps, state); ++ gst_video_codec_state_unref 
(output_state); ++ ++ GST_ERROR_OBJECT (spacemitenc, "ZRong -------------------- set format finish, %d, %d (%d %d)", width, height, fps_n, fps_d); ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ----------------------- set format finish, %u, (%d %d %d) %s", fmt, stride0, stride1, stride2, gst_video_format_to_string (fmt)); ++ ++ if (!gst_video_encoder_negotiate (encoder)) ++ return FALSE; ++ ++ if (G_UNLIKELY (!gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED)) { ++ GST_DEBUG_OBJECT (spacemitenc, "@@@ ZRong ------------------------- starting encoding thread"); ++ gst_pad_start_task (encoder->srcpad, ++ (GstTaskFunction) gst_st_mpp_enc_loop, encoder, NULL); ++ } ++ return TRUE; ++ ++} ++ ++static gboolean ++gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, GstQuery * query) ++{ ++ gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); ++ ++ return ++ GST_VIDEO_ENCODER_CLASS ++ (gst_spacemitenc_parent_class)->propose_allocation (encoder, query); ++} ++ ++static GstFlowReturn ++gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, ++ GstVideoCodecFrame * frame) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ GstVideoFrame video_frame; ++ gboolean force_keyframe; ++ gint ret; ++ gfloat fps; ++ gint i, j; ++ gsize buf_length = 0; ++ GstFlowReturn flow_ret; ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- handle frame start"); ++ ++ if (spacemitenc->downstream_flow_ret != GST_FLOW_OK) { ++ goto downstream_err; ++ } ++ ++ GST_OBJECT_LOCK (spacemitenc); ++ if (spacemitenc->bitrate_changed || spacemitenc->max_bitrate_changed) { ++ GST_WARNING_OBJECT (spacemitenc, ++ "Error need to support this cases"); ++ } ++ GST_OBJECT_UNLOCK (spacemitenc); ++ ++ if (frame) { ++ spacemitenc->frame_count++; ++ gst_video_frame_map (&video_frame, &spacemitenc->input_state->info, ++ frame->input_buffer, GST_MAP_READ); ++ if (spacemitenc->PixelFormat == PIXEL_FORMAT_I420) { ++ FRAME_SetDataUsedNum(spacemitenc->mppframe, 3); ++ 
FRAME_SetDataPointer(spacemitenc->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); ++ FRAME_SetDataPointer(spacemitenc->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); ++ FRAME_SetDataPointer(spacemitenc->mppframe, 2, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 2)); ++ } else { ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- be save %s, %d, (%d, %d, %d)", ++ gst_video_format_to_string(GST_VIDEO_FRAME_FORMAT(&video_frame)), ++ GST_VIDEO_FRAME_N_PLANES(&video_frame), GST_VIDEO_FRAME_SIZE(&video_frame), ++ GST_VIDEO_FRAME_WIDTH(&video_frame), GST_VIDEO_FRAME_HEIGHT(&video_frame)); ++ ++ FRAME_SetDataUsedNum(spacemitenc->mppframe, 2); ++ FRAME_SetDataPointer(spacemitenc->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); ++ FRAME_SetDataPointer(spacemitenc->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); ++#if 0 ++ //SF_OMX_BUF_INFO *pBufInfo = pOMXBuffer->pOutputPortPrivate; ++ //LOG(SF_LOG_INFO, "%p %d %p\r\n", pOMXBuffer->pBuffer, pOMXBuffer->nFilledLen, pBufInfo->remap_vaddr); ++ ++ fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 0), 1, 1280*720, fbbb); ++ fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 1), 1, 1280*720/2, fbbb); ++ ++#endif ++ ++ } ++ } else { ++ MppData * tmp = FRAME_GetBaseData(spacemitenc->mppframe); ++ tmp->bEos = 1; ++ } ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ ret = VENC_Encode(spacemitenc->ctx, FRAME_GetBaseData(spacemitenc->mppframe)); ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ if (ret != 0) { ++ GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, ++ ("Could not encode frame"), ("Spacemit returned %d", ret)); ++ flow_ret = GST_FLOW_ERROR; ++ if (frame) ++ goto release_res; ++ else ++ goto done; ++ } ++ if (!frame) ++ goto flow_eos; ++ /* Avoid holding too much frames */ ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ GST_SPM_ENC_WAIT (encoder, ++ spacemitenc->pending_frames < SPM_PENDING_MAX); ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ spacemitenc->pending_frames++; ++ 
GST_SPM_ENC_BROADCAST (encoder); ++ ++ flow_ret = spacemitenc->downstream_flow_ret; ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- handle frame start: %d, %d", spacemitenc->pending_frames, spacemitenc->frame_count); ++ ++ if (frame) ++ goto release_res; ++ ++done: ++ return flow_ret; ++downstream_err: ++{ ++ GST_ERROR_OBJECT (spacemitenc, "Downstream returned %s", ++ gst_flow_get_name (spacemitenc->downstream_flow_ret)); ++ flow_ret = spacemitenc->downstream_flow_ret; ++ goto done; ++} ++release_res: ++{ ++ gst_video_frame_unmap (&video_frame); ++ gst_video_codec_frame_unref (frame); ++ goto done; ++} ++flow_eos: ++{ ++ GST_DEBUG_OBJECT (spacemitenc, "Get an eos, exit handle"); ++ flow_ret = GST_FLOW_EOS; ++ goto done; ++} ++#if 0 ++ /* FIXME: spacemit has no way for us to get a connection ++ * between the input and output frames, we just have to ++ * guess based on the input */ ++ frame = gst_video_encoder_get_oldest_frame (encoder); ++ if (!frame) { ++ GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, ++ ("Could not encode frame"), ("spacemitenc returned %d", ret)); ++ gst_video_codec_frame_unref (frame); ++ return GST_FLOW_ERROR; ++ } ++ ret = VENC_RequestOutputStreamBuffer(spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); ++ if (ret) { ++ GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, ++ ("Could not encode frame"), ("spacemitenc returned %d", ret)); ++ gst_video_codec_frame_unref (frame); ++ return GST_FLOW_ERROR; ++ } ++ ++ if (videoFrameTypeIDR == frame_info.eFrameType) { ++ GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ } else { ++ GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); ++ } ++ ++ frame->output_buffer = ++ gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(spacemitenc->mpppacket)); ++ gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); ++ ++ memcpy (map.data, PACKET_GetDataPointer(spacemitenc->mpppacket), PACKET_GetLength(spacemitenc->mpppacket)); ++ ++ gst_buffer_unmap (frame->output_buffer, &map); ++ 
VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); ++ ++ GstFlowReturn flow_status = gst_video_encoder_finish_frame (encoder, frame); ++ ++ GST_ERROR_OBJECT (spacemitenc, "ZRong ----------------------- handle frame finish"); ++ ++ return flow_status; ++#endif ++} ++ ++static GstFlowReturn ++gst_spacemitenc_finish (GstVideoEncoder * encoder) ++{ ++ GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); ++ ++ if (spacemitenc->frame_count == 0) ++ return GST_FLOW_OK; ++ ++ /* Drain encoder */ ++ while ((gst_spacemitenc_handle_frame (encoder, NULL)) == GST_FLOW_OK); ++ ++ GST_DEBUG_OBJECT(spacemitenc, "finish test3"); ++ spacemitenc->at_eos = TRUE; ++ ++ GST_SPM_ENC_BROADCAST (encoder); ++ ++ GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ /* Wait for task thread to pause */ ++ GstTask *task = encoder->srcpad->task; ++ if (task) { ++ //GST_OBJECT_LOCK (task); ++ while (GST_TASK_STATE (task) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT(spacemitenc, "finish test4"); ++ g_usleep(400 * 1000); ++ //GST_TASK_WAIT (task); ++ } ++ GST_DEBUG_OBJECT(spacemitenc, "finish test5"); ++ //GST_OBJECT_UNLOCK (task); ++ } ++ GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ ++ GST_DEBUG_OBJECT(spacemitenc, "finish test2"); ++ ++ return GST_FLOW_OK; ++} ++ ++gboolean ++gst_spacemitenc_register (GstPlugin * plugin, guint rank) ++{ ++ return gst_element_register (plugin, "spacemitenc", rank, ++ GST_TYPE_SPACEMITENC); ++} ++ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitplugin.c b/ext/spacemit/spacemitcodec/gstspacemitplugin.c +new file mode 100755 +index 0000000..d2d5e62 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspacemitplugin.c +@@ -0,0 +1,29 @@ ++ ++/* ++ * Spacemit GStreamer plugin ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include ++#endif ++ ++#include ++ ++#include "gstspacemitdec.h" ++#include "gstspacemitenc.h" ++ ++static gboolean plugin_init(GstPlugin *plugin) ++{ ++ gboolean ret = FALSE; ++ ++ ret |= gst_spacemitdec_register (plugin, 
GST_RANK_PRIMARY + 1); ++ ret |= gst_spacemitenc_register (plugin, GST_RANK_PRIMARY + 1); ++ ++ return ret; ++} ++ ++GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, ++ GST_VERSION_MINOR, ++ spacemitcodec, ++ "Spacemit encoder/decoder plugin", ++ plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN) +diff --git a/ext/spacemit/spacemitcodec/gstspmdmabufallocator.c b/ext/spacemit/spacemitcodec/gstspmdmabufallocator.c +new file mode 100755 +index 0000000..86efd37 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspmdmabufallocator.c +@@ -0,0 +1,310 @@ ++/* GStreamer ++ * ++ * Copyright (C) 2016 Igalia ++ * ++ * Authors: ++ * Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "gstspmdmabufallocator.h" ++ ++#define GST_CAT_DEFAULT spmdmabufallocator_debug_category ++GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT); ++ ++#define GST_SPM_DMABUF_MEMORY_TYPE "SpmDmaBufMemory" ++ ++#define parent_class gst_spm_dmabuf_allocator_parent_class ++ ++G_DEFINE_TYPE_WITH_CODE (GstSpmDmaBufAllocator, gst_spm_dmabuf_allocator, ++ GST_TYPE_DMABUF_ALLOCATOR, ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spmdmabufallocator", 0, ++ "SPM DMABUF allocator")); ++ ++gboolean ++gst_is_spm_dmabuf_memory (GstMemory * base_mem) ++{ ++ return gst_memory_is_type (base_mem, GST_SPM_DMABUF_MEMORY_NAME); ++} ++ ++static void ++gst_spm_dmabuf_allocator_free (GstAllocator * allocator, GstMemory * base_mem) ++{ ++ GstSpmDmaBufMemory *mem = (GstSpmDmaBufMemory *) base_mem; ++ GST_DEBUG ("ZRong ------------------- spacemit free dma mem"); ++ ++ g_warn_if_fail (!mem->acquired); ++ ++ g_slice_free (GstSpmDmaBufMemory, mem); ++} ++ ++static void ++gst_unref_spm_dmabuf_mem (gpointer key, gpointer value, gpointer user_data) ++{ ++ GstMemory *mem = (GstMemory *)value; ++ ++ GST_DEBUG ("ZRong ------------------- spacemit dmabuf allocator finalize %d", mem->mini_object.refcount); ++ gst_memory_unref (value); ++} ++ ++static void ++gst_spm_dmabuf_allocator_finalize (GObject * obj) ++{ ++ GstSpmDmaBufAllocator *alloc = GST_SPM_DMABUF_ALLOCATOR (obj); ++ GST_DEBUG_OBJECT (alloc, "ZRong ------------------- spacemit dma allocator finalize"); ++ ++ G_OBJECT_CLASS (parent_class)->finalize (obj); ++} ++ ++/* installed as the GstMiniObject::dispose function of the acquired GstMemory */ ++static gboolean ++gst_spm_dmabuf_allocator_memory_dispose (GstMemory * base_mem) ++{ ++ GstSpmDmaBufMemory *mem; ++ GstSpmDmaBufAllocator *allocator; ++ ++ mem = (GstSpmDmaBufMemory *) base_mem; ++ base_mem->size = mem->memsize; ++ allocator = GST_SPM_DMABUF_ALLOCATOR (GST_MEMORY_CAST (mem)->allocator); ++ ++ if (mem->acquired) ++ 
{ ++ VDEC_ReturnOutputFrame(mem->ctx, FRAME_GetBaseData(mem->mppframe)); ++ FRAME_Destory(mem->mppframe); ++ mem->ctx = NULL; ++ mem->mppframe = NULL; ++ ++ /* keep the memory alive */ ++ gst_memory_ref (base_mem); ++ GST_DEBUG ("ZRong ------------------- spacemit memory_dispose (%d, %d), push mem: %d, %d", mem->memsize, base_mem->size, mem->num, base_mem->mini_object.refcount); ++ ++ mem->acquired = FALSE; ++ ++ return FALSE; ++ } ++ GST_DEBUG ("ZRong ------------------- spacemit memory_dispose222 (%d, %d), push mem: %d, %d", mem->memsize, base_mem->size, mem->num, base_mem->mini_object.refcount); ++ ++ return TRUE; ++} ++ ++static GstSpmDmaBufMemory * ++gst_spm_dmabuf_memory_new (GstSpmDmaBufAllocator * allocator, gsize maxsize, ++ GstMemoryFlags flags, GstMemory * parent, gssize offset, gssize size) ++{ ++ GstSpmDmaBufMemory *mem; ++ gint align; ++ ++ /* GStreamer uses a bitmask for the alignment while ++ * OMX uses the alignment itself. So we have to convert ++ * here */ ++ align = 0; ++ ++ if (size == -1) { ++ size = maxsize - offset; ++ } ++ ++ mem = g_slice_new0 (GstSpmDmaBufMemory); ++ gst_memory_init (GST_MEMORY_CAST (mem), flags, (GstAllocator *) allocator, ++ parent, maxsize, align, offset, size); ++ ++ return mem; ++} ++ ++static inline void ++install_dmabuf_mem_dispose (GstSpmDmaBufMemory * mem) ++{ ++ GstMemory *base_mem = (GstMemory *) mem; ++ ++ GST_MINI_OBJECT_CAST (base_mem)->dispose = ++ (GstMiniObjectDisposeFunction) gst_spm_dmabuf_allocator_memory_dispose; ++} ++ ++GstAllocator * ++gst_spm_dmabuf_allocator_new (void) ++{ ++ GstSpmDmaBufAllocator *allocator; ++ ++ allocator = g_object_new (GST_TYPE_SPM_DMABUF_ALLOCATOR, NULL); ++ if (!allocator) ++ return NULL; ++ ++ allocator->id = 888; ++ ++ return GST_ALLOCATOR_CAST (allocator); ++} ++GstMemory * ++gst_spm_dmabuf_allocator_alloc (GstSpmDmaBufAllocator * allocator, gsize size, ++ gint32 fd) ++{ ++ GstSpmDmaBufMemory *mem; ++ guint8 *data; ++ static guint8 i = 0; ++ ++ GST_DEBUG_OBJECT 
(allocator, "ZRong ------------------- spacemit allocator alloc %d.%d %p", size, allocator->id, allocator->memories); ++ ++ //mem = gst_spm_dmabuf_memory_new (allocator, size, 0, NULL, 0, size); ++ mem = gst_dmabuf_allocator_alloc (allocator, fd, size); ++ ++ mem->memsize = size; ++ ++ install_dmabuf_mem_dispose (mem); ++ mem->num = i; ++ i++; ++ mem->acquired = FALSE; ++ ++ GST_DEBUG_OBJECT (allocator, "ZRong ------------------- af spacemit allocator alloc %d", i); ++ ++ return (GstMemory *) mem; ++} ++ ++ ++GstFlowReturn ++gst_spm_dmabuf_allocator_acquire (GstAllocator * base_allocator, GstMemory ** memory) ++{ ++ GstMemory *base_mem; ++ GstSpmDmaBufMemory *mem; ++ GstSpmDmaBufAllocator *allocator = GST_SPM_DMABUF_ALLOCATOR (base_allocator); ++ ++ base_mem = (GstMemory *) g_hash_table_lookup (allocator->memories, GINT_TO_POINTER(allocator->mppframe_id)); ++ if (!base_mem) { ++ base_mem = gst_spm_dmabuf_allocator_alloc (allocator, allocator->info.size, allocator->dmabuf_fd); ++ GST_DEBUG_OBJECT (allocator, "insert id%d fd%d to memories hash", allocator->mppframe_id, allocator->dmabuf_fd); ++ g_hash_table_insert(allocator->memories, GINT_TO_POINTER(allocator->mppframe_id), base_mem); ++ } ++ mem = (GstSpmDmaBufMemory *)base_mem; ++ mem->acquired = TRUE; ++ *memory = base_mem; ++ ++ GST_DEBUG_OBJECT (allocator, "ZRong ------------------- acquire mem (%d, %d) %d, pop mem: %d", (*memory)->size, (*memory)->offset, allocator->mppframe_id, mem->num); ++ return GST_FLOW_OK; ++} ++ ++gboolean ++gst_spm_dmabuf_allocator_configure (GstAllocator * base_allocator, GstVideoInfo * info) ++{ ++ GstSpmDmaBufAllocator *allocator = GST_SPM_DMABUF_ALLOCATOR (base_allocator); ++ ++ if (!info) { ++ GST_ERROR_OBJECT (allocator, "error, get a NULL info to allocator!"); ++ return FALSE; ++ } ++ ++ allocator->info = *info; ++ return TRUE; ++} ++ ++gboolean ++gst_spm_dmabuf_allocator_set_active (GstSpmDmaBufAllocator * allocator, gboolean active) ++{ ++ gboolean changed = FALSE; ++ ++ 
g_mutex_lock (&allocator->lock); ++ ++ if (allocator->active != active) ++ changed = TRUE; ++ ++ GST_DEBUG_OBJECT (allocator, "ZRong ------------------- allocator_set_active(%d %d)", allocator->active, active); ++ ++ if (changed) { ++ if (active) { ++ ++ } else { ++ g_hash_table_foreach (allocator->memories, gst_unref_spm_dmabuf_mem, NULL); ++ g_hash_table_remove_all (allocator->memories); ++ ++ } ++ } ++ ++ allocator->active = active; ++ g_mutex_unlock (&allocator->lock); ++ ++ return changed; ++} ++ ++ ++static gpointer ++gst_spm_dmabuf_memory_map (GstMemory * base_mem, gsize maxsize, GstMapFlags flags) ++{ ++ GstSpmDmaBufMemory *mem = (GstSpmDmaBufMemory *) base_mem; ++ GST_DEBUG ("ZRong ------------------- af memory_map (%d, %d)", maxsize, mem->num); ++ ++ return FRAME_GetDataPointer(mem->mppframe, 0); ++} ++static void ++gst_spm_dmabuf_memory_unmap (GstMemory * base_mem) ++{ ++} ++ ++void ++gst_spm_dmabuf_allocator_mppframe_id (GstAllocator * base_allocator, gint32 id, gint32 fd) ++{ ++ GstSpmDmaBufAllocator *allocator = GST_SPM_DMABUF_ALLOCATOR (base_allocator); ++ ++ if (id < 0 || fd < 0) ++ GST_ERROR_OBJECT (allocator, "error, get a invaild id/fd (%d,%d)!", id, fd); ++ ++ allocator->mppframe_id = id; ++ allocator->dmabuf_fd = fd; ++} ++ ++void ++gst_spm_set_dmabuf_mem (GstMemory * base_mem, MppFrame *mppframe, MppVdecCtx *ctx) ++{ ++ GstSpmDmaBufMemory *mem = (GstSpmDmaBufMemory *) base_mem; ++ ++ mem->mppframe = mppframe; ++ mem->ctx = ctx; ++} ++ ++static void ++gst_spm_dmabuf_allocator_class_init (GstSpmDmaBufAllocatorClass * klass) ++{ ++ GObjectClass *gobject_class; ++ GstAllocatorClass *allocator_class; ++ ++ allocator_class = GST_ALLOCATOR_CLASS (klass); ++ gobject_class = G_OBJECT_CLASS (klass); ++ ++ allocator_class->alloc = NULL; ++ allocator_class->free = gst_spm_dmabuf_allocator_free; ++ ++ gobject_class->finalize = gst_spm_dmabuf_allocator_finalize; ++} ++ ++static void ++gst_spm_dmabuf_allocator_init (GstSpmDmaBufAllocator * allocator) ++{ ++ 
GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator); ++ ++ alloc->mem_type = GST_SPM_DMABUF_MEMORY_NAME; ++ alloc->mem_map = gst_spm_dmabuf_memory_map; ++ alloc->mem_unmap = gst_spm_dmabuf_memory_unmap; ++ ++ allocator->memories = g_hash_table_new (g_direct_hash, g_direct_equal); ++ allocator->mppframe_id = -1; ++ allocator->active = FALSE; ++ g_mutex_init (&allocator->lock); ++ ++ GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); ++} ++ +diff --git a/ext/spacemit/spacemitcodec/gstspmdmabufallocator.h b/ext/spacemit/spacemitcodec/gstspmdmabufallocator.h +new file mode 100755 +index 0000000..d9d290a +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/gstspmdmabufallocator.h +@@ -0,0 +1,123 @@ ++/* GStreamer ++ * ++ * Copyright (C) 2016 Igalia ++ * ++ * Authors: ++ * Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ ++ ++#ifndef __GST_SPM_DMABUF_ALLOCATOR_H__ ++#define __GST_SPM_DMABUF_ALLOCATOR_H__ ++ ++#include ++#include ++#include ++#include ++ ++G_BEGIN_DECLS ++ ++/* ---------------------------------------------------------------------*/ ++/* GstSpmDmaBufMemory */ ++/* ---------------------------------------------------------------------*/ ++ ++typedef struct _GstSpmDmaBufAllocator GstSpmDmaBufAllocator; ++typedef struct _GstSpmDmaBufAllocatorClass GstSpmDmaBufAllocatorClass; ++typedef struct _GstSpmDmaBufMemory GstSpmDmaBufMemory; ++ ++#define GST_SPM_DMABUF_MEMORY_NAME "GstSpmDmaBufMemory" ++ ++#define GST_SPM_DMABUF_ALLOCATOR_CAST(allocator) \ ++ ((GstSpmDmaBufAllocator *) (allocator)) ++ ++#define GST_TYPE_SPM_DMABUF_ALLOCATOR \ ++ (gst_spm_dmabuf_allocator_get_type ()) ++#define GST_SPM_DMABUF_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SPM_DMABUF_ALLOCATOR, \ ++ GstSpmDmaBufAllocator)) ++#define GST_IS_SPM_DMABUF_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SPM_DMABUF_ALLOCATOR)) ++ ++ ++struct _GstSpmDmaBufMemory ++{ ++ GstMemory parent; ++ ++ gboolean acquired; ++ MppFrame *mppframe; ++ gpointer ptr; ++ ++ MppVdecCtx *ctx; ++ guint32 num; ++ guint32 memsize; ++ gint32 mppframe_id; ++ ++ guint32 fd; ++}; ++ ++/* ++ * GstSpmDmaBufAllocator: ++ * ++ * A SPM DMABuf memory allocator object. ++ */ ++struct _GstSpmDmaBufAllocator ++{ ++ GstDmaBufAllocator parent_instance; ++ gboolean active; ++ GMutex lock; ++ ++ GHashTable *memories; ++ gint32 mppframe_id; ++ gint32 dmabuf_fd; ++ guint32 id; ++ GstVideoInfo info; ++}; ++ ++/* ++ * GstSpmDmaBufAllocatorClass: ++ * ++ * A SPM DMABuf memory allocator class. 
++ */ ++struct _GstSpmDmaBufAllocatorClass ++{ ++ GstDmaBufAllocatorClass parent_class; ++}; ++GstAllocator * ++gst_spm_dmabuf_allocator_new (void); ++gboolean gst_is_spm_dmabuf_memory (GstMemory *mem); ++GstMemory * ++gst_spm_dmabuf_allocator_alloc (GstSpmDmaBufAllocator * allocator, gsize size, ++ GstAllocationParams * params); ++GstFlowReturn ++gst_spm_dmabuf_allocator_acquire (GstAllocator * base_allocator, GstMemory ** memory); ++gboolean ++gst_spm_dmabuf_allocator_configure (GstAllocator * base_allocator, GstVideoInfo * info); ++gboolean ++gst_spm_dmabuf_allocator_set_active (GstSpmDmaBufAllocator * allocator, gboolean active); ++ ++void ++gst_spm_dmabuf_allocator_mppframe_id (GstAllocator * base_allocator, gint32 id); ++void ++gst_spm_set_dmabuf_mem (GstMemory * base_mem, MppFrame *mppframe, MppVdecCtx *ctx); ++ ++GType gst_spm_dmabuf_allocator_get_type (void); ++ ++G_END_DECLS ++ ++#endif /* __GST_SPM_DMABUF_ALLOCATOR_H__ */ +diff --git a/ext/spacemit/spacemitcodec/meson.build b/ext/spacemit/spacemitcodec/meson.build +new file mode 100755 +index 0000000..c6ed6c3 +--- /dev/null ++++ b/ext/spacemit/spacemitcodec/meson.build +@@ -0,0 +1,22 @@ ++spacemitcodec_sources = [ ++ 'gstspacemitallocator.c', ++ 'gstspacemitbufferpool.c', ++ 'gstspacemitenc.c', ++ 'gstspacemitdec.c', ++ 'gstspacemitplugin.c', ++] ++ ++spacemitmpp_dep = dependency('spacemit_mpp', version : '>= 0.0.0', required : false) ++ ++if spacemitmpp_dep.found() ++ gstspacemitcodec = library('gstspacemitcodec', ++ spacemitcodec_sources, ++ c_args : gst_plugins_bad_args, ++ link_args : noseh_link_args, ++ include_directories : [configinc], ++ dependencies : [gstvideo_dep, spacemitmpp_dep, gstallocators_dep], ++ install : true, ++ install_dir : plugins_install_dir, ++ ) ++ plugins += [gstspacemitcodec] ++endif +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.c b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +new file mode 100755 +index 0000000..f1bb069 +--- /dev/null ++++ 
b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +@@ -0,0 +1,807 @@ ++/* GStreamer ++ * Copyright (C) 2021 FIXME ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin Street, Suite 500, ++ * Boston, MA 02110-1335, USA. ++ */ ++/** ++ * SECTION:element-gstspacemitsrc ++ * ++ * The spacemitsrc element is a source for k1x cameras like the Retiga 2000R ++ * ++ * ++ * Example launch line ++ * |[ ++ * gst-launch -v spacemitsrc ! videoconvert ! 
autovideosink ++ * ]| ++ * Shows video from the default k1x camera device ++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include ++#include ++#include ++#include "gstspacemitsrc.h" ++ ++GST_DEBUG_CATEGORY_STATIC (gst_spacemitsrc_debug); ++#define GST_CAT_DEFAULT gst_spacemitsrc_debug ++ ++/* prototypes */ ++static void gst_spacemitsrc_set_property (GObject * object, ++ guint property_id, const GValue * value, GParamSpec * pspec); ++static void gst_spacemitsrc_get_property (GObject * object, ++ guint property_id, GValue * value, GParamSpec * pspec); ++static void gst_spacemitsrc_dispose (GObject * object); ++static void gst_spacemitsrc_finalize (GObject * object); ++ ++static gboolean gst_spacemitsrc_start (GstBaseSrc * src); ++static gboolean gst_spacemitsrc_stop (GstBaseSrc * src); ++static GstCaps *gst_spacemitsrc_get_caps (GstBaseSrc * src, GstCaps * filter); ++static gboolean gst_spacemitsrc_set_caps (GstBaseSrc * src, GstCaps * caps); ++static gboolean gst_spacemitsrc_unlock (GstBaseSrc * src); ++static gboolean gst_spacemitsrc_unlock_stop (GstBaseSrc * src); ++static gboolean gst_spacemitsrc_decide_allocation (GstBaseSrc * bsrc, GstQuery * query); ++static GstFlowReturn gst_spacemitsrc_create (GstPushSrc * src, GstBuffer ** buf); ++ ++static void gst_spacemitsrc_frame_callback (void *userPtr, unsigned long userData, ++ int errcode, unsigned long flags); ++ ++enum ++{ ++ PROP_0, ++ PROP_LOCATION, ++ PROP_DEVICE_INDEX, ++ PROP_NUM_CAPTURE_BUFFERS, ++ PROP_TIMEOUT, ++ PROP_EXPOSURE, ++ PROP_GAIN, ++ PROP_OFFSET, ++ PROP_FORMAT, ++ PROP_X, ++ PROP_Y, ++ PROP_WIDTH, ++ PROP_HEIGHT, ++ PROP_BINNING ++}; ++ ++#define DEFAULT_PROP_DEVICE_INDEX 0 ++#define DEFAULT_PROP_NUM_CAPTURE_BUFFERS 3 ++#define DEFAULT_PROP_TIMEOUT 500 ++#define DEFAULT_PROP_EXPOSURE 16384 ++#define DEFAULT_PROP_GAIN 1.0 ++#define DEFAULT_PROP_OFFSET 0 ++//#define DEFAULT_PROP_FORMAT qfmtMono16 ++#define DEFAULT_PROP_FORMAT 2 ++#define DEFAULT_PROP_X 0 ++#define 
DEFAULT_PROP_Y 0 ++#define DEFAULT_PROP_WIDTH 0 ++#define DEFAULT_PROP_HEIGHT 0 ++#define DEFAULT_PROP_BINNING 1 ++ ++GHashTable *frame_hash; ++ ++/* pad templates */ ++static GstStaticPadTemplate gst_spacemitsrc_src_template = ++GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (SPM_SRC_CAPS_MAKE ("{" SPM_SRC_FORMATS "}") ";") ++ ); ++ ++/* class initialization */ ++G_DEFINE_TYPE (GstSpacemitSrc, gst_spacemitsrc, GST_TYPE_PUSH_SRC); ++ ++static int g_spacemitsrc_use_count = 0; ++ ++static void ++gst_spacemitsrc_driver_ref () ++{ ++ if (g_spacemitsrc_use_count == 0) { ++ //SPACEMIT_LoadDriver (); ++ } ++ g_spacemitsrc_use_count++; ++} ++ ++static void ++gst_spacemitsrc_driver_unref () ++{ ++ g_spacemitsrc_use_count--; ++ if (g_spacemitsrc_use_count == 0) { ++ //SPACEMIT_ReleaseDriver (); ++ } ++} ++ ++static void ++gst_spacemitsrc_class_init (GstSpacemitSrcClass * klass) ++{ ++ GObjectClass *gobject_class = G_OBJECT_CLASS (klass); ++ GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass); ++ GstBaseSrcClass *gstbasesrc_class = GST_BASE_SRC_CLASS (klass); ++ GstPushSrcClass *gstpushsrc_class = GST_PUSH_SRC_CLASS (klass); ++ ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spacemitsrc", 0, ++ "k1x-cam Spacemitsrc source"); ++ ++ gobject_class->set_property = gst_spacemitsrc_set_property; ++ gobject_class->get_property = gst_spacemitsrc_get_property; ++ gobject_class->dispose = gst_spacemitsrc_dispose; ++ gobject_class->finalize = gst_spacemitsrc_finalize; ++ ++ gst_element_class_add_pad_template (gstelement_class, ++ gst_static_pad_template_get (&gst_spacemitsrc_src_template)); ++ ++ gst_element_class_set_static_metadata (gstelement_class, ++ "k1x cam Video Source", "Source/Video", ++ "k1x cam spacemit video source", "zhirong.li@spacemit.com"); ++ ++ gstbasesrc_class->start = GST_DEBUG_FUNCPTR (gst_spacemitsrc_start); ++ gstbasesrc_class->stop = GST_DEBUG_FUNCPTR (gst_spacemitsrc_stop); ++ gstbasesrc_class->get_caps = 
GST_DEBUG_FUNCPTR (gst_spacemitsrc_get_caps); ++ gstbasesrc_class->set_caps = GST_DEBUG_FUNCPTR (gst_spacemitsrc_set_caps); ++ gstbasesrc_class->unlock = GST_DEBUG_FUNCPTR (gst_spacemitsrc_unlock); ++ gstbasesrc_class->unlock_stop = GST_DEBUG_FUNCPTR (gst_spacemitsrc_unlock_stop); ++ gstbasesrc_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_spacemitsrc_decide_allocation); ++ gstpushsrc_class->create = GST_DEBUG_FUNCPTR (gst_spacemitsrc_create); ++ ++ /* Install GObject properties */ ++ g_object_class_install_property (gobject_class, PROP_LOCATION, ++ g_param_spec_string ("location", "File Location", ++ "Location of the file to read", NULL, ++ G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | ++ GST_PARAM_MUTABLE_READY)); ++ g_object_class_install_property (gobject_class, PROP_DEVICE_INDEX, ++ g_param_spec_int ("device-index", "Device index", ++ "Index of device, use -1 to enumerate all and select last", -1, ++ G_MAXINT, DEFAULT_PROP_DEVICE_INDEX, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (gobject_class, PROP_NUM_CAPTURE_BUFFERS, ++ g_param_spec_uint ("num-capture-buffers", "Number of capture buffers", ++ "Number of capture buffers", 1, G_MAXUINT, ++ DEFAULT_PROP_NUM_CAPTURE_BUFFERS, ++ (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_TIMEOUT, ++ g_param_spec_int ("timeout", "Timeout (ms)", ++ "Timeout in ms to wait for a frame beyond exposure time", 0, G_MAXINT, ++ DEFAULT_PROP_TIMEOUT, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_EXPOSURE, ++ g_param_spec_uint ("exposure", "Exposure (us)", ++ "Exposure time in microseconds", 0, G_MAXINT, DEFAULT_PROP_EXPOSURE, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_GAIN, ++ g_param_spec_double ("gain", "Normalized gain", 
"Normalized gain", 0, ++ 1000, DEFAULT_PROP_GAIN, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_OFFSET, ++ g_param_spec_int ("offset", "Offset", "Absolute offset", -G_MAXINT, ++ G_MAXINT, DEFAULT_PROP_OFFSET, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_FORMAT, ++ g_param_spec_int ("format", "Image format", ++ "Image format (2=GRAY8, 3=GRAY16_LE)", 2, 3, DEFAULT_PROP_FORMAT, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_X, ++ g_param_spec_int ("x", "ROI x pixel", "ROI x pixel position", 0, G_MAXINT, ++ DEFAULT_PROP_X, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_Y, ++ g_param_spec_int ("y", "ROI y pixel", "ROI y pixel position", 0, G_MAXINT, ++ DEFAULT_PROP_Y, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_WIDTH, ++ g_param_spec_int ("width", "ROI width", "ROI width", 0, G_MAXINT, ++ DEFAULT_PROP_WIDTH, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_HEIGHT, ++ g_param_spec_int ("height", "ROI height", "ROI height", 0, G_MAXINT, ++ DEFAULT_PROP_HEIGHT, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++ g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_BINNING, ++ g_param_spec_int ("binning", "Binning", "Symmetrical binning", 1, 8, ++ DEFAULT_PROP_BINNING, ++ (GParamFlags) (G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE))); ++} ++ ++static void ++gst_spacemitsrc_reset (GstSpacemitSrc * src) ++{ ++// src->handle = NULL; ++ ++ src->exposure = DEFAULT_PROP_EXPOSURE; ++ src->gain = DEFAULT_PROP_GAIN; ++ src->offset = DEFAULT_PROP_OFFSET; ++ src->format 
= DEFAULT_PROP_FORMAT;
++  src->x = DEFAULT_PROP_X;
++  src->y = DEFAULT_PROP_Y;
++  src->width = DEFAULT_PROP_WIDTH;
++  src->height = DEFAULT_PROP_HEIGHT;
++  src->binning = DEFAULT_PROP_BINNING;
++
++  src->last_frame_count = 0;
++  src->total_dropped_frames = 0;
++
++  if (src->caps) {
++    gst_caps_unref (src->caps);
++    src->caps = NULL;
++  }
++
++  if (src->queue) {
++    // TODO: remove dangling buffers
++    g_async_queue_unref (src->queue);
++  }
++  src->queue = g_async_queue_new ();
++  /* NOTE(review): g_async_queue_new() already hands back an owned reference;
++   * this extra ref is only dropped once by the unref above on the next reset,
++   * so the previous queue looks leaked unless a matching unref exists in code
++   * not visible here -- confirm before removing. */
++  g_async_queue_ref (src->queue);
++  GST_DEBUG_OBJECT (src, "new queue %p", src->queue);
++}
++
++/* Instance initialiser: marks the source as live, switches it to time
++ * format, loads the default property values and resets the per-stream
++ * state. */
++static void
++gst_spacemitsrc_init (GstSpacemitSrc * src)
++{
++  GST_DEBUG_OBJECT (src, "Initialize instance");
++
++  gst_spacemitsrc_driver_ref ();
++
++  /* set source as live (no preroll) */
++  gst_base_src_set_live (GST_BASE_SRC (src), TRUE);
++
++  /* override default of BYTES to operate in time mode */
++  gst_base_src_set_format (GST_BASE_SRC (src), GST_FORMAT_TIME);
++
++  /* initialize member variables */
++  src->device_index = DEFAULT_PROP_DEVICE_INDEX;
++  src->num_capture_buffers = DEFAULT_PROP_NUM_CAPTURE_BUFFERS;
++  src->timeout = DEFAULT_PROP_TIMEOUT;
++
++  src->stop_requested = FALSE;
++  src->caps = NULL;
++  src->queue = NULL;
++
++  /* frame_hash is a file-scope table shared by all instances: create it only
++   * once, otherwise a second instance would leak the existing table and drop
++   * the entries registered by the first one. */
++  if (frame_hash == NULL)
++    frame_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
++  gst_spacemitsrc_reset (src);
++}
++
++
++/* Placeholder: the driver call is commented out, so this is currently a no-op. */
++static void
++gst_spacemitsrc_set_exposure (GstSpacemitSrc * src, unsigned long exposure)
++{
++  //SPACEMIT_SetParam (&src->qsettings, qprmExposure, exposure);
++}
++
++/* Placeholder: the driver call is commented out, so this is currently a no-op. */
++static void
++gst_spacemitsrc_set_gain (GstSpacemitSrc * src, float gain)
++{
++  //SPACEMIT_SetParam (&src->qsettings, qprmNormalizedGain,
++  //    (unsigned long) (gain * 1000000));
++}
++
++/* Placeholder: the driver call is commented out, so this is currently a no-op. */
++static void
++gst_spacemitsrc_set_offset (GstSpacemitSrc * src, long offset)
++{
++  //SPACEMIT_SetParamS32 (&src->qsettings, qprmS32AbsoluteOffset, offset);
++}
++/* Replace the stored "location" string with a private copy of @location
++ * (or NULL when @location is NULL). The previous string is freed.
++ * Returns TRUE; the operation cannot fail. */
++static gboolean
++gst_spacemitsrc_set_location (GstSpacemitSrc *src, const gchar * location)
++{
++  g_free (src->filename);
++
++  /* clear the filename if we get a NULL */
++  if (location == NULL) {
++    src->filename = NULL;
++  } else {
++    /* we store the filename as received by the application. On Windows this
++     * should be UTF8 */
++    src->filename = g_strdup (location);
++    GST_INFO_OBJECT (src, "filename : %s", src->filename);
++  }
++
++  /* was "return 0", i.e. FALSE, which reported failure on success */
++  return TRUE;
++}
++
++/* GObject set_property vfunc: copies the incoming value into the matching
++ * instance field. Exposure, gain and offset additionally raise
++ * send_settings; width is rounded down to a multiple of 4. */
++void
++gst_spacemitsrc_set_property (GObject * object, guint property_id,
++    const GValue * value, GParamSpec * pspec)
++{
++  GstSpacemitSrc *src;
++
++  /* same guard as gst_spacemitsrc_get_property */
++  g_return_if_fail (GST_IS_SPACEMIT_SRC (object));
++  src = GST_SPACEMIT_SRC (object);
++
++  switch (property_id) {
++    case PROP_LOCATION:
++      gst_spacemitsrc_set_location (src, g_value_get_string (value));
++      break;
++    case PROP_DEVICE_INDEX:
++      src->device_index = g_value_get_int (value);
++      break;
++    case PROP_NUM_CAPTURE_BUFFERS:
++      src->num_capture_buffers = g_value_get_uint (value);
++      break;
++    case PROP_TIMEOUT:
++      src->timeout = g_value_get_int (value);
++      break;
++    case PROP_EXPOSURE:
++      src->exposure = g_value_get_uint (value);
++      src->send_settings = TRUE;
++      break;
++    case PROP_GAIN:
++      src->gain = g_value_get_double (value);
++      src->send_settings = TRUE;
++      break;
++    case PROP_OFFSET:
++      src->offset = g_value_get_int (value);
++      src->send_settings = TRUE;
++      break;
++    case PROP_FORMAT:
++      src->format = g_value_get_int (value);
++      break;
++    case PROP_X:
++      src->x = g_value_get_int (value);
++      break;
++    case PROP_Y:
++      src->y = g_value_get_int (value);
++      break;
++    case PROP_WIDTH:
++      src->width = GST_ROUND_DOWN_4 (g_value_get_int (value));
++      break;
++    case PROP_HEIGHT:
++      src->height = g_value_get_int (value);
++      break;
++    case PROP_BINNING:
++      src->binning = g_value_get_int (value);
++      break;
++    default:
++      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
++      break;
++  }
++}
++
++void
++gst_spacemitsrc_get_property (GObject * object, guint property_id,
++    GValue * value, GParamSpec * pspec)
++{
++  GstSpacemitSrc *src;
++
++  g_return_if_fail (GST_IS_SPACEMIT_SRC (object));
++  src = GST_SPACEMIT_SRC (object);
++
++  switch
(property_id) { ++ case PROP_LOCATION: ++ g_value_set_string (value, src->filename); ++ break; ++ case PROP_DEVICE_INDEX: ++ g_value_set_int (value, src->device_index); ++ break; ++ case PROP_NUM_CAPTURE_BUFFERS: ++ g_value_set_uint (value, src->num_capture_buffers); ++ break; ++ case PROP_TIMEOUT: ++ g_value_set_int (value, src->timeout); ++ break; ++ case PROP_EXPOSURE: ++ g_value_set_uint (value, src->exposure); ++ break; ++ case PROP_GAIN: ++ g_value_set_double (value, src->gain); ++ break; ++ case PROP_OFFSET: ++ g_value_set_int (value, src->offset); ++ break; ++ case PROP_FORMAT: ++ g_value_set_int (value, src->format); ++ break; ++ case PROP_X: ++ g_value_set_int (value, src->x); ++ break; ++ case PROP_Y: ++ g_value_set_int (value, src->y); ++ break; ++ case PROP_WIDTH: ++ g_value_set_int (value, src->width); ++ break; ++ case PROP_HEIGHT: ++ g_value_set_int (value, src->height); ++ break; ++ case PROP_BINNING: ++ g_value_set_int (value, src->binning); ++ break; ++ default: ++ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); ++ break; ++ } ++} ++ ++void ++gst_spacemitsrc_dispose (GObject * object) ++{ ++ GstSpacemitSrc *src; ++ ++ g_return_if_fail (GST_IS_SPACEMIT_SRC (object)); ++ src = GST_SPACEMIT_SRC (object); ++ ++ /* clean up as possible. 
may be called multiple times */ ++ ++ G_OBJECT_CLASS (gst_spacemitsrc_parent_class)->dispose (object); ++} ++ ++void ++gst_spacemitsrc_finalize (GObject * object) ++{ ++ GstSpacemitSrc *src; ++ ++ g_return_if_fail (GST_IS_SPACEMIT_SRC (object)); ++ src = GST_SPACEMIT_SRC (object); ++ ++ /* clean up object here */ ++ ++ if (src->caps) { ++ gst_caps_unref (src->caps); ++ src->caps = NULL; ++ } ++ ++ gst_spacemitsrc_driver_unref (); ++ ++ G_OBJECT_CLASS (gst_spacemitsrc_parent_class)->finalize (object); ++} ++ ++typedef struct ++{ ++ gint fd; ++ IMAGE_BUFFER_S* outputBuf; ++ GstSpacemitSrc *src; ++ GstClockTime clock_time; ++ GstBuffer *gstbuf; ++} VideoFrame; ++ ++static void ++video_frame_release (void *data) ++{ ++ IMAGE_BUFFER_S* outputBuf = (IMAGE_BUFFER_S *) data; ++ { ++ VideoFrame * frame = (VideoFrame *) g_hash_table_lookup (frame_hash, outputBuf); ++ if (!frame) { ++ CLOG_ERROR("can't get a videoframe from key %p", outputBuf); ++ } else { ++ GST_DEBUG_OBJECT (frame->src, "get cpp output buffer %p back, the gstbuffer is %p", outputBuf, frame->gstbuf); ++ } ++ //List_Push out buffer to list ++ // List_Push(cpp_out_list[0], (void*)outputBuf); ++ gst_release_cpp_buffer(outputBuf); ++ } ++} ++ ++static VideoFrame * ++video_frame_create (GstSpacemitSrc * src, IMAGE_BUFFER_S* buffer) ++{ ++ VideoFrame *frame = g_new (VideoFrame, 1); ++ ++ frame->src = src; ++ frame->outputBuf = buffer; ++ frame->fd = buffer->m.fd; ++ ++ return frame; ++} ++ ++static int gst_get_cpp_buf_callback(IMAGE_BUFFER_S* cpp_out_buffer) ++{ ++ VideoFrame * frame = (VideoFrame *) g_hash_table_lookup (frame_hash, cpp_out_buffer); ++ if (!frame) { ++ CLOG_ERROR("can't get a videoframe from key %p", cpp_out_buffer); ++ return -EINVAL; ++ } ++ GstClock *clock = gst_element_get_clock (GST_ELEMENT (frame->src)); ++ frame->clock_time = gst_clock_get_time (clock); ++ gst_object_unref (clock); ++ CLOG_INFO("PUSH a cpp output buffer to queue"); ++ ++ g_async_queue_push (frame->src->queue, 
cpp_out_buffer); ++ GST_DEBUG_OBJECT (frame->src, "PUSH a cpp output buffer %p to queue %p, now queue had %d buffer", ++ cpp_out_buffer, frame->src->queue, g_async_queue_length(frame->src->queue)); ++ ++ return 0; ++} ++ ++static int gst_cpp_buf_prepare_callback(void *gst_buf_prepare_data, IMAGE_BUFFER_S* buffer) ++{ ++ GstSpacemitSrc * src = (GstSpacemitSrc *) gst_buf_prepare_data; ++ ++ VideoFrame *frame = video_frame_create (src, buffer); ++ g_hash_table_insert(frame_hash, buffer, frame); ++ ++ return 0; ++} ++ ++static gboolean ++gst_spacemitsrc_setup_stream (GstSpacemitSrc * src) ++{ ++ int ret = 0; ++ void* sensorHandle = NULL; ++ SENSOR_MODULE_INFO sensor_info = {0}; /* zeroed: sensorId is copied into src->sensorInfoId without being filled in here */ ++ int pipelineId = 0; ++ int firmwareId = 0; ++ int viChnId = 0; ++ int rawdumpChnId = 0; ++ IMAGE_INFO_S img_info = {}; ++ struct tuning_objs_config tuning_cfg = {0}; ++ ++ GST_DEBUG_OBJECT (src, "start setup stream camera, json:%s", src->filename); ++ src->para.gst_get_cpp_buffer = gst_get_cpp_buf_callback; ++ src->para.gst_cpp_buf_prepare = gst_cpp_buf_prepare_callback; ++ src->para.gst_cpp_buf_prepare_data = src; ++ src->para.jsonfile = src->filename; ++ ++ ret = gst_setup_camera_start (&src->para); ++ if (ret) { ++ GST_ERROR_OBJECT (src, "setup stream camera fail!
ret: %d", ret); ++ return FALSE; ++ } ++ ++ src->pipelineId = pipelineId; ++ src->firmwareId = firmwareId; ++ src->sensorHandle = sensorHandle; ++ src->sensorInfoId = sensor_info.sensorId; ++ ++ GST_DEBUG_OBJECT (src, "sensor stream on"); ++ { ++ GstStructure *structure; ++ GstCaps *caps; ++ caps = gst_caps_new_empty (); ++ structure = gst_structure_from_string ("video/x-raw", NULL); ++ const char *gst_format; ++ gst_format = "NV12"; ++ gst_structure_set (structure, ++ "format", G_TYPE_STRING, gst_format, ++ "width", G_TYPE_INT, src->para.out_width, ++ "height", G_TYPE_INT, src->para.out_height, ++ "framerate", GST_TYPE_FRACTION, 30, 1, NULL); ++ gst_caps_append_structure (caps, structure); ++ ++ if (src->caps) { ++ gst_caps_unref (src->caps); ++ } ++ gst_caps_set_features (caps, 0, ++ gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_DMABUF, NULL)); ++ GST_INFO_OBJECT (src, "set DMABUF feature to spacemitsrc src cap %" GST_PTR_FORMAT, caps); ++ src->caps = caps; ++ gst_base_src_set_caps (GST_BASE_SRC (src), src->caps); ++ } ++ ++ GST_DEBUG_OBJECT (src, "finish setup stream camera"); ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitsrc_start (GstBaseSrc * bsrc) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ ++ GST_DEBUG_OBJECT (src, "start"); ++ ++ if (!gst_spacemitsrc_setup_stream (src)) { ++ GST_ERROR_OBJECT (src, "setup stream error"); ++ /* error already sent */ ++ return FALSE; ++ } ++ GST_DEBUG_OBJECT (src, "finish start"); ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitsrc_stop (GstBaseSrc * bsrc) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ GST_DEBUG_OBJECT (src, "stop"); ++ ++ if (src->handle) { ++ //SPACEMIT_CloseCamera (src->handle); ++ src->handle = NULL; ++ } ++ ++ gst_setup_camera_stop (&src->para); ++ GST_DEBUG_OBJECT (src, "sensor stream off"); ++ gst_spacemitsrc_reset (src); ++ ++ return TRUE; ++} ++ ++static GstCaps * ++gst_spacemitsrc_get_caps (GstBaseSrc * bsrc, GstCaps * filter) ++{ ++ GstSpacemitSrc *src = 
GST_SPACEMIT_SRC (bsrc); ++ GstCaps *caps; ++ ++ if (src->caps == NULL) { ++ caps = gst_pad_get_pad_template_caps (GST_BASE_SRC_PAD (src)); ++ } else { ++ caps = gst_caps_copy (src->caps); ++ } ++ ++ GST_DEBUG_OBJECT (src, "The caps before filtering are %" GST_PTR_FORMAT, ++ caps); ++ ++ if (filter && caps) { ++ GstCaps *tmp = gst_caps_intersect (caps, filter); ++ gst_caps_unref (caps); ++ caps = tmp; ++ } ++ ++ GST_DEBUG_OBJECT (src, "The caps after filtering are %" GST_PTR_FORMAT, caps); ++ ++ return caps; ++} ++ ++static gboolean ++gst_spacemitsrc_set_caps (GstBaseSrc * bsrc, GstCaps * caps) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ ++ GST_DEBUG_OBJECT (src, "The caps being set are %" GST_PTR_FORMAT, caps); ++ ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitsrc_unlock (GstBaseSrc * bsrc) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ ++ GST_LOG_OBJECT (src, "unlock"); ++ ++ src->stop_requested = TRUE; ++ ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitsrc_unlock_stop (GstBaseSrc * bsrc) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ ++ GST_LOG_OBJECT (src, "unlock_stop"); ++ ++ src->stop_requested = FALSE; ++ ++ return TRUE; ++} ++ ++static gboolean ++gst_spacemitsrc_decide_allocation (GstBaseSrc * bsrc, GstQuery * query) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (bsrc); ++ GstAllocator *allocator = NULL; ++ gint nallocator; ++ GstAllocationParams params; ++ GstBufferPool *pool = NULL; ++ guint size, min = 1, max = 0; ++ ++ GST_DEBUG_OBJECT (src, "decide_allocation be params: %" GST_PTR_FORMAT, query); ++ allocator = gst_spacemit_src_allocator_new (); ++ if (!allocator) ++ goto error_no_allocator; ++ ++ src->allocator = allocator; ++ ++ while (gst_query_get_n_allocation_pools (query) > 0) { ++ // gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); ++ GST_DEBUG_OBJECT (src, "------------------ 2222"); ++ ++ gst_query_remove_nth_allocation_pool (query, 0); ++ // gst_object_unref (pool); ++ 
pool = NULL; ++ } ++ ++ while (gst_query_get_n_allocation_params (query) > 0) { ++ GST_DEBUG_OBJECT (src, "------------------ 3333"); ++ gst_query_remove_nth_allocation_param (query, 0); ++ } ++ ++ nallocator = gst_query_get_n_allocation_params (query); ++ if (nallocator > 0) { ++ GST_ERROR_OBJECT (src, "get %d allocators from query", nallocator); ++ return FALSE; ++ } else { ++ gst_allocation_params_init (&params); /* defaults only; the allocator is registered with NULL params on the next line */ ++ gst_query_add_allocation_param (query, allocator, NULL); ++ gst_object_unref (allocator); ++ } ++ ++ GST_DEBUG_OBJECT (src, "get %d allocators from query, dma allocator:%d", ++ nallocator, GST_IS_DMABUF_ALLOCATOR(src->allocator)); ++ ++ GST_DEBUG_OBJECT (src, "decide_allocation af params: %" GST_PTR_FORMAT, query); ++ if (!GST_BASE_SRC_CLASS(gst_spacemitsrc_parent_class)->decide_allocation(bsrc, query)) ++ return FALSE; ++ ++ return TRUE; ++ ++error_no_allocator: ++{ ++ GST_ERROR_OBJECT (src, "failed to create allocator"); ++ return FALSE; ++} ++} ++ ++static GstFlowReturn ++gst_spacemitsrc_create (GstPushSrc * psrc, GstBuffer ** buf) ++{ ++ GstSpacemitSrc *src = GST_SPACEMIT_SRC (psrc); ++ VideoFrame *video_frame; ++ GST_DEBUG_OBJECT (src, "create"); ++ IMAGE_BUFFER_S* outputBuf; ++ ++ //wait queue and get cpp output buffer ++ outputBuf = ++ (IMAGE_BUFFER_S *) g_async_queue_timeout_pop (src->queue, ++ (guint64) 100 * 1000 * 1000); ++ if (!outputBuf) { ++ GST_ELEMENT_ERROR (src, RESOURCE, OPEN_READ, ++ ("Failed to get buffer in %d ms, queue %p had %d buffer", src->timeout, src->queue, g_async_queue_length(src->queue)), (NULL)); ++ return GST_FLOW_ERROR; ++ } ++ GST_DEBUG_OBJECT (src, "pop a cpp output buffer %p", outputBuf); ++ video_frame = (VideoFrame *) g_hash_table_lookup (frame_hash, outputBuf); ++ if (!video_frame) { ++ CLOG_ERROR("can't get a videoframe from key %p", outputBuf); ++ return GST_FLOW_ERROR; ++ } ++ ++ GstBuffer *newbuf; ++ GstMemory *mem; ++ newbuf = gst_buffer_new (); ++ mem = gst_dmabuf_allocator_alloc_with_flags (src->allocator,
video_frame->fd, ++ src->para.out_width * src->para.out_height * 1.5, GST_FD_MEMORY_FLAG_DONT_CLOSE); ++ gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), GST_SPACEMIT_SRC_DMABUF_MEMORY_QUARK, outputBuf, ++ video_frame_release); ++ gst_buffer_append_memory (newbuf, mem); ++ ++ //bind release func to cpp output buffer ++ // *buf = gst_buffer_new_wrapped_full ((GstMemoryFlags) ++ // GST_MEMORY_FLAG_PHYSICALLY_CONTIGUOUS, ++ // (gpointer) outputBuf->planes[0].virAddr, 640*480*1.5, 0, ++ // 640*480*1.5, outputBuf, ++ // (GDestroyNotify) video_frame_release); ++ ++ *buf = newbuf; ++ video_frame->gstbuf = *buf; ++ GST_BUFFER_TIMESTAMP (*buf) = ++ GST_CLOCK_DIFF (gst_element_get_base_time (GST_ELEMENT (src)), ++ video_frame->clock_time); ++ GST_DEBUG_OBJECT (src, "gst buffer %p reture from create, no close fd: %d ", *buf, video_frame->fd); ++ ++ return GST_FLOW_OK; ++} ++ ++static gboolean ++plugin_init (GstPlugin * plugin) ++{ ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spacemitsrc", 0, ++ "debug category for spacemitsrc plugin"); ++ ++ if (!gst_element_register (plugin, "spacemitsrc", GST_RANK_NONE, ++ gst_spacemitsrc_get_type ())) { ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++GST_PLUGIN_DEFINE (GST_VERSION_MAJOR, ++ GST_VERSION_MINOR, ++ spacemitsrc, ++ "spacemitsrc video element", ++ plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, ++ GST_PACKAGE_ORIGIN) +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.h b/ext/spacemit/spacemitsrc/gstspacemitsrc.h +new file mode 100755 +index 0000000..5c657ca +--- /dev/null ++++ b/ext/spacemit/spacemitsrc/gstspacemitsrc.h +@@ -0,0 +1,99 @@ ++/* GStreamerspacemitsrc ++ * Copyright (C) 2021 FIXME ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, ++ * Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _GST_SPACEMIT_SRC_H_ ++#define _GST_SPACEMIT_SRC_H_ ++ ++#include ++#include "gstspmsrcallocator.h" ++#include "gst_cam_api.h" ++ ++G_BEGIN_DECLS ++ ++#define GST_TYPE_SPACEMIT_SRC (gst_spacemitsrc_get_type()) ++#define GST_SPACEMIT_SRC(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_SPACEMIT_SRC,GstSpacemitSrc)) ++#define GST_SPACEMIT_SRC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_SPACEMIT_SRC,GstSpacemitSrcClass)) ++#define GST_IS_SPACEMIT_SRC(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_SPACEMIT_SRC)) ++#define GST_IS_SPACEMIT_SRC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_SPACEMIT_SRC)) ++ ++#ifndef GST_CAPS_FEATURE_MEMORY_DMABUF ++#define GST_CAPS_FEATURE_MEMORY_DMABUF "memory:DMABuf" ++#endif ++ ++#define SPM_SRC_OUT_FORMATS "I420, NV21, NV12" ++#define SPM_SRC_FORMATS SPM_SRC_OUT_FORMATS ++//#define SPM_DEC_CAPS_MAKE(fmts) \ ++// GST_VIDEO_CAPS_MAKE (fmts) ";" ++#define SPM_SRC_CAPS_MAKE(fmts) \ ++ GST_VIDEO_CAPS_MAKE (fmts) ";" \ ++ GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_DMABUF, fmts) ++ ++typedef struct _GstSpacemitSrc GstSpacemitSrc; ++typedef struct _GstSpacemitSrcClass GstSpacemitSrcClass; ++ ++struct _GstSpacemitSrc ++{ ++ GstPushSrc base_spacemitsrc; ++ ++ /* camera handle */ ++ gboolean send_settings; ++ gint *handle; ++ /* properties */ ++ gint device_index; ++ guint num_capture_buffers; ++ gint timeout; ++ guint exposure; ++ gdouble gain; ++ gint offset; ++ gint format; ++ gint x; ++ gint y; ++
gint width; ++ gint height; ++ gint binning; ++ ++ GAsyncQueue *queue; ++ GstClockTime base_time; ++ ++ guint32 last_frame_count; ++ guint32 total_dropped_frames; ++ ++ GstCaps *caps; ++ gboolean stop_requested; ++ ++ GstAllocator *allocator; ++ gchar *filename; ++ ++ struct gstParam para; ++ gint sensorInfoId; ++ gint pipelineId; ++ gint firmwareId; ++ void* sensorHandle; ++}; ++ ++struct _GstSpacemitSrcClass ++{ ++ GstPushSrcClass base_spacemitsrc_class; ++}; ++ ++GType gst_spacemitsrc_get_type (void); ++ ++G_END_DECLS ++ ++#endif +diff --git a/ext/spacemit/spacemitsrc/gstspmsrcallocator.c b/ext/spacemit/spacemitsrc/gstspmsrcallocator.c +new file mode 100755 +index 0000000..28b4704 +--- /dev/null ++++ b/ext/spacemit/spacemitsrc/gstspmsrcallocator.c +@@ -0,0 +1,143 @@ ++/* GStreamer ++ * ++ * Copyright (C) 2016 Igalia ++ * ++ * Authors: ++ * Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ * ++ */ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include "gstspmsrcallocator.h" ++ ++#define GST_CAT_DEFAULT spacemit_src_allocator_debug_category ++GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT); ++ ++#define GST_SPACEMIT_SRC_MEMORY_TYPE "SpaceMitSrcMemory" ++ ++#define parent_class gst_spacemit_src_allocator_parent_class ++ ++G_DEFINE_TYPE_WITH_CODE (GstSpaceMitSrcAllocator, gst_spacemit_src_allocator, ++ GST_TYPE_DMABUF_ALLOCATOR, ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spacemitsrcallocator", 0, ++ "Spacemit Src Dma allocator")); ++ ++GQuark ++gst_spacemit_src_dmabuf_memory_quark (void) ++{ ++ static GQuark quark = 0; ++ ++ if (quark == 0) ++ quark = g_quark_from_static_string ("GstSpacemitSrcDmabufMemory"); ++ ++ return quark; ++} ++ ++static void ++gst_spacemit_src_allocator_free (GstAllocator * allocator, GstMemory * base_mem) ++{ ++ GST_DEBUG ("ZRong ------------------- spacemit free mem"); ++ ++} ++ ++static void ++gst_spacemit_src_allocator_finalize (GObject * obj) ++{ ++ GstSpaceMitSrcAllocator *alloc = GST_SPACEMIT_SRC_ALLOCATOR (obj); ++ GST_DEBUG_OBJECT (alloc, "ZRong ------------------- spacemit allocator finalize"); ++ ++ G_OBJECT_CLASS (parent_class)->finalize (obj); ++} ++ ++// GstMemory * ++// gst_spacemit_src_allocator_alloc (GstSpaceMitSrcAllocator * allocator, gsize size) ++// { ++// GST_DEBUG_OBJECT (allocator, "ZRong ------------------- spacemit allocator alloc"); ++ ++// GstMemory *ret_mem = NULL; ++// // GstSpaceMitMemory *mem; ++// // guint8 *data; ++ ++// // GST_DEBUG_OBJECT (allocator, "allocator alloc paras, size: %d", size); ++ ++// // if (allocator->mode == GST_SPM_MEMORY_TYPE_SYSTEM) { ++// // mem = gst_spacemit_memory_new (allocator, size, 0, NULL, 0, size); ++// // install_mem_dispose (GST_MEMORY_CAST (mem)); ++// // } else { ++// // mem = g_slice_new0 (GstSpaceMitMemory); ++// // mem->memsize = size; ++// // mem->acquired = FALSE; ++// // mem->fd = allocator->dmabuf_fd; ++ ++// // mem->foreign_mem = 
++// // gst_fd_allocator_alloc (allocator->foreign_allocator, allocator->dmabuf_fd, size, GST_FD_MEMORY_FLAG_DONT_CLOSE); ++// // gst_mini_object_set_qdata (GST_MINI_OBJECT (mem->foreign_mem), ++// // GST_SPM_DMABUF_MEMORY_QUARK, mem, NULL); ++// // gst_mini_object_set_qdata (GST_MINI_OBJECT (mem->foreign_mem), ++// // GST_SPACEMIT_ALLOCATOR_QUARK, allocator, NULL); ++// // install_mem_dispose (mem->foreign_mem); ++// // } ++ ++// // ret_mem = mem->foreign_mem ? mem->foreign_mem : (GstMemory *) mem; ++ ++// // GST_DEBUG_OBJECT (allocator, "allocator success alloc mem:%p, return mem:%p", mem, ret_mem); ++ ++// return ret_mem; ++// } ++ ++GstAllocator * ++gst_spacemit_src_allocator_new (void) ++{ ++ GstSpaceMitSrcAllocator *allocator; ++ ++ allocator = g_object_new (GST_TYPE_SPACEMIT_SRC_ALLOCATOR, NULL); ++ ++ return GST_ALLOCATOR_CAST (allocator); ++} ++ ++static void ++gst_spacemit_src_allocator_class_init (GstSpaceMitSrcAllocatorClass * klass) ++{ ++ GObjectClass *gobject_class; ++ GstAllocatorClass *allocator_class; ++ ++ allocator_class = GST_ALLOCATOR_CLASS (klass); ++ gobject_class = G_OBJECT_CLASS (klass); ++ ++ // allocator_class->alloc = gst_spacemit_src_allocator_alloc; ++ // allocator_class->free = gst_spacemit_src_allocator_free; ++ ++ gobject_class->finalize = gst_spacemit_src_allocator_finalize; ++} ++ ++static void ++gst_spacemit_src_allocator_init (GstSpaceMitSrcAllocator * allocator) ++{ ++ GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator); ++ ++ alloc->mem_type = GST_SPACEMIT_SRC_MEMORY_TYPE; ++ // alloc->mem_map = gst_spacemit_memory_map; ++ // alloc->mem_unmap = gst_spacemit_memory_unmap; ++ ++ // GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); ++} ++ +diff --git a/ext/spacemit/spacemitsrc/gstspmsrcallocator.h b/ext/spacemit/spacemitsrc/gstspmsrcallocator.h +new file mode 100755 +index 0000000..592df02 +--- /dev/null ++++ b/ext/spacemit/spacemitsrc/gstspmsrcallocator.h +@@ -0,0 +1,131 @@ ++/* GStreamer ++ * ++ * Copyright 
(C) 2016 Igalia ++ * ++ * Authors: ++ * Víctor Manuel Jáquez Leal ++ * Javier Martin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public ++ * License along with this library; if not, write to the ++ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, ++ * Boston, MA 02110-1301, USA. ++ * ++ */ ++ ++#ifndef __GST_SPACEMIT_SRC_ALLOCATOR_H__ ++#define __GST_SPACEMIT_SRC_ALLOCATOR_H__ ++ ++#include ++#include ++#include ++ ++G_BEGIN_DECLS ++ ++/* ---------------------------------------------------------------------*/ ++/* GstSpmSystemoMemory */ ++/* ---------------------------------------------------------------------*/ ++ ++#define GST_TYPE_SPACEMIT_SRC_ALLOCATOR \ ++ (gst_spacemit_src_allocator_get_type()) ++#define GST_IS_SPACEMIT_SRC_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_SPACEMIT_SRC_ALLOCATOR)) ++#define GST_IS_SPACEMIT_SRC_ALLOCATOR_CLASS(klass) \ ++ (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_SPACEMIT_SRC_ALLOCATOR)) ++#define GST_SPACEMIT_SRC_ALLOCATOR_GET_CLASS(obj) \ ++ (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_SPACEMIT_SRC_ALLOCATOR, GstSpaceMitSrcAllocatorClass)) ++#define GST_SPACEMIT_SRC_ALLOCATOR(obj) \ ++ (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_SPACEMIT_SRC_ALLOCATOR, GstSpaceMitSrcAllocator)) ++#define GST_SPACEMIT_SRC_ALLOCATOR_CLASS(klass) \ ++ (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_SPACEMIT_SRC_ALLOCATOR, GstSpaceMitSrcAllocatorClass)) ++ ++typedef 
struct _GstSpaceMitSrcAllocator GstSpaceMitSrcAllocator; ++typedef struct _GstSpaceMitSrcAllocatorClass GstSpaceMitSrcAllocatorClass; ++typedef struct _GstSpaceMitSrcMemory GstSpaceMitSrcMemory; ++ ++// typedef enum _GstSpmMemoryType ++// { ++// GST_SPM_MEMORY_TYPE_SYSTEM, ++// GST_SPM_MEMORY_TYPE_DMABUF, ++// } GstSpmMemoryType; ++ ++struct _GstSpaceMitSrcMemory ++{ ++ GstMemory parent; ++ ++// gboolean acquired; ++// MppFrame *mppframe; ++// gpointer ptr; ++ ++// MppVdecCtx *ctx; ++// guint32 memsize; ++// gint32 mppframe_id; ++// GstMemory *foreign_mem; ++ ++// guint32 fd; ++// /* the original dispose function of foreign_mem */ ++// GstMiniObjectDisposeFunction foreign_dispose; ++}; ++ ++struct _GstSpaceMitSrcAllocator ++{ ++ GstDmaBufAllocator parent; ++// gboolean active; ++// GMutex lock; ++ ++// GHashTable *memories; ++// gint32 mppframe_id; ++// gint32 dmabuf_fd; ++// guint32 id; ++// GstVideoInfo info; ++// GstAllocator *foreign_allocator; ++// GstSpmMemoryType mode; ++// gboolean mem_back; ++}; ++ ++struct _GstSpaceMitSrcAllocatorClass { ++ GstDmaBufAllocatorClass parent_class; ++}; ++GQuark gst_spacemit_src_dmabuf_memory_quark (void); ++// GQuark gst_spacemit_allocator_quark (void); ++ ++#define GST_SPACEMIT_SRC_DMABUF_MEMORY_QUARK gst_spacemit_src_dmabuf_memory_quark () ++// #define GST_SPACEMIT_ALLOCATOR_QUARK gst_spacemit_allocator_quark () ++ ++// gboolean gst_is_spacemit_memory (GstMemory *mem); ++// GstAllocator * ++// gst_spacemit_allocator_new (void); ++// GstMemory * ++// gst_spacemit_allocator_alloc (GstSpaceMitAllocator * allocator, gsize size); ++// GstFlowReturn ++// gst_spacemit_allocator_acquire (GstAllocator * base_allocator, GstMemory ** memory); ++// gboolean ++// gst_spacemit_allocator_configure(GstAllocator * base_allocator, GstVideoInfo * info, gboolean use_dmabuf); ++// gboolean ++// gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean active); ++ ++// void ++// gst_spacemit_allocator_get_info (GstAllocator 
* base_allocator, gint32 id, gint32 fd); ++// void ++// gst_spacemit_set_mem (GstMemory * base_mem, MppFrame *mppframe, MppVdecCtx *ctx); ++// gboolean ++// gst_spacemit_allocator_wait_inactive (GstAllocator * base_allocator); ++ ++GstAllocator * ++gst_spacemit_src_allocator_new (void); ++ ++GType gst_spacemit_src_allocator_get_type (void) G_GNUC_CONST; ++ ++G_END_DECLS ++ ++#endif /* __GST_SPACEMIT_ALLOCATOR_H__ */ +diff --git a/ext/spacemit/spacemitsrc/meson.build b/ext/spacemit/spacemitsrc/meson.build +new file mode 100755 +index 0000000..cb7e251 +--- /dev/null ++++ b/ext/spacemit/spacemitsrc/meson.build +@@ -0,0 +1,19 @@ ++spacemitsrc_sources = [ ++ 'gstspacemitsrc.c', ++ 'gstspmsrcallocator.c', ++] ++ ++spacemitcam_dep = dependency('k1x-cam', version : '>= 0.0.0', required : false) ++ ++if spacemitcam_dep.found() ++ gstspacemitsrc = library('gstspacemitsrc', ++ spacemitsrc_sources, ++ c_args : gst_plugins_bad_args, ++ link_args : noseh_link_args, ++ include_directories : [configinc], ++ dependencies : [gstvideo_dep, spacemitcam_dep, gstallocators_dep, gstbase_dep], ++ install : true, ++ install_dir : plugins_install_dir, ++ ) ++ plugins += [gstspacemitsrc] ++endif +diff --git a/meson_options.txt b/meson_options.txt +index 6c8855a..654cd55 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -185,6 +185,7 @@ option('wpe', type : 'feature', value : 'auto', description : 'WPE Web browser p + option('magicleap', type : 'feature', value : 'auto', description : 'Magic Leap platform support') + option('v4l2codecs', type : 'feature', value : 'auto', description : 'Video4Linux Stateless CODECs support') + option('isac', type : 'feature', value : 'auto', description : 'iSAC plugin') ++option('spacemit', type : 'feature', value : 'auto', description : 'Spacemit encoder/decoder plugin') + + # HLS plugin options + option('hls', type : 'feature', value : 'auto', description : 'HTTP Live Streaming plugin') +-- +2.25.1 + diff --git 
a/package/gstreamer1/gst1-plugins-bad/0002-1.-fix-spacemitsrc-pressure-test-fail.patch b/package/gstreamer1/gst1-plugins-bad/0002-1.-fix-spacemitsrc-pressure-test-fail.patch new file mode 100644 index 00000000..93854572 --- /dev/null +++ b/package/gstreamer1/gst1-plugins-bad/0002-1.-fix-spacemitsrc-pressure-test-fail.patch @@ -0,0 +1,3767 @@ +From c39a8ce49229fed652ac82092bcb0f7bb0db4a78 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=BB=8E=E5=BF=97=E8=8D=A3?= +Date: Sat, 23 Mar 2024 09:36:25 +0800 +Subject: [PATCH] 1. fix spacemitsrc pressure test fail 2. support spacemitenc + to enc h264 from spacemitdec + +--- + ext/spacemit/spacemitcodec/gstspacemitdec.c | 161 +- + .../spacemitcodec/gstspacemitdec_bak.c | 1394 ----------------- + ext/spacemit/spacemitcodec/gstspacemitenc.c | 537 +++++-- + ext/spacemit/spacemitcodec/gstspacemitenc.h | 3 +- + .../spacemitcodec/gstspacemitenc_bak.c | 1022 ------------ + ext/spacemit/spacemitsrc/gstspacemitsrc.c | 62 +- + ext/spacemit/spacemitsrc/gstspacemitsrc.h | 3 + + 7 files changed, 506 insertions(+), 2676 deletions(-) + delete mode 100755 ext/spacemit/spacemitcodec/gstspacemitdec_bak.c + delete mode 100755 ext/spacemit/spacemitcodec/gstspacemitenc_bak.c + +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.c b/ext/spacemit/spacemitcodec/gstspacemitdec.c +index d053eff..6a989e0 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitdec.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.c +@@ -54,32 +54,38 @@ static GstStateChangeReturn gst_spacemitdec_dec_change_state (GstElement * eleme + /* pad templates */ + static GstStaticPadTemplate gst_spacemitdec_sink_template = + GST_STATIC_PAD_TEMPLATE ("sink", +- GST_PAD_SINK, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS +- ("video/x-h264, stream-format=(string)byte-stream, alignment=(string)au, " +- "profile=(string){ constrained-baseline, baseline, main, high }," +- "width=(int) [640,MAX], " "height=(int) [480,MAX]" +- ";" +- "video/x-h265," +- "stream-format = (string) byte-stream," +- 
"alignment = (string)au," +- "width=(int) [640,MAX], " "height=(int) [480,MAX]" +- ";" +- "video/mpeg," +- "mpegversion = (int) { 1, 2, 4 }," +- "parsed = (boolean) true," +- "systemstream = (boolean) false" +- ";" +- "image/jpeg" ";" "video/x-vp8" ";" "video/x-vp9" +- )); ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ( ++ "video/x-h264, " ++ "stream-format=(string)byte-stream, alignment=(string)au, " ++ "profile=(string){ constrained-baseline, baseline, main, high }," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/mpeg," ++ "mpegversion = (int) { 1, 2, 4 }," ++ "parsed = (boolean) true," ++ "systemstream = (boolean) false" ++ ";" ++ "image/jpeg" ++ ";" ++ "video/x-vp8" ++ ";" ++ "video/x-vp9" ++ )); + + static GstStaticPadTemplate gst_spacemitdec_src_template = +- GST_STATIC_PAD_TEMPLATE ("src", +- GST_PAD_SRC, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS (SPM_DEC_CAPS_MAKE ("{" SPM_DEC_FORMATS "}") ";") +- ); ++ GST_STATIC_PAD_TEMPLATE ("src", ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS (SPM_DEC_CAPS_MAKE ("{" SPM_DEC_FORMATS "}") ";") ++ ); + + #define parent_class gst_spacemitdec_parent_class + /* class initialization */ +@@ -364,7 +370,7 @@ static gboolean gst_spacemitdec_stop(GstVideoDecoder * decoder) + if (thiz->save_dec) + fclose(thiz->fb); + +- if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { + GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop"); + return TRUE; + } +@@ -406,7 +412,7 @@ gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) + structure = gst_caps_get_structure (thiz->input_state->caps, 0); + thiz->eCodingType = gst_change_mpp_ecoding_type (structure); + if(thiz->eCodingType == CODING_UNKNOWN) { +- GST_ERROR_OBJECT(thiz, "no support 
this eCodingType"); ++ GST_ERROR_OBJECT(thiz, "error! no support eCodingType, structure: %" GST_PTR_FORMAT, structure); + return FALSE; + } + +@@ -692,6 +698,9 @@ static gint32 gst_spacemitdec_request_frame (GstSpacemitDec *thiz) + count++; + } while (thiz->req_nonblock); + ++ if (ret == MPP_CODER_EOS) ++ FRAME_SetEos (mppframe, TRUE); ++ + count = 0; + thiz->mppframe = mppframe; + +@@ -705,19 +714,21 @@ gst_spacemitdec_loop (GstVideoDecoder * decoder) + GstVideoCodecFrame * frame; + GstFlowReturn flow_status; + GstBuffer *outbuf = NULL; +- int ret; ++ int mpp_ret; + + if (G_UNLIKELY(thiz->flushing)) + goto flushing; + +- ret = gst_spacemitdec_request_frame (thiz); +- if (ret == MPP_CODER_EOS) { ++ mpp_ret = gst_spacemitdec_request_frame (thiz); ++ if (mpp_ret == MPP_CODER_EOS) { ++ VDEC_ReturnOutputFrame(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); ++ FRAME_Destory(thiz->mppframe); + goto finish_work; +- } else if (ret == MPP_POLL_FAILED) { ++ } else if (mpp_ret == MPP_POLL_FAILED) { + thiz->downstream_flow_ret = GST_FLOW_ERROR; + FRAME_Destory(thiz->mppframe); + goto done; +- } else if (ret == MPP_RESOLUTION_CHANGED) { ++ } else if (mpp_ret == MPP_RESOLUTION_CHANGED) { + if (thiz->width != thiz->ctx->stVdecPara.nWidth && + thiz->height != thiz->ctx->stVdecPara.nHeight) { + GST_DEBUG_OBJECT (thiz, "resolution change from (%u, %u) to (%u, %u), need renegotiate", +@@ -731,6 +742,8 @@ gst_spacemitdec_loop (GstVideoDecoder * decoder) + FRAME_Destory(thiz->mppframe); + goto no_frame; + } ++ } else if (mpp_ret != MPP_OK) { ++ GST_WARNING_OBJECT (thiz, "other return(%d) form mpp, when get dec frame", mpp_ret); + } + + if (thiz->wait_reschange) { +@@ -739,44 +752,43 @@ gst_spacemitdec_loop (GstVideoDecoder * decoder) + goto no_frame; + } + +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong 555, %d", ret); ++ GST_DEBUG_OBJECT (thiz, "start get oldest frame. 
mpp_ret: %d", mpp_ret); + + frame = gst_video_decoder_get_oldest_frame (decoder); + if (frame == NULL) { +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong 333, null"); ++ GST_DEBUG_OBJECT (thiz, "get null frame (%d)", mpp_ret); + flow_status = gst_spacemitdec_fill_output_buffer (decoder, &outbuf); + if (flow_status != GST_FLOW_OK) + goto fill_buffer_err; + +- GST_DEBUG_OBJECT (thiz, "zrong ----- push buf %p of size %" G_GSIZE_FORMAT ", " ++ GST_DEBUG_OBJECT (thiz, "null frame, push buf %p of size %" G_GSIZE_FORMAT ", " + "PTS %" GST_TIME_FORMAT ", dur %" GST_TIME_FORMAT, outbuf, +- gst_buffer_get_size (outbuf), +- GST_TIME_ARGS (GST_BUFFER_PTS (outbuf)), ++ gst_buffer_get_size (outbuf), GST_TIME_ARGS (GST_BUFFER_PTS (outbuf)), + GST_TIME_ARGS (GST_BUFFER_DURATION (outbuf))); +- flow_status = gst_pad_push (GST_VIDEO_DECODER_SRC_PAD (decoder), outbuf); + ++ flow_status = gst_pad_push (GST_VIDEO_DECODER_SRC_PAD (decoder), outbuf); + } else { + GST_DEBUG_OBJECT (thiz, "get oldest frame %p, snd:%u, input buffer pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT ", mppframe pts %" GST_TIME_FORMAT, + frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->input_buffer)), + GST_TIME_ARGS (GST_BUFFER_DTS (frame->input_buffer)), GST_TIME_ARGS (FRAME_GetPts(thiz->mppframe))); + + //frame->pts = FRAME_GetPts(thiz->mppframe); +- frame->pts = GST_CLOCK_TIME_NONE; +- frame->dts = FRAME_GetPts(thiz->mppframe); ++ // frame->pts = GST_CLOCK_TIME_NONE; ++ // frame->dts = FRAME_GetPts(thiz->mppframe); + flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); + if (flow_status != GST_FLOW_OK) + goto fill_buffer_err; + +- const gchar *user_clk_choice; ++ // const gchar *user_clk_choice; + +- user_clk_choice = g_getenv ("GST_CLK_CHOICE"); +- if (user_clk_choice && g_strstr_len (user_clk_choice, 1, "Y")) { +- frame->pts = GST_CLOCK_TIME_NONE; +- frame->dts = GST_CLOCK_TIME_NONE; +- } ++ // user_clk_choice = g_getenv ("GST_CLK_CHOICE"); ++ // if 
(user_clk_choice && g_strstr_len (user_clk_choice, 1, "Y")) { ++ // frame->pts = GST_CLOCK_TIME_NONE; ++ // frame->dts = GST_CLOCK_TIME_NONE; ++ // } + +- GST_BUFFER_PTS (frame->output_buffer) = frame->pts; +- GST_BUFFER_DTS (frame->output_buffer) = frame->dts; ++ // GST_BUFFER_PTS (frame->output_buffer) = frame->pts; ++ // GST_BUFFER_DTS (frame->output_buffer) = frame->dts; + + GST_DEBUG_OBJECT (thiz, "finish frame %p, snd:%u, output buffer pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT ", frame pts: %" GST_TIME_FORMAT ", dts %" GST_TIME_FORMAT, + frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->output_buffer)), +@@ -785,16 +797,19 @@ gst_spacemitdec_loop (GstVideoDecoder * decoder) + flow_status = gst_video_decoder_finish_frame(decoder, frame); + } + +- if (flow_status != GST_FLOW_OK) +- goto sending_state; ++ if (flow_status == GST_FLOW_EOS) { ++ goto finish_work; ++ } else if (flow_status == GST_FLOW_FLUSHING) { ++ goto flushing; ++ } else if (flow_status != GST_FLOW_OK) { ++ goto error_flow; ++ } + + thiz->downstream_flow_ret = flow_status; + + done: + if (thiz->downstream_flow_ret != GST_FLOW_OK) { +- GST_INFO_OBJECT (thiz, +- "pause task in dec loop (%d)!", thiz->downstream_flow_ret); +- ++ GST_INFO_OBJECT (thiz, "pause task in dec loop (%d)!", thiz->downstream_flow_ret); + gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (thiz)); + } + return; +@@ -822,8 +837,7 @@ no_frame: + + fill_buffer_err: + { +- GST_ERROR_OBJECT (thiz, +- "fill buffer err in dec loop, flow status: %d!", flow_status); ++ GST_ERROR_OBJECT (thiz, "fill buffer err, flow status: %d!", flow_status); + thiz->downstream_flow_ret = flow_status; + gst_video_codec_frame_unref (frame); + goto done; +@@ -831,38 +845,17 @@ fill_buffer_err: + + finish_work: + { +- GST_DEBUG_OBJECT (thiz, +- "get eos, finished work and paused task!"); ++ GST_DEBUG_OBJECT (thiz, "get eos(%d, %d), finish work and pause task!", mpp_ret, flow_status); + thiz->downstream_flow_ret = GST_FLOW_EOS; +- 
VDEC_ReturnOutputFrame(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); +- FRAME_Destory(thiz->mppframe); +- + goto done; +- + } + +-sending_state: ++error_flow: + { + thiz->downstream_flow_ret = flow_status; +- if (flow_status == GST_FLOW_EOS) { +- GST_DEBUG_OBJECT (thiz, +- "get eos, finished work!"); +- } else if (flow_status == GST_FLOW_ERROR) { +- GST_ERROR_OBJECT (thiz, +- "send error and paused task!"); +- } else if (flow_status == GST_FLOW_FLUSHING) { +- thiz->flushing = TRUE; +- GST_DEBUG_OBJECT (thiz, +- "get GST_FLOW_FLUSHING from finish frame!"); +- +- goto flushing; +- } else { +- GST_ERROR_OBJECT (thiz, +- "get an unsupport flow status return after finish frame!"); +- } ++ GST_ERROR_OBJECT (thiz, "unsupport flow status return: %d", flow_status); + goto done; + } +- + } + static gboolean + gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) +@@ -941,7 +934,7 @@ gst_spacemitdec_set_src_caps (GstSpacemitDec * thiz) + + format = mpp_format_change_to_gst(thiz->eOutputPixelFormat); + if (format == GST_VIDEO_FORMAT_UNKNOWN) { +- GST_ERROR_OBJECT (thiz, "failed to find a valid video format"); ++ GST_ERROR_OBJECT(thiz, "error! 
no support PixelFormat, format: %s", gst_video_format_to_string (format)); + return FALSE; + } + +@@ -1052,7 +1045,7 @@ gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, + } else { + GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); + ret = gst_spacemitdec_vdec_decode (thiz, NULL, GST_CLOCK_TIME_NONE); +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- in handle else, %d!", ret); ++ GST_DEBUG_OBJECT (thiz, "in dec handle else, %d!", ret); + thiz->cur_frame_number = -1; + ret = GST_FLOW_EOS; + GST_VIDEO_DECODER_STREAM_LOCK (decoder); +@@ -1069,7 +1062,7 @@ gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, + if (!gst_spacemitdec_set_src_caps (thiz)) + goto not_negotiated_err; + +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- bf negotiate, %d, %d, %d", ++ GST_DEBUG_OBJECT (thiz, "start negotiate, %d, %d, %d", + actual_width, actual_height, thiz->eOutputPixelFormat); + + if (!gst_video_decoder_negotiate (decoder)) +@@ -1080,12 +1073,12 @@ gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, + } + + if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong --------------------- start dec thread"); ++ GST_DEBUG_OBJECT (thiz, "start dec thread"); + gst_pad_start_task (decoder->srcpad, + (GstTaskFunction) gst_spacemitdec_loop, decoder, NULL); + } + +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------ finish handle, %d", thiz->downstream_flow_ret); ++ GST_DEBUG_OBJECT (thiz, "finish dec handle, %d", thiz->downstream_flow_ret); + ret = thiz->downstream_flow_ret; + + out: +@@ -1285,7 +1278,7 @@ static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder * decoder, Gst + gst_clear_object (&thiz->pool); + GST_INFO_OBJECT (thiz, "create new spacemit bufferpool"); + thiz->pool = +- gst_spacemitdec_create_buffer_pool(thiz, &output_state->info, (4 > min) ? 4 : min); ++ gst_spacemitdec_create_buffer_pool(thiz, &output_state->info, (8 > min) ? 
8 : min); + gst_video_codec_state_unref (output_state); + if (!thiz->pool) { + GST_ERROR_OBJECT (thiz, "failed to create new pool"); +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c b/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c +deleted file mode 100755 +index f81ee21..0000000 +--- a/ext/spacemit/spacemitcodec/gstspacemitdec_bak.c ++++ /dev/null +@@ -1,1394 +0,0 @@ +-/* GStreamer +- * Copyright (C) 2022 FIXME +- * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Library General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Library General Public License for more details. +- * +- * You should have received a copy of the GNU Library General Public +- * License along with this library; if not, write to the +- * Free Software Foundation, Inc., 51 Franklin Street, Suite 500, +- * Boston, MA 02110-1335, USA. +- */ +-/** +- * SECTION:element-gstvideoprocess +- * +- * The videoprocess element does FIXME stuff. +- * +- * +- * Example launch line +- * |[ +- * gst-launch-1.0 -v fakesrc ! videoprocess ! FIXME ! fakesink +- * ]| +- * FIXME Describe what the pipeline does. 
+- * +- */ +- +-#ifdef HAVE_CONFIG_H +-#include "config.h" +-#endif +-#include +-#include +- +-#include +-#include +-#include +-#include "gstspacemitdec.h" +- +-//#define MULTI_THREAD_DOWNSTREAM_POOL_TEST +-//#define SINGLE_THREAD_DOWNSTREAM_POOL_TEST +- +-GST_DEBUG_CATEGORY_STATIC (gst_spacemitdec_debug_category); +-#define GST_CAT_DEFAULT gst_spacemitdec_debug_category +- +-/* prototypes */ +-static void gst_spacemitdec_set_property (GObject * object, +- guint property_id, const GValue * value, GParamSpec * pspec); +-static void gst_spacemitdec_get_property (GObject * object, +- guint property_id, GValue * value, GParamSpec * pspec); +-static void gst_spacemitdec_finalize (GObject * object); +-static gboolean gst_spacemitdec_close(GstVideoDecoder *decoder); +-static gboolean gst_spacemitdec_start(GstVideoDecoder *decoder); +-static gboolean gst_spacemitdec_stop(GstVideoDecoder *decoder); +-static gboolean gst_spacemitdec_set_format(GstVideoDecoder *decoder, GstVideoCodecState *state); +-static gboolean gst_spacemitdec_reset(GstVideoDecoder *decoder, gboolean hard); +-static gboolean gst_spacemitdec_flush(GstVideoDecoder * decoder); +-static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder); +-static GstFlowReturn gst_spacemitdec_handle_frame(GstVideoDecoder *decoder, GstVideoCodecFrame *frame); +-static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query); +-static GstStateChangeReturn gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition); +- +-/* pad templates */ +-static GstStaticPadTemplate gst_spacemitdec_sink_template = +-GST_STATIC_PAD_TEMPLATE ("sink", +- GST_PAD_SINK, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS +- ("video/x-h264, stream-format=(string)byte-stream, alignment=(string)au, " +- "profile=(string){ constrained-baseline, baseline, main, high }," +- "width=(int) [640,MAX], " "height=(int) [480,MAX]" +- ";" +- "video/x-h265," +- "stream-format = (string) byte-stream," +- 
"alignment = (string)au," +- "width=(int) [640,MAX], " "height=(int) [480,MAX]" +- )); +- +-static GstStaticPadTemplate gst_spacemitdec_src_template = +- GST_STATIC_PAD_TEMPLATE ("src", +- GST_PAD_SRC, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS (SPM_DEC_CAPS_MAKE ("{" SPM_DEC_FORMATS "}") ";") +- ); +- +-#define parent_class gst_spacemitdec_parent_class +-/* class initialization */ +-G_DEFINE_TYPE(GstSpacemitDec, gst_spacemitdec, GST_TYPE_VIDEO_DECODER); +-enum +-{ +- PROP_0, +- PROP_CODING_WIDTH, +- PROP_CODING_HIGHT, +- PROP_CODING_TYPE, +- PROP_CODE_TYPE, +- PROP_CODE_YUV_FORMAT, +- N_PROPERTIES +-}; +- +-static gboolean +-_gst_caps_has_feature (const GstCaps * caps, const gchar * feature) +-{ +- guint i; +- +- for (i = 0; i < gst_caps_get_size (caps); i++) { +- GstCapsFeatures *const features = gst_caps_get_features (caps, i); +- /* Skip ANY features, we need an exact match for correct evaluation */ +- if (gst_caps_features_is_any (features)) +- continue; +- if (gst_caps_features_contains (features, feature)) +- return TRUE; +- } +- +- return FALSE; +-} +- +-static gboolean +-srcpad_can_dmabuf (GstSpacemitDec * thiz) +-{ +- gboolean ret = FALSE; +- GstCaps *caps, *out_caps; +- GstPad *srcpad; +- +- srcpad = GST_VIDEO_DECODER_SRC_PAD (thiz); +- caps = gst_pad_get_pad_template_caps (srcpad); +- +- out_caps = gst_pad_peer_query_caps (srcpad, caps); +- if (!out_caps) +- goto done; +- +- if (gst_caps_is_any (out_caps) || gst_caps_is_empty (out_caps) +- || out_caps == caps) +- goto done; +- +- if (_gst_caps_has_feature (out_caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) +- ret = TRUE; +- +-done: +- if (caps) +- gst_caps_unref (caps); +- if (out_caps) +- gst_caps_unref (out_caps); +- return ret; +-} +- +-static gboolean gst_spacemitdec_close(GstVideoDecoder * decoder) +-{ +- GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); +- +- if (thiz->input_state) +- { +- gst_video_codec_state_unref (thiz->input_state); +- thiz->input_state = NULL; +- } +- GST_DEBUG_OBJECT (decoder, "ZRong 
--------------- spacemitdec start close"); +- if (thiz->pool) { +- gst_buffer_pool_set_active (thiz->pool, FALSE); +-// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); +- gst_object_unref (spacemitdec->pool); +- thiz->pool = NULL; +- } +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close222"); +- +-// FRAME_Destory(spacemitdec->mppframe); +- PACKET_Destory (thiz->mpppacket); +- VDEC_DestoryChannel (thiz->ctx); +- +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish close"); +- +- return TRUE; +-} +- +- +-static void gst_spacemitdec_class_init(GstSpacemitDecClass * klass) +-{ +- GstVideoDecoderClass *video_decoder_class = GST_VIDEO_DECODER_CLASS (klass); +- GstElementClass *element_class = GST_ELEMENT_CLASS (klass); +- GObjectClass *gobject_class = G_OBJECT_CLASS (klass); +- +- gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_sink_template); +- gst_element_class_add_static_pad_template(GST_ELEMENT_CLASS (klass), &gst_spacemitdec_src_template); +- +- gst_element_class_set_static_metadata(GST_ELEMENT_CLASS (klass), +- "Spacemit video decoder", "Decoder/Video", "Spacemit video decoder", +- "ZRong, zhirong.li@spacemit.com"); +- +- gobject_class->set_property = gst_spacemitdec_set_property; +- gobject_class->get_property = gst_spacemitdec_get_property; +- gobject_class->finalize = gst_spacemitdec_finalize; +- +- video_decoder_class->close = GST_DEBUG_FUNCPTR(gst_spacemitdec_close); +- video_decoder_class->start = GST_DEBUG_FUNCPTR(gst_spacemitdec_start); +- video_decoder_class->stop = GST_DEBUG_FUNCPTR(gst_spacemitdec_stop); +- video_decoder_class->set_format = GST_DEBUG_FUNCPTR(gst_spacemitdec_set_format); +- video_decoder_class->reset = GST_DEBUG_FUNCPTR(gst_spacemitdec_reset); +- video_decoder_class->flush = GST_DEBUG_FUNCPTR (gst_spacemitdec_flush); +- video_decoder_class->finish = GST_DEBUG_FUNCPTR(gst_spacemitdec_finish); +- 
video_decoder_class->handle_frame = GST_DEBUG_FUNCPTR(gst_spacemitdec_handle_frame); +- video_decoder_class->decide_allocation = GST_DEBUG_FUNCPTR(gst_spacemitdec_decide_allocation); +- +-// element_class->change_state = GST_DEBUG_FUNCPTR (gst_spacemitdec_dec_change_state); +- +- /* define properties */ +- g_object_class_install_property (gobject_class, PROP_CODING_TYPE, +- g_param_spec_uint ("coding-type", "coding type", +- "Format to decode", +- CODING_H264, CODING_FWHT, CODING_H264, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODE_TYPE, +- g_param_spec_uint ("code-type", "code type", +- "Codec selection to work", +- CODEC_OPENH264, 1023, CODEC_SFOMX, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODING_WIDTH, +- g_param_spec_uint ("coding-width", "coding width", +- "image width to decode", +- 0, 3840, 1280, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODING_HIGHT, +- g_param_spec_uint ("coding-hight", "coding hight", +- "image hight to decode", +- 0, 2160, 720, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, +- g_param_spec_uint ("code-yuv-format", "code yuv format", +- "Decode the generated yuv format", +- PIXEL_FORMAT_DEFAULT, PIXEL_FORMAT_UNKNOWN-1, PIXEL_FORMAT_I420, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- +-} +- +-static void gst_spacemitdec_init (GstSpacemitDec * thiz) +-{ +- GstVideoDecoder *decoder = GST_VIDEO_DECODER (thiz); +- +- thiz->pool = NULL; +- thiz->ctx = NULL; +- thiz->width = 1280; +- thiz->height = 720; +- thiz->eCodecType = CODEC_SFOMX; +- thiz->eCodingType = CODING_H264; +- thiz->eOutputPixelFormat = PIXEL_FORMAT_I420; +- +- gst_video_decoder_set_packetized (decoder, TRUE); +- 
gst_video_decoder_set_needs_format (decoder, TRUE); +-} +-void +-gst_spacemitdec_set_property (GObject * object, guint property_id, +- const GValue * value, GParamSpec * pspec) +-{ +- GstSpacemitDec *thiz = GST_SPACEMITDEC (object); +- +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", property_id); +- +- switch (property_id) { +- case PROP_CODING_WIDTH: +- thiz->width = g_value_get_uint (value); +- break; +- case PROP_CODING_HIGHT: +- thiz->height = g_value_get_uint (value); +- break; +- case PROP_CODING_TYPE: +- thiz->eCodingType = g_value_get_uint (value); +- break; +- case PROP_CODE_TYPE: +- thiz->eCodecType = g_value_get_uint (value); +- break; +- case PROP_CODE_YUV_FORMAT: +- thiz->eOutputPixelFormat = g_value_get_uint (value); +- break; +- default: +- G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); +- break; +- } +-} +- +-void +-gst_spacemitdec_get_property (GObject * object, guint property_id, +- GValue * value, GParamSpec * pspec) +-{ +- GstSpacemitDec *thiz = GST_SPACEMITDEC (object); +- +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------- get_property: %d", property_id); +- +- switch (property_id) { +- case PROP_CODING_WIDTH: +- g_value_set_uint (value, thiz->width); +- break; +- case PROP_CODING_HIGHT: +- g_value_set_uint (value, thiz->height); +- break; +- case PROP_CODING_TYPE: +- g_value_set_uint (value, thiz->eCodingType); +- break; +- case PROP_CODE_TYPE: +- g_value_set_uint (value, thiz->eCodecType); +- break; +- case PROP_CODE_YUV_FORMAT: +- g_value_set_uint (value, thiz->eOutputPixelFormat); +- break; +- default: +- G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); +- break; +- } +-} +- +-void +-gst_spacemitdec_finalize (GObject * object) +-{ +- GstSpacemitDec *thiz = GST_SPACEMITDEC (object); +- +- GST_DEBUG_OBJECT (thiz, "finalize"); +- +- /* clean up object here */ +- +- if (thiz->input_state) { +- gst_video_codec_state_unref (thiz->input_state); +- } +- thiz->input_state = NULL; +- +- 
G_OBJECT_CLASS (gst_spacemitdec_parent_class)->finalize (object); +-} +- +-static FILE *fbbb1; +-static gboolean gst_spacemitdec_start (GstVideoDecoder * decoder) +-{ +- GstSpacemitDec *thiz = GST_SPACEMITDEC (decoder); +- int ret = 0; +- +- GST_ERROR_OBJECT (thiz, "ZRong ----------------- in start !"); +- +- if (thiz->ctx) +- { +- VDEC_DestoryChannel (thiz->ctx); +- thiz->ctx = NULL; +- } +- thiz->ctx = VDEC_CreateChannel (); +- thiz->ctx->eCodecType = thiz->eCodecType; +- +- thiz->mpppacket = PACKET_Create (); +- if (!thiz->mpppacket) +- goto alloc_err; +- +- thiz->mppframe = NULL; +- thiz->at_eos = FALSE; +- thiz->downstream_flow_ret = GST_FLOW_OK; +- thiz->cur_frame_number = -1; +- thiz->use_dmabuf = FALSE; +- thiz->initialized = FALSE; +- +- GST_ERROR_OBJECT (thiz, "ZRong ------------------------- finish start !"); +- fbbb1 = fopen("/tmp/out.yuv", "ab+"); +- +- return TRUE; +- +-alloc_err: +- GST_ERROR_OBJECT (spacemitdec, "can not alloc for mpp structure, please check !"); +- return FALSE; +- +-init_err: +- GST_ERROR_OBJECT (spacemitdec, "Mpp vdec init error, please check !"); +- return FALSE; +- +-} +- +-static gboolean gst_spacemitdec_stop(GstVideoDecoder *decoder) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- GstSpaceMitAllocator * allocator; +- fclose(fbbb1); +- +-#if 0 +- if (spacemitdec->input_state) +- { +- gst_video_codec_state_unref (spacemitdec->input_state); +- spacemitdec->input_state = NULL; +- } +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start stop"); +- +-// FRAME_Destory(spacemitdec->mppframe); +- PACKET_Destory (spacemitdec->mpppacket); +- VDEC_DestoryChannel (spacemitdec->ctx); +- +- if (spacemitdec->pool) { +-// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); +- gst_object_unref (spacemitdec->pool); +- spacemitdec->pool = NULL; +- } +-#endif +- if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { +- GST_DEBUG_OBJECT (decoder, 
"ZRong --------------- spacemitdec finish stop"); +- +- return TRUE; +- } +- +- gst_pad_stop_task (decoder->srcpad); +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish stop222"); +- +- return TRUE; +-} +-static MppCodingType +-gst_spacemit_get_mpp_video_type (GstStructure * s) +-{ +- if (gst_structure_has_name (s, "video/x-h264")) +- return CODING_H264; +- +- if (gst_structure_has_name (s, "video/x-h265")) +- return CODING_H265; +- +- return CODING_UNKNOWN; +-} +-static gboolean +-gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) +-{ +- GstStructure *structure; +- gboolean ret = TRUE; +- +- if (thiz->initialized) +- return TRUE; +- +- structure = gst_caps_get_structure (thiz->input_state->caps, 0); +- thiz->eCodingType = gst_spacemit_get_mpp_video_type (structure); +- if(thiz->eCodingType == CODING_UNKNOWN) { +- GST_ERROR_OBJECT(thiz, "mpp no support this eCodingType"); +- return FALSE; +- } +- +- if (thiz->ctx->eCodecType == CODEC_SFOMX || +- thiz->ctx->eCodecType == CODEC_OPENH264 || +- thiz->ctx->eCodecType == CODEC_FAKEDEC) { +- thiz->width = thiz->input_state->info.width; +- thiz->height = thiz->input_state->info.height; +- thiz->ctx->stVdecPara.eCodingType = thiz->eCodingType; +- thiz->ctx->stVdecPara.nWidth = thiz->width; +- thiz->ctx->stVdecPara.nHeight = thiz->height; +- thiz->ctx->stVdecPara.eOutputPixelFormat = thiz->eOutputPixelFormat; +- thiz->ctx->eCodecType = thiz->eCodecType; +- thiz->ctx->stVdecPara.bInputBlockModeEnable = MPP_TRUE; +- thiz->ctx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; +- GST_DEBUG_OBJECT (thiz, "spacemitdec set eCodecType is %d", thiz->ctx->eCodecType); +- +- ret = VDEC_Init (thiz->ctx); +- if (ret) { +- GST_ERROR_OBJECT (thiz, "Mpp vdec init error, please check !"); +- return FALSE; +- } +- } else { +- GST_ERROR_OBJECT (thiz, "No supprot this type(%d) to handle frame", thiz->ctx->eCodecType); +- } +- +- thiz->initialized = TRUE; +- return TRUE; +-} +- +-/* function: +- * Tell subclasses to input 
stream data format +- * called time: +- * When the upstream element sends GST_EVENT_CAPS, +- * call in gst_video_decoder_setcaps. +- * need to modify................................. +- */ +-static gboolean gst_spacemitdec_set_format(GstVideoDecoder *decoder, GstVideoCodecState *state) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- GstStructure *structure; +- gboolean ret = TRUE; +- +- GST_DEBUG_OBJECT(spacemitdec, "input caps: %" GST_PTR_FORMAT, state->caps); +- +- if(spacemitdec->input_state) +- { +- gst_video_codec_state_unref(spacemitdec->input_state); +- spacemitdec->input_state = NULL; +- } +- +- spacemitdec->input_state = gst_video_codec_state_ref (state); +- if (!gst_spacemitdec_init_decoder(spacemitdec)) +- return FALSE; +- +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- finish set_format, %d, %d, %d, %d, %d, %d, %d, %d!", +- spacemitdec->input_state->info.width, spacemitdec->input_state->info.height, spacemitdec->input_state->info.size, +- state->info.width, state->info.height, state->info.size, +- PACKET_GetBaseData(spacemitdec->mpppacket)->nWidth, PACKET_GetBaseData(spacemitdec->mpppacket)->nHeight); +- GstVideoFormat fmt; +- fmt = GST_VIDEO_INFO_FORMAT (&state->info); +- GST_ERROR_OBJECT (spacemitdec, "ZRong ----------------------- set format finish, %u, %s", fmt, gst_video_format_to_string (fmt)); +- +- return TRUE; +-} +- +-static gboolean gst_spacemitdec_reset(GstVideoDecoder *decoder, gboolean hard) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- +- GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------------- finish reset!"); +- +- return TRUE; +-} +-static gboolean +-gst_spacemitdec_flush (GstVideoDecoder * decoder) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- +- GST_DEBUG_OBJECT (spacemitdec, "ZRong -------------------- flushing decoder start, (%d)", spacemitdec->downstream_flow_ret); +- +- spacemitdec->flushing = TRUE; +- +- if (spacemitdec->downstream_flow_ret 
== GST_FLOW_EOS) +- gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); +- +- VDEC_Flush(spacemitdec->ctx); +- +- GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); +- +- /* Wait for task thread to pause */ +- GstTask *task = decoder->srcpad->task; +- if (task) { +- //GST_OBJECT_LOCK (task); +- while (GST_TASK_STATE (task) == GST_TASK_STARTED) { +- GST_DEBUG_OBJECT(spacemitdec, "finish FLUSH test4"); +- g_usleep(400 * 1000); +- //GST_TASK_WAIT (task); +- } +- GST_DEBUG_OBJECT(spacemitdec, "finish FLUSH test5"); +- //GST_OBJECT_UNLOCK (task); +- gst_pad_stop_task (decoder->srcpad); +- } +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- +- spacemitdec->flushing = FALSE; +- spacemitdec->downstream_flow_ret = GST_FLOW_OK; +- +- GST_DEBUG_OBJECT (spacemitdec, "ZRong -------------------- flushing decoder stop"); +- +- return TRUE; +-} +- +-static int gst_mpp_format_change(MppPixelFormat eOutputPixelFormat) +-{ +- GstVideoFormat format; +- +- switch(eOutputPixelFormat){ +- case PIXEL_FORMAT_I420: +- format = GST_VIDEO_FORMAT_I420; +- break; +- case PIXEL_FORMAT_NV21: +- format = GST_VIDEO_FORMAT_NV21; +- break; +- case PIXEL_FORMAT_NV12: +- format = GST_VIDEO_FORMAT_NV12; +- break; +- default: +- format = GST_VIDEO_FORMAT_UNKNOWN; +- break; +- } +- return format; +-} +- +-static GstFlowReturn +-gst_spacemitdec_fill_output_buffer (GstVideoDecoder * decoder, +- GstBuffer **output_buffer) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); +- GstVideoInfo *vinfo = NULL; +- GstVideoFormat format; +- const guint8 *data; +- guint8 *dst; +- guint h; +- gint dst_width[GST_VIDEO_MAX_PLANES] = { 0, }; +- gint dst_height[GST_VIDEO_MAX_PLANES] = { 0, }; +- guint p; +- GstVideoFrame video_frame; +- GstVideoCodecState *state; +- GstFlowReturn flow_status = GST_FLOW_OK; +- GstBuffer *outbuf; +- GstBufferPoolAcquireParams params = { 0, }; +- GstMemory *mem; +- GstSpaceMitMemory *spm_mem; +- gint32 id; +- gint32 fd; +- +- id = 
FRAME_GetID(spacemitdec->mppframe); +- if (spacemitdec->use_dmabuf) { +- fd = FRAME_GetFD(spacemitdec->mppframe); +- gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator, id, fd); +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong get info (%d %d)", id, fd); +- +- } else { +- gst_spacemit_allocator_get_info (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator, id, -1); +- } +- +- flow_status = gst_buffer_pool_acquire_buffer (spacemitdec->pool, &outbuf, ¶ms); +- if (flow_status != GST_FLOW_OK) { +- goto alloc_err; +- } +- +-#if !defined(MULTI_THREAD_DOWNSTREAM_POOL_TEST) && !defined(SINGLE_THREAD_DOWNSTREAM_POOL_TEST) +- mem = gst_buffer_peek_memory (outbuf, 0); +- gst_spacemit_set_mem (mem, spacemitdec->mppframe, spacemitdec->ctx); +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 444"); +- *output_buffer = outbuf; +-#else +- +- if (gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED) { +- flow_status = gst_buffer_pool_acquire_buffer (spacemitdec->pool, &outbuf, ¶ms); +- if (flow_status != GST_FLOW_OK) +- goto alloc_err; +- *output_buffer = outbuf; //mult thread, get outbuf from acquire +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong AAAA "); +- } else { +- outbuf = *output_buffer; //single thread, get outbuf from outside +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong BBBB "); +- } +- +- state = gst_video_decoder_get_output_state (decoder); +- if (state == NULL) +- goto negotiated_err; +- +- if (!gst_video_frame_map (&video_frame, &state->info, outbuf, +- GST_MAP_WRITE)) +- goto map_err; +- +- format = gst_mpp_format_change(spacemitdec->eOutputPixelFormat); +- +- vinfo = &state->info; +- dst_height[0] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo); +- +- switch (format) { +- case GST_VIDEO_FORMAT_I420: +- dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); +- dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; +- dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; +- dst_width[2] = GST_VIDEO_INFO_WIDTH (vinfo) / 2; +- dst_height[2] = 
GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; +- GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in I420 dec_fill_buffer,(%d, %d, %d) (%d, %d, %d)", +- dst_width[0], dst_width[1], dst_width[2], +- GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 0), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 1), GST_VIDEO_INFO_PLANE_STRIDE (vinfo, 2)); +- GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in I420 dec_fill_buffer,(%d, %d, %d) %d", +- dst_height[0], dst_height[1], dst_height[2], GST_VIDEO_INFO_N_PLANES (vinfo)); +- +- break; +- case GST_VIDEO_FORMAT_NV12: +- case GST_VIDEO_FORMAT_NV21: +- dst_width[0] = GST_VIDEO_INFO_WIDTH (vinfo); +- dst_width[1] = GST_VIDEO_INFO_WIDTH (vinfo); +- dst_height[1] = GST_VIDEO_INFO_FIELD_HEIGHT (vinfo) / 2; +- GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in NV12/NV21 dec_fill_buffer,(%d, %d) ", +- dst_width[0], dst_width[1]); +- GST_ERROR_OBJECT (spacemitdec, "ZRong ------------------ in NV12/NV21 dec_fill_buffer,(%d, %d,)", +- dst_height[0], dst_height[1]); +- +- break; +- default: +- g_assert_not_reached (); +- break; +- } +- +- for (p = 0; p < GST_VIDEO_INFO_N_PLANES (vinfo); p++) { +- data = (U8*)FRAME_GetDataPointer(spacemitdec->mppframe, p); +- dst = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, p); +- +- for (h = 0; h < dst_height[p]; h++) { +-// fwrite(data, 1, dst_width[p], fbbb1); +- memcpy (dst, data, dst_width[p]); +- dst += GST_VIDEO_INFO_PLANE_STRIDE (vinfo, p); +- data += dst_width[p]; +- } +- } +- +- gst_video_codec_state_unref (state); +- gst_video_frame_unmap (&video_frame); +-#endif +- +-done: +- spacemitdec->mppframe = NULL; +- return flow_status; +- +-alloc_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, +- "an output buffer could not be allocated"); +- goto done; +-} +-negotiated_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, +- "Not yet negotiate with downstream elements!"); +- flow_status = GST_FLOW_NOT_NEGOTIATED; +- goto done; +-} +-map_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, "Cannot map output buffer!"); +- 
gst_video_codec_state_unref (state); +- flow_status = GST_FLOW_ERROR; +- goto done; +-} +- +-} +- +-static void +-gst_st_mpp_dec_loop (GstVideoDecoder * decoder) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); +- GstVideoCodecFrame * frame; +- GstFlowReturn flow_status; +- GstVideoFrame video_frame; +- guint8 *p; +- guint row_stride, component_width, component_height, row; +- U8 *tmp_pdata[3]; +- int pnum, i, size[3]; +- int ret; +- MppFrame *mppframe = NULL; +- +- if (G_UNLIKELY(spacemitdec->flushing)) { +- goto flushing; +- } +- +- ret = VDEC_RequestOutputFrame_2 (spacemitdec->ctx, (MppData **)&mppframe); +- if (ret == MPP_CODER_EOS) { +- goto finish_work; +- } else if (ret == MPP_CODER_NO_DATA || mppframe == NULL) { +- goto no_mppframe; +- } +- +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 555, %d", ret); +- +- spacemitdec->mppframe = mppframe; +- frame = gst_video_decoder_get_oldest_frame (decoder); +- +- if (frame == NULL) { +- GstBuffer *outbuf = NULL; +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, null"); +- flow_status = gst_spacemitdec_fill_output_buffer (decoder, &outbuf); +- if (flow_status != GST_FLOW_OK) { +- goto fill_buffer_err; +- } +- GST_DEBUG_OBJECT (spacemitdec, "zrong ---------------- push buf of size %" G_GSIZE_FORMAT ", " +- "PTS %" GST_TIME_FORMAT ", dur %" GST_TIME_FORMAT, +- gst_buffer_get_size (outbuf), +- GST_TIME_ARGS (GST_BUFFER_PTS (outbuf)), +- GST_TIME_ARGS (GST_BUFFER_DURATION (outbuf))); +- flow_status = gst_pad_push (GST_VIDEO_DECODER_SRC_PAD (decoder), outbuf); +- +- } else { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, %d", frame->ref_count); +- flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); +- if (flow_status != GST_FLOW_OK) { +- goto fill_buffer_err; +- } +- struct timeval tv1 = {0}; +- struct timeval tv2 = {0}; +- +- gettimeofday(&tv1, NULL); +- +- flow_status = gst_video_decoder_finish_frame(decoder, frame); +- gettimeofday(&tv2, NULL); +- +- GST_ERROR_OBJECT 
(spacemitdec, "ZRong -------------- spacemit handle %ld ", +- tv2.tv_sec * 1000000 + tv2.tv_usec - (tv1.tv_sec * 1000000 + tv1.tv_usec)); +- } +- +- if (flow_status != GST_FLOW_OK) +- goto sending_state; +- +- spacemitdec->downstream_flow_ret = flow_status; +-#ifdef MULTI_THREAD_DOWNSTREAM_POOL_TEST +- VDEC_ReturnOutputFrame(spacemitdec->ctx, FRAME_GetBaseData(spacemitdec->mppframe)); +-#endif +- +-done: +- if (spacemitdec->downstream_flow_ret != GST_FLOW_OK) { +- GST_INFO_OBJECT (spacemitdec, +- "pause task in dec loop (%d)!", spacemitdec->downstream_flow_ret); +- +- gst_pad_pause_task (GST_VIDEO_DECODER_SRC_PAD (spacemitdec)); +- } +- return; +- +-flushing: +-{ +- spacemitdec->downstream_flow_ret = GST_FLOW_FLUSHING; +- +- while (1) { +- frame = gst_video_decoder_get_oldest_frame (decoder); +- if(frame == NULL) +- break; +- gst_video_decoder_release_frame (decoder, frame); +- } +- GST_INFO_OBJECT (spacemitdec, "flushing spacemit decoder"); +- +- goto done; +-} +- +-no_mppframe: +-{ +- //GST_LOG_OBJECT (spacemitdec, +- // "No out mppframes to request!"); +- spacemitdec->downstream_flow_ret = GST_FLOW_OK; +- goto done; +-} +- +-fill_buffer_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, +- "fill buffer err in dec loop, flow status: %d!", flow_status); +- spacemitdec->downstream_flow_ret = flow_status; +- gst_video_codec_frame_unref (frame); +- goto done; +-} +- +-finish_work: +-{ +- GST_DEBUG_OBJECT (spacemitdec, +- "Get eos, Finished work and paused task!"); +- spacemitdec->downstream_flow_ret = GST_FLOW_EOS; +- +- goto done; +- +-} +-fream_null: +-{ +- GST_ERROR_OBJECT (spacemitdec, "get oldest frame fail!"); +- spacemitdec->downstream_flow_ret = GST_FLOW_ERROR; +- goto done; +-} +- +-sending_state: +-{ +- spacemitdec->downstream_flow_ret = flow_status; +- if (flow_status == GST_FLOW_EOS) { +- GST_DEBUG_OBJECT (spacemitdec, +- "Get eos, Finished work!"); +- } else if (flow_status == GST_FLOW_ERROR) { +- GST_ERROR_OBJECT (spacemitdec, +- "send error and paused task!"); 
+- } else if (flow_status == GST_FLOW_FLUSHING) { +- spacemitdec->flushing = TRUE; +- GST_DEBUG_OBJECT (spacemitdec, +- "Get GST_FLOW_FLUSHING from finish frame!"); +- +- goto flushing; +- } else { +- GST_ERROR_OBJECT (spacemitdec, +- "Get an unsupport flow status return after finish frame!"); +- } +- goto done; +-} +- +-} +-static gboolean +-gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); +- GST_ERROR_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in spacemitdec pool_set_active!"); +- GstVideoCodecState *state; +- GstBufferPool *pool; +- guint size, min, max, i; +- GstStructure *config; +- GstCaps *caps = NULL; +- GstVideoInfo vinfo; +-/* +- i = 0; +- while (i < gst_query_get_n_allocation_pools (query)) { +- gst_query_parse_nth_allocation_pool (query, i, &pool, NULL, NULL, NULL); +- i++; +- if (pool) { +- GST_DEBUG_OBJECT (spacemitdec, +- "upstream provides pool: %" GST_PTR_FORMAT, pool); +- gst_object_unref (pool); +- } +- } +- +- gst_query_parse_allocation (query, &caps, NULL); +-*/ +- +- pool = gst_video_decoder_get_buffer_pool (GST_VIDEO_DECODER (spacemitdec)); +- +- if (pool) { +- config = gst_buffer_pool_get_config (pool); +- if (!gst_buffer_pool_config_get_params (config, &caps, NULL, &min, &max)) { +- GST_ERROR_OBJECT (spacemitdec, "Can't get buffer pool params"); +- gst_structure_free (config); +- return FALSE; +- } +- gst_structure_free (config); +- } +- +- if (caps) { +- spacemitdec->pool = gst_spacemit_buffer_pool_new (); +- config = gst_buffer_pool_get_config (spacemitdec->pool); +- gst_buffer_pool_config_add_option (config, +- GST_BUFFER_POOL_OPTION_VIDEO_META); +- +- gst_video_info_init (&vinfo); +- gst_video_info_from_caps (&vinfo, caps); +- min = MAX (min, 8); +- +- gst_buffer_pool_config_set_params (config, caps, +- vinfo.size, min, max); +- if (!gst_buffer_pool_set_config (spacemitdec->pool, config)) { +- GST_ERROR_OBJECT (spacemitdec, "Failed to set config on 
spacemit pool"); +- gst_object_unref (spacemitdec->pool); +- spacemitdec->pool = NULL; +- return FALSE; +- } +- if (!gst_buffer_pool_set_active (spacemitdec->pool, TRUE)) { +- GST_ERROR_OBJECT (spacemitdec, "Failed to activate internal pool"); +- gst_object_unref (spacemitdec->pool); +- spacemitdec->pool = NULL; +- return FALSE; +- } +- GST_DEBUG_OBJECT (spacemitdec, +- "use spacemit bufferpool: %" GST_PTR_FORMAT, spacemitdec->pool); +- +- pool = gst_video_decoder_get_buffer_pool (decoder); +- if(!gst_buffer_pool_set_active (pool, FALSE)) +- GST_ERROR_OBJECT (spacemitdec, "Failed to set acitve false on pool %" GST_PTR_FORMAT, pool); +- gst_object_unref (pool); +- gst_caps_unref (caps); +- } else { +- GST_ERROR_OBJECT (spacemitdec, "Can't get pool caps params"); +- return FALSE; +- } +- +- return TRUE; +-} +- +-static gboolean +-gst_spacemitdec_set_src_caps (GstSpacemitDec * thiz) +-{ +- GstVideoCodecState *output_state; +- GstVideoInfo *vinfo; +- GstVideoFormat format; +- guint width, height; +- +- width = thiz->ctx->stVdecPara.nWidth; +- height = thiz->ctx->stVdecPara.nHeight; +- +- format = gst_mpp_format_change(thiz->eOutputPixelFormat); +- if (format == GST_VIDEO_FORMAT_UNKNOWN) { +- GST_ERROR_OBJECT (thiz, "Failed to find a valid video format"); +- return FALSE; +- } +- +- output_state = +- gst_video_decoder_set_output_state (GST_VIDEO_DECODER (thiz), +- format, width, height, thiz->input_state); +- vinfo = &output_state->info; +- output_state->caps = gst_video_info_to_caps (vinfo); +- +- if (srcpad_can_dmabuf (thiz)) { +- gst_caps_set_features (output_state->caps, 0, +- gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_DMABUF, NULL)); +- GST_DEBUG_OBJECT (thiz, "set DMABUF feature to spacemitdec src cap %" GST_PTR_FORMAT, output_state->caps); +- } +- +- thiz->width = width; +- thiz->height = height; +- +- return TRUE; +-} +- +-/* function: +- * Receive the data stream of upstream for decoder +- */ +-static GstFlowReturn +-gst_spacemitdec_handle_frame 
(GstVideoDecoder * decoder, +- GstVideoCodecFrame * frame) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC (decoder); +- GstMapInfo map_info; +- GstClockTime pts; +- GstFlowReturn flow_status; +- GstVideoFrame video_frame; +- guint actual_width, actual_height; +- guint8 *p; +- guint component_width, component_height; +- GstFlowReturn ret; +- GstVideoFormat format; +- GstVideoInfo *vinfo; +- +- if (G_UNLIKELY (spacemitdec->flushing)) +- goto flushing; +- +-#ifndef SINGLE_THREAD_DOWNSTREAM_POOL_TEST +- if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- starting decoding thread"); +- +- gst_pad_start_task (decoder->srcpad, +- (GstTaskFunction) gst_st_mpp_dec_loop, decoder, NULL); +- } +-#endif +- if (G_UNLIKELY(spacemitdec->downstream_flow_ret != GST_FLOW_OK)) +- goto downstream_err; +- +- if (G_LIKELY(frame)) { +- spacemitdec->cur_frame_number = frame->system_frame_number; +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in handle frame!"); +- +- GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); +- if (!gst_buffer_map (frame->input_buffer, &map_info, GST_MAP_READ)) +- goto map_err; +- +- PACKET_SetDataPointer(spacemitdec->mpppacket, map_info.data); +- PACKET_SetLength(spacemitdec->mpppacket, map_info.size); +- MppData * tmp = PACKET_GetBaseData(spacemitdec->mpppacket); +- tmp->bEos = 0; +- +- ret = VDEC_Decode(spacemitdec->ctx, PACKET_GetBaseData(spacemitdec->mpppacket)); +- +- gst_buffer_unmap (frame->input_buffer, &map_info); +- +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- +- if (ret) { +- GST_ERROR_OBJECT (spacemitdec, "VDEC_Decode return error! 
(%d)", ret); +- ret = gst_video_decoder_drop_frame (decoder, frame); +- goto out; +- } +- gst_video_codec_frame_unref (frame); +- } else { +- GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); +- int count=3; +- while(count>0) +- { +- MppData * tmp = PACKET_GetBaseData(spacemitdec->mpppacket); +- tmp->bEos = 1; +- PACKET_SetLength(spacemitdec->mpppacket, 0); +- ret = VDEC_Decode(spacemitdec->ctx, PACKET_GetBaseData(spacemitdec->mpppacket)); +- +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in handle else, %d!", ret); +- count--; +- } +- spacemitdec->cur_frame_number = -1; +- ret = GST_FLOW_EOS; +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- +- goto out; +- } +- actual_width = spacemitdec->ctx->stVdecPara.nWidth; +- actual_height = spacemitdec->ctx->stVdecPara.nHeight; +- +- if (!gst_pad_has_current_caps (GST_VIDEO_DECODER_SRC_PAD (spacemitdec)) +- || actual_width != spacemitdec->width +- || actual_height != spacemitdec->height) { +- +- if (!gst_spacemitdec_set_src_caps (spacemitdec)) +- goto not_negotiated_err; +- +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- bf decoder_negotiate, %d, %d, (%d %d)!", actual_width, actual_height, format, spacemitdec->eOutputPixelFormat); +- +- if (!gst_video_decoder_negotiate (decoder)) +- goto not_negotiated_err; +- +-#if !defined(MULTI_THREAD_DOWNSTREAM_POOL_TEST) && !defined(SINGLE_THREAD_DOWNSTREAM_POOL_TEST) +- if (!gst_buffer_pool_set_active (spacemitdec->pool, TRUE)) +- goto acitve_fail; +- +- // if (!gst_spacemitdec_pool_set_active(decoder)) +-// goto acitve_fail; +-#endif +- } +- +-#ifdef SINGLE_THREAD_DOWNSTREAM_POOL_TEST +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 555, %d", ret); +- MppFrame *mppframe = NULL; +- +- frame = gst_video_decoder_get_oldest_frame (decoder); +- if (frame == NULL) { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); +- ret = GST_FLOW_OK; +- goto out; +- } else { +- flow_status = gst_video_decoder_allocate_output_frame (decoder, frame); 
+- if (flow_status != GST_FLOW_OK) { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); +- ret = GST_FLOW_OK; +- goto out; +- } +- +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong 333, %d, %d", frame->ref_count, flow_status); +- +- int rett; +- rett = VDEC_RequestOutputFrame_2 (spacemitdec->ctx, (MppData **)&mppframe); +- if (rett == MPP_CODER_NO_DATA || mppframe == NULL) { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); +- ret = GST_FLOW_OK; +- goto out; +- } +- +- spacemitdec->mppframe = mppframe; +- flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); +- if (flow_status != GST_FLOW_OK) { +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong errrrrrrrrrrrrrrrrrrrrr"); +- ret = GST_FLOW_OK; +- goto out; +- } +- VDEC_ReturnOutputFrame(spacemitdec->ctx, FRAME_GetBaseData(spacemitdec->mppframe)); +- +- flow_status = gst_video_decoder_finish_frame(decoder, frame); +- spacemitdec->downstream_flow_ret = flow_status; +- } +-#endif +- GST_DEBUG_OBJECT (spacemitdec, "@@@ ZRong ------------------------- af current_caps, %d", spacemitdec->downstream_flow_ret); +- ret = spacemitdec->downstream_flow_ret; +- +-out: +- if (spacemitdec->downstream_flow_ret == GST_FLOW_FLUSHING) +- ret = GST_FLOW_FLUSHING; +- return ret; +- +-out_clked: +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- return ret; +- +-not_negotiated_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, +- "Failed to negotiate with downstream elements"); +- ret = GST_FLOW_NOT_NEGOTIATED; +- goto out; +-} +- +-acitve_fail: +-{ +- GST_ERROR_OBJECT (spacemitdec, "acitve spacemit pool fail!"); +- gst_object_unref (spacemitdec->pool); +- ret = GST_FLOW_ERROR; +- goto out; +-} +- +-map_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, "Cannot map input buffer!"); +- gst_video_codec_frame_unref (frame); +- ret = GST_FLOW_ERROR; +- goto out_clked; +-} +- +-downstream_err: +-{ +- GST_ERROR_OBJECT (spacemitdec, "Downstream returned %s", +- gst_flow_get_name 
(spacemitdec->downstream_flow_ret)); +- ret = spacemitdec->downstream_flow_ret; +- goto out; +-} +-flushing: +-{ +- GST_WARNING_OBJECT (spacemitdec, "flushing"); +- ret = GST_FLOW_FLUSHING; +- gst_video_decoder_release_frame (decoder, frame); +- goto out; +-} +-} +-/* function: +- * Receive event GST_EVENT_SEGMENT_DONE,GST_EVENT_EOS or reverse playback, +- * ask the subclass to process the remaining data in the decoder. After this +- * operation, the subclass can refuse to decode the new data +- * called time: +- * in gst_video_decoder_drain_out、gst_video_decoder_flush_parse +- * need to modify................................. +- */ +-static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- +- GST_DEBUG_OBJECT(spacemitdec, "finish"); +- +- /* Decoder not negotiated yet */ +- if (spacemitdec->width == 0) +- return GST_FLOW_OK; +- GST_DEBUG_OBJECT(spacemitdec, "finish test1"); +- +- /* Drain all pending frames */ +- while ((gst_spacemitdec_handle_frame (decoder, NULL)) == GST_FLOW_OK); +- +- GST_DEBUG_OBJECT(spacemitdec, "finish test3"); +- spacemitdec->at_eos = TRUE; +- +- GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); +- +- /* Wait for task thread to pause */ +- GstTask *task = decoder->srcpad->task; +- if (task) { +- //GST_OBJECT_LOCK (task); +- while (GST_TASK_STATE (task) == GST_TASK_STARTED) { +- GST_DEBUG_OBJECT(spacemitdec, "finish test4"); +- g_usleep(400 * 1000); +- //GST_TASK_WAIT (task); +- } +- GST_DEBUG_OBJECT(spacemitdec, "finish test5"); +- //GST_OBJECT_UNLOCK (task); +- } +- gst_pad_stop_task (decoder->srcpad); +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- +- GST_DEBUG_OBJECT(spacemitdec, "finish test2"); +- +- return GST_FLOW_OK; +-} +- +-static GstBufferPool * +-gst_spacemitdec_create_buffer_pool (GstSpacemitDec * thiz, GstVideoInfo * info, +- guint num_buffers) +-{ +- GstBufferPool *pool = NULL; +- GstAllocator *allocator = NULL; +- GstStructure *config; +- GstCaps *caps = 
NULL; +- GstVideoAlignment align; +- +- pool = gst_spacemit_buffer_pool_new (); +- if (!pool) +- goto error_no_pool; +- +- allocator = gst_spacemit_allocator_new (); +- if (!allocator) +- goto error_no_allocator; +- +- gst_spacemit_allocator_configure(allocator, info, thiz->use_dmabuf); +- +- caps = gst_video_info_to_caps (info); +- +- config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); +- gst_buffer_pool_config_set_params (config, caps, +- GST_VIDEO_INFO_SIZE (info), num_buffers, num_buffers); +- gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); +- gst_buffer_pool_config_add_option (config, +- GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); +- gst_caps_unref (caps); +- +- if (thiz->use_dmabuf) +- gst_buffer_pool_config_add_option (config, +- GST_BUFFER_POOL_OPTION_SPM_USE_DMABUF); +- +- gst_buffer_pool_config_set_video_alignment (config, &align); +- gst_buffer_pool_config_set_allocator (config, allocator, NULL); +- gst_object_unref (allocator); +- +- if (!gst_buffer_pool_set_config (pool, config)) +- goto error_pool_config; +- +- return pool; +- +-error_no_pool: +- { +- GST_ERROR_OBJECT (thiz, "failed to create spacemitdec bufferpool"); +- return NULL; +- } +-error_no_allocator: +- { +- GST_ERROR_OBJECT (thiz, "failed to create allocator"); +- gst_object_unref (pool); +- return NULL; +- } +-error_pool_config: +- { +- GST_ERROR_OBJECT (thiz, "failed to set config"); +- gst_object_unref (pool); +- gst_object_unref (allocator); +- return NULL; +- } +-} +- +-/* function: +- * Set the parameters of the allocator that allocates the output buffer. +- * The incoming query parameter contains information about the allocator +- * of downstream components. 
The default implementation of the parent class +- * needs to be called in the subclass implementation +- */ +-static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query) +-{ +- GstSpacemitDec *spacemitdec = GST_SPACEMITDEC(decoder); +- GST_ERROR_OBJECT (spacemitdec, "@@@ ZRong ------------------------- in spacemitdec decide_allocation!"); +- GstBufferPool *pool; +- guint size, min, max, i; +- GstStructure *pool_config; +- GstCaps *caps = NULL; +- GstVideoInfo vinfo; +- +- if (!GST_VIDEO_DECODER_CLASS(gst_spacemitdec_parent_class)->decide_allocation(decoder, query)) +- return FALSE; +- +- /* Get the buffer pool config decided on by the base class. The base +- class ensures that there will always be at least a 0th pool in +- the query. */ +- gst_query_parse_nth_allocation_pool(query, 0, &pool, &size, &min, &max); +- pool_config = gst_buffer_pool_get_config(pool); +- gst_buffer_pool_config_get_params (pool_config, &caps, &size, +- &min, &max); +- +- GST_DEBUG_OBJECT (decoder, "get pool caps: %" GST_PTR_FORMAT, caps); +- if (_gst_caps_has_feature (caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) { +- GST_INFO_OBJECT (decoder, "This spacemit decoder uses DMABuf memory"); +- spacemitdec->use_dmabuf = TRUE; +- } +- +- /* Decoder always use its own pool. */ +- if (!spacemitdec->pool) { +- GstVideoCodecState *output_state = +- gst_video_decoder_get_output_state (GST_VIDEO_DECODER (spacemitdec)); +- gst_clear_object (&spacemitdec->pool); +- GST_INFO_OBJECT (decoder, "create new spacemitdec bufferpool"); +- spacemitdec->pool = +- gst_spacemitdec_create_buffer_pool(spacemitdec, &output_state->info, (4 > min) ? 
4 : min); +- gst_video_codec_state_unref (output_state); +- if (!spacemitdec->pool) { +- GST_ERROR_OBJECT (decoder, "failed to create new pool"); +- goto failed_to_create_pool; +- } +- } +- GST_DEBUG_OBJECT (spacemitdec, +- "upstream provides the pool is: %" GST_PTR_FORMAT, pool); +- +- /* If downstream supports video meta and video alignment, +- * we can replace with our own spacemit bufferpool and use it +- */ +-#if 0 +- if (gst_buffer_pool_has_option (pool, +- GST_BUFFER_POOL_OPTION_VIDEO_META)) { +- GstStructure *config; +- GstAllocator *allocator; +- +- /* Remove downstream's pool */ +- gst_structure_free (pool_config); +- gst_object_unref (pool); +- +- pool = gst_object_ref (spacemitdec->pool); +- /* Set the allocator of new spacemitdec bufferpool */ +- config = gst_buffer_pool_get_config (GST_BUFFER_POOL_CAST (pool)); +- +- if (gst_buffer_pool_config_get_allocator (config, &allocator, NULL)) +- gst_query_set_nth_allocation_param (query, 0, allocator, NULL); +- gst_structure_free (config); +- +- gst_query_set_nth_allocation_pool (query, 0, pool, size, min, +- max); +- } else { +- goto no_support; +- } +-#endif +- if (pool) +- gst_object_unref (pool); +- +- return TRUE; +- +-failed_to_create_pool: +- GST_ERROR_OBJECT (decoder, "failed to set buffer pool config"); +- if (pool) +- gst_object_unref (pool); +- return FALSE; +- +-no_support: +- GST_ERROR_OBJECT (spacemitdec, +- "error! 
upstream provides the strange pool: %" GST_PTR_FORMAT, pool); +- if (pool) +- gst_object_unref (pool); +- return FALSE; +-} +- +-static GstStateChangeReturn +-gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition) +-{ +- GstVideoDecoder *decoder = GST_VIDEO_DECODER (element); +- GST_DEBUG("ZRong ------------------ in spacemitdec change state, %x", transition); +- +- if (transition == GST_STATE_CHANGE_PAUSED_TO_READY) { +- GST_VIDEO_DECODER_STREAM_LOCK (decoder); +- if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) +- return TRUE; +- +- GST_DEBUG_OBJECT (decoder, "stopping decoding thread"); +- gst_pad_stop_task (decoder->srcpad); +- GST_VIDEO_DECODER_STREAM_UNLOCK (decoder); +- } +- +- return GST_ELEMENT_CLASS (parent_class)->change_state (element, transition); +-} +- +-gboolean +-gst_spacemitdec_register (GstPlugin * plugin, guint rank) +-{ +- GST_DEBUG_CATEGORY_INIT (gst_spacemitdec_debug_category, "spacemitdec", 0, +- "debug category for spacemitdec element"); +- +- return gst_element_register (plugin, "spacemitdec", rank, +- GST_TYPE_SPACEMITDEC); +-} +- +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc.c b/ext/spacemit/spacemitcodec/gstspacemitenc.c +index c1f2085..a0c0370 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitenc.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.c +@@ -4,13 +4,14 @@ + #include + + #include "gstspacemitenc.h" +- ++#include + #include + #include + #include + #include + #include + #define SPM_PENDING_MAX 5 /* Max number of MPP pending frame */ ++#define PACKET_SIZE (2 * 1024 * 1024) + + GST_DEBUG_CATEGORY_STATIC (gst_spacemitenc_debug_category); + #define GST_CAT_DEFAULT gst_spacemitenc_debug_category +@@ -48,6 +49,7 @@ static gboolean gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, + + #define GST_SPM_ENC_EVENT_MUTEX(encoder) (&GST_SPACEMITENC (encoder)->event_mutex) + #define GST_SPM_ENC_EVENT_COND(encoder) (&GST_SPACEMITENC (encoder)->event_cond) ++#define 
MAX_POLL_TIME 0xffffffff + + #define GST_SPM_ENC_BROADCAST(encoder) \ + g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ +@@ -85,26 +87,54 @@ enum + N_PROPERTIES + }; + ++#ifndef GST_CAPS_FEATURE_MEMORY_DMABUF ++#define GST_CAPS_FEATURE_MEMORY_DMABUF "memory:DMABuf" ++#endif ++ ++#define SPM_ENC_IN_FORMATS "I420, NV21, NV12" ++#define SPM_ENC_FORMATS SPM_ENC_IN_FORMATS ++#define SPM_ENC_CAPS_MAKE(fmts) \ ++ GST_VIDEO_CAPS_MAKE (fmts) ";" \ ++ GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_DMABUF, fmts) ++ + /* pad templates */ + + static GstStaticPadTemplate gst_spacemitenc_sink_template = + GST_STATIC_PAD_TEMPLATE ("sink", +- GST_PAD_SINK, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{I420, NV12, NV21}")) +- ); ++ GST_PAD_SINK, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ( ++ SPM_ENC_CAPS_MAKE ("{" SPM_ENC_FORMATS "}") ";" ++ )); + + static GstStaticPadTemplate gst_spacemitenc_src_template = + GST_STATIC_PAD_TEMPLATE ("src", +- GST_PAD_SRC, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS +- ( ++ GST_PAD_SRC, ++ GST_PAD_ALWAYS, ++ GST_STATIC_CAPS ++ ( + "video/x-h264," + "stream-format = (string) byte-stream," + "alignment = (string)au," + "width=(int) [1,MAX], " "height=(int) [1,MAX]" +- )); ++ ";" ++ "video/x-h265," ++ "stream-format = (string) byte-stream," ++ "alignment = (string)au," ++ "width=(int) [640,MAX], " "height=(int) [480,MAX]" ++ ";" ++ "video/mpeg," ++ "mpegversion = (int) { 1, 2, 4 }," ++ "parsed = (boolean) true," ++ "systemstream = (boolean) false" ++ ";" ++ "image/jpeg" ++ ";" ++ "video/x-vp8" ++ ";" ++ "video/x-vp9" ++ )); + + /* class initialization */ + G_DEFINE_TYPE(GstSpacemitEnc, gst_spacemitenc, GST_TYPE_VIDEO_ENCODER); +@@ -170,7 +200,7 @@ gst_spacemitenc_init (GstSpacemitEnc * thiz) + { + thiz->width = 1280; + thiz->height = 720; +- thiz->eCodecType = CODEC_SFOMX; ++ thiz->eCodecType = CODEC_V4L2_LINLONV5V7; + thiz->eCodingType = CODING_H264; + thiz->PixelFormat = PIXEL_FORMAT_I420; + thiz->gop_size = DEFAULT_GOP_SIZE; 
+@@ -385,8 +415,9 @@ FILE *fbbb; + static gboolean + gst_spacemitenc_start (GstVideoEncoder * encoder) + { +- int ret = 0; ++ gint ret = 0; + GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); ++ + GST_DEBUG_OBJECT (thiz, "start"); + + if(thiz->ctx) +@@ -395,26 +426,30 @@ gst_spacemitenc_start (GstVideoEncoder * encoder) + thiz->ctx = NULL; + } + thiz->ctx = VENC_CreateChannel(); +- thiz->ctx->eCodecType = thiz->eCodecType; ++ if (!thiz->ctx) ++ goto alloc_err; ++ // thiz->ctx->eCodecType = thiz->eCodecType; + + thiz->mpppacket = PACKET_Create (); + if (!thiz->mpppacket) + goto alloc_err; ++ PACKET_Alloc(thiz->mpppacket, PACKET_SIZE); + + thiz->mppframe = FRAME_Create (); + if (!thiz->mppframe) + goto alloc_err; + +- GST_ERROR_OBJECT (thiz, "ZRong ------------------------- finish start!!!"); + g_mutex_init (&thiz->event_mutex); + g_cond_init (&thiz->event_cond); ++ thiz->bufs_hash = g_hash_table_new (g_direct_hash, g_direct_equal); + thiz->pending_frames = 0; + fbbb = fopen("/tmp/out.yuv", "ab+"); + ++ GST_DEBUG_OBJECT (thiz, "finish start"); + return TRUE; + + alloc_err: +- GST_ERROR_OBJECT (thiz, "can not alloc for mpp structure, please check !"); ++ GST_ERROR_OBJECT (thiz, "alloc mpp structure error, please check !"); + return FALSE; + } + +@@ -423,8 +458,8 @@ gst_spacemitenc_stop (GstVideoEncoder * encoder) + { + GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); + +- GST_ERROR_OBJECT (thiz, "xxxxxxxxxxxxxx stop start"); +- fclose(fbbb); ++ GST_DEBUG_OBJECT (thiz, "stop"); ++ fclose(fbbb); + + if(thiz->ctx) + { +@@ -442,7 +477,7 @@ gst_spacemitenc_stop (GstVideoEncoder * encoder) + if (!(gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED)) + return TRUE; + +- GST_DEBUG_OBJECT (thiz, "spacemit_enc_stop called"); ++ GST_DEBUG_OBJECT (thiz, "finish stop"); + + gst_pad_stop_task (encoder->srcpad); + g_cond_clear (&thiz->event_cond); +@@ -450,74 +485,190 @@ gst_spacemitenc_stop (GstVideoEncoder * encoder) + + return TRUE; + } ++ ++//建立input buffer与mppframe 
id间的hash关系 ++void setup_gstbuffer_and_mppframe_hash (GstSpacemitEnc * thiz, gint mppframe_id, GstBuffer *input_buffer) ++{ ++ gst_buffer_ref (input_buffer); ++ g_hash_table_insert (thiz->bufs_hash, GINT_TO_POINTER(mppframe_id), input_buffer); ++ ++ GST_DEBUG_OBJECT (thiz, "hash had:%u bufs now, after add. buf %p ref:%d", ++ g_hash_table_size(thiz->bufs_hash), input_buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(input_buffer)); ++} ++ ++//从al_enc_return_input_frame接口或者当前可以release的buffer ++gint try_to_release_upstream_buffer (GstSpacemitEnc * thiz) ++{ ++ gint id; ++ GstBuffer *buffer; ++ gint ret = TRUE; ++ ++ do { ++ id = VENC_ReturnInputFrame (thiz->ctx, NULL); ++ if (id >= 0) { ++ buffer = (GstBuffer *) g_hash_table_lookup (thiz->bufs_hash, GINT_TO_POINTER(id)); ++ if (buffer) { ++ GST_DEBUG_OBJECT (thiz, "release buf %p, buf ref:%d. hash had:%u bufs now, before remove. id:%d", ++ buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(buffer), g_hash_table_size(thiz->bufs_hash), id); ++ g_hash_table_remove(thiz->bufs_hash, GINT_TO_POINTER(id)); ++ gst_buffer_unref (buffer); ++ } else { ++ ret = FALSE; ++ GST_ERROR_OBJECT (thiz, "fail to release upstream buf, id:%d", id); ++ } ++ } ++ } while (id != -1); ++ // g_hash_table_foreach (thiz->bufs_hash, release_gst_buffer, thiz) ++ ++ return ret; ++} ++ ++static void ++release_all_buffer (gpointer key, gpointer value, gpointer user_data) ++{ ++ GstBuffer *buffer = (GstBuffer *)value; ++ GstSpacemitEnc *thiz = (GstSpacemitEnc *)user_data; ++ ++ g_hash_table_remove(thiz->bufs_hash, key); ++ ++ GST_DEBUG_OBJECT (thiz, "release buffer %p, before release ref: %d", buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(buffer)); ++ gst_buffer_unref (buffer); ++} ++ ++//eos发生,查询hash是否还剩映射,清零方能退出 ++static void drain_all_upstream_buffer (GstSpacemitEnc * thiz) ++{ ++ gint sum; ++ gint try_count = 0; ++ ++ do { ++ try_to_release_upstream_buffer(thiz); ++ sum = g_hash_table_size(thiz->bufs_hash); ++ GST_DEBUG_OBJECT (thiz, "hash had:%u bufs now, in drain", sum); ++ ++ 
g_usleep(1000); ++ } while (++try_count < 10); ++ ++ GST_DEBUG_OBJECT (thiz, "hash had:%u bufs now, after drain. try times: %d", ++ g_hash_table_size(thiz->bufs_hash), try_count); ++ ++ g_hash_table_foreach (thiz->bufs_hash, release_all_buffer, thiz); ++} ++ ++static gint32 gst_spacemitenc_request_packet (GstSpacemitEnc *thiz) ++{ ++ gint32 ret; ++ static guint count = 0; ++ ++ do { ++ // ret = VENC_RequestOutputStreamBuffer(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ ret = VENC_GetOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ if (ret != MPP_CODER_NO_DATA) { ++ break; ++ } else { ++ if (count >= 2 && count < MAX_POLL_TIME) { ++ g_usleep(500); ++ } else if (count >= MAX_POLL_TIME) { ++ GST_ERROR_OBJECT (thiz, "try max times: %u, fail return(%d)", MAX_POLL_TIME, ret); ++ ret = MPP_POLL_FAILED; ++ break; ++ } ++ } ++ count++; ++ } while (1); ++ ++ count = 0; ++ ++ return ret; ++} ++ + static void + gst_spacemitenc_loop (GstVideoEncoder * encoder) + { + GstVideoCodecFrame * frame = NULL; + GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); +- int ret; ++ gint ret; + GstFlowReturn flow_status; + GstMapInfo map; +- static int lenght = 0; +- +- GST_SPM_ENC_WAIT (encoder, thiz->pending_frames || thiz->at_eos); +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); ++ static gint lenght = 0; + +- ret = VENC_RequestOutputStreamBuffer(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ ret = gst_spacemitenc_request_packet(thiz); + if (ret == MPP_CODER_NO_DATA) +- goto sf_no_frame; ++ goto err_packet; + else if (ret == MPP_CODER_EOS) + goto finish_work; ++ else if (ret != MPP_OK) ++ goto err_packet; ++ ++ // GST_VIDEO_ENCODER_STREAM_LOCK (encoder); + + frame = gst_video_encoder_get_oldest_frame (encoder); + if (!frame) { +- GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish last frame"); +- thiz->downstream_flow_ret = GST_FLOW_EOS; ++ GST_WARNING_OBJECT (thiz, "get oldest frame with null"); ++ + GstBuffer *buffer; +- buffer = +- 
gst_buffer_new_wrapped (g_memdup (PACKET_GetDataPointer(thiz->mpppacket), +- PACKET_GetLength(thiz->mpppacket)), PACKET_GetLength(thiz->mpppacket)); ++ buffer = gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); + flow_status = gst_pad_push (GST_VIDEO_ENCODER_SRC_PAD (thiz), buffer); +- GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish last frame %d", flow_status); +- +- VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); +- goto done; ++ // GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ if (flow_status == GST_FLOW_EOS) { ++ goto finish_work; ++ } else if (flow_status == GST_FLOW_OK) { ++ goto done; ++ } else { ++ goto err_case; ++ } + } +- thiz->pending_frames--; +- GST_SPM_ENC_BROADCAST (encoder); + +- //if (videoFrameTypeIDR == frame_info.eFrameType) { +- // GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); +- //} else { +- GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); +- //} +- frame->output_buffer = +- gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); ++ GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); ++ ++ frame->output_buffer = gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); + gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); + +- lenght += PACKET_GetLength(thiz->mpppacket); +- GST_ERROR_OBJECT (thiz, "ZRong ------------------- finish one frame, %d, %d", lenght, PACKET_GetLength(thiz->mpppacket)); + memcpy (map.data, PACKET_GetDataPointer(thiz->mpppacket), PACKET_GetLength(thiz->mpppacket)); + + gst_buffer_unmap (frame->output_buffer, &map); +- VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); ++ // GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ ++ lenght += PACKET_GetLength(thiz->mpppacket); ++ if (frame->input_buffer) ++ GST_DEBUG_OBJECT (thiz, "loop finish frame %p, ref_count:%d, buf(%p, %p) ref:(%d, %d) (%d, %d), pts:%ld", ++ frame, frame->ref_count, frame->input_buffer, 
frame->output_buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(frame->input_buffer), ++ GST_MINI_OBJECT_REFCOUNT_VALUE(frame->output_buffer), lenght, PACKET_GetLength(thiz->mpppacket), PACKET_GetPts(thiz->mpppacket)); ++ else ++ GST_DEBUG_OBJECT (thiz, "loop finish frame %p, ref_count:%d, buf %p, ref:(%d) (%d, %d), pts:%ld", ++ frame, frame->ref_count, frame->output_buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(frame->output_buffer), ++ lenght, PACKET_GetLength(thiz->mpppacket), PACKET_GetPts(thiz->mpppacket)); ++ ++ flow_status = gst_video_encoder_finish_frame (encoder, frame); ++ if (flow_status == GST_FLOW_EOS) { ++ goto finish_work; ++ } else if (flow_status != GST_FLOW_OK) { ++ goto err_case; ++ } ++ thiz->downstream_flow_ret = GST_FLOW_OK; + +- thiz->downstream_flow_ret = gst_video_encoder_finish_frame (encoder, frame); + done: +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); ++ try_to_release_upstream_buffer(thiz); + return; + +-sf_no_frame: ++err_case: + { +- GST_LOG_OBJECT (thiz, +- "No out frame to request%d!", thiz->pending_frames); +- thiz->downstream_flow_ret = GST_FLOW_OK; ++ GST_ERROR_OBJECT (thiz, "other cases err! return %d!", ret); ++ thiz->downstream_flow_ret = GST_FLOW_ERROR; ++ goto done; ++} ++ ++err_packet: ++{ ++ GST_ERROR_OBJECT (thiz, "get packet err! 
return %d!", ret); ++ thiz->downstream_flow_ret = GST_FLOW_ERROR; + goto done; + } ++ + finish_work: + { +- GST_DEBUG_OBJECT (thiz, +- "Get eos, Finished work and paused task!"); ++ GST_DEBUG_OBJECT (thiz, "Get eos, Finish work and pause task!"); ++ drain_all_upstream_buffer(thiz); + gst_pad_pause_task (GST_VIDEO_ENCODER_SRC_PAD (thiz)); + thiz->downstream_flow_ret = GST_FLOW_EOS; + goto done; +@@ -555,14 +706,27 @@ gst_change_mpp_ecoding_type (GstStructure * s) + if (gst_structure_has_name (s, "video/x-h265")) + return CODING_H265; + ++ if (gst_structure_has_name (s, "image/jpeg")) ++ return CODING_MJPEG; ++ ++ if (gst_structure_has_name (s, "video/x-vp8")) ++ return CODING_VP8; ++ ++ if (gst_structure_has_name (s, "video/x-vp9")) ++ return CODING_VP9; ++ + return CODING_UNKNOWN; + } ++static guint get_stride(guint width, guint align) { ++ return (width + align - 1) & (~(align - 1)); ++} ++ + static gboolean + gst_spacemitenc_init_encoder (GstSpacemitEnc * thiz, + GstVideoCodecState * state, GstCaps *caps) + { + guint width, height, fps_n, fps_d; +- int stride0, stride1, stride2; ++ gint stride0, stride1, stride2; + GstStructure *structure; + GstVideoFormat fmt; + gboolean ret = TRUE; +@@ -579,24 +743,25 @@ gst_spacemitenc_init_encoder (GstSpacemitEnc * thiz, + structure = gst_caps_get_structure (caps, 0); + thiz->eCodingType = gst_change_mpp_ecoding_type (structure); + if (thiz->eCodingType == CODING_UNKNOWN) { +- GST_ERROR_OBJECT(thiz, "no support this eCodingType"); ++ GST_ERROR_OBJECT(thiz, "error! no support eCodingType, structure: %" GST_PTR_FORMAT, structure); + return FALSE; + } + thiz->PixelFormat = gst_change_to_mpp_format(fmt); + if(thiz->PixelFormat == PIXEL_FORMAT_UNKNOWN) { +- GST_ERROR_OBJECT(thiz, "no support this PixelFormat output"); ++ GST_ERROR_OBJECT(thiz, "error! 
no support PixelFormat, fmt: %s", gst_video_format_to_string (fmt)); + return FALSE; + } +- GST_ERROR_OBJECT (thiz, "ZRong -------------------- init_encoder %d, %d (%d %d) (%d, %d)", ++ GST_DEBUG_OBJECT (thiz, "start init encoder %d, %d (%d %d) (%d, %d)", + width, height, fps_n, fps_d, thiz->eCodingType, thiz->PixelFormat); + ++ thiz->ctx->eCodecType = thiz->eCodecType; + thiz->ctx->stVencPara.eCodingType = thiz->eCodingType; + thiz->ctx->stVencPara.nWidth = width; + thiz->ctx->stVencPara.nHeight = height; + thiz->ctx->stVencPara.PixelFormat = thiz->PixelFormat; + thiz->ctx->stVencPara.nBitrate = 5000000; + thiz->ctx->stVencPara.nFrameRate = fps_n; +- thiz->ctx->stVencPara.nStride = width; ++ thiz->ctx->stVencPara.nStride = get_stride(width, 8); + + ret = VENC_Init(thiz->ctx); + if (ret) +@@ -604,17 +769,17 @@ gst_spacemitenc_init_encoder (GstSpacemitEnc * thiz, + + VENC_SetParam(thiz->ctx, &(thiz->ctx->stVencPara)); + +- GST_ERROR_OBJECT (thiz, "ZRong ----------------------- set format %u, (%d %d %d) %s", +- fmt, stride0, stride1, stride2, gst_video_format_to_string (fmt)); ++ GST_DEBUG_OBJECT (thiz, "finish init encoder (%d %d %d) %s", ++ stride0, stride1, stride2, gst_video_format_to_string (fmt)); + + return TRUE; + + init_err: +- GST_ERROR_OBJECT (thiz, "Mpp vnec init error, please check !"); ++ VENC_DestoryChannel(thiz->ctx); ++ GST_ERROR_OBJECT (thiz, "init encoder error, please check !"); + return FALSE; + } + +- + static gboolean + gst_spacemitenc_set_format (GstVideoEncoder * encoder, + GstVideoCodecState * state) +@@ -622,38 +787,68 @@ gst_spacemitenc_set_format (GstVideoEncoder * encoder, + GstSpacemitEnc *thiz = GST_SPACEMITENC (encoder); + GstCaps *outcaps; + GstCaps *caps; ++ GstVideoInfo *vinfo; ++ GstVideoCodecState *output_state; + +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------------- start set format: %s", +- gst_caps_to_string (state->caps)); ++ GST_DEBUG_OBJECT (thiz, "start set enc format: %s", gst_caps_to_string (state->caps)); + + if 
(thiz->input_state) + gst_video_codec_state_unref (thiz->input_state); + + thiz->frame_count = 0; + thiz->input_state = gst_video_codec_state_ref (state); +- outcaps = +- gst_caps_copy (gst_static_pad_template_get_caps +- (&gst_spacemitenc_src_template)); ++ outcaps = gst_caps_copy (gst_static_pad_template_get_caps(&gst_spacemitenc_src_template)); ++ GST_DEBUG_OBJECT (thiz, "spacemitenc support outcaps %" GST_PTR_FORMAT, outcaps); + + caps = gst_pad_peer_query_caps (encoder->srcpad, outcaps); +- GST_DEBUG_OBJECT (thiz, "Returning caps %" GST_PTR_FORMAT, caps); ++ gst_caps_unref (outcaps); ++ GST_DEBUG_OBJECT (thiz, "query peer caps, returned caps %" GST_PTR_FORMAT, caps); + +- if (!gst_spacemitenc_init_encoder(thiz, state, caps)) +- return FALSE; ++ outcaps = gst_caps_copy_nth (caps, 0); ++ GST_DEBUG_OBJECT (thiz, "last caps to use: %" GST_PTR_FORMAT, outcaps); + +- if (!gst_video_encoder_set_output_state (encoder, outcaps, state)) +- return FALSE; ++ gst_caps_unref (caps); + +- if (!gst_video_encoder_negotiate (encoder)) +- return FALSE; ++ if (!gst_spacemitenc_init_encoder(thiz, state, outcaps)) ++ goto init_encoder_err; + +- if (!gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED) { +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------------- start enc thread"); +- gst_pad_start_task (encoder->srcpad, +- (GstTaskFunction) gst_spacemitenc_loop, encoder, NULL); +- } ++ output_state = gst_video_encoder_set_output_state (encoder, outcaps, state); ++ if (!output_state) ++ goto set_state_err; ++ ++ vinfo = &output_state->info; ++ output_state->caps = outcaps; ++ ++ // if (!gst_video_encoder_negotiate (encoder)) ++ // goto negotiate_err; ++ ++ // if (!gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED) { ++ // GST_DEBUG_OBJECT (thiz, "start enc thread"); ++ // gst_pad_start_task (encoder->srcpad, (GstTaskFunction) gst_spacemitenc_loop, encoder, NULL); ++ // } ++ ++ // gst_caps_unref (outcaps); + + return TRUE; ++ ++init_encoder_err: ++{ ++ // 
gst_caps_unref (outcaps); ++ GST_ERROR_OBJECT (thiz, "init encoder error, please check !"); ++ return FALSE; ++} ++set_state_err: ++{ ++ // gst_caps_unref (outcaps); ++ GST_ERROR_OBJECT (thiz, "set output state error, please check !"); ++ return FALSE; ++} ++negotiate_err: ++{ ++ // gst_caps_unref (outcaps); ++ GST_ERROR_OBJECT (thiz, "negotiate error, please check !"); ++ return FALSE; ++} + } + + static gboolean +@@ -666,6 +861,58 @@ gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, GstQuery * query) + (gst_spacemitenc_parent_class)->propose_allocation (encoder, query); + } + ++static gint32 gst_spacemitenc_venc_encode (GstSpacemitEnc * thiz, MppFrame *mppframe) ++{ ++ gint32 ret; ++ static guint count = 0; ++ ++ do { ++ // ret = VENC_Encode(thiz->ctx, FRAME_GetBaseData(mppframe)); ++ ret = VENC_SendInputFrame(thiz->ctx, FRAME_GetBaseData(mppframe)); ++ if (ret != MPP_POLL_FAILED) { ++ break; ++ } else { ++ if (count >= 2 && count < MAX_POLL_TIME) { ++ g_usleep(500); ++ } else if (count >= MAX_POLL_TIME) { ++ GST_ERROR_OBJECT (thiz, "try max times: %u, fail return(%d)", MAX_POLL_TIME, ret); ++ ret = MPP_POLL_FAILED; ++ break; ++ } ++ } ++ count++; ++ } while (1); ++ ++ count = 0; ++ ++ return ret; ++} ++ ++MppFrame *get_mppframe_from_vdec_mem (GstSpacemitEnc * thiz, GstBuffer *input_buffer) ++{ ++ GstMemory *mem; ++ GstSpaceMitMemory *spm_mem; ++ MppFrame *mppframe = NULL; ++ ++ if (input_buffer != NULL) { ++ mem = gst_buffer_peek_memory (input_buffer, 0); ++ ++ if (GST_IS_SPACEMIT_ALLOCATOR (mem->allocator)) { ++ spm_mem = (GstSpaceMitMemory *) mem; ++ GST_DEBUG_OBJECT (thiz, "mem %p allocator is spacemit", mem); ++ } else { ++ spm_mem = gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), ++ GST_SPM_DMABUF_MEMORY_QUARK); ++ ++ mppframe = spm_mem->mppframe; ++ GST_DEBUG_OBJECT (thiz, "mem %p allocator is spacemit (dmabuf), mppframe id:%d, fd: %d, eos: %d", ++ mem, FRAME_GetID(mppframe), FRAME_GetFD(mppframe, 0), FRAME_GetEos(mppframe)); ++ } ++ } ++ ++ 
return mppframe; ++} ++ + static GstFlowReturn + gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, + GstVideoCodecFrame * frame) +@@ -678,30 +925,36 @@ gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, + gint i, j; + gsize buf_length = 0; + GstFlowReturn flow_ret; +- GST_DEBUG_OBJECT (thiz, "ZRong ------------- start handle frame"); ++ MppFrame *mppframe = NULL; + +- if (thiz->downstream_flow_ret != GST_FLOW_OK) { ++ GST_DEBUG_OBJECT (thiz, "start handle frame"); ++ ++ if (thiz->downstream_flow_ret != GST_FLOW_OK) + goto downstream_err; +- } + +- GST_OBJECT_LOCK (thiz); +- if (thiz->bitrate_changed || thiz->max_bitrate_changed) { +- GST_WARNING_OBJECT (thiz, +- "Error need to support this cases"); ++ if (!gst_pad_has_current_caps (GST_VIDEO_ENCODER_SRC_PAD (thiz))) { ++ GST_DEBUG_OBJECT (thiz, "start encoder negotiate"); ++ if (!gst_video_encoder_negotiate (encoder)) ++ goto negotiate_err; ++ ++ if (!gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED) { ++ GST_DEBUG_OBJECT (thiz, "start enc thread"); ++ gst_pad_start_task (encoder->srcpad, (GstTaskFunction) gst_spacemitenc_loop, encoder, NULL); ++ } + } +- GST_OBJECT_UNLOCK (thiz); + + if (frame) { + thiz->frame_count++; +- gst_video_frame_map (&video_frame, &thiz->input_state->info, +- frame->input_buffer, GST_MAP_READ); ++ FRAME_SetEos(thiz->mppframe, 0); ++ ++ gst_video_frame_map (&video_frame, &thiz->input_state->info, frame->input_buffer, GST_MAP_READ); + if (thiz->PixelFormat == PIXEL_FORMAT_I420) { + FRAME_SetDataUsedNum(thiz->mppframe, 3); + FRAME_SetDataPointer(thiz->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); + FRAME_SetDataPointer(thiz->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); + FRAME_SetDataPointer(thiz->mppframe, 2, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 2)); + } else { +- GST_DEBUG_OBJECT (thiz, "ZRong ------------- be save %s, %d, (%d, %d, %d)", ++ GST_DEBUG_OBJECT (thiz, "debug save video_frame in file: %s, %d, (%d, %d, %d)", + 
gst_video_format_to_string(GST_VIDEO_FRAME_FORMAT(&video_frame)), + GST_VIDEO_FRAME_N_PLANES(&video_frame), GST_VIDEO_FRAME_SIZE(&video_frame), + GST_VIDEO_FRAME_WIDTH(&video_frame), GST_VIDEO_FRAME_HEIGHT(&video_frame)); +@@ -717,101 +970,71 @@ gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, + fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 1), 1, 1280*720/2, fbbb); + + #endif +- + } ++ gst_video_frame_unmap (&video_frame); ++ mppframe = get_mppframe_from_vdec_mem (thiz, frame->input_buffer); ++ if (!mppframe) { ++ flow_ret = GST_FLOW_ERROR; ++ goto done; ++ } ++ ++ setup_gstbuffer_and_mppframe_hash(thiz, FRAME_GetID(mppframe), frame->input_buffer); ++ FRAME_SetPts(mppframe, GST_BUFFER_PTS (frame->input_buffer)); + } else { ++ GST_DEBUG_OBJECT (thiz, "null frame enc, need eos"); + FRAME_SetEos(thiz->mppframe, 1); ++ mppframe = thiz->mppframe; + } ++ + GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- ret = VENC_Encode(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); ++ ret = gst_spacemitenc_venc_encode (thiz, mppframe); ++ // ret = VENC_Encode(thiz->ctx, FRAME_GetBaseData(thiz->mppframe)); + GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- if (ret != 0) { +- GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, +- ("Could not encode frame"), ("Spacemit returned %d", ret)); ++ if (ret != MPP_OK) { ++ GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, ("Could not encode frame"), ("spacemitenc return %d", ret)); + flow_ret = GST_FLOW_ERROR; +- if (frame) +- goto release_frame; +- else +- goto done; ++ goto done; + } +- if (!frame) +- goto flow_eos; +- +- /* Avoid holding too much frames */ +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- GST_SPM_ENC_WAIT (encoder, +- thiz->pending_frames < SPM_PENDING_MAX); +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- thiz->pending_frames++; +- GST_SPM_ENC_BROADCAST (encoder); + + flow_ret = thiz->downstream_flow_ret; +- GST_DEBUG_OBJECT (thiz, "ZRong ------------- handle frame start: %d, %d", thiz->pending_frames, thiz->frame_count); + +- if (frame) ++ if 
(frame) { ++ GST_DEBUG_OBJECT (thiz, "finish handle frame:%p, ref_count:%d, finish sum:%d, buf%p, ref: %d", ++ frame, frame->ref_count, thiz->frame_count, frame->input_buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(frame->input_buffer)); + goto release_frame; ++ } else { ++ GST_DEBUG_OBJECT (thiz, "finish handle null frame, eos flow, finish sum:%d", thiz->frame_count); ++ goto flow_eos; ++ } + + done: ++ try_to_release_upstream_buffer(thiz); + return flow_ret; ++ + downstream_err: + { +- GST_ERROR_OBJECT (thiz, "Downstream returned %s", +- gst_flow_get_name (thiz->downstream_flow_ret)); ++ GST_ERROR_OBJECT (thiz, "Downstream returned %s", gst_flow_get_name (thiz->downstream_flow_ret)); + flow_ret = thiz->downstream_flow_ret; + goto done; + } + release_frame: + { +- gst_video_frame_unmap (&video_frame); + gst_video_codec_frame_unref (frame); + goto done; + } + flow_eos: + { + GST_DEBUG_OBJECT (thiz, "Get an eos, exit handle"); ++ // drain_all_upstream_buffer(thiz); + flow_ret = GST_FLOW_EOS; + goto done; + } +-#if 0 +- /* FIXME: spacemit has no way for us to get a connection +- * between the input and output frames, we just have to +- * guess based on the input */ +- frame = gst_video_encoder_get_oldest_frame (encoder); +- if (!frame) { +- GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, +- ("Could not encode frame"), ("thiz returned %d", ret)); +- gst_video_codec_frame_unref (frame); +- return GST_FLOW_ERROR; +- } +- ret = VENC_RequestOutputStreamBuffer(thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); +- if (ret) { +- GST_ELEMENT_ERROR (thiz, STREAM, ENCODE, +- ("Could not encode frame"), ("thiz returned %d", ret)); +- gst_video_codec_frame_unref (frame); +- return GST_FLOW_ERROR; +- } +- +- if (videoFrameTypeIDR == frame_info.eFrameType) { +- GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); +- } else { +- GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); +- } +- +- frame->output_buffer = +- gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(thiz->mpppacket)); +- 
gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); +- +- memcpy (map.data, PACKET_GetDataPointer(thiz->mpppacket), PACKET_GetLength(thiz->mpppacket)); +- +- gst_buffer_unmap (frame->output_buffer, &map); +- VENC_ReturnOutputStreamBuffer (thiz->ctx, PACKET_GetBaseData(thiz->mpppacket)); +- +- GstFlowReturn flow_status = gst_video_encoder_finish_frame (encoder, frame); +- +- GST_ERROR_OBJECT (thiz, "ZRong ----------------------- handle frame finish"); +- +- return flow_status; +-#endif ++negotiate_err: ++{ ++ GST_ERROR_OBJECT (thiz, "negotiate error, please check !"); ++ flow_ret = GST_FLOW_NOT_NEGOTIATED; ++ goto done; ++} + } + + static GstFlowReturn +@@ -822,10 +1045,12 @@ gst_spacemitenc_finish (GstVideoEncoder * encoder) + if (thiz->frame_count == 0) + return GST_FLOW_OK; + ++ GST_DEBUG_OBJECT(thiz, "finish test3"); ++ + /* Drain encoder */ + while ((gst_spacemitenc_handle_frame (encoder, NULL)) == GST_FLOW_OK); ++ // drain_all_upstream_buffer(thiz); + +- GST_DEBUG_OBJECT(thiz, "finish test3"); + thiz->at_eos = TRUE; + + GST_SPM_ENC_BROADCAST (encoder); +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc.h b/ext/spacemit/spacemitcodec/gstspacemitenc.h +index ef19791..ae9be5b 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitenc.h ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.h +@@ -8,7 +8,7 @@ + + #include + #include +- ++#include "gstspacemitallocator.h" + //#include + //#include + //#include +@@ -76,6 +76,7 @@ struct _GstSpacemitEnc + gboolean max_bitrate_changed; + + // GMutex mutex; ++ GHashTable *bufs_hash; + guint width; + guint height; + guint eCodecType; +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c b/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c +deleted file mode 100755 +index dff86f9..0000000 +--- a/ext/spacemit/spacemitcodec/gstspacemitenc_bak.c ++++ /dev/null +@@ -1,1022 +0,0 @@ +-#ifdef HAVE_CONFIG_H +-#include "config.h" +-#endif +-#include +- +-#include "gstspacemitenc.h" +- +-#include +-#include +-#include 
+-#include +-#include +-#define SPM_PENDING_MAX 8 /* Max number of MPP pending frame */ +- +-GST_DEBUG_CATEGORY_STATIC (gst_spacemitenc_debug_category); +-#define GST_CAT_DEFAULT gst_spacemitenc_debug_category +- +-/* FIXME: we should not really directly use the enums from the spacemit API +- * here, since it might change or be removed */ +-/* +-#define GST_TYPE_USAGE_TYPE (gst_spacemitenc_usage_type_get_type ()) +-static GType +-gst_spacemitenc_usage_type_get_type (void) +-{ +- static GType usage_type = 0; +- +- if (!usage_type) { +- static const GEnumValue usage_types[] = { +- {CAMERA_VIDEO_REAL_TIME, "video from camera", "camera"}, +- {SCREEN_CONTENT_REAL_TIME, "screen content", "screen"}, +- {0, NULL, NULL}, +- }; +- +- usage_type = g_enum_register_static ("EUsageType", usage_types); +- } +- +- return usage_type; +-} +-*/ +-/* +-#define GST_TYPE_RC_MODES (gst_spacemitenc_rc_modes_get_type ()) +-static GType +-gst_spacemitenc_rc_modes_get_type (void) +-{ +- static GType rc_modes_type = 0; +- +- if (!rc_modes_type) { +- static const GEnumValue rc_modes_types[] = { +- {RC_QUALITY_MODE, "Quality mode", "quality"}, +- {RC_BITRATE_MODE, "Bitrate mode", "bitrate"}, +- {RC_BUFFERBASED_MODE, "No bitrate control, just using buffer status", +- "buffer"}, +- {RC_OFF_MODE, "Rate control off mode", "off"}, +- {0, NULL, NULL}, +- }; +- +- rc_modes_type = g_enum_register_static ("RC_MODES", rc_modes_types); +- } +- +- return rc_modes_type; +-} +-*/ +-/* +-#define GST_TYPE_SPACEMITENC_DEBLOCKING_MODE (gst_spacemitenc_deblocking_mode_get_type ()) +-static GType +-gst_spacemitenc_deblocking_mode_get_type (void) +-{ +- static const GEnumValue types[] = { +- {GST_SPACEMIT_DEBLOCKING_ON, "Deblocking on", "on"}, +- {GST_SPACEMIT_DEBLOCKING_OFF, "Deblocking off", "off"}, +- {GST_SPACEMIT_DEBLOCKING_NOT_SLICE_BOUNDARIES, +- "Deblocking on, except for slice boundaries", "not-slice-boundaries"}, +- {0, NULL, NULL}, +- }; +- static gsize id = 0; +- +- if (g_once_init_enter (&id)) { +- 
GType _id = g_enum_register_static ("GstSpacemitencDeblockingModes", types); +- g_once_init_leave (&id, _id); +- } +- +- return (GType) id; +-} +-*/ +-/* +-#define GST_TYPE_SPACEMITENC_SLICE_MODE (gst_spacemitenc_slice_mode_get_type ()) +-static GType +-gst_spacemitenc_slice_mode_get_type (void) +-{ +- static const GEnumValue types[] = { +- {GST_SPACEMIT_SLICE_MODE_N_SLICES, "Fixed number of slices", "n-slices"}, +- {GST_SPACEMIT_SLICE_MODE_AUTO, +- "Number of slices equal to number of threads", "auto"}, +- {0, NULL, NULL}, +- }; +- static gsize id = 0; +- +- if (g_once_init_enter (&id)) { +- GType _id = g_enum_register_static ("GstSpacemitEncSliceModes", types); +- g_once_init_leave (&id, _id); +- } +- +- return (GType) id; +-} +-*/ +-/* +-#define GST_TYPE_SPACEMITENC_COMPLEXITY (gst_spacemitenc_complexity_get_type ()) +-static GType +-gst_spacemitenc_complexity_get_type (void) +-{ +- static const GEnumValue types[] = { +- {LOW_COMPLEXITY, "Low complexity / high speed encoding", "low"}, +- {MEDIUM_COMPLEXITY, "Medium complexity / medium speed encoding", "medium"}, +- {HIGH_COMPLEXITY, "High complexity / low speed encoding", "high"}, +- {0, NULL, NULL}, +- }; +- static gsize id = 0; +- +- if (g_once_init_enter (&id)) { +- GType _id = g_enum_register_static ("GstSpacemitencComplexity", types); +- g_once_init_leave (&id, _id); +- } +- +- return (GType) id; +-} +-*/ +-/* prototypes */ +- +-static void gst_spacemitenc_set_property (GObject * object, +- guint property_id, const GValue * value, GParamSpec * pspec); +-static void gst_spacemitenc_get_property (GObject * object, +- guint property_id, GValue * value, GParamSpec * pspec); +-static void gst_spacemitenc_finalize (GObject * object); +-static gboolean gst_spacemitenc_start (GstVideoEncoder * encoder); +-static gboolean gst_spacemitenc_stop (GstVideoEncoder * encoder); +-static gboolean gst_spacemitenc_set_format (GstVideoEncoder * encoder, +- GstVideoCodecState * state); +-static GstFlowReturn 
gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, +- GstVideoCodecFrame * frame); +-static GstFlowReturn gst_spacemitenc_finish (GstVideoEncoder * encoder); +-static gboolean gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, +- GstQuery * query); +- +-#define DEFAULT_BITRATE (128000) +-//#define DEFAULT_MAX_BITRATE (UNSPECIFIED_BIT_RATE) +-#define DEFAULT_GOP_SIZE (90) +-#define DEFAULT_MAX_SLICE_SIZE (1500000) +-#define START_FRAMERATE 30 +-//#define DEFAULT_USAGE_TYPE CAMERA_VIDEO_REAL_TIME +-//#define DEFAULT_RATE_CONTROL RC_QUALITY_MODE +-#define DEFAULT_MULTI_THREAD 0 +-#define DEFAULT_ENABLE_DENOISE FALSE +-#define DEFAULT_ENABLE_FRAME_SKIP FALSE +-//#define DEFAULT_DEBLOCKING_MODE GST_SPACEMIT_DEBLOCKING_ON +-#define DEFAULT_BACKGROUND_DETECTION TRUE +-#define DEFAULT_ADAPTIVE_QUANTIZATION TRUE +-#define DEFAULT_SCENE_CHANGE_DETECTION TRUE +-#define DEFAULT_SLICE_MODE GST_SPACEMIT_SLICE_MODE_N_SLICES +-#define DEFAULT_NUM_SLICES 1 +-//#define DEFAULT_COMPLEXITY MEDIUM_COMPLEXITY +-#define DEFAULT_QP_MIN 0 +-#define DEFAULT_QP_MAX 51 +- +-#define GST_SPM_ENC_EVENT_MUTEX(encoder) (&GST_SPACEMITENC (encoder)->event_mutex) +-#define GST_SPM_ENC_EVENT_COND(encoder) (&GST_SPACEMITENC (encoder)->event_cond) +- +-#define GST_SPM_ENC_BROADCAST(encoder) \ +- g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ +- g_cond_broadcast (GST_SPM_ENC_EVENT_COND (encoder)); \ +- g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); +- +-#define GST_SPM_ENC_WAIT(encoder, condition) \ +- g_mutex_lock (GST_SPM_ENC_EVENT_MUTEX (encoder)); \ +- while (!(condition)) \ +- g_cond_wait (GST_SPM_ENC_EVENT_COND (encoder), \ +- GST_SPM_ENC_EVENT_MUTEX (encoder)); \ +- g_mutex_unlock (GST_SPM_ENC_EVENT_MUTEX (encoder)); +- +-enum +-{ +- PROP_0, +- //PROP_USAGE_TYPE, +- PROP_BITRATE, +- PROP_MAX_BITRATE, +- PROP_GOP_SIZE, +- PROP_MAX_SLICE_SIZE, +- //PROP_RATE_CONTROL, +- PROP_MULTI_THREAD, +- PROP_ENABLE_DENOISE, +- PROP_ENABLE_FRAME_SKIP, +- //PROP_DEBLOCKING_MODE, +- 
PROP_BACKGROUND_DETECTION, +- PROP_ADAPTIVE_QUANTIZATION, +- PROP_SCENE_CHANGE_DETECTION, +- //PROP_SLICE_MODE, +- PROP_NUM_SLICES, +- //PROP_COMPLEXITY, +- PROP_QP_MIN, +- PROP_QP_MAX, +- PROP_CODING_WIDTH, +- PROP_CODING_HIGHT, +- PROP_CODING_TYPE, +- PROP_CODE_TYPE, +- PROP_CODE_YUV_FORMAT, +- N_PROPERTIES +-}; +- +-/* pad templates */ +- +-static GstStaticPadTemplate gst_spacemitenc_sink_template = +-GST_STATIC_PAD_TEMPLATE ("sink", +- GST_PAD_SINK, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{I420, NV12, NV21}")) +- ); +-#if 0 +-static GstStaticPadTemplate gst_spacemitenc_src_template = +-GST_STATIC_PAD_TEMPLATE ("src", +- GST_PAD_SRC, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS +- ("video/x-h264, stream-format=(string)\"byte-stream\", alignment=(string)\"au\", profile=(string)\"baseline\"" +- ";" +- "video/x-h265," +- "stream-format = (string) byte-stream," +- "alignment = (string)au," +- "width=(int) [1,MAX], " "height=(int) [1,MAX]" +- )); +-#else +-static GstStaticPadTemplate gst_spacemitenc_src_template = +-GST_STATIC_PAD_TEMPLATE ("src", +- GST_PAD_SRC, +- GST_PAD_ALWAYS, +- GST_STATIC_CAPS +- ( +- "video/x-h265," +- "stream-format = (string) byte-stream," +- "alignment = (string)au," +- "width=(int) [1,MAX], " "height=(int) [1,MAX]" +- )); +- +-#endif +-/* class initialization */ +- +-G_DEFINE_TYPE_WITH_CODE (GstSpacemitEnc, gst_spacemitenc, +- GST_TYPE_VIDEO_ENCODER, +- G_IMPLEMENT_INTERFACE (GST_TYPE_PRESET, NULL); +- GST_DEBUG_CATEGORY_INIT (gst_spacemitenc_debug_category, "spacemitenc", 0, +- "debug category for spacemitenc element")); +- +-static void +-gst_spacemitenc_class_init (GstSpacemitEncClass * klass) +-{ +- GObjectClass *gobject_class = G_OBJECT_CLASS (klass); +- GstVideoEncoderClass *video_encoder_class = GST_VIDEO_ENCODER_CLASS (klass); +- +- /* Setting up pads and setting metadata should be moved to +- base_class_init if you intend to subclass this class. 
*/ +- gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), +- &gst_spacemitenc_src_template); +- gst_element_class_add_static_pad_template (GST_ELEMENT_CLASS (klass), +- &gst_spacemitenc_sink_template); +- +- gst_element_class_set_static_metadata (GST_ELEMENT_CLASS (klass), +- "Spacemit video encoder", "Encoder/Video", "Spacemit video encoder", +- "David, qiang.fu@spacemit.com"); +- +- gobject_class->set_property = gst_spacemitenc_set_property; +- gobject_class->get_property = gst_spacemitenc_get_property; +- gobject_class->finalize = gst_spacemitenc_finalize; +- video_encoder_class->start = GST_DEBUG_FUNCPTR (gst_spacemitenc_start); +- video_encoder_class->stop = GST_DEBUG_FUNCPTR (gst_spacemitenc_stop); +- video_encoder_class->set_format = +- GST_DEBUG_FUNCPTR (gst_spacemitenc_set_format); +- video_encoder_class->handle_frame = +- GST_DEBUG_FUNCPTR (gst_spacemitenc_handle_frame); +- video_encoder_class->propose_allocation = +- GST_DEBUG_FUNCPTR (gst_spacemitenc_propose_allocation); +- video_encoder_class->finish = GST_DEBUG_FUNCPTR (gst_spacemitenc_finish); +- +- /* define properties */ +- g_object_class_install_property (gobject_class, PROP_GOP_SIZE, +- g_param_spec_uint ("gop-size", "GOP size", +- "Number of frames between intra frames", +- 0, G_MAXUINT, DEFAULT_GOP_SIZE, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODING_TYPE, +- g_param_spec_uint ("coding-type", "coding type", +- "Format to encode", +- CODING_H264, CODING_FWHT, CODING_H264, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODE_TYPE, +- g_param_spec_uint ("code-type", "code type", +- "Codec selection to work", +- CODEC_OPENH264, CODEC_V4L2, CODEC_SFOMX, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODING_WIDTH, +- g_param_spec_uint ("coding-width", "coding 
width", +- "image width to encode", +- 0, 3840, 1280, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODING_HIGHT, +- g_param_spec_uint ("code-hight", "code hight", +- "image hight to encode", +- 0, 2160, 720, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +- g_object_class_install_property (gobject_class, PROP_CODE_YUV_FORMAT, +- g_param_spec_uint ("code-yuv-format", "code yuv format", +- "ENcode the yuv format", +- PIXEL_FORMAT_DEFAULT, PIXEL_FORMAT_UNKNOWN-1, PIXEL_FORMAT_I420, +- (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +-} +- +-static void +-gst_spacemitenc_init (GstSpacemitEnc * spacemitenc) +-{ +- spacemitenc->width = 1280; +- spacemitenc->height = 720; +- spacemitenc->eCodecType = CODEC_SFOMX; +- spacemitenc->eCodingType = CODING_H264; +- spacemitenc->PixelFormat = PIXEL_FORMAT_I420; +- spacemitenc->gop_size = DEFAULT_GOP_SIZE; +-// spacemitenc->usage_type = DEFAULT_USAGE_TYPE; +-// spacemitenc->rate_control = DEFAULT_RATE_CONTROL; +- spacemitenc->multi_thread = DEFAULT_MULTI_THREAD; +- spacemitenc->max_slice_size = DEFAULT_MAX_SLICE_SIZE; +- spacemitenc->bitrate = DEFAULT_BITRATE; +-// spacemitenc->max_bitrate = DEFAULT_MAX_BITRATE; +- spacemitenc->qp_min = DEFAULT_QP_MIN; +- spacemitenc->qp_max = DEFAULT_QP_MAX; +- spacemitenc->framerate = START_FRAMERATE; +- spacemitenc->input_state = NULL; +- spacemitenc->time_per_frame = GST_SECOND / spacemitenc->framerate; +- spacemitenc->frame_count = 0; +- spacemitenc->previous_timestamp = 0; +- spacemitenc->enable_denoise = DEFAULT_ENABLE_DENOISE; +- spacemitenc->enable_frame_skip = DEFAULT_ENABLE_FRAME_SKIP; +-// spacemitenc->deblocking_mode = DEFAULT_DEBLOCKING_MODE; +- spacemitenc->background_detection = DEFAULT_BACKGROUND_DETECTION; +- spacemitenc->adaptive_quantization = DEFAULT_ADAPTIVE_QUANTIZATION; +- spacemitenc->scene_change_detection = DEFAULT_SCENE_CHANGE_DETECTION; +- spacemitenc->slice_mode 
= DEFAULT_SLICE_MODE; +- spacemitenc->num_slices = DEFAULT_NUM_SLICES; +- //spacemitenc->encoder = NULL; +-// spacemitenc->complexity = DEFAULT_COMPLEXITY; +- spacemitenc->bitrate_changed = FALSE; +- spacemitenc->max_bitrate_changed = FALSE; +- +- spacemitenc->ctx = NULL; +- spacemitenc->para = NULL; +- spacemitenc->mppframe = NULL; +- spacemitenc->mpppacket = NULL; +- +-// gst_spacemitenc_set_usage_type (spacemitenc, CAMERA_VIDEO_REAL_TIME); +-// gst_spacemitenc_set_rate_control (spacemitenc, RC_QUALITY_MODE); +-} +- +-void +-gst_spacemitenc_set_property (GObject * object, guint property_id, +- const GValue * value, GParamSpec * pspec) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); +- +- GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- set_property: %d", property_id); +- +- switch (property_id) { +- case PROP_BITRATE: +- GST_OBJECT_LOCK (spacemitenc); +- if (spacemitenc->bitrate != g_value_get_uint (value)) { +- spacemitenc->bitrate = g_value_get_uint (value); +- spacemitenc->bitrate_changed = TRUE; +- } +- GST_OBJECT_UNLOCK (spacemitenc); +- break; +- +- case PROP_MAX_BITRATE: +- GST_OBJECT_LOCK (spacemitenc); +- if (spacemitenc->max_bitrate != g_value_get_uint (value)) { +- spacemitenc->max_bitrate = g_value_get_uint (value); +- spacemitenc->max_bitrate_changed = TRUE; +- } +- GST_OBJECT_UNLOCK (spacemitenc); +- break; +- +- case PROP_QP_MIN: +- spacemitenc->qp_min = g_value_get_uint (value); +- break; +- +- case PROP_QP_MAX: +- spacemitenc->qp_max = g_value_get_uint (value); +- break; +- +- case PROP_MULTI_THREAD: +- spacemitenc->multi_thread = g_value_get_uint (value); +- break; +-/* +- case PROP_USAGE_TYPE: +- gst_spacemitenc_set_usage_type (spacemitenc, g_value_get_enum (value)); +- break; +-*/ +- case PROP_ENABLE_DENOISE: +- spacemitenc->enable_denoise = g_value_get_boolean (value); +- break; +- +- case PROP_ENABLE_FRAME_SKIP: +- spacemitenc->enable_frame_skip = g_value_get_boolean (value); +- break; +-/* +- case PROP_RATE_CONTROL: 
+- gst_spacemitenc_set_rate_control (spacemitenc, g_value_get_enum (value)); +- break; +-*/ +- case PROP_GOP_SIZE: +- spacemitenc->gop_size = g_value_get_uint (value); +- GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- set_property: %d", spacemitenc->gop_size); +- break; +- +- case PROP_MAX_SLICE_SIZE: +- spacemitenc->max_slice_size = g_value_get_uint (value); +- break; +-/* +- case PROP_DEBLOCKING_MODE: +- spacemitenc->deblocking_mode = +- (GstSpacemitencDeblockingMode) g_value_get_enum (value); +- break; +-*/ +- case PROP_BACKGROUND_DETECTION: +- spacemitenc->background_detection = g_value_get_boolean (value); +- break; +- +- case PROP_ADAPTIVE_QUANTIZATION: +- spacemitenc->adaptive_quantization = g_value_get_boolean (value); +- break; +- +- case PROP_SCENE_CHANGE_DETECTION: +- spacemitenc->scene_change_detection = g_value_get_boolean (value); +- break; +-/* +- case PROP_SLICE_MODE: +- spacemitenc->slice_mode = +- (GstSpacemitEncSliceMode) g_value_get_enum (value); +- break; +-*/ +- case PROP_NUM_SLICES: +- spacemitenc->num_slices = g_value_get_uint (value); +- break; +- case PROP_CODING_WIDTH: +- spacemitenc->width = g_value_get_uint (value); +- break; +- case PROP_CODING_HIGHT: +- spacemitenc->height = g_value_get_uint (value); +- break; +- case PROP_CODING_TYPE: +- spacemitenc->eCodingType = g_value_get_uint (value); +- break; +- case PROP_CODE_TYPE: +- spacemitenc->eCodecType = g_value_get_uint (value); +- break; +- case PROP_CODE_YUV_FORMAT: +- spacemitenc->PixelFormat = g_value_get_uint (value); +- break; +- +-/* +- case PROP_COMPLEXITY: +- spacemitenc->complexity = (ECOMPLEXITY_MODE) g_value_get_enum (value); +- break; +-*/ +- default: +- G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); +- break; +- } +-} +- +-void +-gst_spacemitenc_get_property (GObject * object, guint property_id, +- GValue * value, GParamSpec * pspec) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); +- +- GST_DEBUG_OBJECT (spacemitenc, "ZRong 
------------------- get_property: %d", property_id); +- +- switch (property_id) { +- /* +- case PROP_USAGE_TYPE: +- g_value_set_enum (value, spacemitenc->usage_type); +- break; +- */ +- /* +- case PROP_RATE_CONTROL: +- g_value_set_enum (value, spacemitenc->rate_control); +- break; +- */ +- case PROP_BITRATE: +- g_value_set_uint (value, spacemitenc->bitrate); +- break; +- +- case PROP_MAX_BITRATE: +- g_value_set_uint (value, spacemitenc->max_bitrate); +- break; +- +- case PROP_QP_MIN: +- g_value_set_uint (value, spacemitenc->qp_min); +- break; +- +- case PROP_QP_MAX: +- g_value_set_uint (value, spacemitenc->qp_max); +- break; +- +- case PROP_ENABLE_DENOISE: +- g_value_set_boolean (value, spacemitenc->enable_denoise); +- break; +- +- case PROP_ENABLE_FRAME_SKIP: +- g_value_set_boolean (value, spacemitenc->enable_frame_skip); +- break; +- +- case PROP_MULTI_THREAD: +- g_value_set_uint (value, spacemitenc->multi_thread); +- break; +- +- case PROP_GOP_SIZE: +- GST_DEBUG_OBJECT (spacemitenc, "ZRong ------------------- gop_size: %d", spacemitenc->gop_size); +- g_value_set_uint (value, spacemitenc->gop_size); +- break; +- +- case PROP_MAX_SLICE_SIZE: +- g_value_set_uint (value, spacemitenc->max_slice_size); +- break; +- /* +- case PROP_DEBLOCKING_MODE: +- g_value_set_enum (value, spacemitenc->deblocking_mode); +- break; +- */ +- case PROP_BACKGROUND_DETECTION: +- g_value_set_boolean (value, spacemitenc->background_detection); +- break; +- +- case PROP_ADAPTIVE_QUANTIZATION: +- g_value_set_boolean (value, spacemitenc->adaptive_quantization); +- break; +- +- case PROP_SCENE_CHANGE_DETECTION: +- g_value_set_boolean (value, spacemitenc->scene_change_detection); +- break; +- /* +- case PROP_SLICE_MODE: +- g_value_set_enum (value, spacemitenc->slice_mode); +- break; +- */ +- case PROP_NUM_SLICES: +- g_value_set_uint (value, spacemitenc->num_slices); +- break; +- case PROP_CODING_WIDTH: +- g_value_set_uint (value, spacemitenc->width); +- break; +- case PROP_CODING_HIGHT: +- 
g_value_set_uint (value, spacemitenc->height); +- break; +- case PROP_CODING_TYPE: +- g_value_set_uint (value, spacemitenc->eCodingType); +- break; +- case PROP_CODE_TYPE: +- g_value_set_uint (value, spacemitenc->eCodecType); +- break; +- case PROP_CODE_YUV_FORMAT: +- g_value_set_uint (value, spacemitenc->PixelFormat); +- break; +- +- /* +- case PROP_COMPLEXITY: +- g_value_set_enum (value, spacemitenc->complexity); +- break; +- */ +- default: +- G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); +- break; +- } +-} +- +-void +-gst_spacemitenc_finalize (GObject * object) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (object); +- +- GST_DEBUG_OBJECT (spacemitenc, "finalize"); +- +- /* clean up object here */ +- +- if (spacemitenc->input_state) { +- gst_video_codec_state_unref (spacemitenc->input_state); +- } +- spacemitenc->input_state = NULL; +- +- G_OBJECT_CLASS (gst_spacemitenc_parent_class)->finalize (object); +-} +-FILE *fbbb; +- +- +-static gboolean +-gst_spacemitenc_start (GstVideoEncoder * encoder) +-{ +- int ret = 0; +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- GST_DEBUG_OBJECT (spacemitenc, "start"); +- +- if(spacemitenc->ctx) +- { +- VENC_DestoryChannel(spacemitenc->ctx); +- spacemitenc->ctx = NULL; +- } +- spacemitenc->ctx = VENC_CreateChannel(); +- +- spacemitenc->ctx->stVencPara.eCodingType = spacemitenc->eCodingType; +- spacemitenc->ctx->stVencPara.nWidth = spacemitenc->width; +- spacemitenc->ctx->stVencPara.nHeight = spacemitenc->height; +- spacemitenc->ctx->stVencPara.PixelFormat = spacemitenc->PixelFormat; +- spacemitenc->ctx->eCodecType = spacemitenc->eCodecType; +- +- ret = VENC_Init(spacemitenc->ctx); +- if (ret) +- goto init_err; +- +- spacemitenc->mpppacket = PACKET_Create (); +- if (!spacemitenc->mpppacket) +- goto alloc_err; +- +- spacemitenc->mppframe = FRAME_Create (); +- if (!spacemitenc->mppframe) +- goto alloc_err; +-// FRAME_Alloc(spacemitenc->mppframe, 1, 1280, 720); +- +- GST_ERROR_OBJECT 
(spacemitenc, "ZRong ------------------------- finish start!!!"); +- g_mutex_init (&spacemitenc->event_mutex); +- g_cond_init (&spacemitenc->event_cond); +- spacemitenc->pending_frames = 0; +- fbbb = fopen("/tmp/out.yuv", "ab+"); +- +- return TRUE; +- +-alloc_err: +- GST_ERROR_OBJECT (spacemitenc, "can not alloc for mpp structure, please check !"); +- return FALSE; +- +-init_err: +- GST_ERROR_OBJECT (spacemitenc, "Mpp vnec init error, please check !"); +- return FALSE; +-} +- +-static gboolean +-gst_spacemitenc_stop (GstVideoEncoder * encoder) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- +- GST_ERROR_OBJECT (spacemitenc, "xxxxxxxxxxxxxx stop start"); +- fclose(fbbb); +- +- if(spacemitenc->ctx) +- { +- VENC_DestoryChannel(spacemitenc->ctx); +- spacemitenc->ctx = NULL; +- } +- FRAME_Destory(spacemitenc->mppframe); +- PACKET_Destory(spacemitenc->mpppacket); +- VENC_DestoryChannel(spacemitenc->ctx); +- +- if (spacemitenc->input_state) { +- gst_video_codec_state_unref (spacemitenc->input_state); +- } +- spacemitenc->input_state = NULL; +- if (!(gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED)) +- return TRUE; +- +- GST_DEBUG_OBJECT (spacemitenc, "spacemit_enc_stop called"); +- +- gst_pad_stop_task (encoder->srcpad); +- g_cond_clear (&spacemitenc->event_cond); +- g_mutex_clear (&spacemitenc->event_mutex); +- +- return TRUE; +-} +-static void +-gst_st_mpp_enc_loop (GstVideoEncoder * encoder) +-{ +- GstVideoCodecFrame * frame = NULL; +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- int ret; +- GstFlowReturn flow_status; +- GstMapInfo map; +- static int lenght = 0; +- +- GST_SPM_ENC_WAIT (encoder, spacemitenc->pending_frames >= 4 || spacemitenc->at_eos); +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- +- ret = VENC_RequestOutputStreamBuffer(spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); +- if (ret == MPP_CODER_NO_DATA) +- goto sf_no_frame; +- else if (ret == MPP_CODER_EOS) +- goto finish_work; +- +- frame = 
gst_video_encoder_get_oldest_frame (encoder); +- if (!frame) { +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish last frame"); +- spacemitenc->downstream_flow_ret = GST_FLOW_EOS; +- GstBuffer *buffer; +- buffer = +- gst_buffer_new_wrapped (g_memdup (PACKET_GetDataPointer(spacemitenc->mpppacket), +- PACKET_GetLength(spacemitenc->mpppacket)), PACKET_GetLength(spacemitenc->mpppacket)); +- flow_status = gst_pad_push (GST_VIDEO_ENCODER_SRC_PAD (spacemitenc), buffer); +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish last frame %d", flow_status); +- +- VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); +- goto done; +- } +- spacemitenc->pending_frames--; +- GST_SPM_ENC_BROADCAST (encoder); +- +- //if (videoFrameTypeIDR == frame_info.eFrameType) { +- // GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); +- //} else { +- GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); +- //} +- frame->output_buffer = +- gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(spacemitenc->mpppacket)); +- gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); +- +- lenght += PACKET_GetLength(spacemitenc->mpppacket); +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------------- finish one frame, %d, %d", lenght, PACKET_GetLength(spacemitenc->mpppacket)); +- memcpy (map.data, PACKET_GetDataPointer(spacemitenc->mpppacket), PACKET_GetLength(spacemitenc->mpppacket)); +- +- gst_buffer_unmap (frame->output_buffer, &map); +- VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); +- +- spacemitenc->downstream_flow_ret = gst_video_encoder_finish_frame (encoder, frame); +-done: +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- return; +- +-sf_no_frame: +-{ +- GST_LOG_OBJECT (spacemitenc, +- "No out frame to request%d!", spacemitenc->pending_frames); +- spacemitenc->downstream_flow_ret = GST_FLOW_OK; +- goto done; +-} +-finish_work: +-{ +- GST_DEBUG_OBJECT (spacemitenc, +- "Get 
eos, Finished work and paused task!"); +- gst_pad_pause_task (GST_VIDEO_ENCODER_SRC_PAD (spacemitenc)); +- spacemitenc->downstream_flow_ret = GST_FLOW_EOS; +- goto done; +-} +- +-} +- +- +-static gboolean +-gst_spacemitenc_set_format (GstVideoEncoder * encoder, +- GstVideoCodecState * state) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- gchar *debug_caps; +- guint width, height, fps_n, fps_d; +- guint n_slices = 1; +- gint ret; +- GstCaps *outcaps; +- GstVideoCodecState *output_state; +- spacemitenc->frame_count = 0; +- int stride0, stride1, stride2; +- GstVideoFormat fmt; +- +- debug_caps = gst_caps_to_string (state->caps); +- GST_DEBUG_OBJECT (spacemitenc, "gst_e26d4_enc_set_format called, caps: %s", +- debug_caps); +- g_free (debug_caps); +- +- if (spacemitenc->input_state) { +- gst_video_codec_state_unref (spacemitenc->input_state); +- } +- spacemitenc->input_state = gst_video_codec_state_ref (state); +- +- width = GST_VIDEO_INFO_WIDTH (&state->info); +- height = GST_VIDEO_INFO_HEIGHT (&state->info); +- fps_n = GST_VIDEO_INFO_FPS_N (&state->info); +- fps_d = GST_VIDEO_INFO_FPS_D (&state->info); +- stride0 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 0); +- stride1 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 1); +- stride2 = GST_VIDEO_INFO_PLANE_STRIDE (&state->info, 2); +- fmt = GST_VIDEO_INFO_FORMAT (&state->info); +- +- spacemitenc->para = (MppVencPara*)malloc(sizeof(MppVencPara)); +- memset(spacemitenc->para, 0, sizeof(MppVencPara)); +- spacemitenc->para->nBitrate = 5000000; +- spacemitenc->para->nFrameRate = 30; +- spacemitenc->para->nHeight = spacemitenc->height; +- spacemitenc->para ->nWidth = spacemitenc->width; +- spacemitenc->para->nStride = spacemitenc->width; +- VENC_SetParam(spacemitenc->ctx, spacemitenc->para); +- +- outcaps = +- gst_caps_copy (gst_static_pad_template_get_caps +- (&gst_spacemitenc_src_template)); +- +- output_state = gst_video_encoder_set_output_state (encoder, outcaps, state); +- gst_video_codec_state_unref 
(output_state); +- +- GST_ERROR_OBJECT (spacemitenc, "ZRong -------------------- set format finish, %d, %d (%d %d)", width, height, fps_n, fps_d); +- GST_ERROR_OBJECT (spacemitenc, "ZRong ----------------------- set format finish, %u, (%d %d %d) %s", fmt, stride0, stride1, stride2, gst_video_format_to_string (fmt)); +- +- if (!gst_video_encoder_negotiate (encoder)) +- return FALSE; +- +- if (G_UNLIKELY (!gst_pad_get_task_state ((encoder)->srcpad) == GST_TASK_STARTED)) { +- GST_DEBUG_OBJECT (spacemitenc, "@@@ ZRong ------------------------- starting encoding thread"); +- gst_pad_start_task (encoder->srcpad, +- (GstTaskFunction) gst_st_mpp_enc_loop, encoder, NULL); +- } +- return TRUE; +- +-} +- +-static gboolean +-gst_spacemitenc_propose_allocation (GstVideoEncoder * encoder, GstQuery * query) +-{ +- gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); +- +- return +- GST_VIDEO_ENCODER_CLASS +- (gst_spacemitenc_parent_class)->propose_allocation (encoder, query); +-} +- +-static GstFlowReturn +-gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, +- GstVideoCodecFrame * frame) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- GstVideoFrame video_frame; +- gboolean force_keyframe; +- gint ret; +- gfloat fps; +- gint i, j; +- gsize buf_length = 0; +- GstFlowReturn flow_ret; +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- handle frame start"); +- +- if (spacemitenc->downstream_flow_ret != GST_FLOW_OK) { +- goto downstream_err; +- } +- +- GST_OBJECT_LOCK (spacemitenc); +- if (spacemitenc->bitrate_changed || spacemitenc->max_bitrate_changed) { +- GST_WARNING_OBJECT (spacemitenc, +- "Error need to support this cases"); +- } +- GST_OBJECT_UNLOCK (spacemitenc); +- +- if (frame) { +- spacemitenc->frame_count++; +- gst_video_frame_map (&video_frame, &spacemitenc->input_state->info, +- frame->input_buffer, GST_MAP_READ); +- if (spacemitenc->PixelFormat == PIXEL_FORMAT_I420) { +- FRAME_SetDataUsedNum(spacemitenc->mppframe, 3); +- 
FRAME_SetDataPointer(spacemitenc->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); +- FRAME_SetDataPointer(spacemitenc->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); +- FRAME_SetDataPointer(spacemitenc->mppframe, 2, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 2)); +- } else { +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- be save %s, %d, (%d, %d, %d)", +- gst_video_format_to_string(GST_VIDEO_FRAME_FORMAT(&video_frame)), +- GST_VIDEO_FRAME_N_PLANES(&video_frame), GST_VIDEO_FRAME_SIZE(&video_frame), +- GST_VIDEO_FRAME_WIDTH(&video_frame), GST_VIDEO_FRAME_HEIGHT(&video_frame)); +- +- FRAME_SetDataUsedNum(spacemitenc->mppframe, 2); +- FRAME_SetDataPointer(spacemitenc->mppframe, 0, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 0)); +- FRAME_SetDataPointer(spacemitenc->mppframe, 1, GST_VIDEO_FRAME_PLANE_DATA (&video_frame, 1)); +-#if 0 +- //SF_OMX_BUF_INFO *pBufInfo = pOMXBuffer->pOutputPortPrivate; +- //LOG(SF_LOG_INFO, "%p %d %p\r\n", pOMXBuffer->pBuffer, pOMXBuffer->nFilledLen, pBufInfo->remap_vaddr); +- +- fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 0), 1, 1280*720, fbbb); +- fwrite(GST_VIDEO_FRAME_COMP_DATA (&video_frame, 1), 1, 1280*720/2, fbbb); +- +-#endif +- +- } +- } else { +- MppData * tmp = FRAME_GetBaseData(spacemitenc->mppframe); +- tmp->bEos = 1; +- } +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- ret = VENC_Encode(spacemitenc->ctx, FRAME_GetBaseData(spacemitenc->mppframe)); +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- if (ret != 0) { +- GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, +- ("Could not encode frame"), ("Spacemit returned %d", ret)); +- flow_ret = GST_FLOW_ERROR; +- if (frame) +- goto release_res; +- else +- goto done; +- } +- if (!frame) +- goto flow_eos; +- /* Avoid holding too much frames */ +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- GST_SPM_ENC_WAIT (encoder, +- spacemitenc->pending_frames < SPM_PENDING_MAX); +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- spacemitenc->pending_frames++; +- 
GST_SPM_ENC_BROADCAST (encoder); +- +- flow_ret = spacemitenc->downstream_flow_ret; +- GST_ERROR_OBJECT (spacemitenc, "ZRong ------------- handle frame start: %d, %d", spacemitenc->pending_frames, spacemitenc->frame_count); +- +- if (frame) +- goto release_res; +- +-done: +- return flow_ret; +-downstream_err: +-{ +- GST_ERROR_OBJECT (spacemitenc, "Downstream returned %s", +- gst_flow_get_name (spacemitenc->downstream_flow_ret)); +- flow_ret = spacemitenc->downstream_flow_ret; +- goto done; +-} +-release_res: +-{ +- gst_video_frame_unmap (&video_frame); +- gst_video_codec_frame_unref (frame); +- goto done; +-} +-flow_eos: +-{ +- GST_DEBUG_OBJECT (spacemitenc, "Get an eos, exit handle"); +- flow_ret = GST_FLOW_EOS; +- goto done; +-} +-#if 0 +- /* FIXME: spacemit has no way for us to get a connection +- * between the input and output frames, we just have to +- * guess based on the input */ +- frame = gst_video_encoder_get_oldest_frame (encoder); +- if (!frame) { +- GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, +- ("Could not encode frame"), ("spacemitenc returned %d", ret)); +- gst_video_codec_frame_unref (frame); +- return GST_FLOW_ERROR; +- } +- ret = VENC_RequestOutputStreamBuffer(spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); +- if (ret) { +- GST_ELEMENT_ERROR (spacemitenc, STREAM, ENCODE, +- ("Could not encode frame"), ("spacemitenc returned %d", ret)); +- gst_video_codec_frame_unref (frame); +- return GST_FLOW_ERROR; +- } +- +- if (videoFrameTypeIDR == frame_info.eFrameType) { +- GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); +- } else { +- GST_VIDEO_CODEC_FRAME_UNSET_SYNC_POINT (frame); +- } +- +- frame->output_buffer = +- gst_video_encoder_allocate_output_buffer (encoder, PACKET_GetLength(spacemitenc->mpppacket)); +- gst_buffer_map (frame->output_buffer, &map, GST_MAP_WRITE); +- +- memcpy (map.data, PACKET_GetDataPointer(spacemitenc->mpppacket), PACKET_GetLength(spacemitenc->mpppacket)); +- +- gst_buffer_unmap (frame->output_buffer, &map); +- 
VENC_ReturnOutputStreamBuffer (spacemitenc->ctx, PACKET_GetBaseData(spacemitenc->mpppacket)); +- +- GstFlowReturn flow_status = gst_video_encoder_finish_frame (encoder, frame); +- +- GST_ERROR_OBJECT (spacemitenc, "ZRong ----------------------- handle frame finish"); +- +- return flow_status; +-#endif +-} +- +-static GstFlowReturn +-gst_spacemitenc_finish (GstVideoEncoder * encoder) +-{ +- GstSpacemitEnc *spacemitenc = GST_SPACEMITENC (encoder); +- +- if (spacemitenc->frame_count == 0) +- return GST_FLOW_OK; +- +- /* Drain encoder */ +- while ((gst_spacemitenc_handle_frame (encoder, NULL)) == GST_FLOW_OK); +- +- GST_DEBUG_OBJECT(spacemitenc, "finish test3"); +- spacemitenc->at_eos = TRUE; +- +- GST_SPM_ENC_BROADCAST (encoder); +- +- GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +- /* Wait for task thread to pause */ +- GstTask *task = encoder->srcpad->task; +- if (task) { +- //GST_OBJECT_LOCK (task); +- while (GST_TASK_STATE (task) == GST_TASK_STARTED) { +- GST_DEBUG_OBJECT(spacemitenc, "finish test4"); +- g_usleep(400 * 1000); +- //GST_TASK_WAIT (task); +- } +- GST_DEBUG_OBJECT(spacemitenc, "finish test5"); +- //GST_OBJECT_UNLOCK (task); +- } +- GST_VIDEO_ENCODER_STREAM_LOCK (encoder); +- +- GST_DEBUG_OBJECT(spacemitenc, "finish test2"); +- +- return GST_FLOW_OK; +-} +- +-gboolean +-gst_spacemitenc_register (GstPlugin * plugin, guint rank) +-{ +- return gst_element_register (plugin, "spacemitenc", rank, +- GST_TYPE_SPACEMITENC); +-} +- +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.c b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +index f1bb069..2384fb9 100755 +--- a/ext/spacemit/spacemitsrc/gstspacemitsrc.c ++++ b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +@@ -271,6 +271,8 @@ gst_spacemitsrc_init (GstSpacemitSrc * src) + src->stop_requested = FALSE; + src->caps = NULL; + src->queue = NULL; ++ src->pushed_frame = 0; ++ g_mutex_init (&src->mutex); + + frame_hash = g_hash_table_new (g_direct_hash, g_direct_equal); + gst_spacemitsrc_reset (src); +@@ -464,7 +466,9 @@ 
typedef struct + IMAGE_BUFFER_S* outputBuf; + GstSpacemitSrc *src; + GstClockTime clock_time; ++ GstClockTime use_dur; + GstBuffer *gstbuf; ++ gint index; + } VideoFrame; + + static void +@@ -473,14 +477,25 @@ video_frame_release (void *data) + IMAGE_BUFFER_S* outputBuf = (IMAGE_BUFFER_S *) data; + { + VideoFrame * frame = (VideoFrame *) g_hash_table_lookup (frame_hash, outputBuf); +- if (!frame) { ++ if (!frame || !outputBuf) { + CLOG_ERROR("can't get a videoframe from key %p", outputBuf); ++ GST_DEBUG_OBJECT (frame ? frame->src : NULL, "can't get a videoframe from key %p", outputBuf); /* frame may be NULL on this path, so never dereference it unconditionally */ + } else { +- GST_DEBUG_OBJECT (frame->src, "get cpp output buffer %p back, the gstbuffer is %p", outputBuf, frame->gstbuf); ++ //get end absolute time ++ GstClock *clock = gst_element_get_clock (GST_ELEMENT (frame->src)); ++ ++ g_mutex_lock(&frame->src->mutex); ++ frame->src->pushed_frame--; ++ GST_DEBUG_OBJECT (frame->src, "release cpp output buffer %p, the gstbuffer is %p, pushed frame: %u", outputBuf, frame->gstbuf, frame->src->pushed_frame); ++ g_mutex_unlock(&frame->src->mutex); ++ ++ GST_DEBUG_OBJECT (frame->src, "video_frame dur is: %" GST_TIME_FORMAT ", fd: %d", GST_TIME_ARGS (GST_CLOCK_DIFF (frame->use_dur, gst_clock_get_time (clock))), outputBuf->m.fd); /* '%' must directly precede GST_TIME_FORMAT; fd is the last argument */ ++ gst_object_unref (clock); ++ ++ gst_release_cpp_buffer(outputBuf, frame->index); + } + //List_Push out buffer to list + // List_Push(cpp_out_list[0], (void*)outputBuf); +- gst_release_cpp_buffer(outputBuf); + } + } + +@@ -493,10 +508,13 @@ video_frame_create (GstSpacemitSrc * src, IMAGE_BUFFER_S* buffer) + frame->outputBuf = buffer; + frame->fd = buffer->m.fd; + ++ ++ CLOG_INFO("create a video_frame, buffer: %p, fd: %d", buffer, buffer->m.fd); ++ + return frame; + } + +-static int gst_get_cpp_buf_callback(IMAGE_BUFFER_S* cpp_out_buffer) ++static int gst_get_cpp_buf_callback(IMAGE_BUFFER_S* cpp_out_buffer, gint index) + { + VideoFrame * frame = (VideoFrame *) g_hash_table_lookup (frame_hash, cpp_out_buffer); + if (!frame) { +@@ -506,10
+524,10 @@ static int gst_get_cpp_buf_callback(IMAGE_BUFFER_S* cpp_out_buffer) + GstClock *clock = gst_element_get_clock (GST_ELEMENT (frame->src)); + frame->clock_time = gst_clock_get_time (clock); + gst_object_unref (clock); +- CLOG_INFO("PUSH a cpp output buffer to queue"); +- ++ // CLOG_INFO("PUSH a cpp output buffer to queue"); ++ frame->index = index; + g_async_queue_push (frame->src->queue, cpp_out_buffer); +- GST_DEBUG_OBJECT (frame->src, "PUSH a cpp output buffer %p to queue %p, now queue had %d buffer", ++ GST_DEBUG_OBJECT (frame->src, "PUSH a cpp out buffer %p to gst queue %p, now gst queue had %d buffer", + cpp_out_buffer, frame->src->queue, g_async_queue_length(frame->src->queue)); + + return 0; +@@ -740,22 +758,22 @@ gst_spacemitsrc_create (GstPushSrc * psrc, GstBuffer ** buf) + { + GstSpacemitSrc *src = GST_SPACEMIT_SRC (psrc); + VideoFrame *video_frame; +- GST_DEBUG_OBJECT (src, "create"); + IMAGE_BUFFER_S* outputBuf; ++ guint64 timeout = 100 * G_USEC_PER_SEC; /* 100 seconds; g_async_queue_timeout_pop() takes microseconds */ + ++ GST_DEBUG_OBJECT (src, "create"); + //wait queue and get cpp output buffer +- outputBuf = +- (IMAGE_BUFFER_S *) g_async_queue_timeout_pop (src->queue, +- (guint64) 100 * 1000 * 1000); ++ outputBuf = (IMAGE_BUFFER_S *) g_async_queue_timeout_pop (src->queue, (guint64) timeout); + if (!outputBuf) { + GST_ELEMENT_ERROR (src, RESOURCE, OPEN_READ, +- ("Failed to get buffer in %d ms, queue %p had %d buffer", src->timeout, src->queue, g_async_queue_length(src->queue)), (NULL)); ++ ("Failed to get buffer in %" G_GUINT64_FORMAT " s, queue %p had %d buffer", timeout / G_USEC_PER_SEC, src->queue, g_async_queue_length(src->queue)), (NULL)); /* timeout is guint64: "%d" would misread the varargs */ + return GST_FLOW_ERROR; + } +- GST_DEBUG_OBJECT (src, "pop a cpp output buffer %p", outputBuf); ++ GST_DEBUG_OBJECT (src, "pop a cpp out buffer %p, fd: %d, gst queue %p had %d buffer", outputBuf, outputBuf->m.fd, src->queue, g_async_queue_length(src->queue)); + video_frame = (VideoFrame *) g_hash_table_lookup (frame_hash, outputBuf); + if (!video_frame) { + CLOG_ERROR("can't get a
videoframe from key %p", outputBuf); ++ GST_ERROR_OBJECT (src, "can't get a videoframe from key %p", outputBuf); + return GST_FLOW_ERROR; + } + +@@ -764,8 +782,7 @@ gst_spacemitsrc_create (GstPushSrc * psrc, GstBuffer ** buf) + newbuf = gst_buffer_new (); + mem = gst_dmabuf_allocator_alloc_with_flags (src->allocator, video_frame->fd, + src->para.out_width * src->para.out_height * 1.5, GST_FD_MEMORY_FLAG_DONT_CLOSE); +- gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), GST_SPACEMIT_SRC_DMABUF_MEMORY_QUARK, outputBuf, +- video_frame_release); ++ gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), GST_SPACEMIT_SRC_DMABUF_MEMORY_QUARK, outputBuf, video_frame_release); + gst_buffer_append_memory (newbuf, mem); + + //bind release func to cpp output buffer +@@ -777,10 +794,17 @@ gst_spacemitsrc_create (GstPushSrc * psrc, GstBuffer ** buf) + + *buf = newbuf; + video_frame->gstbuf = *buf; +- GST_BUFFER_TIMESTAMP (*buf) = +- GST_CLOCK_DIFF (gst_element_get_base_time (GST_ELEMENT (src)), +- video_frame->clock_time); +- GST_DEBUG_OBJECT (src, "gst buffer %p reture from create, no close fd: %d ", *buf, video_frame->fd); ++ GST_BUFFER_TIMESTAMP (*buf) = GST_CLOCK_DIFF (gst_element_get_base_time (GST_ELEMENT (src)), video_frame->clock_time); ++ ++ //get start absolute time ++ GstClock *clock = gst_element_get_clock (GST_ELEMENT (src)); ++ video_frame->use_dur = gst_clock_get_time (clock); ++ gst_object_unref (clock); ++ ++ g_mutex_lock(&src->mutex); ++ src->pushed_frame++; ++ GST_DEBUG_OBJECT (src, "gst buffer %p new in create, mem fd: %d, buffer:%p, pushed frame: %u", *buf, video_frame->fd, outputBuf, src->pushed_frame); ++ g_mutex_unlock(&src->mutex); + + return GST_FLOW_OK; + } +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.h b/ext/spacemit/spacemitsrc/gstspacemitsrc.h +index 5c657ca..77366df 100755 +--- a/ext/spacemit/spacemitsrc/gstspacemitsrc.h ++++ b/ext/spacemit/spacemitsrc/gstspacemitsrc.h +@@ -68,11 +68,14 @@ struct _GstSpacemitSrc + gint height; + gint binning; + 
++ GMutex mutex; + GAsyncQueue *queue; + GstClockTime base_time; + + guint32 last_frame_count; + guint32 total_dropped_frames; ++ guint32 pushed_frame; ++ + + GstCaps *caps; + gboolean stop_requested; +-- +2.25.1 + diff --git a/package/gstreamer1/gst1-plugins-bad/0003-only-mjpeg-use-dec-parse-interface-bb11.patch b/package/gstreamer1/gst1-plugins-bad/0003-only-mjpeg-use-dec-parse-interface-bb11.patch new file mode 100644 index 00000000..aabafcdb --- /dev/null +++ b/package/gstreamer1/gst1-plugins-bad/0003-only-mjpeg-use-dec-parse-interface-bb11.patch @@ -0,0 +1,406 @@ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.c b/ext/spacemit/spacemitcodec/gstspacemitdec.c +index 6a989e0..48b5b7f 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitdec.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.c +@@ -50,6 +50,7 @@ static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder *decoder); + static GstFlowReturn gst_spacemitdec_handle_frame(GstVideoDecoder *decoder, GstVideoCodecFrame *frame); + static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder *decoder, GstQuery *query); + static GstStateChangeReturn gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition); ++static GstFlowReturn gst_spacemitdec_parse (GstVideoDecoder * bdec, GstVideoCodecFrame * frame, GstAdapter * adapter, gboolean at_eos); + + /* pad templates */ + static GstStaticPadTemplate gst_spacemitdec_sink_template = +@@ -73,8 +74,8 @@ GST_STATIC_PAD_TEMPLATE ("sink", + "parsed = (boolean) true," + "systemstream = (boolean) false" + ";" +- "image/jpeg" +- ";" ++// "image/jpeg" ++// ";" + "video/x-vp8" + ";" + "video/x-vp9" +@@ -204,6 +205,7 @@ static void gst_spacemitdec_class_init(GstSpacemitDecClass * klass) + video_decoder_class->finish = GST_DEBUG_FUNCPTR(gst_spacemitdec_finish); + video_decoder_class->handle_frame = GST_DEBUG_FUNCPTR(gst_spacemitdec_handle_frame); + video_decoder_class->decide_allocation = GST_DEBUG_FUNCPTR(gst_spacemitdec_decide_allocation); 
++ video_decoder_class->parse = gst_spacemitdec_parse; + + // element_class->change_state = GST_DEBUG_FUNCPTR (gst_spacemitdec_dec_change_state); + +@@ -359,6 +361,9 @@ static gboolean gst_spacemitdec_start (GstVideoDecoder * decoder) + if (thiz->save_dec) + thiz->fb = fopen("/tmp/spacemitdec_out.yuv", "wb+"); + ++ thiz->saw_header = FALSE; ++ thiz->parse_entropy_len = 0; ++ thiz->parse_resync = FALSE; + GST_DEBUG_OBJECT (thiz, "finish start ! (%d)", thiz->save_dec); + + return TRUE; +@@ -370,6 +375,10 @@ static gboolean gst_spacemitdec_stop(GstVideoDecoder * decoder) + if (thiz->save_dec) + fclose(thiz->fb); + ++ thiz->parse_entropy_len = 0; ++ thiz->parse_resync = FALSE; ++ thiz->saw_header = FALSE; ++ + if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { + GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop"); + return TRUE; +@@ -436,7 +445,8 @@ gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) + + thiz->ctx->stVdecPara.bInputBlockModeEnable = MPP_TRUE; + thiz->ctx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; +- GST_DEBUG_OBJECT (thiz, "spacemitdec set eCodecType is %d, downscale:%u", thiz->ctx->eCodecType, thiz->downscale); ++ GST_DEBUG_OBJECT (thiz, "spacemitdec set eCodecType is %d, downscale:%u, eCodingType:%d", ++ thiz->ctx->eCodecType, thiz->downscale, thiz->eCodingType); + + ret = VDEC_Init (thiz->ctx); + if (ret) { +@@ -460,6 +470,7 @@ static gboolean gst_spacemitdec_set_format(GstVideoDecoder * decoder, GstVideoCo + GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); + GstStructure *structure; + gboolean ret = TRUE; ++ gboolean parsed = FALSE; + + GST_DEBUG_OBJECT(thiz, "input caps: %" GST_PTR_FORMAT, state->caps); + +@@ -473,11 +484,16 @@ static gboolean gst_spacemitdec_set_format(GstVideoDecoder * decoder, GstVideoCo + if (!gst_spacemitdec_init_decoder(thiz)) + return FALSE; + +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- set_format (%d, %d, %d), (%d, %d, %d)", ++ if (thiz->eCodingType == 
CODING_MJPEG) { ++ structure = gst_caps_get_structure (state->caps, 0); ++ gst_structure_get_boolean (structure, "parsed", &parsed); ++ gst_video_decoder_set_packetized (thiz, parsed); ++ } ++ GST_DEBUG_OBJECT (thiz, "set format finish (%d, %d, %d), (%d, %d, %d)", + thiz->input_state->info.width, thiz->input_state->info.height, thiz->input_state->info.size, + state->info.width, state->info.height, state->info.size); +- GST_DEBUG_OBJECT (thiz, "ZRong ----------------------- set format finish, %u, %s", +- GST_VIDEO_INFO_FORMAT (&state->info), gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&state->info))); ++ GST_DEBUG_OBJECT (thiz, "set format finish (%u, %s), parsed: %d", ++ GST_VIDEO_INFO_FORMAT (&state->info), gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&state->info)), parsed); + + return TRUE; + } +@@ -1130,6 +1146,202 @@ flushing: + } + } + ++static inline gboolean ++gst_spacemitdec_parse_tag_has_entropy_segment (guint8 tag) ++{ ++ if (tag == 0xda || (tag >= 0xd0 && tag <= 0xd7)) ++ return TRUE; ++ return FALSE; ++} ++ ++//code from jpegdec ++static GstFlowReturn ++gst_spacemitdec_parse (GstVideoDecoder * bdec, GstVideoCodecFrame * frame, ++ GstAdapter * adapter, gboolean at_eos) ++{ ++ guint size; ++ gint toadd = 0; ++ gboolean resync; ++ gint offset = 0, noffset; ++ GstSpacemitDec *thiz = GST_SPACEMITDEC (bdec); ++ ++ if (thiz->eCodingType != CODING_MJPEG) { ++ GST_ERROR_OBJECT (thiz, "only support mjpeg for parse, eCodingType:%d", thiz->eCodingType); ++ return GST_FLOW_ERROR; ++ } ++ ++ GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ ++ /* FIXME : The overhead of using scan_uint32 is massive */ ++ ++ size = gst_adapter_available (adapter); ++ GST_DEBUG_OBJECT (thiz, "Parsing jpeg image data (%u bytes)", size); ++ ++ if (at_eos) { ++ GST_DEBUG_OBJECT (thiz, "Flushing all data out"); ++ toadd = size; ++ ++ /* If we have leftover data, throw it away */ ++ if (!thiz->saw_header) ++ goto drop_frame; ++ goto have_full_frame; ++ } ++ ++ if (size < 8) ++ 
goto need_more_data; ++ ++ if (!thiz->saw_header) { ++ gint ret; ++ /* we expect at least 4 bytes, first of which start marker */ ++ ret = ++ gst_adapter_masked_scan_uint32 (adapter, 0xffff0000, 0xffd80000, 0, ++ size - 4); ++ ++ GST_DEBUG_OBJECT (thiz, "ret:%d", ret); ++ if (ret < 0) ++ goto need_more_data; ++ ++ if (ret) { ++ gst_adapter_flush (adapter, ret); ++ size -= ret; ++ } ++ thiz->saw_header = TRUE; ++ } ++ ++ while (1) { ++ guint frame_len; ++ guint32 value; ++ ++ GST_DEBUG_OBJECT (thiz, "offset:%d, size:%d", offset, size); ++ ++ noffset = ++ gst_adapter_masked_scan_uint32_peek (adapter, 0x0000ff00, 0x0000ff00, ++ offset, size - offset, &value); ++ ++ /* lost sync if 0xff marker not where expected */ ++ if ((resync = (noffset != offset))) { ++ GST_DEBUG_OBJECT (thiz, "Lost sync at 0x%08x, resyncing", offset + 2); ++ } ++ /* may have marker, but could have been resyncng */ ++ resync = resync || thiz->parse_resync; ++ /* Skip over extra 0xff */ ++ while ((noffset >= 0) && ((value & 0xff) == 0xff)) { ++ noffset++; ++ noffset = ++ gst_adapter_masked_scan_uint32_peek (adapter, 0x0000ff00, 0x0000ff00, ++ noffset, size - noffset, &value); ++ } ++ /* enough bytes left for marker? 
(we need 0xNN after the 0xff) */ ++ if (noffset < 0) { ++ GST_DEBUG_OBJECT (thiz, "at end of input and no EOI marker found, need more data"); ++ goto need_more_data; ++ } ++ ++ /* now lock on the marker we found */ ++ offset = noffset; ++ value = value & 0xff; ++ if (value == 0xd9) { ++ GST_DEBUG_OBJECT (thiz, "0x%08x: EOI marker", offset + 2); ++ /* clear parse state */ ++ thiz->saw_header = FALSE; ++ thiz->parse_resync = FALSE; ++ toadd = offset + 4; ++ goto have_full_frame; ++ } ++ if (value == 0xd8) { ++ GST_DEBUG_OBJECT (thiz, "0x%08x: SOI marker before EOI marker", offset + 2); ++ ++ /* clear parse state */ ++ thiz->saw_header = FALSE; ++ thiz->parse_resync = FALSE; ++ toadd = offset; ++ goto have_full_frame; ++ } ++ ++ ++ if (value >= 0xd0 && value <= 0xd7) ++ frame_len = 0; ++ else { ++ /* peek tag and subsequent length */ ++ if (offset + 2 + 4 > size) ++ goto need_more_data; ++ else ++ gst_adapter_masked_scan_uint32_peek (adapter, 0x0, 0x0, offset + 2, 4, ++ &frame_len); ++ frame_len = frame_len & 0xffff; ++ } ++ GST_DEBUG_OBJECT (thiz, "0x%08x: tag %02x, frame_len=%u", offset + 2, value, frame_len); ++ /* the frame length includes the 2 bytes for the length; here we want at ++ * least 2 more bytes at the end for an end marker */ ++ if (offset + 2 + 2 + frame_len + 2 > size) { ++ goto need_more_data; ++ } ++ ++ if (gst_spacemitdec_parse_tag_has_entropy_segment (value)) { ++ guint eseglen = thiz->parse_entropy_len; ++ ++ GST_DEBUG_OBJECT (thiz, "0x%08x: finding entropy segment length (eseglen:%d)", ++ offset + 2, eseglen); ++ if (size < offset + 2 + frame_len + eseglen) ++ goto need_more_data; ++ noffset = offset + 2 + frame_len + thiz->parse_entropy_len; ++ while (1) { ++ GST_DEBUG_OBJECT (thiz, "noffset:%d, size:%d, size - noffset:%d", ++ noffset, size, size - noffset); ++ noffset = gst_adapter_masked_scan_uint32_peek (adapter, 0x0000ff00, ++ 0x0000ff00, noffset, size - noffset, &value); ++ if (noffset < 0) { ++ /* need more data */ ++ 
thiz->parse_entropy_len = size - offset - 4 - frame_len - 2; ++ goto need_more_data; ++ } ++ if ((value & 0xff) != 0x00) { ++ eseglen = noffset - offset - frame_len - 2; ++ break; ++ } ++ noffset++; ++ } ++ thiz->parse_entropy_len = 0; ++ frame_len += eseglen; ++ GST_DEBUG_OBJECT (thiz, "entropy segment length=%u => frame_len=%u", eseglen, ++ frame_len); ++ } ++ if (resync) { ++ /* check if we will still be in sync if we interpret ++ * this as a sync point and skip this frame */ ++ noffset = offset + frame_len + 2; ++ noffset = gst_adapter_masked_scan_uint32 (adapter, 0x0000ff00, 0x0000ff00, ++ noffset, 4); ++ if (noffset < 0) { ++ /* ignore and continue resyncing until we hit the end ++ * of our data or find a sync point that looks okay */ ++ offset++; ++ continue; ++ } ++ GST_DEBUG_OBJECT (thiz, "found sync at 0x%x", offset + 2); ++ } ++ ++ /* Add current data to output buffer */ ++ toadd += frame_len + 2; ++ offset += frame_len + 2; ++ } ++ ++need_more_data: ++ if (toadd) ++ gst_video_decoder_add_to_frame (bdec, toadd); ++ return GST_VIDEO_DECODER_FLOW_NEED_DATA; ++ ++have_full_frame: ++ if (toadd) ++ gst_video_decoder_add_to_frame (bdec, toadd); ++ GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); ++ return gst_video_decoder_have_frame (bdec); ++ ++drop_frame: ++ gst_adapter_flush (adapter, size); ++ return GST_FLOW_OK; ++} ++ + static GstFlowReturn gst_spacemitdec_finish(GstVideoDecoder * decoder) + { + GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.h b/ext/spacemit/spacemitcodec/gstspacemitdec.h +index 99cfd31..0b0bc22 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitdec.h ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.h +@@ -69,6 +69,11 @@ struct _GstSpacemitDec + gboolean wait_reschange; + GstBufferPool *pool; + FILE *fb; ++ ++ /* parse state */ ++ gboolean saw_header; ++ gint parse_entropy_len; ++ gint parse_resync; + }; + + struct _GstSpacemitDecClass +diff --git 
a/ext/spacemit/spacemitcodec/gstspacemitenc.c b/ext/spacemit/spacemitcodec/gstspacemitenc.c +index a0c0370..ba9ab6a 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitenc.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.c +@@ -529,8 +529,6 @@ release_all_buffer (gpointer key, gpointer value, gpointer user_data) + GstBuffer *buffer = (GstBuffer *)value; + GstSpacemitEnc *thiz = (GstSpacemitEnc *)user_data; + +- g_hash_table_remove(thiz->bufs_hash, key); +- + GST_DEBUG_OBJECT (thiz, "release buffer %p, before release ref: %d", buffer, GST_MINI_OBJECT_REFCOUNT_VALUE(buffer)); + gst_buffer_unref (buffer); + } +@@ -553,6 +551,7 @@ static void drain_all_upstream_buffer (GstSpacemitEnc * thiz) + g_hash_table_size(thiz->bufs_hash), try_count); + + g_hash_table_foreach (thiz->bufs_hash, release_all_buffer, thiz); ++ g_hash_table_remove_all (thiz->bufs_hash); + } + + static gint32 gst_spacemitenc_request_packet (GstSpacemitEnc *thiz) +diff --git a/ext/spacemit/spacemitcodec/gstspacemitplugin.c b/ext/spacemit/spacemitcodec/gstspacemitplugin.c +index d2d5e62..be75339 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitplugin.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitplugin.c +@@ -17,9 +17,9 @@ static gboolean plugin_init(GstPlugin *plugin) + gboolean ret = FALSE; + + ret |= gst_spacemitdec_register (plugin, GST_RANK_PRIMARY + 1); +- ret |= gst_spacemitenc_register (plugin, GST_RANK_PRIMARY + 1); ++ ret |= gst_spacemitenc_register (plugin, GST_RANK_PRIMARY - 1); + +- return ret; ++ return ret; + } + + GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, +diff --git a/ext/spacemit/spacemitcodec/meson.build b/ext/spacemit/spacemitcodec/meson.build +index c6ed6c3..e7deda1 100755 +--- a/ext/spacemit/spacemitcodec/meson.build ++++ b/ext/spacemit/spacemitcodec/meson.build +@@ -6,7 +6,7 @@ spacemitcodec_sources = [ + 'gstspacemitplugin.c', + ] + +-spacemitmpp_dep = dependency('spacemit_mpp', version : '>= 0.0.0', required : false) ++spacemitmpp_dep = dependency('spacemit_mpp', version : '>= 
0.0.0', required : get_option('spacemitcodec')) + + if spacemitmpp_dep.found() + gstspacemitcodec = library('gstspacemitcodec', +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.c b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +index 2384fb9..eafb476 100755 +--- a/ext/spacemit/spacemitsrc/gstspacemitsrc.c ++++ b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +@@ -322,6 +322,7 @@ gst_spacemitsrc_set_property (GObject * object, guint property_id, + GstSpacemitSrc *src; + + src = GST_SPACEMIT_SRC (object); ++ GST_INFO_OBJECT (src, "in set property :%d", property_id); + + switch (property_id) { + case PROP_LOCATION: +@@ -380,6 +381,7 @@ gst_spacemitsrc_get_property (GObject * object, guint property_id, + + g_return_if_fail (GST_IS_SPACEMIT_SRC (object)); + src = GST_SPACEMIT_SRC (object); ++ GST_INFO_OBJECT (src, "in get property :%d", property_id); + + switch (property_id) { + case PROP_LOCATION: +diff --git a/ext/spacemit/spacemitsrc/meson.build b/ext/spacemit/spacemitsrc/meson.build +index bfa0b49..85040a7 100755 +--- a/ext/spacemit/spacemitsrc/meson.build ++++ b/ext/spacemit/spacemitsrc/meson.build +@@ -3,7 +3,7 @@ spacemitsrc_sources = [ + 'gstspmsrcallocator.c', + ] + +-spacemitcam_dep = dependency('k1x-cam', version : '>= 0.0.0', required : false) ++spacemitcam_dep = dependency('k1x-cam', version : '>= 0.0.0', required : get_option('spacemitsrc')) + + gstspacemitsrc = library('gstspacemitsrc', + spacemitsrc_sources, +diff --git a/meson_options.txt b/meson_options.txt +index 3131929..e2e689f 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -184,7 +184,8 @@ option('wpe', type : 'feature', value : 'auto', description : 'WPE Web browser p + option('magicleap', type : 'feature', value : 'auto', description : 'Magic Leap platform support') + option('v4l2codecs', type : 'feature', value : 'auto', description : 'Video4Linux Stateless CODECs support') + option('isac', type : 'feature', value : 'auto', description : 'iSAC plugin') +-option('spacemit', type : 
'feature', value : 'auto', description : 'Spacemit encoder/decoder plugin') ++option('spacemitcodec', type : 'feature', value : 'auto', description : 'Spacemit encoder/decoder plugin') ++option('spacemitsrc', type : 'feature', value : 'auto', description : 'Spacemit source plugin') + + # HLS plugin options + option('hls', type : 'feature', value : 'auto', description : 'HTTP Live Streaming plugin') diff --git a/package/gstreamer1/gst1-plugins-bad/0004-0.0.61mpp-bb12-bb13.patch b/package/gstreamer1/gst1-plugins-bad/0004-0.0.61mpp-bb12-bb13.patch new file mode 100644 index 00000000..adff0e18 --- /dev/null +++ b/package/gstreamer1/gst1-plugins-bad/0004-0.0.61mpp-bb12-bb13.patch @@ -0,0 +1,474 @@ +diff --git a/ext/spacemit/spacemitcodec/gstspacemitallocator.c b/ext/spacemit/spacemitcodec/gstspacemitallocator.c +index abb2b84..a677114 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitallocator.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitallocator.c +@@ -112,7 +112,7 @@ static void + gst_spacemit_allocator_finalize (GObject * obj) + { + GstSpaceMitAllocator *alloc = GST_SPACEMIT_ALLOCATOR (obj); +- GST_DEBUG_OBJECT (alloc, "ZRong ------------------- spacemit allocator finalize"); ++ GST_DEBUG_OBJECT (alloc, "start allocator finalize, refcount:%d", GST_OBJECT_REFCOUNT_VALUE(alloc)); + + G_OBJECT_CLASS (parent_class)->finalize (obj); + } +@@ -147,7 +147,7 @@ gst_spacemit_allocator_memory_dispose (GstMemory * base_mem) + + /* keep the memory alive */ + gst_memory_ref (base_mem); +- GST_DEBUG ("memory dispose success, mem %p fd:(%d, %d), id:%d, base_mem:%p, ref:%d", ++ GST_DEBUG ("memory dispose success, mem %p fd:(%d, %d), id:%d, base_mem:%p, refcount:%d", + mem, mem->fd, fd, mem->mppframe_id, base_mem, base_mem->mini_object.refcount); + + mem->acquired = FALSE; +@@ -340,7 +340,7 @@ gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean ac + if (allocator->active != active) + changed = TRUE; + +- GST_DEBUG_OBJECT (allocator, "allocator set 
active/deactice paras, cur:%d, set:%d", allocator->active, active); ++ GST_DEBUG_OBJECT (allocator, "start allocator set active, cur:%d, set:%d", allocator->active, active); + + if (changed) { + if (active) { +@@ -350,7 +350,7 @@ gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean ac + g_hash_table_foreach (allocator->memories, gst_unref_spacemit_mem, NULL); + g_hash_table_remove_all (allocator->memories); + if (allocator->foreign_allocator) { +- GST_DEBUG_OBJECT (allocator, "ZRong ------------------- allocator unref(%d %d) (%d %d)", ++ GST_DEBUG_OBJECT (allocator, "allocator refcount (%d %d) (%d %d)", + GST_OBJECT_REFCOUNT_VALUE(allocator->foreign_allocator), GST_OBJECT_REFCOUNT(allocator->foreign_allocator), + GST_OBJECT_REFCOUNT_VALUE(allocator), GST_OBJECT_REFCOUNT(allocator)); + g_object_unref (allocator->foreign_allocator); +@@ -361,7 +361,7 @@ gst_spacemit_allocator_set_active (GstSpaceMitAllocator * allocator, gboolean ac + + allocator->active = active; + g_mutex_unlock (&allocator->lock); +- GST_DEBUG_OBJECT (allocator, "allocator set active/deactice finish"); ++ GST_DEBUG_OBJECT (allocator, "finish allocator set active"); + + return changed; + } +diff --git a/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c +index cd186ab..771afe2 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitbufferpool.c +@@ -41,7 +41,7 @@ static void gst_spacemit_buffer_pool_finalize (GObject * object); + + G_DEFINE_TYPE_WITH_CODE (GstSpaceMitBufferPool, gst_spacemit_buffer_pool, + GST_TYPE_BUFFER_POOL, +- GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "SpaceMitbufferpool", 0, ++ GST_DEBUG_CATEGORY_INIT (GST_CAT_DEFAULT, "spacemitbufferpool", 0, + "spacemit buffer pool")); + + static gboolean +@@ -301,11 +301,11 @@ static void + gst_spacemit_buffer_pool_finalize (GObject * object) + { + GstSpaceMitBufferPool *pool = GST_SPACEMIT_BUFFER_POOL 
(object); +- GST_DEBUG ("ZRong ------------------------af pool finalize."); ++ GST_DEBUG_OBJECT (pool, "start pool finalize."); + + if (pool->allocator) { +- GST_DEBUG ("ZRong ------------------- af pool finalize (%d %d)", +- GST_OBJECT_REFCOUNT_VALUE(pool->allocator), GST_OBJECT_REFCOUNT(pool->allocator)); ++ GST_DEBUG_OBJECT (pool, "pool allocator refcount: (%d %d)", ++ GST_OBJECT_REFCOUNT_VALUE(pool->allocator), GST_OBJECT_REFCOUNT(pool->allocator)); + + gst_object_unref (pool->allocator); + } +diff --git a/ext/spacemit/spacemitcodec/gstspacemitdec.c b/ext/spacemit/spacemitcodec/gstspacemitdec.c +index 48b5b7f..8c170e5 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitdec.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitdec.c +@@ -154,26 +154,26 @@ done: + static gboolean gst_spacemitdec_close(GstVideoDecoder * decoder) + { + GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ GST_DEBUG_OBJECT (decoder, "start close"); + + if (thiz->input_state) + { + gst_video_codec_state_unref (thiz->input_state); + thiz->input_state = NULL; + } +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close"); ++ + if (thiz->pool) { ++ GST_DEBUG_OBJECT (decoder, "start pool set active: false(%d, %d)", ++ GST_MINI_OBJECT_REFCOUNT_VALUE(thiz->pool), ++ GST_MINI_OBJECT_REFCOUNT_VALUE(GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator)); ++ + gst_buffer_pool_set_active (thiz->pool, FALSE); +-// gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(spacemitdec->pool)->allocator); ++ gst_spacemit_allocator_wait_inactive (GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator); + gst_object_unref (thiz->pool); + thiz->pool = NULL; + } +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec start close222"); +- +-// FRAME_Destory(spacemitdec->mppframe); +- PACKET_Destory (thiz->mpppacket); +- VDEC_DestoryChannel (thiz->ctx); + +- GST_DEBUG_OBJECT (decoder, "ZRong --------------- spacemitdec finish close"); ++ GST_DEBUG_OBJECT (decoder, "finish 
close"); + + return TRUE; + } +@@ -263,7 +263,7 @@ gst_spacemitdec_set_property (GObject * object, guint property_id, + { + GstSpacemitDec *thiz = GST_SPACEMITDEC (object); + +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------- set_property: %d", property_id); ++ GST_DEBUG_OBJECT (thiz, "set property: %d", property_id); + + switch (property_id) { + case PROP_CODE_TYPE: +@@ -293,7 +293,7 @@ gst_spacemitdec_get_property (GObject * object, guint property_id, + { + GstSpacemitDec *thiz = GST_SPACEMITDEC (object); + +- GST_DEBUG_OBJECT (thiz, "ZRong ------------------- get_property: %d", property_id); ++ GST_DEBUG_OBJECT (thiz, "get property: %d", property_id); + + switch (property_id) { + case PROP_CODE_TYPE: +@@ -372,6 +372,9 @@ static gboolean gst_spacemitdec_start (GstVideoDecoder * decoder) + static gboolean gst_spacemitdec_stop(GstVideoDecoder * decoder) + { + GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); ++ ++ GST_DEBUG_OBJECT (thiz, "start finish stop"); ++ + if (thiz->save_dec) + fclose(thiz->fb); + +@@ -379,13 +382,19 @@ static gboolean gst_spacemitdec_stop(GstVideoDecoder * decoder) + thiz->parse_resync = FALSE; + thiz->saw_header = FALSE; + ++ VDEC_DestoryChannel (thiz->ctx); ++ thiz->ctx = NULL; ++ ++ PACKET_Destory(thiz->mpppacket); ++ thiz->mpppacket = NULL; ++ + if (!(gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { +- GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop"); ++ GST_DEBUG_OBJECT (thiz, "finish stop"); + return TRUE; + } + + gst_pad_stop_task (decoder->srcpad); +- GST_DEBUG_OBJECT (thiz, "ZRong --------------- spacemitdec finish stop222"); ++ GST_DEBUG_OBJECT (thiz, "finish stop222"); + + return TRUE; + } +@@ -398,6 +407,26 @@ gst_change_mpp_ecoding_type (GstStructure * s) + if (gst_structure_has_name (s, "video/x-h265")) + return CODING_H265; + ++ if (gst_structure_has_name (s, "video/mpeg")) { ++ const GValue *value; ++ ++ value = gst_structure_get_value (s, "mpegversion"); ++ if (!value || 
!G_VALUE_HOLDS_INT (value)) { ++ GST_ERROR ("Failed to get mpegversion"); ++ return CODING_UNKNOWN; ++ } ++ if (g_value_get_int (value) == 4) ++ return CODING_MPEG4; ++ else if (g_value_get_int (value) == 2) ++ return CODING_MPEG2; ++ else if (g_value_get_int (value) == 1) { ++ GST_ERROR ("no support now!!!"); ++ return CODING_MPEG1; ++ } else { ++ return CODING_UNKNOWN; ++ } ++ } ++ + if (gst_structure_has_name (s, "image/jpeg")) + return CODING_MJPEG; + +@@ -442,6 +471,7 @@ gst_spacemitdec_init_decoder (GstSpacemitDec * thiz) + thiz->ctx->stVdecPara.nRotateDegree = 0; + thiz->ctx->stVdecPara.bThumbnailMode = 0; + thiz->ctx->stVdecPara.bIsInterlaced = MPP_FALSE; ++ thiz->ctx->stVdecPara.eFrameBufferType = MPP_FRAME_BUFFERTYPE_DMABUF_INTERNAL; + + thiz->ctx->stVdecPara.bInputBlockModeEnable = MPP_TRUE; + thiz->ctx->stVdecPara.bOutputBlockModeEnable = MPP_TRUE; +@@ -511,7 +541,7 @@ gst_spacemitdec_flush (GstVideoDecoder * decoder) + { + GstSpacemitDec *thiz = GST_SPACEMITDEC(decoder); + +- GST_DEBUG_OBJECT (thiz, "ZRong -------------------- flushing start, (%d)", thiz->downstream_flow_ret); ++ GST_DEBUG_OBJECT (thiz, "start flush, (%d)", thiz->downstream_flow_ret); + + thiz->flushing = TRUE; + +@@ -544,7 +574,7 @@ gst_spacemitdec_flush (GstVideoDecoder * decoder) + thiz->flushing = FALSE; + thiz->downstream_flow_ret = GST_FLOW_OK; + +- GST_DEBUG_OBJECT (thiz, "ZRong -------------------- flushing stop"); ++ GST_DEBUG_OBJECT (thiz, "finish flush"); + + return TRUE; + } +@@ -715,7 +745,7 @@ static gint32 gst_spacemitdec_request_frame (GstSpacemitDec *thiz) + } while (thiz->req_nonblock); + + if (ret == MPP_CODER_EOS) +- FRAME_SetEos (mppframe, TRUE); ++ FRAME_SetEos (mppframe, FRAME_EOS_WITH_DATA); + + count = 0; + thiz->mppframe = mppframe; +@@ -788,7 +818,7 @@ gst_spacemitdec_loop (GstVideoDecoder * decoder) + frame, frame->system_frame_number, GST_TIME_ARGS (GST_BUFFER_PTS (frame->input_buffer)), + GST_TIME_ARGS (GST_BUFFER_DTS (frame->input_buffer)), GST_TIME_ARGS 
(FRAME_GetPts(thiz->mppframe))); + +- //frame->pts = FRAME_GetPts(thiz->mppframe); ++ // frame->pts = FRAME_GetPts(thiz->mppframe); + // frame->pts = GST_CLOCK_TIME_NONE; + // frame->dts = FRAME_GetPts(thiz->mppframe); + flow_status = gst_spacemitdec_fill_output_buffer (decoder, &frame->output_buffer); +@@ -832,6 +862,7 @@ done: + + flushing: + { ++ thiz->flushing = TRUE; + thiz->downstream_flow_ret = GST_FLOW_FLUSHING; + + while (1) { +@@ -884,7 +915,7 @@ gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) + GstCaps *caps = NULL; + GstVideoInfo vinfo; + +- GST_ERROR_OBJECT (thiz, "@@@ ZRong ------------------------- start pool_set_active!"); ++ GST_ERROR_OBJECT (thiz, "start spacemitdec pool set active!"); + + pool = gst_video_decoder_get_buffer_pool (GST_VIDEO_DECODER (thiz)); + if (pool) { +@@ -895,6 +926,7 @@ gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) + return FALSE; + } + gst_structure_free (config); ++ gst_object_unref (pool); + } + + if (caps) { +@@ -934,6 +966,8 @@ gst_spacemitdec_pool_set_active(GstVideoDecoder * decoder) + return FALSE; + } + ++ GST_DEBUG_OBJECT (thiz, "finish spacemitdec pool set active, ref count: %d", GST_OBJECT_REFCOUNT_VALUE(thiz->pool)); ++ + return TRUE; + } + +@@ -1078,8 +1112,7 @@ gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, + if (!gst_spacemitdec_set_src_caps (thiz)) + goto not_negotiated_err; + +- GST_DEBUG_OBJECT (thiz, "start negotiate, %d, %d, %d", +- actual_width, actual_height, thiz->eOutputPixelFormat); ++ GST_DEBUG_OBJECT (thiz, "start negotiate, (%d, %d, %d)", actual_width, actual_height, thiz->eOutputPixelFormat); + + if (!gst_video_decoder_negotiate (decoder)) + goto not_negotiated_err; +@@ -1088,13 +1121,14 @@ gst_spacemitdec_handle_frame (GstVideoDecoder * decoder, + goto acitve_fail; + } + +- if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED)) { ++ if (G_UNLIKELY (!gst_pad_get_task_state ((decoder)->srcpad) == GST_TASK_STARTED) && !thiz->flushing) { + 
GST_DEBUG_OBJECT (thiz, "start dec thread"); + gst_pad_start_task (decoder->srcpad, + (GstTaskFunction) gst_spacemitdec_loop, decoder, NULL); + } + +- GST_DEBUG_OBJECT (thiz, "finish dec handle, %d", thiz->downstream_flow_ret); ++ GST_DEBUG_OBJECT (thiz, "finish dec handle, %d, pool refcount:%d", ++ thiz->downstream_flow_ret, GST_OBJECT_REFCOUNT_VALUE(thiz->pool)); + ret = thiz->downstream_flow_ret; + + out: +@@ -1461,8 +1495,11 @@ static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder * decoder, Gst + GstCaps *caps = NULL; + GstVideoInfo vinfo; + GstVideoCodecState *output_state; ++ guint num_buffers; ++ GstAllocationParams params; ++ GstAllocator *allocator = NULL; + +- GST_DEBUG_OBJECT (thiz, "@@@ ZRong ------------------------- in decide_allocation!"); ++ GST_DEBUG_OBJECT (thiz, "start decide_allocation!, query: %" GST_PTR_FORMAT, query); + + if (!GST_VIDEO_DECODER_CLASS(gst_spacemitdec_parent_class)->decide_allocation(decoder, query)) + return FALSE; +@@ -1471,34 +1508,72 @@ static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder * decoder, Gst + class ensures that there will always be at least a 0th pool in + the query. 
*/ + gst_query_parse_nth_allocation_pool(query, 0, &pool, &size, &min, &max); +- pool_config = gst_buffer_pool_get_config(pool); +- gst_buffer_pool_config_get_params (pool_config, &caps, &size, +- &min, &max); ++ pool_config = gst_buffer_pool_get_config (pool); ++ gst_buffer_pool_config_get_params (pool_config, &caps, &size, &min, &max); ++ ++ GST_DEBUG_OBJECT (thiz, "upstream provides the pool is: %" GST_PTR_FORMAT "pool caps: %" GST_PTR_FORMAT, pool, caps); ++ ++ gst_object_unref (pool); + +- GST_DEBUG_OBJECT (thiz, "get pool caps: %" GST_PTR_FORMAT, caps); + if (_gst_caps_has_feature (caps, GST_CAPS_FEATURE_MEMORY_DMABUF)) { +- GST_INFO_OBJECT (thiz, "the spacemit decoder uses DMABuf memory"); ++ // GST_INFO_OBJECT (thiz, "the spacemit decoder uses DMABuf memory"); + thiz->use_dmabuf = TRUE; + } else { + thiz->use_dmabuf = FALSE; + } + ++ i = 0; ++ while (gst_query_get_n_allocation_pools (query) > 0) { ++ gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); ++ GST_DEBUG_OBJECT (thiz, "%dth pool is %" GST_PTR_FORMAT, i, pool); ++ ++ gst_query_remove_nth_allocation_pool (query, 0); ++ if (pool) ++ gst_object_unref (pool); ++ pool = NULL; ++ i++; ++ } ++ ++ i = 0; ++ while (gst_query_get_n_allocation_params (query) > 0) { ++ gst_query_parse_nth_allocation_param (query, 0, &allocator, &params); ++ GST_DEBUG_OBJECT (thiz, "%dth allocator is %" GST_PTR_FORMAT, i, allocator); ++ ++ gst_query_remove_nth_allocation_param (query, 0); ++ if (allocator) ++ gst_object_unref (allocator); ++ allocator = NULL; ++ i++; ++ } ++ + /* Decoder always use its own pool. */ + if (!thiz->pool) { + output_state = gst_video_decoder_get_output_state (GST_VIDEO_DECODER (thiz)); + + gst_clear_object (&thiz->pool); +- GST_INFO_OBJECT (thiz, "create new spacemit bufferpool"); +- thiz->pool = +- gst_spacemitdec_create_buffer_pool(thiz, &output_state->info, (8 > min) ? 8 : min); ++ num_buffers = (8 > min) ? 
8 : min; ++ GST_INFO_OBJECT (thiz, "create new spacemit bufferpool, DMABUF memory: %d, num: %d", thiz->use_dmabuf, num_buffers); ++ thiz->pool = gst_spacemitdec_create_buffer_pool(thiz, &output_state->info, num_buffers); + gst_video_codec_state_unref (output_state); + if (!thiz->pool) { + GST_ERROR_OBJECT (thiz, "failed to create new pool"); + goto failed_to_create_pool; + } ++ ++ gst_buffer_pool_config_set_params (pool_config, caps, size, num_buffers, num_buffers); ++ gst_buffer_pool_config_set_allocator (pool_config, GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, &params); ++ gst_query_add_allocation_param (query, GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, &params); ++ gst_query_add_allocation_pool (query, thiz->pool, size, num_buffers, num_buffers); ++ } else { ++ gst_buffer_pool_config_set_params (pool_config, caps, size, num_buffers, num_buffers); ++ gst_buffer_pool_config_set_allocator (pool_config, GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, &params); ++ gst_query_add_allocation_param (query, GST_SPACEMIT_BUFFER_POOL_CAST(thiz->pool)->allocator, &params); ++ gst_query_add_allocation_pool (query, thiz->pool, size, num_buffers, num_buffers); ++ // goto no_support; ++ GST_WARNING_OBJECT (thiz, "spacemit pool had setup!"); + } +- GST_DEBUG_OBJECT (thiz, +- "upstream provides the pool is: %" GST_PTR_FORMAT, pool); ++ ++ GST_DEBUG_OBJECT (thiz, "finish decide_allocation, spacemit pool ref:%d", GST_OBJECT_REFCOUNT_VALUE(thiz->pool)); + + /* If downstream supports video meta and video alignment, + * we can replace with our own spacemit bufferpool and use it +@@ -1527,22 +1602,16 @@ static gboolean gst_spacemitdec_decide_allocation(GstVideoDecoder * decoder, Gst + goto no_support; + } + #endif +- if (pool) +- gst_object_unref (pool); + + return TRUE; + + failed_to_create_pool: + GST_ERROR_OBJECT (thiz, "failed to set buffer pool config"); +- if (pool) +- gst_object_unref (pool); + return FALSE; + + no_support: + GST_ERROR_OBJECT (thiz, + "error! 
upstream provides the strange pool: %" GST_PTR_FORMAT, pool); +- if (pool) +- gst_object_unref (pool); + return FALSE; + } + +@@ -1550,7 +1619,7 @@ static GstStateChangeReturn + gst_spacemitdec_dec_change_state (GstElement * element, GstStateChange transition) + { + GstVideoDecoder *decoder = GST_VIDEO_DECODER (element); +- GST_DEBUG("ZRong ------------------ in change state, %x", transition); ++ GST_DEBUG_OBJECT (decoder, "start change state, %x", transition); + + if (transition == GST_STATE_CHANGE_PAUSED_TO_READY) { + GST_VIDEO_DECODER_STREAM_LOCK (decoder); +diff --git a/ext/spacemit/spacemitcodec/gstspacemitenc.c b/ext/spacemit/spacemitcodec/gstspacemitenc.c +index ba9ab6a..2070c8b 100755 +--- a/ext/spacemit/spacemitcodec/gstspacemitenc.c ++++ b/ext/spacemit/spacemitcodec/gstspacemitenc.c +@@ -944,7 +944,7 @@ gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, + + if (frame) { + thiz->frame_count++; +- FRAME_SetEos(thiz->mppframe, 0); ++ FRAME_SetEos(thiz->mppframe, FRAME_NO_EOS); + + gst_video_frame_map (&video_frame, &thiz->input_state->info, frame->input_buffer, GST_MAP_READ); + if (thiz->PixelFormat == PIXEL_FORMAT_I420) { +@@ -981,7 +981,7 @@ gst_spacemitenc_handle_frame (GstVideoEncoder * encoder, + FRAME_SetPts(mppframe, GST_BUFFER_PTS (frame->input_buffer)); + } else { + GST_DEBUG_OBJECT (thiz, "null frame enc, need eos"); +- FRAME_SetEos(thiz->mppframe, 1); ++ FRAME_SetEos(thiz->mppframe, FRAME_EOS_WITHOUT_DATA); + mppframe = thiz->mppframe; + } + +diff --git a/ext/spacemit/spacemitsrc/gstspacemitsrc.c b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +index eafb476..8a973a5 100755 +--- a/ext/spacemit/spacemitsrc/gstspacemitsrc.c ++++ b/ext/spacemit/spacemitsrc/gstspacemitsrc.c +@@ -491,7 +491,7 @@ video_frame_release (void *data) + GST_DEBUG_OBJECT (frame->src, "release cpp output buffer %p, the gstbuffer is %p, pushed frame: %u", outputBuf, frame->gstbuf, frame->src->pushed_frame); + g_mutex_unlock(&frame->src->mutex); + +- GST_DEBUG_OBJECT 
(frame->src, "video_frame dur is: %, fd: %d" GST_TIME_FORMAT, GST_TIME_ARGS (GST_CLOCK_DIFF (frame->use_dur, gst_clock_get_time (clock))), outputBuf->m.fd); ++ GST_DEBUG_OBJECT (frame->src, "video_frame dur is: %" GST_TIME_FORMAT ", fd: %d", GST_TIME_ARGS (GST_CLOCK_DIFF (frame->use_dur, gst_clock_get_time (clock))), outputBuf->m.fd); + gst_object_unref (clock); + + gst_release_cpp_buffer(outputBuf, frame->index); +diff --git a/gst-libs/gst/wayland/gstwllinuxdmabuf.c b/gst-libs/gst/wayland/gstwllinuxdmabuf.c +index d6ee6ec..c591742 100644 +--- a/gst-libs/gst/wayland/gstwllinuxdmabuf.c ++++ b/gst-libs/gst/wayland/gstwllinuxdmabuf.c +@@ -152,7 +152,7 @@ gst_wl_linux_dmabuf_construct_wl_buffer (GstBuffer * buf, + /* Wait for the request answer */ + wl_display_flush (gst_wl_display_get_display (display)); + data.wbuf = (gpointer) 0x1; +- timeout = g_get_monotonic_time () + G_TIME_SPAN_SECOND; ++ timeout = g_get_monotonic_time () + 5 * G_TIME_SPAN_SECOND; + while (data.wbuf == (gpointer) 0x1) { + if (!g_cond_wait_until (&data.cond, &data.lock, timeout)) { + GST_ERROR_OBJECT (mem->allocator, "zwp_linux_buffer_params_v1 time out"); diff --git a/package/gstreamer1/gst1-plugins-bad/Config.in b/package/gstreamer1/gst1-plugins-bad/Config.in index d2ef450d..26034951 100644 --- a/package/gstreamer1/gst1-plugins-bad/Config.in +++ b/package/gstreamer1/gst1-plugins-bad/Config.in @@ -733,6 +733,20 @@ config BR2_PACKAGE_GST1_PLUGINS_BAD_ZBAR depends on BR2_TOOLCHAIN_HEADERS_AT_LEAST_3_0 # zbar -> libv4l select BR2_PACKAGE_ZBAR +config BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITCODEC + bool "spacemitcodec" + default y + depends on BR2_PACKAGE_MPP + help + spacemitcodec plugin for spacemit decodec and encodec + +config BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITSRC + bool "spacemitsrc" + default y + depends on BR2_PACKAGE_K1X_CAM_LIB + help + spacemitsrc plugin only for spacemit K1 cam + comment "zbar plugin needs a toolchain w/ threads, C++ and headers >= 3.0" depends on
BR2_USE_MMU depends on !BR2_TOOLCHAIN_HAS_THREADS \ diff --git a/package/gstreamer1/gst1-plugins-bad/gst1-plugins-bad.mk b/package/gstreamer1/gst1-plugins-bad/gst1-plugins-bad.mk index ac1328b7..eed10e92 100644 --- a/package/gstreamer1/gst1-plugins-bad/gst1-plugins-bad.mk +++ b/package/gstreamer1/gst1-plugins-bad/gst1-plugins-bad.mk @@ -73,7 +73,8 @@ GST1_PLUGINS_BAD_CONF_OPTS += \ -Dmagicleap=disabled \ -Disac=disabled \ -Diqa=disabled \ - -Dopencv=disabled + -Dopencv=disabled \ + -Dtinyalsa=disabled GST1_PLUGINS_BAD_DEPENDENCIES = gst1-plugins-base gstreamer1 @@ -818,6 +819,28 @@ else GST1_PLUGINS_BAD_CONF_OPTS += -Dzxing=disabled endif +ifeq ($(BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITCODEC)_$(BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITSRC), y_y) +#$(info "open spacemit codec and src") +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitcodec=enabled +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitsrc=enabled +GST1_PLUGINS_BAD_DEPENDENCIES += mpp +GST1_PLUGINS_BAD_DEPENDENCIES += k1x-cam +else ifeq ($(BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITCODEC),y) +#$(info "only open spacemit codec") +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitcodec=enabled +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitsrc=disabled +GST1_PLUGINS_BAD_DEPENDENCIES += mpp +else ifeq ($(BR2_PACKAGE_GST1_PLUGINS_BAD_PLUGIN_SPACEMITSRC),y) +#$(info "only open spacemit src") +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitcodec=disabled +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitsrc=enabled +GST1_PLUGINS_BAD_DEPENDENCIES += k1x-cam +else +#$(info "none open spacemit codec or src") +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitcodec=disabled +GST1_PLUGINS_BAD_CONF_OPTS += -Dspacemitsrc=disabled +endif + # Add GPL license if GPL licensed plugins enabled. 
ifeq ($(GST1_PLUGINS_BAD_HAS_GPL_LICENSE),y) GST1_PLUGINS_BAD_CONF_OPTS += -Dgpl=enabled diff --git a/package/haveged/S21haveged b/package/haveged/S09haveged similarity index 100% rename from package/haveged/S21haveged rename to package/haveged/S09haveged diff --git a/package/haveged/haveged.mk b/package/haveged/haveged.mk index fbf72a96..69292157 100644 --- a/package/haveged/haveged.mk +++ b/package/haveged/haveged.mk @@ -31,8 +31,8 @@ HAVEGED_CONF_OPTS += --disable-threads endif define HAVEGED_INSTALL_INIT_SYSV - $(INSTALL) -m 755 -D package/haveged/S21haveged \ - $(TARGET_DIR)/etc/init.d/S21haveged + $(INSTALL) -m 755 -D package/haveged/S09haveged \ + $(TARGET_DIR)/etc/init.d/S09haveged endef define HAVEGED_INSTALL_INIT_SYSTEMD diff --git a/package/jpeg-turbo/0001-RVV-optimized-jpeg-turbo-gcc-version.patch b/package/jpeg-turbo/0001-RVV-optimized-jpeg-turbo-gcc-version.patch new file mode 100644 index 00000000..ee19f992 --- /dev/null +++ b/package/jpeg-turbo/0001-RVV-optimized-jpeg-turbo-gcc-version.patch @@ -0,0 +1,16216 @@ +From 8a6effaa440873347c4d60bb0b31214bbf56b9de Mon Sep 17 00:00:00 2001 +From: "lff@Snode" +Date: Wed, 27 Mar 2024 16:19:34 +0800 +Subject: [PATCH] [RVV] optimized jpeg-turbo gcc version + +--- + BUILDING.md | 18 +- + CMakeLists.txt | 105 +- + ChangeLog.md | 83 +- + LICENSE.md | 2 +- + cmakescripts/BuildPackages.cmake | 2 +- + cmyk.h | 1 - + djpeg.c | 2 + + doc/html/group___turbo_j_p_e_g.html | 379 +++---- + doc/html/search/all_6.js | 14 +- + doc/html/search/functions_0.js | 12 +- + doc/html/structtjtransform.html | 6 +- + java/TJBench.java | 111 +- + java/TJExample.java | 13 +- + java/TJUnitTest.java | 76 +- + java/doc/index-all.html | 176 ++-- + java/doc/org/libjpegturbo/turbojpeg/TJ.html | 168 +-- + .../libjpegturbo/turbojpeg/TJCompressor.html | 169 +-- + .../turbojpeg/TJCustomFilter.html | 2 +- + .../turbojpeg/TJDecompressor.html | 348 ++++--- + .../libjpegturbo/turbojpeg/TJTransform.html | 39 +- + 
.../libjpegturbo/turbojpeg/TJTransformer.html | 81 +- + .../org/libjpegturbo/turbojpeg/YUVImage.html | 166 +-- + .../turbojpeg/package-summary.html | 2 +- + java/doc/serialized-form.html | 5 +- + java/org/libjpegturbo/turbojpeg/TJ.java | 143 +-- + .../libjpegturbo/turbojpeg/TJCompressor.java | 141 +-- + .../turbojpeg/TJCustomFilter.java | 4 +- + .../turbojpeg/TJDecompressor.java | 313 +++--- + .../libjpegturbo/turbojpeg/TJTransform.java | 28 +- + .../libjpegturbo/turbojpeg/TJTransformer.java | 66 +- + java/org/libjpegturbo/turbojpeg/YUVImage.java | 161 +-- + jccolext.c | 14 +- + jccolor.c | 21 +- + jchuff.c | 1 - + jchuff.h | 12 +- + jcmaster.c | 1 - + jcphuff.c | 37 +- + jdapimin.c | 1 - + jdcolext.c | 14 +- + jdcolor.c | 1 - + jdmainct.c | 1 - + jdmerge.c | 1 - + jdmrgext.c | 20 +- + jdsample.c | 2 +- + jpegtran.1 | 2 +- + jsimd.h | 8 +- + jsimd_none.c | 8 +- + jversion.h.in | 4 +- + rdppm.c | 20 +- + simd/CMakeLists.txt | 56 +- + simd/arm/aarch32/jsimd.c | 12 +- + simd/arm/aarch64/jsimd.c | 17 +- + simd/arm/jcphuff-neon.c | 187 ++-- + simd/arm/jdcolor-neon.c | 1 - + simd/arm/jdmerge-neon.c | 1 - + simd/arm/jidctint-neon.c | 1 - + simd/i386/jsimd.c | 84 +- + simd/jsimd.h | 177 +++- + simd/mips/jsimd.c | 12 +- + simd/mips64/jsimd.c | 12 +- + simd/powerpc/jsimd.c | 12 +- + simd/rvv/jccolext-rvv.c | 145 +++ + simd/rvv/jccolor-rvv.c | 118 +++ + simd/rvv/jcgray-rvv.c | 112 ++ + simd/rvv/jcgryext-rvv.c | 104 ++ + simd/rvv/jcsample-rvv.c | 149 +++ + simd/rvv/jcsample.h | 59 ++ + simd/rvv/jdcolext-rvv.c | 170 ++++ + simd/rvv/jdcolor-rvv.c | 122 +++ + simd/rvv/jdmerge-rvv.c | 136 +++ + simd/rvv/jdmrgext-rvv.c | 235 +++++ + simd/rvv/jdsample-rvv.c | 460 +++++++++ + simd/rvv/jfdctfst-rvv.c | 155 +++ + simd/rvv/jfdctint-rvv.c | 250 +++++ + simd/rvv/jidctfst-rvv.c | 315 ++++++ + simd/rvv/jidctint-rvv.c | 360 +++++++ + simd/rvv/jidctred-rvv.c | 353 +++++++ + simd/rvv/jquanti-rvv.c | 132 +++ + simd/rvv/jsimd.c | 940 +++++++++++++++++ + simd/rvv/jsimd_rvv.h | 39 + + 
simd/x86_64/jsimd.c | 60 +- + tjbench.c | 117 ++- + tjexample.c | 11 +- + tjunittest.c | 75 +- + transupp.c | 2 +- + turbojpeg-jni.c | 31 +- + turbojpeg-mapfile | 8 +- + turbojpeg-mapfile.jni | 8 +- + turbojpeg.c | 164 +-- + turbojpeg.h | 962 +++++++++--------- + win/jpeg.rc.in | 2 +- + win/turbojpeg.rc.in | 2 +- + wizard.txt | 10 +- + 93 files changed, 7188 insertions(+), 2194 deletions(-) + create mode 100644 simd/rvv/jccolext-rvv.c + create mode 100644 simd/rvv/jccolor-rvv.c + create mode 100644 simd/rvv/jcgray-rvv.c + create mode 100644 simd/rvv/jcgryext-rvv.c + create mode 100644 simd/rvv/jcsample-rvv.c + create mode 100644 simd/rvv/jcsample.h + create mode 100644 simd/rvv/jdcolext-rvv.c + create mode 100644 simd/rvv/jdcolor-rvv.c + create mode 100644 simd/rvv/jdmerge-rvv.c + create mode 100644 simd/rvv/jdmrgext-rvv.c + create mode 100644 simd/rvv/jdsample-rvv.c + create mode 100644 simd/rvv/jfdctfst-rvv.c + create mode 100644 simd/rvv/jfdctint-rvv.c + create mode 100644 simd/rvv/jidctfst-rvv.c + create mode 100644 simd/rvv/jidctint-rvv.c + create mode 100644 simd/rvv/jidctred-rvv.c + create mode 100644 simd/rvv/jquanti-rvv.c + create mode 100644 simd/rvv/jsimd.c + create mode 100644 simd/rvv/jsimd_rvv.h + +diff --git a/BUILDING.md b/BUILDING.md +index 2ce65d6..b965b5e 100644 +--- a/BUILDING.md ++++ b/BUILDING.md +@@ -25,9 +25,9 @@ Build Requirements + variable or the `ASM_NASM` environment variable. On Windows, use forward + slashes rather than backslashes in the path (for example, + **c:/nasm/nasm.exe**). +- * NASM and Yasm are located in the CRB (Code Ready Builder) repository on +- Red Hat Enterprise Linux 8 and in the PowerTools repository on RHEL +- derivatives, which is not enabled by default. ++ * NASM and Yasm are located in the CRB (Code Ready Builder) or PowerTools ++ repository on Red Hat Enterprise Linux 8+ and derivatives, which is not ++ enabled by default. 
+ + ### Un*x Platforms (including Linux, Mac, FreeBSD, Solaris, and Cygwin) + +@@ -372,9 +372,13 @@ located (usually **/usr/bin**.) Next, execute the following commands: + + cd {build_directory} + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ ++ -DCMAKE_INSTALL_PREFIX={install_path} \ + [additional CMake flags] {source_directory} + make + ++*{install\_path}* is the path under which the libjpeg-turbo binaries should be ++installed. ++ + + ### 64-bit MinGW Build on Un*x (including Mac and Cygwin) + +@@ -391,9 +395,13 @@ located (usually **/usr/bin**.) Next, execute the following commands: + + cd {build_directory} + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ ++ -DCMAKE_INSTALL_PREFIX={install_path} \ + [additional CMake flags] {source_directory} + make + ++*{install\_path}* is the path under which the libjpeg-turbo binaries should be ++installed. ++ + + Building libjpeg-turbo for iOS + ------------------------------ +@@ -429,6 +437,10 @@ iPhone 5S/iPad Mini 2/iPad Air and newer. + [additional CMake flags] {source_directory} + make + ++Replace `iPhoneOS` with `iPhoneSimulator` and `-miphoneos-version-min` with ++`-miphonesimulator-version-min` to build libjpeg-turbo for the iOS simulator on ++Macs with Apple silicon CPUs. ++ + + Building libjpeg-turbo for Android + ---------------------------------- +diff --git a/CMakeLists.txt b/CMakeLists.txt +index cca2966..0eea262 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -10,8 +10,8 @@ if(CMAKE_EXECUTABLE_SUFFIX) + endif() + + project(libjpeg-turbo C) +-set(VERSION 2.1.4) +-set(COPYRIGHT_YEAR "1991-2022") ++set(VERSION 2.1.5) ++set(COPYRIGHT_YEAR "1991-2023") + string(REPLACE "." ";" VERSION_TRIPLET ${VERSION}) + list(GET VERSION_TRIPLET 0 VERSION_MAJOR) + list(GET VERSION_TRIPLET 1 VERSION_MINOR) +@@ -40,7 +40,16 @@ set(LIBJPEG_TURBO_VERSION_NUMBER ${VERSION_MAJOR}${VERSION_MINOR}${VERSION_REVIS + # application bundles would break our iOS packages.) 
+ set(CMAKE_MACOSX_BUNDLE FALSE) + +-string(TIMESTAMP DEFAULT_BUILD "%Y%m%d") ++get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY ++ GENERATOR_IS_MULTI_CONFIG) ++# If the GENERATOR_IS_MULTI_CONFIG property doesn't exist (CMake < 3.9), then ++# set the GENERATOR_IS_MULTI_CONFIG variable manually if the generator is ++# Visual Studio or Xcode (the only multi-config generators in CMake < 3.9). ++if(NOT GENERATOR_IS_MULTI_CONFIG AND (MSVC_IDE OR XCODE)) ++ set(GENERATOR_IS_MULTI_CONFIG TRUE) ++endif() ++ ++string(TIMESTAMP DEFAULT_BUILD "%Y%m%d" UTC) + set(BUILD ${DEFAULT_BUILD} CACHE STRING "Build string (default: ${DEFAULT_BUILD})") + + # NOTE: On Windows, this does nothing except when using MinGW or Cygwin. +@@ -236,10 +245,6 @@ endif() + report_option(ENABLE_SHARED "Shared libraries") + report_option(ENABLE_STATIC "Static libraries") + +-if(ENABLE_SHARED) +- set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR}) +-endif() +- + if(WITH_JPEG8 OR WITH_JPEG7) + set(WITH_ARITH_ENC 1) + set(WITH_ARITH_DEC 1) +@@ -287,6 +292,16 @@ if(NOT WITH_JPEG8) + report_option(WITH_MEM_SRCDST "In-memory source/destination managers") + endif() + ++# 0: Original libjpeg v6b/v7/v8 API/ABI ++# ++# libjpeg v6b/v7 API/ABI emulation: ++# 1: + In-memory source/destination managers (libjpeg-turbo 1.3.x) ++# 2: + Partial image decompression functions (libjpeg-turbo 1.5.x) ++# 3: + ICC functions (libjpeg-turbo 2.0.x) ++# ++# libjpeg v8 API/ABI emulation: ++# 1: + Partial image decompression functions (libjpeg-turbo 1.5.x) ++# 2: + ICC functions (libjpeg-turbo 2.0.x) + set(SO_AGE 2) + if(WITH_MEM_SRCDST) + set(SO_AGE 3) +@@ -337,8 +352,19 @@ message(STATUS "libjpeg API shared library version = ${SO_MAJOR_VERSION}.${SO_AG + # names of functions whenever they are modified in a backward-incompatible + # manner, it is always backward-ABI-compatible with itself, so the major and + # minor SO versions don't change. 
However, we increase the middle number (the +-# SO "age") whenever functions are added to the API. ++# SO "age") whenever functions are added to the API, because adding functions ++# affects forward API/ABI compatibility. + set(TURBOJPEG_SO_MAJOR_VERSION 0) ++# 0: TurboJPEG 1.3.x API ++# 1: TurboJPEG 1.4.x API ++# The TurboJPEG 1.5.x API modified some of the function prototypes, adding ++# the const keyword in front of pointers to unmodified buffers, but that did ++# not affect forward API/ABI compatibility. ++# 2: TurboJPEG 2.0.x API ++# The TurboJPEG 2.1.x API modified the behavior of the tjDecompressHeader3() ++# function so that it accepts "abbreviated table specification" (AKA ++# "tables-only") datastreams as well as JPEG images, but that did not affect ++# forward API/ABI compatibility. + set(TURBOJPEG_SO_AGE 2) + set(TURBOJPEG_SO_VERSION 0.${TURBOJPEG_SO_AGE}.0) + +@@ -713,7 +739,7 @@ add_executable(strtest strtest.c) + + add_subdirectory(md5) + +-if(MSVC_IDE OR XCODE) ++if(GENERATOR_IS_MULTI_CONFIG) + set(OBJDIR "\${CTEST_CONFIGURATION_TYPE}/") + else() + set(OBJDIR "") +@@ -946,23 +972,30 @@ if(FLOATTEST) + endif() + endif() + ++message(STATUS "CMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}") ++message(STATUS "CPU_PARAMETERS=${CPU_PARAMETERS}") ++if(CPU_PARAMETERS) ++ set(EMULATOR_CPU "-cpu") ++endif() ++message(STATUS "EMULATOR_CPU=${EMULATOR_CPU}") ++ + foreach(libtype ${TEST_LIBTYPES}) + if(libtype STREQUAL "static") + set(suffix -static) + endif() + if(WITH_TURBOJPEG) + add_test(tjunittest-${libtype} +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix}) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix}) + add_test(tjunittest-${libtype}-alloc +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix} -alloc) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix} -alloc) + add_test(tjunittest-${libtype}-yuv +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix} 
-yuv) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix} -yuv) + add_test(tjunittest-${libtype}-yuv-alloc +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix} -yuv -alloc) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix} -yuv -alloc) + add_test(tjunittest-${libtype}-yuv-nopad +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix} -yuv -noyuvpad) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix} -yuv -noyuvpad) + add_test(tjunittest-${libtype}-bmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjunittest${suffix} -bmp) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjunittest${suffix} -bmp) + + set(MD5_PPM_GRAY_TILE 89d3ca21213d9d864b50b4e4e7de4ca6) + set(MD5_PPM_420_8x8_TILE 847fceab15c5b7b911cb986cf0f71de3) +@@ -987,23 +1020,23 @@ foreach(libtype ${TEST_LIBTYPES}) + ${CMAKE_COMMAND} -E copy_if_different ${TESTIMAGES}/testorig.ppm + testout_tile.ppm) + add_test(tjbench-${libtype}-tile +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjbench${suffix} testout_tile.ppm 95 ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjbench${suffix} testout_tile.ppm 95 + -rgb -quiet -tile -benchtime 0.01 -warmup 0) + set_tests_properties(tjbench-${libtype}-tile + PROPERTIES DEPENDS tjbench-${libtype}-tile-cp) + + foreach(tile 8 16 32 64 128) + add_test(tjbench-${libtype}-tile-gray-${tile}x${tile}-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ${MD5_PPM_GRAY_TILE} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} ${MD5_PPM_GRAY_TILE} + testout_tile_GRAY_Q95_${tile}x${tile}.ppm) + foreach(subsamp 420 422) + add_test(tjbench-${libtype}-tile-${subsamp}-${tile}x${tile}-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} + ${MD5_PPM_${subsamp}_${tile}x${tile}_TILE} + testout_tile_${subsamp}_Q95_${tile}x${tile}.ppm) + endforeach() + 
add_test(tjbench-${libtype}-tile-444-${tile}x${tile}-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ${MD5_PPM_444_TILE} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} ${MD5_PPM_444_TILE} + testout_tile_444_Q95_${tile}x${tile}.ppm) + foreach(subsamp gray 420 422 444) + set_tests_properties(tjbench-${libtype}-tile-${subsamp}-${tile}x${tile}-cmp +@@ -1015,21 +1048,21 @@ foreach(libtype ${TEST_LIBTYPES}) + ${CMAKE_COMMAND} -E copy_if_different ${TESTIMAGES}/testorig.ppm + testout_tilem.ppm) + add_test(tjbench-${libtype}-tilem +- ${CMAKE_CROSSCOMPILING_EMULATOR} tjbench${suffix} testout_tilem.ppm 95 ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} tjbench${suffix} testout_tilem.ppm 95 + -rgb -fastupsample -quiet -tile -benchtime 0.01 -warmup 0) + set_tests_properties(tjbench-${libtype}-tilem + PROPERTIES DEPENDS tjbench-${libtype}-tilem-cp) + + add_test(tjbench-${libtype}-tile-420m-8x8-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ${MD5_PPM_420M_8x8_TILE} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} ${MD5_PPM_420M_8x8_TILE} + testout_tilem_420_Q95_8x8.ppm) + add_test(tjbench-${libtype}-tile-422m-8x8-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ${MD5_PPM_422M_8x8_TILE} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} ${MD5_PPM_422M_8x8_TILE} + testout_tilem_422_Q95_8x8.ppm) + foreach(tile 16 32 64 128) + foreach(subsamp 420 422) + add_test(tjbench-${libtype}-tile-${subsamp}m-${tile}x${tile}-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} + ${MD5_PPM_${subsamp}M_TILE} + testout_tilem_${subsamp}_Q95_${tile}x${tile}.ppm) + endforeach() +@@ -1048,10 +1081,10 @@ foreach(libtype ${TEST_LIBTYPES}) + + macro(add_bittest PROG NAME ARGS OUTFILE INFILE MD5SUM) + add_test(${PROG}-${libtype}-${NAME} +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${PROG}${suffix} ${ARGS} ++ 
${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${PROG}${suffix} ${ARGS} + -outfile ${OUTFILE} ${INFILE}) + add_test(${PROG}-${libtype}-${NAME}-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ${MD5SUM} ${OUTFILE}) ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} ${MD5SUM} ${OUTFILE}) + set_tests_properties(${PROG}-${libtype}-${NAME}-cmp PROPERTIES + DEPENDS ${PROG}-${libtype}-${NAME}) + if(${ARGC} GREATER 6) +@@ -1072,7 +1105,7 @@ foreach(libtype ${TEST_LIBTYPES}) + ${MD5_PPM_RGB_ISLOW} cjpeg-${libtype}-rgb-islow) + + add_test(djpeg-${libtype}-rgb-islow-icc-cmp +- ${CMAKE_CROSSCOMPILING_EMULATOR} ${MD5CMP} ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} ${MD5CMP} + b06a39d730129122e85c1363ed1bbc9e testout_rgb_islow.icc) + set_tests_properties(djpeg-${libtype}-rgb-islow-icc-cmp PROPERTIES + DEPENDS djpeg-${libtype}-rgb-islow) +@@ -1302,7 +1335,7 @@ foreach(libtype ${TEST_LIBTYPES}) + + # Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: prog huff + add_test(cjpeg-${libtype}-420-islow-prog +- ${CMAKE_CROSSCOMPILING_EMULATOR} cjpeg${suffix} -dct int -prog ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} cjpeg${suffix} -dct int -prog + -outfile testout_420_islow_prog.jpg ${TESTIMAGES}/testorig.ppm) + add_bittest(djpeg 420-islow-prog-crop62x62_71_71 + "-dct;int;-crop;62x62+71+71;-ppm" +@@ -1319,7 +1352,7 @@ foreach(libtype ${TEST_LIBTYPES}) + + # Context rows: No Intra-iMCU row: Yes ENT: huff + add_test(cjpeg-${libtype}-444-islow +- ${CMAKE_CROSSCOMPILING_EMULATOR} cjpeg${suffix} -dct int -sample 1x1 ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} cjpeg${suffix} -dct int -sample 1x1 + -outfile testout_444_islow.jpg ${TESTIMAGES}/testorig.ppm) + add_bittest(djpeg 444-islow-skip1_6 "-dct;int;-skip;1,6;-ppm" + testout_444_islow_skip1,6.ppm testout_444_islow.jpg +@@ -1327,7 +1360,7 @@ foreach(libtype ${TEST_LIBTYPES}) + + # Context rows: No 
Intra-iMCU row: No ENT: prog huff + add_test(cjpeg-${libtype}-444-islow-prog +- ${CMAKE_CROSSCOMPILING_EMULATOR} cjpeg${suffix} -dct int -prog -sample 1x1 ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} cjpeg${suffix} -dct int -prog -sample 1x1 + -outfile testout_444_islow_prog.jpg ${TESTIMAGES}/testorig.ppm) + add_bittest(djpeg 444-islow-prog-crop98x98_13_13 + "-dct;int;-crop;98x98+13+13;-ppm" +@@ -1337,7 +1370,7 @@ foreach(libtype ${TEST_LIBTYPES}) + # Context rows: No Intra-iMCU row: No ENT: arith + if(WITH_ARITH_ENC) + add_test(cjpeg-${libtype}-444-islow-ari +- ${CMAKE_CROSSCOMPILING_EMULATOR} cjpeg${suffix} -dct int -arithmetic ++ ${CMAKE_CROSSCOMPILING_EMULATOR} ${EMULATOR_CPU} ${CPU_PARAMETERS} cjpeg${suffix} -dct int -arithmetic + -sample 1x1 -outfile testout_444_islow_ari.jpg + ${TESTIMAGES}/testorig.ppm) + if(WITH_ARITH_DEC) +@@ -1392,14 +1425,15 @@ if(WITH_TURBOJPEG) + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -yuv + COMMAND echo tjbenchtest.java -progressive + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -progressive +- COMMAND echo tjexampletest.java -progressive -yuv ++ COMMAND echo tjbenchtest.java -progressive -yuv + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java + -progressive -yuv + COMMAND echo tjexampletest.java + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest.java + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest + ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java +- ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest) ++ ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest ++ ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest.java) + else() + add_custom_target(tjtest + COMMAND echo tjbenchtest +@@ -1416,7 +1450,8 @@ if(WITH_TURBOJPEG) + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive -yuv + COMMAND echo tjexampletest + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest +- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest) ++ DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest 
++ ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest) + endif() + endif() + +@@ -1447,7 +1482,7 @@ if(WITH_TURBOJPEG) + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + if(NOT ENABLE_SHARED) +- if(MSVC_IDE OR XCODE) ++ if(GENERATOR_IS_MULTI_CONFIG) + set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}") + else() + set(DIR ${CMAKE_CURRENT_BINARY_DIR}) +@@ -1465,7 +1500,7 @@ if(ENABLE_STATIC) + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + if(NOT ENABLE_SHARED) +- if(MSVC_IDE OR XCODE) ++ if(GENERATOR_IS_MULTI_CONFIG) + set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}") + else() + set(DIR ${CMAKE_CURRENT_BINARY_DIR}) +diff --git a/ChangeLog.md b/ChangeLog.md +index b0d166e..a547522 100644 +--- a/ChangeLog.md ++++ b/ChangeLog.md +@@ -1,7 +1,62 @@ ++2.1.5 ++===== ++ ++### Significant changes relative to 2.1.4: ++ ++1. Fixed issues in the build system whereby, when using the Ninja Multi-Config ++CMake generator, a static build of libjpeg-turbo (a build in which ++`ENABLE_SHARED` is `0`) could not be installed, a Windows installer could not ++be built, and the Java regression tests failed. ++ ++2. Fixed a regression introduced by 2.0 beta1[15] that caused a buffer overrun ++in the progressive Huffman encoder when attempting to transform a ++specially-crafted malformed 12-bit-per-component JPEG image into a progressive ++12-bit-per-component JPEG image using a 12-bit-per-component build of ++libjpeg-turbo (`-DWITH_12BIT=1`.) Given that the buffer overrun was fully ++contained within the progressive Huffman encoder structure and did not cause a ++segfault or other user-visible errant behavior, given that the lossless ++transformer (unlike the decompressor) is not generally exposed to arbitrary ++data exploits, and given that 12-bit-per-component builds of libjpeg-turbo are ++uncommon, this issue did not likely pose a security risk. ++ ++3. 
Fixed an issue whereby, when using a 12-bit-per-component build of ++libjpeg-turbo (`-DWITH_12BIT=1`), passing samples with values greater than 4095 ++or less than 0 to `jpeg_write_scanlines()` caused a buffer overrun or underrun ++in the RGB-to-YCbCr color converter. ++ ++4. Fixed a floating point exception that occurred when attempting to use the ++jpegtran `-drop` and `-trim` options to losslessly transform a ++specially-crafted malformed JPEG image. ++ ++5. Fixed an issue in `tjBufSizeYUV2()` whereby it returned a bogus result, ++rather than throwing an error, if the `align` parameter was not a power of 2. ++Fixed a similar issue in `tjCompressFromYUV()` whereby it generated a corrupt ++JPEG image in certain cases, rather than throwing an error, if the `align` ++parameter was not a power of 2. ++ ++6. Fixed an issue whereby `tjDecompressToYUV2()`, which is a wrapper for ++`tjDecompressToYUVPlanes()`, used the desired YUV image dimensions rather than ++the actual scaled image dimensions when computing the plane pointers and ++strides to pass to `tjDecompressToYUVPlanes()`. This caused a buffer overrun ++and subsequent segfault if the desired image dimensions exceeded the scaled ++image dimensions. ++ ++7. Fixed an issue whereby, when decompressing a 12-bit-per-component JPEG image ++(`-DWITH_12BIT=1`) using an alpha-enabled output color space such as ++`JCS_EXT_RGBA`, the alpha channel was set to 255 rather than 4095. ++ ++8. Fixed an issue whereby the Java version of TJBench did not accept a range of ++quality values. ++ ++9. Fixed an issue whereby, when `-progressive` was passed to TJBench, the JPEG ++input image was not transformed into a progressive JPEG image prior to ++decompression. ++ ++ + 2.1.4 + ===== + +-### Significant changes relative to 2.1.3 ++### Significant changes relative to 2.1.3: + + 1. Fixed a regression introduced in 2.1.3 that caused build failures with + Visual Studio 2010. 
+@@ -36,7 +91,7 @@ virtual array access") under certain circumstances. + 2.1.3 + ===== + +-### Significant changes relative to 2.1.2 ++### Significant changes relative to 2.1.2: + + 1. Fixed a regression introduced by 2.0 beta1[7] whereby cjpeg compressed PGM + input files into full-color JPEG images unless the `-grayscale` option was +@@ -60,7 +115,7 @@ be reproduced using the libjpeg API, not using djpeg. + 2.1.2 + ===== + +-### Significant changes relative to 2.1.1 ++### Significant changes relative to 2.1.1: + + 1. Fixed a regression introduced by 2.1 beta1[13] that caused the remaining + GAS implementations of AArch64 (Arm 64-bit) Neon SIMD functions (which are used +@@ -92,7 +147,7 @@ image contains incomplete or corrupt image data. + 2.1.1 + ===== + +-### Significant changes relative to 2.1.0 ++### Significant changes relative to 2.1.0: + + 1. Fixed a regression introduced in 2.1.0 that caused build failures with + non-GCC-compatible compilers for Un*x/Arm platforms. +@@ -121,7 +176,7 @@ transform a specially-crafted malformed JPEG image. + 2.1.0 + ===== + +-### Significant changes relative to 2.1 beta1 ++### Significant changes relative to 2.1 beta1: + + 1. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to + decompress certain progressive JPEG images with one or more component planes of +@@ -156,10 +211,10 @@ progressive JPEG format described in the report + ["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf). + + 7. The PPM reader now throws an error, rather than segfaulting (due to a buffer +-overrun) or generating incorrect pixels, if an application attempts to use the +-`tjLoadImage()` function to load a 16-bit binary PPM file (a binary PPM file +-with a maximum value greater than 255) into a grayscale image buffer or to load +-a 16-bit binary PGM file into an RGB image buffer. 
++overrun, CVE-2021-46822) or generating incorrect pixels, if an application ++attempts to use the `tjLoadImage()` function to load a 16-bit binary PPM file ++(a binary PPM file with a maximum value greater than 255) into a grayscale ++image buffer or to load a 16-bit binary PGM file into an RGB image buffer. + + 8. Fixed an issue in the PPM reader that caused incorrect pixels to be + generated when using the `tjLoadImage()` function to load a 16-bit binary PPM +@@ -325,11 +380,11 @@ methods in the TurboJPEG Java API. + + 2. Fixed or worked around multiple issues with `jpeg_skip_scanlines()`: + +- - Fixed segfaults or "Corrupt JPEG data: premature end of data segment" +-errors in `jpeg_skip_scanlines()` that occurred when decompressing 4:2:2 or +-4:2:0 JPEG images using merged (non-fancy) upsampling/color conversion (that +-is, when setting `cinfo.do_fancy_upsampling` to `FALSE`.) 2.0.0[6] was a +-similar fix, but it did not cover all cases. ++ - Fixed segfaults (CVE-2020-35538) or "Corrupt JPEG data: premature end of ++data segment" errors in `jpeg_skip_scanlines()` that occurred when ++decompressing 4:2:2 or 4:2:0 JPEG images using merged (non-fancy) ++upsampling/color conversion (that is, when setting `cinfo.do_fancy_upsampling` ++to `FALSE`.) 2.0.0[6] was a similar fix, but it did not cover all cases. + - `jpeg_skip_scanlines()` now throws an error if two-pass color + quantization is enabled. Two-pass color quantization never worked properly + with `jpeg_skip_scanlines()`, and the issues could not readily be fixed. +diff --git a/LICENSE.md b/LICENSE.md +index d753e1d..bf8a7fd 100644 +--- a/LICENSE.md ++++ b/LICENSE.md +@@ -91,7 +91,7 @@ best of our understanding. + The Modified (3-clause) BSD License + =================================== + +-Copyright (C)2009-2022 D. R. Commander. All Rights Reserved.
++Copyright (C)2009-2023 D. R. Commander. All Rights Reserved.
+ Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without +diff --git a/cmakescripts/BuildPackages.cmake b/cmakescripts/BuildPackages.cmake +index 7d6fa2c..2e0170f 100644 +--- a/cmakescripts/BuildPackages.cmake ++++ b/cmakescripts/BuildPackages.cmake +@@ -90,7 +90,7 @@ if(WITH_JAVA) + set(INST_DEFS ${INST_DEFS} -DJAVA) + endif() + +-if(MSVC_IDE) ++if(GENERATOR_IS_MULTI_CONFIG) + set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=${CMAKE_CFG_INTDIR}\\") + else() + set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=") +diff --git a/cmyk.h b/cmyk.h +index 48187a8..b6ca20f 100644 +--- a/cmyk.h ++++ b/cmyk.h +@@ -17,7 +17,6 @@ + #include + #define JPEG_INTERNALS + #include +-#include "jconfigint.h" + + + /* Fully reversible */ +diff --git a/djpeg.c b/djpeg.c +index 7666e3f..c22c4ca 100644 +--- a/djpeg.c ++++ b/djpeg.c +@@ -316,7 +316,9 @@ parse_switches(j_decompress_ptr cinfo, int argc, char **argv, + if (++argn >= argc) /* advance to next argument */ + usage(); + icc_filename = argv[argn]; ++#ifdef SAVE_MARKERS_SUPPORTED + jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF); ++#endif + + } else if (keymatch(arg, "map", 3)) { + /* Quantize to a color map taken from an input file. */ +diff --git a/doc/html/group___turbo_j_p_e_g.html b/doc/html/group___turbo_j_p_e_g.html +index 63d4791..39fe4d7 100644 +--- a/doc/html/group___turbo_j_p_e_g.html ++++ b/doc/html/group___turbo_j_p_e_g.html +@@ -102,22 +102,22 @@ Macros +  The number of JPEG colorspaces. More...
+   + #define TJFLAG_BOTTOMUP +- The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order. More...
++ Rows in the packed-pixel source/destination image are stored in bottom-up (Windows, OpenGL) order rather than in top-down (X11) order. More...
+   + #define TJFLAG_FASTUPSAMPLE +- When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec. More...
++ When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available. More...
+   + #define TJFLAG_NOREALLOC +- Disable buffer (re)allocation. More...
++ Disable JPEG buffer (re)allocation. More...
+   + #define TJFLAG_FASTDCT +- Use the fastest DCT/IDCT algorithm available in the underlying codec. More...
++ Use the fastest DCT/IDCT algorithm available. More...
+   + #define TJFLAG_ACCURATEDCT +- Use the most accurate DCT/IDCT algorithm available in the underlying codec. More...
++ Use the most accurate DCT/IDCT algorithm available. More...
+   + #define TJFLAG_STOPONWARNING +- Immediately discontinue the current compression/decompression/transform operation if the underlying codec throws a warning (non-fatal error). More...
++ Immediately discontinue the current compression/decompression/transform operation if a warning (non-fatal error) occurs. More...
+   + #define TJFLAG_PROGRESSIVE +  Use progressive entropy coding in JPEG images generated by the compression and transform functions. More...
+@@ -141,19 +141,19 @@ Macros +  This option will enable lossless cropping. More...
+   + #define TJXOPT_GRAY +- This option will discard the color data in the input image and produce a grayscale output image. More...
++ This option will discard the color data in the source image and produce a grayscale destination image. More...
+   + #define TJXOPT_NOOUTPUT +- This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.) More...
++ This option will prevent tjTransform() from outputting a JPEG image for this particular transform. More...
+   + #define TJXOPT_PROGRESSIVE +- This option will enable progressive entropy coding in the output image generated by this particular transform. More...
++ This option will enable progressive entropy coding in the JPEG image generated by this particular transform. More...
+   + #define TJXOPT_COPYNONE +- This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the output image. More...
++ This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the destination image. More...
+   + #define TJPAD(width) +- Pad the given width to the nearest 32-bit boundary. More...
++ Pad the given width to the nearest multiple of 4. More...
+   + #define TJSCALED(dimension, scalingFactor) +  Compute the scaled value of dimension using the given scaling factor. More...
+@@ -240,20 +240,20 @@ Functions +  Create a TurboJPEG compressor instance. More...
+   + DLLEXPORT int tjCompress2 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +- Compress an RGB, grayscale, or CMYK image into a JPEG image. More...
++ Compress a packed-pixel RGB, grayscale, or CMYK image into a JPEG image. More...
+   +-DLLEXPORT int tjCompressFromYUV (tjhandle handle, const unsigned char *srcBuf, int width, int pad, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) +- Compress a YUV planar image into a JPEG image. More...
+-  ++DLLEXPORT int tjCompressFromYUV (tjhandle handle, const unsigned char *srcBuf, int width, int align, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) ++ Compress a unified planar YUV image into a JPEG image. More...
++  + DLLEXPORT int tjCompressFromYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, int width, const int *strides, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) +  Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. More...
+   + DLLEXPORT unsigned long tjBufSize (int width, int height, int jpegSubsamp) +  The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. More...
+   +-DLLEXPORT unsigned long tjBufSizeYUV2 (int width, int pad, int height, int subsamp) +- The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. More...
+-  ++DLLEXPORT unsigned long tjBufSizeYUV2 (int width, int align, int height, int subsamp) ++ The size of the buffer (in bytes) required to hold a unified planar YUV image with the given parameters. More...
++  + DLLEXPORT unsigned long tjPlaneSizeYUV (int componentID, int width, int stride, int height, int subsamp) +  The size of the buffer (in bytes) required to hold a YUV image plane with the given parameters. More...
+   +@@ -263,11 +263,11 @@ Functions + DLLEXPORT int tjPlaneHeight (int componentID, int height, int subsamp) +  The plane height of a YUV image plane with the given parameters. More...
+   +-DLLEXPORT int tjEncodeYUV3 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags) +- Encode an RGB or grayscale image into a YUV planar image. More...
+-  ++DLLEXPORT int tjEncodeYUV3 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int align, int subsamp, int flags) ++ Encode a packed-pixel RGB or grayscale image into a unified planar YUV image. More...
++  + DLLEXPORT int tjEncodeYUVPlanes (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, int flags) +- Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes. More...
++ Encode a packed-pixel RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes. More...
+   + DLLEXPORT tjhandle tjInitDecompress (void) +  Create a TurboJPEG decompressor instance. More...
+@@ -275,23 +275,23 @@ Functions + DLLEXPORT int tjDecompressHeader3 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp, int *jpegColorspace) +  Retrieve information about a JPEG image without decompressing it, or prime the decompressor with quantization and Huffman tables. More...
+   +-DLLEXPORT tjscalingfactortjGetScalingFactors (int *numscalingfactors) +- Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. More...
+-  ++DLLEXPORT tjscalingfactortjGetScalingFactors (int *numScalingFactors) ++ Returns a list of fractional scaling factors that the JPEG decompressor supports. More...
++  + DLLEXPORT int tjDecompress2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags) +- Decompress a JPEG image to an RGB, grayscale, or CMYK image. More...
++ Decompress a JPEG image into a packed-pixel RGB, grayscale, or CMYK image. More...
+   +-DLLEXPORT int tjDecompressToYUV2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pad, int height, int flags) +- Decompress a JPEG image to a YUV planar image. More...
+-  ++DLLEXPORT int tjDecompressToYUV2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int align, int height, int flags) ++ Decompress a JPEG image into a unified planar YUV image. More...
++  + DLLEXPORT int tjDecompressToYUVPlanes (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char **dstPlanes, int width, int *strides, int height, int flags) +  Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image planes. More...
+   +-DLLEXPORT int tjDecodeYUV (tjhandle handle, const unsigned char *srcBuf, int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags) +- Decode a YUV planar image into an RGB or grayscale image. More...
+-  ++DLLEXPORT int tjDecodeYUV (tjhandle handle, const unsigned char *srcBuf, int align, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags) ++ Decode a unified planar YUV image into a packed-pixel RGB or grayscale image. More...
++  + DLLEXPORT int tjDecodeYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, const int *strides, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags) +- Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale image. More...
++ Decode a set of Y, U (Cb), and V (Cr) image planes into a packed-pixel RGB or grayscale image. More...
+   + DLLEXPORT tjhandle tjInitTransform (void) +  Create a new TurboJPEG transformer instance. More...
+@@ -303,16 +303,16 @@ Functions +  Destroy a TurboJPEG compressor, decompressor, or transformer instance. More...
+   + DLLEXPORT unsigned char * tjAlloc (int bytes) +- Allocate an image buffer for use with TurboJPEG. More...
++ Allocate a byte buffer for use with TurboJPEG. More...
+   + DLLEXPORT unsigned char * tjLoadImage (const char *filename, int *width, int align, int *height, int *pixelFormat, int flags) +- Load an uncompressed image from disk into memory. More...
++ Load a packed-pixel image from disk into memory. More...
+   + DLLEXPORT int tjSaveImage (const char *filename, unsigned char *buffer, int width, int pitch, int height, int pixelFormat, int flags) +- Save an uncompressed image from memory to disk. More...
++ Save a packed-pixel image from memory to disk. More...
+   + DLLEXPORT void tjFree (unsigned char *buffer) +- Free an image buffer previously allocated by TurboJPEG. More...
++ Free a byte buffer previously allocated by TurboJPEG. More...
+   + DLLEXPORT char * tjGetErrorStr2 (tjhandle handle) +  Returns a descriptive error message explaining why the last command failed. More...
+@@ -352,8 +352,8 @@ Variables +

+ YUV Image Format Notes

+

Technically, the JPEG format uses the YCbCr colorspace (which is technically not a colorspace but a color transform), but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.

+-

Each plane is simply a 2D array of bytes, each byte representing the value of one of the components (Y, Cb, or Cr) at a particular location in the image. The width and height of each plane are determined by the image width, height, and level of chrominance subsampling. The luminance plane width is the image width padded to the nearest multiple of the horizontal subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane height is the image height padded to the nearest multiple of the vertical subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 or grayscale.) This is irrespective of any additional padding that may be specified as an argument to the various YUV functions. The chrominance plane width is equal to the luminance plane width divided by the horizontal subsampling factor, and the chrominance plane height is equal to the luminance plane height divided by the vertical subsampling factor.

+-

For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is used, then the luminance plane would be 36 x 35 bytes, and each of the chrominance planes would be 18 x 35 bytes. If you specify a line padding of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and each of the chrominance planes would be 20 x 35 bytes.

++

Each plane is simply a 2D array of bytes, each byte representing the value of one of the components (Y, Cb, or Cr) at a particular location in the image. The width and height of each plane are determined by the image width, height, and level of chrominance subsampling. The luminance plane width is the image width padded to the nearest multiple of the horizontal subsampling factor (1 in the case of 4:4:4, grayscale, or 4:4:0; 2 in the case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.) Similarly, the luminance plane height is the image height padded to the nearest multiple of the vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale, or 4:1:1; 2 in the case of 4:2:0 or 4:4:0.) This is irrespective of any additional padding that may be specified as an argument to the various YUV functions. The chrominance plane width is equal to the luminance plane width divided by the horizontal subsampling factor, and the chrominance plane height is equal to the luminance plane height divided by the vertical subsampling factor.

++

For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is used, then the luminance plane would be 36 x 35 bytes, and each of the chrominance planes would be 18 x 35 bytes. If you specify a row alignment of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and each of the chrominance planes would be 20 x 35 bytes.

+

Macro Definition Documentation

+ +

◆ TJ_NUMCS

+@@ -447,8 +447,8 @@ YUV Image Format Notes + +
+ +-

Use the most accurate DCT/IDCT algorithm available in the underlying codec.

+-

The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect.

++

Use the most accurate DCT/IDCT algorithm available.

++

The default if this flag is not specified is implementation-specific. For example, the implementation of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect.

+ +
+ +@@ -464,7 +464,7 @@ YUV Image Format Notes + +
+ +-

The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order.

++

Rows in the packed-pixel source/destination image are stored in bottom-up (Windows, OpenGL) order rather than in top-down (X11) order.

+ +
+ +@@ -480,8 +480,8 @@ YUV Image Format Notes + +
+ +-

Use the fastest DCT/IDCT algorithm available in the underlying codec.

+-

The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect.

++

Use the fastest DCT/IDCT algorithm available.

++

The default if this flag is not specified is implementation-specific. For example, the implementation of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect.

+ +
+ +@@ -497,7 +497,7 @@ YUV Image Format Notes + +
+ +-

When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec.

++

When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available.

+

The default is to use smooth upsampling, which creates a smooth transition between neighboring chrominance components in order to reduce upsampling artifacts in the decompressed image.

+ +
+@@ -531,8 +531,8 @@ YUV Image Format Notes + +
+ +-

Disable buffer (re)allocation.

+-

If passed to one of the JPEG compression or transform functions, this flag will cause those functions to generate an error if the JPEG image buffer is invalid or too small rather than attempting to allocate or reallocate that buffer. This reproduces the behavior of earlier versions of TurboJPEG.

++

Disable JPEG buffer (re)allocation.

++

If passed to one of the JPEG compression or transform functions, this flag will cause those functions to generate an error if the JPEG destination buffer is invalid or too small, rather than attempt to allocate or reallocate that buffer.

+ +
+ +@@ -565,7 +565,7 @@ YUV Image Format Notes + +
+ +-

Immediately discontinue the current compression/decompression/transform operation if the underlying codec throws a warning (non-fatal error).

++

Immediately discontinue the current compression/decompression/transform operation if a warning (non-fatal error) occurs.

+

The default behavior is to allow the operation to complete unless a fatal error is encountered.

+ +
+@@ -586,7 +586,7 @@ YUV Image Format Notes + +
+ +-

Pad the given width to the nearest 32-bit boundary.

++

Pad the given width to the nearest multiple of 4.

+ +
+ +@@ -633,7 +633,7 @@ YUV Image Format Notes + +
+ +-

This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the output image.

++

This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the destination image.

+ +
+ +@@ -666,7 +666,7 @@ YUV Image Format Notes + +
+ +-

This option will discard the color data in the input image and produce a grayscale output image.

++

This option will discard the color data in the source image and produce a grayscale destination image.

+ +
+ +@@ -682,7 +682,8 @@ YUV Image Format Notes + +
+ +-

This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)

++

This option will prevent tjTransform() from outputting a JPEG image for this particular transform.

++

(This can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)

+ +
+ +@@ -715,8 +716,8 @@ YUV Image Format Notes + +
+ +-

This option will enable progressive entropy coding in the output image generated by this particular transform.

+-

Progressive entropy coding will generally improve compression relative to baseline entropy coding (the default), but it will reduce compression and decompression performance considerably.

++

This option will enable progressive entropy coding in the JPEG image generated by this particular transform.

++

Progressive entropy coding will generally improve compression relative to baseline entropy coding (the default), but it will reduce decompression performance considerably.

+ +
+ +@@ -785,19 +786,19 @@ YUV Image Format Notes +

JPEG colorspaces.

+ + + + + + +
Enumerator
TJCS_RGB 

RGB colorspace.

+-

When compressing the JPEG image, the R, G, and B components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. RGB JPEG images can be decompressed to any of the extended RGB pixel formats or grayscale, but they cannot be decompressed to YUV images.

++

When compressing the JPEG image, the R, G, and B components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. RGB JPEG images can be decompressed to packed-pixel images with any of the extended RGB or grayscale pixel formats, but they cannot be decompressed to planar YUV images.

+
TJCS_YCbCr 

YCbCr colorspace.

+-

YCbCr is not an absolute colorspace but rather a mathematical transformation of RGB designed solely for storage and transmission. YCbCr images must be converted to RGB before they can actually be displayed. In the YCbCr colorspace, the Y (luminance) component represents the black & white portion of the original image, and the Cb and Cr (chrominance) components represent the color portion of the original image. Originally, the analog equivalent of this transformation allowed the same signal to drive both black & white and color televisions, but JPEG images use YCbCr primarily because it allows the color data to be optionally subsampled for the purposes of reducing bandwidth or disk space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images.

++

YCbCr is not an absolute colorspace but rather a mathematical transformation of RGB designed solely for storage and transmission. YCbCr images must be converted to RGB before they can actually be displayed. In the YCbCr colorspace, the Y (luminance) component represents the black & white portion of the original image, and the Cb and Cr (chrominance) components represent the color portion of the original image. Originally, the analog equivalent of this transformation allowed the same signal to drive both black & white and color televisions, but JPEG images use YCbCr primarily because it allows the color data to be optionally subsampled for the purposes of reducing network or disk usage. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be compressed from and decompressed to packed-pixel images with any of the extended RGB or grayscale pixel formats. YCbCr JPEG images can also be compressed from and decompressed to planar YUV images.

+
TJCS_GRAY 

Grayscale colorspace.

+-

The JPEG image retains only the luminance data (Y component), and any color data from the source image is discarded. Grayscale JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images.

++

The JPEG image retains only the luminance data (Y component), and any color data from the source image is discarded. Grayscale JPEG images can be compressed from and decompressed to packed-pixel images with any of the extended RGB or grayscale pixel formats, or they can be compressed from and decompressed to planar YUV images.

+
TJCS_CMYK 

CMYK colorspace.

+-

When compressing the JPEG image, the C, M, Y, and K components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. CMYK JPEG images can only be decompressed to CMYK pixels.

++

When compressing the JPEG image, the C, M, Y, and K components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. CMYK JPEG images can only be decompressed to packed-pixel images with the CMYK pixel format.

+
TJCS_YCCK 

YCCK colorspace.

+-

YCCK (AKA "YCbCrK") is not an absolute colorspace but rather a mathematical transformation of CMYK designed solely for storage and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be reversibly transformed into YCCK, and as with YCbCr, the chrominance components in the YCCK pixels can be subsampled without incurring major perceptual loss. YCCK JPEG images can only be compressed from and decompressed to CMYK pixels.

++

YCCK (AKA "YCbCrK") is not an absolute colorspace but rather a mathematical transformation of CMYK designed solely for storage and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be reversibly transformed into YCCK, and as with YCbCr, the chrominance components in the YCCK pixels can be subsampled without incurring major perceptual loss. YCCK JPEG images can only be compressed from and decompressed to packed-pixel images with the CMYK pixel format.

+
+ +@@ -817,7 +818,7 @@ YUV Image Format Notes + +

Error codes.

+ +- + +@@ -873,10 +874,10 @@ YUV Image Format Notes +

This is the same as TJPF_XRGB, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel.

+ + + +
Enumerator
TJERR_WARNING 

The error was non-fatal and recoverable, but the image may still be corrupt.

++
Enumerator
TJERR_WARNING 

The error was non-fatal and recoverable, but the destination image may still be corrupt.

+
TJERR_FATAL 

The error was fatal and non-recoverable.

+
TJPF_CMYK 

CMYK pixel format.

+-

Unlike RGB, which is an additive color model used primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily for printing. In the CMYK color model, the value of each color component typically corresponds to an amount of cyan, magenta, yellow, or black ink that is applied to a white background. In order to convert between CMYK and RGB, it is necessary to use a color management system (CMS.) A CMS will attempt to map colors within the printer's gamut to perceptually similar colors in the display's gamut and vice versa, but the mapping is typically not 1:1 or reversible, nor can it be defined with a simple formula. Thus, such a conversion is out of scope for a codec library. However, the TurboJPEG API allows for compressing CMYK pixels into a YCCK JPEG image (see TJCS_YCCK) and decompressing YCCK JPEG images into CMYK pixels.

++

Unlike RGB, which is an additive color model used primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily for printing. In the CMYK color model, the value of each color component typically corresponds to an amount of cyan, magenta, yellow, or black ink that is applied to a white background. In order to convert between CMYK and RGB, it is necessary to use a color management system (CMS.) A CMS will attempt to map colors within the printer's gamut to perceptually similar colors in the display's gamut and vice versa, but the mapping is typically not 1:1 or reversible, nor can it be defined with a simple formula. Thus, such a conversion is out of scope for a codec library. However, the TurboJPEG API allows for compressing packed-pixel CMYK images into YCCK JPEG images (see TJCS_YCCK) and decompressing YCCK JPEG images into packed-pixel CMYK images.

+
TJPF_UNKNOWN 

Unknown pixel format.

+-

Currently this is only used by tjLoadImage().

++

Currently this is only used by tjLoadImage().

+
+ +@@ -895,7 +896,7 @@ YUV Image Format Notes +
+ +

Chrominance subsampling options.

+-

When pixels are converted from RGB to YCbCr (see TJCS_YCbCr) or from CMYK to YCCK (see TJCS_YCCK) as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity (the human eye is more sensitive to small changes in brightness than to small changes in color.) This is called "chrominance subsampling".

++

When pixels are converted from RGB to YCbCr (see TJCS_YCbCr) or from CMYK to YCCK (see TJCS_YCCK) as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity. (The human eye is more sensitive to small changes in brightness than to small changes in color.) This is called "chrominance subsampling".

+ +
Enumerator
TJSAMP_444 

4:4:4 chrominance subsampling (no chrominance subsampling).

+

The JPEG or YUV image will contain one chrominance component for every pixel in the source image.

+@@ -977,8 +978,8 @@ YUV Image Format Notes +
+
+ +-

Allocate an image buffer for use with TurboJPEG.

+-

You should always use this function to allocate the JPEG destination buffer(s) for the compression and transform functions unless you are disabling automatic buffer (re)allocation (by setting TJFLAG_NOREALLOC.)

++

Allocate a byte buffer for use with TurboJPEG.

++

You should always use this function to allocate the JPEG destination buffer(s) for the compression and transform functions unless you are disabling automatic buffer (re)allocation (by setting TJFLAG_NOREALLOC.)

+
Parameters
+ + +@@ -986,7 +987,7 @@ YUV Image Format Notes + + +
Returns
a pointer to a newly-allocated buffer with the specified number of bytes.
+-
See also
tjFree()
++
See also
tjFree()
+ + + +@@ -1023,7 +1024,7 @@ YUV Image Format Notes +
+ +

The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters.

+-

The number of bytes returned by this function is larger than the size of the uncompressed source image. The reason for this is that the JPEG format uses 16-bit coefficients, and it is thus possible for a very high-quality JPEG image with very high-frequency content to expand rather than compress when converted to the JPEG format. Such images represent a very rare corner case, but since there is no way to predict the size of a JPEG image prior to compression, the corner case has to be handled.

++

The number of bytes returned by this function is larger than the size of the uncompressed source image. The reason for this is that the JPEG format uses 16-bit coefficients, so it is possible for a very high-quality source image with very high-frequency content to expand rather than compress when converted to the JPEG format. Such images represent very rare corner cases, but since there is no way to predict the size of a JPEG image prior to compression, the corner cases have to be handled.

+
Parameters
+
bytesthe number of bytes to allocate
+ +@@ -1036,8 +1037,8 @@ YUV Image Format Notes + + + +- +-

◆ tjBufSizeYUV2()

++ ++

◆ tjBufSizeYUV2()

+ +
+
+@@ -1052,7 +1053,7 @@ YUV Image Format Notes +
+ + +- ++ + + + +@@ -1074,11 +1075,11 @@ YUV Image Format Notes +
widthwidth (in pixels) of the image
int pad, align,
+
+ +-

The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters.

++

The size of the buffer (in bytes) required to hold a unified planar YUV image with the given parameters.

+
Parameters
+ + +- ++ + + +
widthwidth (in pixels) of the image
padthe width of each line in each plane of the image is padded to the nearest multiple of this number of bytes (must be a power of 2.)
alignrow alignment (in bytes) of the image (must be a power of 2.) Setting this parameter to n specifies that each row in each plane of the image will be padded to the nearest multiple of n bytes (1 = unpadded.)
heightheight (in pixels) of the image
subsamplevel of chrominance subsampling in the image (see Chrominance subsampling options.)
+@@ -1168,22 +1169,22 @@ YUV Image Format Notes + +
+ +-

Compress an RGB, grayscale, or CMYK image into a JPEG image.

++

Compress a packed-pixel RGB, grayscale, or CMYK image into a JPEG image.

+
Parameters
+ + +- ++ + +- ++ + + +- +- ++If you choose option 1, then *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed. ++ + + + +@@ -1194,8 +1195,8 @@ If you choose option 1, *jpegSize should be set to the size of your + + + +- +-

◆ tjCompressFromYUV()

++ ++

◆ tjCompressFromYUV()

+ +
+
+@@ -1222,7 +1223,7 @@ If you choose option 1, *jpegSize should be set to the size of your +
+ + +- ++ + + + +@@ -1268,22 +1269,22 @@ If you choose option 1, *jpegSize should be set to the size of your +
handlea handle to a TurboJPEG compressor or transformer instance
srcBufpointer to an image buffer containing RGB, grayscale, or CMYK pixels to be compressed
srcBufpointer to a buffer containing a packed-pixel RGB, grayscale, or CMYK source image to be compressed
widthwidth (in pixels) of the source image
pitchbytes per line in the source image. Normally, this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
pitch: bytes per row in the source image. Normally this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each row of the image is padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip rows, etc. Setting this parameter to 0 is equivalent to setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    +-
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. ++
jpegBufaddress of a pointer to a byte buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    ++
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  4. +-
  5. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  6. ++
  7. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated. (Setting TJFLAG_NOREALLOC guarantees that it won't be.)
  8. +
+-If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegSize: pointer to an unsigned long variable that holds the size of the JPEG buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes). If *jpegBuf points to a JPEG buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegSubsampthe level of chrominance subsampling to be used when generating the JPEG image (see Chrominance subsampling options.)
jpegQualthe image quality of the generated JPEG image (1 = worst, 100 = best)
flagsthe bitwise OR of one or more of the flags
int pad, align,
+
+ +-

Compress a YUV planar image into a JPEG image.

++

Compress a unified planar YUV image into a JPEG image.

+
Parameters
+ + +- +- +- +- ++ ++ ++ ++ + +- +- ++If you choose option 1, then *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed. ++ + + +
handlea handle to a TurboJPEG compressor or transformer instance
srcBufpointer to an image buffer containing a YUV planar image to be compressed. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
widthwidth (in pixels) of the source image. If the width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
padthe line padding used in the source image. For instance, if each line in each plane of the YUV image is padded to the nearest multiple of 4 bytes, then pad should be set to 4.
heightheight (in pixels) of the source image. If the height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
srcBufpointer to a buffer containing a unified planar YUV source image to be compressed. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, row alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the buffer. (Refer to YUV Image Format Notes.)
width: width (in pixels) of the source image. If the width is not an exact multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed.
align: row alignment (in bytes) of the source image (must be a power of 2). Setting this parameter to n indicates that each row in each plane of the source image is padded to the nearest multiple of n bytes (1 = unpadded).
height: height (in pixels) of the source image. If the height is not an exact multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed.
subsampthe level of chrominance subsampling used in the source image (see Chrominance subsampling options.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    +-
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. ++
jpegBufaddress of a pointer to a byte buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    ++
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  4. +-
  5. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  6. ++
  7. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated. (Setting TJFLAG_NOREALLOC guarantees that it won't be.)
  8. +
+-If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegQualthe image quality of the generated JPEG image (1 = worst, 100 = best)
flagsthe bitwise OR of one or more of the flags
+@@ -1371,18 +1372,18 @@ If you choose option 1, *jpegSize should be set to the size of your +
Parameters
+ + +- +- +- +- ++ ++ ++ ++ + +- +- ++If you choose option 1, then *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed. ++ + + +
handlea handle to a TurboJPEG compressor or transformer instance
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if compressing a grayscale image) that contain a YUV image to be compressed. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthwidth (in pixels) of the source image. If the width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of line padding in each plane or to create a JPEG image from a subregion of a larger YUV planar image.
heightheight (in pixels) of the source image. If the height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if compressing a grayscale image) that contain a YUV source image to be compressed. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthwidth (in pixels) of the source image. If the width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed.
stridesan array of integers, each specifying the number of bytes per row in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of row padding in each plane or to create a JPEG image from a subregion of a larger planar YUV image.
heightheight (in pixels) of the source image. If the height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed.
subsampthe level of chrominance subsampling used in the source image (see Chrominance subsampling options.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    +-
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. ++
jpegBufaddress of a pointer to a byte buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    ++
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  4. +-
  5. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  6. ++
  7. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated. (Setting TJFLAG_NOREALLOC guarantees that it won't be.)
  8. +
+-If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
jpegQualthe image quality of the generated JPEG image (1 = worst, 100 = best)
flagsthe bitwise OR of one or more of the flags
+@@ -1392,8 +1393,8 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +- +-

◆ tjDecodeYUV()

++ ++

◆ tjDecodeYUV()

+ +
+
+@@ -1414,7 +1415,7 @@ If you choose option 1, *jpegSize should be set to the size of your + + + int  +- pad, ++ align, + + + +@@ -1466,17 +1467,17 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Decode a YUV planar image into an RGB or grayscale image.

+-

This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG decompression process.

++

Decode a unified planar YUV image into a packed-pixel RGB or grayscale image.

++

This function performs color conversion (which is accelerated in the libjpeg-turbo implementation) but does not execute any of the other steps in the JPEG decompression process.

+
Parameters
+ + +- +- ++ ++ + +- ++ + +- ++ + + + +@@ -1561,17 +1562,17 @@ If you choose option 1, *jpegSize should be set to the size of your +
handlea handle to a TurboJPEG decompressor or transformer instance
srcBufpointer to an image buffer containing a YUV planar image to be decoded. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
padUse this parameter to specify that the width of each line in each plane of the YUV source image is padded to the nearest multiple of this number of bytes (must be a power of 2.)
srcBufpointer to a buffer containing a unified planar YUV source image to be decoded. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, row alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer. (Refer to YUV Image Format Notes.)
align: row alignment (in bytes) of the YUV source image (must be a power of 2). Setting this parameter to n indicates that each row in each plane of the YUV source image is padded to the nearest multiple of n bytes (1 = unpadded).
subsampthe level of chrominance subsampling used in the YUV source image (see Chrominance subsampling options.)
dstBufpointer to an image buffer that will receive the decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
dstBufpointer to a buffer that will receive the packed-pixel decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
widthwidth (in pixels) of the source and destination images
pitchbytes per line in the destination image. Normally, this should be width * tjPixelSize[pixelFormat] if the destination image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the destination image should be padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
pitch: bytes per row in the destination image. Normally this should be set to width * tjPixelSize[pixelFormat] if the destination image should be unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each row of the destination image should be padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip rows, etc. Setting this parameter to 0 is equivalent to setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source and destination images
pixelFormatpixel format of the destination image (see Pixel formats.)
flagsthe bitwise OR of one or more of the flags
+
+ +-

Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale image.

+-

This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG decompression process.

++

Decode a set of Y, U (Cb), and V (Cr) image planes into a packed-pixel RGB or grayscale image.

++

This function performs color conversion (which is accelerated in the libjpeg-turbo implementation) but does not execute any of the other steps in the JPEG decompression process.

+
Parameters
+ + + +- ++ + +- ++ + +- ++ + + + +@@ -1650,15 +1651,15 @@ If you choose option 1, *jpegSize should be set to the size of your +
handlea handle to a TurboJPEG decompressor or transformer instance
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decoding a grayscale image) that contain a YUV image to be decoded. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of line padding in each plane or to decode a subregion of a larger YUV planar image.
stridesan array of integers, each specifying the number of bytes per row in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of row padding in each plane or to decode a subregion of a larger planar YUV image.
subsampthe level of chrominance subsampling used in the YUV source image (see Chrominance subsampling options.)
dstBufpointer to an image buffer that will receive the decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
dstBufpointer to a buffer that will receive the packed-pixel decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
widthwidth (in pixels) of the source and destination images
pitchbytes per line in the destination image. Normally, this should be width * tjPixelSize[pixelFormat] if the destination image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the destination image should be padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
pitchbytes per row in the destination image. Normally this should be set to width * tjPixelSize[pixelFormat], if the destination image should be unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each row of the destination image should be padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip rows, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source and destination images
pixelFormatpixel format of the destination image (see Pixel formats.)
flagsthe bitwise OR of one or more of the flags
+
+ +-

Decompress a JPEG image to an RGB, grayscale, or CMYK image.

++

Decompress a JPEG image into a packed-pixel RGB, grayscale, or CMYK image.

+
Parameters
+ + +- ++ + +- ++ + +- ++ + + + +@@ -1729,7 +1730,7 @@ If you choose option 1, *jpegSize should be set to the size of your +
Parameters
+
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegBufpointer to a byte buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstBufpointer to an image buffer that will receive the decompressed image. This buffer should normally be pitch * scaledHeight bytes in size, where scaledHeight can be determined by calling TJSCALED() with the JPEG image height and one of the scaling factors returned by tjGetScalingFactors(). The dstBuf pointer may also be used to decompress into a specific region of a larger buffer.
dstBufpointer to a buffer that will receive the packed-pixel decompressed image. This buffer should normally be pitch * scaledHeight bytes in size, where scaledHeight can be determined by calling TJSCALED() with the JPEG image height and one of the scaling factors returned by tjGetScalingFactors(). The dstBuf pointer may also be used to decompress into a specific region of a larger buffer.
widthdesired width (in pixels) of the destination image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size.
pitchbytes per line in the destination image. Normally, this is scaledWidth * tjPixelSize[pixelFormat] if the decompressed image is unpadded, else TJPAD(scaledWidth * tjPixelSize[pixelFormat]) if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth can be determined by calling TJSCALED() with the JPEG image width and one of the scaling factors returned by tjGetScalingFactors().) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to scaledWidth * tjPixelSize[pixelFormat].
pitch: bytes per row in the destination image. Normally this should be set to scaledWidth * tjPixelSize[pixelFormat] if the destination image should be unpadded, or TJPAD(scaledWidth * tjPixelSize[pixelFormat]) if each row of the destination image should be padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. (NOTE: scaledWidth can be determined by calling TJSCALED() with the JPEG image width and one of the scaling factors returned by tjGetScalingFactors().) You can also be clever and use the pitch parameter to skip rows, etc. Setting this parameter to 0 is equivalent to setting it to scaledWidth * tjPixelSize[pixelFormat].
heightdesired height (in pixels) of the destination image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size.
pixelFormatpixel format of the destination image (see Pixel formats.)
flagsthe bitwise OR of one or more of the flags
+ +- ++ + + + +@@ -1742,8 +1743,8 @@ If you choose option 1, *jpegSize should be set to the size of your + + + +- +-

◆ tjDecompressToYUV2()

++ ++

◆ tjDecompressToYUV2()

+ +
+
+@@ -1782,7 +1783,7 @@ If you choose option 1, *jpegSize should be set to the size of your +
+ + +- ++ + + + +@@ -1804,17 +1805,17 @@ If you choose option 1, *jpegSize should be set to the size of your +
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing a JPEG image or an "abbreviated table specification" (AKA "tables-only") datastream. Passing a tables-only datastream to this function primes the decompressor with quantization and Huffman tables that can be used when decompressing subsequent "abbreviated image" datastreams. This is useful, for instance, when decompressing video streams in which all frames share the same quantization and Huffman tables.
jpegBufpointer to a byte buffer containing a JPEG image or an "abbreviated table specification" (AKA "tables-only") datastream. Passing a tables-only datastream to this function primes the decompressor with quantization and Huffman tables that can be used when decompressing subsequent "abbreviated image" datastreams. This is useful, for instance, when decompressing video streams in which all frames share the same quantization and Huffman tables.
jpegSizesize of the JPEG image or tables-only datastream (in bytes)
widthpointer to an integer variable that will receive the width (in pixels) of the JPEG image. If jpegBuf points to a tables-only datastream, then width is ignored.
heightpointer to an integer variable that will receive the height (in pixels) of the JPEG image. If jpegBuf points to a tables-only datastream, then height is ignored.
int pad, align,
+
+ +-

Decompress a JPEG image to a YUV planar image.

+-

This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image.

++

Decompress a JPEG image into a unified planar YUV image.

++

This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of a packed-pixel image.

+
Parameters
+ + +- ++ + +- +- +- +- ++ ++ ++ ++ + +
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegBufpointer to a byte buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
widthdesired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
padthe width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, pad should be set to 4.
heightdesired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
dstBufpointer to a buffer that will receive the unified planar YUV decompressed image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the scaled image width, scaled image height, row alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer. (Refer to YUV Image Format Notes.)
width: desired width (in pixels) of the YUV image. If this is different from the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an exact multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed.
align: row alignment (in bytes) of the YUV image (must be a power of 2). Setting this parameter to n will cause each row in each plane of the YUV image to be padded to the nearest multiple of n bytes (1 = unpadded). To generate images suitable for X Video, align should be set to 4.
height: desired height (in pixels) of the YUV image. If this is different from the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an exact multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed.
flagsthe bitwise OR of one or more of the flags
+
+@@ -1886,16 +1887,16 @@ If you choose option 1, *jpegSize should be set to the size of your +
+ +

Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image planes.

+-

This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image.

++

This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of a packed-pixel image.

+
Parameters
+ + +- ++ + +- +- +- +- ++ ++ ++ ++ + +
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegBufpointer to a byte buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decompressing a grayscale image) that will receive the YUV image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the scaled image width, scaled image height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthdesired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the output image. Setting the stride for any plane to 0 is the same as setting it to the scaled plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective scaled plane widths. You can adjust the strides in order to add an arbitrary amount of line padding to each plane or to decompress the JPEG image into a subregion of a larger YUV planar image.
heightdesired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decompressing a grayscale image) that will receive the decompressed image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the scaled image width, scaled image height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthdesired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed.
stridesan array of integers, each specifying the number of bytes per row in the corresponding plane of the YUV image. Setting the stride for any plane to 0 is the same as setting it to the scaled plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective scaled plane widths. You can adjust the strides in order to add an arbitrary amount of row padding to each plane or to decompress the JPEG image into a subregion of a larger planar YUV image.
heightdesired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed.
flagsthe bitwise OR of one or more of the flags
+
+@@ -1931,8 +1932,8 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+
+- +-

◆ tjEncodeYUV3()

++ ++

◆ tjEncodeYUV3()

+ +
+
+@@ -1983,7 +1984,7 @@ If you choose option 1, *jpegSize should be set to the size of your + + + int  +- pad, ++ align, + + + +@@ -2005,18 +2006,18 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Encode an RGB or grayscale image into a YUV planar image.

+-

This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG compression process.

++

Encode a packed-pixel RGB or grayscale image into a unified planar YUV image.

++

This function performs color conversion (which is accelerated in the libjpeg-turbo implementation) but does not execute any of the other steps in the JPEG compression process.

+
Parameters
+ + +- ++ + +- ++ + + +- +- ++ ++ + + +
handlea handle to a TurboJPEG compressor or transformer instance
srcBufpointer to an image buffer containing RGB or grayscale pixels to be encoded
srcBufpointer to a buffer containing a packed-pixel RGB or grayscale source image to be encoded
widthwidth (in pixels) of the source image
pitchbytes per line in the source image. Normally, this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
pitchbytes per row in the source image. Normally this should be width * tjPixelSize[pixelFormat], if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each row of the image is padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip rows, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
padthe width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, pad should be set to 4.
dstBufpointer to a buffer that will receive the unified planar YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, row alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer. (Refer to YUV Image Format Notes.)
alignrow alignment (in bytes) of the YUV image (must be a power of 2.) Setting this parameter to n will cause each row in each plane of the YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.) To generate images suitable for X Video, align should be set to 4.
subsampthe level of chrominance subsampling to be used when generating the YUV image (see Chrominance subsampling options.) To generate images suitable for X Video, subsamp should be set to TJSAMP_420. This produces an image compatible with the I420 (AKA "YUV420P") format.
flagsthe bitwise OR of one or more of the flags
+@@ -2100,18 +2101,18 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes.

+-

This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG compression process.

++

Encode a packed-pixel RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes.

++

This function performs color conversion (which is accelerated in the libjpeg-turbo implementation) but does not execute any of the other steps in the JPEG compression process.

+
Parameters
+ + +- ++ + +- ++ + + + +- ++ + + +
handlea handle to a TurboJPEG compressor or transformer instance
srcBufpointer to an image buffer containing RGB or grayscale pixels to be encoded
srcBufpointer to a buffer containing a packed-pixel RGB or grayscale source image to be encoded
widthwidth (in pixels) of the source image
pitchbytes per line in the source image. Normally, this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
pitchbytes per row in the source image. Normally this should be width * tjPixelSize[pixelFormat], if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each row of the image is padded to the nearest multiple of 4 bytes, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip rows, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if generating a grayscale image) that will receive the encoded image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the output image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to add an arbitrary amount of line padding to each plane or to encode an RGB or grayscale image into a subregion of a larger YUV planar image.
stridesan array of integers, each specifying the number of bytes per row in the corresponding plane of the YUV image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to add an arbitrary amount of row padding to each plane or to encode an RGB or grayscale image into a subregion of a larger planar YUV image.
subsampthe level of chrominance subsampling to be used when generating the YUV image (see Chrominance subsampling options.) To generate images suitable for X Video, subsamp should be set to TJSAMP_420. This produces an image compatible with the I420 (AKA "YUV420P") format.
flagsthe bitwise OR of one or more of the flags
+@@ -2137,15 +2138,15 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Free an image buffer previously allocated by TurboJPEG.

+-

You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using tjAlloc().

++

Free a byte buffer previously allocated by TurboJPEG.

++

You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using tjAlloc().

+
Parameters
+ + +
bufferaddress of the buffer to free. If the address is NULL, then this function has no effect.
+
+
+-
See also
tjAlloc()
++
See also
tjAlloc()
+ +
+
+@@ -2204,8 +2205,8 @@ If you choose option 1, *jpegSize should be set to the size of your + + + +- +-

◆ tjGetScalingFactors()

++ ++

◆ tjGetScalingFactors()

+ +
+
+@@ -2214,16 +2215,16 @@ If you choose option 1, *jpegSize should be set to the size of your + DLLEXPORT tjscalingfactor* tjGetScalingFactors + ( + int *  +- numscalingfactors) ++ numScalingFactors) + + + +
+ +-

Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports.

++

Returns a list of fractional scaling factors that the JPEG decompressor supports.

+
Parameters
+ +- ++ +
numscalingfactorspointer to an integer variable that will receive the number of elements in the list
numScalingFactorspointer to an integer variable that will receive the number of elements in the list
+
+
+@@ -2344,25 +2345,25 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Load an uncompressed image from disk into memory.

++

Load a packed-pixel image from disk into memory.

+
Parameters
+ +- +- +- +- +- ++ ++ ++ ++ + +
filenamename of a file containing an uncompressed image in Windows BMP or PBMPLUS (PPM/PGM) format
widthpointer to an integer variable that will receive the width (in pixels) of the uncompressed image
alignrow alignment of the image buffer to be returned (must be a power of 2.) For instance, setting this parameter to 4 will cause all rows in the image buffer to be padded to the nearest 32-bit boundary, and setting this parameter to 1 will cause all rows in the image buffer to be unpadded.
heightpointer to an integer variable that will receive the height (in pixels) of the uncompressed image
pixelFormatpointer to an integer variable that specifies or will receive the pixel format of the uncompressed image buffer. The behavior of tjLoadImage() will vary depending on the value of *pixelFormat passed to the function:
    +-
  • TJPF_UNKNOWN : The uncompressed image buffer returned by the function will use the most optimal pixel format for the file type, and *pixelFormat will contain the ID of this pixel format upon successful return from the function.
  • +-
  • TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale colormap can be loaded.
  • +-
  • TJPF_CMYK : The RGB or grayscale pixels stored in the file will be converted using a quick & dirty algorithm that is suitable only for testing purposes (proper conversion between CMYK and other formats requires a color management system.)
  • +-
  • Other pixel formats : The uncompressed image buffer will use the specified pixel format, and pixel format conversion will be performed if necessary.
  • ++
filenamename of a file containing a packed-pixel image in Windows BMP or PBMPLUS (PPM/PGM) format
widthpointer to an integer variable that will receive the width (in pixels) of the packed-pixel image
alignrow alignment of the packed-pixel buffer to be returned (must be a power of 2.) Setting this parameter to n will cause all rows in the buffer to be padded to the nearest multiple of n bytes (1 = unpadded.)
heightpointer to an integer variable that will receive the height (in pixels) of the packed-pixel image
pixelFormatpointer to an integer variable that specifies or will receive the pixel format of the packed-pixel buffer. The behavior of tjLoadImage() will vary depending on the value of *pixelFormat passed to the function:
    ++
  • TJPF_UNKNOWN : The packed-pixel buffer returned by this function will use the most optimal pixel format for the file type, and *pixelFormat will contain the ID of that pixel format upon successful return from this function.
  • ++
  • TJPF_GRAY : Only PGM files and 8-bit-per-pixel BMP files with a grayscale colormap can be loaded.
  • ++
  • TJPF_CMYK : The RGB or grayscale pixels stored in the file will be converted using a quick & dirty algorithm that is suitable only for testing purposes. (Proper conversion between CMYK and other formats requires a color management system.)
  • ++
  • Other pixel formats : The packed-pixel buffer will use the specified pixel format, and pixel format conversion will be performed if necessary.
  • +
+
flagsthe bitwise OR of one or more of the flags.
+
+
+-
Returns
a pointer to a newly-allocated buffer containing the uncompressed image, converted to the chosen pixel format and with the chosen row alignment, or NULL if an error occurred (see tjGetErrorStr2().) This buffer should be freed using tjFree().
++
Returns
a pointer to a newly-allocated buffer containing the packed-pixel image, converted to the chosen pixel format and with the chosen row alignment, or NULL if an error occurred (see tjGetErrorStr2().) This buffer should be freed using tjFree().
+ +
+
+@@ -2461,7 +2462,7 @@ If you choose option 1, *jpegSize should be set to the size of your + + + +- ++ + + +
componentIDID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
widthwidth (in pixels) of the YUV image. NOTE: this is the width of the whole image, not the plane width.
stridebytes per line in the image plane. Setting this to 0 is the equivalent of setting it to the plane width.
stridebytes per row in the image plane. Setting this to 0 is the equivalent of setting it to the plane width.
heightheight (in pixels) of the YUV image. NOTE: this is the height of the whole image, not the plane height.
subsamplevel of chrominance subsampling in the image (see Chrominance subsampling options.)
+@@ -2573,15 +2574,15 @@ If you choose option 1, *jpegSize should be set to the size of your + +
+ +-

Save an uncompressed image from memory to disk.

++

Save a packed-pixel image from memory to disk.

+
Parameters
+ +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + +
filenamename of a file to which to save the uncompressed image. The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending on the file extension.
bufferpointer to an image buffer containing RGB, grayscale, or CMYK pixels to be saved
widthwidth (in pixels) of the uncompressed image
pitchbytes per line in the image buffer. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the uncompressed image
pixelFormatpixel format of the image buffer (see Pixel formats.) If this parameter is set to TJPF_GRAY, then the image will be stored in PGM or 8-bit (indexed color) BMP format. Otherwise, the image will be stored in PPM or 24-bit BMP format. If this parameter is set to TJPF_CMYK, then the CMYK pixels will be converted to RGB using a quick & dirty algorithm that is suitable only for testing (proper conversion between CMYK and other formats requires a color management system.)
filenamename of a file to which to save the packed-pixel image. The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending on the file extension.
bufferpointer to a buffer containing a packed-pixel RGB, grayscale, or CMYK image to be saved
widthwidth (in pixels) of the packed-pixel image
pitchbytes per row in the packed-pixel image. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the packed-pixel image
pixelFormatpixel format of the packed-pixel image (see Pixel formats.) If this parameter is set to TJPF_GRAY, then the image will be stored in PGM or 8-bit-per-pixel (indexed color) BMP format. Otherwise, the image will be stored in PPM or 24-bit-per-pixel BMP format. If this parameter is set to TJPF_CMYK, then the CMYK pixels will be converted to RGB using a quick & dirty algorithm that is suitable only for testing purposes. (Proper conversion between CMYK and other formats requires a color management system.)
flagsthe bitwise OR of one or more of the flags.
+
+@@ -2657,17 +2658,17 @@ If you choose option 1, *jpegSize should be set to the size of your +
Parameters
+ + +- ++ + + +- +- +- ++If you choose option 1, then dstSizes[i] should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return from this function, as it may have changed. ++ ++ + +
handlea handle to a TurboJPEG transformer instance
jpegBufpointer to a buffer containing the JPEG source image to transform
jpegBufpointer to a byte buffer containing the JPEG source image to transform
jpegSizesize of the JPEG source image (in bytes)
nthe number of transformed JPEG images to generate
dstBufspointer to an array of n image buffers. dstBufs[i] will receive a JPEG image that has been transformed using the parameters in transforms[i]. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    +-
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. ++
dstBufspointer to an array of n byte buffers. dstBufs[i] will receive a JPEG image that has been transformed using the parameters in transforms[i]. TurboJPEG has the ability to reallocate the JPEG destination buffer to accommodate the size of the transformed JPEG image. Thus, you can choose to:
    ++
  1. pre-allocate the JPEG destination buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer for you, or
  4. +-
  5. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize() with the transformed or cropped width and height. Under normal circumstances, this should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.) Note, however, that there are some rare cases (such as transforming images with a large amount of embedded EXIF or ICC profile data) in which the output image will be larger than the worst-case size, and TJFLAG_NOREALLOC cannot be used in those cases.
  6. ++
  7. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize() with the transformed or cropped width and height. Under normal circumstances, this should ensure that the buffer never has to be re-allocated. (Setting TJFLAG_NOREALLOC guarantees that it won't be.) Note, however, that there are some rare cases (such as transforming images with a large amount of embedded EXIF or ICC profile data) in which the transformed JPEG image will be larger than the worst-case size, and TJFLAG_NOREALLOC cannot be used in those cases.
  8. +
+-If you choose option 1, dstSizes[i] should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return from this function, as it may have changed.
dstSizespointer to an array of n unsigned long variables that will receive the actual sizes (in bytes) of each transformed JPEG image. If dstBufs[i] points to a pre-allocated buffer, then dstSizes[i] should be set to the size of the buffer. Upon return, dstSizes[i] will contain the size of the JPEG image (in bytes.)
transformspointer to an array of n tjtransform structures, each of which specifies the transform parameters and/or cropping region for the corresponding transformed output image.
dstSizespointer to an array of n unsigned long variables that will receive the actual sizes (in bytes) of each transformed JPEG image. If dstBufs[i] points to a pre-allocated buffer, then dstSizes[i] should be set to the size of the buffer. Upon return, dstSizes[i] will contain the size of the transformed JPEG image (in bytes.)
transformspointer to an array of n tjtransform structures, each of which specifies the transform parameters and/or cropping region for the corresponding transformed JPEG image.
flagsthe bitwise OR of one or more of the flags
+
+@@ -2698,7 +2699,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo +
+ +

Alpha offset (in bytes) for a given pixel format.

+-

This specifies the number of bytes that the Alpha component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRA is stored in char pixel[], then the alpha component will be pixel[tjAlphaOffset[TJ_BGRA]]. This will be -1 if the pixel format does not have an alpha component.

++

This specifies the number of bytes that the alpha component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRA is stored in unsigned char pixel[], then the alpha component will be pixel[tjAlphaOffset[TJPF_BGRA]]. This will be -1 if the pixel format does not have an alpha component.

+ +
+ +@@ -2723,7 +2724,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo +
+ +

Blue offset (in bytes) for a given pixel format.

+-

This specifies the number of bytes that the Blue component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. This will be -1 if the pixel format does not have a blue component.

++

This specifies the number of bytes that the blue component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in unsigned char pixel[], then the blue component will be pixel[tjBlueOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a blue component.

+ +
+ +@@ -2748,7 +2749,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo +
+ +

Green offset (in bytes) for a given pixel format.

+-

This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the green component will be pixel[tjGreenOffset[TJ_BGRX]]. This will be -1 if the pixel format does not have a green component.

++

This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in unsigned char pixel[], then the green component will be pixel[tjGreenOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a green component.

+ +
+ +@@ -2859,7 +2860,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo +
+ +

Red offset (in bytes) for a given pixel format.

+-

This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the red component will be pixel[tjRedOffset[TJ_BGRX]]. This will be -1 if the pixel format does not have a red component.

++

This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in unsigned char pixel[], then the red component will be pixel[tjRedOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a red component.

+ +
+ +diff --git a/doc/html/search/all_6.js b/doc/html/search/all_6.js +index aa31107..6b43b30 100644 +--- a/doc/html/search/all_6.js ++++ b/doc/html/search/all_6.js +@@ -9,9 +9,9 @@ var searchData= + ['tjalphaoffset_14',['tjAlphaOffset',['../group___turbo_j_p_e_g.html#ga5af0ab065feefd526debf1e20c43e837',1,'turbojpeg.h']]], + ['tjblueoffset_15',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]], + ['tjbufsize_16',['tjBufSize',['../group___turbo_j_p_e_g.html#ga67ac12fee79073242cb216e07c9f1f90',1,'turbojpeg.h']]], +- ['tjbufsizeyuv2_17',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga2be2b9969d4df9ecce9b05deed273194',1,'turbojpeg.h']]], ++ ['tjbufsizeyuv2_17',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga5e5aac9e8bcf17049279301e2466474c',1,'turbojpeg.h']]], + ['tjcompress2_18',['tjCompress2',['../group___turbo_j_p_e_g.html#gafbdce0112fd78fd38efae841443a9bcf',1,'turbojpeg.h']]], +- ['tjcompressfromyuv_19',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga7622a459b79aa1007e005b58783f875b',1,'turbojpeg.h']]], ++ ['tjcompressfromyuv_19',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#gab40f5096a72fd7e5bda9d6b58fa37e2e',1,'turbojpeg.h']]], + ['tjcompressfromyuvplanes_20',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga29ec5dfbd2d84b8724e951d6fa0d5d9e',1,'turbojpeg.h']]], + ['tjcs_21',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]], + ['tjcs_5fcmyk_22',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]], +@@ -19,14 +19,14 @@ var searchData= + ['tjcs_5frgb_24',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]], + ['tjcs_5fycbcr_25',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]], + 
['tjcs_5fycck_26',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]], +- ['tjdecodeyuv_27',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga70abbf38f77a26fd6da8813bef96f695',1,'turbojpeg.h']]], ++ ['tjdecodeyuv_27',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga97c2cedc1e2bade15a84164c94e503c1',1,'turbojpeg.h']]], + ['tjdecodeyuvplanes_28',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga10e837c07fa9d25770565b237d3898d9',1,'turbojpeg.h']]], + ['tjdecompress2_29',['tjDecompress2',['../group___turbo_j_p_e_g.html#gae9eccef8b682a48f43a9117c231ed013',1,'turbojpeg.h']]], + ['tjdecompressheader3_30',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga0595681096bba7199cc6f3533cb25f77',1,'turbojpeg.h']]], +- ['tjdecompresstoyuv2_31',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga04d1e839ff9a0860dd1475cff78d3364',1,'turbojpeg.h']]], ++ ['tjdecompresstoyuv2_31',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga5a3093e325598c17a9f004323af6fafa',1,'turbojpeg.h']]], + ['tjdecompresstoyuvplanes_32',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#gaa59f901a5258ada5bd0185ad59368540',1,'turbojpeg.h']]], + ['tjdestroy_33',['tjDestroy',['../group___turbo_j_p_e_g.html#ga75f355fa27225ba1a4ee392c852394d2',1,'turbojpeg.h']]], +- ['tjencodeyuv3_34',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gac519b922cdf446e97d0cdcba513636bf',1,'turbojpeg.h']]], ++ ['tjencodeyuv3_34',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga5d619e0a02b71e05a8dffb764f6d7a64',1,'turbojpeg.h']]], + ['tjencodeyuvplanes_35',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#gae2d04c72457fe7f4d60cf78ab1b1feb1',1,'turbojpeg.h']]], + ['tjerr_36',['TJERR',['../group___turbo_j_p_e_g.html#gafbc17cfa57d0d5d11fea35ac025950fe',1,'turbojpeg.h']]], + 
['tjerr_5ffatal_37',['TJERR_FATAL',['../group___turbo_j_p_e_g.html#ggafbc17cfa57d0d5d11fea35ac025950feafc9cceeada13122b09e4851e3788039a',1,'turbojpeg.h']]], +@@ -42,7 +42,7 @@ var searchData= + ['tjfree_47',['tjFree',['../group___turbo_j_p_e_g.html#gaea863d2da0cdb609563aabdf9196514b',1,'turbojpeg.h']]], + ['tjgeterrorcode_48',['tjGetErrorCode',['../group___turbo_j_p_e_g.html#ga414feeffbf860ebd31c745df203de410',1,'turbojpeg.h']]], + ['tjgeterrorstr2_49',['tjGetErrorStr2',['../group___turbo_j_p_e_g.html#ga1ead8574f9f39fbafc6b497124e7aafa',1,'turbojpeg.h']]], +- ['tjgetscalingfactors_50',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#gac3854476006b10787bd128f7ede48057',1,'turbojpeg.h']]], ++ ['tjgetscalingfactors_50',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga193d0977b3b9966d53a6c402e90899b1',1,'turbojpeg.h']]], + ['tjgreenoffset_51',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]], + ['tjhandle_52',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]], + ['tjinitcompress_53',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga9d63a05fc6d813f4aae06107041a37e8',1,'turbojpeg.h']]], +@@ -82,7 +82,7 @@ var searchData= + ['tjsaveimage_87',['tjSaveImage',['../group___turbo_j_p_e_g.html#ga6f445b22d8933ae4815b3370a538d879',1,'turbojpeg.h']]], + ['tjscaled_88',['TJSCALED',['../group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df',1,'turbojpeg.h']]], + ['tjscalingfactor_89',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]], +- ['tjtransform_90',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#ga504805ec0161f1b505397ca0118bf8fd',1,'tjtransform(): turbojpeg.h'],['../group___turbo_j_p_e_g.html#ga9cb8abf4cc91881e04a0329b2270be25',1,'tjTransform(tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int 
flags): turbojpeg.h']]], ++ ['tjtransform_90',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#ga9cb8abf4cc91881e04a0329b2270be25',1,'tjTransform(tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags): turbojpeg.h'],['../group___turbo_j_p_e_g.html#ga504805ec0161f1b505397ca0118bf8fd',1,'tjtransform(): turbojpeg.h']]], + ['tjxop_91',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]], + ['tjxop_5fhflip_92',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]], + ['tjxop_5fnone_93',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]], +diff --git a/doc/html/search/functions_0.js b/doc/html/search/functions_0.js +index 4a9ea5b..a608dab 100644 +--- a/doc/html/search/functions_0.js ++++ b/doc/html/search/functions_0.js +@@ -2,23 +2,23 @@ var searchData= + [ + ['tjalloc_114',['tjAlloc',['../group___turbo_j_p_e_g.html#gaec627dd4c5f30b7a775a7aea3bec5d83',1,'turbojpeg.h']]], + ['tjbufsize_115',['tjBufSize',['../group___turbo_j_p_e_g.html#ga67ac12fee79073242cb216e07c9f1f90',1,'turbojpeg.h']]], +- ['tjbufsizeyuv2_116',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga2be2b9969d4df9ecce9b05deed273194',1,'turbojpeg.h']]], ++ ['tjbufsizeyuv2_116',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga5e5aac9e8bcf17049279301e2466474c',1,'turbojpeg.h']]], + ['tjcompress2_117',['tjCompress2',['../group___turbo_j_p_e_g.html#gafbdce0112fd78fd38efae841443a9bcf',1,'turbojpeg.h']]], +- ['tjcompressfromyuv_118',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga7622a459b79aa1007e005b58783f875b',1,'turbojpeg.h']]], ++ 
['tjcompressfromyuv_118',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#gab40f5096a72fd7e5bda9d6b58fa37e2e',1,'turbojpeg.h']]], + ['tjcompressfromyuvplanes_119',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga29ec5dfbd2d84b8724e951d6fa0d5d9e',1,'turbojpeg.h']]], +- ['tjdecodeyuv_120',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga70abbf38f77a26fd6da8813bef96f695',1,'turbojpeg.h']]], ++ ['tjdecodeyuv_120',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga97c2cedc1e2bade15a84164c94e503c1',1,'turbojpeg.h']]], + ['tjdecodeyuvplanes_121',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga10e837c07fa9d25770565b237d3898d9',1,'turbojpeg.h']]], + ['tjdecompress2_122',['tjDecompress2',['../group___turbo_j_p_e_g.html#gae9eccef8b682a48f43a9117c231ed013',1,'turbojpeg.h']]], + ['tjdecompressheader3_123',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga0595681096bba7199cc6f3533cb25f77',1,'turbojpeg.h']]], +- ['tjdecompresstoyuv2_124',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga04d1e839ff9a0860dd1475cff78d3364',1,'turbojpeg.h']]], ++ ['tjdecompresstoyuv2_124',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga5a3093e325598c17a9f004323af6fafa',1,'turbojpeg.h']]], + ['tjdecompresstoyuvplanes_125',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#gaa59f901a5258ada5bd0185ad59368540',1,'turbojpeg.h']]], + ['tjdestroy_126',['tjDestroy',['../group___turbo_j_p_e_g.html#ga75f355fa27225ba1a4ee392c852394d2',1,'turbojpeg.h']]], +- ['tjencodeyuv3_127',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gac519b922cdf446e97d0cdcba513636bf',1,'turbojpeg.h']]], ++ ['tjencodeyuv3_127',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga5d619e0a02b71e05a8dffb764f6d7a64',1,'turbojpeg.h']]], + ['tjencodeyuvplanes_128',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#gae2d04c72457fe7f4d60cf78ab1b1feb1',1,'turbojpeg.h']]], + ['tjfree_129',['tjFree',['../group___turbo_j_p_e_g.html#gaea863d2da0cdb609563aabdf9196514b',1,'turbojpeg.h']]], + 
['tjgeterrorcode_130',['tjGetErrorCode',['../group___turbo_j_p_e_g.html#ga414feeffbf860ebd31c745df203de410',1,'turbojpeg.h']]], + ['tjgeterrorstr2_131',['tjGetErrorStr2',['../group___turbo_j_p_e_g.html#ga1ead8574f9f39fbafc6b497124e7aafa',1,'turbojpeg.h']]], +- ['tjgetscalingfactors_132',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#gac3854476006b10787bd128f7ede48057',1,'turbojpeg.h']]], ++ ['tjgetscalingfactors_132',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga193d0977b3b9966d53a6c402e90899b1',1,'turbojpeg.h']]], + ['tjinitcompress_133',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga9d63a05fc6d813f4aae06107041a37e8',1,'turbojpeg.h']]], + ['tjinitdecompress_134',['tjInitDecompress',['../group___turbo_j_p_e_g.html#ga52300eac3f3d9ef4bab303bc244f62d3',1,'turbojpeg.h']]], + ['tjinittransform_135',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga928beff6ac248ceadf01089fc6b41957',1,'turbojpeg.h']]], +diff --git a/doc/html/structtjtransform.html b/doc/html/structtjtransform.html +index ba78980..9ff248d 100644 +--- a/doc/html/structtjtransform.html ++++ b/doc/html/structtjtransform.html +@@ -84,7 +84,7 @@ Data Fields +  One of the transform operations. More...
+   + int options +- The bitwise OR of one of more of the transform options. More...
++ The bitwise OR of one of more of the transform options. More...
+   + void * data +  Arbitrary data that can be accessed within the body of the callback function. More...
+@@ -115,7 +115,7 @@ Data Fields + coeffspointer to an array of transformed DCT coefficients. (NOTE: this pointer is not guaranteed to be valid once the callback returns, so applications wishing to hand off the DCT coefficients to another function or library should make a copy of them within the body of the callback.) + arrayRegiontjregion structure containing the width and height of the array pointed to by coeffs as well as its offset relative to the component plane. TurboJPEG implementations may choose to split each component plane into multiple DCT coefficient arrays and call the callback function once for each array. + planeRegiontjregion structure containing the width and height of the component plane to which coeffs belongs +- componentIDID number of the component plane to which coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in typical JPEG images.) ++ componentIDID number of the component plane to which coeffs belongs. (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in typical JPEG images.) + transformIDID number of the transformed image to which coeffs belongs. This is the same as the index of the transform in the transforms array that was passed to tjTransform(). + transforma pointer to a tjtransform structure that specifies the parameters and/or cropping region for this transform + +@@ -169,7 +169,7 @@ Data Fields + +
+ +-

The bitwise OR of one of more of the transform options.

++

The bitwise OR of one of more of the transform options.

+ +
+ +diff --git a/java/TJBench.java b/java/TJBench.java +index 3a061d8..e95541d 100644 +--- a/java/TJBench.java ++++ b/java/TJBench.java +@@ -1,6 +1,6 @@ + /* +- * Copyright (C)2009-2014, 2016-2019, 2021 D. R. Commander. +- * All Rights Reserved. ++ * Copyright (C)2009-2014, 2016-2019, 2021, 2023 D. R. Commander. ++ * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -37,7 +37,7 @@ final class TJBench { + + private TJBench() {} + +- private static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvPad = 1; ++ private static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvAlign = 1; + private static boolean compOnly, decompOnly, doTile, doYUV, write = true; + + static final String[] PIXFORMATSTR = { +@@ -192,7 +192,7 @@ final class TJBench { + int width = doTile ? tilew : scaledw; + int height = doTile ? tileh : scaledh; + +- yuvImage = new YUVImage(width, yuvPad, height, subsamp); ++ yuvImage = new YUVImage(width, yuvAlign, height, subsamp); + Arrays.fill(yuvImage.getBuf(), (byte)127); + } + +@@ -212,7 +212,8 @@ final class TJBench { + tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]); + } catch (TJException e) { handleTJException(e); } + if (doYUV) { +- yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, subsamp); ++ yuvImage.setBuf(yuvImage.getBuf(), width, yuvAlign, height, ++ subsamp); + try { + tjd.decompressToYUV(yuvImage, flags); + } catch (TJException e) { handleTJException(e); } +@@ -372,7 +373,7 @@ final class TJBench { + tjc.setSubsamp(subsamp); + + if (doYUV) { +- yuvImage = new YUVImage(tilew, yuvPad, tileh, subsamp); ++ yuvImage = new YUVImage(tilew, yuvAlign, tileh, subsamp); + Arrays.fill(yuvImage.getBuf(), (byte)127); + } + +@@ -393,7 +394,7 @@ final class TJBench { + if (doYUV) { + double startEncode = getTime(); + +- yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, ++ yuvImage.setBuf(yuvImage.getBuf(), width, 
yuvAlign, height, + subsamp); + tjc.encodeYUV(yuvImage, flags); + if (iter >= 0) +@@ -524,7 +525,7 @@ final class TJBench { + + if (quiet == 1) { + System.out.println("All performance values in Mpixels/sec\n"); +- System.out.format("Bitmap JPEG JPEG %s %s Xform Comp Decomp ", ++ System.out.format("Pixel JPEG JPEG %s %s Xform Comp Decomp ", + (doTile ? "Tile " : "Image"), + (doTile ? "Tile " : "Image")); + if (doYUV) +@@ -695,34 +696,30 @@ final class TJBench { + String className = new TJBench().getClass().getName(); + + System.out.println("\nUSAGE: java " + className); +- System.out.println(" [options]\n"); ++ System.out.println(" [options]\n"); + System.out.println(" java " + className); +- System.out.println(" [options]\n"); ++ System.out.println(" [options]\n"); + System.out.println("Options:\n"); +- System.out.println("-alloc = Dynamically allocate JPEG image buffers"); +- System.out.println("-bottomup = Test bottom-up compression/decompression"); +- System.out.println("-tile = Test performance of the codec when the image is encoded as separate"); +- System.out.println(" tiles of varying sizes."); ++ System.out.println("-bottomup = Use bottom-up row order for packed-pixel source/destination buffers"); ++ System.out.println("-tile = Compress/transform the input image into separate JPEG tiles of varying"); ++ System.out.println(" sizes (useful for measuring JPEG overhead)"); + System.out.println("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb ="); +- System.out.println(" Test the specified color conversion path in the codec (default = BGR)"); +- System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in"); +- System.out.println(" the underlying codec"); +- System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying"); +- System.out.println(" codec"); +- System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the"); +- System.out.println(" underlying codec"); ++ 
System.out.println(" Use the specified pixel format for packed-pixel source/destination buffers"); ++ System.out.println(" [default = BGR]"); ++ System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available"); ++ System.out.println("-fastdct = Use the fastest DCT/IDCT algorithm available"); ++ System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithm available"); + System.out.println("-progressive = Use progressive entropy coding in JPEG images generated by"); +- System.out.println(" compression and transform operations."); +- System.out.println("-subsamp = When testing JPEG compression, this option specifies the level"); +- System.out.println(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or"); +- System.out.println(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in"); +- System.out.println(" sequence."); ++ System.out.println(" compression and transform operations"); ++ System.out.println("-subsamp = When compressing, use the specified level of chrominance"); ++ System.out.println(" subsampling ( = 444, 422, 440, 420, 411, or GRAY) [default = test"); ++ System.out.println(" Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence]"); + System.out.println("-quiet = Output results in tabular rather than verbose format"); +- System.out.println("-yuv = Test YUV encoding/decoding functions"); +- System.out.println("-yuvpad

= If testing YUV encoding/decoding, this specifies the number of"); +- System.out.println(" bytes to which each row of each plane in the intermediate YUV image is"); +- System.out.println(" padded (default = 1)"); +- System.out.println("-scale M/N = Scale down the width/height of the decompressed JPEG image by a"); ++ System.out.println("-yuv = Compress from/decompress to intermediate planar YUV images"); ++ System.out.println("-yuvpad

= The number of bytes by which each row in each plane of an"); ++ System.out.println(" intermediate YUV image is evenly divisible (must be a power of 2)"); ++ System.out.println(" [default = 1]"); ++ System.out.println("-scale M/N = When decompressing, scale the width/height of the JPEG image by a"); + System.out.print(" factor of M/N (M/N = "); + for (i = 0; i < nsf; i++) { + System.out.format("%d/%d", scalingFactors[i].getNum(), +@@ -740,24 +737,24 @@ final class TJBench { + } + System.out.println(")"); + System.out.println("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 ="); +- System.out.println(" Perform the corresponding lossless transform prior to"); +- System.out.println(" decompression (these options are mutually exclusive)"); +- System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression"); +- System.out.println(" test (can be combined with the other transforms above)"); ++ System.out.println(" Perform the specified lossless transform operation on the input image"); ++ System.out.println(" prior to decompression (these operations are mutually exclusive)"); ++ System.out.println("-grayscale = Transform the input image into a grayscale JPEG image prior to"); ++ System.out.println(" decompression (can be combined with the other transform operations above)"); + System.out.println("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)"); +- System.out.println(" when transforming the image."); +- System.out.println("-benchtime = Run each benchmark for at least seconds (default = 5.0)"); +- System.out.println("-warmup = Run each benchmark for seconds (default = 1.0) prior to"); ++ System.out.println(" when transforming the input image"); ++ System.out.println("-benchtime = Run each benchmark for at least seconds [default = 5.0]"); ++ System.out.println("-warmup = Run each benchmark for seconds [default = 1.0] prior to"); + System.out.println(" starting the timer, in order to prime the 
caches and thus improve the"); +- System.out.println(" consistency of the results."); ++ System.out.println(" consistency of the benchmark results"); + System.out.println("-componly = Stop after running compression tests. Do not test decompression."); +- System.out.println("-nowrite = Do not write reference or output images (improves consistency"); +- System.out.println(" of performance measurements.)"); ++ System.out.println("-nowrite = Do not write reference or output images (improves consistency of"); ++ System.out.println(" benchmark results)"); + System.out.println("-limitscans = Refuse to decompress or transform progressive JPEG images that"); + System.out.println(" have an unreasonably large number of scans"); + System.out.println("-stoponwarning = Immediately discontinue the current"); +- System.out.println(" compression/decompression/transform operation if the underlying codec"); +- System.out.println(" throws a warning (non-fatal error)\n"); ++ System.out.println(" compression/decompression/transform operation if a warning (non-fatal"); ++ System.out.println(" error) occurs\n"); + System.out.println("NOTE: If the quality is specified as a range (e.g. 
90-100), a separate"); + System.out.println("test will be performed for all quality values in the range.\n"); + System.exit(1); +@@ -785,18 +782,18 @@ final class TJBench { + minArg = 2; + if (argv.length < minArg) + usage(); ++ String[] quals = argv[1].split("-", 2); + try { +- minQual = Integer.parseInt(argv[1]); ++ minQual = Integer.parseInt(quals[0]); + } catch (NumberFormatException e) {} + if (minQual < 1 || minQual > 100) + throw new Exception("Quality must be between 1 and 100."); +- int dashIndex = argv[1].indexOf('-'); +- if (dashIndex > 0 && argv[1].length() > dashIndex + 1) { ++ if (quals.length > 1) { + try { +- maxQual = Integer.parseInt(argv[1].substring(dashIndex + 1)); ++ maxQual = Integer.parseInt(quals[1]); + } catch (NumberFormatException e) {} + } +- if (maxQual < 1 || maxQual > 100) ++ if (maxQual < 1 || maxQual > 100 || maxQual < minQual) + maxQual = minQual; + } + +@@ -805,7 +802,7 @@ final class TJBench { + if (argv[i].equalsIgnoreCase("-tile")) { + doTile = true; xformOpt |= TJTransform.OPT_CROP; + } else if (argv[i].equalsIgnoreCase("-fastupsample")) { +- System.out.println("Using fast upsampling code\n"); ++ System.out.println("Using fastest upsampling algorithm\n"); + flags |= TJ.FLAG_FASTUPSAMPLE; + } else if (argv[i].equalsIgnoreCase("-fastdct")) { + System.out.println("Using fastest DCT/IDCT algorithm\n"); +@@ -816,6 +813,7 @@ final class TJBench { + } else if (argv[i].equalsIgnoreCase("-progressive")) { + System.out.println("Using progressive entropy coding\n"); + flags |= TJ.FLAG_PROGRESSIVE; ++ xformOpt |= TJTransform.OPT_PROGRESSIVE; + } else if (argv[i].equalsIgnoreCase("-rgb")) + pf = TJ.PF_RGB; + else if (argv[i].equalsIgnoreCase("-rgbx")) +@@ -902,7 +900,7 @@ final class TJBench { + } else + usage(); + } else if (argv[i].equalsIgnoreCase("-yuv")) { +- System.out.println("Testing YUV planar encoding/decoding\n"); ++ System.out.println("Testing planar YUV encoding/decoding\n"); + doYUV = true; + } else if 
(argv[i].equalsIgnoreCase("-yuvpad") && + i < argv.length - 1) { +@@ -911,8 +909,10 @@ final class TJBench { + try { + temp = Integer.parseInt(argv[++i]); + } catch (NumberFormatException e) {} +- if (temp >= 1) +- yuvPad = temp; ++ if (temp >= 1 && (temp & (temp - 1)) == 0) ++ yuvAlign = temp; ++ else ++ usage(); + } else if (argv[i].equalsIgnoreCase("-subsamp") && + i < argv.length - 1) { + i++; +@@ -928,6 +928,8 @@ final class TJBench { + subsamp = TJ.SAMP_420; + else if (argv[i].equals("411")) + subsamp = TJ.SAMP_411; ++ else ++ usage(); + } else if (argv[i].equalsIgnoreCase("-componly")) + compOnly = true; + else if (argv[i].equalsIgnoreCase("-nowrite")) +@@ -945,8 +947,9 @@ final class TJBench { + + if ((sf.getNum() != 1 || sf.getDenom() != 1) && doTile) { + System.out.println("Disabling tiled compression/decompression tests, because those tests do not"); +- System.out.println("work when scaled decompression is enabled."); ++ System.out.println("work when scaled decompression is enabled.\n"); + doTile = false; ++ xformOpt &= (~TJTransform.OPT_CROP); + } + + if (!decompOnly) { +@@ -961,7 +964,7 @@ final class TJBench { + + if (quiet == 1 && !decompOnly) { + System.out.println("All performance values in Mpixels/sec\n"); +- System.out.format("Bitmap JPEG JPEG %s %s ", ++ System.out.format("Pixel JPEG JPEG %s %s ", + (doTile ? "Tile " : "Image"), + (doTile ? "Tile " : "Image")); + if (doYUV) +diff --git a/java/TJExample.java b/java/TJExample.java +index 7859886..5ff1c52 100644 +--- a/java/TJExample.java ++++ b/java/TJExample.java +@@ -1,6 +1,6 @@ + /* +- * Copyright (C)2011-2012, 2014-2015, 2017-2018 D. R. Commander. +- * All Rights Reserved. ++ * Copyright (C)2011-2012, 2014-2015, 2017-2018, 2023 D. R. Commander. ++ * All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -136,14 +136,11 @@ class TJExample implements TJCustomFilter { + System.out.println("-display = Display output image (Output filename need not be specified in this"); + System.out.println(" case.)\n"); + +- System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in"); +- System.out.println(" the underlying codec.\n"); ++ System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available\n"); + +- System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying"); +- System.out.println(" codec.\n"); ++ System.out.println("-fastdct = Use the fastest DCT/IDCT algorithm available\n"); + +- System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the"); +- System.out.println(" underlying codec.\n"); ++ System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithm available\n"); + + System.exit(1); + } +diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java +index 91ad5fd..20de6df 100644 +--- a/java/TJUnitTest.java ++++ b/java/TJUnitTest.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2011-2018 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011-2018, 2023 D. R. Commander. All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -48,10 +48,10 @@ final class TJUnitTest { + static void usage() { + System.out.println("\nUSAGE: java " + CLASS_NAME + " [options]\n"); + System.out.println("Options:"); +- System.out.println("-yuv = test YUV encoding/decoding support"); +- System.out.println("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest"); +- System.out.println(" 4-byte boundary"); +- System.out.println("-bi = test BufferedImage support\n"); ++ System.out.println("-yuv = test YUV encoding/compression/decompression/decoding"); ++ System.out.println("-noyuvpad = do not pad each row in each Y, U, and V plane to the nearest"); ++ System.out.println(" multiple of 4 bytes"); ++ System.out.println("-bi = test BufferedImage I/O\n"); + System.exit(1); + } + +@@ -92,7 +92,7 @@ final class TJUnitTest { + }; + + private static boolean doYUV = false; +- private static int pad = 4; ++ private static int yuvAlign = 4; + private static boolean bi = false; + + private static int exitStatus = 0; +@@ -532,7 +532,7 @@ final class TJUnitTest { + int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8; + int pw = pad(w, hsf), ph = pad(h, vsf); + int cw = pw / hsf, ch = ph / vsf; +- int ypitch = pad(pw, pad), uvpitch = pad(cw, pad); ++ int ypitch = pad(pw, yuvAlign), uvpitch = pad(cw, yuvAlign); + int retval = 1; + int correctsize = ypitch * ph + + (subsamp == TJ.SAMP_GRAY ? 0 : uvpitch * ch * 2); +@@ -668,7 +668,7 @@ final class TJUnitTest { + if (doYUV) { + System.out.format("%s %s -> YUV %s ... 
", pfStrLong, buStrLong, + SUBNAME_LONG[subsamp]); +- YUVImage yuvImage = tjc.encodeYUV(pad, flags); ++ YUVImage yuvImage = tjc.encodeYUV(yuvAlign, flags); + if (checkBufYUV(yuvImage.getBuf(), yuvImage.getSize(), w, h, subsamp, + new TJScalingFactor(1, 1)) == 1) + System.out.print("Passed.\n"); +@@ -733,8 +733,8 @@ final class TJUnitTest { + if (!sf.isOne()) + System.out.format("%d/%d ... ", sf.getNum(), sf.getDenom()); + else System.out.print("... "); +- YUVImage yuvImage = tjd.decompressToYUV(scaledWidth, pad, scaledHeight, +- flags); ++ YUVImage yuvImage = tjd.decompressToYUV(scaledWidth, yuvAlign, ++ scaledHeight, flags); + if (checkBufYUV(yuvImage.getBuf(), yuvImage.getSize(), scaledWidth, + scaledHeight, subsamp, sf) == 1) + System.out.print("Passed.\n"); +@@ -837,6 +837,55 @@ final class TJUnitTest { + if (tjd != null) tjd.close(); + } + ++ static void overflowTest() throws Exception { ++ /* Ensure that the various buffer size methods don't overflow */ ++ int size = 0; ++ boolean exception = false; ++ ++ try { ++ exception = false; ++ size = TJ.bufSize(18919, 18919, TJ.SAMP_444); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.bufSize() overflow"); ++ try { ++ exception = false; ++ size = TJ.bufSizeYUV(26755, 1, 26755, TJ.SAMP_444); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.bufSizeYUV() overflow"); ++ try { ++ exception = false; ++ size = TJ.bufSizeYUV(26754, 3, 26754, TJ.SAMP_444); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.bufSizeYUV() overflow"); ++ try { ++ exception = false; ++ size = TJ.bufSizeYUV(26754, -1, 26754, TJ.SAMP_444); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.bufSizeYUV() overflow"); ++ try { ++ exception = false; ++ size = TJ.planeSizeYUV(0, 46341, 0, 46341, TJ.SAMP_444); ++ } catch (Exception e) 
{ exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.planeSizeYUV() overflow"); ++ try { ++ exception = false; ++ size = TJ.planeWidth(0, Integer.MAX_VALUE, TJ.SAMP_420); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.planeWidth() overflow"); ++ try { ++ exception = false; ++ size = TJ.planeHeight(0, Integer.MAX_VALUE, TJ.SAMP_420); ++ } catch (Exception e) { exception = true; } ++ if (!exception || size != 0) ++ throw new Exception("TJ.planeHeight() overflow"); ++ } ++ + static void bufSizeTest() throws Exception { + int w, h, i, subsamp; + byte[] srcBuf, dstBuf = null; +@@ -855,7 +904,7 @@ final class TJUnitTest { + System.out.format("%04d x %04d\b\b\b\b\b\b\b\b\b\b\b", w, h); + srcBuf = new byte[w * h * 4]; + if (doYUV) +- dstImage = new YUVImage(w, pad, h, subsamp); ++ dstImage = new YUVImage(w, yuvAlign, h, subsamp); + else + dstBuf = new byte[TJ.bufSize(w, h, subsamp)]; + for (i = 0; i < w * h * 4; i++) { +@@ -871,7 +920,7 @@ final class TJUnitTest { + + srcBuf = new byte[h * w * 4]; + if (doYUV) +- dstImage = new YUVImage(h, pad, w, subsamp); ++ dstImage = new YUVImage(h, yuvAlign, w, subsamp); + else + dstBuf = new byte[TJ.bufSize(h, w, subsamp)]; + for (i = 0; i < h * w * 4; i++) { +@@ -903,7 +952,7 @@ final class TJUnitTest { + if (argv[i].equalsIgnoreCase("-yuv")) + doYUV = true; + else if (argv[i].equalsIgnoreCase("-noyuvpad")) +- pad = 1; ++ yuvAlign = 1; + else if (argv[i].equalsIgnoreCase("-bi")) { + bi = true; + testName = "javabitest"; +@@ -912,6 +961,7 @@ final class TJUnitTest { + } + if (doYUV) + FORMATS_4BYTE[4] = -1; ++ overflowTest(); + doTest(35, 39, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_444, + testName); + doTest(39, 41, bi ? FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_444, +diff --git a/java/doc/index-all.html b/java/doc/index-all.html +index 5def53e..0224536 100644 +--- a/java/doc/index-all.html ++++ b/java/doc/index-all.html +@@ -74,8 +74,9 @@ + +

bufSizeYUV(int, int, int, int) - Static method in class org.libjpegturbo.turbojpeg.TJ
+
+-
Returns the size of the buffer (in bytes) required to hold a YUV planar +- image with the given width, height, and level of chrominance subsampling.
++
Returns the size of the buffer (in bytes) required to hold a unified ++ planar YUV image with the given width, height, and level of chrominance ++ subsampling.
+
+
bufSizeYUV(int, int, int) - Static method in class org.libjpegturbo.turbojpeg.TJ
+
+@@ -103,13 +104,14 @@ +
+
compress(byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Compress the uncompressed source image associated with this compressor +- instance and output a JPEG image to the given destination buffer.
++
Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and output a JPEG image to the given destination ++ buffer.
+
+
compress(int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Compress the uncompressed source image associated with this compressor +- instance and return a buffer containing a JPEG image.
++
Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and return a buffer containing a JPEG image.
+
+
compress(BufferedImage, byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+@@ -161,9 +163,9 @@ +
+
decompress(byte[], int, int, int, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer.
++
Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer.
+
+
decompress(byte[], int, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -174,32 +176,35 @@ +
+
decompress(int, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Decompress the JPEG source image associated with this decompressor +- instance and return a buffer containing the decompressed image.
++
Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a buffer containing ++ the packed-pixel decompressed image.
+
+
decompress(int[], int, int, int, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer.
++
Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer.
+
+
decompress(BufferedImage, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a decompressed/decoded image to +- the given BufferedImage instance.
++
Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ decompressed/decoded image to the given BufferedImage ++ instance.
+
+
decompress(int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and return a BufferedImage +- instance containing the decompressed/decoded image.
++
Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a ++ BufferedImage instance containing the packed-pixel ++ decompressed/decoded image.
+
+
decompressToYUV(YUVImage, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+
Decompress the JPEG source image associated with this decompressor +- instance into a YUV planar image and store it in the given +- YUVImage instance.
++ instance into a planar YUV image and store it in the given ++ YUVImage instance. +
+
decompressToYUV(byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -211,13 +216,13 @@ +
+
Decompress the JPEG source image associated with this decompressor + instance into a set of Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the decompressed image planes.
++ YUVImage instance containing the decompressed image planes. +
+
decompressToYUV(int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+
Decompress the JPEG source image associated with this decompressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the decompressed image.
++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the decompressed image. +
+
decompressToYUV(int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -233,9 +238,9 @@ +
+
encodeYUV(YUVImage, int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Encode the uncompressed source image associated with this compressor +- instance into a YUV planar image and store it in the given +- YUVImage instance.
++
Encode the packed-pixel source image associated with this compressor ++ instance into a planar YUV image and store it in the given ++ YUVImage instance.
+
+
encodeYUV(byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+@@ -245,15 +250,15 @@ +
+
encodeYUV(int, int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Encode the uncompressed source image associated with this compressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the encoded image.
++
Encode the packed-pixel source image associated with this compressor ++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the encoded image.
+
+
encodeYUV(int[], int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Encode the uncompressed source image associated with this compressor ++
Encode the packed-pixel source image associated with this compressor + instance into separate Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the encoded image planes.
++ YUVImage instance containing the encoded image planes.
+
+
encodeYUV(int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+@@ -288,8 +293,8 @@ +
+
ERR_WARNING - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+-
The error was non-fatal and recoverable, but the image may still be +- corrupt.
++
The error was non-fatal and recoverable, but the destination image may ++ still be corrupt.
+
+
+ +@@ -303,23 +308,21 @@ +
 
+
FLAG_ACCURATEDCT - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+-
Use the most accurate DCT/IDCT algorithm available in the underlying +- codec.
++
Use the most accurate DCT/IDCT algorithm available.
+
+
FLAG_BOTTOMUP - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+-
The uncompressed source/destination image is stored in bottom-up (Windows, +- OpenGL) order, not top-down (X11) order.
++
Rows in the packed-pixel source/destination image are stored in bottom-up ++ (Windows, OpenGL) order rather than in top-down (X11) order.
+
+
FLAG_FASTDCT - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+-
Use the fastest DCT/IDCT algorithm available in the underlying codec.
++
Use the fastest DCT/IDCT algorithm available.
+
+
FLAG_FASTUPSAMPLE - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
When decompressing an image that was compressed using chrominance +- subsampling, use the fastest chrominance upsampling algorithm available in +- the underlying codec.
++ subsampling, use the fastest chrominance upsampling algorithm available. +
+
FLAG_FORCEMMX - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+@@ -350,7 +353,7 @@ +
FLAG_STOPONWARNING - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
Immediately discontinue the current compression/decompression/transform +- operation if the underlying codec throws a warning (non-fatal error).
++ operation if a warning (non-fatal error) occurs. +
+
+ +@@ -370,8 +373,8 @@ +
+
getBuf() - Method in class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Returns the YUV image buffer (if this image is stored in a unified +- buffer rather than separate image planes.)
++
Returns the YUV buffer (if this image is stored in a unified buffer rather ++ than separate image planes.)
+
+
getColorspace() - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -408,7 +411,7 @@ +
+
getJPEGBuf() - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Returns the JPEG image buffer associated with this decompressor instance.
++
Returns the JPEG buffer associated with this decompressor instance.
+
+
getJPEGSize() - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -436,7 +439,7 @@ +
+
getPad() - Method in class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Returns the line padding used in the YUV image buffer (if this image is ++
Returns the row alignment (in bytes) of the YUV buffer (if this image is + stored in a unified buffer rather than separate image planes.)
+
+
getPixelSize(int) - Static method in class org.libjpegturbo.turbojpeg.TJ
+@@ -470,17 +473,17 @@ + +
getScalingFactors() - Static method in class org.libjpegturbo.turbojpeg.TJ
+
+-
Returns a list of fractional scaling factors that the JPEG decompressor in +- this implementation of TurboJPEG supports.
++
Returns a list of fractional scaling factors that the JPEG decompressor ++ supports.
+
+
getSize() - Method in class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Returns the size (in bytes) of the YUV image buffer (if this image is +- stored in a unified buffer rather than separate image planes.)
++
Returns the size (in bytes) of the YUV buffer (if this image is stored in ++ a unified buffer rather than separate image planes.)
+
+
getStrides() - Method in class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Returns the number of bytes per line of each plane in the YUV image.
++
Returns the number of bytes per row of each plane in the YUV image.
+
+
getSubsamp() - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -494,7 +497,7 @@ +
getTransformedSizes() - Method in class org.libjpegturbo.turbojpeg.TJTransformer
+
+
Returns an array containing the sizes of the transformed JPEG images +- generated by the most recent transform operation.
++ (in bytes) generated by the most recent transform operation. +
+
getWidth() - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -578,7 +581,7 @@ +
+
op - Variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+-
Transform operation (one of OP_*)
++
Transform operation (one of OP_*)
+
+
OP_HFLIP - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+@@ -616,7 +619,7 @@ +
OPT_COPYNONE - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+
This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF +- and ICC profile data) from the source image to the output image.
++ and ICC profile data) from the source image to the destination image. +
+
OPT_CROP - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+@@ -624,8 +627,8 @@ +
+
OPT_GRAY - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+-
This option will discard the color data in the input image and produce +- a grayscale output image.
++
This option will discard the color data in the source image and produce a ++ grayscale destination image.
+
+
OPT_NOOUTPUT - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+@@ -639,7 +642,7 @@ +
+
OPT_PROGRESSIVE - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+-
This option will enable progressive entropy coding in the output image ++
This option will enable progressive entropy coding in the JPEG image + generated by this particular transform.
+
+
OPT_TRIM - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+@@ -649,7 +652,8 @@ +
+
options - Variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+-
Transform options (bitwise OR of one or more of OPT_*)
++
Transform options (bitwise OR of one or more of ++ OPT_*)
+
+
org.libjpegturbo.turbojpeg - package org.libjpegturbo.turbojpeg
+
 
+@@ -756,7 +760,7 @@ + +
setBuf(byte[], int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Assign a unified image buffer to this YUVImage instance.
++
Assign a unified buffer to this YUVImage instance.
+
+
setJPEGImage(byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -770,7 +774,7 @@ +
+
setSourceImage(byte[], int, int, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Associate an uncompressed RGB, grayscale, or CMYK source image with this ++
Associate a packed-pixel RGB, grayscale, or CMYK source image with this + compressor instance.
+
+
setSourceImage(byte[], int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+@@ -782,13 +786,12 @@ + +
setSourceImage(BufferedImage, int, int, int, int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Associate an uncompressed RGB or grayscale source image with this ++
Associate a packed-pixel RGB or grayscale source image with this + compressor instance.
+
+
setSourceImage(YUVImage) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Associate an uncompressed YUV planar source image with this compressor +- instance.
++
Associate a planar YUV source image with this compressor instance.
+
+
setSourceImage(byte[], int) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+@@ -798,7 +801,7 @@ +
+
setSourceImage(YUVImage) - Method in class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Associate the specified YUV planar source image with this decompressor ++
Associate the specified planar YUV source image with this decompressor + instance.
+
+
setSubsamp(int) - Method in class org.libjpegturbo.turbojpeg.TJCompressor
+@@ -826,7 +829,7 @@ + +
TJCompressor(byte[], int, int, int, int, int, int) - Constructor for class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Create a TurboJPEG compressor instance and associate the uncompressed ++
Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
+
+@@ -839,7 +842,7 @@ + +
TJCompressor(BufferedImage, int, int, int, int) - Constructor for class org.libjpegturbo.turbojpeg.TJCompressor
+
+-
Create a TurboJPEG compressor instance and associate the uncompressed ++
Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
+
+@@ -858,17 +861,19 @@ +
TJDecompressor(byte[]) - Constructor for class org.libjpegturbo.turbojpeg.TJDecompressor
+
+
Create a TurboJPEG decompressor instance and associate the JPEG source +- image stored in jpegImage with the newly created instance.
++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ stored in jpegImage with the newly created instance. +
+
TJDecompressor(byte[], int) - Constructor for class org.libjpegturbo.turbojpeg.TJDecompressor
+
+
Create a TurboJPEG decompressor instance and associate the JPEG source +- image of length imageSize bytes stored in +- jpegImage with the newly created instance.
++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ of length imageSize bytes stored in jpegImage ++ with the newly created instance. +
+
TJDecompressor(YUVImage) - Constructor for class org.libjpegturbo.turbojpeg.TJDecompressor
+
+-
Create a TurboJPEG decompressor instance and associate the YUV planar ++
Create a TurboJPEG decompressor instance and associate the planar YUV + source image stored in yuvImage with the newly created + instance.
+
+@@ -919,25 +924,26 @@ +
TJTransformer(byte[]) - Constructor for class org.libjpegturbo.turbojpeg.TJTransformer
+
+
Create a TurboJPEG lossless transformer instance and associate the JPEG +- image stored in jpegImage with the newly created instance.
++ source image stored in jpegImage with the newly created ++ instance. +
+
TJTransformer(byte[], int) - Constructor for class org.libjpegturbo.turbojpeg.TJTransformer
+
+
Create a TurboJPEG lossless transformer instance and associate the JPEG +- image of length imageSize bytes stored in ++ source image of length imageSize bytes stored in + jpegImage with the newly created instance.
+
+
transform(byte[][], TJTransform[], int) - Method in class org.libjpegturbo.turbojpeg.TJTransformer
+
+-
Losslessly transform the JPEG image associated with this transformer +- instance into one or more JPEG images stored in the given destination +- buffers.
++
Losslessly transform the JPEG source image associated with this ++ transformer instance into one or more JPEG images stored in the given ++ destination buffers.
+
+
transform(TJTransform[], int) - Method in class org.libjpegturbo.turbojpeg.TJTransformer
+
+-
Losslessly transform the JPEG image associated with this transformer +- instance and return an array of TJDecompressor instances, each of +- which has a transformed JPEG image associated with it.
++
Losslessly transform the JPEG source image associated with this ++ transformer instance and return an array of TJDecompressor ++ instances, each of which has a transformed JPEG image associated with it.
+
+ + +@@ -945,13 +951,15 @@ + +

Y

+
++
yuvAlign - Variable in class org.libjpegturbo.turbojpeg.YUVImage
++
 
+
yuvHeight - Variable in class org.libjpegturbo.turbojpeg.YUVImage
+
 
+
yuvImage - Variable in class org.libjpegturbo.turbojpeg.TJDecompressor
+
 
+
YUVImage - Class in org.libjpegturbo.turbojpeg
+
+-
This class encapsulates a YUV planar image and the metadata ++
This class encapsulates a planar YUV image and the metadata + associated with it.
+
+
YUVImage(int, int[], int, int) - Constructor for class org.libjpegturbo.turbojpeg.YUVImage
+@@ -961,8 +969,8 @@ + +
YUVImage(int, int, int, int) - Constructor for class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Create a new YUVImage instance backed by a unified image +- buffer, and allocate memory for the image buffer.
++
Create a new YUVImage instance backed by a unified buffer, ++ and allocate memory for the buffer.
+
+
YUVImage(byte[][], int[], int, int[], int, int) - Constructor for class org.libjpegturbo.turbojpeg.YUVImage
+
+@@ -971,13 +979,11 @@ +
+
YUVImage(byte[], int, int, int, int) - Constructor for class org.libjpegturbo.turbojpeg.YUVImage
+
+-
Create a new YUVImage instance from an existing unified image ++
Create a new YUVImage instance from an existing unified + buffer.
+
+
yuvOffsets - Variable in class org.libjpegturbo.turbojpeg.YUVImage
+
 
+-
yuvPad - Variable in class org.libjpegturbo.turbojpeg.YUVImage
+-
 
+
yuvPlanes - Variable in class org.libjpegturbo.turbojpeg.YUVImage
+
 
+
yuvStrides - Variable in class org.libjpegturbo.turbojpeg.YUVImage
+diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJ.html b/java/doc/org/libjpegturbo/turbojpeg/TJ.html +index 2a3de37..f57baa7 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJ.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJ.html +@@ -156,36 +156,34 @@ extends java.lang.Object + + static int + ERR_WARNING +-
The error was non-fatal and recoverable, but the image may still be +- corrupt.
++
The error was non-fatal and recoverable, but the destination image may ++ still be corrupt.
+ + + + static int + FLAG_ACCURATEDCT +-
Use the most accurate DCT/IDCT algorithm available in the underlying +- codec.
++
Use the most accurate DCT/IDCT algorithm available.
+ + + + static int + FLAG_BOTTOMUP +-
The uncompressed source/destination image is stored in bottom-up (Windows, +- OpenGL) order, not top-down (X11) order.
++
Rows in the packed-pixel source/destination image are stored in bottom-up ++ (Windows, OpenGL) order rather than in top-down (X11) order.
+ + + + static int + FLAG_FASTDCT +-
Use the fastest DCT/IDCT algorithm available in the underlying codec.
++
Use the fastest DCT/IDCT algorithm available.
+ + + + static int + FLAG_FASTUPSAMPLE +
When decompressing an image that was compressed using chrominance +- subsampling, use the fastest chrominance upsampling algorithm available in +- the underlying codec.
++ subsampling, use the fastest chrominance upsampling algorithm available. + + + +@@ -230,7 +228,7 @@ extends java.lang.Object + static int + FLAG_STOPONWARNING +
Immediately discontinue the current compression/decompression/transform +- operation if the underlying codec throws a warning (non-fatal error).
++ operation if a warning (non-fatal error) occurs. + + + +@@ -402,11 +400,12 @@ extends java.lang.Object + + static int + bufSizeYUV(int width, +- int pad, ++ int align, + int height, + int subsamp) +-
Returns the size of the buffer (in bytes) required to hold a YUV planar +- image with the given width, height, and level of chrominance subsampling.
++
Returns the size of the buffer (in bytes) required to hold a unified ++ planar YUV image with the given width, height, and level of chrominance ++ subsampling.
+ + + +@@ -460,8 +459,8 @@ extends java.lang.Object + + static TJScalingFactor[] + getScalingFactors() +-
Returns a list of fractional scaling factors that the JPEG decompressor in +- this implementation of TurboJPEG supports.
++
Returns a list of fractional scaling factors that the JPEG decompressor ++ supports.
+ + + +@@ -778,8 +777,8 @@ extends java.lang.Object + vice versa, but the mapping is typically not 1:1 or reversible, nor can it + be defined with a simple formula. Thus, such a conversion is out of scope + for a codec library. However, the TurboJPEG API allows for compressing +- CMYK pixels into a YCCK JPEG image (see CS_YCCK) and +- decompressing YCCK JPEG images into CMYK pixels. ++ packed-pixel CMYK images into YCCK JPEG images (see CS_YCCK) and ++ decompressing YCCK JPEG images into packed-pixel CMYK images. +
See Also:
Constant Field Values
+ + +@@ -804,8 +803,9 @@ extends java.lang.Object +
RGB colorspace. When compressing the JPEG image, the R, G, and B + components in the source image are reordered into image planes, but no + colorspace conversion or subsampling is performed. RGB JPEG images can be +- decompressed to any of the extended RGB pixel formats or grayscale, but +- they cannot be decompressed to YUV images.
++ decompressed to packed-pixel images with any of the extended RGB or ++ grayscale pixel formats, but they cannot be decompressed to planar YUV ++ images. +
See Also:
Constant Field Values
+ + +@@ -826,10 +826,11 @@ extends java.lang.Object + transformation allowed the same signal to drive both black & white and + color televisions, but JPEG images use YCbCr primarily because it allows + the color data to be optionally subsampled for the purposes of reducing +- bandwidth or disk space. YCbCr is the most common JPEG colorspace, and +- YCbCr JPEG images can be compressed from and decompressed to any of the +- extended RGB pixel formats or grayscale, or they can be decompressed to +- YUV planar images. ++ network or disk usage. YCbCr is the most common JPEG colorspace, and ++ YCbCr JPEG images can be compressed from and decompressed to packed-pixel ++ images with any of the extended RGB or grayscale pixel formats. YCbCr ++ JPEG images can also be compressed from and decompressed to planar YUV ++ images. +
See Also:
Constant Field Values
+ + +@@ -842,9 +843,10 @@ extends java.lang.Object +
public static final int CS_GRAY
+
Grayscale colorspace. The JPEG image retains only the luminance data (Y + component), and any color data from the source image is discarded. +- Grayscale JPEG images can be compressed from and decompressed to any of +- the extended RGB pixel formats or grayscale, or they can be decompressed +- to YUV planar images.
++ Grayscale JPEG images can be compressed from and decompressed to ++ packed-pixel images with any of the extended RGB or grayscale pixel ++ formats, or they can be compressed from and decompressed to planar YUV ++ images. +
See Also:
Constant Field Values
+ + +@@ -858,7 +860,7 @@ extends java.lang.Object +
CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K + components in the source image are reordered into image planes, but no + colorspace conversion or subsampling is performed. CMYK JPEG images can +- only be decompressed to CMYK pixels.
++ only be decompressed to packed-pixel images with the CMYK pixel format. +
See Also:
Constant Field Values
+ + +@@ -875,7 +877,7 @@ extends java.lang.Object + reversibly transformed into YCCK, and as with YCbCr, the chrominance + components in the YCCK pixels can be subsampled without incurring major + perceptual loss. YCCK JPEG images can only be compressed from and +- decompressed to CMYK pixels. ++ decompressed to packed-pixel images with the CMYK pixel format. +
See Also:
Constant Field Values
+ + +@@ -886,8 +888,8 @@ extends java.lang.Object +
  • +

    FLAG_BOTTOMUP

    +
    public static final int FLAG_BOTTOMUP
    +-
    The uncompressed source/destination image is stored in bottom-up (Windows, +- OpenGL) order, not top-down (X11) order.
    ++
    Rows in the packed-pixel source/destination image are stored in bottom-up ++ (Windows, OpenGL) order rather than in top-down (X11) order.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -947,10 +949,10 @@ public static final int FLAG_FORCESSE3 +

    FLAG_FASTUPSAMPLE

    +
    public static final int FLAG_FASTUPSAMPLE
    +
    When decompressing an image that was compressed using chrominance +- subsampling, use the fastest chrominance upsampling algorithm available in +- the underlying codec. The default is to use smooth upsampling, which +- creates a smooth transition between neighboring chrominance components in +- order to reduce upsampling artifacts in the decompressed image.
    ++ subsampling, use the fastest chrominance upsampling algorithm available. ++ The default is to use smooth upsampling, which creates a smooth transition ++ between neighboring chrominance components in order to reduce upsampling ++ artifacts in the decompressed image. +
    See Also:
    Constant Field Values
    + + +@@ -961,12 +963,12 @@ public static final int FLAG_FORCESSE3 +
  • +

    FLAG_FASTDCT

    +
    public static final int FLAG_FASTDCT
    +-
    Use the fastest DCT/IDCT algorithm available in the underlying codec. The +- default if this flag is not specified is implementation-specific. For +- example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast +- algorithm by default when compressing, because this has been shown to have +- only a very slight effect on accuracy, but it uses the accurate algorithm +- when decompressing, because this has been shown to have a larger effect.
    ++
    Use the fastest DCT/IDCT algorithm available. The default if this flag is ++ not specified is implementation-specific. For example, the implementation ++ of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default ++ when compressing, because this has been shown to have only a very slight ++ effect on accuracy, but it uses the accurate algorithm when decompressing, ++ because this has been shown to have a larger effect.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -977,13 +979,12 @@ public static final int FLAG_FORCESSE3 +
  • +

    FLAG_ACCURATEDCT

    +
    public static final int FLAG_ACCURATEDCT
    +-
    Use the most accurate DCT/IDCT algorithm available in the underlying +- codec. The default if this flag is not specified is +- implementation-specific. For example, the implementation of TurboJPEG for +- libjpeg[-turbo] uses the fast algorithm by default when compressing, +- because this has been shown to have only a very slight effect on accuracy, +- but it uses the accurate algorithm when decompressing, because this has +- been shown to have a larger effect.
    ++
    Use the most accurate DCT/IDCT algorithm available. The default if this ++ flag is not specified is implementation-specific. For example, the ++ implementation of the TurboJPEG API in libjpeg-turbo uses the fast ++ algorithm by default when compressing, because this has been shown to have ++ only a very slight effect on accuracy, but it uses the accurate algorithm ++ when decompressing, because this has been shown to have a larger effect.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -995,14 +996,13 @@ public static final int FLAG_FORCESSE3 +

    FLAG_STOPONWARNING

    +
    public static final int FLAG_STOPONWARNING
    +
    Immediately discontinue the current compression/decompression/transform +- operation if the underlying codec throws a warning (non-fatal error). The +- default behavior is to allow the operation to complete unless a fatal +- error is encountered. ++ operation if a warning (non-fatal error) occurs. The default behavior is ++ to allow the operation to complete unless a fatal error is encountered. +

    + NOTE: due to the design of the TurboJPEG Java API, only certain methods + (specifically, TJDecompressor.decompress*() methods +- with a void return type) will complete and leave the output image in a +- fully recoverable state after a non-fatal error occurs.

    ++ with a void return type) will complete and leave the destination image in ++ a fully recoverable state after a non-fatal error occurs. +
    See Also:
    Constant Field Values
    + + +@@ -1055,13 +1055,13 @@ public static final int FLAG_FORCESSE3 +
  • +

    ERR_WARNING

    +
    public static final int ERR_WARNING
    +-
    The error was non-fatal and recoverable, but the image may still be +- corrupt. ++
    The error was non-fatal and recoverable, but the destination image may ++ still be corrupt. +

    + NOTE: due to the design of the TurboJPEG Java API, only certain methods + (specifically, TJDecompressor.decompress*() methods +- with a void return type) will complete and leave the output image in a +- fully recoverable state after a non-fatal error occurs.

    ++ with a void return type) will complete and leave the destination image in ++ a fully recoverable state after a non-fatal error occurs.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -1094,7 +1094,7 @@ public static final int FLAG_FORCESSE3 +
    Returns the MCU block width for the given level of chrominance + subsampling.
    +
    Parameters:
    subsamp - the level of chrominance subsampling (one of +- SAMP_*)
    ++ SAMP_*) +
    Returns:
    the MCU block width for the given level of chrominance + subsampling.
    + +@@ -1109,7 +1109,7 @@ public static final int FLAG_FORCESSE3 +
    Returns the MCU block height for the given level of chrominance + subsampling.
    +
    Parameters:
    subsamp - the level of chrominance subsampling (one of +- SAMP_*)
    ++ SAMP_*) +
    Returns:
    the MCU block height for the given level of chrominance + subsampling.
    + +@@ -1122,7 +1122,7 @@ public static final int FLAG_FORCESSE3 +

    getPixelSize

    +
    public static int getPixelSize(int pixelFormat)
    +
    Returns the pixel size (in bytes) for the given pixel format.
    +-
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    ++
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    +
    Returns:
    the pixel size (in bytes) for the given pixel format.
    + + +@@ -1138,7 +1138,7 @@ public static final int FLAG_FORCESSE3 + of format TJ.PF_BGRX is stored in char pixel[], + then the red component will be + pixel[TJ.getRedOffset(TJ.PF_BGRX)]. +-
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    ++
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    +
    Returns:
    the red offset for the given pixel format, or -1 if the pixel + format does not have a red component.
    + +@@ -1155,7 +1155,7 @@ public static final int FLAG_FORCESSE3 + of format TJ.PF_BGRX is stored in char pixel[], + then the green component will be + pixel[TJ.getGreenOffset(TJ.PF_BGRX)]. +-
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    ++
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    +
    Returns:
    the green offset for the given pixel format, or -1 if the pixel + format does not have a green component.
    + +@@ -1172,7 +1172,7 @@ public static final int FLAG_FORCESSE3 + of format TJ.PF_BGRX is stored in char pixel[], + then the blue component will be + pixel[TJ.getBlueOffset(TJ.PF_BGRX)]. +-
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    ++
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    +
    Returns:
    the blue offset for the given pixel format, or -1 if the pixel + format does not have a blue component.
    + +@@ -1189,7 +1189,7 @@ public static final int FLAG_FORCESSE3 + of format TJ.PF_BGRA is stored in char pixel[], + then the alpha component will be + pixel[TJ.getAlphaOffset(TJ.PF_BGRA)]. +-
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    ++
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    +
    Returns:
    the alpha offset for the given pixel format, or -1 if the pixel + format does not have a alpha component.
    + +@@ -1206,7 +1206,7 @@ public static final int FLAG_FORCESSE3 +
    Returns the maximum size of the buffer (in bytes) required to hold a JPEG + image with the given width, height, and level of chrominance subsampling.
    +
    Parameters:
    width - the width (in pixels) of the JPEG image
    height - the height (in pixels) of the JPEG image
    jpegSubsamp - the level of chrominance subsampling to be used when +- generating the JPEG image (one of TJ.SAMP_*)
    ++ generating the JPEG image (one of TJ.SAMP_*) +
    Returns:
    the maximum size of the buffer (in bytes) required to hold a JPEG + image with the given width, height, and level of chrominance subsampling.
    + +@@ -1218,16 +1218,20 @@ public static final int FLAG_FORCESSE3 +
  • +

    bufSizeYUV

    +
    public static int bufSizeYUV(int width,
    +-             int pad,
    ++             int align,
    +              int height,
    +              int subsamp)
    +-
    Returns the size of the buffer (in bytes) required to hold a YUV planar +- image with the given width, height, and level of chrominance subsampling.
    +-
    Parameters:
    width - the width (in pixels) of the YUV image
    pad - the width of each line in each plane of the image is padded to +- the nearest multiple of this number of bytes (must be a power of 2.)
    height - the height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV +- image (one of TJ.SAMP_*)
    +-
    Returns:
    the size of the buffer (in bytes) required to hold a YUV planar +- image with the given width, height, and level of chrominance subsampling.
    ++
    Returns the size of the buffer (in bytes) required to hold a unified ++ planar YUV image with the given width, height, and level of chrominance ++ subsampling.
    ++
    Parameters:
    width - the width (in pixels) of the YUV image
    align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n specifies that each row in each plane of ++ the YUV image will be padded to the nearest multiple of n bytes ++ (1 = unpadded.)
    height - the height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV ++ image (one of TJ.SAMP_*)
    ++
    Returns:
    the size of the buffer (in bytes) required to hold a unified ++ planar YUV image with the given width, height, and level of chrominance ++ subsampling.
    +
  • + + +@@ -1258,11 +1262,11 @@ public static int bufSizeYUV(int width, + plane with the given parameters. +
    Parameters:
    componentID - ID number of the image plane (0 = Y, 1 = U/Cb, + 2 = V/Cr)
    width - width (in pixels) of the YUV image. NOTE: this is the width +- of the whole image, not the plane width.
    stride - bytes per line in the image plane.
    height - height (in pixels) of the YUV image. NOTE: this is the ++ of the whole image, not the plane width.
    stride - bytes per row in the image plane.
    height - height (in pixels) of the YUV image. NOTE: this is the + height of the whole image, not the plane height.
    subsamp - the level of chrominance subsampling used in the YUV +- image (one of TJ.SAMP_*)
    +-
    Returns:
    the size of the buffer (in bytes) required to hold a YUV planar +- image with the given parameters.
    ++ image (one of TJ.SAMP_*) ++
    Returns:
    the size of the buffer (in bytes) required to hold a YUV image ++ plane with the given parameters.
    + + + +@@ -1278,7 +1282,7 @@ public static int bufSizeYUV(int width, + Refer to YUVImage for a description of plane width. +
    Parameters:
    componentID - ID number of the image plane (0 = Y, 1 = U/Cb, + 2 = V/Cr)
    width - width (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV image +- (one of TJ.SAMP_*)
    ++ (one of TJ.SAMP_*) +
    Returns:
    the plane width of a YUV image plane with the given parameters.
    + + +@@ -1295,7 +1299,7 @@ public static int bufSizeYUV(int width, + Refer to YUVImage for a description of plane height. +
    Parameters:
    componentID - ID number of the image plane (0 = Y, 1 = U/Cb, + 2 = V/Cr)
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV image +- (one of TJ.SAMP_*)
    ++ (one of TJ.SAMP_*) +
    Returns:
    the plane height of a YUV image plane with the given parameters.
    + + +@@ -1306,10 +1310,10 @@ public static int bufSizeYUV(int width, +
  • +

    getScalingFactors

    +
    public static TJScalingFactor[] getScalingFactors()
    +-
    Returns a list of fractional scaling factors that the JPEG decompressor in +- this implementation of TurboJPEG supports.
    +-
    Returns:
    a list of fractional scaling factors that the JPEG decompressor in +- this implementation of TurboJPEG supports.
    ++
    Returns a list of fractional scaling factors that the JPEG decompressor ++ supports.
    ++
    Returns:
    a list of fractional scaling factors that the JPEG decompressor ++ supports.
    +
  • + + +diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +index a53f879..440247b 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +@@ -132,7 +132,7 @@ implements java.io.Closeable + int y, + int width, + int height) +-
    Create a TurboJPEG compressor instance and associate the uncompressed ++
    Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
    + +@@ -157,7 +157,7 @@ implements java.io.Closeable + int pitch, + int height, + int pixelFormat) +-
    Create a TurboJPEG compressor instance and associate the uncompressed ++
    Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
    + +@@ -210,15 +210,16 @@ implements java.io.Closeable + void + compress(byte[] dstBuf, + int flags) +-
    Compress the uncompressed source image associated with this compressor +- instance and output a JPEG image to the given destination buffer.
    ++
    Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and output a JPEG image to the given destination ++ buffer.
    + + + + byte[] + compress(int flags) +-
    Compress the uncompressed source image associated with this compressor +- instance and return a buffer containing a JPEG image.
    ++
    Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and return a buffer containing a JPEG image.
    + + + +@@ -265,27 +266,27 @@ implements java.io.Closeable + YUVImage + encodeYUV(int[] strides, + int flags) +-
    Encode the uncompressed source image associated with this compressor ++
    Encode the packed-pixel source image associated with this compressor + instance into separate Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the encoded image planes.
    ++ YUVImage instance containing the encoded image planes.
    + + + + YUVImage +-encodeYUV(int pad, ++encodeYUV(int align, + int flags) +-
    Encode the uncompressed source image associated with this compressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the encoded image.
    ++
    Encode the packed-pixel source image associated with this compressor ++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the encoded image.
    + + + + void + encodeYUV(YUVImage dstImage, + int flags) +-
    Encode the uncompressed source image associated with this compressor +- instance into a YUV planar image and store it in the given +- YUVImage instance.
    ++
    Encode the packed-pixel source image associated with this compressor ++ instance into a planar YUV image and store it in the given ++ YUVImage instance.
    + + + +@@ -312,7 +313,7 @@ implements java.io.Closeable + int y, + int width, + int height)
    +-
    Associate an uncompressed RGB or grayscale source image with this ++
    Associate a packed-pixel RGB or grayscale source image with this + compressor instance.
    + + +@@ -338,15 +339,14 @@ implements java.io.Closeable + int pitch, + int height, + int pixelFormat) +-
    Associate an uncompressed RGB, grayscale, or CMYK source image with this ++
    Associate a packed-pixel RGB, grayscale, or CMYK source image with this + compressor instance.
    + + + + void + setSourceImage(YUVImage srcImage) +-
    Associate an uncompressed YUV planar source image with this compressor +- instance.
    ++
    Associate a planar YUV source image with this compressor instance.
    + + + +@@ -405,7 +405,7 @@ implements java.io.Closeable + int height, + int pixelFormat) + throws TJException +-
    Create a TurboJPEG compressor instance and associate the uncompressed ++
    Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
    +
    Parameters:
    srcImage - see setSourceImage(byte[], int, int, int, int, int, int) for description
    x - see setSourceImage(byte[], int, int, int, int, int, int) for description
    y - see setSourceImage(byte[], int, int, int, int, int, int) for description
    width - see setSourceImage(byte[], int, int, int, int, int, int) for description
    pitch - see setSourceImage(byte[], int, int, int, int, int, int) for description
    height - see setSourceImage(byte[], int, int, int, int, int, int) for description
    pixelFormat - pixel format of the source image (one of +@@ -445,7 +445,7 @@ public TJCompressor(byte[] srcImage, + int width, + int height) + throws TJException +-
    Create a TurboJPEG compressor instance and associate the uncompressed ++
    Create a TurboJPEG compressor instance and associate the packed-pixel + source image stored in srcImage with the newly created + instance.
    +
    Parameters:
    srcImage - see +@@ -480,20 +480,22 @@ public TJCompressor(byte[] srcImage, + int height, + int pixelFormat) + throws TJException +-
    Associate an uncompressed RGB, grayscale, or CMYK source image with this ++
    Associate a packed-pixel RGB, grayscale, or CMYK source image with this + compressor instance.
    +-
    Parameters:
    srcImage - image buffer containing RGB, grayscale, or CMYK pixels to +- be compressed or encoded. This buffer is not modified.
    x - x offset (in pixels) of the region in the source image from which ++
    Parameters:
    srcImage - buffer containing a packed-pixel RGB, grayscale, or CMYK ++ source image to be compressed or encoded. This buffer is not modified.
    x - x offset (in pixels) of the region in the source image from which + the JPEG or YUV image should be compressed/encoded
    y - y offset (in pixels) of the region in the source image from which + the JPEG or YUV image should be compressed/encoded
    width - width (in pixels) of the region in the source image from +- which the JPEG or YUV image should be compressed/encoded
    pitch - bytes per line of the source image. Normally, this should be +- width * TJ.pixelSize(pixelFormat) if the source image is +- unpadded, but you can use this parameter to, for instance, specify that +- the scanlines in the source image are padded to a 4-byte boundary or to +- compress/encode a JPEG or YUV image from a region of a larger source +- image. You can also be clever and use this parameter to skip lines, etc. +- Setting this parameter to 0 is the equivalent of setting it to +- width * TJ.pixelSize(pixelFormat).
    height - height (in pixels) of the region in the source image from ++ which the JPEG or YUV image should be compressed/encoded
    pitch - bytes per row in the source image. Normally this should be ++ width * ++ TJ.getPixelSize(pixelFormat), ++ if the source image is unpadded. However, you can use this parameter to, ++ for instance, specify that the rows in the source image are padded to the ++ nearest multiple of 4 bytes or to compress/encode a JPEG or YUV image from ++ a region of a larger source image. You can also be clever and use this ++ parameter to skip rows, etc. Setting this parameter to 0 is the ++ equivalent of setting it to width * ++ TJ.getPixelSize(pixelFormat).
    height - height (in pixels) of the region in the source image from + which the JPEG or YUV image should be compressed/encoded
    pixelFormat - pixel format of the source image (one of + TJ.PF_*)
    +
    Throws:
    +@@ -531,10 +533,11 @@ public void setSourceImage(byte[] srcImage, + int width, + int height) + throws TJException +-
    Associate an uncompressed RGB or grayscale source image with this ++
    Associate a packed-pixel RGB or grayscale source image with this + compressor instance.
    +-
    Parameters:
    srcImage - a BufferedImage instance containing RGB or +- grayscale pixels to be compressed or encoded. This image is not modified.
    x - x offset (in pixels) of the region in the source image from which ++
    Parameters:
    srcImage - a BufferedImage instance containing a ++ packed-pixel RGB or grayscale source image to be compressed or encoded. ++ This image is not modified.
    x - x offset (in pixels) of the region in the source image from which + the JPEG or YUV image should be compressed/encoded
    y - y offset (in pixels) of the region in the source image from which + the JPEG or YUV image should be compressed/encoded
    width - width (in pixels) of the region in the source image from + which the JPEG or YUV image should be compressed/encoded (0 = use the +@@ -553,10 +556,9 @@ public void setSourceImage(byte[] srcImage, +

    setSourceImage

    +
    public void setSourceImage(YUVImage srcImage)
    +                     throws TJException
    +-
    Associate an uncompressed YUV planar source image with this compressor +- instance.
    +-
    Parameters:
    srcImage - YUV planar image to be compressed. This image is not +- modified.
    ++
    Associate a planar YUV source image with this compressor instance.
    ++
    Parameters:
    srcImage - planar YUV source image to be compressed. This image is ++ not modified.
    +
    Throws:
    +
    TJException
    + +@@ -573,16 +575,16 @@ public void setSourceImage(byte[] srcImage, + TJ.CS_YCbCr) or from CMYK to YCCK (see TJ.CS_YCCK) as part + of the JPEG compression process, some of the Cb and Cr (chrominance) + components can be discarded or averaged together to produce a smaller +- image with little perceptible loss of image clarity (the human eye is more +- sensitive to small changes in brightness than to small changes in color.) +- This is called "chrominance subsampling". ++ image with little perceptible loss of image clarity. (The human eye is ++ more sensitive to small changes in brightness than to small changes in ++ color.) This is called "chrominance subsampling". +

    +- NOTE: This method has no effect when compressing a JPEG image from a YUV +- planar source. In that case, the level of chrominance subsampling in +- the JPEG image is determined by the source. Furthermore, this method has +- no effect when encoding to a pre-allocated YUVImage instance. In +- that case, the level of chrominance subsampling is determined by the +- destination.

    ++ NOTE: This method has no effect when compressing a JPEG image from a ++ planar YUV source image. In that case, the level of chrominance ++ subsampling in the JPEG image is determined by the source image. ++ Furthermore, this method has no effect when encoding to a pre-allocated ++ YUVImage instance. In that case, the level of chrominance ++ subsampling is determined by the destination image.
    +
    Parameters:
    newSubsamp - the level of chrominance subsampling to use in + subsequent compress/encode oeprations (one of + TJ.SAMP_*)
    +@@ -609,8 +611,9 @@ public void setSourceImage(byte[] srcImage, +
    public void compress(byte[] dstBuf,
    +             int flags)
    +               throws TJException
    +-
    Compress the uncompressed source image associated with this compressor +- instance and output a JPEG image to the given destination buffer.
    ++
    Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and output a JPEG image to the given destination ++ buffer.
    +
    Parameters:
    dstBuf - buffer that will receive the JPEG image. Use + TJ.bufSize(int, int, int) to determine the maximum size for this buffer based on + the source image's width and height and the desired level of chrominance +@@ -628,8 +631,8 @@ public void setSourceImage(byte[] srcImage, +

    compress

    +
    public byte[] compress(int flags)
    +                 throws TJException
    +-
    Compress the uncompressed source image associated with this compressor +- instance and return a buffer containing a JPEG image.
    ++
    Compress the packed-pixel or planar YUV source image associated with this ++ compressor instance and return a buffer containing a JPEG image.
    +
    Parameters:
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Returns:
    a buffer containing a JPEG image. The length of this buffer will +@@ -682,13 +685,13 @@ public byte[] compress(java.awt.image.BufferedImage srcImage, +
    public void encodeYUV(YUVImage dstImage,
    +              int flags)
    +                throws TJException
    +-
    Encode the uncompressed source image associated with this compressor +- instance into a YUV planar image and store it in the given +- YUVImage instance. This method uses the accelerated color +- conversion routines in TurboJPEG's underlying codec but does not execute +- any of the other steps in the JPEG compression process. Encoding +- CMYK source images to YUV is not supported.
    +-
    Parameters:
    dstImage - YUVImage instance that will receive the YUV planar ++
    Encode the packed-pixel source image associated with this compressor ++ instance into a planar YUV image and store it in the given ++ YUVImage instance. This method performs color conversion (which ++ is accelerated in the libjpeg-turbo implementation) but does not execute ++ any of the other steps in the JPEG compression process. Encoding CMYK ++ source images into YUV images is not supported.
    ++
    Parameters:
    dstImage - YUVImage instance that will receive the planar YUV + image
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Throws:
    +@@ -716,20 +719,21 @@ public void encodeYUV(byte[] dstBuf, +
      +
    • +

      encodeYUV

      +-
      public YUVImage encodeYUV(int pad,
      ++
      public YUVImage encodeYUV(int align,
      +                  int flags)
      +                    throws TJException
      +-
      Encode the uncompressed source image associated with this compressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the encoded image. This method +- uses the accelerated color conversion routines in TurboJPEG's underlying +- codec but does not execute any of the other steps in the JPEG compression +- process. Encoding CMYK source images to YUV is not supported.
      +-
      Parameters:
      pad - the width of each line in each plane of the YUV image will be +- padded to the nearest multiple of this number of bytes (must be a power of +- 2.)
      flags - the bitwise OR of one or more of ++
      Encode the packed-pixel source image associated with this compressor ++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the encoded image. This method performs color ++ conversion (which is accelerated in the libjpeg-turbo implementation) but ++ does not execute any of the other steps in the JPEG compression process. ++ Encoding CMYK source images into YUV images is not supported.
      ++
      Parameters:
      align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n will cause each row in each plane of the ++ YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
      flags - the bitwise OR of one or more of + TJ.FLAG_*
      +-
      Returns:
      a YUV planar image.
      ++
      Returns:
      a YUVImage instance containing the unified planar YUV ++ encoded image
      +
      Throws:
      +
      TJException
      +
    • +@@ -743,21 +747,22 @@ public void encodeYUV(byte[] dstBuf, +
      public YUVImage encodeYUV(int[] strides,
      +                  int flags)
      +                    throws TJException
      +-
      Encode the uncompressed source image associated with this compressor ++
      Encode the packed-pixel source image associated with this compressor + instance into separate Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the encoded image planes. This +- method uses the accelerated color conversion routines in TurboJPEG's +- underlying codec but does not execute any of the other steps in the JPEG +- compression process. Encoding CMYK source images to YUV is not supported.
      ++ YUVImage instance containing the encoded image planes. This ++ method performs color conversion (which is accelerated in the ++ libjpeg-turbo implementation) but does not execute any of the other steps ++ in the JPEG compression process. Encoding CMYK source images into YUV ++ images is not supported.
      +
      Parameters:
      strides - an array of integers, each specifying the number of bytes +- per line in the corresponding plane of the output image. Setting the +- stride for any plane to 0 is the same as setting it to the component width +- of the plane. If strides is null, then the strides for all +- planes will be set to their respective component widths. You can adjust +- the strides in order to add an arbitrary amount of line padding to each +- plane.
      flags - the bitwise OR of one or more of ++ per row in the corresponding plane of the YUV image. Setting the ++ stride for any plane to 0 is the same as setting it to the plane width ++ (see YUVImage.) If strides is null, then the strides ++ for all planes will be set to their respective plane widths. You can ++ adjust the strides in order to add an arbitrary amount of row padding to ++ each plane.
      flags - the bitwise OR of one or more of + TJ.FLAG_*
      +-
      Returns:
      a YUV planar image.
      ++
      Returns:
      a YUVImage instance containing the encoded image planes
      +
      Throws:
      +
      TJException
      + +diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +index 412dcd4..982079c 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +@@ -163,7 +163,7 @@ + into multiple DCT coefficient buffers and call the callback function once + for each buffer.
    planeRegion - rectangle containing the width and height of the + component plane to which coeffBuffer belongs
    componentID - ID number of the component plane to which +- coeffBuffer belongs (Y, Cb, and Cr have, respectively, ID's ++ coeffBuffer belongs. (Y, Cb, and Cr have, respectively, ID's + of 0, 1, and 2 in typical JPEG images.)
    transformID - ID number of the transformed image to which + coeffBuffer belongs. This is the same as the index of the + transform in the transforms array that was passed to TJTransformer.transform().
    transform - a TJTransform instance that specifies the +diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +index 6666e4e..77a7ab6 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +@@ -180,20 +180,22 @@ implements java.io.Closeable + + TJDecompressor(byte[] jpegImage) +
    Create a TurboJPEG decompressor instance and associate the JPEG source +- image stored in jpegImage with the newly created instance.
    ++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ stored in jpegImage with the newly created instance.
    + + + + TJDecompressor(byte[] jpegImage, + int imageSize) +
    Create a TurboJPEG decompressor instance and associate the JPEG source +- image of length imageSize bytes stored in +- jpegImage with the newly created instance.
    ++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ of length imageSize bytes stored in jpegImage ++ with the newly created instance.
    + + + + TJDecompressor(YUVImage yuvImage) +-
    Create a TurboJPEG decompressor instance and associate the YUV planar ++
    Create a TurboJPEG decompressor instance and associate the planar YUV + source image stored in yuvImage with the newly created + instance.
    + +@@ -223,9 +225,10 @@ implements java.io.Closeable + void + decompress(java.awt.image.BufferedImage dstImage, + int flags) +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a decompressed/decoded image to +- the given BufferedImage instance.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ decompressed/decoded image to the given BufferedImage ++ instance.
    + + + +@@ -252,9 +255,9 @@ implements java.io.Closeable + int desiredHeight, + int pixelFormat, + int flags) +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer.
    + + + +@@ -267,9 +270,9 @@ implements java.io.Closeable + int desiredHeight, + int pixelFormat, + int flags) +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer.
    + + + +@@ -278,9 +281,10 @@ implements java.io.Closeable + int desiredHeight, + int bufferedImageType, + int flags) +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and return a BufferedImage +- instance containing the decompressed/decoded image.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a ++ BufferedImage instance containing the packed-pixel ++ decompressed/decoded image.
    + + + +@@ -290,8 +294,9 @@ implements java.io.Closeable + int desiredHeight, + int pixelFormat, + int flags) +-
    Decompress the JPEG source image associated with this decompressor +- instance and return a buffer containing the decompressed image.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a buffer containing ++ the packed-pixel decompressed image.
    + + + +@@ -319,18 +324,18 @@ implements java.io.Closeable + int flags) +
    Decompress the JPEG source image associated with this decompressor + instance into a set of Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the decompressed image planes.
    ++ YUVImage instance containing the decompressed image planes.
    + + + + YUVImage + decompressToYUV(int desiredWidth, +- int pad, ++ int align, + int desiredHeight, + int flags) +
    Decompress the JPEG source image associated with this decompressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the decompressed image.
    ++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the decompressed image.
    + + + +@@ -338,8 +343,8 @@ implements java.io.Closeable + decompressToYUV(YUVImage dstImage, + int flags) +
    Decompress the JPEG source image associated with this decompressor +- instance into a YUV planar image and store it in the given +- YUVImage instance.
    ++ instance into a planar YUV image and store it in the given ++ YUVImage instance.
    + + + +@@ -363,7 +368,7 @@ implements java.io.Closeable + + byte[] + getJPEGBuf() +-
    Returns the JPEG image buffer associated with this decompressor instance.
    ++
    Returns the JPEG buffer associated with this decompressor instance.
    + + + +@@ -426,7 +431,7 @@ implements java.io.Closeable + + void + setSourceImage(YUVImage srcImage) +-
    Associate the specified YUV planar source image with this decompressor ++
    Associate the specified planar YUV source image with this decompressor + instance.
    + + +@@ -554,9 +559,11 @@ implements java.io.Closeable +
    public TJDecompressor(byte[] jpegImage)
    +                throws TJException
    +
    Create a TurboJPEG decompressor instance and associate the JPEG source +- image stored in jpegImage with the newly created instance.
    +-
    Parameters:
    jpegImage - JPEG image buffer (size of the JPEG image is assumed to +- be the length of the array.) This buffer is not modified.
    ++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ stored in jpegImage with the newly created instance.
    ++
    Parameters:
    jpegImage - buffer containing a JPEG source image or tables-only ++ datastream. (The size of the JPEG image or datastream is assumed to be ++ the length of the array.) This buffer is not modified.
    +
    Throws:
    +
    TJException
    + +@@ -571,9 +578,12 @@ implements java.io.Closeable + int imageSize) + throws TJException +
    Create a TurboJPEG decompressor instance and associate the JPEG source +- image of length imageSize bytes stored in +- jpegImage with the newly created instance.
    +-
    Parameters:
    jpegImage - JPEG image buffer. This buffer is not modified.
    imageSize - size of the JPEG image (in bytes)
    ++ image or "abbreviated table specification" (AKA "tables-only") datastream ++ of length imageSize bytes stored in jpegImage ++ with the newly created instance.
    ++
    Parameters:
    jpegImage - buffer containing a JPEG source image or tables-only ++ datastream. This buffer is not modified.
    imageSize - size of the JPEG source image or tables-only datastream ++ (in bytes)
    +
    Throws:
    +
    TJException
    + +@@ -586,10 +596,10 @@ implements java.io.Closeable +

    TJDecompressor

    +
    public TJDecompressor(YUVImage yuvImage)
    +                throws TJException
    +-
    Create a TurboJPEG decompressor instance and associate the YUV planar ++
    Create a TurboJPEG decompressor instance and associate the planar YUV + source image stored in yuvImage with the newly created + instance.
    +-
    Parameters:
    yuvImage - YUVImage instance containing a YUV planar ++
    Parameters:
    yuvImage - YUVImage instance containing a planar YUV source + image to be decoded. This image is not modified.
    +
    Throws:
    +
    TJException
    +@@ -616,15 +626,15 @@ implements java.io.Closeable + "tables-only") datastream of length imageSize bytes stored in + jpegImage with this decompressor instance. If + jpegImage contains a JPEG image, then this image will be used +- as the source image for subsequent decompress operations. Passing a ++ as the source image for subsequent decompression operations. Passing a + tables-only datastream to this method primes the decompressor with + quantization and Huffman tables that can be used when decompressing + subsequent "abbreviated image" datastreams. This is useful, for instance, + when decompressing video streams in which all frames share the same + quantization and Huffman tables.
    +-
    Parameters:
    jpegImage - buffer containing a JPEG image or an "abbreviated table +- specification" (AKA "tables-only") datastream. This buffer is not +- modified.
    imageSize - size of the JPEG image (in bytes)
    ++
    Parameters:
    jpegImage - buffer containing a JPEG source image or tables-only ++ datastream. This buffer is not modified.
    imageSize - size of the JPEG source image or tables-only datastream ++ (in bytes)
    +
    Throws:
    +
    TJException
    + +@@ -651,11 +661,11 @@ public void setJPEGImage(byte[] jpegImage, +
  • +

    setSourceImage

    +
    public void setSourceImage(YUVImage srcImage)
    +-
    Associate the specified YUV planar source image with this decompressor +- instance. Subsequent decompress operations will decode this image into an +- RGB or grayscale destination image.
    +-
    Parameters:
    srcImage - YUVImage instance containing a YUV planar image to +- be decoded. This image is not modified.
    ++
    Associate the specified planar YUV source image with this decompressor ++ instance. Subsequent decompression operations will decode this image into ++ a packed-pixel RGB or grayscale destination image.
    ++
    Parameters:
    srcImage - YUVImage instance containing a planar YUV source ++ image to be decoded. This image is not modified.
    +
  • + + +@@ -719,8 +729,8 @@ public void setJPEGImage(byte[] jpegImage, +
  • +

    getJPEGBuf

    +
    public byte[] getJPEGBuf()
    +-
    Returns the JPEG image buffer associated with this decompressor instance.
    +-
    Returns:
    the JPEG image buffer associated with this decompressor instance.
    ++
    Returns the JPEG buffer associated with this decompressor instance.
    ++
    Returns:
    the JPEG buffer associated with this decompressor instance.
    +
  • + +
    +@@ -748,12 +758,12 @@ public void setJPEGImage(byte[] jpegImage, + decompressor can generate without exceeding the desired image width and + height.
    +
    Parameters:
    desiredWidth - desired width (in pixels) of the decompressed image. +- Setting this to 0 is the same as setting it to the width of the JPEG image +- (in other words, the width will not be considered when determining the +- scaled image size.)
    desiredHeight - desired height (in pixels) of the decompressed image. ++ Setting this to 0 is the same as setting it to the width of the JPEG ++ image. (In other words, the width will not be considered when determining ++ the scaled image size.)
    desiredHeight - desired height (in pixels) of the decompressed image. + Setting this to 0 is the same as setting it to the height of the JPEG +- image (in other words, the height will not be considered when determining +- the scaled image size.)
    ++ image. (In other words, the height will not be considered when ++ determining the scaled image size.) +
    Returns:
    the width of the largest scaled-down image that the TurboJPEG + decompressor can generate without exceeding the desired image width and + height.
    +@@ -771,12 +781,12 @@ public void setJPEGImage(byte[] jpegImage, + decompressor can generate without exceeding the desired image width and + height. +
    Parameters:
    desiredWidth - desired width (in pixels) of the decompressed image. +- Setting this to 0 is the same as setting it to the width of the JPEG image +- (in other words, the width will not be considered when determining the +- scaled image size.)
    desiredHeight - desired height (in pixels) of the decompressed image. ++ Setting this to 0 is the same as setting it to the width of the JPEG ++ image. (In other words, the width will not be considered when determining ++ the scaled image size.)
    desiredHeight - desired height (in pixels) of the decompressed image. + Setting this to 0 is the same as setting it to the height of the JPEG +- image (in other words, the height will not be considered when determining +- the scaled image size.)
    ++ image. (In other words, the height will not be considered when ++ determining the scaled image size.) +
    Returns:
    the height of the largest scaled-down image that the TurboJPEG + decompressor can generate without exceeding the desired image width and + height.
    +@@ -797,25 +807,26 @@ public void setJPEGImage(byte[] jpegImage, + int pixelFormat, + int flags) + throws
    TJException +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer. ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer. +

    +- NOTE: The output image is fully recoverable if this method throws a ++ NOTE: The destination image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    +-
    Parameters:
    dstBuf - buffer that will receive the decompressed/decoded image. +- If the source image is a JPEG image, then this buffer should normally be +- pitch * scaledHeight bytes in size, where +- scaledHeight can be determined by calling +- scalingFactor.getScaled(jpegHeight) +- with one of the scaling factors returned from TJ.getScalingFactors() or by calling getScaledHeight(int, int). If the +- source image is a YUV image, then this buffer should normally be +- pitch * height bytes in size, where height is +- the height of the YUV image. However, the buffer may also be larger than +- the dimensions of the source image, in which case the x, +- y, and pitch parameters can be used to specify +- the region into which the source image should be decompressed/decoded.
    x - x offset (in pixels) of the region in the destination image into ++
    Parameters:
    dstBuf - buffer that will receive the packed-pixel ++ decompressed/decoded image. If the source image is a JPEG image, then ++ this buffer should normally be pitch * scaledHeight bytes in ++ size, where scaledHeight can be determined by calling ++ scalingFactor.getScaled(jpegHeight) ++ with one of the scaling factors returned from TJ.getScalingFactors() ++ or by calling getScaledHeight(int, int). If the source image is a YUV ++ image, then this buffer should normally be pitch * height ++ bytes in size, where height is the height of the YUV image. ++ However, the buffer may also be larger than the dimensions of the source ++ image, in which case the x, y, and ++ pitch parameters can be used to specify the region into which ++ the source image should be decompressed/decoded.
    x - x offset (in pixels) of the region in the destination image into + which the source image should be decompressed/decoded
    y - y offset (in pixels) of the region in the destination image into + which the source image should be decompressed/decoded
    desiredWidth - If the source image is a JPEG image, then this + specifies the desired width (in pixels) of the decompressed image (or +@@ -823,27 +834,29 @@ public void setJPEGImage(byte[] jpegImage, + than the source image dimensions, then TurboJPEG will use scaling in the + JPEG decompressor to generate the largest possible image that will fit + within the desired dimensions. Setting this to 0 is the same as setting +- it to the width of the JPEG image (in other words, the width will not be ++ it to the width of the JPEG image. (In other words, the width will not be + considered when determining the scaled image size.) This parameter is +- ignored if the source image is a YUV image.
    pitch - bytes per line of the destination image. Normally, this +- should be set to scaledWidth * TJ.pixelSize(pixelFormat) if +- the destination image is unpadded, but you can use this to, for instance, +- pad each line of the destination image to a 4-byte boundary or to +- decompress/decode the source image into a region of a larger image. NOTE: +- if the source image is a JPEG image, then scaledWidth can be +- determined by calling +- scalingFactor.getScaled(jpegWidth) +- or by calling getScaledWidth(int, int). If the source image is a +- YUV image, then scaledWidth is the width of the YUV image. ++ ignored if the source image is a YUV image.
    pitch - bytes per row in the destination image. Normally this should ++ be set to scaledWidth * ++ TJ.getPixelSize(pixelFormat), ++ if the destination image will be unpadded. However, you can use this to, ++ for instance, pad each row of the destination image to the nearest ++ multiple of 4 bytes or to decompress/decode the source image into a region ++ of a larger image. NOTE: if the source image is a JPEG image, then ++ scaledWidth can be determined by calling ++ scalingFactor.getScaled(jpegWidth) ++ or by calling getScaledWidth(int, int). If the source image is a YUV ++ image, then scaledWidth is the width of the YUV image. + Setting this parameter to 0 is the equivalent of setting it to +- scaledWidth * TJ.pixelSize(pixelFormat).
    desiredHeight - If the source image is a JPEG image, then this ++ scaledWidth * ++ TJ.getPixelSize(pixelFormat).
    desiredHeight - If the source image is a JPEG image, then this + specifies the desired height (in pixels) of the decompressed image (or + image region.) If the desired destination image dimensions are different + than the source image dimensions, then TurboJPEG will use scaling in the + JPEG decompressor to generate the largest possible image that will fit + within the desired dimensions. Setting this to 0 is the same as setting +- it to the height of the JPEG image (in other words, the height will not be +- considered when determining the scaled image size.) This parameter is ++ it to the height of the JPEG image. (In other words, the height will not ++ be considered when determining the scaled image size.) This parameter is + ignored if the source image is a YUV image.
    pixelFormat - pixel format of the decompressed/decoded image (one of + TJ.PF_*)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +@@ -883,8 +896,9 @@ public void decompress(byte[] dstBuf, + int pixelFormat, + int flags) + throws TJException +-
    Decompress the JPEG source image associated with this decompressor +- instance and return a buffer containing the decompressed image.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a buffer containing ++ the packed-pixel decompressed image.
    +
    Parameters:
    desiredWidth - see + decompress(byte[], int, int, int, int, int, int, int) + for description
    pitch - see +@@ -894,7 +908,7 @@ public void decompress(byte[] dstBuf, + for description
    pixelFormat - pixel format of the decompressed image (one of + TJ.PF_*)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +-
    Returns:
    a buffer containing the decompressed image.
    ++
    Returns:
    a buffer containing the packed-pixel decompressed image.
    +
    Throws:
    +
    TJException
    + +@@ -909,21 +923,21 @@ public void decompress(byte[] dstBuf, + int flags) + throws TJException +
    Decompress the JPEG source image associated with this decompressor +- instance into a YUV planar image and store it in the given +- YUVImage instance. This method performs JPEG decompression +- but leaves out the color conversion step, so a planar YUV image is +- generated instead of an RGB or grayscale image. This method cannot be +- used to decompress JPEG source images with the CMYK or YCCK colorspace. ++ instance into a planar YUV image and store it in the given ++ YUVImage instance. This method performs JPEG decompression but ++ leaves out the color conversion step, so a planar YUV image is generated ++ instead of a packed-pixel image. This method cannot be used to decompress ++ JPEG source images with the CMYK or YCCK colorspace. +

    +- NOTE: The YUV planar output image is fully recoverable if this method ++ NOTE: The planar YUV destination image is fully recoverable if this method + throws a non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    +-
    Parameters:
    dstImage - YUVImage instance that will receive the YUV planar +- image. The level of subsampling specified in this YUVImage +- instance must match that of the JPEG image, and the width and height +- specified in the YUVImage instance must match one of the +- scaled image sizes that TurboJPEG is capable of generating from the JPEG +- source image.
    flags - the bitwise OR of one or more of ++
    Parameters:
    dstImage - YUVImage instance that will receive the planar YUV ++ decompressed image. The level of subsampling specified in this ++ YUVImage instance must match that of the JPEG image, and the width ++ and height specified in the YUVImage instance must match one of ++ the scaled image sizes that the decompressor is capable of generating from ++ the JPEG source image.
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Throws:
    +
    TJException
    +@@ -957,32 +971,33 @@ public void decompressToYUV(byte[] dstBuf, + throws TJException +
    Decompress the JPEG source image associated with this decompressor + instance into a set of Y, U (Cb), and V (Cr) image planes and return a +- YUVImage instance containing the decompressed image planes. +- This method performs JPEG decompression but leaves out the color +- conversion step, so a planar YUV image is generated instead of an RGB or +- grayscale image. This method cannot be used to decompress JPEG source +- images with the CMYK or YCCK colorspace.
    ++ YUVImage instance containing the decompressed image planes. This ++ method performs JPEG decompression but leaves out the color conversion ++ step, so a planar YUV image is generated instead of a packed-pixel image. ++ This method cannot be used to decompress JPEG source images with the CMYK ++ or YCCK colorspace.
    +
    Parameters:
    desiredWidth - desired width (in pixels) of the YUV image. If the + desired image dimensions are different than the dimensions of the JPEG + image being decompressed, then TurboJPEG will use scaling in the JPEG + decompressor to generate the largest possible image that will fit within + the desired dimensions. Setting this to 0 is the same as setting it to +- the width of the JPEG image (in other words, the width will not be ++ the width of the JPEG image. (In other words, the width will not be + considered when determining the scaled image size.)
    strides - an array of integers, each specifying the number of bytes +- per line in the corresponding plane of the output image. Setting the +- stride for any plane to 0 is the same as setting it to the scaled +- component width of the plane. If strides is NULL, then the +- strides for all planes will be set to their respective scaled component +- widths. You can adjust the strides in order to add an arbitrary amount of +- line padding to each plane.
    desiredHeight - desired height (in pixels) of the YUV image. If the ++ per row in the corresponding plane of the YUV image. Setting the stride ++ for any plane to 0 is the same as setting it to the scaled plane width ++ (see YUVImage.) If strides is null, then the strides ++ for all planes will be set to their respective scaled plane widths. You ++ can adjust the strides in order to add an arbitrary amount of row padding ++ to each plane.
    desiredHeight - desired height (in pixels) of the YUV image. If the + desired image dimensions are different than the dimensions of the JPEG + image being decompressed, then TurboJPEG will use scaling in the JPEG + decompressor to generate the largest possible image that will fit within + the desired dimensions. Setting this to 0 is the same as setting it to +- the height of the JPEG image (in other words, the height will not be ++ the height of the JPEG image. (In other words, the height will not be + considered when determining the scaled image size.)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +-
    Returns:
    a YUV planar image.
    ++
    Returns:
    a YUVImage instance containing the decompressed image ++ planes
    +
    Throws:
    +
    TJException
    + +@@ -994,34 +1009,34 @@ public void decompressToYUV(byte[] dstBuf, +
  • +

    decompressToYUV

    +
    public YUVImage decompressToYUV(int desiredWidth,
    +-                       int pad,
    ++                       int align,
    +                        int desiredHeight,
    +                        int flags)
    +                          throws TJException
    +
    Decompress the JPEG source image associated with this decompressor +- instance into a unified YUV planar image buffer and return a +- YUVImage instance containing the decompressed image. This +- method performs JPEG decompression but leaves out the color conversion +- step, so a planar YUV image is generated instead of an RGB or grayscale +- image. This method cannot be used to decompress JPEG source images with +- the CMYK or YCCK colorspace.
    ++ instance into a unified planar YUV image and return a YUVImage ++ instance containing the decompressed image. This method performs JPEG ++ decompression but leaves out the color conversion step, so a planar YUV ++ image is generated instead of a packed-pixel image. This method cannot be ++ used to decompress JPEG source images with the CMYK or YCCK colorspace. +
    Parameters:
    desiredWidth - desired width (in pixels) of the YUV image. If the + desired image dimensions are different than the dimensions of the JPEG + image being decompressed, then TurboJPEG will use scaling in the JPEG + decompressor to generate the largest possible image that will fit within + the desired dimensions. Setting this to 0 is the same as setting it to +- the width of the JPEG image (in other words, the width will not be +- considered when determining the scaled image size.)
    pad - the width of each line in each plane of the YUV image will be +- padded to the nearest multiple of this number of bytes (must be a power of +- 2.)
    desiredHeight - desired height (in pixels) of the YUV image. If the ++ the width of the JPEG image. (In other words, the width will not be ++ considered when determining the scaled image size.)
    align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n will cause each row in each plane of the ++ YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
    desiredHeight - desired height (in pixels) of the YUV image. If the + desired image dimensions are different than the dimensions of the JPEG + image being decompressed, then TurboJPEG will use scaling in the JPEG + decompressor to generate the largest possible image that will fit within + the desired dimensions. Setting this to 0 is the same as setting it to +- the height of the JPEG image (in other words, the height will not be ++ the height of the JPEG image. (In other words, the height will not be + considered when determining the scaled image size.)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +-
    Returns:
    a YUV planar image.
    ++
    Returns:
    a YUVImage instance containing the unified planar YUV ++ decompressed image
    +
    Throws:
    +
    TJException
    +
  • +@@ -1055,25 +1070,26 @@ public byte[] decompressToYUV(int flags) + int pixelFormat, + int flags) + throws TJException +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a grayscale, RGB, or CMYK image +- to the given destination buffer. ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ grayscale, RGB, or CMYK image to the given destination buffer. +

    +- NOTE: The output image is fully recoverable if this method throws a ++ NOTE: The destination image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    +-
    Parameters:
    dstBuf - buffer that will receive the decompressed/decoded image. +- If the source image is a JPEG image, then this buffer should normally be +- stride * scaledHeight pixels in size, where +- scaledHeight can be determined by calling +- scalingFactor.getScaled(jpegHeight) +- with one of the scaling factors returned from TJ.getScalingFactors() or by calling getScaledHeight(int, int). If the +- source image is a YUV image, then this buffer should normally be +- stride * height pixels in size, where height is +- the height of the YUV image. However, the buffer may also be larger than +- the dimensions of the JPEG image, in which case the x, +- y, and stride parameters can be used to specify +- the region into which the source image should be decompressed.
    x - x offset (in pixels) of the region in the destination image into ++
    Parameters:
    dstBuf - buffer that will receive the packed-pixel ++ decompressed/decoded image. If the source image is a JPEG image, then ++ this buffer should normally be stride * scaledHeight pixels ++ in size, where scaledHeight can be determined by calling ++ scalingFactor.getScaled(jpegHeight) ++ with one of the scaling factors returned from TJ.getScalingFactors() ++ or by calling getScaledHeight(int, int). If the source image is a YUV ++ image, then this buffer should normally be stride * height ++ pixels in size, where height is the height of the YUV image. ++ However, the buffer may also be larger than the dimensions of the JPEG ++ image, in which case the x, y, and ++ stride parameters can be used to specify the region into ++ which the source image should be decompressed.
    x - x offset (in pixels) of the region in the destination image into + which the source image should be decompressed/decoded
    y - y offset (in pixels) of the region in the destination image into + which the source image should be decompressed/decoded
    desiredWidth - If the source image is a JPEG image, then this + specifies the desired width (in pixels) of the decompressed image (or +@@ -1081,16 +1097,16 @@ public byte[] decompressToYUV(int flags) + than the source image dimensions, then TurboJPEG will use scaling in the + JPEG decompressor to generate the largest possible image that will fit + within the desired dimensions. Setting this to 0 is the same as setting +- it to the width of the JPEG image (in other words, the width will not be ++ it to the width of the JPEG image. (In other words, the width will not be + considered when determining the scaled image size.) This parameter is +- ignored if the source image is a YUV image.
    stride - pixels per line of the destination image. Normally, this ++ ignored if the source image is a YUV image.
    stride - pixels per row in the destination image. Normally this + should be set to scaledWidth, but you can use this to, for + instance, decompress the JPEG image into a region of a larger image. + NOTE: if the source image is a JPEG image, then scaledWidth +- can be determined by calling +- scalingFactor.getScaled(jpegWidth) +- or by calling getScaledWidth(int, int). If the source image is a +- YUV image, then scaledWidth is the width of the YUV image. ++ can be determined by calling ++ scalingFactor.getScaled(jpegWidth) ++ or by calling getScaledWidth(int, int). If the source image is a YUV ++ image, then scaledWidth is the width of the YUV image. + Setting this parameter to 0 is the equivalent of setting it to + scaledWidth.
    desiredHeight - If the source image is a JPEG image, then this + specifies the desired height (in pixels) of the decompressed image (or +@@ -1098,8 +1114,8 @@ public byte[] decompressToYUV(int flags) + than the source image dimensions, then TurboJPEG will use scaling in the + JPEG decompressor to generate the largest possible image that will fit + within the desired dimensions. Setting this to 0 is the same as setting +- it to the height of the JPEG image (in other words, the height will not be +- considered when determining the scaled image size.) This parameter is ++ it to the height of the JPEG image. (In other words, the height will not ++ be considered when determining the scaled image size.) This parameter is + ignored if the source image is a YUV image.
    pixelFormat - pixel format of the decompressed image (one of + TJ.PF_*)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +@@ -1116,20 +1132,21 @@ public byte[] decompressToYUV(int flags) +
    public void decompress(java.awt.image.BufferedImage dstImage,
    +               int flags)
    +                 throws TJException
    +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and output a decompressed/decoded image to +- the given BufferedImage instance. ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and output a packed-pixel ++ decompressed/decoded image to the given BufferedImage ++ instance. +

    +- NOTE: The output image is fully recoverable if this method throws a ++ NOTE: The destination image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    +
    Parameters:
    dstImage - a BufferedImage instance that will receive +- the decompressed/decoded image. If the source image is a JPEG image, then +- the width and height of the BufferedImage instance must match +- one of the scaled image sizes that TurboJPEG is capable of generating from +- the JPEG image. If the source image is a YUV image, then the width and +- height of the BufferedImage instance must match the width and +- height of the YUV image.
    flags - the bitwise OR of one or more of ++ the packed-pixel decompressed/decoded image. If the source image is a ++ JPEG image, then the width and height of the BufferedImage ++ instance must match one of the scaled image sizes that the decompressor is ++ capable of generating from the JPEG image. If the source image is a YUV ++ image, then the width and height of the BufferedImage ++ instance must match the width and height of the YUV image.
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Throws:
    +
    TJException
    +@@ -1146,9 +1163,10 @@ public byte[] decompressToYUV(int flags) + int bufferedImageType, + int flags) + throws TJException +-
    Decompress the JPEG source image or decode the YUV source image associated +- with this decompressor instance and return a BufferedImage +- instance containing the decompressed/decoded image.
    ++
    Decompress the JPEG source image or decode the planar YUV source image ++ associated with this decompressor instance and return a ++ BufferedImage instance containing the packed-pixel ++ decompressed/decoded image.
    +
    Parameters:
    desiredWidth - see + decompress(byte[], int, int, int, int, int, int, int) for + description
    desiredHeight - see +@@ -1157,7 +1175,7 @@ public byte[] decompressToYUV(int flags) + instance that will be created (for instance, + BufferedImage.TYPE_INT_RGB)
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +-
    Returns:
    a BufferedImage instance containing the ++
    Returns:
    a BufferedImage instance containing the packed-pixel + decompressed/decoded image.
    +
    Throws:
    +
    TJException
    +diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +index 5f22691..e528d79 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +@@ -167,7 +167,7 @@ extends java.awt.Rectangle + + int + op +-
    Transform operation (one of OP_*)
    ++
    Transform operation (one of OP_*)
    + + + +@@ -223,7 +223,7 @@ extends java.awt.Rectangle + static int + OPT_COPYNONE +
    This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF +- and ICC profile data) from the source image to the output image.
    ++ and ICC profile data) from the source image to the destination image.
    + + + +@@ -235,8 +235,8 @@ extends java.awt.Rectangle + + static int + OPT_GRAY +-
    This option will discard the color data in the input image and produce +- a grayscale output image.
    ++
    This option will discard the color data in the source image and produce a ++ grayscale destination image.
    + + + +@@ -256,7 +256,7 @@ extends java.awt.Rectangle + + static int + OPT_PROGRESSIVE +-
    This option will enable progressive entropy coding in the output image ++
    This option will enable progressive entropy coding in the JPEG image + generated by this particular transform.
    + + +@@ -270,7 +270,8 @@ extends java.awt.Rectangle + + int + options +-
    Transform options (bitwise OR of one or more of OPT_*)
    ++
    Transform options (bitwise OR of one or more of ++ OPT_*)
    + + + +@@ -509,7 +510,7 @@ extends java.awt.Rectangle + the level of chrominance subsampling used. If the image's width or height + is not evenly divisible by the MCU block size (see TJ.getMCUWidth(int) + and TJ.getMCUHeight(int)), then there will be partial MCU blocks on the +- right and/or bottom edges. It is not possible to move these partial MCU ++ right and/or bottom edges. It is not possible to move these partial MCU + blocks to the top or left of the image, so any transform that would + require that is "imperfect." If this option is not specified, then any + partial MCU blocks that cannot be transformed will be left in place, which +@@ -547,8 +548,8 @@ extends java.awt.Rectangle +
  • +

    OPT_GRAY

    +
    public static final int OPT_GRAY
    +-
    This option will discard the color data in the input image and produce +- a grayscale output image.
    ++
    This option will discard the color data in the source image and produce a ++ grayscale destination image.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -573,11 +574,10 @@ extends java.awt.Rectangle +
  • +

    OPT_PROGRESSIVE

    +
    public static final int OPT_PROGRESSIVE
    +-
    This option will enable progressive entropy coding in the output image ++
    This option will enable progressive entropy coding in the JPEG image + generated by this particular transform. Progressive entropy coding will + generally improve compression relative to baseline entropy coding (the +- default), but it will reduce compression and decompression performance +- considerably.
    ++ default), but it will reduce decompression performance considerably.
    +
    See Also:
    Constant Field Values
    +
  • + +@@ -589,7 +589,7 @@ extends java.awt.Rectangle +

    OPT_COPYNONE

    +
    public static final int OPT_COPYNONE
    +
    This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF +- and ICC profile data) from the source image to the output image.
    ++ and ICC profile data) from the source image to the destination image.
    +
    See Also:
    Constant Field Values
    + + +@@ -600,7 +600,7 @@ extends java.awt.Rectangle +
  • +

    op

    +
    public int op
    +-
    Transform operation (one of OP_*)
    ++
    Transform operation (one of OP_*)
    +
  • + + +@@ -610,7 +610,8 @@ extends java.awt.Rectangle +
  • +

    options

    +
    public int options
    +-
    Transform options (bitwise OR of one or more of OPT_*)
    ++
    +
  • + + +@@ -661,8 +662,8 @@ extends java.awt.Rectangle + equivalent of setting it to (width of the source JPEG image - + x).
    h - the height of the cropping region. Setting this to 0 is the + equivalent of setting it to (height of the source JPEG image - +- y).
    op - one of the transform operations (OP_*)
    options - the bitwise OR of one or more of the transform options +- (OPT_*)
    cf - an instance of an object that implements the TJCustomFilter interface, or null if no custom filter is needed
    ++ y).
    op - one of the transform operations (OP_*)
    options - the bitwise OR of one or more of the transform options ++ (OPT_*)
    cf - an instance of an object that implements the TJCustomFilter interface, or null if no custom filter is needed
    + + + +@@ -678,8 +679,8 @@ extends java.awt.Rectangle +
    Create a new lossless transform instance with the given parameters.
    +
    Parameters:
    r - a Rectangle instance that specifies the cropping + region. See TJTransform(int, int, int, int, int, int, TJCustomFilter) for more +- detail.
    op - one of the transform operations (OP_*)
    options - the bitwise OR of one or more of the transform options +- (OPT_*)
    cf - an instance of an object that implements the TJCustomFilter interface, or null if no custom filter is needed
    ++ detail.
    op - one of the transform operations (OP_*)
    options - the bitwise OR of one or more of the transform options ++ (OPT_*)
    cf - an instance of an object that implements the TJCustomFilter interface, or null if no custom filter is needed
    + + + +diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +index a30fe30..6436b7f 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +@@ -148,14 +148,15 @@ extends + TJTransformer(byte[] jpegImage) +
    Create a TurboJPEG lossless transformer instance and associate the JPEG +- image stored in jpegImage with the newly created instance.
    ++ source image stored in jpegImage with the newly created ++ instance. + + + + TJTransformer(byte[] jpegImage, + int imageSize) +
    Create a TurboJPEG lossless transformer instance and associate the JPEG +- image of length imageSize bytes stored in ++ source image of length imageSize bytes stored in + jpegImage with the newly created instance.
    + + +@@ -178,7 +179,7 @@ extends int[] + getTransformedSizes() +
    Returns an array containing the sizes of the transformed JPEG images +- generated by the most recent transform operation.
    ++ (in bytes) generated by the most recent transform operation. + + + +@@ -186,18 +187,18 @@ extends transform(byte[][] dstBufs, + TJTransform[] transforms, + int flags) +-
    Losslessly transform the JPEG image associated with this transformer +- instance into one or more JPEG images stored in the given destination +- buffers.
    ++
    Losslessly transform the JPEG source image associated with this ++ transformer instance into one or more JPEG images stored in the given ++ destination buffers.
    + + + + TJDecompressor[] + transform(TJTransform[] transforms, + int flags) +-
    Losslessly transform the JPEG image associated with this transformer +- instance and return an array of TJDecompressor instances, each of +- which has a transformed JPEG image associated with it.
    ++
    Losslessly transform the JPEG source image associated with this ++ transformer instance and return an array of TJDecompressor ++ instances, each of which has a transformed JPEG image associated with it.
    + + + +@@ -251,9 +252,11 @@ extends public TJTransformer(byte[] jpegImage) + throws TJException +
    Create a TurboJPEG lossless transformer instance and associate the JPEG +- image stored in jpegImage with the newly created instance.
    +-
    Parameters:
    jpegImage - JPEG image buffer (size of the JPEG image is assumed to +- be the length of the array.) This buffer is not modified.
    ++ source image stored in jpegImage with the newly created ++ instance. ++
    Parameters:
    jpegImage - buffer containing the JPEG source image to transform. ++ (The size of the JPEG image is assumed to be the length of the array.) ++ This buffer is not modified.
    +
    Throws:
    +
    TJException
    + +@@ -268,9 +271,10 @@ extends TJException +
    Create a TurboJPEG lossless transformer instance and associate the JPEG +- image of length imageSize bytes stored in ++ source image of length imageSize bytes stored in + jpegImage with the newly created instance.
    +-
    Parameters:
    jpegImage - JPEG image buffer. This buffer is not modified.
    imageSize - size of the JPEG image (in bytes)
    ++
    Parameters:
    jpegImage - buffer containing the JPEG source image to transform. ++ This buffer is not modified.
    imageSize - size of the JPEG source image (in bytes)
    +
    Throws:
    +
    TJException
    + +@@ -293,25 +297,26 @@ extends TJTransform[] transforms, + int flags) + throws TJException +-
    Losslessly transform the JPEG image associated with this transformer +- instance into one or more JPEG images stored in the given destination +- buffers. Lossless transforms work by moving the raw coefficients from one +- JPEG image structure to another without altering the values of the +- coefficients. While this is typically faster than decompressing the +- image, transforming it, and re-compressing it, lossless transforms are not +- free. Each lossless transform requires reading and performing Huffman +- decoding on all of the coefficients in the source image, regardless of the +- size of the destination image. Thus, this method provides a means of +- generating multiple transformed images from the same source or of applying +- multiple transformations simultaneously, in order to eliminate the need to +- read the source coefficients multiple times.
    +-
    Parameters:
    dstBufs - an array of image buffers. dstbufs[i] will +- receive a JPEG image that has been transformed using the parameters in +- transforms[i]. Use TJ.bufSize(int, int, int) to determine the +- maximum size for each buffer based on the transformed or cropped width and +- height and the level of subsampling used in the source image.
    transforms - an array of TJTransform instances, each of ++
    Losslessly transform the JPEG source image associated with this ++ transformer instance into one or more JPEG images stored in the given ++ destination buffers. Lossless transforms work by moving the raw ++ coefficients from one JPEG image structure to another without altering the ++ values of the coefficients. While this is typically faster than ++ decompressing the image, transforming it, and re-compressing it, lossless ++ transforms are not free. Each lossless transform requires reading and ++ performing Huffman decoding on all of the coefficients in the source ++ image, regardless of the size of the destination image. Thus, this method ++ provides a means of generating multiple transformed images from the same ++ source or of applying multiple transformations simultaneously, in order to ++ eliminate the need to read the source coefficients multiple times.
    ++
    Parameters:
    dstBufs - an array of JPEG destination buffers. ++ dstbufs[i] will receive a JPEG image that has been ++ transformed using the parameters in transforms[i]. Use ++ TJ.bufSize(int, int, int) to determine the maximum size for each buffer based on ++ the transformed or cropped width and height and the level of subsampling ++ used in the source image.
    transforms - an array of TJTransform instances, each of + which specifies the transform parameters and/or cropping region for the +- corresponding transformed output image
    flags - the bitwise OR of one or more of ++ corresponding transformed JPEG image
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Throws:
    +
    TJException
    +@@ -326,12 +331,12 @@ extends public TJDecompressor[] transform(TJTransform[] transforms, + int flags) + throws TJException +-
    Losslessly transform the JPEG image associated with this transformer +- instance and return an array of TJDecompressor instances, each of +- which has a transformed JPEG image associated with it.
    ++
    Losslessly transform the JPEG source image associated with this ++ transformer instance and return an array of TJDecompressor ++ instances, each of which has a transformed JPEG image associated with it.
    +
    Parameters:
    transforms - an array of TJTransform instances, each of + which specifies the transform parameters and/or cropping region for the +- corresponding transformed output image
    flags - the bitwise OR of one or more of ++ corresponding transformed JPEG image
    flags - the bitwise OR of one or more of + TJ.FLAG_*
    +
    Returns:
    an array of TJDecompressor instances, each of + which has a transformed JPEG image associated with it.
    +@@ -347,9 +352,9 @@ extends getTransformedSizes +
    public int[] getTransformedSizes()
    +
    Returns an array containing the sizes of the transformed JPEG images +- generated by the most recent transform operation.
    ++ (in bytes) generated by the most recent transform operation. +
    Returns:
    an array containing the sizes of the transformed JPEG images +- generated by the most recent transform operation.
    ++ (in bytes) generated by the most recent transform operation.
    + + + +diff --git a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +index d4485ed..b08fcb3 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +@@ -98,7 +98,7 @@ +
    +
    public class YUVImage
    + extends java.lang.Object
    +-
    This class encapsulates a YUV planar image and the metadata ++
    This class encapsulates a planar YUV image and the metadata + associated with it. The TurboJPEG API allows both the JPEG compression and + decompression pipelines to be split into stages: YUV encode, compress from + YUV, decompress to YUV, and YUV decode. A YUVImage instance +@@ -106,30 +106,32 @@ extends java.lang.Object + operations and as the source image for compress-from-YUV and YUV decode + operations. +

    +- Technically, the JPEG format uses the YCbCr colorspace (which technically is +- not a "colorspace" but rather a "color transform"), but per the convention +- of the digital video community, the TurboJPEG API uses "YUV" to refer to an +- image format consisting of Y, Cb, and Cr image planes. ++ Technically, the JPEG format uses the YCbCr colorspace (which is technically ++ not a colorspace but a color transform), but per the convention of the ++ digital video community, the TurboJPEG API uses "YUV" to refer to an image ++ format consisting of Y, Cb, and Cr image planes. +

    + Each plane is simply a 2D array of bytes, each byte representing the value + of one of the components (Y, Cb, or Cr) at a particular location in the + image. The width and height of each plane are determined by the image + width, height, and level of chrominance subsampling. The luminance plane + width is the image width padded to the nearest multiple of the horizontal +- subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of +- 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane +- height is the image height padded to the nearest multiple of the vertical +- subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 +- or grayscale.) The chrominance plane width is equal to the luminance plane +- width divided by the horizontal subsampling factor, and the chrominance +- plane height is equal to the luminance plane height divided by the vertical +- subsampling factor. ++ subsampling factor (1 in the case of 4:4:4, grayscale, or 4:4:0; 2 in the ++ case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.) Similarly, the luminance ++ plane height is the image height padded to the nearest multiple of the ++ vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale, or ++ 4:1:1; 2 in the case of 4:2:0 or 4:4:0.) This is irrespective of any ++ additional padding that may be specified as an argument to the various ++ YUVImage methods. The chrominance plane width is equal to the luminance ++ plane width divided by the horizontal subsampling factor, and the ++ chrominance plane height is equal to the luminance plane height divided by ++ the vertical subsampling factor. +

    + For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is + used, then the luminance plane would be 36 x 35 bytes, and each of the +- chrominance planes would be 18 x 35 bytes. If you specify a line padding of +- 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and +- each of the chrominance planes would be 20 x 35 bytes.

    ++ chrominance planes would be 18 x 35 bytes. If you specify a row alignment ++ of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, ++ and each of the chrominance planes would be 20 x 35 bytes.
    + + + +@@ -154,15 +156,15 @@ extends java.lang.Object + + + protected int +-
    yuvHeight  ++yuvAlign  + + +-protected int[] +-yuvOffsets  ++protected int ++yuvHeight  + + +-protected int +-yuvPad  ++protected int[] ++yuvOffsets  + + + protected byte[][] +@@ -208,10 +210,10 @@ extends java.lang.Object + + YUVImage(byte[] yuvImage, + int width, +- int pad, ++ int align, + int height, + int subsamp) +-
    Create a new YUVImage instance from an existing unified image ++
    Create a new YUVImage instance from an existing unified + buffer.
    + + +@@ -226,11 +228,11 @@ extends java.lang.Object + + + YUVImage(int width, +- int pad, ++ int align, + int height, + int subsamp) +-
    Create a new YUVImage instance backed by a unified image +- buffer, and allocate memory for the image buffer.
    ++
    Create a new YUVImage instance backed by a unified buffer, ++ and allocate memory for the buffer.
    + + + +@@ -251,8 +253,8 @@ extends java.lang.Object + + byte[] + getBuf() +-
    Returns the YUV image buffer (if this image is stored in a unified +- buffer rather than separate image planes.)
    ++
    Returns the YUV buffer (if this image is stored in a unified buffer rather ++ than separate image planes.)
    + + + +@@ -271,7 +273,7 @@ extends java.lang.Object + + int + getPad() +-
    Returns the line padding used in the YUV image buffer (if this image is ++
    Returns the row alignment (in bytes) of the YUV buffer (if this image is + stored in a unified buffer rather than separate image planes.)
    + + +@@ -284,14 +286,14 @@ extends java.lang.Object + + int + getSize() +-
    Returns the size (in bytes) of the YUV image buffer (if this image is +- stored in a unified buffer rather than separate image planes.)
    ++
    Returns the size (in bytes) of the YUV buffer (if this image is stored in ++ a unified buffer rather than separate image planes.)
    + + + + int[] + getStrides() +-
    Returns the number of bytes per line of each plane in the YUV image.
    ++
    Returns the number of bytes per row of each plane in the YUV image.
    + + + +@@ -321,10 +323,10 @@ extends java.lang.Object + void + setBuf(byte[] yuvImage, + int width, +- int pad, ++ int align, + int height, + int subsamp) +-
    Assign a unified image buffer to this YUVImage instance.
    ++
    Assign a unified buffer to this YUVImage instance.
    + + + +@@ -385,13 +387,13 @@ extends java.lang.Object +
    protected int[] yuvStrides
    + + +- ++ + + +
      +
    • +-

      yuvPad

      +-
      protected int yuvPad
      ++

      yuvAlign

      ++
      protected int yuvAlign
      +
    • +
    + +@@ -442,7 +444,7 @@ extends java.lang.Object +
    Create a new YUVImage instance backed by separate image + planes, and allocate memory for the image planes.
    +
    Parameters:
    width - width (in pixels) of the YUV image
    strides - an array of integers, each specifying the number of bytes +- per line in the corresponding plane of the YUV image. Setting the stride ++ per row in the corresponding plane of the YUV image. Setting the stride + for any plane to 0 is the same as setting it to the plane width (see + above.) If strides is null, then the + strides for all planes will be set to their respective plane widths. When +@@ -458,13 +460,15 @@ extends java.lang.Object +
  • +

    YUVImage

    +
    public YUVImage(int width,
    +-        int pad,
    ++        int align,
    +         int height,
    +         int subsamp)
    +-
    Create a new YUVImage instance backed by a unified image +- buffer, and allocate memory for the image buffer.
    +-
    Parameters:
    width - width (in pixels) of the YUV image
    pad - Each line of each plane in the YUV image buffer will be padded +- to this number of bytes (must be a power of 2.)
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling to be used in the YUV ++
    Create a new YUVImage instance backed by a unified buffer, ++ and allocate memory for the buffer.
    ++
    Parameters:
    width - width (in pixels) of the YUV image
    align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n specifies that each row in each plane of ++ the YUV image will be padded to the nearest multiple of n bytes ++ (1 = unpadded.)
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling to be used in the YUV + image (one of TJ.SAMP_*)
    +
  • + +@@ -485,18 +489,18 @@ extends java.lang.Object +
    Parameters:
    planes - an array of buffers representing the Y, U (Cb), and V (Cr) + image planes (or just the Y plane, if the image is grayscale.) These + planes can be contiguous or non-contiguous in memory. Plane +- i should be at least offsets[i] + +- TJ.planeSizeYUV(i, width, strides[i], height, subsamp) ++ i should be at least offsets[i] + ++ TJ.planeSizeYUV(i, width, strides[i], height, subsamp) + bytes in size.
    offsets - If this YUVImage instance represents a + subregion of a larger image, then offsets[i] specifies the + offset (in bytes) of the subregion within plane i of the + larger image. Setting this to null is the same as setting the offsets for + all planes to 0.
    width - width (in pixels) of the new YUV image (or subregion)
    strides - an array of integers, each specifying the number of bytes +- per line in the corresponding plane of the YUV image. Setting the stride ++ per row in the corresponding plane of the YUV image. Setting the stride + for any plane to 0 is the same as setting it to the plane width (see + above.) If strides is null, then the + strides for all planes will be set to their respective plane widths. You +- can adjust the strides in order to add an arbitrary amount of line padding ++ can adjust the strides in order to add an arbitrary amount of row padding + to each plane or to specify that this YUVImage instance is a + subregion of a larger image (in which case, strides[i] should + be set to the plane width of plane i in the larger image.)
    height - height (in pixels) of the new YUV image (or subregion)
    subsamp - the level of chrominance subsampling used in the YUV +@@ -511,18 +515,19 @@ extends java.lang.Object +

    YUVImage

    +
    public YUVImage(byte[] yuvImage,
    +         int width,
    +-        int pad,
    ++        int align,
    +         int height,
    +         int subsamp)
    +-
    Create a new YUVImage instance from an existing unified image ++
    Create a new YUVImage instance from an existing unified + buffer.
    +-
    Parameters:
    yuvImage - image buffer that contains or will contain YUV planar +- image data. Use TJ.bufSizeYUV(int, int, int, int) to determine the minimum size for +- this buffer. The Y, U (Cb), and V (Cr) image planes are stored +- sequentially in the buffer (see above for a description +- of the image format.)
    width - width (in pixels) of the YUV image
    pad - the line padding used in the YUV image buffer. For +- instance, if each line in each plane of the buffer is padded to the +- nearest multiple of 4 bytes, then pad should be set to 4.
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV ++
    Parameters:
    yuvImage - buffer that contains or will receive a unified planar YUV ++ image. Use TJ.bufSizeYUV(int, int, int, int) to determine the minimum size for this ++ buffer. The Y, U (Cb), and V (Cr) image planes are stored sequentially in ++ the buffer. (See above for a description of the image ++ format.)
    width - width (in pixels) of the YUV image
    align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n specifies that each row in each plane of ++ the YUV image will be padded to the nearest multiple of n bytes ++ (1 = unpadded.)
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV + image (one of TJ.SAMP_*)
    + + +@@ -550,19 +555,19 @@ extends java.lang.Object +
    Parameters:
    planes - an array of buffers representing the Y, U (Cb), and V (Cr) + image planes (or just the Y plane, if the image is grayscale.) These + planes can be contiguous or non-contiguous in memory. Plane +- i should be at least offsets[i] + +- TJ.planeSizeYUV(i, width, strides[i], height, subsamp) ++ i should be at least offsets[i] + ++ TJ.planeSizeYUV(i, width, strides[i], height, subsamp) + bytes in size.
    offsets - If this YUVImage instance represents a + subregion of a larger image, then offsets[i] specifies the + offset (in bytes) of the subregion within plane i of the + larger image. Setting this to null is the same as setting the offsets for + all planes to 0.
    width - width (in pixels) of the YUV image (or subregion)
    strides - an array of integers, each specifying the number of bytes +- per line in the corresponding plane of the YUV image. Setting the stride ++ per row in the corresponding plane of the YUV image. Setting the stride + for any plane to 0 is the same as setting it to the plane width (see + above.) If strides is null, then the + strides for all planes will be set to their respective plane widths. You +- can adjust the strides in order to add an arbitrary amount of line padding +- to each plane or to specify that this YUVImage image is a ++ can adjust the strides in order to add an arbitrary amount of row padding ++ to each plane or to specify that this YUVImage instance is a + subregion of a larger image (in which case, strides[i] should + be set to the plane width of plane i in the larger image.)
    height - height (in pixels) of the YUV image (or subregion)
    subsamp - the level of chrominance subsampling used in the YUV + image (one of TJ.SAMP_*)
    +@@ -576,17 +581,18 @@ extends java.lang.Object +

    setBuf

    +
    public void setBuf(byte[] yuvImage,
    +           int width,
    +-          int pad,
    ++          int align,
    +           int height,
    +           int subsamp)
    +-
    Assign a unified image buffer to this YUVImage instance.
    +-
    Parameters:
    yuvImage - image buffer that contains or will contain YUV planar +- image data. Use TJ.bufSizeYUV(int, int, int, int) to determine the minimum size for +- this buffer. The Y, U (Cb), and V (Cr) image planes are stored +- sequentially in the buffer (see above for a description +- of the image format.)
    width - width (in pixels) of the YUV image
    pad - the line padding used in the YUV image buffer. For +- instance, if each line in each plane of the buffer is padded to the +- nearest multiple of 4 bytes, then pad should be set to 4.
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV ++
    Assign a unified buffer to this YUVImage instance.
    ++
    Parameters:
    yuvImage - buffer that contains or will receive a unified planar YUV ++ image. Use TJ.bufSizeYUV(int, int, int, int) to determine the minimum size for this ++ buffer. The Y, U (Cb), and V (Cr) image planes are stored sequentially in ++ the buffer. (See above for a description of the image ++ format.)
    width - width (in pixels) of the YUV image
    align - row alignment (in bytes) of the YUV image (must be a power of ++ 2.) Setting this parameter to n specifies that each row in each plane of ++ the YUV image will be padded to the nearest multiple of n bytes ++ (1 = unpadded.)
    height - height (in pixels) of the YUV image
    subsamp - the level of chrominance subsampling used in the YUV + image (one of TJ.SAMP_*)
    + + +@@ -619,9 +625,9 @@ extends java.lang.Object +
  • +

    getPad

    +
    public int getPad()
    +-
    Returns the line padding used in the YUV image buffer (if this image is ++
    Returns the row alignment (in bytes) of the YUV buffer (if this image is + stored in a unified buffer rather than separate image planes.)
    +-
    Returns:
    the line padding used in the YUV image buffer
    ++
    Returns:
    the row alignment of the YUV buffer
    +
  • + + +@@ -631,8 +637,8 @@ extends java.lang.Object +
  • +

    getStrides

    +
    public int[] getStrides()
    +-
    Returns the number of bytes per line of each plane in the YUV image.
    +-
    Returns:
    the number of bytes per line of each plane in the YUV image
    ++
    Returns the number of bytes per row of each plane in the YUV image.
    ++
    Returns:
    the number of bytes per row of each plane in the YUV image
    +
  • + +
    +@@ -679,9 +685,9 @@ extends java.lang.Object +
  • +

    getBuf

    +
    public byte[] getBuf()
    +-
    Returns the YUV image buffer (if this image is stored in a unified +- buffer rather than separate image planes.)
    +-
    Returns:
    the YUV image buffer
    ++
    Returns the YUV buffer (if this image is stored in a unified buffer rather ++ than separate image planes.)
    ++
    Returns:
    the YUV buffer
    +
  • + +
    +@@ -691,9 +697,9 @@ extends java.lang.Object +
  • +

    getSize

    +
    public int getSize()
    +-
    Returns the size (in bytes) of the YUV image buffer (if this image is +- stored in a unified buffer rather than separate image planes.)
    +-
    Returns:
    the size (in bytes) of the YUV image buffer
    ++
    Returns the size (in bytes) of the YUV buffer (if this image is stored in ++ a unified buffer rather than separate image planes.)
    ++
    Returns:
    the size (in bytes) of the YUV buffer
    +
  • + + +diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +index dedcce5..89dbe05 100644 +--- a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html ++++ b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +@@ -131,7 +131,7 @@ + +
    YUVImage + +-
    This class encapsulates a YUV planar image and the metadata ++
    This class encapsulates a planar YUV image and the metadata + associated with it.
    + + +diff --git a/java/doc/serialized-form.html b/java/doc/serialized-form.html +index 45bbc86..e123f31 100644 +--- a/java/doc/serialized-form.html ++++ b/java/doc/serialized-form.html +@@ -109,12 +109,13 @@ +
  • +

    op

    +
    int op
    +-
    Transform operation (one of OP_*)
    ++
    Transform operation (one of OP_*)
    +
  • +
  • +

    options

    +
    int options
    +-
    Transform options (bitwise OR of one or more of OPT_*)
    ++
    Transform options (bitwise OR of one or more of ++ OPT_*)
    +
  • +
  • +

    cf

    +diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java +index d791e00..3857087 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJ.java ++++ b/java/org/libjpegturbo/turbojpeg/TJ.java +@@ -1,6 +1,6 @@ + /* +- * Copyright (C)2011-2013, 2017-2018, 2020-2021 D. R. Commander. +- * All Rights Reserved. ++ * Copyright (C)2011-2013, 2017-2018, 2020-2021, 2023 D. R. Commander. ++ * All Rights Reserved. + * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -85,7 +85,7 @@ public final class TJ { + * subsampling. + * + * @param subsamp the level of chrominance subsampling (one of +- * SAMP_*) ++ * {@link #SAMP_444 SAMP_*}) + * + * @return the MCU block width for the given level of chrominance + * subsampling. +@@ -105,7 +105,7 @@ public final class TJ { + * subsampling. + * + * @param subsamp the level of chrominance subsampling (one of +- * SAMP_*) ++ * {@link #SAMP_444 SAMP_*}) + * + * @return the MCU block height for the given level of chrominance + * subsampling. +@@ -205,8 +205,8 @@ public final class TJ { + * vice versa, but the mapping is typically not 1:1 or reversible, nor can it + * be defined with a simple formula. Thus, such a conversion is out of scope + * for a codec library. However, the TurboJPEG API allows for compressing +- * CMYK pixels into a YCCK JPEG image (see {@link #CS_YCCK}) and +- * decompressing YCCK JPEG images into CMYK pixels. ++ * packed-pixel CMYK images into YCCK JPEG images (see {@link #CS_YCCK}) and ++ * decompressing YCCK JPEG images into packed-pixel CMYK images. + */ + public static final int PF_CMYK = 11; + +@@ -214,7 +214,7 @@ public final class TJ { + /** + * Returns the pixel size (in bytes) for the given pixel format. 
+ * +- * @param pixelFormat the pixel format (one of PF_*) ++ * @param pixelFormat the pixel format (one of {@link #PF_RGB PF_*}) + * + * @return the pixel size (in bytes) for the given pixel format. + */ +@@ -235,7 +235,7 @@ public final class TJ { + * then the red component will be + * pixel[TJ.getRedOffset(TJ.PF_BGRX)]. + * +- * @param pixelFormat the pixel format (one of PF_*) ++ * @param pixelFormat the pixel format (one of {@link #PF_RGB PF_*}) + * + * @return the red offset for the given pixel format, or -1 if the pixel + * format does not have a red component. +@@ -257,7 +257,7 @@ public final class TJ { + * then the green component will be + * pixel[TJ.getGreenOffset(TJ.PF_BGRX)]. + * +- * @param pixelFormat the pixel format (one of PF_*) ++ * @param pixelFormat the pixel format (one of {@link #PF_RGB PF_*}) + * + * @return the green offset for the given pixel format, or -1 if the pixel + * format does not have a green component. +@@ -279,7 +279,7 @@ public final class TJ { + * then the blue component will be + * pixel[TJ.getBlueOffset(TJ.PF_BGRX)]. + * +- * @param pixelFormat the pixel format (one of PF_*) ++ * @param pixelFormat the pixel format (one of {@link #PF_RGB PF_*}) + * + * @return the blue offset for the given pixel format, or -1 if the pixel + * format does not have a blue component. +@@ -301,7 +301,7 @@ public final class TJ { + * then the alpha component will be + * pixel[TJ.getAlphaOffset(TJ.PF_BGRA)]. + * +- * @param pixelFormat the pixel format (one of PF_*) ++ * @param pixelFormat the pixel format (one of {@link #PF_RGB PF_*}) + * + * @return the alpha offset for the given pixel format, or -1 if the pixel + * format does not have a alpha component. +@@ -324,8 +324,9 @@ public final class TJ { + * RGB colorspace. When compressing the JPEG image, the R, G, and B + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. 
RGB JPEG images can be +- * decompressed to any of the extended RGB pixel formats or grayscale, but +- * they cannot be decompressed to YUV images. ++ * decompressed to packed-pixel images with any of the extended RGB or ++ * grayscale pixel formats, but they cannot be decompressed to planar YUV ++ * images. + */ + public static final int CS_RGB = 0; + /** +@@ -339,26 +340,28 @@ public final class TJ { + * transformation allowed the same signal to drive both black & white and + * color televisions, but JPEG images use YCbCr primarily because it allows + * the color data to be optionally subsampled for the purposes of reducing +- * bandwidth or disk space. YCbCr is the most common JPEG colorspace, and +- * YCbCr JPEG images can be compressed from and decompressed to any of the +- * extended RGB pixel formats or grayscale, or they can be decompressed to +- * YUV planar images. ++ * network or disk usage. YCbCr is the most common JPEG colorspace, and ++ * YCbCr JPEG images can be compressed from and decompressed to packed-pixel ++ * images with any of the extended RGB or grayscale pixel formats. YCbCr ++ * JPEG images can also be compressed from and decompressed to planar YUV ++ * images. + */ + @SuppressWarnings("checkstyle:ConstantName") + public static final int CS_YCbCr = 1; + /** + * Grayscale colorspace. The JPEG image retains only the luminance data (Y + * component), and any color data from the source image is discarded. +- * Grayscale JPEG images can be compressed from and decompressed to any of +- * the extended RGB pixel formats or grayscale, or they can be decompressed +- * to YUV planar images. ++ * Grayscale JPEG images can be compressed from and decompressed to ++ * packed-pixel images with any of the extended RGB or grayscale pixel ++ * formats, or they can be compressed from and decompressed to planar YUV ++ * images. + */ + public static final int CS_GRAY = 2; + /** + * CMYK colorspace. 
When compressing the JPEG image, the C, M, Y, and K + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. CMYK JPEG images can +- * only be decompressed to CMYK pixels. ++ * only be decompressed to packed-pixel images with the CMYK pixel format. + */ + public static final int CS_CMYK = 3; + /** +@@ -368,14 +371,14 @@ public final class TJ { + * reversibly transformed into YCCK, and as with YCbCr, the chrominance + * components in the YCCK pixels can be subsampled without incurring major + * perceptual loss. YCCK JPEG images can only be compressed from and +- * decompressed to CMYK pixels. ++ * decompressed to packed-pixel images with the CMYK pixel format. + */ + public static final int CS_YCCK = 4; + + + /** +- * The uncompressed source/destination image is stored in bottom-up (Windows, +- * OpenGL) order, not top-down (X11) order. ++ * Rows in the packed-pixel source/destination image are stored in bottom-up ++ * (Windows, OpenGL) order rather than in top-down (X11) order. + */ + public static final int FLAG_BOTTOMUP = 2; + +@@ -394,41 +397,39 @@ public final class TJ { + + /** + * When decompressing an image that was compressed using chrominance +- * subsampling, use the fastest chrominance upsampling algorithm available in +- * the underlying codec. The default is to use smooth upsampling, which +- * creates a smooth transition between neighboring chrominance components in +- * order to reduce upsampling artifacts in the decompressed image. ++ * subsampling, use the fastest chrominance upsampling algorithm available. ++ * The default is to use smooth upsampling, which creates a smooth transition ++ * between neighboring chrominance components in order to reduce upsampling ++ * artifacts in the decompressed image. + */ + public static final int FLAG_FASTUPSAMPLE = 256; + /** +- * Use the fastest DCT/IDCT algorithm available in the underlying codec. 
The +- * default if this flag is not specified is implementation-specific. For +- * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast +- * algorithm by default when compressing, because this has been shown to have +- * only a very slight effect on accuracy, but it uses the accurate algorithm +- * when decompressing, because this has been shown to have a larger effect. ++ * Use the fastest DCT/IDCT algorithm available. The default if this flag is ++ * not specified is implementation-specific. For example, the implementation ++ * of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default ++ * when compressing, because this has been shown to have only a very slight ++ * effect on accuracy, but it uses the accurate algorithm when decompressing, ++ * because this has been shown to have a larger effect. + */ + public static final int FLAG_FASTDCT = 2048; + /** +- * Use the most accurate DCT/IDCT algorithm available in the underlying +- * codec. The default if this flag is not specified is +- * implementation-specific. For example, the implementation of TurboJPEG for +- * libjpeg[-turbo] uses the fast algorithm by default when compressing, +- * because this has been shown to have only a very slight effect on accuracy, +- * but it uses the accurate algorithm when decompressing, because this has +- * been shown to have a larger effect. ++ * Use the most accurate DCT/IDCT algorithm available. The default if this ++ * flag is not specified is implementation-specific. For example, the ++ * implementation of the TurboJPEG API in libjpeg-turbo uses the fast ++ * algorithm by default when compressing, because this has been shown to have ++ * only a very slight effect on accuracy, but it uses the accurate algorithm ++ * when decompressing, because this has been shown to have a larger effect. 
+ */ + public static final int FLAG_ACCURATEDCT = 4096; + /** + * Immediately discontinue the current compression/decompression/transform +- * operation if the underlying codec throws a warning (non-fatal error). The +- * default behavior is to allow the operation to complete unless a fatal +- * error is encountered. ++ * operation if a warning (non-fatal error) occurs. The default behavior is ++ * to allow the operation to complete unless a fatal error is encountered. + *

    + * NOTE: due to the design of the TurboJPEG Java API, only certain methods + * (specifically, {@link TJDecompressor TJDecompressor.decompress*()} methods +- * with a void return type) will complete and leave the output image in a +- * fully recoverable state after a non-fatal error occurs. ++ * with a void return type) will complete and leave the destination image in ++ * a fully recoverable state after a non-fatal error occurs. + */ + public static final int FLAG_STOPONWARNING = 8192; + /** +@@ -455,13 +456,13 @@ public final class TJ { + */ + public static final int NUMERR = 2; + /** +- * The error was non-fatal and recoverable, but the image may still be +- * corrupt. ++ * The error was non-fatal and recoverable, but the destination image may ++ * still be corrupt. + *

    + * NOTE: due to the design of the TurboJPEG Java API, only certain methods + * (specifically, {@link TJDecompressor TJDecompressor.decompress*()} methods +- * with a void return type) will complete and leave the output image in a +- * fully recoverable state after a non-fatal error occurs. ++ * with a void return type) will complete and leave the destination image in ++ * a fully recoverable state after a non-fatal error occurs. + */ + public static final int ERR_WARNING = 0; + /** +@@ -479,7 +480,7 @@ public final class TJ { + * @param height the height (in pixels) of the JPEG image + * + * @param jpegSubsamp the level of chrominance subsampling to be used when +- * generating the JPEG image (one of {@link TJ TJ.SAMP_*}) ++ * generating the JPEG image (one of {@link #SAMP_444 TJ.SAMP_*}) + * + * @return the maximum size of the buffer (in bytes) required to hold a JPEG + * image with the given width, height, and level of chrominance subsampling. +@@ -487,23 +488,27 @@ public final class TJ { + public static native int bufSize(int width, int height, int jpegSubsamp); + + /** +- * Returns the size of the buffer (in bytes) required to hold a YUV planar +- * image with the given width, height, and level of chrominance subsampling. ++ * Returns the size of the buffer (in bytes) required to hold a unified ++ * planar YUV image with the given width, height, and level of chrominance ++ * subsampling. + * + * @param width the width (in pixels) of the YUV image + * +- * @param pad the width of each line in each plane of the image is padded to +- * the nearest multiple of this number of bytes (must be a power of 2.) ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n specifies that each row in each plane of ++ * the YUV image will be padded to the nearest multiple of n bytes ++ * (1 = unpadded.) 
+ * + * @param height the height (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling used in the YUV +- * image (one of {@link TJ TJ.SAMP_*}) ++ * image (one of {@link #SAMP_444 TJ.SAMP_*}) + * +- * @return the size of the buffer (in bytes) required to hold a YUV planar +- * image with the given width, height, and level of chrominance subsampling. ++ * @return the size of the buffer (in bytes) required to hold a unified ++ * planar YUV image with the given width, height, and level of chrominance ++ * subsampling. + */ +- public static native int bufSizeYUV(int width, int pad, int height, ++ public static native int bufSizeYUV(int width, int align, int height, + int subsamp); + + /** +@@ -523,16 +528,16 @@ public final class TJ { + * @param width width (in pixels) of the YUV image. NOTE: this is the width + * of the whole image, not the plane width. + * +- * @param stride bytes per line in the image plane. ++ * @param stride bytes per row in the image plane. + * + * @param height height (in pixels) of the YUV image. NOTE: this is the + * height of the whole image, not the plane height. + * + * @param subsamp the level of chrominance subsampling used in the YUV +- * image (one of {@link TJ TJ.SAMP_*}) ++ * image (one of {@link #SAMP_444 TJ.SAMP_*}) + * +- * @return the size of the buffer (in bytes) required to hold a YUV planar +- * image with the given parameters. ++ * @return the size of the buffer (in bytes) required to hold a YUV image ++ * plane with the given parameters. + */ + public static native int planeSizeYUV(int componentID, int width, int stride, + int height, int subsamp); +@@ -547,7 +552,7 @@ public final class TJ { + * @param width width (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling used in the YUV image +- * (one of {@link TJ TJ.SAMP_*}) ++ * (one of {@link #SAMP_444 TJ.SAMP_*}) + * + * @return the plane width of a YUV image plane with the given parameters. 
+ */ +@@ -563,7 +568,7 @@ public final class TJ { + * @param height height (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling used in the YUV image +- * (one of {@link TJ TJ.SAMP_*}) ++ * (one of {@link #SAMP_444 TJ.SAMP_*}) + * + * @return the plane height of a YUV image plane with the given parameters. + */ +@@ -571,11 +576,11 @@ public final class TJ { + int subsamp); + + /** +- * Returns a list of fractional scaling factors that the JPEG decompressor in +- * this implementation of TurboJPEG supports. ++ * Returns a list of fractional scaling factors that the JPEG decompressor ++ * supports. + * +- * @return a list of fractional scaling factors that the JPEG decompressor in +- * this implementation of TurboJPEG supports. ++ * @return a list of fractional scaling factors that the JPEG decompressor ++ * supports. + */ + public static native TJScalingFactor[] getScalingFactors(); + +diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java +index 6d4830f..d5bbd82 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJCompressor.java ++++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java +@@ -1,5 +1,6 @@ + /* +- * Copyright (C)2011-2015, 2018, 2020 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011-2015, 2018, 2020, 2023 D. R. Commander. ++ * All Rights Reserved. + * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -49,7 +50,7 @@ public class TJCompressor implements Closeable { + } + + /** +- * Create a TurboJPEG compressor instance and associate the uncompressed ++ * Create a TurboJPEG compressor instance and associate the packed-pixel + * source image stored in srcImage with the newly created + * instance. 
+ * +@@ -85,7 +86,7 @@ public class TJCompressor implements Closeable { + } + + /** +- * Create a TurboJPEG compressor instance and associate the uncompressed ++ * Create a TurboJPEG compressor instance and associate the packed-pixel + * source image stored in srcImage with the newly created + * instance. + * +@@ -110,11 +111,11 @@ public class TJCompressor implements Closeable { + } + + /** +- * Associate an uncompressed RGB, grayscale, or CMYK source image with this ++ * Associate a packed-pixel RGB, grayscale, or CMYK source image with this + * compressor instance. + * +- * @param srcImage image buffer containing RGB, grayscale, or CMYK pixels to +- * be compressed or encoded. This buffer is not modified. ++ * @param srcImage buffer containing a packed-pixel RGB, grayscale, or CMYK ++ * source image to be compressed or encoded. This buffer is not modified. + * + * @param x x offset (in pixels) of the region in the source image from which + * the JPEG or YUV image should be compressed/encoded +@@ -125,14 +126,16 @@ public class TJCompressor implements Closeable { + * @param width width (in pixels) of the region in the source image from + * which the JPEG or YUV image should be compressed/encoded + * +- * @param pitch bytes per line of the source image. Normally, this should be +- * width * TJ.pixelSize(pixelFormat) if the source image is +- * unpadded, but you can use this parameter to, for instance, specify that +- * the scanlines in the source image are padded to a 4-byte boundary or to +- * compress/encode a JPEG or YUV image from a region of a larger source +- * image. You can also be clever and use this parameter to skip lines, etc. +- * Setting this parameter to 0 is the equivalent of setting it to +- * width * TJ.pixelSize(pixelFormat). ++ * @param pitch bytes per row in the source image. Normally this should be ++ * width * ++ * {@link TJ#getPixelSize TJ.getPixelSize}(pixelFormat), ++ * if the source image is unpadded. 
However, you can use this parameter to, ++ * for instance, specify that the rows in the source image are padded to the ++ * nearest multiple of 4 bytes or to compress/encode a JPEG or YUV image from ++ * a region of a larger source image. You can also be clever and use this ++ * parameter to skip rows, etc. Setting this parameter to 0 is the ++ * equivalent of setting it to width * ++ * {@link TJ#getPixelSize TJ.getPixelSize}(pixelFormat). + * + * @param height height (in pixels) of the region in the source image from + * which the JPEG or YUV image should be compressed/encoded +@@ -174,11 +177,12 @@ public class TJCompressor implements Closeable { + } + + /** +- * Associate an uncompressed RGB or grayscale source image with this ++ * Associate a packed-pixel RGB or grayscale source image with this + * compressor instance. + * +- * @param srcImage a BufferedImage instance containing RGB or +- * grayscale pixels to be compressed or encoded. This image is not modified. ++ * @param srcImage a BufferedImage instance containing a ++ * packed-pixel RGB or grayscale source image to be compressed or encoded. ++ * This image is not modified. + * + * @param x x offset (in pixels) of the region in the source image from which + * the JPEG or YUV image should be compressed/encoded +@@ -260,11 +264,10 @@ public class TJCompressor implements Closeable { + } + + /** +- * Associate an uncompressed YUV planar source image with this compressor +- * instance. ++ * Associate a planar YUV source image with this compressor instance. + * +- * @param srcImage YUV planar image to be compressed. This image is not +- * modified. ++ * @param srcImage planar YUV source image to be compressed. This image is ++ * not modified. 
+ */ + public void setSourceImage(YUVImage srcImage) throws TJException { + if (handle == 0) init(); +@@ -281,16 +284,16 @@ public class TJCompressor implements Closeable { + * {@link TJ#CS_YCbCr}) or from CMYK to YCCK (see {@link TJ#CS_YCCK}) as part + * of the JPEG compression process, some of the Cb and Cr (chrominance) + * components can be discarded or averaged together to produce a smaller +- * image with little perceptible loss of image clarity (the human eye is more +- * sensitive to small changes in brightness than to small changes in color.) +- * This is called "chrominance subsampling". ++ * image with little perceptible loss of image clarity. (The human eye is ++ * more sensitive to small changes in brightness than to small changes in ++ * color.) This is called "chrominance subsampling". + *

    +- * NOTE: This method has no effect when compressing a JPEG image from a YUV +- * planar source. In that case, the level of chrominance subsampling in +- * the JPEG image is determined by the source. Furthermore, this method has +- * no effect when encoding to a pre-allocated {@link YUVImage} instance. In +- * that case, the level of chrominance subsampling is determined by the +- * destination. ++ * NOTE: This method has no effect when compressing a JPEG image from a ++ * planar YUV source image. In that case, the level of chrominance ++ * subsampling in the JPEG image is determined by the source image. ++ * Furthermore, this method has no effect when encoding to a pre-allocated ++ * {@link YUVImage} instance. In that case, the level of chrominance ++ * subsampling is determined by the destination image. + * + * @param newSubsamp the level of chrominance subsampling to use in + * subsequent compress/encode oeprations (one of +@@ -315,8 +318,9 @@ public class TJCompressor implements Closeable { + } + + /** +- * Compress the uncompressed source image associated with this compressor +- * instance and output a JPEG image to the given destination buffer. ++ * Compress the packed-pixel or planar YUV source image associated with this ++ * compressor instance and output a JPEG image to the given destination ++ * buffer. + * + * @param dstBuf buffer that will receive the JPEG image. Use + * {@link TJ#bufSize} to determine the maximum size for this buffer based on +@@ -366,8 +370,8 @@ public class TJCompressor implements Closeable { + } + + /** +- * Compress the uncompressed source image associated with this compressor +- * instance and return a buffer containing a JPEG image. ++ * Compress the packed-pixel or planar YUV source image associated with this ++ * compressor instance and return a buffer containing a JPEG image. 
+ * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} +@@ -417,14 +421,14 @@ public class TJCompressor implements Closeable { + } + + /** +- * Encode the uncompressed source image associated with this compressor +- * instance into a YUV planar image and store it in the given +- * YUVImage instance. This method uses the accelerated color +- * conversion routines in TurboJPEG's underlying codec but does not execute +- * any of the other steps in the JPEG compression process. Encoding +- * CMYK source images to YUV is not supported. +- * +- * @param dstImage {@link YUVImage} instance that will receive the YUV planar ++ * Encode the packed-pixel source image associated with this compressor ++ * instance into a planar YUV image and store it in the given ++ * {@link YUVImage} instance. This method performs color conversion (which ++ * is accelerated in the libjpeg-turbo implementation) but does not execute ++ * any of the other steps in the JPEG compression process. Encoding CMYK ++ * source images into YUV images is not supported. ++ * ++ * @param dstImage {@link YUVImage} instance that will receive the planar YUV + * image + * + * @param flags the bitwise OR of one or more of +@@ -469,52 +473,54 @@ public class TJCompressor implements Closeable { + } + + /** +- * Encode the uncompressed source image associated with this compressor +- * instance into a unified YUV planar image buffer and return a +- * YUVImage instance containing the encoded image. This method +- * uses the accelerated color conversion routines in TurboJPEG's underlying +- * codec but does not execute any of the other steps in the JPEG compression +- * process. Encoding CMYK source images to YUV is not supported. ++ * Encode the packed-pixel source image associated with this compressor ++ * instance into a unified planar YUV image and return a {@link YUVImage} ++ * instance containing the encoded image. 
This method performs color ++ * conversion (which is accelerated in the libjpeg-turbo implementation) but ++ * does not execute any of the other steps in the JPEG compression process. ++ * Encoding CMYK source images into YUV images is not supported. + * +- * @param pad the width of each line in each plane of the YUV image will be +- * padded to the nearest multiple of this number of bytes (must be a power of +- * 2.) ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n will cause each row in each plane of the ++ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.) + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a YUV planar image. ++ * @return a {@link YUVImage} instance containing the unified planar YUV ++ * encoded image + */ +- public YUVImage encodeYUV(int pad, int flags) throws TJException { ++ public YUVImage encodeYUV(int align, int flags) throws TJException { + checkSourceImage(); + checkSubsampling(); +- if (pad < 1 || ((pad & (pad - 1)) != 0)) ++ if (align < 1 || ((align & (align - 1)) != 0)) + throw new IllegalStateException("Invalid argument in encodeYUV()"); +- YUVImage dstYUVImage = new YUVImage(srcWidth, pad, srcHeight, subsamp); ++ YUVImage dstYUVImage = new YUVImage(srcWidth, align, srcHeight, subsamp); + encodeYUV(dstYUVImage, flags); + return dstYUVImage; + } + + /** +- * Encode the uncompressed source image associated with this compressor ++ * Encode the packed-pixel source image associated with this compressor + * instance into separate Y, U (Cb), and V (Cr) image planes and return a +- * YUVImage instance containing the encoded image planes. This +- * method uses the accelerated color conversion routines in TurboJPEG's +- * underlying codec but does not execute any of the other steps in the JPEG +- * compression process. Encoding CMYK source images to YUV is not supported. 
++ * {@link YUVImage} instance containing the encoded image planes. This ++ * method performs color conversion (which is accelerated in the ++ * libjpeg-turbo implementation) but does not execute any of the other steps ++ * in the JPEG compression process. Encoding CMYK source images into YUV ++ * images is not supported. + * + * @param strides an array of integers, each specifying the number of bytes +- * per line in the corresponding plane of the output image. Setting the +- * stride for any plane to 0 is the same as setting it to the component width +- * of the plane. If strides is null, then the strides for all +- * planes will be set to their respective component widths. You can adjust +- * the strides in order to add an arbitrary amount of line padding to each +- * plane. ++ * per row in the corresponding plane of the YUV source image. Setting the ++ * stride for any plane to 0 is the same as setting it to the plane width ++ * (see {@link YUVImage}.) If strides is null, then the strides ++ * for all planes will be set to their respective plane widths. You can ++ * adjust the strides in order to add an arbitrary amount of row padding to ++ * each plane. + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a YUV planar image. 
++ * @return a {@link YUVImage} instance containing the encoded image planes + */ + public YUVImage encodeYUV(int[] strides, int flags) throws TJException { + checkSourceImage(); +@@ -679,6 +685,5 @@ public class TJCompressor implements Closeable { + private int subsamp = -1; + private int jpegQuality = -1; + private int compressedSize = 0; +- private int yuvPad = 4; + private ByteOrder byteOrder = null; + } +diff --git a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +index 9a34587..3a66fd9 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java ++++ b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2011, 2013 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011, 2013, 2023 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -58,7 +58,7 @@ public interface TJCustomFilter { + * component plane to which coeffBuffer belongs + * + * @param componentID ID number of the component plane to which +- * coeffBuffer belongs (Y, Cb, and Cr have, respectively, ID's ++ * coeffBuffer belongs. (Y, Cb, and Cr have, respectively, ID's + * of 0, 1, and 2 in typical JPEG images.) + * + * @param transformID ID number of the transformed image to which +diff --git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +index aba390b..e35f80a 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java ++++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +@@ -1,5 +1,6 @@ + /* +- * Copyright (C)2011-2015, 2018, 2022 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011-2015, 2018, 2022-2023 D. R. Commander. ++ * All Rights Reserved. + * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -50,10 +51,12 @@ public class TJDecompressor implements Closeable { + + /** + * Create a TurboJPEG decompressor instance and associate the JPEG source +- * image stored in jpegImage with the newly created instance. ++ * image or "abbreviated table specification" (AKA "tables-only") datastream ++ * stored in jpegImage with the newly created instance. + * +- * @param jpegImage JPEG image buffer (size of the JPEG image is assumed to +- * be the length of the array.) This buffer is not modified. ++ * @param jpegImage buffer containing a JPEG source image or tables-only ++ * datastream. (The size of the JPEG image or datastream is assumed to be ++ * the length of the array.) This buffer is not modified. + */ + public TJDecompressor(byte[] jpegImage) throws TJException { + init(); +@@ -62,12 +65,15 @@ public class TJDecompressor implements Closeable { + + /** + * Create a TurboJPEG decompressor instance and associate the JPEG source +- * image of length imageSize bytes stored in +- * jpegImage with the newly created instance. ++ * image or "abbreviated table specification" (AKA "tables-only") datastream ++ * of length imageSize bytes stored in jpegImage ++ * with the newly created instance. + * +- * @param jpegImage JPEG image buffer. This buffer is not modified. ++ * @param jpegImage buffer containing a JPEG source image or tables-only ++ * datastream. This buffer is not modified. 
+ * +- * @param imageSize size of the JPEG image (in bytes) ++ * @param imageSize size of the JPEG source image or tables-only datastream ++ * (in bytes) + */ + public TJDecompressor(byte[] jpegImage, int imageSize) throws TJException { + init(); +@@ -75,11 +81,11 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Create a TurboJPEG decompressor instance and associate the YUV planar ++ * Create a TurboJPEG decompressor instance and associate the planar YUV + * source image stored in yuvImage with the newly created + * instance. + * +- * @param yuvImage {@link YUVImage} instance containing a YUV planar ++ * @param yuvImage {@link YUVImage} instance containing a planar YUV source + * image to be decoded. This image is not modified. + */ + @SuppressWarnings("checkstyle:HiddenField") +@@ -93,18 +99,18 @@ public class TJDecompressor implements Closeable { + * "tables-only") datastream of length imageSize bytes stored in + * jpegImage with this decompressor instance. If + * jpegImage contains a JPEG image, then this image will be used +- * as the source image for subsequent decompress operations. Passing a ++ * as the source image for subsequent decompression operations. Passing a + * tables-only datastream to this method primes the decompressor with + * quantization and Huffman tables that can be used when decompressing + * subsequent "abbreviated image" datastreams. This is useful, for instance, + * when decompressing video streams in which all frames share the same + * quantization and Huffman tables. + * +- * @param jpegImage buffer containing a JPEG image or an "abbreviated table +- * specification" (AKA "tables-only") datastream. This buffer is not +- * modified. ++ * @param jpegImage buffer containing a JPEG source image or tables-only ++ * datastream. This buffer is not modified. 
+ * +- * @param imageSize size of the JPEG image (in bytes) ++ * @param imageSize size of the JPEG source image or tables-only datastream ++ * (in bytes) + */ + public void setSourceImage(byte[] jpegImage, int imageSize) + throws TJException { +@@ -127,12 +133,12 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Associate the specified YUV planar source image with this decompressor +- * instance. Subsequent decompress operations will decode this image into an +- * RGB or grayscale destination image. ++ * Associate the specified planar YUV source image with this decompressor ++ * instance. Subsequent decompression operations will decode this image into ++ * a packed-pixel RGB or grayscale destination image. + * +- * @param srcImage {@link YUVImage} instance containing a YUV planar image to +- * be decoded. This image is not modified. ++ * @param srcImage {@link YUVImage} instance containing a planar YUV source ++ * image to be decoded. This image is not modified. + */ + public void setSourceImage(YUVImage srcImage) { + if (srcImage == null) +@@ -210,9 +216,9 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Returns the JPEG image buffer associated with this decompressor instance. ++ * Returns the JPEG buffer associated with this decompressor instance. + * +- * @return the JPEG image buffer associated with this decompressor instance. ++ * @return the JPEG buffer associated with this decompressor instance. + */ + public byte[] getJPEGBuf() { + if (jpegBuf == null) +@@ -239,14 +245,14 @@ public class TJDecompressor implements Closeable { + * height. + * + * @param desiredWidth desired width (in pixels) of the decompressed image. +- * Setting this to 0 is the same as setting it to the width of the JPEG image +- * (in other words, the width will not be considered when determining the +- * scaled image size.) ++ * Setting this to 0 is the same as setting it to the width of the JPEG ++ * image. 
(In other words, the width will not be considered when determining ++ * the scaled image size.) + * + * @param desiredHeight desired height (in pixels) of the decompressed image. + * Setting this to 0 is the same as setting it to the height of the JPEG +- * image (in other words, the height will not be considered when determining +- * the scaled image size.) ++ * image. (In other words, the height will not be considered when ++ * determining the scaled image size.) + * + * @return the width of the largest scaled-down image that the TurboJPEG + * decompressor can generate without exceeding the desired image width and +@@ -280,14 +286,14 @@ public class TJDecompressor implements Closeable { + * height. + * + * @param desiredWidth desired width (in pixels) of the decompressed image. +- * Setting this to 0 is the same as setting it to the width of the JPEG image +- * (in other words, the width will not be considered when determining the +- * scaled image size.) ++ * Setting this to 0 is the same as setting it to the width of the JPEG ++ * image. (In other words, the width will not be considered when determining ++ * the scaled image size.) + * + * @param desiredHeight desired height (in pixels) of the decompressed image. + * Setting this to 0 is the same as setting it to the height of the JPEG +- * image (in other words, the height will not be considered when determining +- * the scaled image size.) ++ * image. (In other words, the height will not be considered when ++ * determining the scaled image size.) + * + * @return the height of the largest scaled-down image that the TurboJPEG + * decompressor can generate without exceeding the desired image width and +@@ -316,27 +322,27 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Decompress the JPEG source image or decode the YUV source image associated +- * with this decompressor instance and output a grayscale, RGB, or CMYK image +- * to the given destination buffer. 
++ * Decompress the JPEG source image or decode the planar YUV source image ++ * associated with this decompressor instance and output a packed-pixel ++ * grayscale, RGB, or CMYK image to the given destination buffer. + *

    +- * NOTE: The output image is fully recoverable if this method throws a ++ * NOTE: The destination image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) + * +- * @param dstBuf buffer that will receive the decompressed/decoded image. +- * If the source image is a JPEG image, then this buffer should normally be +- * pitch * scaledHeight bytes in size, where +- * scaledHeight can be determined by calling +- * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight) +- * with one of the scaling factors returned from {@link +- * TJ#getScalingFactors} or by calling {@link #getScaledHeight}. If the +- * source image is a YUV image, then this buffer should normally be +- * pitch * height bytes in size, where height is +- * the height of the YUV image. However, the buffer may also be larger than +- * the dimensions of the source image, in which case the x, +- * y, and pitch parameters can be used to specify +- * the region into which the source image should be decompressed/decoded. ++ * @param dstBuf buffer that will receive the packed-pixel ++ * decompressed/decoded image. If the source image is a JPEG image, then ++ * this buffer should normally be pitch * scaledHeight bytes in ++ * size, where scaledHeight can be determined by calling ++ * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight) ++ * with one of the scaling factors returned from {@link TJ#getScalingFactors} ++ * or by calling {@link #getScaledHeight}. If the source image is a YUV ++ * image, then this buffer should normally be pitch * height ++ * bytes in size, where height is the height of the YUV image. ++ * However, the buffer may also be larger than the dimensions of the source ++ * image, in which case the x, y, and ++ * pitch parameters can be used to specify the region into which ++ * the source image should be decompressed/decoded. 
+ * + * @param x x offset (in pixels) of the region in the destination image into + * which the source image should be decompressed/decoded +@@ -350,22 +356,24 @@ public class TJDecompressor implements Closeable { + * than the source image dimensions, then TurboJPEG will use scaling in the + * JPEG decompressor to generate the largest possible image that will fit + * within the desired dimensions. Setting this to 0 is the same as setting +- * it to the width of the JPEG image (in other words, the width will not be ++ * it to the width of the JPEG image. (In other words, the width will not be + * considered when determining the scaled image size.) This parameter is + * ignored if the source image is a YUV image. + * +- * @param pitch bytes per line of the destination image. Normally, this +- * should be set to scaledWidth * TJ.pixelSize(pixelFormat) if +- * the destination image is unpadded, but you can use this to, for instance, +- * pad each line of the destination image to a 4-byte boundary or to +- * decompress/decode the source image into a region of a larger image. NOTE: +- * if the source image is a JPEG image, then scaledWidth can be +- * determined by calling +- * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth) +- * or by calling {@link #getScaledWidth}. If the source image is a +- * YUV image, then scaledWidth is the width of the YUV image. ++ * @param pitch bytes per row in the destination image. Normally this should ++ * be set to scaledWidth * ++ * {@link TJ#getPixelSize TJ.getPixelSize}(pixelFormat), ++ * if the destination image will be unpadded. However, you can use this to, ++ * for instance, pad each row of the destination image to the nearest ++ * multiple of 4 bytes or to decompress/decode the source image into a region ++ * of a larger image. 
NOTE: if the source image is a JPEG image, then ++ * scaledWidth can be determined by calling ++ * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth) ++ * or by calling {@link #getScaledWidth}. If the source image is a YUV ++ * image, then scaledWidth is the width of the YUV image. + * Setting this parameter to 0 is the equivalent of setting it to +- * scaledWidth * TJ.pixelSize(pixelFormat). ++ * scaledWidth * ++ * {@link TJ#getPixelSize TJ.getPixelSize}(pixelFormat). + * + * @param desiredHeight If the source image is a JPEG image, then this + * specifies the desired height (in pixels) of the decompressed image (or +@@ -373,8 +381,8 @@ public class TJDecompressor implements Closeable { + * than the source image dimensions, then TurboJPEG will use scaling in the + * JPEG decompressor to generate the largest possible image that will fit + * within the desired dimensions. Setting this to 0 is the same as setting +- * it to the height of the JPEG image (in other words, the height will not be +- * considered when determining the scaled image size.) This parameter is ++ * it to the height of the JPEG image. (In other words, the height will not ++ * be considered when determining the scaled image size.) This parameter is + * ignored if the source image is a YUV image. 
+ * + * @param pixelFormat pixel format of the decompressed/decoded image (one of +@@ -387,7 +395,7 @@ public class TJDecompressor implements Closeable { + int pitch, int desiredHeight, int pixelFormat, + int flags) throws TJException { + if (jpegBuf == null && yuvImage == null) +- throw new IllegalStateException(NO_ASSOC_ERROR); ++ throw new IllegalStateException("No source image is associated with this instance"); + if (dstBuf == null || x < 0 || y < 0 || pitch < 0 || + (yuvImage != null && (desiredWidth < 0 || desiredHeight < 0)) || + pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0) +@@ -421,8 +429,9 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Decompress the JPEG source image associated with this decompressor +- * instance and return a buffer containing the decompressed image. ++ * Decompress the JPEG source image or decode the planar YUV source image ++ * associated with this decompressor instance and return a buffer containing ++ * the packed-pixel decompressed image. + * + * @param desiredWidth see + * {@link #decompress(byte[], int, int, int, int, int, int, int)} +@@ -442,7 +451,7 @@ public class TJDecompressor implements Closeable { + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a buffer containing the decompressed image. ++ * @return a buffer containing the packed-pixel decompressed image. + */ + public byte[] decompress(int desiredWidth, int pitch, int desiredHeight, + int pixelFormat, int flags) throws TJException { +@@ -462,22 +471,22 @@ public class TJDecompressor implements Closeable { + + /** + * Decompress the JPEG source image associated with this decompressor +- * instance into a YUV planar image and store it in the given +- * YUVImage instance. This method performs JPEG decompression +- * but leaves out the color conversion step, so a planar YUV image is +- * generated instead of an RGB or grayscale image. 
This method cannot be +- * used to decompress JPEG source images with the CMYK or YCCK colorspace. ++ * instance into a planar YUV image and store it in the given ++ * {@link YUVImage} instance. This method performs JPEG decompression but ++ * leaves out the color conversion step, so a planar YUV image is generated ++ * instead of a packed-pixel image. This method cannot be used to decompress ++ * JPEG source images with the CMYK or YCCK colorspace. + *

    +- * NOTE: The YUV planar output image is fully recoverable if this method ++ * NOTE: The planar YUV destination image is fully recoverable if this method + * throws a non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) + * +- * @param dstImage {@link YUVImage} instance that will receive the YUV planar +- * image. The level of subsampling specified in this YUVImage +- * instance must match that of the JPEG image, and the width and height +- * specified in the YUVImage instance must match one of the +- * scaled image sizes that TurboJPEG is capable of generating from the JPEG +- * source image. ++ * @param dstImage {@link YUVImage} instance that will receive the planar YUV ++ * decompressed image. The level of subsampling specified in this ++ * {@link YUVImage} instance must match that of the JPEG image, and the width ++ * and height specified in the {@link YUVImage} instance must match one of ++ * the scaled image sizes that the decompressor is capable of generating from ++ * the JPEG source image. 
+ * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} +@@ -494,7 +503,7 @@ public class TJDecompressor implements Closeable { + dstImage.getHeight()); + if (scaledWidth != dstImage.getWidth() || + scaledHeight != dstImage.getHeight()) +- throw new IllegalArgumentException("YUVImage dimensions do not match one of the scaled image sizes that TurboJPEG is capable of generating."); ++ throw new IllegalArgumentException("YUVImage dimensions do not match one of the scaled image sizes that the decompressor is capable of generating."); + if (jpegSubsamp != dstImage.getSubsamp()) + throw new IllegalArgumentException("YUVImage subsampling level does not match that of the JPEG image"); + +@@ -517,40 +526,41 @@ public class TJDecompressor implements Closeable { + /** + * Decompress the JPEG source image associated with this decompressor + * instance into a set of Y, U (Cb), and V (Cr) image planes and return a +- * YUVImage instance containing the decompressed image planes. +- * This method performs JPEG decompression but leaves out the color +- * conversion step, so a planar YUV image is generated instead of an RGB or +- * grayscale image. This method cannot be used to decompress JPEG source +- * images with the CMYK or YCCK colorspace. ++ * {@link YUVImage} instance containing the decompressed image planes. This ++ * method performs JPEG decompression but leaves out the color conversion ++ * step, so a planar YUV image is generated instead of a packed-pixel image. ++ * This method cannot be used to decompress JPEG source images with the CMYK ++ * or YCCK colorspace. + * + * @param desiredWidth desired width (in pixels) of the YUV image. If the + * desired image dimensions are different than the dimensions of the JPEG + * image being decompressed, then TurboJPEG will use scaling in the JPEG + * decompressor to generate the largest possible image that will fit within + * the desired dimensions. 
Setting this to 0 is the same as setting it to +- * the width of the JPEG image (in other words, the width will not be ++ * the width of the JPEG image. (In other words, the width will not be + * considered when determining the scaled image size.) + * + * @param strides an array of integers, each specifying the number of bytes +- * per line in the corresponding plane of the output image. Setting the +- * stride for any plane to 0 is the same as setting it to the scaled +- * component width of the plane. If strides is NULL, then the +- * strides for all planes will be set to their respective scaled component +- * widths. You can adjust the strides in order to add an arbitrary amount of +- * line padding to each plane. ++ * per row in the corresponding plane of the YUV image. Setting the stride ++ * for any plane to 0 is the same as setting it to the scaled plane width ++ * (see {@link YUVImage}.) If strides is null, then the strides ++ * for all planes will be set to their respective scaled plane widths. You ++ * can adjust the strides in order to add an arbitrary amount of row padding ++ * to each plane. + * + * @param desiredHeight desired height (in pixels) of the YUV image. If the + * desired image dimensions are different than the dimensions of the JPEG + * image being decompressed, then TurboJPEG will use scaling in the JPEG + * decompressor to generate the largest possible image that will fit within + * the desired dimensions. Setting this to 0 is the same as setting it to +- * the height of the JPEG image (in other words, the height will not be ++ * the height of the JPEG image. (In other words, the height will not be + * considered when determining the scaled image size.) + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a YUV planar image. 
++ * @return a {@link YUVImage} instance containing the decompressed image ++ * planes + */ + public YUVImage decompressToYUV(int desiredWidth, int[] strides, + int desiredHeight, +@@ -574,40 +584,41 @@ public class TJDecompressor implements Closeable { + + /** + * Decompress the JPEG source image associated with this decompressor +- * instance into a unified YUV planar image buffer and return a +- * YUVImage instance containing the decompressed image. This +- * method performs JPEG decompression but leaves out the color conversion +- * step, so a planar YUV image is generated instead of an RGB or grayscale +- * image. This method cannot be used to decompress JPEG source images with +- * the CMYK or YCCK colorspace. ++ * instance into a unified planar YUV image and return a {@link YUVImage} ++ * instance containing the decompressed image. This method performs JPEG ++ * decompression but leaves out the color conversion step, so a planar YUV ++ * image is generated instead of a packed-pixel image. This method cannot be ++ * used to decompress JPEG source images with the CMYK or YCCK colorspace. + * + * @param desiredWidth desired width (in pixels) of the YUV image. If the + * desired image dimensions are different than the dimensions of the JPEG + * image being decompressed, then TurboJPEG will use scaling in the JPEG + * decompressor to generate the largest possible image that will fit within + * the desired dimensions. Setting this to 0 is the same as setting it to +- * the width of the JPEG image (in other words, the width will not be ++ * the width of the JPEG image. (In other words, the width will not be + * considered when determining the scaled image size.) + * +- * @param pad the width of each line in each plane of the YUV image will be +- * padded to the nearest multiple of this number of bytes (must be a power of +- * 2.) ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) 
Setting this parameter to n will cause each row in each plane of the ++ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.) + * + * @param desiredHeight desired height (in pixels) of the YUV image. If the + * desired image dimensions are different than the dimensions of the JPEG + * image being decompressed, then TurboJPEG will use scaling in the JPEG + * decompressor to generate the largest possible image that will fit within + * the desired dimensions. Setting this to 0 is the same as setting it to +- * the height of the JPEG image (in other words, the height will not be ++ * the height of the JPEG image. (In other words, the height will not be + * considered when determining the scaled image size.) + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a YUV planar image. ++ * @return a {@link YUVImage} instance containing the unified planar YUV ++ * decompressed image + */ +- public YUVImage decompressToYUV(int desiredWidth, int pad, int desiredHeight, +- int flags) throws TJException { ++ public YUVImage decompressToYUV(int desiredWidth, int align, ++ int desiredHeight, int flags) ++ throws TJException { + if (flags < 0) + throw new IllegalArgumentException("Invalid argument in decompressToYUV()"); + if (jpegWidth < 1 || jpegHeight < 1 || jpegSubsamp < 0) +@@ -619,7 +630,7 @@ public class TJDecompressor implements Closeable { + + int scaledWidth = getScaledWidth(desiredWidth, desiredHeight); + int scaledHeight = getScaledHeight(desiredWidth, desiredHeight); +- YUVImage dstYUVImage = new YUVImage(scaledWidth, pad, scaledHeight, ++ YUVImage dstYUVImage = new YUVImage(scaledWidth, align, scaledHeight, + jpegSubsamp); + decompressToYUV(dstYUVImage, flags); + return dstYUVImage; +@@ -637,27 +648,27 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Decompress the JPEG source image or decode the YUV source image associated +- * with this decompressor instance and 
output a grayscale, RGB, or CMYK image +- * to the given destination buffer. ++ * Decompress the JPEG source image or decode the planar YUV source image ++ * associated with this decompressor instance and output a packed-pixel ++ * grayscale, RGB, or CMYK image to the given destination buffer. + *

    +- * NOTE: The output image is fully recoverable if this method throws a ++ * NOTE: The destination image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) + * +- * @param dstBuf buffer that will receive the decompressed/decoded image. +- * If the source image is a JPEG image, then this buffer should normally be +- * stride * scaledHeight pixels in size, where +- * scaledHeight can be determined by calling +- * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight) +- * with one of the scaling factors returned from {@link +- * TJ#getScalingFactors} or by calling {@link #getScaledHeight}. If the +- * source image is a YUV image, then this buffer should normally be +- * stride * height pixels in size, where height is +- * the height of the YUV image. However, the buffer may also be larger than +- * the dimensions of the JPEG image, in which case the x, +- * y, and stride parameters can be used to specify +- * the region into which the source image should be decompressed. ++ * @param dstBuf buffer that will receive the packed-pixel ++ * decompressed/decoded image. If the source image is a JPEG image, then ++ * this buffer should normally be stride * scaledHeight pixels ++ * in size, where scaledHeight can be determined by calling ++ * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight) ++ * with one of the scaling factors returned from {@link TJ#getScalingFactors} ++ * or by calling {@link #getScaledHeight}. If the source image is a YUV ++ * image, then this buffer should normally be stride * height ++ * pixels in size, where height is the height of the YUV image. ++ * However, the buffer may also be larger than the dimensions of the JPEG ++ * image, in which case the x, y, and ++ * stride parameters can be used to specify the region into ++ * which the source image should be decompressed. 
+ * + * @param x x offset (in pixels) of the region in the destination image into + * which the source image should be decompressed/decoded +@@ -671,18 +682,18 @@ public class TJDecompressor implements Closeable { + * than the source image dimensions, then TurboJPEG will use scaling in the + * JPEG decompressor to generate the largest possible image that will fit + * within the desired dimensions. Setting this to 0 is the same as setting +- * it to the width of the JPEG image (in other words, the width will not be ++ * it to the width of the JPEG image. (In other words, the width will not be + * considered when determining the scaled image size.) This parameter is + * ignored if the source image is a YUV image. + * +- * @param stride pixels per line of the destination image. Normally, this ++ * @param stride pixels per row in the destination image. Normally this + * should be set to scaledWidth, but you can use this to, for + * instance, decompress the JPEG image into a region of a larger image. + * NOTE: if the source image is a JPEG image, then scaledWidth +- * can be determined by calling +- * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth) +- * or by calling {@link #getScaledWidth}. If the source image is a +- * YUV image, then scaledWidth is the width of the YUV image. ++ * can be determined by calling ++ * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth) ++ * or by calling {@link #getScaledWidth}. If the source image is a YUV ++ * image, then scaledWidth is the width of the YUV image. + * Setting this parameter to 0 is the equivalent of setting it to + * scaledWidth. + * +@@ -692,8 +703,8 @@ public class TJDecompressor implements Closeable { + * than the source image dimensions, then TurboJPEG will use scaling in the + * JPEG decompressor to generate the largest possible image that will fit + * within the desired dimensions. 
Setting this to 0 is the same as setting +- * it to the height of the JPEG image (in other words, the height will not be +- * considered when determining the scaled image size.) This parameter is ++ * it to the height of the JPEG image. (In other words, the height will not ++ * be considered when determining the scaled image size.) This parameter is + * ignored if the source image is a YUV image. + * + * @param pixelFormat pixel format of the decompressed image (one of +@@ -706,7 +717,7 @@ public class TJDecompressor implements Closeable { + int stride, int desiredHeight, int pixelFormat, + int flags) throws TJException { + if (jpegBuf == null && yuvImage == null) +- throw new IllegalStateException(NO_ASSOC_ERROR); ++ throw new IllegalStateException("No source image is associated with this instance"); + if (dstBuf == null || x < 0 || y < 0 || stride < 0 || + (yuvImage != null && (desiredWidth < 0 || desiredHeight < 0)) || + pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0) +@@ -722,21 +733,22 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Decompress the JPEG source image or decode the YUV source image associated +- * with this decompressor instance and output a decompressed/decoded image to +- * the given BufferedImage instance. ++ * Decompress the JPEG source image or decode the planar YUV source image ++ * associated with this decompressor instance and output a packed-pixel ++ * decompressed/decoded image to the given BufferedImage ++ * instance. + *

    +- * NOTE: The output image is fully recoverable if this method throws a ++ * NOTE: The destination image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) + * + * @param dstImage a BufferedImage instance that will receive +- * the decompressed/decoded image. If the source image is a JPEG image, then +- * the width and height of the BufferedImage instance must match +- * one of the scaled image sizes that TurboJPEG is capable of generating from +- * the JPEG image. If the source image is a YUV image, then the width and +- * height of the BufferedImage instance must match the width and +- * height of the YUV image. ++ * the packed-pixel decompressed/decoded image. If the source image is a ++ * JPEG image, then the width and height of the BufferedImage ++ * instance must match one of the scaled image sizes that the decompressor is ++ * capable of generating from the JPEG image. If the source image is a YUV ++ * image, then the width and height of the BufferedImage ++ * instance must match the width and height of the YUV image. 
+ * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} +@@ -759,7 +771,7 @@ public class TJDecompressor implements Closeable { + scaledWidth = getScaledWidth(desiredWidth, desiredHeight); + scaledHeight = getScaledHeight(desiredWidth, desiredHeight); + if (scaledWidth != desiredWidth || scaledHeight != desiredHeight) +- throw new IllegalArgumentException("BufferedImage dimensions do not match one of the scaled image sizes that TurboJPEG is capable of generating."); ++ throw new IllegalArgumentException("BufferedImage dimensions do not match one of the scaled image sizes that the decompressor is capable of generating."); + } + int pixelFormat; boolean intPixels = false; + if (byteOrder == null) +@@ -827,9 +839,10 @@ public class TJDecompressor implements Closeable { + } + + /** +- * Decompress the JPEG source image or decode the YUV source image associated +- * with this decompressor instance and return a BufferedImage +- * instance containing the decompressed/decoded image. ++ * Decompress the JPEG source image or decode the planar YUV source image ++ * associated with this decompressor instance and return a ++ * BufferedImage instance containing the packed-pixel ++ * decompressed/decoded image. + * + * @param desiredWidth see + * {@link #decompress(byte[], int, int, int, int, int, int, int)} for +@@ -846,7 +859,7 @@ public class TJDecompressor implements Closeable { + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * +- * @return a BufferedImage instance containing the ++ * @return a BufferedImage instance containing the packed-pixel + * decompressed/decoded image. 
+ */ + public BufferedImage decompress(int desiredWidth, int desiredHeight, +diff --git a/java/org/libjpegturbo/turbojpeg/TJTransform.java b/java/org/libjpegturbo/turbojpeg/TJTransform.java +index 41c4b45..91bcc6b 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJTransform.java ++++ b/java/org/libjpegturbo/turbojpeg/TJTransform.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2011, 2013, 2018 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011, 2013, 2018, 2023 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -97,7 +97,7 @@ public class TJTransform extends Rectangle { + * the level of chrominance subsampling used. If the image's width or height + * is not evenly divisible by the MCU block size (see {@link TJ#getMCUWidth} + * and {@link TJ#getMCUHeight}), then there will be partial MCU blocks on the +- * right and/or bottom edges. It is not possible to move these partial MCU ++ * right and/or bottom edges. It is not possible to move these partial MCU + * blocks to the top or left of the image, so any transform that would + * require that is "imperfect." If this option is not specified, then any + * partial MCU blocks that cannot be transformed will be left in place, which +@@ -114,8 +114,8 @@ public class TJTransform extends Rectangle { + */ + public static final int OPT_CROP = 4; + /** +- * This option will discard the color data in the input image and produce +- * a grayscale output image. ++ * This option will discard the color data in the source image and produce a ++ * grayscale destination image. 
+ */ + public static final int OPT_GRAY = 8; + /** +@@ -127,17 +127,16 @@ public class TJTransform extends Rectangle { + */ + public static final int OPT_NOOUTPUT = 16; + /** +- * This option will enable progressive entropy coding in the output image ++ * This option will enable progressive entropy coding in the JPEG image + * generated by this particular transform. Progressive entropy coding will + * generally improve compression relative to baseline entropy coding (the +- * default), but it will reduce compression and decompression performance +- * considerably. ++ * default), but it will reduce decompression performance considerably. + */ + public static final int OPT_PROGRESSIVE = 32; + /** + * This option will prevent {@link TJTransformer#transform + * TJTransformer.transform()} from copying any extra markers (including EXIF +- * and ICC profile data) from the source image to the output image. ++ * and ICC profile data) from the source image to the destination image. + */ + public static final int OPT_COPYNONE = 64; + +@@ -165,10 +164,10 @@ public class TJTransform extends Rectangle { + * equivalent of setting it to (height of the source JPEG image - + * y). + * +- * @param op one of the transform operations (OP_*) ++ * @param op one of the transform operations ({@link #OP_NONE OP_*}) + * + * @param options the bitwise OR of one or more of the transform options +- * (OPT_*) ++ * ({@link #OPT_PERFECT OPT_*}) + * + * @param cf an instance of an object that implements the {@link + * TJCustomFilter} interface, or null if no custom filter is needed +@@ -190,10 +189,10 @@ public class TJTransform extends Rectangle { + * #TJTransform(int, int, int, int, int, int, TJCustomFilter)} for more + * detail. 
+ * +- * @param op one of the transform operations (OP_*) ++ * @param op one of the transform operations ({@link #OP_NONE OP_*}) + * + * @param options the bitwise OR of one or more of the transform options +- * (OPT_*) ++ * ({@link #OPT_PERFECT OPT_*}) + * + * @param cf an instance of an object that implements the {@link + * TJCustomFilter} interface, or null if no custom filter is needed +@@ -208,13 +207,14 @@ public class TJTransform extends Rectangle { + } + + /** +- * Transform operation (one of OP_*) ++ * Transform operation (one of {@link #OP_NONE OP_*}) + */ + @SuppressWarnings("checkstyle:VisibilityModifier") + public int op = 0; + + /** +- * Transform options (bitwise OR of one or more of OPT_*) ++ * Transform options (bitwise OR of one or more of ++ * {@link #OPT_PERFECT OPT_*}) + */ + @SuppressWarnings("checkstyle:VisibilityModifier") + public int options = 0; +diff --git a/java/org/libjpegturbo/turbojpeg/TJTransformer.java b/java/org/libjpegturbo/turbojpeg/TJTransformer.java +index d7a56f3..2cbf0bf 100644 +--- a/java/org/libjpegturbo/turbojpeg/TJTransformer.java ++++ b/java/org/libjpegturbo/turbojpeg/TJTransformer.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2011, 2013-2015 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011, 2013-2015, 2023 D. R. Commander. All Rights Reserved. + * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -43,10 +43,12 @@ public class TJTransformer extends TJDecompressor { + + /** + * Create a TurboJPEG lossless transformer instance and associate the JPEG +- * image stored in jpegImage with the newly created instance. ++ * source image stored in jpegImage with the newly created ++ * instance. + * +- * @param jpegImage JPEG image buffer (size of the JPEG image is assumed to +- * be the length of the array.) This buffer is not modified. ++ * @param jpegImage buffer containing the JPEG source image to transform. 
++ * (The size of the JPEG image is assumed to be the length of the array.) ++ * This buffer is not modified. + */ + public TJTransformer(byte[] jpegImage) throws TJException { + init(); +@@ -55,12 +57,13 @@ public class TJTransformer extends TJDecompressor { + + /** + * Create a TurboJPEG lossless transformer instance and associate the JPEG +- * image of length imageSize bytes stored in ++ * source image of length imageSize bytes stored in + * jpegImage with the newly created instance. + * +- * @param jpegImage JPEG image buffer. This buffer is not modified. ++ * @param jpegImage buffer containing the JPEG source image to transform. ++ * This buffer is not modified. + * +- * @param imageSize size of the JPEG image (in bytes) ++ * @param imageSize size of the JPEG source image (in bytes) + */ + public TJTransformer(byte[] jpegImage, int imageSize) throws TJException { + init(); +@@ -68,28 +71,29 @@ public class TJTransformer extends TJDecompressor { + } + + /** +- * Losslessly transform the JPEG image associated with this transformer +- * instance into one or more JPEG images stored in the given destination +- * buffers. Lossless transforms work by moving the raw coefficients from one +- * JPEG image structure to another without altering the values of the +- * coefficients. While this is typically faster than decompressing the +- * image, transforming it, and re-compressing it, lossless transforms are not +- * free. Each lossless transform requires reading and performing Huffman +- * decoding on all of the coefficients in the source image, regardless of the +- * size of the destination image. Thus, this method provides a means of +- * generating multiple transformed images from the same source or of applying +- * multiple transformations simultaneously, in order to eliminate the need to +- * read the source coefficients multiple times. 
++ * Losslessly transform the JPEG source image associated with this ++ * transformer instance into one or more JPEG images stored in the given ++ * destination buffers. Lossless transforms work by moving the raw ++ * coefficients from one JPEG image structure to another without altering the ++ * values of the coefficients. While this is typically faster than ++ * decompressing the image, transforming it, and re-compressing it, lossless ++ * transforms are not free. Each lossless transform requires reading and ++ * performing Huffman decoding on all of the coefficients in the source ++ * image, regardless of the size of the destination image. Thus, this method ++ * provides a means of generating multiple transformed images from the same ++ * source or of applying multiple transformations simultaneously, in order to ++ * eliminate the need to read the source coefficients multiple times. + * +- * @param dstBufs an array of image buffers. dstbufs[i] will +- * receive a JPEG image that has been transformed using the parameters in +- * transforms[i]. Use {@link TJ#bufSize} to determine the +- * maximum size for each buffer based on the transformed or cropped width and +- * height and the level of subsampling used in the source image. ++ * @param dstBufs an array of JPEG destination buffers. ++ * dstbufs[i] will receive a JPEG image that has been ++ * transformed using the parameters in transforms[i]. Use ++ * {@link TJ#bufSize} to determine the maximum size for each buffer based on ++ * the transformed or cropped width and height and the level of subsampling ++ * used in the source image. 
+ * + * @param transforms an array of {@link TJTransform} instances, each of + * which specifies the transform parameters and/or cropping region for the +- * corresponding transformed output image ++ * corresponding transformed JPEG image + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} +@@ -103,13 +107,13 @@ public class TJTransformer extends TJDecompressor { + } + + /** +- * Losslessly transform the JPEG image associated with this transformer +- * instance and return an array of {@link TJDecompressor} instances, each of +- * which has a transformed JPEG image associated with it. ++ * Losslessly transform the JPEG source image associated with this ++ * transformer instance and return an array of {@link TJDecompressor} ++ * instances, each of which has a transformed JPEG image associated with it. + * + * @param transforms an array of {@link TJTransform} instances, each of + * which specifies the transform parameters and/or cropping region for the +- * corresponding transformed output image ++ * corresponding transformed JPEG image + * + * @param flags the bitwise OR of one or more of + * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} +@@ -139,10 +143,10 @@ public class TJTransformer extends TJDecompressor { + + /** + * Returns an array containing the sizes of the transformed JPEG images +- * generated by the most recent transform operation. ++ * (in bytes) generated by the most recent transform operation. + * + * @return an array containing the sizes of the transformed JPEG images +- * generated by the most recent transform operation. ++ * (in bytes) generated by the most recent transform operation. 
+ */ + public int[] getTransformedSizes() { + if (transformedSizes == null) +diff --git a/java/org/libjpegturbo/turbojpeg/YUVImage.java b/java/org/libjpegturbo/turbojpeg/YUVImage.java +index 4da9843..9483046 100644 +--- a/java/org/libjpegturbo/turbojpeg/YUVImage.java ++++ b/java/org/libjpegturbo/turbojpeg/YUVImage.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2014, 2017 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2014, 2017, 2023 D. R. Commander. All Rights Reserved. + * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -30,7 +30,7 @@ + package org.libjpegturbo.turbojpeg; + + /** +- * This class encapsulates a YUV planar image and the metadata ++ * This class encapsulates a planar YUV image and the metadata + * associated with it. The TurboJPEG API allows both the JPEG compression and + * decompression pipelines to be split into stages: YUV encode, compress from + * YUV, decompress to YUV, and YUV decode. A YUVImage instance +@@ -38,30 +38,32 @@ package org.libjpegturbo.turbojpeg; + * operations and as the source image for compress-from-YUV and YUV decode + * operations. + *

    +- * Technically, the JPEG format uses the YCbCr colorspace (which technically is +- * not a "colorspace" but rather a "color transform"), but per the convention +- * of the digital video community, the TurboJPEG API uses "YUV" to refer to an +- * image format consisting of Y, Cb, and Cr image planes. ++ * Technically, the JPEG format uses the YCbCr colorspace (which is technically ++ * not a colorspace but a color transform), but per the convention of the ++ * digital video community, the TurboJPEG API uses "YUV" to refer to an image ++ * format consisting of Y, Cb, and Cr image planes. + *

    + * Each plane is simply a 2D array of bytes, each byte representing the value + * of one of the components (Y, Cb, or Cr) at a particular location in the + * image. The width and height of each plane are determined by the image + * width, height, and level of chrominance subsampling. The luminance plane + * width is the image width padded to the nearest multiple of the horizontal +- * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of +- * 4:1:1, 1 in the case of 4:4:4 or grayscale.) Similarly, the luminance plane +- * height is the image height padded to the nearest multiple of the vertical +- * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 +- * or grayscale.) The chrominance plane width is equal to the luminance plane +- * width divided by the horizontal subsampling factor, and the chrominance +- * plane height is equal to the luminance plane height divided by the vertical +- * subsampling factor. ++ * subsampling factor (1 in the case of 4:4:4, grayscale, or 4:4:0; 2 in the ++ * case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.) Similarly, the luminance ++ * plane height is the image height padded to the nearest multiple of the ++ * vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale, or ++ * 4:1:1; 2 in the case of 4:2:0 or 4:4:0.) This is irrespective of any ++ * additional padding that may be specified as an argument to the various ++ * YUVImage methods. The chrominance plane width is equal to the luminance ++ * plane width divided by the horizontal subsampling factor, and the ++ * chrominance plane height is equal to the luminance plane height divided by ++ * the vertical subsampling factor. + *

    + * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is + * used, then the luminance plane would be 36 x 35 bytes, and each of the +- * chrominance planes would be 18 x 35 bytes. If you specify a line padding of +- * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and +- * each of the chrominance planes would be 20 x 35 bytes. ++ * chrominance planes would be 18 x 35 bytes. If you specify a row alignment ++ * of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, ++ * and each of the chrominance planes would be 20 x 35 bytes. + */ + public class YUVImage { + +@@ -75,7 +77,7 @@ public class YUVImage { + * @param width width (in pixels) of the YUV image + * + * @param strides an array of integers, each specifying the number of bytes +- * per line in the corresponding plane of the YUV image. Setting the stride ++ * per row in the corresponding plane of the YUV image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * {@link YUVImage above}.) If strides is null, then the + * strides for all planes will be set to their respective plane widths. When +@@ -92,22 +94,24 @@ public class YUVImage { + } + + /** +- * Create a new YUVImage instance backed by a unified image +- * buffer, and allocate memory for the image buffer. ++ * Create a new YUVImage instance backed by a unified buffer, ++ * and allocate memory for the buffer. + * + * @param width width (in pixels) of the YUV image + * +- * @param pad Each line of each plane in the YUV image buffer will be padded +- * to this number of bytes (must be a power of 2.) ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n specifies that each row in each plane of ++ * the YUV image will be padded to the nearest multiple of n bytes ++ * (1 = unpadded.) 
+ * + * @param height height (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling to be used in the YUV + * image (one of {@link TJ#SAMP_444 TJ.SAMP_*}) + */ +- public YUVImage(int width, int pad, int height, int subsamp) { +- setBuf(new byte[TJ.bufSizeYUV(width, pad, height, subsamp)], width, pad, +- height, subsamp); ++ public YUVImage(int width, int align, int height, int subsamp) { ++ setBuf(new byte[TJ.bufSizeYUV(width, align, height, subsamp)], width, ++ align, height, subsamp); + } + + /** +@@ -117,8 +121,8 @@ public class YUVImage { + * @param planes an array of buffers representing the Y, U (Cb), and V (Cr) + * image planes (or just the Y plane, if the image is grayscale.) These + * planes can be contiguous or non-contiguous in memory. Plane +- * i should be at least offsets[i] + +- * {@link TJ#planeSizeYUV TJ.planeSizeYUV}(i, width, strides[i], height, subsamp) ++ * i should be at least offsets[i] + ++ * {@link TJ#planeSizeYUV TJ.planeSizeYUV}(i, width, strides[i], height, subsamp) + * bytes in size. + * + * @param offsets If this YUVImage instance represents a +@@ -130,11 +134,11 @@ public class YUVImage { + * @param width width (in pixels) of the new YUV image (or subregion) + * + * @param strides an array of integers, each specifying the number of bytes +- * per line in the corresponding plane of the YUV image. Setting the stride ++ * per row in the corresponding plane of the YUV image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * {@link YUVImage above}.) If strides is null, then the + * strides for all planes will be set to their respective plane widths. 
You +- * can adjust the strides in order to add an arbitrary amount of line padding ++ * can adjust the strides in order to add an arbitrary amount of row padding + * to each plane or to specify that this YUVImage instance is a + * subregion of a larger image (in which case, strides[i] should + * be set to the plane width of plane i in the larger image.) +@@ -150,29 +154,30 @@ public class YUVImage { + } + + /** +- * Create a new YUVImage instance from an existing unified image ++ * Create a new YUVImage instance from an existing unified + * buffer. + * +- * @param yuvImage image buffer that contains or will contain YUV planar +- * image data. Use {@link TJ#bufSizeYUV} to determine the minimum size for +- * this buffer. The Y, U (Cb), and V (Cr) image planes are stored +- * sequentially in the buffer (see {@link YUVImage above} for a description +- * of the image format.) ++ * @param yuvImage buffer that contains or will receive a unified planar YUV ++ * image. Use {@link TJ#bufSizeYUV} to determine the minimum size for this ++ * buffer. The Y, U (Cb), and V (Cr) image planes are stored sequentially in ++ * the buffer. (See {@link YUVImage above} for a description of the image ++ * format.) + * + * @param width width (in pixels) of the YUV image + * +- * @param pad the line padding used in the YUV image buffer. For +- * instance, if each line in each plane of the buffer is padded to the +- * nearest multiple of 4 bytes, then pad should be set to 4. ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n specifies that each row in each plane of ++ * the YUV image will be padded to the nearest multiple of n bytes ++ * (1 = unpadded.) 
+ * + * @param height height (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling used in the YUV + * image (one of {@link TJ#SAMP_444 TJ.SAMP_*}) + */ +- public YUVImage(byte[] yuvImage, int width, int pad, int height, ++ public YUVImage(byte[] yuvImage, int width, int align, int height, + int subsamp) { +- setBuf(yuvImage, width, pad, height, subsamp); ++ setBuf(yuvImage, width, align, height, subsamp); + } + + /** +@@ -181,8 +186,8 @@ public class YUVImage { + * @param planes an array of buffers representing the Y, U (Cb), and V (Cr) + * image planes (or just the Y plane, if the image is grayscale.) These + * planes can be contiguous or non-contiguous in memory. Plane +- * i should be at least offsets[i] + +- * {@link TJ#planeSizeYUV TJ.planeSizeYUV}(i, width, strides[i], height, subsamp) ++ * i should be at least offsets[i] + ++ * {@link TJ#planeSizeYUV TJ.planeSizeYUV}(i, width, strides[i], height, subsamp) + * bytes in size. + * + * @param offsets If this YUVImage instance represents a +@@ -194,12 +199,12 @@ public class YUVImage { + * @param width width (in pixels) of the YUV image (or subregion) + * + * @param strides an array of integers, each specifying the number of bytes +- * per line in the corresponding plane of the YUV image. Setting the stride ++ * per row in the corresponding plane of the YUV image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see + * {@link YUVImage above}.) If strides is null, then the + * strides for all planes will be set to their respective plane widths. 
You +- * can adjust the strides in order to add an arbitrary amount of line padding +- * to each plane or to specify that this YUVImage image is a ++ * can adjust the strides in order to add an arbitrary amount of row padding ++ * to each plane or to specify that this YUVImage instance is a + * subregion of a larger image (in which case, strides[i] should + * be set to the plane width of plane i in the larger image.) + * +@@ -263,32 +268,34 @@ public class YUVImage { + } + + /** +- * Assign a unified image buffer to this YUVImage instance. ++ * Assign a unified buffer to this YUVImage instance. + * +- * @param yuvImage image buffer that contains or will contain YUV planar +- * image data. Use {@link TJ#bufSizeYUV} to determine the minimum size for +- * this buffer. The Y, U (Cb), and V (Cr) image planes are stored +- * sequentially in the buffer (see {@link YUVImage above} for a description +- * of the image format.) ++ * @param yuvImage buffer that contains or will receive a unified planar YUV ++ * image. Use {@link TJ#bufSizeYUV} to determine the minimum size for this ++ * buffer. The Y, U (Cb), and V (Cr) image planes are stored sequentially in ++ * the buffer. (See {@link YUVImage above} for a description of the image ++ * format.) + * + * @param width width (in pixels) of the YUV image + * +- * @param pad the line padding used in the YUV image buffer. For +- * instance, if each line in each plane of the buffer is padded to the +- * nearest multiple of 4 bytes, then pad should be set to 4. ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n specifies that each row in each plane of ++ * the YUV image will be padded to the nearest multiple of n bytes ++ * (1 = unpadded.) 
+ * + * @param height height (in pixels) of the YUV image + * + * @param subsamp the level of chrominance subsampling used in the YUV + * image (one of {@link TJ#SAMP_444 TJ.SAMP_*}) + */ +- public void setBuf(byte[] yuvImage, int width, int pad, int height, ++ public void setBuf(byte[] yuvImage, int width, int align, int height, + int subsamp) { +- if (yuvImage == null || width < 1 || pad < 1 || ((pad & (pad - 1)) != 0) || +- height < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP) ++ if (yuvImage == null || width < 1 || align < 1 || ++ ((align & (align - 1)) != 0) || height < 1 || subsamp < 0 || ++ subsamp >= TJ.NUMSAMP) + throw new IllegalArgumentException("Invalid argument in YUVImage::setBuf()"); +- if (yuvImage.length < TJ.bufSizeYUV(width, pad, height, subsamp)) +- throw new IllegalArgumentException("YUV image buffer is not large enough"); ++ if (yuvImage.length < TJ.bufSizeYUV(width, align, height, subsamp)) ++ throw new IllegalArgumentException("YUV buffer is not large enough"); + + int nc = (subsamp == TJ.SAMP_GRAY ? 
1 : 3); + byte[][] planes = new byte[nc][]; +@@ -296,9 +303,9 @@ public class YUVImage { + int[] offsets = new int[nc]; + + planes[0] = yuvImage; +- strides[0] = pad(TJ.planeWidth(0, width, subsamp), pad); ++ strides[0] = pad(TJ.planeWidth(0, width, subsamp), align); + if (subsamp != TJ.SAMP_GRAY) { +- strides[1] = strides[2] = pad(TJ.planeWidth(1, width, subsamp), pad); ++ strides[1] = strides[2] = pad(TJ.planeWidth(1, width, subsamp), align); + planes[1] = planes[2] = yuvImage; + offsets[1] = offsets[0] + + strides[0] * TJ.planeHeight(0, height, subsamp); +@@ -306,7 +313,7 @@ public class YUVImage { + strides[1] * TJ.planeHeight(1, height, subsamp); + } + +- yuvPad = pad; ++ yuvAlign = align; + setBuf(planes, offsets, width, strides, height, subsamp); + } + +@@ -333,23 +340,23 @@ public class YUVImage { + } + + /** +- * Returns the line padding used in the YUV image buffer (if this image is ++ * Returns the row alignment (in bytes) of the YUV buffer (if this image is + * stored in a unified buffer rather than separate image planes.) + * +- * @return the line padding used in the YUV image buffer ++ * @return the row alignment of the YUV buffer + */ + public int getPad() { + if (yuvPlanes == null) + throw new IllegalStateException(NO_ASSOC_ERROR); +- if (yuvPad < 1 || ((yuvPad & (yuvPad - 1)) != 0)) ++ if (yuvAlign < 1 || ((yuvAlign & (yuvAlign - 1)) != 0)) + throw new IllegalStateException("Image is not stored in a unified buffer"); +- return yuvPad; ++ return yuvAlign; + } + + /** +- * Returns the number of bytes per line of each plane in the YUV image. ++ * Returns the number of bytes per row of each plane in the YUV image. 
+ * +- * @return the number of bytes per line of each plane in the YUV image ++ * @return the number of bytes per row of each plane in the YUV image + */ + public int[] getStrides() { + if (yuvStrides == null) +@@ -395,10 +402,10 @@ public class YUVImage { + } + + /** +- * Returns the YUV image buffer (if this image is stored in a unified +- * buffer rather than separate image planes.) ++ * Returns the YUV buffer (if this image is stored in a unified buffer rather ++ * than separate image planes.) + * +- * @return the YUV image buffer ++ * @return the YUV buffer + */ + public byte[] getBuf() { + if (yuvPlanes == null || yuvSubsamp < 0 || yuvSubsamp >= TJ.NUMSAMP) +@@ -412,22 +419,22 @@ public class YUVImage { + } + + /** +- * Returns the size (in bytes) of the YUV image buffer (if this image is +- * stored in a unified buffer rather than separate image planes.) ++ * Returns the size (in bytes) of the YUV buffer (if this image is stored in ++ * a unified buffer rather than separate image planes.) + * +- * @return the size (in bytes) of the YUV image buffer ++ * @return the size (in bytes) of the YUV buffer + */ + public int getSize() { + if (yuvPlanes == null || yuvSubsamp < 0 || yuvSubsamp >= TJ.NUMSAMP) + throw new IllegalStateException(NO_ASSOC_ERROR); + int nc = (yuvSubsamp == TJ.SAMP_GRAY ? 
1 : 3); +- if (yuvPad < 1) ++ if (yuvAlign < 1) + throw new IllegalStateException("Image is not stored in a unified buffer"); + for (int i = 1; i < nc; i++) { + if (yuvPlanes[i] != yuvPlanes[0]) + throw new IllegalStateException("Image is not stored in a unified buffer"); + } +- return TJ.bufSizeYUV(yuvWidth, yuvPad, yuvHeight, yuvSubsamp); ++ return TJ.bufSizeYUV(yuvWidth, yuvAlign, yuvHeight, yuvSubsamp); + } + + private static int pad(int v, int p) { +@@ -438,7 +445,7 @@ public class YUVImage { + protected byte[][] yuvPlanes = null; + protected int[] yuvOffsets = null; + protected int[] yuvStrides = null; +- protected int yuvPad = 0; ++ protected int yuvAlign = 1; + protected int yuvWidth = 0; + protected int yuvHeight = 0; + protected int yuvSubsamp = -1; +diff --git a/jccolext.c b/jccolext.c +index 303b322..20f891a 100644 +--- a/jccolext.c ++++ b/jccolext.c +@@ -4,7 +4,7 @@ + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: +- * Copyright (C) 2009-2012, 2015, D. R. Commander. ++ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * +@@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { +- r = inptr[RGB_RED]; +- g = inptr[RGB_GREEN]; +- b = inptr[RGB_BLUE]; ++ r = RANGE_LIMIT(inptr[RGB_RED]); ++ g = RANGE_LIMIT(inptr[RGB_GREEN]); ++ b = RANGE_LIMIT(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations + * must be too; we do not need an explicit range-limiting operation. 
+@@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, + outptr = output_buf[0][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { +- r = inptr[RGB_RED]; +- g = inptr[RGB_GREEN]; +- b = inptr[RGB_BLUE]; ++ r = RANGE_LIMIT(inptr[RGB_RED]); ++ g = RANGE_LIMIT(inptr[RGB_GREEN]); ++ b = RANGE_LIMIT(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + /* Y */ + outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + +diff --git a/jccolor.c b/jccolor.c +index bdc563c..fb9f1cc 100644 +--- a/jccolor.c ++++ b/jccolor.c +@@ -5,7 +5,7 @@ + * Copyright (C) 1991-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2012, 2015, D. R. Commander. ++ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander. + * Copyright (C) 2014, MIPS Technologies, Inc., California. + * For conditions of distribution and use, see the accompanying README.ijg + * file. +@@ -17,7 +17,6 @@ + #include "jinclude.h" + #include "jpeglib.h" + #include "jsimd.h" +-#include "jconfigint.h" + + + /* Private subobject */ +@@ -84,6 +83,18 @@ typedef my_color_converter *my_cconvert_ptr; + #define B_CR_OFF (7 * (MAXJSAMPLE + 1)) + #define TABLE_SIZE (8 * (MAXJSAMPLE + 1)) + ++/* 12-bit samples use a 16-bit data type, so it is possible to pass ++ * out-of-range sample values (< 0 or > 4095) to jpeg_write_scanlines(). ++ * Thus, we mask the incoming 12-bit samples to guard against overrunning ++ * or underrunning the conversion tables. 
++ */ ++ ++#if BITS_IN_JSAMPLE == 12 ++#define RANGE_LIMIT(value) ((value) & 0xFFF) ++#else ++#define RANGE_LIMIT(value) (value) ++#endif ++ + + /* Include inline routines for colorspace extensions */ + +@@ -392,9 +403,9 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + outptr3 = output_buf[3][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { +- r = MAXJSAMPLE - inptr[0]; +- g = MAXJSAMPLE - inptr[1]; +- b = MAXJSAMPLE - inptr[2]; ++ r = MAXJSAMPLE - RANGE_LIMIT(inptr[0]); ++ g = MAXJSAMPLE - RANGE_LIMIT(inptr[1]); ++ b = MAXJSAMPLE - RANGE_LIMIT(inptr[2]); + /* K passes through as-is */ + outptr3[col] = inptr[3]; + inptr += 4; +diff --git a/jchuff.c b/jchuff.c +index f4dfa1c..5d0276a 100644 +--- a/jchuff.c ++++ b/jchuff.c +@@ -27,7 +27,6 @@ + #include "jinclude.h" + #include "jpeglib.h" + #include "jsimd.h" +-#include "jconfigint.h" + #include + + /* +diff --git a/jchuff.h b/jchuff.h +index 314a232..da7809a 100644 +--- a/jchuff.h ++++ b/jchuff.h +@@ -3,8 +3,8 @@ + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. +- * It was modified by The libjpeg-turbo Project to include only code relevant +- * to libjpeg-turbo. ++ * libjpeg-turbo Modifications: ++ * Copyright (C) 2022, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * +@@ -25,6 +25,14 @@ + #define MAX_COEF_BITS 14 + #endif + ++/* The progressive Huffman encoder uses an unsigned 16-bit data type to store ++ * absolute values of coefficients, because it is possible to inject a ++ * coefficient value of -32768 into the encoder by attempting to transform a ++ * malformed 12-bit JPEG image, and the absolute value of -32768 would overflow ++ * a signed 16-bit integer. 
++ */ ++typedef unsigned short UJCOEF; ++ + /* Derived data constructed for each Huffman table */ + + typedef struct { +diff --git a/jcmaster.c b/jcmaster.c +index c2b2600..b821710 100644 +--- a/jcmaster.c ++++ b/jcmaster.c +@@ -19,7 +19,6 @@ + #include "jinclude.h" + #include "jpeglib.h" + #include "jpegcomp.h" +-#include "jconfigint.h" + + + /* Private state */ +diff --git a/jcphuff.c b/jcphuff.c +index 872e570..5006b67 100644 +--- a/jcphuff.c ++++ b/jcphuff.c +@@ -5,7 +5,7 @@ + * Copyright (C) 1995-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander. +- * Copyright (C) 2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2016, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. + * Copyright (C) 2021, Alex Richardson. + * For conditions of distribution and use, see the accompanying README.ijg +@@ -22,7 +22,6 @@ + #include "jinclude.h" + #include "jpeglib.h" + #include "jsimd.h" +-#include "jconfigint.h" + #include + + #ifdef HAVE_INTRIN_H +@@ -83,11 +82,11 @@ typedef struct { + /* Pointer to routine to prepare data for encode_mcu_AC_first() */ + void (*AC_first_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits); ++ int Al, UJCOEF *values, size_t *zerobits); + /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ + int (*AC_refine_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits); ++ int Al, UJCOEF *absvalues, size_t *bits); + + /* Mode flag: TRUE for optimization, FALSE for actual data output */ + boolean gather_statistics; +@@ -157,14 +156,14 @@ METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); + METHODDEF(void) encode_mcu_AC_first_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *values, size_t *zerobits); ++ UJCOEF *values, size_t *zerobits); + 
METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); + METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); + METHODDEF(int) encode_mcu_AC_refine_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *absvalues, size_t *bits); ++ UJCOEF *absvalues, size_t *bits); + METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); + METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); +@@ -584,8 +583,8 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + continue; \ + /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ + temp2 ^= temp; \ +- values[k] = (JCOEF)temp; \ +- values[k + DCTSIZE2] = (JCOEF)temp2; \ ++ values[k] = (UJCOEF)temp; \ ++ values[k + DCTSIZE2] = (UJCOEF)temp2; \ + zerobits |= ((size_t)1U) << k; \ + } \ + } +@@ -593,7 +592,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + METHODDEF(void) + encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *bits) ++ int Al, UJCOEF *values, size_t *bits) + { + register int k, temp, temp2; + size_t zerobits = 0U; +@@ -666,9 +665,9 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + register int nbits, r; + int Sl = cinfo->Se - cinfo->Ss + 1; + int Al = cinfo->Al; +- JCOEF values_unaligned[2 * DCTSIZE2 + 15]; +- JCOEF *values; +- const JCOEF *cvalue; ++ UJCOEF values_unaligned[2 * DCTSIZE2 + 15]; ++ UJCOEF *values; ++ const UJCOEF *cvalue; + size_t zerobits; + size_t bits[8 / SIZEOF_SIZE_T]; + +@@ -681,7 +680,7 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + emit_restart(entropy, entropy->next_restart_num); + + #ifdef WITH_SIMD +- cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16); ++ cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16); + #else + /* Not using SIMD, so alignment is not needed */ + 
cvalue = values = values_unaligned; +@@ -815,7 +814,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + zerobits |= ((size_t)1U) << k; \ + signbits |= ((size_t)(temp2 + 1)) << k; \ + } \ +- absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ ++ absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \ + if (temp == 1) \ + EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ + } \ +@@ -824,7 +823,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + METHODDEF(int) + encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + register int k, temp, temp2; + int EOB = 0; +@@ -931,9 +930,9 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + unsigned int BR; + int Sl = cinfo->Se - cinfo->Ss + 1; + int Al = cinfo->Al; +- JCOEF absvalues_unaligned[DCTSIZE2 + 15]; +- JCOEF *absvalues; +- const JCOEF *cabsvalue, *EOBPTR; ++ UJCOEF absvalues_unaligned[DCTSIZE2 + 15]; ++ UJCOEF *absvalues; ++ const UJCOEF *cabsvalue, *EOBPTR; + size_t zerobits, signbits; + size_t bits[16 / SIZEOF_SIZE_T]; + +@@ -946,7 +945,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) + emit_restart(entropy, entropy->next_restart_num); + + #ifdef WITH_SIMD +- cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16); ++ cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16); + #else + /* Not using SIMD, so alignment is not needed */ + cabsvalue = absvalues = absvalues_unaligned; +diff --git a/jdapimin.c b/jdapimin.c +index f50c27e..30126a0 100644 +--- a/jdapimin.c ++++ b/jdapimin.c +@@ -23,7 +23,6 @@ + #include "jinclude.h" + #include "jpeglib.h" + #include "jdmaster.h" +-#include "jconfigint.h" + + + /* +diff --git a/jdcolext.c b/jdcolext.c +index 863c7a2..fc7e7b8 100644 +--- a/jdcolext.c ++++ b/jdcolext.c +@@ -4,7 +4,7 @@ + * This file was 
part of the Independent JPEG Group's software: + * Copyright (C) 1991-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: +- * Copyright (C) 2009, 2011, 2015, D. R. Commander. ++ * Copyright (C) 2009, 2011, 2015, 2023, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * +@@ -62,10 +62,10 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; + outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; +- /* Set unused byte to 0xFF so it can be interpreted as an opaque */ ++ /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */ + /* alpha channel value */ + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr += RGB_PIXELSIZE; + } +@@ -94,10 +94,10 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; +- /* Set unused byte to 0xFF so it can be interpreted as an opaque */ ++ /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */ + /* alpha channel value */ + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr += RGB_PIXELSIZE; + } +@@ -130,10 +130,10 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr[RGB_RED] = inptr0[col]; + outptr[RGB_GREEN] = inptr1[col]; + outptr[RGB_BLUE] = inptr2[col]; +- /* Set unused byte to 0xFF so it can be interpreted as an opaque */ ++ /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */ + /* alpha channel value */ + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr += RGB_PIXELSIZE; + } +diff --git a/jdcolor.c b/jdcolor.c +index 8da2b4e..735190b 100644 +--- a/jdcolor.c ++++ b/jdcolor.c +@@ -18,7 +18,6 @@ + #include "jinclude.h" 
+ #include "jpeglib.h" + #include "jsimd.h" +-#include "jconfigint.h" + + + /* Private subobject */ +diff --git a/jdmainct.c b/jdmainct.c +index f466b25..d332e6b 100644 +--- a/jdmainct.c ++++ b/jdmainct.c +@@ -18,7 +18,6 @@ + + #include "jinclude.h" + #include "jdmainct.h" +-#include "jconfigint.h" + + + /* +diff --git a/jdmerge.c b/jdmerge.c +index 3a456d6..38b0027 100644 +--- a/jdmerge.c ++++ b/jdmerge.c +@@ -42,7 +42,6 @@ + #include "jpeglib.h" + #include "jdmerge.h" + #include "jsimd.h" +-#include "jconfigint.h" + + #ifdef UPSAMPLE_MERGING_SUPPORTED + +diff --git a/jdmrgext.c b/jdmrgext.c +index 9bf4f1a..038abc7 100644 +--- a/jdmrgext.c ++++ b/jdmrgext.c +@@ -4,7 +4,7 @@ + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: +- * Copyright (C) 2011, 2015, 2020, D. R. Commander. ++ * Copyright (C) 2011, 2015, 2020, 2023, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. 
+ * +@@ -57,7 +57,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr += RGB_PIXELSIZE; + y = *inptr0++; +@@ -65,7 +65,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr += RGB_PIXELSIZE; + } +@@ -81,7 +81,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr[RGB_GREEN] = range_limit[y + cgreen]; + outptr[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr[RGB_ALPHA] = 0xFF; ++ outptr[RGB_ALPHA] = MAXJSAMPLE; + #endif + } + } +@@ -131,7 +131,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr0[RGB_ALPHA] = 0xFF; ++ outptr0[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr0 += RGB_PIXELSIZE; + y = *inptr00++; +@@ -139,7 +139,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr0[RGB_ALPHA] = 0xFF; ++ outptr0[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr0 += RGB_PIXELSIZE; + y = *inptr01++; +@@ -147,7 +147,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr1[RGB_ALPHA] = 0xFF; ++ outptr1[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr1 += RGB_PIXELSIZE; + y = *inptr01++; +@@ -155,7 +155,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, 
JSAMPIMAGE input_buf, + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr1[RGB_ALPHA] = 0xFF; ++ outptr1[RGB_ALPHA] = MAXJSAMPLE; + #endif + outptr1 += RGB_PIXELSIZE; + } +@@ -171,14 +171,14 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + outptr0[RGB_GREEN] = range_limit[y + cgreen]; + outptr0[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr0[RGB_ALPHA] = 0xFF; ++ outptr0[RGB_ALPHA] = MAXJSAMPLE; + #endif + y = *inptr01; + outptr1[RGB_RED] = range_limit[y + cred]; + outptr1[RGB_GREEN] = range_limit[y + cgreen]; + outptr1[RGB_BLUE] = range_limit[y + cblue]; + #ifdef RGB_ALPHA +- outptr1[RGB_ALPHA] = 0xFF; ++ outptr1[RGB_ALPHA] = MAXJSAMPLE; + #endif + } + } +diff --git a/jdsample.c b/jdsample.c +index eaad72a..02eaed6 100644 +--- a/jdsample.c ++++ b/jdsample.c +@@ -478,7 +478,7 @@ jinit_upsampler(j_decompress_ptr cinfo) + v_in_group * 2 == v_out_group && do_fancy) { + /* Non-fancy upsampling is handled by the generic method */ + #if defined(__arm__) || defined(__aarch64__) || \ +- defined(_M_ARM) || defined(_M_ARM64) ++ defined(_M_ARM) || defined(_M_ARM64) || defined(__riscv) + if (jsimd_can_h1v2_fancy_upsample()) + upsample->methods[ci] = jsimd_h1v2_fancy_upsample; + else +diff --git a/jpegtran.1 b/jpegtran.1 +index 5b1ded2..aaf5274 100644 +--- a/jpegtran.1 ++++ b/jpegtran.1 +@@ -167,7 +167,7 @@ on an iMCU boundary. If it doesn't, then it is silently moved up and/or left + to the nearest iMCU boundary (the lower right corner is unchanged.) Thus, the + output image covers at least the requested region, but it may cover more. The + adjustment of the region dimensions may be optionally disabled by attaching an +-'f' character ("force") to the width or height number. ++\'f\' character ("force") to the width or height number. 
+ + The image can be losslessly cropped by giving the switch: + .TP +diff --git a/jsimd.h b/jsimd.h +index 6c20365..74d480a 100644 +--- a/jsimd.h ++++ b/jsimd.h +@@ -2,8 +2,8 @@ + * jsimd.h + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2011, 2014, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2011, 2014, 2022, D. R. Commander. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. + * + * Based on the x86 SIMD extension for IJG JPEG library, +@@ -114,10 +114,10 @@ EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void); + + EXTERN(void) jsimd_encode_mcu_AC_first_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *values, size_t *zerobits); ++ UJCOEF *values, size_t *zerobits); + + EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void); + + EXTERN(int) jsimd_encode_mcu_AC_refine_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *absvalues, size_t *bits); ++ UJCOEF *absvalues, size_t *bits); +diff --git a/jsimd_none.c b/jsimd_none.c +index 5b38a9f..a25db73 100644 +--- a/jsimd_none.c ++++ b/jsimd_none.c +@@ -2,8 +2,8 @@ + * jsimd_none.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2014, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2009-2011, 2014, 2022, D. R. Commander. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. 
+ * + * Based on the x86 SIMD extension for IJG JPEG library, +@@ -412,7 +412,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + } + +@@ -425,7 +425,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return 0; + } +diff --git a/jversion.h.in b/jversion.h.in +index dca4f08..dee6c02 100644 +--- a/jversion.h.in ++++ b/jversion.h.in +@@ -4,7 +4,7 @@ + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. + * libjpeg-turbo Modifications: +- * Copyright (C) 2010, 2012-2022, D. R. Commander. ++ * Copyright (C) 2010, 2012-2023, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * +@@ -37,7 +37,7 @@ + */ + + #define JCOPYRIGHT \ +- "Copyright (C) 2009-2022 D. R. Commander\n" \ ++ "Copyright (C) 2009-2023 D. R. Commander\n" \ + "Copyright (C) 2015, 2020 Google, Inc.\n" \ + "Copyright (C) 2019-2020 Arm Limited\n" \ + "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \ +diff --git a/rdppm.c b/rdppm.c +index 294749a..883641d 100644 +--- a/rdppm.c ++++ b/rdppm.c +@@ -5,7 +5,7 @@ + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2009 by Bill Allombert, Guido Vollbeding. + * libjpeg-turbo Modifications: +- * Copyright (C) 2015-2017, 2020-2022, D. R. Commander. ++ * Copyright (C) 2015-2017, 2020-2023, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. 
+ * +@@ -179,13 +179,13 @@ get_text_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + GRAY_RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), +- ptr[aindex] = 0xFF;) ++ ptr[aindex] = MAXJSAMPLE;) + else + GRAY_RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {}) + } else { + if (aindex >= 0) + GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], +- ptr[aindex] = 0xFF;) ++ ptr[aindex] = MAXJSAMPLE;) + else + GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {}) + } +@@ -253,13 +253,13 @@ get_text_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), +- ptr[aindex] = 0xFF;) ++ ptr[aindex] = MAXJSAMPLE;) + else + RGB_READ_LOOP((JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {}) + } else { + if (aindex >= 0) + RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], +- ptr[aindex] = 0xFF;) ++ ptr[aindex] = MAXJSAMPLE;) + else + RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {}) + } +@@ -345,12 +345,12 @@ get_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) + bufferptr = source->iobuffer; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) +- GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;) ++ GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = MAXJSAMPLE;) + else + GRAY_RGB_READ_LOOP(*bufferptr++, {}) + } else { + if (aindex >= 0) +- GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;) ++ GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = MAXJSAMPLE;) + else + GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {}) + } +@@ -413,12 +413,12 @@ get_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) + bufferptr = source->iobuffer; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) +- RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;) ++ RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 
MAXJSAMPLE;) + else + RGB_READ_LOOP(*bufferptr++, {}) + } else { + if (aindex >= 0) +- RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;) ++ RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = MAXJSAMPLE;) + else + RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {}) + } +@@ -543,7 +543,7 @@ get_word_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); + ptr[bindex] = rescale[temp]; + if (aindex >= 0) +- ptr[aindex] = 0xFF; ++ ptr[aindex] = MAXJSAMPLE; + ptr += ps; + } + return 1; +diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt +index 6024900..66b44c9 100644 +--- a/simd/CMakeLists.txt ++++ b/simd/CMakeLists.txt +@@ -21,7 +21,7 @@ set(CMAKE_ASM_NASM_FLAGS_RELWITHDEBINFO_INIT "-g") + # environment variable. This should happen automatically, but unfortunately + # enable_language(ASM_NASM) doesn't parse the ASM_NASM environment variable + # until after CMAKE_ASM_NASM_COMPILER has been populated with the results of +-# searching for NASM or Yasm in the PATH. ++# searching for NASM or YASM in the PATH. 
+ if(NOT DEFINED CMAKE_ASM_NASM_COMPILER AND DEFINED ENV{ASM_NASM}) + set(CMAKE_ASM_NASM_COMPILER $ENV{ASM_NASM}) + endif() +@@ -258,30 +258,28 @@ endif() + check_c_source_compiles(" + #include + int main(int argc, char **argv) { +- int16_t input[] = { ++ const int16_t input[] = { + (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc, + (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc, + (int16_t)argc, (int16_t)argc, (int16_t)argc, (int16_t)argc + }; + int16x4x3_t output = vld1_s16_x3(input); +- vst3_s16(input, output); +- return (int)input[0]; ++ return (int)output.val[0][0]; + }" HAVE_VLD1_S16_X3) + check_c_source_compiles(" + #include + int main(int argc, char **argv) { +- uint16_t input[] = { ++ const uint16_t input[] = { + (uint16_t)argc, (uint16_t)argc, (uint16_t)argc, (uint16_t)argc, + (uint16_t)argc, (uint16_t)argc, (uint16_t)argc, (uint16_t)argc + }; + uint16x4x2_t output = vld1_u16_x2(input); +- vst2_u16(input, output); +- return (int)input[0]; ++ return (int)output.val[0][0]; + }" HAVE_VLD1_U16_X2) + check_c_source_compiles(" + #include + int main(int argc, char **argv) { +- uint8_t input[] = { ++ const uint8_t input[] = { + (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, + (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, + (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, +@@ -300,8 +298,7 @@ check_c_source_compiles(" + (uint8_t)argc, (uint8_t)argc, (uint8_t)argc, (uint8_t)argc + }; + uint8x16x4_t output = vld1q_u8_x4(input); +- vst4q_u8(input, output); +- return (int)input[0]; ++ return (int)output.val[0][0]; + }" HAVE_VLD1Q_U8_X4) + if(BITS EQUAL 32) + unset(CMAKE_REQUIRED_FLAGS) +@@ -531,6 +528,45 @@ if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + ++############################################################################### ++# RVV 64-bit (Intrinsics) 
++############################################################################### ++ ++elseif(CPU_TYPE STREQUAL "riscv64") ++ ++set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zba_zbb_zbc_zbs") ++ ++check_c_source_compiles(" ++ #include ++ int main(void) { ++ const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; ++ vfloat32m1_t val = __riscv_vle32_v_f32m1((const float*)(src), 4); ++ return (int)__riscv_vfmv_f_s_f32m1_f32(val); ++ }" HAVE_RVV) ++ ++unset(CMAKE_REQUIRED_FLAGS) ++ ++if(NOT HAVE_RVV) ++ simd_fail("SIMD extensions not available for this CPU (riscv64)") ++ return() ++endif() ++ ++set(SIMD_SOURCES rvv/jccolor-rvv.c rvv/jcgray-rvv.c ++ rvv/jcsample-rvv.c rvv/jdcolor-rvv.c ++ rvv/jdmerge-rvv.c rvv/jdsample-rvv.c ++ rvv/jfdctfst-rvv.c rvv/jfdctint-rvv.c ++ rvv/jidctfst-rvv.c rvv/jidctint-rvv.c ++ rvv/jquanti-rvv.c rvv/jidctred-rvv.c) ++ ++set_source_files_properties(${SIMD_SOURCES} PROPERTIES ++ COMPILE_FLAGS -march=rv64gcv_zba_zbb_zbc_zbs) ++ ++add_library(simd OBJECT ${SIMD_SOURCES} rvv/jsimd.c) ++ ++if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) ++ set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) ++endif() ++ + + ############################################################################### + # None +diff --git a/simd/arm/aarch32/jsimd.c b/simd/arm/aarch32/jsimd.c +index 920f765..04d6452 100644 +--- a/simd/arm/aarch32/jsimd.c ++++ b/simd/arm/aarch32/jsimd.c +@@ -4,7 +4,7 @@ + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). + * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2019, Google LLC. + * Copyright (C) 2020, Arm Limited. 
+ * +@@ -27,8 +27,8 @@ + + #include + +-static unsigned int simd_support = ~0; +-static unsigned int simd_huffman = 1; ++static THREAD_LOCAL unsigned int simd_support = ~0; ++static THREAD_LOCAL unsigned int simd_huffman = 1; + + #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) + +@@ -96,8 +96,6 @@ parse_proc_cpuinfo(int bufsize) + + /* + * Check what SIMD accelerations are supported. +- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -945,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start, + Sl, Al, values, zerobits); +@@ -970,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return jsimd_encode_mcu_AC_refine_prepare_neon(block, + jpeg_natural_order_start, Sl, +diff --git a/simd/arm/aarch64/jsimd.c b/simd/arm/aarch64/jsimd.c +index 41c06d3..358e159 100644 +--- a/simd/arm/aarch64/jsimd.c ++++ b/simd/arm/aarch64/jsimd.c +@@ -4,7 +4,7 @@ + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). + * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. 
+ * + * Based on the x86 SIMD extension for IJG JPEG library, +@@ -23,7 +23,6 @@ + #include "../../../jdct.h" + #include "../../../jsimddct.h" + #include "../../jsimd.h" +-#include "jconfigint.h" + + #include + +@@ -31,10 +30,10 @@ + #define JSIMD_FASTST3 2 + #define JSIMD_FASTTBL 4 + +-static unsigned int simd_support = ~0; +-static unsigned int simd_huffman = 1; +-static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 | +- JSIMD_FASTTBL; ++static THREAD_LOCAL unsigned int simd_support = ~0; ++static THREAD_LOCAL unsigned int simd_huffman = 1; ++static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 | ++ JSIMD_FASTST3 | JSIMD_FASTTBL; + + #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + +@@ -109,8 +108,6 @@ parse_proc_cpuinfo(int bufsize) + + /* + * Check what SIMD accelerations are supported. +- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + + /* +@@ -1021,7 +1018,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start, + Sl, Al, values, zerobits); +@@ -1048,7 +1045,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return jsimd_encode_mcu_AC_refine_prepare_neon(block, + jpeg_natural_order_start, +diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c +index b91c5db..51db3c5 100644 +--- a/simd/arm/jcphuff-neon.c ++++ b/simd/arm/jcphuff-neon.c +@@ -2,6 +2,8 @@ + * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon) + * + * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. 
++ * Copyright (C) 2022, Matthieu Darbois. All Rights Reserved. ++ * Copyright (C) 2022, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages +@@ -21,7 +23,6 @@ + */ + + #define JPEG_INTERNALS +-#include "jconfigint.h" + #include "../../jinclude.h" + #include "../../jpeglib.h" + #include "../../jsimd.h" +@@ -41,10 +42,10 @@ + + void jsimd_encode_mcu_AC_first_prepare_neon + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *values, size_t *zerobits) ++ UJCOEF *values, size_t *zerobits) + { +- JCOEF *values_ptr = values; +- JCOEF *diff_values_ptr = values + DCTSIZE2; ++ UJCOEF *values_ptr = values; ++ UJCOEF *diff_values_ptr = values + DCTSIZE2; + + /* Rows of coefficients to zero (since they haven't been processed) */ + int i, rows_to_zero = 8; +@@ -68,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon + coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7); + + /* Isolate sign of coefficients. */ +- int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15); +- int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15); ++ uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15)); ++ uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15)); + /* Compute absolute value of coefficients and apply point transform Al. */ +- int16x8_t abs_coefs1 = vabsq_s16(coefs1); +- int16x8_t abs_coefs2 = vabsq_s16(coefs2); +- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); +- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); ++ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1)); ++ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2)); ++ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al)); ++ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al)); + + /* Compute diff values. 
*/ +- int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1); +- int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2); ++ uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1); ++ uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2); + + /* Store transformed coefficients and diff values. */ +- vst1q_s16(values_ptr, coefs1); +- vst1q_s16(values_ptr + DCTSIZE, coefs2); +- vst1q_s16(diff_values_ptr, diff1); +- vst1q_s16(diff_values_ptr + DCTSIZE, diff2); ++ vst1q_u16(values_ptr, abs_coefs1); ++ vst1q_u16(values_ptr + DCTSIZE, abs_coefs2); ++ vst1q_u16(diff_values_ptr, diff1); ++ vst1q_u16(diff_values_ptr + DCTSIZE, diff2); + values_ptr += 16; + diff_values_ptr += 16; + jpeg_natural_order_start += 16; +@@ -130,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon + } + + /* Isolate sign of coefficients. */ +- int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15); +- int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15); ++ uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15)); ++ uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15)); + /* Compute absolute value of coefficients and apply point transform Al. */ +- int16x8_t abs_coefs1 = vabsq_s16(coefs1); +- int16x8_t abs_coefs2 = vabsq_s16(coefs2); +- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); +- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); ++ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1)); ++ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2)); ++ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al)); ++ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al)); + + /* Compute diff values. */ +- int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1); +- int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2); ++ uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1); ++ uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2); + + /* Store transformed coefficients and diff values. 
*/ +- vst1q_s16(values_ptr, coefs1); +- vst1q_s16(values_ptr + DCTSIZE, coefs2); +- vst1q_s16(diff_values_ptr, diff1); +- vst1q_s16(diff_values_ptr + DCTSIZE, diff2); ++ vst1q_u16(values_ptr, abs_coefs1); ++ vst1q_u16(values_ptr + DCTSIZE, abs_coefs2); ++ vst1q_u16(diff_values_ptr, diff1); ++ vst1q_u16(diff_values_ptr + DCTSIZE, diff2); + values_ptr += 16; + diff_values_ptr += 16; + rows_to_zero -= 2; +@@ -184,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon + } + + /* Isolate sign of coefficients. */ +- int16x8_t sign_coefs = vshrq_n_s16(coefs, 15); ++ uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15)); + /* Compute absolute value of coefficients and apply point transform Al. */ +- int16x8_t abs_coefs = vabsq_s16(coefs); +- coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al)); ++ uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs)); ++ abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al)); + + /* Compute diff values. */ +- int16x8_t diff = veorq_s16(coefs, sign_coefs); ++ uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs); + + /* Store transformed coefficients and diff values. */ +- vst1q_s16(values_ptr, coefs); +- vst1q_s16(diff_values_ptr, diff); ++ vst1q_u16(values_ptr, abs_coefs); ++ vst1q_u16(diff_values_ptr, diff); + values_ptr += 8; + diff_values_ptr += 8; + rows_to_zero--; +@@ -202,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon + + /* Zero remaining memory in the values and diff_values blocks. */ + for (i = 0; i < rows_to_zero; i++) { +- vst1q_s16(values_ptr, vdupq_n_s16(0)); +- vst1q_s16(diff_values_ptr, vdupq_n_s16(0)); ++ vst1q_u16(values_ptr, vdupq_n_u16(0)); ++ vst1q_u16(diff_values_ptr, vdupq_n_u16(0)); + values_ptr += 8; + diff_values_ptr += 8; + } +@@ -211,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon + /* Construct zerobits bitmap. A set bit means that the corresponding + * coefficient != 0. 
+ */ +- int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE); +- int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE); +- int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE); +- int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE); +- int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE); +- int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE); +- int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE); +- int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE); +- +- uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0))); +- uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0))); +- uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0))); +- uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0))); +- uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0))); +- uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0))); +- uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0))); +- uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0))); ++ uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE); ++ uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE); ++ uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE); ++ uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE); ++ uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE); ++ uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE); ++ uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE); ++ uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE); ++ ++ uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0))); ++ uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0))); ++ uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0))); ++ uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0))); ++ uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0))); ++ uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0))); ++ uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0))); ++ uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0))); + + /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 
0x40, 0x80 } */ + const uint8x8_t bitmap_mask = +@@ -274,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon + + int jsimd_encode_mcu_AC_refine_prepare_neon + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *absvalues, size_t *bits) ++ UJCOEF *absvalues, size_t *bits) + { + /* Temporary storage buffers for data used to compute the signbits bitmap and + * the end-of-block (EOB) position +@@ -282,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + uint8_t coef_sign_bits[64]; + uint8_t coef_eq1_bits[64]; + +- JCOEF *absvalues_ptr = absvalues; ++ UJCOEF *absvalues_ptr = absvalues; + uint8_t *coef_sign_bits_ptr = coef_sign_bits; + uint8_t *eq1_bits_ptr = coef_eq1_bits; + +@@ -316,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2); + + /* Compute absolute value of coefficients and apply point transform Al. */ +- int16x8_t abs_coefs1 = vabsq_s16(coefs1); +- int16x8_t abs_coefs2 = vabsq_s16(coefs2); +- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); +- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); +- vst1q_s16(absvalues_ptr, coefs1); +- vst1q_s16(absvalues_ptr + DCTSIZE, coefs2); ++ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1)); ++ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2)); ++ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al)); ++ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al)); ++ vst1q_u16(absvalues_ptr, abs_coefs1); ++ vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2); + + /* Test whether transformed coefficient values == 1 (used to find EOB + * position.) 
+ */ +- uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1))); +- uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1))); ++ uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1))); ++ uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1))); + vst1_u8(eq1_bits_ptr, coefs_eq11); + vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12); + +@@ -385,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2); + + /* Compute absolute value of coefficients and apply point transform Al. */ +- int16x8_t abs_coefs1 = vabsq_s16(coefs1); +- int16x8_t abs_coefs2 = vabsq_s16(coefs2); +- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); +- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); +- vst1q_s16(absvalues_ptr, coefs1); +- vst1q_s16(absvalues_ptr + DCTSIZE, coefs2); ++ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1)); ++ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2)); ++ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al)); ++ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al)); ++ vst1q_u16(absvalues_ptr, abs_coefs1); ++ vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2); + + /* Test whether transformed coefficient values == 1 (used to find EOB + * position.) + */ +- uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1))); +- uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1))); ++ uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1))); ++ uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1))); + vst1_u8(eq1_bits_ptr, coefs_eq11); + vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12); + +@@ -444,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + vst1_u8(coef_sign_bits_ptr, sign_coefs); + + /* Compute absolute value of coefficients and apply point transform Al. 
*/ +- int16x8_t abs_coefs = vabsq_s16(coefs); +- coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al)); +- vst1q_s16(absvalues_ptr, coefs); ++ uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs)); ++ abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al)); ++ vst1q_u16(absvalues_ptr, abs_coefs); + + /* Test whether transformed coefficient values == 1 (used to find EOB + * position.) + */ +- uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1))); ++ uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1))); + vst1_u8(eq1_bits_ptr, coefs_eq1); + + absvalues_ptr += 8; +@@ -462,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + + /* Zero remaining memory in blocks. */ + for (i = 0; i < rows_to_zero; i++) { +- vst1q_s16(absvalues_ptr, vdupq_n_s16(0)); ++ vst1q_u16(absvalues_ptr, vdupq_n_u16(0)); + vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0)); + vst1_u8(eq1_bits_ptr, vdup_n_u8(0)); + absvalues_ptr += 8; +@@ -471,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon + } + + /* Construct zerobits bitmap. 
*/ +- int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE); +- int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE); +- int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE); +- int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE); +- int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE); +- int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE); +- int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE); +- int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE); +- +- uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0))); +- uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0))); +- uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0))); +- uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0))); +- uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0))); +- uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0))); +- uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0))); +- uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0))); ++ uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE); ++ uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE); ++ uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE); ++ uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE); ++ uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE); ++ uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE); ++ uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE); ++ uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE); ++ ++ uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0))); ++ uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0))); ++ uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0))); ++ uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0))); ++ uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0))); ++ uint8x8_t abs_row5_eq0 = 
vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0))); ++ uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0))); ++ uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0))); + + /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */ + const uint8x8_t bitmap_mask = +diff --git a/simd/arm/jdcolor-neon.c b/simd/arm/jdcolor-neon.c +index ea4668f..28dbc57 100644 +--- a/simd/arm/jdcolor-neon.c ++++ b/simd/arm/jdcolor-neon.c +@@ -21,7 +21,6 @@ + */ + + #define JPEG_INTERNALS +-#include "jconfigint.h" + #include "../../jinclude.h" + #include "../../jpeglib.h" + #include "../../jsimd.h" +diff --git a/simd/arm/jdmerge-neon.c b/simd/arm/jdmerge-neon.c +index e4f91fd..18fb9d8 100644 +--- a/simd/arm/jdmerge-neon.c ++++ b/simd/arm/jdmerge-neon.c +@@ -21,7 +21,6 @@ + */ + + #define JPEG_INTERNALS +-#include "jconfigint.h" + #include "../../jinclude.h" + #include "../../jpeglib.h" + #include "../../jsimd.h" +diff --git a/simd/arm/jidctint-neon.c b/simd/arm/jidctint-neon.c +index 043b652..d25112e 100644 +--- a/simd/arm/jidctint-neon.c ++++ b/simd/arm/jidctint-neon.c +@@ -22,7 +22,6 @@ + */ + + #define JPEG_INTERNALS +-#include "jconfigint.h" + #include "../../jinclude.h" + #include "../../jpeglib.h" + #include "../../jsimd.h" +diff --git a/simd/i386/jsimd.c b/simd/i386/jsimd.c +index 80bc821..b429b0a 100644 +--- a/simd/i386/jsimd.c ++++ b/simd/i386/jsimd.c +@@ -2,8 +2,8 @@ + * jsimd_i386.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. 
+@@ -21,7 +21,6 @@ + #include "../../jdct.h" + #include "../../jsimddct.h" + #include "../jsimd.h" +-#include "jconfigint.h" + + /* + * In the PIC cases, we have no guarantee that constants will keep +@@ -32,13 +31,11 @@ + #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ + +-static unsigned int simd_support = (unsigned int)(~0); +-static unsigned int simd_huffman = 1; ++static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0); ++static THREAD_LOCAL unsigned int simd_huffman = 1; + + /* + * Check what SIMD accelerations are supported. +- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -161,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_ycc_convert_avx2; +@@ -220,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_gray_convert_avx2; +@@ -279,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_ycc_extrgb_convert_avx2; +@@ -382,6 +388,9 @@ GLOBAL(void) + jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info 
*compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, +@@ -402,6 +411,9 @@ GLOBAL(void) + jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, +@@ -464,6 +476,9 @@ GLOBAL(void) + jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +@@ -479,6 +494,9 @@ GLOBAL(void) + jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +@@ -540,6 +558,9 @@ GLOBAL(void) + jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, +@@ -558,6 +579,9 @@ GLOBAL(void) + jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, 
input_data, +@@ -626,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; +@@ -684,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; +@@ -788,6 +818,9 @@ GLOBAL(void) + jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_convsamp_avx2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE2) +@@ -800,6 +833,9 @@ GLOBAL(void) + jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE) +@@ -870,6 +906,9 @@ jsimd_can_fdct_float(void) + GLOBAL(void) + jsimd_fdct_islow(DCTELEM *data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_fdct_islow_avx2(data); + else if (simd_support & JSIMD_SSE2) +@@ -881,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data) + GLOBAL(void) + jsimd_fdct_ifast(DCTELEM *data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + jsimd_fdct_ifast_sse2(data); + else +@@ -890,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data) + GLOBAL(void) + 
jsimd_fdct_float(FAST_FLOAT *data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + jsimd_fdct_float_sse(data); + else if (simd_support & JSIMD_3DNOW) +@@ -945,6 +990,9 @@ jsimd_can_quantize_float(void) + GLOBAL(void) + jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_quantize_avx2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE2) +@@ -957,6 +1005,9 @@ GLOBAL(void) + jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_float_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE) +@@ -1020,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1032,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1126,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1142,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, 
jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1155,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1212,7 +1278,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, + Sl, Al, values, zerobits); +@@ -1238,7 +1304,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return jsimd_encode_mcu_AC_refine_prepare_sse2(block, + jpeg_natural_order_start, +diff --git a/simd/jsimd.h b/simd/jsimd.h +index 64747c6..8019d5d 100644 +--- a/simd/jsimd.h ++++ b/simd/jsimd.h +@@ -2,10 +2,10 @@ + * simd/jsimd.h + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander. ++ * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander. + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. + * Copyright (C) 2014, Linaro Limited. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. 
+ * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. + * Copyright (C) 2020, Arm Limited. + * +@@ -27,6 +27,7 @@ + #define JSIMD_ALTIVEC 0x40 + #define JSIMD_AVX2 0x80 + #define JSIMD_MMI 0x100 ++#define JSIMD_RVV 0x200 + + /* SIMD Ext: retrieve SIMD/CPU information */ + EXTERN(unsigned int) jpeg_simd_cpu_support(void); +@@ -199,6 +200,29 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + ++/* rvv */ ++EXTERN(void) jsimd_rgb_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extrgb_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extrgbx_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extbgr_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extbgrx_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extxbgr_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extxrgb_ycc_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++ + /* RGB & extended RGB --> Grayscale Colorspace Conversion */ + EXTERN(void) jsimd_rgb_gray_convert_mmx + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +@@ -356,6 +380,29 @@ EXTERN(void) jsimd_extxrgb_gray_convert_altivec + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + ++/* 
rvv */ ++EXTERN(void) jsimd_rgb_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extrgb_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extrgbx_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extbgr_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extbgrx_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extxbgr_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++EXTERN(void) jsimd_extxrgb_gray_convert_rvv ++ (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, ++ JDIMENSION output_row, int num_rows); ++ + /* YCC --> RGB & extended RGB Colorspace Conversion */ + EXTERN(void) jsimd_ycc_rgb_convert_mmx + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, +@@ -527,6 +574,29 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_altivec + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + ++/* rvv */ ++EXTERN(void) jsimd_ycc_rgb_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extrgb_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extrgbx_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extbgr_convert_rvv ++ (JDIMENSION 
out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extbgrx_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extxbgr_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++EXTERN(void) jsimd_ycc_extxrgb_convert_rvv ++ (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, ++ JSAMPARRAY output_buf, int num_rows); ++ + /* NULL Colorspace Conversion */ + EXTERN(void) jsimd_c_null_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, +@@ -557,6 +627,10 @@ EXTERN(void) jsimd_h2v1_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); + ++EXTERN(void) jsimd_h2v1_downsample_rvv ++ (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, ++ JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); ++ + /* h2v2 Downsampling */ + EXTERN(void) jsimd_h2v2_downsample_mmx + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, +@@ -586,6 +660,10 @@ EXTERN(void) jsimd_h2v2_downsample_altivec + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); + ++EXTERN(void) jsimd_h2v2_downsample_rvv ++ (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, ++ JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); ++ + /* h2v2 Smooth Downsampling */ + EXTERN(void) jsimd_h2v2_smooth_downsample_dspr2 + (JSAMPARRAY input_data, JSAMPARRAY output_data, JDIMENSION v_samp_factor, +@@ -641,6 +719,13 @@ EXTERN(void) jsimd_h2v2_upsample_altivec + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + 
JSAMPARRAY *output_data_ptr); + ++EXTERN(void) jsimd_h2v1_upsample_rvv ++ (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr); ++EXTERN(void) jsimd_h2v2_upsample_rvv ++ (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr); ++ + /* Fancy Upsampling */ + EXTERN(void) jsimd_h2v1_fancy_upsample_mmx + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, +@@ -696,6 +781,16 @@ EXTERN(void) jsimd_h2v2_fancy_upsample_altivec + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + ++EXTERN(void) jsimd_h2v1_fancy_upsample_rvv ++ (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr); ++EXTERN(void) jsimd_h2v2_fancy_upsample_rvv ++ (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr); ++EXTERN(void) jsimd_h1v2_fancy_upsample_rvv ++ (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr); ++ + /* Merged Upsampling */ + EXTERN(void) jsimd_h2v1_merged_upsample_mmx + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, +@@ -1007,6 +1102,51 @@ EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_altivec + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); + ++/* rvv */ ++EXTERN(void) jsimd_h2v1_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) 
jsimd_h2v1_extbgr_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++ ++EXTERN(void) jsimd_h2v2_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_rvv ++ (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf); ++ + /* Sample Conversion */ + EXTERN(void) jsimd_convsamp_mmx + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); +@@ -1026,6 +1166,9 @@ EXTERN(void) jsimd_convsamp_dspr2 + EXTERN(void) jsimd_convsamp_altivec + (JSAMPARRAY 
sample_data, JDIMENSION start_col, DCTELEM *workspace); + ++EXTERN(void) jsimd_convsamp_rvv ++ (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); ++ + /* Floating Point Sample Conversion */ + EXTERN(void) jsimd_convsamp_float_3dnow + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); +@@ -1056,6 +1199,8 @@ EXTERN(void) jsimd_fdct_islow_mmi(DCTELEM *data); + + EXTERN(void) jsimd_fdct_islow_altivec(DCTELEM *data); + ++EXTERN(void) jsimd_fdct_islow_rvv(DCTELEM *data); ++ + /* Fast Integer Forward DCT */ + EXTERN(void) jsimd_fdct_ifast_mmx(DCTELEM *data); + +@@ -1070,6 +1215,8 @@ EXTERN(void) jsimd_fdct_ifast_mmi(DCTELEM *data); + + EXTERN(void) jsimd_fdct_ifast_altivec(DCTELEM *data); + ++EXTERN(void) jsimd_fdct_ifast_rvv(DCTELEM *data); ++ + /* Floating Point Forward DCT */ + EXTERN(void) jsimd_fdct_float_3dnow(FAST_FLOAT *data); + +@@ -1098,6 +1245,9 @@ EXTERN(void) jsimd_quantize_mmi + EXTERN(void) jsimd_quantize_altivec + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + ++EXTERN(void) jsimd_quantize_rvv ++ (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); ++ + /* Floating Point Quantization */ + EXTERN(void) jsimd_quantize_float_3dnow + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); +@@ -1148,6 +1298,13 @@ EXTERN(void) jsimd_idct_12x12_pass1_dspr2 + EXTERN(void) jsimd_idct_12x12_pass2_dspr2 + (int *workspace, int *output); + ++EXTERN(void) jsimd_idct_2x2_rvv ++ (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col); ++EXTERN(void) jsimd_idct_4x4_rvv ++ (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col); ++ + /* Accurate Integer Inverse DCT */ + EXTERN(void) jsimd_idct_islow_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, +@@ -1178,6 +1335,10 @@ EXTERN(void) jsimd_idct_islow_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + ++EXTERN(void) 
jsimd_idct_islow_rvv ++ (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col); ++ + /* Fast Integer Inverse DCT */ + EXTERN(void) jsimd_idct_ifast_mmx + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, +@@ -1207,6 +1368,10 @@ EXTERN(void) jsimd_idct_ifast_altivec + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + ++EXTERN(void) jsimd_idct_ifast_rvv ++ (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col); ++ + /* Floating Point Inverse DCT */ + EXTERN(void) jsimd_idct_float_3dnow + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, +@@ -1243,16 +1408,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl + /* Progressive Huffman encoding */ + EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2 + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *values, size_t *zerobits); ++ UJCOEF *values, size_t *zerobits); + + EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *values, size_t *zerobits); ++ UJCOEF *values, size_t *zerobits); + + EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2 + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *absvalues, size_t *bits); ++ UJCOEF *absvalues, size_t *bits); + + EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, +- JCOEF *absvalues, size_t *bits); ++ UJCOEF *absvalues, size_t *bits); +diff --git a/simd/mips/jsimd.c b/simd/mips/jsimd.c +index 36ea865..c6e789a 100644 +--- a/simd/mips/jsimd.c ++++ b/simd/mips/jsimd.c +@@ -2,9 +2,9 @@ + * jsimd_mips.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, D. R. Commander. ++ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, 2022, D. R. Commander. 
+ * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. +@@ -25,7 +25,7 @@ + + #include + +-static unsigned int simd_support = ~0; ++static THREAD_LOCAL unsigned int simd_support = ~0; + + #if !(defined(__mips_dsp) && (__mips_dsp_rev >= 2)) && defined(__linux__) + +@@ -55,8 +55,6 @@ parse_proc_cpuinfo(const char *search_string) + + /* + * Check what SIMD accelerations are supported. +- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -1126,7 +1124,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + } + +@@ -1139,7 +1137,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return 0; + } +diff --git a/simd/mips64/jsimd.c b/simd/mips64/jsimd.c +index 2e626b2..917440b 100644 +--- a/simd/mips64/jsimd.c ++++ b/simd/mips64/jsimd.c +@@ -2,9 +2,9 @@ + * jsimd_mips64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander. ++ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander. + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. +- * Copyright (C) 2015, 2018, Matthieu Darbois. ++ * Copyright (C) 2015, 2018, 2022, Matthieu Darbois. + * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. 
+ * + * Based on the x86 SIMD extension for IJG JPEG library, +@@ -26,7 +26,7 @@ + + #include + +-static unsigned int simd_support = ~0; ++static THREAD_LOCAL unsigned int simd_support = ~0; + + #if defined(__linux__) + +@@ -94,8 +94,6 @@ parse_proc_cpuinfo(int bufsize) + + /* + * Check what SIMD accelerations are supported. +- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -849,7 +847,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + } + +@@ -862,7 +860,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return 0; + } +diff --git a/simd/powerpc/jsimd.c b/simd/powerpc/jsimd.c +index 9a452a3..461f603 100644 +--- a/simd/powerpc/jsimd.c ++++ b/simd/powerpc/jsimd.c +@@ -2,8 +2,8 @@ + * jsimd_powerpc.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2009-2011, 2014-2016, 2018, 2022, D. R. Commander. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. +@@ -41,7 +41,7 @@ + #include + #endif + +-static unsigned int simd_support = ~0; ++static THREAD_LOCAL unsigned int simd_support = ~0; + + #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) + +@@ -109,8 +109,6 @@ parse_proc_cpuinfo(int bufsize) + + /* + * Check what SIMD accelerations are supported. 
+- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -867,7 +865,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + } + +@@ -880,7 +878,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return 0; + } +diff --git a/simd/rvv/jccolext-rvv.c b/simd/rvv/jccolext-rvv.c +new file mode 100644 +index 0000000..2489688 +--- /dev/null ++++ b/simd/rvv/jccolext-rvv.c +@@ -0,0 +1,145 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. 
++ */
++
++/* This file is included by jccolor-rvv.c */
++
++/* RGB -> YCbCr conversion is defined by the following equations:
++ *    Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
++ *    Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + 128
++ *    Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + 128
++ *
++ * Avoid floating point arithmetic by using shifted integer constants:
++ *    0.29899597 = 19595 * 2^-16
++ *    0.58700561 = 38470 * 2^-16
++ *    0.11399841 = 7471 * 2^-16
++ *    0.16874695 = 11059 * 2^-16
++ *    0.33125305 = 21709 * 2^-16
++ *    0.50000000 = 32768 * 2^-16
++ *    0.41868592 = 27439 * 2^-16
++ *    0.08131409 = 5329 * 2^-16
++ * These constants are defined in jccolor-rvv.c
++ */
++
++#define JPEG_INTERNALS
++/* Convert num_rows rows of interleaved RGB pixels to planar Y/Cb/Cr rows. */
++void jsimd_rgb_ycc_convert_rvv(JDIMENSION img_width, JSAMPARRAY input_buf,
++                               JSAMPIMAGE output_buf, JDIMENSION output_row,
++                               int num_rows)
++{
++  int pixels_remaining;
++  size_t vl;
++  ptrdiff_t bstride;
++  JSAMPROW inptr, outptr0, outptr1, outptr2;
++
++  vuint16m4_t r, g, b;
++  vuint16m4_t y, cb, cr;
++  vuint32m8_t accum, tmp;
++#if BITS_IN_JSAMPLE == 8
++  vuint8m2_t dest, src;
++#endif
++
++  bstride = RGB_PIXELSIZE * sizeof(JSAMPLE);
++
++  for (; num_rows > 0; ++input_buf, ++output_row, --num_rows)
++  {
++    inptr = *input_buf;
++    outptr0 = output_buf[0][output_row];
++    outptr1 = output_buf[1][output_row];
++    outptr2 = output_buf[2][output_row];
++
++    for (pixels_remaining = img_width; pixels_remaining > 0; pixels_remaining -= vl)
++    {
++      /* Set vl for each iteration. */
++      vl = __riscv_vsetvl_e16m4(pixels_remaining);
++
++      /* Load R, G, B channels as vectors from inptr. */
++#if BITS_IN_JSAMPLE == 8
++      /* Extending to vuint16m4_t type for following multiply calculation. */
++      src = __riscv_vlse8_v_u8m2(inptr + RGB_RED, bstride, vl);
++      r = __riscv_vwcvtu_x_x_v_u16m4(src, vl);
++      src = __riscv_vlse8_v_u8m2(inptr + RGB_GREEN, bstride, vl);
++      g = __riscv_vwcvtu_x_x_v_u16m4(src, vl);
++      src = __riscv_vlse8_v_u8m2(inptr + RGB_BLUE, bstride, vl);
++      b = __riscv_vwcvtu_x_x_v_u16m4(src, vl);
++#else /* BITS_IN_JSAMPLE == 12 */
++      r = __riscv_vlse16_v_u16m4(inptr + RGB_RED, bstride, vl);
++      g = __riscv_vlse16_v_u16m4(inptr + RGB_GREEN, bstride, vl);
++      b = __riscv_vlse16_v_u16m4(inptr + RGB_BLUE, bstride, vl);
++#endif
++
++      /* Calculate Y values */
++      accum = __riscv_vwmulu_vx_u32m8(r, F_0_299, vl);
++      accum = __riscv_vwmaccu_vx_u32m8(accum, F_0_587, g, vl);
++      accum = __riscv_vwmaccu_vx_u32m8(accum, F_0_114, b, vl);
++
++      /* Round explicitly (+ONE_HALF, then shift): vnclipu would follow the ambient vxrm. */
++      accum = __riscv_vadd_vx_u32m8(accum, ONE_HALF, vl);
++      y = __riscv_vnsrl_wx_u16m4(accum, SCALEBITS, vl);
++
++#if BITS_IN_JSAMPLE == 8
++      dest = __riscv_vncvt_x_x_w_u8m2(y, vl);
++      __riscv_vse8_v_u8m2(outptr0, dest, vl);
++#else /* BITS_IN_JSAMPLE == 12 */
++      __riscv_vse16_v_u16m4(outptr0, y, vl);
++#endif
++
++      /* Calculate Cb values */
++      accum = __riscv_vwmulu_vx_u32m8(b, F_0_500, vl);
++      accum = __riscv_vadd_vx_u32m8(accum, SCALED_CENTERJSAMPLE+ONE_HALF - 1, vl);
++      tmp = __riscv_vwmulu_vx_u32m8(g, F_0_331, vl);
++      accum = __riscv_vsub_vv_u32m8(accum, tmp, vl);
++      tmp = __riscv_vwmulu_vx_u32m8(r, F_0_168, vl);
++      accum = __riscv_vsub_vv_u32m8(accum, tmp, vl);
++
++      cb = __riscv_vnsrl_wx_u16m4(accum, SCALEBITS, vl);
++
++#if BITS_IN_JSAMPLE == 8
++      dest = __riscv_vncvt_x_x_w_u8m2(cb, vl);
++      __riscv_vse8_v_u8m2(outptr1, dest, vl);
++#else /* BITS_IN_JSAMPLE == 12 */
++      __riscv_vse16_v_u16m4(outptr1, cb, vl);
++#endif
++
++      /* Calculate Cr values */
++      accum = __riscv_vwmulu_vx_u32m8(r, F_0_500, vl);
++      accum = __riscv_vadd_vx_u32m8(accum, SCALED_CENTERJSAMPLE+ONE_HALF - 1, vl);
++      tmp = __riscv_vwmulu_vx_u32m8(g, F_0_418, vl);
++      accum = __riscv_vsub_vv_u32m8(accum, tmp, vl);
++      tmp = __riscv_vwmulu_vx_u32m8(b, F_0_081, vl);
++      accum = __riscv_vsub_vv_u32m8(accum, tmp, vl);
++
++      cr = __riscv_vnsrl_wx_u16m4(accum, SCALEBITS, vl);
++
++#if BITS_IN_JSAMPLE == 8
++      dest = __riscv_vncvt_x_x_w_u8m2(cr, vl);
++      __riscv_vse8_v_u8m2(outptr2, dest, vl);
++#else /* BITS_IN_JSAMPLE == 12 */
++      __riscv_vse16_v_u16m4(outptr2, cr, vl);
++#endif
++      inptr += vl * RGB_PIXELSIZE;
++      outptr0 += vl;
++      outptr1 += vl;
++      outptr2 += vl;
++    }
++  }
++}
+\ No newline at end of file
+diff --git a/simd/rvv/jccolor-rvv.c b/simd/rvv/jccolor-rvv.c
+new file mode 100644
+index 0000000..e28905e
+--- /dev/null
++++ b/simd/rvv/jccolor-rvv.c
+@@ -0,0 +1,118 @@
++/*
++ * Risc-V vector extension optimizations for libjpeg-turbo
++ *
++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved.
++ * Contributed by Liang Junzhao
++ *
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ *
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ *
++ * 1. The origin of this software must not be misrepresented; you must not
++ *    claim that you wrote the original software. If you use this software
++ *    in a product, an acknowledgment in the product documentation would be
++ *    appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ *    misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */ ++ ++/* RGB --> YCC CONVERSION */ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++/* RGB -> YCbCr conversion constants */ ++#define F_0_299 19595 /* 0.29899597 = 19595 * 2^-16 */ ++#define F_0_587 38470 /* 0.58700561 = 38470 * 2^-16 */ ++#define F_0_114 7471 /* 0.11399841 = 7471 * 2^-16 */ ++#define F_0_168 11059 /* 0.16874695 = 11059 * 2^-16 */ ++#define F_0_331 21709 /* 0.33125305 = 21709 * 2^-16 */ ++#define F_0_500 32768 /* 0.50000000 = 32768 * 2^-16 */ ++#define F_0_418 27439 /* 0.41868592 = 27439 * 2^-16 */ ++#define F_0_081 5329 /* 0.08131409 = 5329 * 2^-16 */ ++ ++#define SCALEBITS 16 ++#define ONE_HALF (1 << (SCALEBITS - 1)) ++#define SCALED_CENTERJSAMPLE (CENTERJSAMPLE << SCALEBITS) ++ ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++ ++#define RGB_RED EXT_RGB_RED ++#define RGB_GREEN EXT_RGB_GREEN ++#define RGB_BLUE EXT_RGB_BLUE ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extrgb_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv ++ ++#define RGB_RED EXT_RGBX_RED ++#define RGB_GREEN EXT_RGBX_GREEN ++#define RGB_BLUE EXT_RGBX_BLUE ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extrgbx_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv ++ ++#define RGB_RED EXT_BGR_RED ++#define RGB_GREEN EXT_BGR_GREEN ++#define RGB_BLUE EXT_BGR_BLUE ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extbgr_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv ++ ++#define RGB_RED EXT_BGRX_RED ++#define RGB_GREEN EXT_BGRX_GREEN ++#define RGB_BLUE EXT_BGRX_BLUE ++#define RGB_PIXELSIZE 
EXT_BGRX_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extbgrx_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv ++ ++#define RGB_RED EXT_XBGR_RED ++#define RGB_GREEN EXT_XBGR_GREEN ++#define RGB_BLUE EXT_XBGR_BLUE ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extxbgr_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv ++ ++#define RGB_RED EXT_XRGB_RED ++#define RGB_GREEN EXT_XRGB_GREEN ++#define RGB_BLUE EXT_XRGB_BLUE ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE ++#define jsimd_rgb_ycc_convert_rvv jsimd_extxrgb_ycc_convert_rvv ++#include "jccolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_ycc_convert_rvv +diff --git a/simd/rvv/jcgray-rvv.c b/simd/rvv/jcgray-rvv.c +new file mode 100644 +index 0000000..8e389cb +--- /dev/null ++++ b/simd/rvv/jcgray-rvv.c +@@ -0,0 +1,112 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. 
Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* RGB --> GRAYSCALE CONVERSION */ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++/* RGB -> GRAYSCALE CONVERSION constants */ ++#define F_0_299 19595 /* 0.29899597 = 19595 * 2^-16 */ ++#define F_0_587 38470 /* 0.58700561 = 38470 * 2^-16 */ ++#define F_0_114 7471 /* 0.11399841 = 7471 * 2^-16 */ ++ ++#define SCALEBITS 16 ++#define ONE_HALF (1 << (SCALEBITS - 1)) ++ ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++ ++#define RGB_RED EXT_RGB_RED ++#define RGB_GREEN EXT_RGB_GREEN ++#define RGB_BLUE EXT_RGB_BLUE ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extrgb_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv ++ ++#define RGB_RED EXT_RGBX_RED ++#define RGB_GREEN EXT_RGBX_GREEN ++#define RGB_BLUE EXT_RGBX_BLUE ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extrgbx_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv ++ ++#define RGB_RED EXT_BGR_RED ++#define RGB_GREEN EXT_BGR_GREEN ++#define RGB_BLUE EXT_BGR_BLUE ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extbgr_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv ++ ++#define RGB_RED EXT_BGRX_RED ++#define RGB_GREEN EXT_BGRX_GREEN ++#define RGB_BLUE EXT_BGRX_BLUE ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extbgrx_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef 
RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv ++ ++#define RGB_RED EXT_XBGR_RED ++#define RGB_GREEN EXT_XBGR_GREEN ++#define RGB_BLUE EXT_XBGR_BLUE ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extxbgr_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv ++ ++#define RGB_RED EXT_XRGB_RED ++#define RGB_GREEN EXT_XRGB_GREEN ++#define RGB_BLUE EXT_XRGB_BLUE ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE ++#define jsimd_rgb_gray_convert_rvv jsimd_extxrgb_gray_convert_rvv ++#include "jcgryext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_rgb_gray_convert_rvv +diff --git a/simd/rvv/jcgryext-rvv.c b/simd/rvv/jcgryext-rvv.c +new file mode 100644 +index 0000000..a84fb74 +--- /dev/null ++++ b/simd/rvv/jcgryext-rvv.c +@@ -0,0 +1,104 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. 
This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* This file is included by jcgray-rvv.c */ ++ ++/* RGB -> Grayscale conversion is defined by the following equation: ++ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B ++ * ++ * Avoid floating point arithmetic by using shifted integer constants: ++ * 0.29899597 = 19595 * 2^-16 ++ * 0.58700561 = 38470 * 2^-16 ++ * 0.11399841 = 7471 * 2^-16 ++ * These constants are defined in jcgray-rvv.c ++ * ++ * This is the same computation as the RGB -> Y portion of RGB -> YCbCr. ++ */ ++ ++#define JPEG_INTERNALS ++ ++void jsimd_rgb_gray_convert_rvv(JDIMENSION img_width, JSAMPARRAY input_buf, ++ JSAMPIMAGE output_buf, JDIMENSION output_row, ++ int num_rows) ++{ ++ int pixels_remaining; ++ size_t vl; ++ ptrdiff_t bstride; ++ JSAMPROW inptr, outptr; ++ ++ vuint16m4_t r, g, b, y; ++ vuint32m8_t accum; ++#if BITS_IN_JSAMPLE == 8 ++ vuint8m2_t dest, src; ++#endif ++ ++ bstride = RGB_PIXELSIZE * sizeof(JSAMPLE); ++ ++ while (--num_rows >= 0) ++ { ++ inptr = *input_buf++; ++ outptr = output_buf[0][output_row++]; ++ ++ for (pixels_remaining = img_width; pixels_remaining > 0; pixels_remaining -= vl) ++ { ++ /* Set vl for each iteration. */ ++ vl = __riscv_vsetvl_e16m4(pixels_remaining); ++ ++ /* Load R, G, B channels as vectors from inptr. */ ++#if BITS_IN_JSAMPLE == 8 ++ /* Extending to vuint16m4_t type for following multiply calculation. 
*/ ++ src = __riscv_vlse8_v_u8m2(inptr + RGB_RED, bstride, vl); ++ r = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++ src = __riscv_vlse8_v_u8m2(inptr + RGB_GREEN, bstride, vl); ++ g = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++ src = __riscv_vlse8_v_u8m2(inptr + RGB_BLUE, bstride, vl); ++ b = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ r = __riscv_vlse16_v_u16m4(inptr + RGB_RED, bstride, vl); ++ g = __riscv_vlse16_v_u16m4(inptr + RGB_GREEN, bstride, vl); ++ b = __riscv_vlse16_v_u16m4(inptr + RGB_BLUE, bstride, vl); ++#endif ++ ++ /* Calculate Y values */ ++ accum = __riscv_vwmulu_vx_u32m8(r, F_0_299, vl); ++ accum = __riscv_vwmaccu_vx_u32m8(accum, F_0_587, g, vl); ++ accum = __riscv_vwmaccu_vx_u32m8(accum, F_0_114, b, vl); ++ ++ // accum = __riscv_vssrl_vx_u32m8(accum, SCALEBITS, vl); ++ // y = __riscv_vncvt_x_x_w_u16m4(accum, vl); ++ // accum = __riscv_vadd_vx_u32m8(accum, ONE_HALF, vl); ++ // y = __riscv_vnsrl_wx_u16m4(accum, SCALEBITS, vl); ++ y = __riscv_vnclipu_wx_u16m4(accum, SCALEBITS, vl); ++ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_u8m2(y, vl); ++ // dest = __riscv_vnsrl_wx_u8m2(y, 0, vl); ++ __riscv_vse8_v_u8m2(outptr, dest, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vse16_v_u16m4(outptr, y, vl); ++#endif ++ inptr += vl * RGB_PIXELSIZE; ++ outptr += vl; ++ } ++ } ++} +\ No newline at end of file +diff --git a/simd/rvv/jcsample-rvv.c b/simd/rvv/jcsample-rvv.c +new file mode 100644 +index 0000000..2f8d087 +--- /dev/null ++++ b/simd/rvv/jcsample-rvv.c +@@ -0,0 +1,149 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. 
++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* ++ * Downsample pixel values of a single component. ++ * This version handles the common case of 2:1 horizontal and 1:1 vertical, ++ * without smoothing. ++ * ++ * A note about the "bias" calculations: when rounding fractional values to ++ * integer, we do not want to always round 0.5 up to the next integer. ++ * If we did that, we'd introduce a noticeable bias towards larger values. ++ * Instead, this code is arranged so that 0.5 will be rounded up or down at ++ * alternate pixel locations (a simple ordered dither pattern). ++ */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++#include "jcsample.h" ++ ++void jsimd_h2v1_downsample_rvv(JDIMENSION image_width, ++ int max_v_samp_factor, ++ JDIMENSION v_samp_factor, ++ JDIMENSION width_in_blocks, ++ JSAMPARRAY input_data, ++ JSAMPARRAY output_data) ++{ ++ int outrow, cols_remaining; ++ size_t vl; ++ JDIMENSION output_cols = width_in_blocks * DCTSIZE; ++ JSAMPROW inptr, outptr; ++ ++ vuint8m2_t this_u8, next_u8, out; ++ vuint16m4_t this_u16, next_u16, bias; ++ ++ /* Build the bias once with a vl at least as large as any used in the loop. */ ++ vl = __riscv_vsetvl_e16m4(output_cols * 2); ++ /* bias = 0, 1, 0, 1, 0, 1 ... 
*/ ++ bias = __riscv_vid_v_u16m4(vl); ++ // (element index & 1) yields the alternating 0/1 pattern without a vremu. ++ bias = __riscv_vand_vx_u16m4(bias, 0x01, vl); ++ ++ expand_right_edge_rvv(input_data, max_v_samp_factor, image_width, output_cols * 2); ++ ++ for (outrow = 0; outrow < v_samp_factor; outrow++) ++ { ++ outptr = output_data[outrow]; ++ inptr = input_data[outrow]; ++ ++ for (cols_remaining = output_cols; cols_remaining > 0; cols_remaining -= vl) ++ { ++ /* vl counts OUTPUT columns (each reads two inputs); never exceed what remains. */ ++ vl = __riscv_vsetvl_e16m4(cols_remaining); ++ ++ /* Load samples and the adjacent ones. */ ++ this_u8 = __riscv_vlse8_v_u8m2(inptr, 2 * sizeof(JSAMPLE), vl); ++ next_u8 = __riscv_vlse8_v_u8m2(inptr + 1, 2 * sizeof(JSAMPLE), vl); ++ ++ /* Widen to vuint16m4_t type. */ ++ this_u16 = __riscv_vwcvtu_x_x_v_u16m4(this_u8, vl); ++ next_u16 = __riscv_vwcvtu_x_x_v_u16m4(next_u8, vl); ++ ++ /* Add adjacent pixel values and add bias. */ ++ this_u16 = __riscv_vadd_vv_u16m4(this_u16, next_u16, vl); ++ this_u16 = __riscv_vadd_vv_u16m4(this_u16, bias, vl); ++ ++ /* Divide total by 2, narrow to 8-bit, and store. */ ++ out = __riscv_vnsrl_wx_u8m2(this_u16, 1, vl); ++ __riscv_vse8_v_u8m2(outptr, out, vl); ++ ++ inptr += vl * 2; ++ outptr += vl; ++ } ++ } ++} ++ ++void jsimd_h2v2_downsample_rvv(JDIMENSION image_width, int max_v_samp_factor, ++ JDIMENSION v_samp_factor, ++ JDIMENSION width_in_blocks, ++ JSAMPARRAY input_data, JSAMPARRAY output_data) ++{ ++ int inrow, outrow, cols_remaining; ++ size_t vl; ++ JDIMENSION output_cols = width_in_blocks * DCTSIZE; ++ JSAMPROW inptr0, inptr1, outptr; ++ ++ vuint8m2_t this0_u8m2, next0_u8m2, this1_u8m2, next1_u8m2, out_u8m2; ++ vuint16m4_t bias, out_u16m4, tmp_u16m4; ++ ++ vl = __riscv_vsetvl_e16m4(output_cols * 2); ++ /* bias = 1, 2, 1, 2, 1, 2 ... 
*/ ++ bias = __riscv_vid_v_u16m4(vl); ++ bias = __riscv_vand_vx_u16m4(bias, 0x01, vl); ++ bias = __riscv_vadd_vx_u16m4(bias, 1, vl); ++ ++ expand_right_edge_rvv(input_data, max_v_samp_factor, image_width, output_cols * 2); ++ ++ for (inrow = 0, outrow = 0; outrow < v_samp_factor; inrow += 2, outrow++) ++ { ++ inptr0 = input_data[inrow]; ++ inptr1 = input_data[inrow + 1]; ++ outptr = output_data[outrow]; ++ ++ for (cols_remaining = output_cols; cols_remaining > 0; cols_remaining -= vl) ++ { ++ /* vl counts OUTPUT columns (each reads two inputs per row); never exceed what remains. */ ++ vl = __riscv_vsetvl_e16m4(cols_remaining); ++ ++ /* Load samples and the adjacent ones of two rows. */ ++ this0_u8m2 = __riscv_vlse8_v_u8m2(inptr0, 2 * sizeof(JSAMPLE), vl); ++ next0_u8m2 = __riscv_vlse8_v_u8m2(inptr0 + 1, 2 * sizeof(JSAMPLE), vl); ++ this1_u8m2 = __riscv_vlse8_v_u8m2(inptr1, 2 * sizeof(JSAMPLE), vl); ++ next1_u8m2 = __riscv_vlse8_v_u8m2(inptr1 + 1, 2 * sizeof(JSAMPLE), vl); ++ ++ /* Add adjacent pixel values and add bias. */ ++ out_u16m4 = __riscv_vwaddu_vv_u16m4(this0_u8m2, next0_u8m2, vl); ++ tmp_u16m4 = __riscv_vwaddu_vv_u16m4(this1_u8m2, next1_u8m2, vl); ++ out_u16m4 = __riscv_vadd_vv_u16m4(out_u16m4, tmp_u16m4, vl); ++ out_u16m4 = __riscv_vadd_vv_u16m4(out_u16m4, bias, vl); ++ ++ /* Divide total by 4, narrow to 8-bit, and store. */ ++ out_u8m2 = __riscv_vnsrl_wx_u8m2(out_u16m4, 2, vl); ++ __riscv_vse8_v_u8m2(outptr, out_u8m2, vl); ++ ++ inptr0 += vl * 2; ++ inptr1 += vl * 2; ++ outptr += vl; ++ } ++ } ++} +diff --git a/simd/rvv/jcsample.h b/simd/rvv/jcsample.h +new file mode 100644 +index 0000000..fb74915 +--- /dev/null ++++ b/simd/rvv/jcsample.h +@@ -0,0 +1,59 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. 
In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* ++ * Expand a component horizontally from width input_cols to width output_cols, ++ * by duplicating the rightmost samples. ++ */ ++ ++LOCAL(void) ++expand_right_edge_rvv(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, ++ JDIMENSION output_cols) ++{ ++ register JSAMPROW ptr; ++ register JSAMPLE pixval; ++ register int count; ++ int row; ++ int numcols = (int)(output_cols - input_cols); ++ size_t vl; ++ ++ vuint8m1_t pad; ++ ++ if (numcols > 0) ++ { ++ for (row = 0; row < num_rows; row++) ++ { ++ ptr = image_data[row] + input_cols; ++ pixval = ptr[-1]; ++ for (count = numcols; count > 0; count -= vl, ptr += vl) ++ { ++ /* Set vl for each iteration. */ ++ vl = __riscv_vsetvl_e8m1(count); ++ ++ /* Splat the row's last real sample (read once above) into the padding. */ ++ pad = __riscv_vmv_v_x_u8m1(pixval, vl); ++ __riscv_vse8_v_u8m1(ptr, pad, vl); ++ } ++ } ++ } ++} +diff --git a/simd/rvv/jdcolext-rvv.c b/simd/rvv/jdcolext-rvv.c +new file mode 100644 +index 0000000..119d572 +--- /dev/null ++++ b/simd/rvv/jdcolext-rvv.c +@@ -0,0 +1,170 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. 
++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* This file is included by jdcolor-rvv.c */ ++ ++/* YCbCr -> RGB conversion is defined by the following equations: ++ * R = Y + 1.40200 * (Cr - 128) ++ * G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) ++ * B = Y + 1.77200 * (Cb - 128) ++ * ++ * Scaled integer constants are used to avoid floating-point arithmetic: ++ * 0.3441467 = 11277 * 2^-15 ++ * 0.7141418 = 23401 * 2^-15 ++ * 1.4020386 = 22971 * 2^-14 ++ * 1.7720337 = 29033 * 2^-14 ++ * These constants are defined in jdcolor-rvv.c. ++ * ++ * To ensure correct results, rounding is used when descaling. 
++ */ ++ ++#define JPEG_INTERNALS ++ ++ ++void jsimd_ycc_rgb_convert_rvv(JDIMENSION output_width, JSAMPIMAGE input_buf, ++ JDIMENSION input_row, JSAMPARRAY output_buf, ++ int num_rows) ++{ ++ int pixels_remaining; ++ size_t vl; ++ ptrdiff_t bstride; ++ JSAMPROW outptr, inptr0, inptr1, inptr2; ++ ++ vbool4_t mask; ++ vuint16m4_t r_u16, g_u16, b_u16, y_u16, cb_u16, cr_u16; ++ vint16m4_t r_i16, g_i16, b_i16, y_i16, cb_i16, cr_i16, tmp_i16; ++ vint32m8_t tmp_i32; ++ ++#if BITS_IN_JSAMPLE == 8 ++ vuint8m2_t src; ++ vint8m2_t dest; ++#endif ++ /* Constant alpha value. */ ++#if RGB_PIXELSIZE == 4 ++ pixels_remaining = output_width; ++ vl = __riscv_vsetvl_e16m4(pixels_remaining); ++#if BITS_IN_JSAMPLE == 8 ++ vuint8m2_t alpha_v = __riscv_vmv_v_x_u8m2(0xFF, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ vuint16m4_t alpha_v = __riscv_vmv_v_x_u16m4(0xFF, vl); ++#endif ++#endif ++ ++ for (; num_rows > 0; --num_rows) ++ { ++ inptr0 = input_buf[0][input_row]; ++ inptr1 = input_buf[1][input_row]; ++ inptr2 = input_buf[2][input_row++]; ++ outptr = *output_buf++; ++ ++ for (pixels_remaining = output_width; pixels_remaining > 0; pixels_remaining -= vl) ++ { ++ /* Set vl for each iteration. */ ++ vl = __riscv_vsetvl_e16m4(pixels_remaining); ++ bstride = RGB_PIXELSIZE * sizeof(JSAMPLE); ++ ++ /* Load R, G, B channels as vectors from inptr. */ ++#if BITS_IN_JSAMPLE == 8 ++ /* Extending to vuint16m4_t type for following multiply calculation. 
*/ ++ /* Can not load as signed which extends the highest bit during vwcvt ++ 11111111 becomes 11111111 11111111 rather than 00000000 11111111 */ ++ src = __riscv_vle8_v_u8m2(inptr0, vl); ++ y_u16 = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++ src = __riscv_vle8_v_u8m2(inptr1, vl); ++ cb_u16 = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++ src = __riscv_vle8_v_u8m2(inptr2, vl); ++ cr_u16 = __riscv_vwcvtu_x_x_v_u16m4(src, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ y_u16 = __riscv_vle16_v_u16m4(inptr0, vl); ++ cb_u16 = __riscv_vle16_v_u16m4(inptr1, vl); ++ cr_u16 = __riscv_vle16_v_u16m4(inptr2, vl); ++#endif ++ ++ y_i16 = __riscv_vreinterpret_v_u16m4_i16m4(y_u16); ++ cb_i16 = __riscv_vreinterpret_v_u16m4_i16m4(cb_u16); ++ cr_i16 = __riscv_vreinterpret_v_u16m4_i16m4(cr_u16); ++ ++ cb_i16 = __riscv_vsub_vx_i16m4(cb_i16, CENTERJSAMPLE, vl); ++ cr_i16 = __riscv_vsub_vx_i16m4(cr_i16, CENTERJSAMPLE, vl); ++ ++ /* Calculate R values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m8(cr_i16, F_1_402, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m8(tmp_i32, ONE_SL_13, vl); ++ tmp_i16 = __riscv_vnsra_wx_i16m4(tmp_i32, 14, vl); ++ r_i16 = __riscv_vadd_vv_i16m4(y_i16, tmp_i16, vl); ++ ++ CLIP(r_i16, i16m4); ++ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_i8m2(r_i16, vl); ++ __riscv_vsse8_v_i8m2(outptr + RGB_RED, bstride, dest, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_i16m4(outptr + RGB_RED, bstride, r_i16, vl); ++#endif ++ ++ /* Calculate G values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m8(cb_i16, -F_0_344, vl); ++ tmp_i32 = __riscv_vwmacc_vx_i32m8(tmp_i32, -F_0_714, cr_i16, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m8(tmp_i32, ONE_SL_14, vl); ++ tmp_i16 = __riscv_vnsra_wx_i16m4(tmp_i32, 15, vl); ++ g_i16 = __riscv_vadd_vv_i16m4(y_i16, tmp_i16, vl); ++ ++ CLIP(g_i16, i16m4); ++ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_i8m2(g_i16, vl); ++ __riscv_vsse8_v_i8m2(outptr + RGB_GREEN, bstride, dest, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_i16m4(outptr + 
RGB_GREEN, bstride, g_i16, vl); ++#endif ++ ++ /* Calculate B values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m8(cb_i16, F_1_772, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m8(tmp_i32, ONE_SL_13, vl); ++ tmp_i16 = __riscv_vnsra_wx_i16m4(tmp_i32, 14, vl); ++ b_i16 = __riscv_vadd_vv_i16m4(y_i16, tmp_i16, vl); ++ ++ CLIP(b_i16, i16m4); ++ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_i8m2(b_i16, vl); ++ __riscv_vsse8_v_i8m2(outptr + RGB_BLUE, bstride, dest, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_i16m4(outptr + RGB_BLUE, bstride, b_i16, vl); ++#endif ++ ++ /* Store alpha channel values. */ ++#if RGB_PIXELSIZE == 4 ++#if BITS_IN_JSAMPLE == 8 ++ __riscv_vsse8_v_u8m2(outptr + RGB_ALPHA, bstride, alpha_v, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_u16m4(outptr + RGB_ALPHA, bstride, alpha_v, vl); ++#endif ++#endif ++ outptr += vl * RGB_PIXELSIZE; ++ inptr0 += vl; ++ inptr1 += vl; ++ inptr2 += vl; ++ } ++ } ++} +\ No newline at end of file +diff --git a/simd/rvv/jdcolor-rvv.c b/simd/rvv/jdcolor-rvv.c +new file mode 100644 +index 0000000..b3496bb +--- /dev/null ++++ b/simd/rvv/jdcolor-rvv.c +@@ -0,0 +1,122 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. 
++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* YCbCr --> RGB CONVERSION */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++/* YCbCr --> RGB conversion constants */ ++#define F_0_344 11277 /* 0.3441467 = 11277 * 2^-15 */ ++#define F_0_714 23401 /* 0.7141418 = 23401 * 2^-15 */ ++#define F_1_402 22971 /* 1.4020386 = 22971 * 2^-14 */ ++#define F_1_772 29033 /* 1.7720337 = 29033 * 2^-14 */ ++ ++#define ONE_SL_13 (1 << 13) /* rounding term added before a >> 14 descale; parenthesized for safe expansion */ ++#define ONE_SL_14 (1 << 14) /* rounding term added before a >> 15 descale; parenthesized for safe expansion */ ++ ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++ ++#define RGB_RED EXT_RGB_RED ++#define RGB_GREEN EXT_RGB_GREEN ++#define RGB_BLUE EXT_RGB_BLUE ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE ++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extrgb_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv ++ ++#define RGB_RED EXT_RGBX_RED ++#define RGB_GREEN EXT_RGBX_GREEN ++#define RGB_BLUE EXT_RGBX_BLUE ++#define RGB_ALPHA 3 ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE ++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extrgbx_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv ++ ++#define RGB_RED EXT_BGR_RED ++#define RGB_GREEN EXT_BGR_GREEN ++#define RGB_BLUE EXT_BGR_BLUE ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE ++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extbgr_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv ++ ++#define RGB_RED 
EXT_BGRX_RED ++#define RGB_GREEN EXT_BGRX_GREEN ++#define RGB_BLUE EXT_BGRX_BLUE ++#define RGB_ALPHA 3 ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE 
++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extbgrx_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv ++ ++#define RGB_RED EXT_XBGR_RED ++#define RGB_GREEN EXT_XBGR_GREEN ++#define RGB_BLUE EXT_XBGR_BLUE ++#define RGB_ALPHA 0 ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE ++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extxbgr_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv ++ ++#define RGB_RED EXT_XRGB_RED ++#define RGB_GREEN EXT_XRGB_GREEN ++#define RGB_BLUE EXT_XRGB_BLUE ++#define RGB_ALPHA 0 ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE ++#define jsimd_ycc_rgb_convert_rvv jsimd_ycc_extxrgb_convert_rvv ++#include "jdcolext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_ycc_rgb_convert_rvv +diff --git a/simd/rvv/jdmerge-rvv.c b/simd/rvv/jdmerge-rvv.c +new file mode 100644 +index 0000000..250bad4 +--- /dev/null ++++ b/simd/rvv/jdmerge-rvv.c +@@ -0,0 +1,136 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. 
If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* These routines combine simple (non-fancy, i.e. non-smooth) h2v1 or h2v2 ++ * chroma upsampling and YCbCr -> RGB color conversion into a single function. ++ */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++/* YCbCr --> RGB conversion constants */ ++#define F_0_344 11277 /* 0.3441467 = 11277 * 2^-15 */ ++#define F_0_714 23401 /* 0.7141418 = 23401 * 2^-15 */ ++#define F_1_402 22971 /* 1.4020386 = 22971 * 2^-14 */ ++#define F_1_772 29033 /* 1.7720337 = 29033 * 2^-14 */ ++ ++#define ONE_SL_13 (1 << 13) /* 2^13; parenthesized so it expands safely inside larger expressions */ ++#define ONE_SL_14 (1 << 14) /* 2^14; parenthesized so it expands safely inside larger expressions */ ++ ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++ ++#define RGB_RED EXT_RGB_RED ++#define RGB_GREEN EXT_RGB_GREEN ++#define RGB_BLUE EXT_RGB_BLUE ++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extrgb_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extrgb_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef jsimd_h2v2_merged_upsample_rvv ++ ++#define RGB_RED EXT_RGBX_RED ++#define RGB_GREEN EXT_RGBX_GREEN ++#define RGB_BLUE EXT_RGBX_BLUE ++#define RGB_ALPHA 3 ++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extrgbx_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extrgbx_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED 
++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef 
jsimd_h2v2_merged_upsample_rvv ++ ++#define RGB_RED EXT_BGR_RED ++#define RGB_GREEN EXT_BGR_GREEN ++#define RGB_BLUE EXT_BGR_BLUE ++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extbgr_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extbgr_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef jsimd_h2v2_merged_upsample_rvv ++ ++#define RGB_RED EXT_BGRX_RED ++#define RGB_GREEN EXT_BGRX_GREEN ++#define RGB_BLUE EXT_BGRX_BLUE ++#define RGB_ALPHA 3 ++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extbgrx_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extbgrx_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef jsimd_h2v2_merged_upsample_rvv ++ ++#define RGB_RED EXT_XBGR_RED ++#define RGB_GREEN EXT_XBGR_GREEN ++#define RGB_BLUE EXT_XBGR_BLUE ++#define RGB_ALPHA 0 ++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extxbgr_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extxbgr_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef jsimd_h2v2_merged_upsample_rvv ++ ++#define RGB_RED EXT_XRGB_RED ++#define RGB_GREEN EXT_XRGB_GREEN ++#define RGB_BLUE EXT_XRGB_BLUE ++#define RGB_ALPHA 0 ++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE ++#define jsimd_h2v1_merged_upsample_rvv jsimd_h2v1_extxrgb_merged_upsample_rvv ++#define jsimd_h2v2_merged_upsample_rvv jsimd_h2v2_extxrgb_merged_upsample_rvv ++#include "jdmrgext-rvv.c" ++#undef RGB_RED ++#undef RGB_GREEN ++#undef RGB_BLUE ++#undef 
RGB_ALPHA ++#undef RGB_PIXELSIZE ++#undef jsimd_h2v1_merged_upsample_rvv ++#undef jsimd_h2v2_merged_upsample_rvv +diff --git a/simd/rvv/jdmrgext-rvv.c b/simd/rvv/jdmrgext-rvv.c +new file mode 100644 +index 0000000..2eab756 +--- /dev/null ++++ b/simd/rvv/jdmrgext-rvv.c +@@ -0,0 +1,235 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo. ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* This file is included by jdmerge-rvv.c. */ ++ ++/* These routines combine simple (non-fancy, i.e. non-smooth) h2v1 or h2v2 ++ * chroma upsampling and YCbCr -> RGB color conversion into a single function. 
++ * ++ * As with the standalone functions, YCbCr -> RGB conversion is defined by the ++ * following equations: ++ * R = Y + 1.40200 * (Cr - 128) ++ * G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) ++ * B = Y + 1.77200 * (Cb - 128) ++ * ++ * Scaled integer constants are used to avoid floating-point arithmetic: ++ * 0.3441467 = 11277 * 2^-15 ++ * 0.7141418 = 23401 * 2^-15 ++ * 1.4020386 = 22971 * 2^-14 ++ * 1.7720337 = 29033 * 2^-14 ++ * These constants are defined in jdmerge-rvv.c. ++ * ++ * To ensure correct results, rounding is used when descaling. ++ */ ++ ++/* Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. ++ Y0Y1CC -> RGB0 RGB1 ++ */ ++ ++#define JPEG_INTERNALS ++ ++ ++void jsimd_h2v1_merged_upsample_rvv(JDIMENSION output_width, ++ JSAMPIMAGE input_buf, ++ JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf) ++{ ++ int crcb_remaining; ++ int is_odd = output_width & 1; ++ size_t vl, vl_odd; ++ ptrdiff_t bstride; ++ JSAMPROW outptr; ++ JSAMPROW inptr0, inptr1, inptr2; ++ ++ vbool8_t mask; ++ vuint16m2_t y0_u16, y1_u16, cb_u16, cr_u16; ++ vint16m2_t y0_i16, y1_i16, cb_i16, cr_i16; ++ vint16m2_t r_sub_y, g_sub_y, b_sub_y; ++ vint16m2_t r, g, b; ++ vint32m4_t tmp_i32; ++ ++#if BITS_IN_JSAMPLE == 8 ++ vuint8m1_t src; ++ vint8m1_t dest; ++#endif ++ ++ /* Constant alpha value. */ ++#if RGB_PIXELSIZE == 4 ++ vl = __riscv_vsetvl_e16m2((output_width + 1) >> 1); ++#if BITS_IN_JSAMPLE == 8 ++ vuint8m1_t alpha_v = __riscv_vmv_v_x_u8m1(0xFF, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ vuint16m2_t alpha_v = __riscv_vmv_v_x_u16m2(0xFF, vl); ++#endif ++#endif ++ ++ inptr0 = input_buf[0][in_row_group_ctr]; ++ inptr1 = input_buf[1][in_row_group_ctr]; ++ inptr2 = input_buf[2][in_row_group_ctr]; ++ outptr = output_buf[0]; ++ ++ for (crcb_remaining = (output_width + 1) >> 1; crcb_remaining > 0; crcb_remaining -= vl) ++ { ++ /* Set vl for each iteration. 
*/ ++ vl = __riscv_vsetvl_e16m2(crcb_remaining); ++ vl_odd = (is_odd && (vl == crcb_remaining)) ? (vl - 1) : vl; /* on the last chunk of an odd-width row the final odd-indexed pixel does not exist */ ++ bstride = RGB_PIXELSIZE * sizeof(JSAMPLE); ++ ++ /* Load Y, Cb, Cr samples as vectors from inptr0, inptr1, inptr2. */ ++#if BITS_IN_JSAMPLE == 8 ++ /* Extending to vuint16m2_t type for following multiply calculation. */ ++ /* Y component values with even-numbered indices. */ ++ src = __riscv_vlse8_v_u8m1(inptr0, 2 * sizeof(JSAMPLE), vl); ++ y0_u16 = __riscv_vwcvtu_x_x_v_u16m2(src, vl); ++ /* Y component values with odd-numbered indices. */ ++ src = __riscv_vlse8_v_u8m1(inptr0 + 1, 2 * sizeof(JSAMPLE), vl_odd); ++ y1_u16 = __riscv_vwcvtu_x_x_v_u16m2(src, vl_odd); ++ ++ src = __riscv_vle8_v_u8m1(inptr1, vl); ++ cb_u16 = __riscv_vwcvtu_x_x_v_u16m2(src, vl); ++ src = __riscv_vle8_v_u8m1(inptr2, vl); ++ cr_u16 = __riscv_vwcvtu_x_x_v_u16m2(src, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ y0_u16 = __riscv_vlse16_v_u16m2(inptr0, 2 * sizeof(JSAMPLE), vl); ++ y1_u16 = __riscv_vlse16_v_u16m2(inptr0 + 1, 2 * sizeof(JSAMPLE), vl_odd); ++ cb_u16 = __riscv_vle16_v_u16m2(inptr1, vl); ++ cr_u16 = __riscv_vle16_v_u16m2(inptr2, vl); ++#endif ++ ++ y0_i16 = __riscv_vreinterpret_v_u16m2_i16m2(y0_u16); ++ y1_i16 = __riscv_vreinterpret_v_u16m2_i16m2(y1_u16); ++ cb_i16 = __riscv_vreinterpret_v_u16m2_i16m2(cb_u16); ++ cr_i16 = __riscv_vreinterpret_v_u16m2_i16m2(cr_u16); ++ ++ cb_i16 = __riscv_vsub_vx_i16m2(cb_i16, CENTERJSAMPLE, vl); ++ cr_i16 = __riscv_vsub_vx_i16m2(cr_i16, CENTERJSAMPLE, vl); ++ ++ /* Calculate R-Y values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m4(cr_i16, F_1_402, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m4(tmp_i32, ONE_SL_13, vl); ++ r_sub_y = __riscv_vnsra_wx_i16m2(tmp_i32, 14, vl); ++ ++ /* Calculate G-Y values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m4(cb_i16, -F_0_344, vl); ++ tmp_i32 = __riscv_vwmacc_vx_i32m4(tmp_i32, -F_0_714, cr_i16, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m4(tmp_i32, ONE_SL_14, vl); ++ g_sub_y = __riscv_vnsra_wx_i16m2(tmp_i32, 15, vl); ++ ++ /* Calculate B-Y 
values */ ++ tmp_i32 = __riscv_vwmul_vx_i32m4(cb_i16, F_1_772, vl); ++ tmp_i32 = __riscv_vadd_vx_i32m4(tmp_i32, ONE_SL_13, vl); ++ b_sub_y = __riscv_vnsra_wx_i16m2(tmp_i32, 14, vl); ++ ++ /* Compute R, G, B values with even-numbered indices. */ ++ r = __riscv_vadd_vv_i16m2(r_sub_y, y0_i16, vl); ++ CLIP(r, i16m2); ++ ++ g = __riscv_vadd_vv_i16m2(g_sub_y, y0_i16, vl); ++ CLIP(g, i16m2); ++ ++ b = __riscv_vadd_vv_i16m2(b_sub_y, y0_i16, vl); ++ CLIP(b, i16m2); ++ ++ /* Narrow to 8-bit and store to memory. */ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_i8m1(r, vl); ++ __riscv_vsse8_v_i8m1(outptr + RGB_RED, 2 * bstride, dest, vl); ++ dest = __riscv_vncvt_x_x_w_i8m1(g, vl); ++ __riscv_vsse8_v_i8m1(outptr + RGB_GREEN, 2 * bstride, dest, vl); ++ dest = __riscv_vncvt_x_x_w_i8m1(b, vl); ++ __riscv_vsse8_v_i8m1(outptr + RGB_BLUE, 2 * bstride, dest, vl); ++#else /* BITS_IN_JSAMPLE == 12: r/g/b are vint16m2_t, so the signed store variant is used (same as the odd-pixel path) */ ++ __riscv_vsse16_v_i16m2(outptr + RGB_RED, 2 * bstride, r, vl); ++ __riscv_vsse16_v_i16m2(outptr + RGB_GREEN, 2 * bstride, g, vl); ++ __riscv_vsse16_v_i16m2(outptr + RGB_BLUE, 2 * bstride, b, vl); ++#endif ++ ++ /* Deal with alpha channel. */ ++#if RGB_PIXELSIZE == 4 ++#if BITS_IN_JSAMPLE == 8 ++ __riscv_vsse8_v_u8m1(outptr + RGB_ALPHA, 2 * bstride, alpha_v, vl); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_u16m2(outptr + RGB_ALPHA, 2 * bstride, alpha_v, vl); ++#endif ++#endif ++ ++ /* Compute R, G, B values with odd-numbered indices. */ ++ r = __riscv_vadd_vv_i16m2(r_sub_y, y1_i16, vl_odd); ++ CLIP(r, i16m2); ++ ++ g = __riscv_vadd_vv_i16m2(g_sub_y, y1_i16, vl_odd); ++ CLIP(g, i16m2); ++ ++ b = __riscv_vadd_vv_i16m2(b_sub_y, y1_i16, vl_odd); ++ CLIP(b, i16m2); ++ ++ /* Narrow to 8-bit and store to memory. 
*/ ++#if BITS_IN_JSAMPLE == 8 ++ dest = __riscv_vncvt_x_x_w_i8m1(r, vl_odd); ++ __riscv_vsse8_v_i8m1(outptr + RGB_PIXELSIZE + RGB_RED, 2 * bstride, dest, vl_odd); ++ dest = __riscv_vncvt_x_x_w_i8m1(g, vl_odd); ++ __riscv_vsse8_v_i8m1(outptr + RGB_PIXELSIZE + RGB_GREEN, 2 * bstride, dest, vl_odd); ++ dest = __riscv_vncvt_x_x_w_i8m1(b, vl_odd); ++ __riscv_vsse8_v_i8m1(outptr + RGB_PIXELSIZE + RGB_BLUE, 2 * bstride, dest, vl_odd); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_i16m2(outptr + RGB_PIXELSIZE + RGB_RED, 2 * bstride, r, vl_odd); ++ __riscv_vsse16_v_i16m2(outptr + RGB_PIXELSIZE + RGB_GREEN, 2 * bstride, g, vl_odd); ++ __riscv_vsse16_v_i16m2(outptr + RGB_PIXELSIZE + RGB_BLUE, 2 * bstride, b, vl_odd); ++#endif ++ /* Deal with alpha channel. */ ++#if RGB_PIXELSIZE == 4 ++#if BITS_IN_JSAMPLE == 8 ++ __riscv_vsse8_v_u8m1(outptr + RGB_PIXELSIZE + RGB_ALPHA, 2 * bstride, alpha_v, vl_odd); ++#else /* BITS_IN_JSAMPLE == 12 */ ++ __riscv_vsse16_v_u16m2(outptr + RGB_PIXELSIZE + RGB_ALPHA, 2 * bstride, alpha_v, vl_odd); ++#endif ++#endif ++ inptr0 += 2 * vl; ++ inptr1 += vl; ++ inptr2 += vl; ++ outptr += 2 * vl * RGB_PIXELSIZE; ++ } ++} ++ ++void jsimd_h2v2_merged_upsample_rvv(JDIMENSION output_width, ++ JSAMPIMAGE input_buf, ++ JDIMENSION in_row_group_ctr, ++ JSAMPARRAY output_buf) ++{ /* h2v2: both luma rows of the group use the same chroma rows, so the h2v1 routine is run once per luma row. */ ++ JSAMPROW inptr, outptr; ++ ++ inptr = input_buf[0][in_row_group_ctr]; /* saved so the caller's row pointers can be restored below */ ++ outptr = output_buf[0]; ++ ++ input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2]; /* temporarily redirect to the first luma row of the pair */ ++ jsimd_h2v1_merged_upsample_rvv(output_width, input_buf, in_row_group_ctr, ++ output_buf); ++ ++ input_buf[0][in_row_group_ctr] = input_buf[0][in_row_group_ctr * 2 + 1]; /* second luma row of the pair */ ++ output_buf[0] = output_buf[1]; ++ jsimd_h2v1_merged_upsample_rvv(output_width, input_buf, in_row_group_ctr, ++ output_buf); ++ ++ input_buf[0][in_row_group_ctr] = inptr; /* restore the temporarily redirected pointers */ ++ output_buf[0] = outptr; ++} +\ No newline at end of file +diff --git a/simd/rvv/jdsample-rvv.c b/simd/rvv/jdsample-rvv.c +new file mode 100644 +index 
0000000..6cbc6d5 +--- /dev/null ++++ b/simd/rvv/jdsample-rvv.c +@@ -0,0 +1,460 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++/* CHROMA UPSAMPLING */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++/* The diagram below shows a row of samples produced by h2v1 downsampling. ++ * ++ * s0 s1 s2 ++ * +---------+---------+---------+ ++ * | | | | ++ * | p0 p1 | p2 p3 | p4 p5 | ++ * | | | | ++ * +---------+---------+---------+ ++ * ++ * Samples s0-s2 were created by averaging the original pixel component values ++ * centered at positions p0-p5 above. To approximate those original pixel ++ * component values, we proportionally blend the adjacent samples in each row. ++ * ++ * An upsampled pixel component value is computed by blending the sample ++ * containing the pixel center with the nearest neighboring sample, in the ++ * ratio 3:1. 
For example: ++ * p1(upsampled) = 3/4 * s0 + 1/4 * s1 ++ * p2(upsampled) = 3/4 * s1 + 1/4 * s0 ++ * When computing the first and last pixel component values in the row, there ++ * is no adjacent sample to blend, so: ++ * p0(upsampled) = s0 ++ * p5(upsampled) = s2 ++ */ ++ ++void jsimd_h2v1_fancy_upsample_rvv(int max_v_samp_factor, ++ JDIMENSION downsampled_width, ++ JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr) ++{ ++ int s_row, s_remaining; ++ size_t vl; ++ JSAMPARRAY output_data = *output_data_ptr; ++ JSAMPROW inptr, outptr; ++ ++ vuint8m4_t s0, s1, p1_u8, p2_u8; ++ vuint16m8_t p1_u16, p2_u16; ++ ++ for (s_row = 0; s_row < max_v_samp_factor;) ++ { ++ inptr = input_data[s_row]; ++ outptr = output_data[s_row++]; ++ ++ /* First pixel component value in this row of the original image */ ++ *outptr++ = (JSAMPLE)GETJSAMPLE(*inptr); ++ ++ for (s_remaining = downsampled_width - 1; s_remaining > 0; s_remaining -= vl) ++ { ++ /* Set vl for each iteration (e16m8 matches the widened u16m8 intermediates). */ ++ vl = __riscv_vsetvl_e16m8(s_remaining); ++ ++ /* Load samples and samples with offset 1. */ ++ s0 = __riscv_vle8_v_u8m4(inptr, vl); ++ s1 = __riscv_vle8_v_u8m4(inptr + 1, vl); ++ ++ /* p1(upsampled) = (3 * s0 + s1 + 2) / 4 */ ++ p1_u16 = __riscv_vwaddu_vx_u16m8(s1, 2, vl); /* Add bias */ ++ p1_u16 = __riscv_vwmaccu_vx_u16m8(p1_u16, 3, s0, vl); ++ ++ /* p2(upsampled) = (3 * s1 + s0 + 1) / 4 */ ++ p2_u16 = __riscv_vwaddu_vx_u16m8(s0, 1, vl); /* Add bias */ ++ p2_u16 = __riscv_vwmaccu_vx_u16m8(p2_u16, 3, s1, vl); ++ ++ /* Right-shift by 2 (divide by 4) and narrow to 8-bit. */ ++ p1_u8 = __riscv_vnsrl_wx_u8m4(p1_u16, 2, vl); ++ p2_u8 = __riscv_vnsrl_wx_u8m4(p2_u16, 2, vl); ++ ++ /* Strided store to memory: p1 and p2 are interleaved in the output row. 
*/ ++ __riscv_vsse8_v_u8m4(outptr, 2 * sizeof(JSAMPLE), p1_u8, vl); ++ __riscv_vsse8_v_u8m4(outptr + 1, 2 * sizeof(JSAMPLE), p2_u8, vl); ++ ++ inptr += vl; ++ outptr += 2 * vl; ++ } ++ ++ /* Last pixel component value in this row of the original image */ ++ *outptr = (JSAMPLE)GETJSAMPLE(*inptr); ++ } ++} ++ ++/* The diagram below shows an array of samples produced by h2v2 downsampling. ++ * ++ * s0 s1 s2 ++ * +---------+---------+---------+ ++ * | p0 p1 | p2 p3 | p4 p5 | ++ * top | | | | ++ * | p6 p7 | p8 p9 | p10 p11| s0 s1 ++ * +---------+---------+---------+ +---------+---------+ ++ * | p12 p13| p14 p15| p16 p17| | | pa | pb | | ++ * mid | | | | | - - - - | - - - - | ++ * | p18 p19| p20 p21| p22 p23| | | px | py | | ++ * +---------+---------+---------+ +---------+---------+ ++ * | p24 p25| p26 p27| p28 p29| ++ * bot | | | | ++ * | p30 p31| p32 p33| p34 p35| ++ * +---------+---------+---------+ ++ * ++ * Samples s0A-s2C were created by averaging the original pixel component ++ * values centered at positions p0-p35 above. To approximate one of those ++ * original pixel component values, we proportionally blend the sample ++ * containing the pixel center with the nearest neighboring samples in each ++ * row, column, and diagonal. ++ * ++ * An upsampled pixel component value is computed by first blending the sample ++ * containing the pixel center with the nearest neighboring samples in the ++ * same column, in the ratio 3:1, and then blending each column sum with the ++ * nearest neighboring column sum, in the ratio 3:1. 
For example: ++ * p14(upsampled) = 3/4 * (3/4 * s1B + 1/4 * s0B) + ++ * 1/4 * (3/4 * s1A + 1/4 * s0A) ++ * = 9/16 * s1B + 3/16 * s1A + 3/16 * s0B + 1/16 * s0A ++ * When computing the first and last pixel component values in the row, there ++ * is no horizontally adjacent sample to blend, so: ++ * p12(upsampled) = 3/4 * s0B + 1/4 * s0A ++ * p23(upsampled) = 3/4 * s2B + 1/4 * s2C ++ * When computing the first and last pixel component values in the column, ++ * there is no vertically adjacent sample to blend, so: ++ * p2(upsampled) = 3/4 * s1A + 1/4 * s0A ++ * p33(upsampled) = 3/4 * s1C + 1/4 * s2C ++ * When computing the corner pixel component values, there is no adjacent ++ * sample to blend, so: ++ * p0(upsampled) = s0A ++ * p35(upsampled) = s2C ++ * ++ */ ++ ++void jsimd_h2v2_fancy_upsample_rvv(int max_v_samp_factor, ++ JDIMENSION downsampled_width, ++ JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr) ++{ ++ int inrow, outrow, s_remaining; ++ int mid, p[2]; ++ size_t vl; ++ JSAMPARRAY output_data = *output_data_ptr; ++ JSAMPROW inptr_top, inptr_mid, inptr_bot, outptr0, outptr1; ++ ++ vuint8m4_t s0_top_u8, s0_mid_u8, s0_bot_u8, s1_top_u8, s1_mid_u8, s1_bot_u8; ++ vuint8m4_t pa_u8, pb_u8, px_u8, py_u8; ++ vuint16m8_t s0_3s1_mid_x3, s1_3s0_mid_x3, s0_3s1_top, s1_3s0_top, s0_3s1_bot, s1_3s0_bot; ++ vuint16m8_t pa_u16, pb_u16, px_u16, py_u16; ++ ++ for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) ++ { ++ inptr_top = input_data[inrow - 1]; /* NOTE(review): index -1 on the first pass; assumes the caller supplies valid context rows above and below - confirm */ ++ inptr_mid = input_data[inrow]; ++ inptr_bot = input_data[inrow + 1]; ++ outptr0 = output_data[outrow++]; ++ outptr1 = output_data[outrow++]; ++ ++ /* First pixel component value in this row of the original image */ ++ mid = GETJSAMPLE(*inptr_mid) * 3; ++ p[0] = mid + GETJSAMPLE(*inptr_top); ++ p[1] = mid + GETJSAMPLE(*inptr_bot); ++ *outptr0++ = (JSAMPLE)((p[0] + 2) >> 2); ++ *outptr1++ = (JSAMPLE)((p[1] + 2) >> 2); ++ ++ for (s_remaining = downsampled_width - 1; s_remaining > 0; s_remaining -= vl) ++ { ++ vl = 
__riscv_vsetvl_e16m8(s_remaining); ++ ++ /* Load samples and samples with offset 1. */ ++ s0_top_u8 = __riscv_vle8_v_u8m4(inptr_top, vl); ++ s1_top_u8 = __riscv_vle8_v_u8m4(inptr_top + 1, vl); ++ s0_mid_u8 = __riscv_vle8_v_u8m4(inptr_mid, vl); ++ s1_mid_u8 = __riscv_vle8_v_u8m4(inptr_mid + 1, vl); ++ s0_bot_u8 = __riscv_vle8_v_u8m4(inptr_bot, vl); ++ s1_bot_u8 = __riscv_vle8_v_u8m4(inptr_bot + 1, vl); ++ ++ /* Compute pixels for output in row 0. */ ++ /* Step 1: Blend adjacent samples s0 and s1 horizontally within each input row. */ ++ /* 3 * s0A + s1A (row -1) */ ++ s1_3s0_top = __riscv_vwmulu_vx_u16m8(s0_top_u8, 3, vl); ++ s1_3s0_top = __riscv_vwaddu_wv_u16m8(s1_3s0_top, s1_top_u8, vl); ++ /* 3 * s1A + s0A (row -1) */ ++ s0_3s1_top = __riscv_vwmulu_vx_u16m8(s1_top_u8, 3, vl); ++ s0_3s1_top = __riscv_vwaddu_wv_u16m8(s0_3s1_top, s0_top_u8, vl); ++ /* 3 * (3 * s0B + s1B) (row 0) */ ++ s1_3s0_mid_x3 = __riscv_vwmulu_vx_u16m8(s0_mid_u8, 9, vl); ++ s1_3s0_mid_x3 = __riscv_vwmaccu_vx_u16m8(s1_3s0_mid_x3, 3, s1_mid_u8, vl); ++ s1_3s0_mid_x3 = __riscv_vadd_vx_u16m8(s1_3s0_mid_x3, 7, vl); /* Add bias to pa px */ ++ ++ /* 3 * (3 * s1B + s0B) (row 0) */ ++ s0_3s1_mid_x3 = __riscv_vwmulu_vx_u16m8(s1_mid_u8, 9, vl); ++ s0_3s1_mid_x3 = __riscv_vwmaccu_vx_u16m8(s0_3s1_mid_x3, 3, s0_mid_u8, vl); ++ s0_3s1_mid_x3 = __riscv_vadd_vx_u16m8(s0_3s1_mid_x3, 8, vl); /* Add bias to pb py */ ++ ++ /* Step 2: Add the tripled middle-row sum to the top-row sum (3:1 vertical blend). */ ++ /* p13: (3 * (3 * s0B + s1B) + (3 * s0A + s1A) + 7) / 16 */ ++ pa_u16 = __riscv_vadd_vv_u16m8(s1_3s0_mid_x3, s1_3s0_top, vl); ++ /* p14: (3 * (3 * s1B + s0B) + (3 * s1A + s0A) + 8) / 16 */ ++ pb_u16 = __riscv_vadd_vv_u16m8(s0_3s1_mid_x3, s0_3s1_top, vl); ++ ++ /* Right-shift by 4 (divide by 16) and narrow to 8-bit. */ ++ pa_u8 = __riscv_vnsrl_wx_u8m4(pa_u16, 4, vl); ++ pb_u8 = __riscv_vnsrl_wx_u8m4(pb_u16, 4, vl); ++ ++ /* Strided store to memory. 
*/ ++ __riscv_vsse8_v_u8m4(outptr0, 2 * sizeof(JSAMPLE), pa_u8, vl); ++ __riscv_vsse8_v_u8m4(outptr0 + 1, 2 * sizeof(JSAMPLE), pb_u8, vl); ++ ++ /* Compute pixels for output in row 1. */ ++ /* Step 1: Blend adjacent samples s0 and s1 horizontally within the bottom row. */ ++ /* 3 * s0C + s1C (row 1) */ ++ s1_3s0_bot = __riscv_vwmulu_vx_u16m8(s0_bot_u8, 3, vl); ++ s1_3s0_bot = __riscv_vwaddu_wv_u16m8(s1_3s0_bot, s1_bot_u8, vl); ++ /* 3 * s1C + s0C (row 1) */ ++ s0_3s1_bot = __riscv_vwmulu_vx_u16m8(s1_bot_u8, 3, vl); ++ s0_3s1_bot = __riscv_vwaddu_wv_u16m8(s0_3s1_bot, s0_bot_u8, vl); ++ ++ /* Step 2: Add the tripled middle-row sum to the bottom-row sum (3:1 vertical blend). */ ++ /* p19: (3 * (3 * s0B + s1B) + (3 * s0C + s1C) + 7) / 16 */ ++ px_u16 = __riscv_vadd_vv_u16m8(s1_3s0_mid_x3, s1_3s0_bot, vl); ++ /* p20: (3 * (3 * s1B + s0B) + (3 * s1C + s0C) + 8) / 16 */ ++ py_u16 = __riscv_vadd_vv_u16m8(s0_3s1_mid_x3, s0_3s1_bot, vl); ++ ++ /* Right-shift by 4 (divide by 16) and narrow to 8-bit. */ ++ px_u8 = __riscv_vnsrl_wx_u8m4(px_u16, 4, vl); ++ py_u8 = __riscv_vnsrl_wx_u8m4(py_u16, 4, vl); ++ ++ /* Strided store to memory. */ ++ __riscv_vsse8_v_u8m4(outptr1, 2 * sizeof(JSAMPLE), px_u8, vl); ++ __riscv_vsse8_v_u8m4(outptr1 + 1, 2 * sizeof(JSAMPLE), py_u8, vl); ++ ++ inptr_top += vl; ++ inptr_mid += vl; ++ inptr_bot += vl; ++ outptr0 += 2 * vl; ++ outptr1 += 2 * vl; ++ } ++ ++ mid = GETJSAMPLE(*inptr_mid) * 3; /* last column: only the vertical 3:1 blend is applied */ ++ p[0] = mid + GETJSAMPLE(*inptr_top); ++ p[1] = mid + GETJSAMPLE(*inptr_bot); ++ *outptr0++ = (JSAMPLE)((p[0] + 1) >> 2); ++ *outptr1++ = (JSAMPLE)((p[1] + 1) >> 2); ++ } ++} ++ ++/* The diagram below shows a column of samples produced by h1v2 downsampling ++ * (or by losslessly rotating or transposing an h2v1-downsampled image.) 
++ * ++ * +---------+ ++ * | p0 | ++ * sA | | ++ * | p1 | ++ * +---------+ ++ * | p2 | ++ * sB | | ++ * | p3 | ++ * +---------+ ++ * | p4 | ++ * sC | | ++ * | p5 | ++ * +---------+ ++ * ++ * Samples sA-sC were created by averaging the original pixel component values ++ * centered at positions p0-p5 above. To approximate those original pixel ++ * component values, we proportionally blend the adjacent samples in each ++ * column. ++ * ++ * An upsampled pixel component value is computed by blending the sample ++ * containing the pixel center with the nearest neighboring sample, in the ++ * ratio 3:1. For example: ++ * p1(upsampled) = 3/4 * sA + 1/4 * sB ++ * p2(upsampled) = 3/4 * sB + 1/4 * sA ++ * When computing the first and last pixel component values in the column, ++ * there is no adjacent sample to blend, so: ++ * p0(upsampled) = sA ++ * p5(upsampled) = sC ++ */ ++ ++void jsimd_h1v2_fancy_upsample_rvv(int max_v_samp_factor, ++ JDIMENSION downsampled_width, ++ JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr) ++{ ++ int s_row, p_row, s_remaining; ++ size_t vl; ++ JSAMPARRAY output_data = *output_data_ptr; ++ JSAMPROW inptr_top, inptr_mid, inptr_bot, outptr_top, outptr_bot; ++ ++ vuint8m4_t sA_u8, sB_u8, sC_u8, p2_u8, p3_u8; ++ vuint16m8_t p2_u16, p3_u16; ++ ++ for (s_row = 0, p_row = 0; p_row < max_v_samp_factor; ++s_row) ++ { ++ inptr_top = input_data[s_row - 1]; /* NOTE(review): index -1 on the first pass; assumes the caller supplies valid context rows above and below - confirm */ ++ inptr_mid = input_data[s_row]; ++ inptr_bot = input_data[s_row + 1]; ++ ++ outptr_top = output_data[p_row++]; ++ outptr_bot = output_data[p_row++]; ++ ++ for (s_remaining = downsampled_width; s_remaining > 0; s_remaining -= vl) ++ { ++ /* Set vl for each iteration. 
*/ ++ vl = __riscv_vsetvl_e16m8(s_remaining); ++ ++ sA_u8 = __riscv_vle8_v_u8m4(inptr_top, vl); ++ sB_u8 = __riscv_vle8_v_u8m4(inptr_mid, vl); ++ sC_u8 = __riscv_vle8_v_u8m4(inptr_bot, vl); ++ ++ /* p2(upsampled) = (3 * sB + sA + 1) / 4 */ ++ p2_u16 = __riscv_vwaddu_vx_u16m8(sA_u8, 1, vl); /* Add bias */ ++ p2_u16 = __riscv_vwmaccu_vx_u16m8(p2_u16, 3, sB_u8, vl); ++ ++ p2_u8 = __riscv_vnsrl_wx_u8m4(p2_u16, 2, vl); ++ ++ /* p3(upsampled) = (3 * sB + sC + 2) / 4 */ ++ p3_u16 = __riscv_vwaddu_vx_u16m8(sC_u8, 2, vl); /* Add bias */ ++ p3_u16 = __riscv_vwmaccu_vx_u16m8(p3_u16, 3, sB_u8, vl); ++ ++ p3_u8 = __riscv_vnsrl_wx_u8m4(p3_u16, 2, vl); ++ ++ /* Unit-stride store to memory: one output row above and one below. */ ++ __riscv_vse8_v_u8m4(outptr_top, p2_u8, vl); ++ __riscv_vse8_v_u8m4(outptr_bot, p3_u8, vl); ++ ++ inptr_top += vl; ++ inptr_mid += vl; ++ inptr_bot += vl; ++ outptr_top += vl; ++ outptr_bot += vl; ++ } ++ } ++} ++ ++/* The diagram below shows a row of samples produced by h2v1 downsampling. ++ * ++ * s0 s1 ++ * +---------+---------+ ++ * | | | ++ * | p0 p1 | p2 p3 | ++ * | | | ++ * +---------+---------+ ++ * ++ * Samples s0 and s1 were created by averaging the original pixel component ++ * values centered at positions p0-p3 above. 
To approximate those original ++ * pixel component values, we duplicate the samples horizontally: ++ * p0(upsampled) = p1(upsampled) = s0 ++ * p2(upsampled) = p3(upsampled) = s1 ++ */ ++ ++void jsimd_h2v1_upsample_rvv(int max_v_samp_factor, ++ JDIMENSION output_width, ++ JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr) ++{ ++ int s_row, cols_remaining; ++ size_t vl; ++ JSAMPARRAY output_data = *output_data_ptr; ++ JSAMPROW inptr, outptr; ++ ++ vuint8m8_t samples; ++ ++ for (s_row = 0; s_row < max_v_samp_factor;) ++ { ++ inptr = input_data[s_row]; ++ outptr = output_data[s_row++]; ++ ++ for (cols_remaining = output_width; cols_remaining > 0; cols_remaining -= 2 * vl) ++ { ++ vl = __riscv_vsetvl_e8m8((cols_remaining + 1) / 2); /* one input sample per two output columns, rounded up */ ++ ++ samples = __riscv_vle8_v_u8m8(inptr, vl); ++ ++ __riscv_vsse8_v_u8m8(outptr, 2 * sizeof(JSAMPLE), samples, vl); /* even output columns */ ++ __riscv_vsse8_v_u8m8(outptr + 1, 2 * sizeof(JSAMPLE), samples, vl); /* odd output columns. NOTE(review): for odd output_width this writes one sample past output_width; assumes the row buffer is padded - confirm */ ++ ++ inptr += vl; ++ outptr += 2 * vl; ++ } ++ } ++} ++ ++/* The diagram below shows an array of samples produced by h2v2 downsampling. ++ * ++ * s0 s1 ++ * +---------+---------+ ++ * | p0 p1 | p2 p3 | ++ * sA | | | ++ * | p4 p5 | p6 p7 | ++ * +---------+---------+ ++ * | p8 p9 | p10 p11| ++ * sB | | | ++ * | p12 p13| p14 p15| ++ * +---------+---------+ ++ * ++ * Samples s0A-s1B were created by averaging the original pixel component ++ * values centered at positions p0-p15 above. 
To approximate those original ++ * pixel component values, we duplicate the samples both horizontally and ++ * vertically: ++ * p0(upsampled) = p1(upsampled) = p4(upsampled) = p5(upsampled) = s0A ++ * p2(upsampled) = p3(upsampled) = p6(upsampled) = p7(upsampled) = s1A ++ * p8(upsampled) = p9(upsampled) = p12(upsampled) = p13(upsampled) = s0B ++ * p10(upsampled) = p11(upsampled) = p14(upsampled) = p15(upsampled) = s1B ++ */ ++ ++void jsimd_h2v2_upsample_rvv(int max_v_samp_factor, ++ JDIMENSION output_width, ++ JSAMPARRAY input_data, ++ JSAMPARRAY *output_data_ptr) ++{ ++ int s_row, p_row, cols_remaining; ++ size_t vl; ++ JSAMPARRAY output_data = *output_data_ptr; ++ JSAMPROW inptr, outptr_top, outptr_bot; ++ ++ vuint8m8_t samples; ++ ++ for (s_row = 0, p_row = 0; p_row < max_v_samp_factor;) ++ { ++ inptr = input_data[s_row++]; ++ outptr_top = output_data[p_row++]; /* each input row feeds two consecutive output rows */ ++ outptr_bot = output_data[p_row++]; ++ ++ for (cols_remaining = output_width; cols_remaining > 0; cols_remaining -= 2 * vl) ++ { ++ vl = __riscv_vsetvl_e8m8((cols_remaining + 1) / 2); /* one input sample per two output columns, rounded up */ ++ ++ samples = __riscv_vle8_v_u8m8(inptr, vl); ++ ++ __riscv_vsse8_v_u8m8(outptr_top, 2 * sizeof(JSAMPLE), samples, vl); ++ __riscv_vsse8_v_u8m8(outptr_top + 1, 2 * sizeof(JSAMPLE), samples, vl); /* NOTE(review): for odd output_width this writes one sample past output_width; assumes the row buffer is padded - confirm */ ++ __riscv_vsse8_v_u8m8(outptr_bot, 2 * sizeof(JSAMPLE), samples, vl); ++ __riscv_vsse8_v_u8m8(outptr_bot + 1, 2 * sizeof(JSAMPLE), samples, vl); ++ ++ inptr += vl; ++ outptr_top += 2 * vl; ++ outptr_bot += 2 * vl; ++ } ++ } ++} +diff --git a/simd/rvv/jfdctfst-rvv.c b/simd/rvv/jfdctfst-rvv.c +new file mode 100644 +index 0000000..88fd5bd +--- /dev/null ++++ b/simd/rvv/jfdctfst-rvv.c +@@ -0,0 +1,155 @@ ++/* ++ * jfdctfst-rvv.c - fast integer FDCT ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. 
In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++#define F_0_382 98 /* 0.382683433 = 98 * 2^-8 */ ++#define F_0_541 139 /* 0.541196100 = 139 * 2^-8 */ ++#define F_0_707 181 /* 0.707106781 = 181 * 2^-8 */ ++#define F_1_306 334 /* 1.306562965 = 334 * 2^-8 */ ++ ++#define CONST_BITS 8 ++ ++#define DO_DCT() \ ++ { \ ++ /* Even part */ \ ++ tmp10 = __riscv_vadd_vv_i16mf2(tmp0, tmp3, vl); \ ++ tmp13 = __riscv_vsub_vv_i16mf2(tmp0, tmp3, vl); \ ++ tmp11 = __riscv_vadd_vv_i16mf2(tmp1, tmp2, vl); \ ++ tmp12 = __riscv_vsub_vv_i16mf2(tmp1, tmp2, vl); \ ++ \ ++ out0 = __riscv_vadd_vv_i16mf2(tmp10, tmp11, vl); \ ++ out4 = __riscv_vsub_vv_i16mf2(tmp10, tmp11, vl); \ ++ \ ++ z1 = __riscv_vadd_vv_i16mf2(tmp12, tmp13, vl); \ ++ pdt = __riscv_vwmul_vx_i32m1(z1, F_0_707, vl); \ ++ z1 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \ ++ out2 = __riscv_vadd_vv_i16mf2(tmp13, z1, vl); \ ++ out6 = __riscv_vsub_vv_i16mf2(tmp13, z1, vl); \ ++ \ ++ /* Odd part */ \ ++ tmp10 = __riscv_vadd_vv_i16mf2(tmp4, tmp5, vl); \ ++ tmp11 = __riscv_vadd_vv_i16mf2(tmp5, tmp6, vl); \ ++ tmp12 = __riscv_vadd_vv_i16mf2(tmp6, tmp7, vl); \ ++ \ ++ z5 = __riscv_vsub_vv_i16mf2(tmp10, tmp12, vl); \ ++ pdt = 
__riscv_vwmul_vx_i32m1(z5, F_0_382, vl); \ ++ z5 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \ ++ pdt = __riscv_vwmul_vx_i32m1(tmp10, F_0_541, vl); \ ++ z2 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \ ++ z2 = __riscv_vadd_vv_i16mf2(z2, z5, vl); \ ++ pdt = __riscv_vwmul_vx_i32m1(tmp12, F_1_306, vl); \ ++ z4 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \ ++ z4 = __riscv_vadd_vv_i16mf2(z4, z5, vl); \ ++ pdt = __riscv_vwmul_vx_i32m1(tmp11, F_0_707, vl); \ ++ z3 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \ ++ \ ++ z11 = __riscv_vadd_vv_i16mf2(tmp7, z3, vl); \ ++ z13 = __riscv_vsub_vv_i16mf2(tmp7, z3, vl); \ ++ \ ++ out5 = __riscv_vadd_vv_i16mf2(z13, z2, vl); \ ++ out3 = __riscv_vsub_vv_i16mf2(z13, z2, vl); \ ++ out1 = __riscv_vadd_vv_i16mf2(z11, z4, vl); \ ++ out7 = __riscv_vsub_vv_i16mf2(z11, z4, vl); \ ++ } ++ ++/* In-place 8x8 forward DCT on data; each pass handles all DCTSIZE=8 lanes in one i16mf2 vector, so vl must equal DCTSIZE (the returned vl is not checked). NOTE(review): e16/mf2 holds VLEN/32 elements, which implies VLEN >= 256 rather than 64 - confirm. */ ++void jsimd_fdct_ifast_rvv(DCTELEM *data) ++{ ++ //printf("fdct_ifast | "); ++ int col_stride = DCTSIZE * sizeof(DCTELEM); ++ size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE); ++ ++ vint16mf2_t row0, row1, row2, row3, row4, row5, row6, row7, ++ col0, col1, col2, col3, col4, col5, col6, col7, ++ tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, ++ tmp10, tmp11, tmp12, tmp13, ++ z1, z2, z3, z4, z5, z11, z13, ++ out0, out1, out2, out3, out4, out5, out6, out7; ++ vint32m1_t pdt; ++ ++ /* Pass 1: process rows */ ++ /* Load columns: lane i of colN holds data[i * DCTSIZE + N], so each lane carries one row */ ++ col0 = __riscv_vlse16_v_i16mf2(data + 0, col_stride, vl); ++ col1 = __riscv_vlse16_v_i16mf2(data + 1, col_stride, vl); ++ col2 = __riscv_vlse16_v_i16mf2(data + 2, col_stride, vl); ++ col3 = __riscv_vlse16_v_i16mf2(data + 3, col_stride, vl); ++ col4 = __riscv_vlse16_v_i16mf2(data + 4, col_stride, vl); ++ col5 = __riscv_vlse16_v_i16mf2(data + 5, col_stride, vl); ++ col6 = __riscv_vlse16_v_i16mf2(data + 6, col_stride, vl); ++ col7 = __riscv_vlse16_v_i16mf2(data + 7, col_stride, vl); ++ ++ tmp0 = __riscv_vadd_vv_i16mf2(col0, col7, vl); ++ tmp7 = __riscv_vsub_vv_i16mf2(col0, 
col7, vl); ++ tmp1 = __riscv_vadd_vv_i16mf2(col1, col6, vl); ++ tmp6 = __riscv_vsub_vv_i16mf2(col1, col6, vl); ++ tmp2 = __riscv_vadd_vv_i16mf2(col2, col5, vl); ++ tmp5 = __riscv_vsub_vv_i16mf2(col2, col5, vl); ++ tmp3 = __riscv_vadd_vv_i16mf2(col3, col4, vl); ++ tmp4 = __riscv_vsub_vv_i16mf2(col3, col4, vl); ++ ++ DO_DCT(); ++ ++ /* Store columns (strided, back to the positions they were loaded from) */ ++ __riscv_vsse16_v_i16mf2(data + 0, col_stride, out0, vl); ++ __riscv_vsse16_v_i16mf2(data + 1, col_stride, out1, vl); ++ __riscv_vsse16_v_i16mf2(data + 2, col_stride, out2, vl); ++ __riscv_vsse16_v_i16mf2(data + 3, col_stride, out3, vl); ++ __riscv_vsse16_v_i16mf2(data + 4, col_stride, out4, vl); ++ __riscv_vsse16_v_i16mf2(data + 5, col_stride, out5, vl); ++ __riscv_vsse16_v_i16mf2(data + 6, col_stride, out6, vl); ++ __riscv_vsse16_v_i16mf2(data + 7, col_stride, out7, vl); ++ ++ /* Pass 2: process columns */ ++ /* Load rows: lane i of rowN holds data[N * DCTSIZE + i], so each lane carries one column */ ++ row0 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 0, vl); ++ row1 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 1, vl); ++ row2 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 2, vl); ++ row3 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 3, vl); ++ row4 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 4, vl); ++ row5 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 5, vl); ++ row6 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 6, vl); ++ row7 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 7, vl); ++ ++ tmp0 = __riscv_vadd_vv_i16mf2(row0, row7, vl); ++ tmp7 = __riscv_vsub_vv_i16mf2(row0, row7, vl); ++ tmp1 = __riscv_vadd_vv_i16mf2(row1, row6, vl); ++ tmp6 = __riscv_vsub_vv_i16mf2(row1, row6, vl); ++ tmp2 = __riscv_vadd_vv_i16mf2(row2, row5, vl); ++ tmp5 = __riscv_vsub_vv_i16mf2(row2, row5, vl); ++ tmp3 = __riscv_vadd_vv_i16mf2(row3, row4, vl); ++ tmp4 = __riscv_vsub_vv_i16mf2(row3, row4, vl); ++ ++ DO_DCT(); ++ ++ /* Store rows */ ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 0, out0, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 1, out1, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 2, out2, vl); ++ __riscv_vse16_v_i16mf2(data + 
DCTSIZE * 3, out3, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 4, out4, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 5, out5, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 6, out6, vl); ++ __riscv_vse16_v_i16mf2(data + DCTSIZE * 7, out7, vl); ++} +diff --git a/simd/rvv/jfdctint-rvv.c b/simd/rvv/jfdctint-rvv.c +new file mode 100644 +index 0000000..c1dee50 +--- /dev/null ++++ b/simd/rvv/jfdctint-rvv.c +@@ -0,0 +1,250 @@ ++/* ++ * jfdctint-rvv.c - accurate integer FDCT ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. 
++ */
++
++#define JPEG_INTERNALS
++#include "jsimd_rvv.h"
++
++#define CONST_BITS 13
++#define PASS1_BITS 2
++
++#define F_0_298 2446  /* 0.298631336 = 2446 * 2^-13 */
++#define F_0_390 3196  /* 0.390180644 = 3196 * 2^-13 */
++#define F_0_541 4433  /* 0.541196100 = 4433 * 2^-13 */
++#define F_0_765 6270  /* 0.765366865 = 6270 * 2^-13 */
++#define F_0_899 7373  /* 0.899976223 = 7373 * 2^-13 */
++#define F_1_175 9633  /* 1.175875602 = 9633 * 2^-13 */
++#define F_1_501 12299 /* 1.501321110 = 12299 * 2^-13 */
++#define F_1_847 15137 /* 1.847759065 = 15137 * 2^-13 */
++#define F_1_961 16069 /* 1.961570560 = 16069 * 2^-13 */
++#define F_2_053 16819 /* 2.053119869 = 16819 * 2^-13 */
++#define F_2_562 20995 /* 2.562915447 = 20995 * 2^-13 */
++#define F_3_072 25172 /* 3.072711026 = 25172 * 2^-13 */
++
++#define ROUND_ADD(n) ((int32_t)1 << ((n)-1)) /* rounding bias added before an arithmetic right shift by n */
++
++/* Accurate integer FDCT of one 8x8 block, in place. All DCTSIZE=8 lanes must fit in i16mf2, i.e. VLEN >= 256. */
++void jsimd_fdct_islow_rvv(DCTELEM *data)
++{
++    int col_stride = DCTSIZE * sizeof(DCTELEM); /* byte distance between vertically adjacent elements */
++    size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE);
++
++    vint16mf2_t row0, row1, row2, row3, row4, row5, row6, row7;
++    vint16mf2_t col0, col1, col2, col3, col4, col5, col6, col7;
++    vint16mf2_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
++    vint16mf2_t tmp10, tmp11, tmp12, tmp13;
++    vint16mf2_t z1, z2, z3, z4, z5;
++    vint16mf2_t out0, out1, out2, out3, out4, out5, out6, out7;
++    vint32m1_t p1, p2, p3, p4, p5, t4, t5, t6, t7, temp; /* 32-bit products of the widening multiplies */
++
++    /* Pass 1: process rows; results are left scaled up by 2^PASS1_BITS. */
++    /* Load columns: colK lane r = data[r * DCTSIZE + K], so each lane carries one row. */
++    col0 = __riscv_vlse16_v_i16mf2(data + 0, col_stride, vl);
++    col1 = __riscv_vlse16_v_i16mf2(data + 1, col_stride, vl);
++    col2 = __riscv_vlse16_v_i16mf2(data + 2, col_stride, vl);
++    col3 = __riscv_vlse16_v_i16mf2(data + 3, col_stride, vl);
++    col4 = __riscv_vlse16_v_i16mf2(data + 4, col_stride, vl);
++    col5 = __riscv_vlse16_v_i16mf2(data + 5, col_stride, vl);
++    col6 = __riscv_vlse16_v_i16mf2(data + 6, col_stride, vl);
++    col7 = __riscv_vlse16_v_i16mf2(data + 7, col_stride, vl);
++
++    tmp0 = __riscv_vadd_vv_i16mf2(col0, col7, vl);
++    tmp7 = __riscv_vsub_vv_i16mf2(col0, col7, vl);
++    tmp1 = __riscv_vadd_vv_i16mf2(col1, col6, vl);
++    tmp6 = __riscv_vsub_vv_i16mf2(col1, col6, vl);
++    tmp2 = __riscv_vadd_vv_i16mf2(col2, col5, vl);
++    tmp5 = __riscv_vsub_vv_i16mf2(col2, col5, vl);
++    tmp3 = __riscv_vadd_vv_i16mf2(col3, col4, vl);
++    tmp4 = __riscv_vsub_vv_i16mf2(col3, col4, vl);
++
++    /* Even part */
++    tmp10 = __riscv_vadd_vv_i16mf2(tmp0, tmp3, vl);
++    tmp13 = __riscv_vsub_vv_i16mf2(tmp0, tmp3, vl);
++    tmp11 = __riscv_vadd_vv_i16mf2(tmp1, tmp2, vl);
++    tmp12 = __riscv_vsub_vv_i16mf2(tmp1, tmp2, vl);
++
++    out0 = __riscv_vadd_vv_i16mf2(tmp10, tmp11, vl);
++    out0 = __riscv_vsll_vx_i16mf2(out0, PASS1_BITS, vl);
++    out4 = __riscv_vsub_vv_i16mf2(tmp10, tmp11, vl);
++    out4 = __riscv_vsll_vx_i16mf2(out4, PASS1_BITS, vl);
++
++    z1 = __riscv_vadd_vv_i16mf2(tmp12, tmp13, vl);
++    p1 = __riscv_vwmul_vx_i32m1(z1, F_0_541, vl);
++
++    temp = __riscv_vwmul_vx_i32m1(tmp13, F_0_765, vl);
++    temp = __riscv_vadd_vv_i32m1(p1, temp, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out2 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    temp = __riscv_vwmul_vx_i32m1(tmp12, F_1_847, vl);
++    temp = __riscv_vsub_vv_i32m1(p1, temp, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out6 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    /* Odd part */
++    z1 = __riscv_vadd_vv_i16mf2(tmp4, tmp7, vl);
++    z2 = __riscv_vadd_vv_i16mf2(tmp5, tmp6, vl);
++    z3 = __riscv_vadd_vv_i16mf2(tmp4, tmp6, vl);
++    z4 = __riscv_vadd_vv_i16mf2(tmp5, tmp7, vl);
++    z5 = __riscv_vadd_vv_i16mf2(z3, z4, vl);
++    p5 = __riscv_vwmul_vx_i32m1(z5, F_1_175, vl);
++
++    t4 = __riscv_vwmul_vx_i32m1(tmp4, F_0_298, vl);
++    t5 = __riscv_vwmul_vx_i32m1(tmp5, F_2_053, vl);
++    t6 = __riscv_vwmul_vx_i32m1(tmp6, F_3_072, vl);
++    t7 = __riscv_vwmul_vx_i32m1(tmp7, F_1_501, vl);
++
++    p1 = __riscv_vwmul_vx_i32m1(z1, -F_0_899, vl);
++    p2 = __riscv_vwmul_vx_i32m1(z2, -F_2_562, vl);
++    p3 = __riscv_vwmul_vx_i32m1(z3, -F_1_961, vl);
++    p4 = __riscv_vwmul_vx_i32m1(z4, -F_0_390, vl);
++
++    p3 = __riscv_vadd_vv_i32m1(p3, p5, vl);
++    p4 = __riscv_vadd_vv_i32m1(p4, p5, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t4, p1, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p3, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out7 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t5, p2, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p4, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out5 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t6, p2, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p3, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out3 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t7, p1, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p4, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS - PASS1_BITS), vl);
++    out1 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS - PASS1_BITS, vl);
++
++    /* Store columns: outK goes back to column K of every row. */
++    __riscv_vsse16_v_i16mf2(data + 0, col_stride, out0, vl);
++    __riscv_vsse16_v_i16mf2(data + 1, col_stride, out1, vl);
++    __riscv_vsse16_v_i16mf2(data + 2, col_stride, out2, vl);
++    __riscv_vsse16_v_i16mf2(data + 3, col_stride, out3, vl);
++    __riscv_vsse16_v_i16mf2(data + 4, col_stride, out4, vl);
++    __riscv_vsse16_v_i16mf2(data + 5, col_stride, out5, vl);
++    __riscv_vsse16_v_i16mf2(data + 6, col_stride, out6, vl);
++    __riscv_vsse16_v_i16mf2(data + 7, col_stride, out7, vl);
++
++    /* Pass 2: process columns; the 2^PASS1_BITS scaling from pass 1 is removed here. */
++    /* Load rows: unit-stride loads, so each lane now carries one column. */
++    row0 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 0, vl);
++    row1 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 1, vl);
++    row2 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 2, vl);
++    row3 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 3, vl);
++    row4 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 4, vl);
++    row5 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 5, vl);
++    row6 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 6, vl);
++    row7 = __riscv_vle16_v_i16mf2(data + DCTSIZE * 7, vl);
++
++    tmp0 = __riscv_vadd_vv_i16mf2(row0, row7, vl);
++    tmp7 = __riscv_vsub_vv_i16mf2(row0, row7, vl);
++    tmp1 = __riscv_vadd_vv_i16mf2(row1, row6, vl);
++    tmp6 = __riscv_vsub_vv_i16mf2(row1, row6, vl);
++    tmp2 = __riscv_vadd_vv_i16mf2(row2, row5, vl);
++    tmp5 = __riscv_vsub_vv_i16mf2(row2, row5, vl);
++    tmp3 = __riscv_vadd_vv_i16mf2(row3, row4, vl);
++    tmp4 = __riscv_vsub_vv_i16mf2(row3, row4, vl);
++
++    /* Even part */
++    tmp10 = __riscv_vadd_vv_i16mf2(tmp0, tmp3, vl);
++    tmp13 = __riscv_vsub_vv_i16mf2(tmp0, tmp3, vl);
++    tmp11 = __riscv_vadd_vv_i16mf2(tmp1, tmp2, vl);
++    tmp12 = __riscv_vsub_vv_i16mf2(tmp1, tmp2, vl);
++
++    out0 = __riscv_vadd_vv_i16mf2(tmp10, tmp11, vl);
++    out0 = __riscv_vadd_vx_i16mf2(out0, ROUND_ADD(PASS1_BITS), vl);
++    out0 = __riscv_vsra_vx_i16mf2(out0, PASS1_BITS, vl);
++    out4 = __riscv_vsub_vv_i16mf2(tmp10, tmp11, vl);
++    out4 = __riscv_vadd_vx_i16mf2(out4, ROUND_ADD(PASS1_BITS), vl);
++    out4 = __riscv_vsra_vx_i16mf2(out4, PASS1_BITS, vl);
++
++    z1 = __riscv_vadd_vv_i16mf2(tmp12, tmp13, vl);
++    p1 = __riscv_vwmul_vx_i32m1(z1, F_0_541, vl);
++
++    temp = __riscv_vwmul_vx_i32m1(tmp13, F_0_765, vl);
++    temp = __riscv_vadd_vv_i32m1(p1, temp, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out2 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    temp = __riscv_vwmul_vx_i32m1(tmp12, F_1_847, vl);
++    temp = __riscv_vsub_vv_i32m1(p1, temp, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out6 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    /* Odd part */
++    z1 = __riscv_vadd_vv_i16mf2(tmp4, tmp7, vl);
++    z2 = __riscv_vadd_vv_i16mf2(tmp5, tmp6, vl);
++    z3 = __riscv_vadd_vv_i16mf2(tmp4, tmp6, vl);
++    z4 = __riscv_vadd_vv_i16mf2(tmp5, tmp7, vl);
++    z5 = __riscv_vadd_vv_i16mf2(z3, z4, vl);
++    p5 = __riscv_vwmul_vx_i32m1(z5, F_1_175, vl);
++
++    t4 = __riscv_vwmul_vx_i32m1(tmp4, F_0_298, vl);
++    t5 = __riscv_vwmul_vx_i32m1(tmp5, F_2_053, vl);
++    t6 = __riscv_vwmul_vx_i32m1(tmp6, F_3_072, vl);
++    t7 = __riscv_vwmul_vx_i32m1(tmp7, F_1_501, vl);
++
++    p1 = __riscv_vwmul_vx_i32m1(z1, -F_0_899, vl);
++    p2 = __riscv_vwmul_vx_i32m1(z2, -F_2_562, vl);
++    p3 = __riscv_vwmul_vx_i32m1(z3, -F_1_961, vl);
++    p4 = __riscv_vwmul_vx_i32m1(z4, -F_0_390, vl);
++
++    p3 = __riscv_vadd_vv_i32m1(p3, p5, vl);
++    p4 = __riscv_vadd_vv_i32m1(p4, p5, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t4, p1, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p3, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out7 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t5, p2, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p4, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out5 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t6, p2, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p3, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out3 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    temp = __riscv_vadd_vv_i32m1(t7, p1, vl);
++    temp = __riscv_vadd_vv_i32m1(temp, p4, vl);
++    temp = __riscv_vadd_vx_i32m1(temp, ROUND_ADD(CONST_BITS + PASS1_BITS), vl);
++    out1 = __riscv_vnsra_wx_i16mf2(temp, CONST_BITS + PASS1_BITS, vl);
++
++    /* Store rows */
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 0, out0, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 1, out1, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 2, out2, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 3, out3, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 4, out4, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 5, out5, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 6, out6, vl);
++    __riscv_vse16_v_i16mf2(data + DCTSIZE * 7, out7, vl);
++}
+diff --git a/simd/rvv/jidctfst-rvv.c b/simd/rvv/jidctfst-rvv.c
+new file mode 100644
+index 0000000..4f81c5f
+--- /dev/null
++++ b/simd/rvv/jidctfst-rvv.c
+@@ -0,0 +1,315 @@
++
++/*
++ * jidctfst-rvv.c - fast integer IDCT
++ *
++ * Copyright (C) 2020, Arm Limited. All Rights Reserved.
++ *
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ *
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ *
++ * 1. The origin of this software must not be misrepresented; you must not
++ *    claim that you wrote the original software. If you use this software
++ *    in a product, an acknowledgment in the product documentation would be
++ *    appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ *    misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */
++
++#define JPEG_INTERNALS
++#include "jsimd_rvv.h"
++
++#define FIX_1_082 277 /* 1.082392200 = 277 * 2^-8 */
++#define FIX_1_414 362 /* 1.414213562 = 362 * 2^-8 */
++#define FIX_1_847 473 /* 1.847759065 = 473 * 2^-8 */
++#define FIX_2_613 669 /* 2.613125930 = 669 * 2^-8 */
++
++#if BITS_IN_JSAMPLE == 8
++#define CONST_BITS 8
++#define PASS1_BITS 2
++#else
++#define CONST_BITS 8
++#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
++#endif
++
++#define DO_IDCT_0(in) \
++    { \
++        /* Even part: produces tmp10..tmp13 from in0, in2, in4, in6 */ \
++        tmp10 = __riscv_vadd_vv_i16mf2(in##0, in##4, vl); \
++        tmp11 = __riscv_vsub_vv_i16mf2(in##0, in##4, vl); \
++        tmp13 = __riscv_vadd_vv_i16mf2(in##2, in##6, vl); \
++        \
++        tmp12 = __riscv_vsub_vv_i16mf2(in##2, in##6, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(tmp12, FIX_1_414, vl); \
++        tmp12 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp12 = __riscv_vsub_vv_i16mf2(tmp12, tmp13, vl); \
++    }
++#define DO_IDCT_1(in) \
++    { \
++        tmp0 = __riscv_vadd_vv_i16mf2(tmp10, tmp13, vl); \
++        tmp3 = __riscv_vsub_vv_i16mf2(tmp10, tmp13, vl); \
++        tmp1 = __riscv_vadd_vv_i16mf2(tmp11, tmp12, vl); \
++        tmp2 = __riscv_vsub_vv_i16mf2(tmp11, tmp12, vl); \
++        \
++        /* Odd part: produces tmp4..tmp7 from in1, in3, in5, in7 */ \
++        z13 = __riscv_vadd_vv_i16mf2(in##5, in##3, vl); \
++        z10 = __riscv_vsub_vv_i16mf2(in##5, in##3, vl); \
++        z11 = __riscv_vadd_vv_i16mf2(in##1, in##7, vl); \
++        z12 = __riscv_vsub_vv_i16mf2(in##1, in##7, vl); \
++        \
++        tmp7 = __riscv_vadd_vv_i16mf2(z11, z13, vl); \
++        tmp11 = __riscv_vsub_vv_i16mf2(z11, z13, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(tmp11, FIX_1_414, vl); \
++        tmp11 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        \
++        z5 = __riscv_vadd_vv_i16mf2(z10, z12, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(z5, FIX_1_847, vl); \
++        z5 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        \
++        pdt = __riscv_vwmul_vx_i32m1(z12, FIX_1_082, vl); \
++        tmp10 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp10 = __riscv_vsub_vv_i16mf2(tmp10, z5, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(z10, -FIX_2_613, vl); \
++        tmp12 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp12 = __riscv_vadd_vv_i16mf2(z5, tmp12, vl); \
++        \
++        tmp6 = __riscv_vsub_vv_i16mf2(tmp12, tmp7, vl); \
++        tmp5 = __riscv_vsub_vv_i16mf2(tmp11, tmp6, vl); \
++        tmp4 = __riscv_vadd_vv_i16mf2(tmp10, tmp5, vl); \
++    }
++
++#define DO_SPARSE_IDCT_0(in) \
++    { \
++        /* Even part, specialised for in4 = in6 = 0 */ \
++        pdt = __riscv_vwmul_vx_i32m1(in##2, FIX_1_414, vl); \
++        tmp12 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp12 = __riscv_vsub_vv_i16mf2(tmp12, in##2, vl); \
++    }
++#define DO_SPARSE_IDCT_1(in) \
++    { \
++        tmp0 = __riscv_vadd_vv_i16mf2(in##0, in##2, vl); \
++        tmp3 = __riscv_vsub_vv_i16mf2(in##0, in##2, vl); \
++        tmp1 = __riscv_vadd_vv_i16mf2(in##0, tmp12, vl); \
++        tmp2 = __riscv_vsub_vv_i16mf2(in##0, tmp12, vl); \
++        \
++        /* Odd part, specialised for in5 = in7 = 0 */ \
++        tmp7 = __riscv_vadd_vv_i16mf2(in##1, in##3, vl); \
++        tmp11 = __riscv_vsub_vv_i16mf2(in##1, in##3, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(tmp11, FIX_1_414, vl); \
++        tmp11 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        \
++        /* z5 = z10 + z12 in DO_IDCT_1; here z10 = -in3 and z12 = in1 */ \
++        z5 = __riscv_vsub_vv_i16mf2(in##1, in##3, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(z5, FIX_1_847, vl); \
++        z5 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        \
++        pdt = __riscv_vwmul_vx_i32m1(in##1, FIX_1_082, vl); \
++        tmp10 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp10 = __riscv_vsub_vv_i16mf2(tmp10, z5, vl); \
++        pdt = __riscv_vwmul_vx_i32m1(in##3, FIX_2_613, vl); \
++        tmp12 = __riscv_vnsra_wx_i16mf2(pdt, CONST_BITS, vl); \
++        tmp12 = __riscv_vadd_vv_i16mf2(z5, tmp12, vl); \
++        \
++        tmp6 = __riscv_vsub_vv_i16mf2(tmp12, tmp7, vl); \
++        tmp5 = __riscv_vsub_vv_i16mf2(tmp11, tmp6, vl); \
++        tmp4 = __riscv_vadd_vv_i16mf2(tmp10, tmp5, vl); \
++    }
++
++#define OUTPUT() \
++    { \
++        out0 = __riscv_vadd_vv_i16mf2(tmp0, tmp7, vl); \
++        out7 = __riscv_vsub_vv_i16mf2(tmp0, tmp7, vl); \
++        out1 = __riscv_vadd_vv_i16mf2(tmp1, tmp6, vl); \
++        out6 = __riscv_vsub_vv_i16mf2(tmp1, tmp6, vl); \
++        out2 = __riscv_vadd_vv_i16mf2(tmp2, tmp5, vl); \
++        out5 = __riscv_vsub_vv_i16mf2(tmp2, tmp5, vl); \
++        out4 = __riscv_vadd_vv_i16mf2(tmp3, tmp4, vl); \
++        out3 = __riscv_vsub_vv_i16mf2(tmp3, tmp4, vl); \
++    }
++/* Fast integer IDCT of one 8x8 block: dequantize coef_block with dct_table, transform, write 8x8 samples into output_buf at output_col. */
++void jsimd_idct_ifast_rvv(void *dct_table, JCOEFPTR coef_block,
++                          JSAMPARRAY output_buf, JDIMENSION output_col)
++{
++    int col_stride = DCTSIZE * sizeof(DCTELEM);
++    size_t stride[8];
++    size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE); /* 8 lanes are assumed below; e16/mf2 provides them only when VLEN >= 256 */
++    IFAST_MULT_TYPE *quantptr = dct_table;
++    DCTELEM workspace[DCTSIZE2];
++    JSAMPROW output_row = output_buf[0] + output_col;
++
++    stride[0] = 0;
++    for (int i = 1; i < 8; ++i)
++    {
++        stride[i] = output_buf[i] - output_buf[0]; /* offset of output row i relative to row 0, used as scatter index */
++    }
++
++    vbool32_t beq0;
++    vint16mf2_t bit;
++    vint8mf4_t dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
++    vint16mf2_t row0, row1, row2, row3, row4, row5, row6, row7;
++    vint16mf2_t col0, col1, col2, col3, col4, col5, col6, col7;
++    vint16mf2_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
++    vint16mf2_t tmp10, tmp11, tmp12, tmp13;
++    vint16mf2_t z1, z2, z3, z4, z5, z10, z11, z12, z13;
++    vint16mf2_t out0, out1, out2, out3, out4, out5, out6, out7;
++    vint16mf2_t quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7;
++    vint16mf2_t bitmap;
++    vint32m1_t pdt;
++
++    /* Pass 1: process columns from input, store into work array. */
++    /* Load DCT coefficients. */
++    row0 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 0, vl);
++    row1 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 1, vl);
++    row2 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 2, vl);
++    row3 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 3, vl);
++    row4 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 4, vl);
++    row5 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 5, vl);
++    row6 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 6, vl);
++    row7 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 7, vl);
++
++    bit = __riscv_vor_vv_i16mf2(row4, row5, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row6, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row7, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long row4567_cpop0 = __riscv_vcpop_m_b32(beq0, vl);
++
++    bit = __riscv_vor_vv_i16mf2(row1, row2, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row3, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long row123_cpop0 = __riscv_vcpop_m_b32(beq0, vl);
++
++    /* A count of 8 means the OR of the tested rows is zero in every one of the 8 lanes. */
++    if (8 == row123_cpop0 && 8 == row4567_cpop0)
++    {
++        /* Rows 1-7 are entirely zero: skip the pass-1 transform and only dequantize row 0. */
++        /* Load quantization table values for DC coefficients. */
++        quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++        row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++
++        /* Store rows */
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, row0, vl);
++
++        /* Broadcast each row-0 value as a pass-2 column (original author's note: vmv is faster than a load here). */
++        col0 = __riscv_vmv_v_x_i16mf2(workspace[0], vl);
++        col1 = __riscv_vmv_v_x_i16mf2(workspace[1], vl);
++        col2 = __riscv_vmv_v_x_i16mf2(workspace[2], vl);
++        col3 = __riscv_vmv_v_x_i16mf2(workspace[3], vl);
++        col4 = __riscv_vmv_v_x_i16mf2(workspace[4], vl);
++        col5 = __riscv_vmv_v_x_i16mf2(workspace[5], vl);
++        col6 = __riscv_vmv_v_x_i16mf2(workspace[6], vl);
++        col7 = __riscv_vmv_v_x_i16mf2(workspace[7], vl);
++    }
++    else
++    {
++        if (8 == row4567_cpop0)
++        {
++            quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++            quant1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++            quant2 = __riscv_vle16_v_i16mf2(quantptr + 2 * DCTSIZE, vl);
++            quant3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++            row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++            row1 = __riscv_vmul_vv_i16mf2(row1, quant1, vl);
++            row2 = __riscv_vmul_vv_i16mf2(row2, quant2, vl);
++            row3 = __riscv_vmul_vv_i16mf2(row3, quant3, vl);
++
++            DO_SPARSE_IDCT_0(row);
++            DO_SPARSE_IDCT_1(row);
++        }
++        else
++        {
++            /* Load quantization table. */
++            quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++            quant1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++            quant2 = __riscv_vle16_v_i16mf2(quantptr + 2 * DCTSIZE, vl);
++            quant3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++            quant4 = __riscv_vle16_v_i16mf2(quantptr + 4 * DCTSIZE, vl);
++            quant5 = __riscv_vle16_v_i16mf2(quantptr + 5 * DCTSIZE, vl);
++            quant6 = __riscv_vle16_v_i16mf2(quantptr + 6 * DCTSIZE, vl);
++            quant7 = __riscv_vle16_v_i16mf2(quantptr + 7 * DCTSIZE, vl);
++
++            row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++            row1 = __riscv_vmul_vv_i16mf2(row1, quant1, vl);
++            row2 = __riscv_vmul_vv_i16mf2(row2, quant2, vl);
++            row3 = __riscv_vmul_vv_i16mf2(row3, quant3, vl);
++            row4 = __riscv_vmul_vv_i16mf2(row4, quant4, vl);
++            row5 = __riscv_vmul_vv_i16mf2(row5, quant5, vl);
++            row6 = __riscv_vmul_vv_i16mf2(row6, quant6, vl);
++            row7 = __riscv_vmul_vv_i16mf2(row7, quant7, vl);
++
++            DO_IDCT_0(row);
++            DO_IDCT_1(row);
++        }
++
++        OUTPUT();
++
++        /* Store rows */
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, out0, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 1, out1, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 2, out2, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 3, out3, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 4, out4, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 5, out5, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 6, out6, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 7, out7, vl);
++
++        /* Pass 2: process rows from work array, store into output array. */
++        /* Load columns */
++        col0 = __riscv_vlse16_v_i16mf2(workspace + 0, col_stride, vl);
++        col1 = __riscv_vlse16_v_i16mf2(workspace + 1, col_stride, vl);
++        col2 = __riscv_vlse16_v_i16mf2(workspace + 2, col_stride, vl);
++        col3 = __riscv_vlse16_v_i16mf2(workspace + 3, col_stride, vl);
++        col4 = __riscv_vlse16_v_i16mf2(workspace + 4, col_stride, vl);
++        col5 = __riscv_vlse16_v_i16mf2(workspace + 5, col_stride, vl);
++        col6 = __riscv_vlse16_v_i16mf2(workspace + 6, col_stride, vl);
++        col7 = __riscv_vlse16_v_i16mf2(workspace + 7, col_stride, vl);
++    }
++
++    DO_IDCT_0(col);
++    tmp10 = __riscv_vadd_vx_i16mf2(tmp10, CENTERJSAMPLE << (PASS1_BITS + 3), vl); /* pre-scaled bias: becomes +CENTERJSAMPLE after the final shift */
++    tmp11 = __riscv_vadd_vx_i16mf2(tmp11, CENTERJSAMPLE << (PASS1_BITS + 3), vl);
++    DO_IDCT_1(col);
++    OUTPUT();
++
++    out0 = __riscv_vsra_vx_i16mf2(out0, PASS1_BITS + 3, vl);
++    out1 = __riscv_vsra_vx_i16mf2(out1, PASS1_BITS + 3, vl);
++    out2 = __riscv_vsra_vx_i16mf2(out2, PASS1_BITS + 3, vl);
++    out3 = __riscv_vsra_vx_i16mf2(out3, PASS1_BITS + 3, vl);
++    out4 = __riscv_vsra_vx_i16mf2(out4, PASS1_BITS + 3, vl);
++    out5 = __riscv_vsra_vx_i16mf2(out5, PASS1_BITS + 3, vl);
++    out6 = __riscv_vsra_vx_i16mf2(out6, PASS1_BITS + 3, vl);
++    out7 = __riscv_vsra_vx_i16mf2(out7, PASS1_BITS + 3, vl);
++
++    CLIP(out0, i16mf2);
++    CLIP(out1, i16mf2);
++    CLIP(out2, i16mf2);
++    CLIP(out3, i16mf2);
++    CLIP(out4, i16mf2);
++    CLIP(out5, i16mf2);
++    CLIP(out6, i16mf2);
++    CLIP(out7, i16mf2);
++
++    dst0 = __riscv_vncvt_x_x_w_i8mf4(out0, vl);
++    dst1 = __riscv_vncvt_x_x_w_i8mf4(out1, vl);
++    dst2 = __riscv_vncvt_x_x_w_i8mf4(out2, vl);
++    dst3 = __riscv_vncvt_x_x_w_i8mf4(out3, vl);
++    dst4 = __riscv_vncvt_x_x_w_i8mf4(out4, vl);
++    dst5 = __riscv_vncvt_x_x_w_i8mf4(out5, vl);
++    dst6 = __riscv_vncvt_x_x_w_i8mf4(out6, vl);
++    dst7 = __riscv_vncvt_x_x_w_i8mf4(out7, vl);
++
++    /* Store columns: lane r of dstK is written to output_buf[r] + output_col + K. */
++    vuint64m2_t vstride = __riscv_vle64_v_u64m2(stride, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 0, vstride, dst0, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 1, vstride, dst1, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 2, vstride, dst2, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 3, vstride, dst3, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 4, vstride, dst4, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 5, vstride, dst5, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 6, vstride, dst6, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 7, vstride, dst7, vl);
++}
+\ No newline at end of file
+diff --git a/simd/rvv/jidctint-rvv.c b/simd/rvv/jidctint-rvv.c
+new file mode 100644
+index 0000000..4ef39db
+--- /dev/null
++++ b/simd/rvv/jidctint-rvv.c
+@@ -0,0 +1,360 @@
++/*
++ * jidctint-rvv.c - accurate integer IDCT
++ *
++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved.
++ * Contributed by Liang Junzhao
++ *
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ *
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ *
++ * 1. The origin of this software must not be misrepresented; you must not
++ *    claim that you wrote the original software. If you use this software
++ *    in a product, an acknowledgment in the product documentation would be
++ *    appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ *    misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */ ++ ++#define JPEG_INTERNALS ++#include "jsimd_rvv.h" ++ ++#define CONST_BITS 13 ++#define PASS1_BITS 2 ++ ++#define F_0_298 2446 /* 0.298631336 = 2446 * 2^-13 */ ++#define F_0_390 3196 /* 0.390180644 = 3196 * 2^-13 */ ++#define F_0_541 4433 /* 0.541196100 = 4433 * 2^-13 */ ++#define F_0_765 6270 /* 0.765366865 = 6270 * 2^-13 */ ++#define F_0_899 7373 /* 0.899976223 = 7373 * 2^-13 */ ++#define F_1_175 9633 /* 1.175875602 = 9633 * 2^-13 */ ++#define F_1_501 12299 /* 1.501321110 = 12299 * 2^-13 */ ++#define F_1_847 15137 /* 1.847759065 = 15137 * 2^-13 */ ++#define F_1_961 16069 /* 1.961570560 = 16069 * 2^-13 */ ++#define F_2_053 16819 /* 2.053119869 = 16819 * 2^-13 */ ++#define F_2_562 20995 /* 2.562915447 = 20995 * 2^-13 */ ++#define F_3_072 25172 /* 3.072711026 = 25172 * 2^-13 */ ++ ++#define F_1_175_MINUS_1_961 (F_1_175 - F_1_961) ++#define F_1_175_MINUS_0_390 (F_1_175 - F_0_390) ++#define F_0_541_MINUS_1_847 (F_0_541 - F_1_847) ++#define F_3_072_MINUS_2_562 (F_3_072 - F_2_562) ++#define F_0_298_MINUS_0_899 (F_0_298 - F_0_899) ++#define F_1_501_MINUS_0_899 (F_1_501 - F_0_899) ++#define F_2_053_MINUS_2_562 (F_2_053 - F_2_562) ++#define F_0_541_PLUS_0_765 (F_0_541 + F_0_765) ++ ++#define ROUND_ADD(n) (int32_t)1 << ((n)-1) ++ ++#define DO_REGULAR_IDCT_0(in) \ ++ { \ ++ /* Even part */ \ ++ z1 = __riscv_vwadd_vv_i32m1(in##2, in##6, vl); \ ++ z1 = __riscv_vmul_vx_i32m1(z1, F_0_541, vl); \ ++ tmp2 = __riscv_vwmacc_vx_i32m1(z1, -F_1_847, in##6, vl); \ ++ tmp3 = __riscv_vwmacc_vx_i32m1(z1, F_0_765, in##2, vl); \ ++ \ ++ tmp0 = __riscv_vwadd_vv_i32m1(in##0, in##4, vl); \ ++ tmp1 = __riscv_vwsub_vv_i32m1(in##0, in##4, vl); \ ++ tmp0 = __riscv_vsll_vx_i32m1(tmp0, CONST_BITS, vl); \ ++ tmp1 = __riscv_vsll_vx_i32m1(tmp1, CONST_BITS, vl); \ ++ } ++ ++#define DO_REGULAR_IDCT_1(in) \ ++ { \ ++ tmp10 = __riscv_vadd_vv_i32m1(tmp0, tmp3, vl); \ ++ tmp13 = __riscv_vsub_vv_i32m1(tmp0, tmp3, vl); \ ++ tmp11 = __riscv_vadd_vv_i32m1(tmp1, tmp2, vl); \ ++ tmp12 = 
__riscv_vsub_vv_i32m1(tmp1, tmp2, vl); \ ++ \ ++ /* Odd Part */ \ ++ z1 = __riscv_vwadd_vv_i32m1(in##7, in##1, vl); \ ++ z2 = __riscv_vwadd_vv_i32m1(in##5, in##3, vl); \ ++ z3 = __riscv_vwadd_vv_i32m1(in##7, in##3, vl); \ ++ z4 = __riscv_vwadd_vv_i32m1(in##5, in##1, vl); \ ++ z5 = __riscv_vadd_vv_i32m1(z3, z4, vl); \ ++ z5 = __riscv_vmul_vx_i32m1(z5, F_1_175, vl); \ ++ \ ++ tmp0 = __riscv_vwmul_vx_i32m1(in##7, F_0_298, vl); \ ++ tmp1 = __riscv_vwmul_vx_i32m1(in##5, F_2_053, vl); \ ++ tmp2 = __riscv_vwmul_vx_i32m1(in##3, F_3_072, vl); \ ++ tmp3 = __riscv_vwmul_vx_i32m1(in##1, F_1_501, vl); \ ++ z1 = __riscv_vmul_vx_i32m1(z1, -F_0_899, vl); \ ++ z2 = __riscv_vmul_vx_i32m1(z2, -F_2_562, vl); \ ++ z3 = __riscv_vmul_vx_i32m1(z3, -F_1_961, vl); \ ++ z4 = __riscv_vmul_vx_i32m1(z4, -F_0_390, vl); \ ++ \ ++ z3 = __riscv_vadd_vv_i32m1(z3, z5, vl); \ ++ z4 = __riscv_vadd_vv_i32m1(z4, z5, vl); \ ++ \ ++ tmp0 = __riscv_vadd_vv_i32m1(tmp0, z1, vl); \ ++ tmp0 = __riscv_vadd_vv_i32m1(tmp0, z3, vl); \ ++ tmp1 = __riscv_vadd_vv_i32m1(tmp1, z2, vl); \ ++ tmp1 = __riscv_vadd_vv_i32m1(tmp1, z4, vl); \ ++ tmp2 = __riscv_vadd_vv_i32m1(tmp2, z2, vl); \ ++ tmp2 = __riscv_vadd_vv_i32m1(tmp2, z3, vl); \ ++ tmp3 = __riscv_vadd_vv_i32m1(tmp3, z1, vl); \ ++ tmp3 = __riscv_vadd_vv_i32m1(tmp3, z4, vl); \ ++ } ++ ++#define DO_SPARSE_IDCT_0(in) \ ++ { \ ++ /* Even part */ \ ++ tmp2 = __riscv_vwmul_vx_i32m1(in##2, F_0_541, vl); \ ++ tmp3 = __riscv_vwmul_vx_i32m1(in##2, F_0_541_PLUS_0_765, vl); \ ++ \ ++ tmp0 = __riscv_vwadd_vv_i32m1(in##0, in##4, vl); \ ++ tmp1 = __riscv_vwsub_vv_i32m1(in##0, in##4, vl); \ ++ tmp0 = __riscv_vsll_vx_i32m1(tmp0, CONST_BITS, vl); \ ++ tmp1 = __riscv_vsll_vx_i32m1(tmp1, CONST_BITS, vl); \ ++ } ++ ++#define DO_SPARSE_IDCT_1(in) \ ++ { \ ++ tmp10 = __riscv_vadd_vv_i32m1(tmp0, tmp3, vl); \ ++ tmp13 = __riscv_vsub_vv_i32m1(tmp0, tmp3, vl); \ ++ tmp11 = __riscv_vadd_vv_i32m1(tmp1, tmp2, vl); \ ++ tmp12 = __riscv_vsub_vv_i32m1(tmp1, tmp2, vl); \ ++ \ ++ /* Odd Part */ \ ++ z3 
= __riscv_vwmul_vx_i32m1(in##3, F_1_175_MINUS_1_961, vl); \ ++ z4 = __riscv_vwmul_vx_i32m1(in##3, F_1_175, vl); \ ++ z3 = __riscv_vwmacc_vx_i32m1(z3, F_1_175, in##1, vl); \ ++ z4 = __riscv_vwmacc_vx_i32m1(z4, F_1_175_MINUS_0_390, in##1, vl); \ ++ \ ++ tmp0 = __riscv_vwmacc_vx_i32m1(z3, -F_0_899, in##1, vl); \ ++ tmp1 = __riscv_vwmacc_vx_i32m1(z4, -F_2_562, in##3, vl); \ ++ tmp2 = __riscv_vwmacc_vx_i32m1(z3, F_3_072_MINUS_2_562, in##3, vl); \ ++ tmp3 = __riscv_vwmacc_vx_i32m1(z4, F_1_501_MINUS_0_899, in##1, vl); \ ++ } ++ ++#define OUTPUT(bm) \ ++ { \ ++ o0 = __riscv_vadd_vv_i32m1(tmp10, tmp3, vl); \ ++ o7 = __riscv_vsub_vv_i32m1(tmp10, tmp3, vl); \ ++ \ ++ o1 = __riscv_vadd_vv_i32m1(tmp11, tmp2, vl); \ ++ o6 = __riscv_vsub_vv_i32m1(tmp11, tmp2, vl); \ ++ \ ++ o2 = __riscv_vadd_vv_i32m1(tmp12, tmp1, vl); \ ++ o5 = __riscv_vsub_vv_i32m1(tmp12, tmp1, vl); \ ++ \ ++ o3 = __riscv_vadd_vv_i32m1(tmp13, tmp0, vl); \ ++ o4 = __riscv_vsub_vv_i32m1(tmp13, tmp0, vl); \ ++ \ ++ out2 = __riscv_vnsra_wx_i16mf2(o2, bm, vl); \ ++ out1 = __riscv_vnsra_wx_i16mf2(o1, bm, vl); \ ++ out6 = __riscv_vnsra_wx_i16mf2(o6, bm, vl); \ ++ out5 = __riscv_vnsra_wx_i16mf2(o5, bm, vl); \ ++ out0 = __riscv_vnsra_wx_i16mf2(o0, bm, vl); \ ++ out7 = __riscv_vnsra_wx_i16mf2(o7, bm, vl); \ ++ out3 = __riscv_vnsra_wx_i16mf2(o3, bm, vl); \ ++ out4 = __riscv_vnsra_wx_i16mf2(o4, bm, vl); \ ++ } ++ ++#define STORE_ROW_LOAD_COL() \ ++ { \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, out0, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 1, out1, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 2, out2, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 3, out3, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 4, out4, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 5, out5, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 6, out6, vl); \ ++ __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 7, out7, vl); \ ++ \ ++ col0 = __riscv_vlse16_v_i16mf2(workspace + 0, col_stride, vl); 
\
++        col1 = __riscv_vlse16_v_i16mf2(workspace + 1, col_stride, vl); \
++        col2 = __riscv_vlse16_v_i16mf2(workspace + 2, col_stride, vl); \
++        col3 = __riscv_vlse16_v_i16mf2(workspace + 3, col_stride, vl); \
++        col4 = __riscv_vlse16_v_i16mf2(workspace + 4, col_stride, vl); \
++        col5 = __riscv_vlse16_v_i16mf2(workspace + 5, col_stride, vl); \
++        col6 = __riscv_vlse16_v_i16mf2(workspace + 6, col_stride, vl); \
++        col7 = __riscv_vlse16_v_i16mf2(workspace + 7, col_stride, vl); \
++    }
++/* 8x8 inverse DCT (islow variant): dequantizes coef_block with dct_table, runs two 1-D passes through workspace, passes the result through CLIP() and writes 8 samples to each of output_buf[0..7] starting at output_col. */
++void jsimd_idct_islow_rvv(void *dct_table, JCOEFPTR coef_block,
++                          JSAMPARRAY output_buf, JDIMENSION output_col)
++{
++    int col_stride = DCTSIZE * sizeof(DCTELEM); /* byte stride between workspace rows, used by STORE_ROW_LOAD_COL */
++    int bm0 = CONST_BITS - PASS1_BITS;          /* right shift applied by OUTPUT() after pass 1 */
++    int bm1 = CONST_BITS + PASS1_BITS + 3;      /* right shift applied by OUTPUT() after pass 2 */
++    int32_t bias0 = ROUND_ADD(CONST_BITS - PASS1_BITS);
++    int32_t bias1 = ROUND_ADD(CONST_BITS + PASS1_BITS + 3);
++    size_t stride[8]; /* byte offset of output row i relative to output_buf[0] */
++    size_t vl = __riscv_vsetvl_e16m2(DCTSIZE); /* NOTE(review): requested for e16/m2 but every vector below is mf2/m1; the "8 ==" tests assume vl == DCTSIZE -- confirm the minimum VLEN */
++    ISLOW_MULT_TYPE *quantptr = dct_table;
++    DCTELEM workspace[DCTSIZE2];
++    JSAMPROW output_row = output_buf[0] + output_col;
++
++    stride[0] = 0;
++    for (int i = 1; i < 8; ++i)
++    {
++        stride[i] = output_buf[i] - output_buf[0];
++    }
++
++    vbool32_t beq0;
++    vint16mf2_t bit;
++    vint8mf4_t dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
++    vint16mf2_t row0, row1, row2, row3, row4, row5, row6, row7;
++    vint16mf2_t col0, col1, col2, col3, col4, col5, col6, col7;
++    vint16mf2_t out0, out1, out2, out3, out4, out5, out6, out7;
++    vint16mf2_t quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7;
++    vint32m1_t z1, z2, z3, z4, z5;
++    vint32m1_t tmp0, tmp1, tmp2, tmp3, tmp4;
++    vint32m1_t tmp10, tmp11, tmp12, tmp13;
++    vint32m1_t o0, o1, o2, o3, o4, o5, o6, o7;
++
++    /* Pass 1: process columns from input, store into work array. */
++    /* Load DCT coefficients. */
++    row0 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 0, vl);
++    row1 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 1, vl);
++    row2 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 2, vl);
++    row3 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 3, vl);
++    row4 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 4, vl);
++    row5 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 5, vl);
++    row6 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 6, vl);
++    row7 = __riscv_vle16_v_i16mf2(coef_block + DCTSIZE * 7, vl);
++
++    bit = __riscv_vor_vv_i16mf2(row4, row5, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row6, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row7, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long row4567_cpop0 = __riscv_vcpop_m_b32(beq0, vl); /* number of columns whose rows 4-7 are all zero */
++
++    bit = __riscv_vor_vv_i16mf2(row1, row2, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row3, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long row123_cpop0 = __riscv_vcpop_m_b32(beq0, vl); /* number of columns whose rows 1-3 are all zero */
++
++    if (8 == row123_cpop0 && 8 == row4567_cpop0)
++    {
++        /* Only row 0 is non-zero: load the quantization values for row 0 only. */
++        quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++        row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++        row0 = __riscv_vsll_vx_i16mf2(row0, PASS1_BITS, vl);
++
++        /* Store row 0, then broadcast each of its elements as one pass-2 input vector. */
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, row0, vl);
++
++        col0 = __riscv_vmv_v_x_i16mf2(workspace[0], vl);
++        col1 = __riscv_vmv_v_x_i16mf2(workspace[1], vl);
++        col2 = __riscv_vmv_v_x_i16mf2(workspace[2], vl);
++        col3 = __riscv_vmv_v_x_i16mf2(workspace[3], vl);
++        col4 = __riscv_vmv_v_x_i16mf2(workspace[4], vl);
++        col5 = __riscv_vmv_v_x_i16mf2(workspace[5], vl);
++        col6 = __riscv_vmv_v_x_i16mf2(workspace[6], vl);
++        col7 = __riscv_vmv_v_x_i16mf2(workspace[7], vl);
++    }
++    else
++    {
++        if (8 == row4567_cpop0)
++        {
++            quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++            quant1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++            quant2 = __riscv_vle16_v_i16mf2(quantptr + 2 * DCTSIZE, vl);
++            quant3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++            row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++            row1 = __riscv_vmul_vv_i16mf2(row1, quant1, vl);
++            row2 = __riscv_vmul_vv_i16mf2(row2, quant2, vl);
++            row3 = __riscv_vmul_vv_i16mf2(row3, quant3, vl);
++
++            DO_SPARSE_IDCT_0(row);
++            /* early bias addition for better performance */
++            tmp0 = __riscv_vadd_vx_i32m1(tmp0, bias0, vl);
++            tmp1 = __riscv_vadd_vx_i32m1(tmp1, bias0, vl);
++            DO_SPARSE_IDCT_1(row);
++        }
++        else
++        {
++            /* Load quantization table values for all eight rows. */
++            quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++            quant1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++            quant2 = __riscv_vle16_v_i16mf2(quantptr + 2 * DCTSIZE, vl);
++            quant3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++            quant4 = __riscv_vle16_v_i16mf2(quantptr + 4 * DCTSIZE, vl);
++            quant5 = __riscv_vle16_v_i16mf2(quantptr + 5 * DCTSIZE, vl);
++            quant6 = __riscv_vle16_v_i16mf2(quantptr + 6 * DCTSIZE, vl);
++            quant7 = __riscv_vle16_v_i16mf2(quantptr + 7 * DCTSIZE, vl);
++
++            row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++            row1 = __riscv_vmul_vv_i16mf2(row1, quant1, vl);
++            row2 = __riscv_vmul_vv_i16mf2(row2, quant2, vl);
++            row3 = __riscv_vmul_vv_i16mf2(row3, quant3, vl);
++            row4 = __riscv_vmul_vv_i16mf2(row4, quant4, vl);
++            row5 = __riscv_vmul_vv_i16mf2(row5, quant5, vl);
++            row6 = __riscv_vmul_vv_i16mf2(row6, quant6, vl);
++            row7 = __riscv_vmul_vv_i16mf2(row7, quant7, vl);
++
++            DO_REGULAR_IDCT_0(row);
++            /* early bias addition for better performance */
++            tmp0 = __riscv_vadd_vx_i32m1(tmp0, bias0, vl);
++            tmp1 = __riscv_vadd_vx_i32m1(tmp1, bias0, vl);
++            DO_REGULAR_IDCT_1(row);
++        }
++        OUTPUT(bm0);
++        STORE_ROW_LOAD_COL();
++    }
++
++    /* Second pass: compute IDCT on rows in workspace. */
++    bit = __riscv_vor_vv_i16mf2(col4, col5, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, col6, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, col7, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long col4567_cpop0 = __riscv_vcpop_m_b32(beq0, vl);
++
++    if (8 == col4567_cpop0)
++    {
++        DO_SPARSE_IDCT_0(col);
++        tmp0 = __riscv_vadd_vx_i32m1(tmp0, bias1, vl);
++        tmp0 = __riscv_vadd_vx_i32m1(tmp0, CENTERJSAMPLE << bm1, vl);
++        tmp1 = __riscv_vadd_vx_i32m1(tmp1, bias1, vl);
++        tmp1 = __riscv_vadd_vx_i32m1(tmp1, CENTERJSAMPLE << bm1, vl);
++        DO_SPARSE_IDCT_1(col);
++    }
++    else
++    {
++        DO_REGULAR_IDCT_0(col);
++        /* early bias addition for better performance */
++        tmp0 = __riscv_vadd_vx_i32m1(tmp0, bias1, vl);
++        tmp0 = __riscv_vadd_vx_i32m1(tmp0, CENTERJSAMPLE << bm1, vl);
++        tmp1 = __riscv_vadd_vx_i32m1(tmp1, bias1, vl);
++        tmp1 = __riscv_vadd_vx_i32m1(tmp1, CENTERJSAMPLE << bm1, vl);
++        DO_REGULAR_IDCT_1(col);
++    }
++    OUTPUT(bm1);
++
++    CLIP(out0, i16mf2);
++    CLIP(out1, i16mf2);
++    CLIP(out2, i16mf2);
++    CLIP(out3, i16mf2);
++    CLIP(out4, i16mf2);
++    CLIP(out5, i16mf2);
++    CLIP(out6, i16mf2);
++    CLIP(out7, i16mf2);
++
++    dst0 = __riscv_vncvt_x_x_w_i8mf4(out0, vl);
++    dst1 = __riscv_vncvt_x_x_w_i8mf4(out1, vl);
++    dst2 = __riscv_vncvt_x_x_w_i8mf4(out2, vl);
++    dst3 = __riscv_vncvt_x_x_w_i8mf4(out3, vl);
++    dst4 = __riscv_vncvt_x_x_w_i8mf4(out4, vl);
++    dst5 = __riscv_vncvt_x_x_w_i8mf4(out5, vl);
++    dst6 = __riscv_vncvt_x_x_w_i8mf4(out6, vl);
++    dst7 = __riscv_vncvt_x_x_w_i8mf4(out7, vl);
++
++    /* Store output column c of every row: element i of dstc goes to output_row + c + stride[i]. */
++    vuint64m2_t vstride = __riscv_vle64_v_u64m2(stride, vl); /* NOTE(review): reads size_t as 64-bit elements -- assumes a 64-bit target */
++    __riscv_vsoxei64_v_i8mf4(output_row + 0, vstride, dst0, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 1, vstride, dst1, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 2, vstride, dst2, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 3, vstride, dst3, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 4, vstride, dst4, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 5, vstride, dst5, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 6, vstride, dst6, vl);
++    __riscv_vsoxei64_v_i8mf4(output_row + 7, vstride, dst7, vl);
++}
+\ No newline at end of file
+diff --git a/simd/rvv/jidctred-rvv.c b/simd/rvv/jidctred-rvv.c
+new file mode 100644
+index 0000000..ddee39b
+--- /dev/null
++++ b/simd/rvv/jidctred-rvv.c
+@@ -0,0 +1,353 @@
++/*
++ * jidctred-rvv.c - reduced-size IDCT
++ *
++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved.
++ * Contributed by Liang Junzhao
++ *
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ *
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ *
++ * 1. The origin of this software must not be misrepresented; you must not
++ *    claim that you wrote the original software. If you use this software
++ *    in a product, an acknowledgment in the product documentation would be
++ *    appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ *    misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */
++
++#define JPEG_INTERNALS
++#include "jsimd_rvv.h"
++
++#define CONST_BITS 13
++#define PASS1_BITS 2
++
++#define F_0_720 5906 /* 0.720959822 = 5906 * 2^-13 */
++#define F_0_850 6967 /* 0.850430095 = 6967 * 2^-13 */
++#define F_1_272 10426 /* 1.272758580 = 10426 * 2^-13 */
++#define F_3_624 29692 /* 3.624509785 = 29692 * 2^-13 */
++
++#define ONE_HALF (1 << (CONST_BITS - 1))
++#define bstride sizeof(JCOEF)
++/* Byte offsets (for vloxei8) of columns 1, 5, 3, 7 and 0 inside the two 8-element pass-1 rows stored back to back. */
++static const uint8_t index_1155_high[2] = {5 * bstride, 13 * bstride};
++static const uint8_t index_1155_low[2] = {1 * bstride, 9 * bstride};
++static const uint8_t index_3377_high[2] = {7 * bstride, 15 * bstride};
++static const uint8_t index_3377_low[2] = {3 * bstride, 11 * bstride};
++static const uint8_t index_0246_low[2] = {0 * bstride, 8 * bstride};
++/* Reduced-size IDCT producing a 2x2 block: uses coefficient rows 0, 1, 3, 5 and 7 and writes two samples to each of output_buf[0] and output_buf[1] at output_col. coef_block is only read. */
++void jsimd_idct_2x2_rvv(void *dct_table, JCOEFPTR coef_block,
++                        JSAMPARRAY output_buf, JDIMENSION output_col)
++{
++    size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE);
++    size_t vl_2 = vl >> 2; /* number of output rows (2 when vl == DCTSIZE) */
++    ISLOW_MULT_TYPE *quantptr = dct_table;
++    JCOEF workspace[2 * DCTSIZE]; /* pass-1 rows; kept local so the caller's coefficients are not overwritten */
++    vuint8mf4_t bindex;
++    vint8mf4_t tmp_l_8, tmp_h_8;
++    vint16mf2_t row0, row1, row3, row5, row7;
++    vint16mf2_t quant_row0, quant_row1, quant_row3, quant_row5, quant_row7;
++    vint16mf2_t cols_1155_low, cols_1155_high, cols_3377_low, cols_3377_high, cols_0246_low;
++    vint16mf2_t tmp_l_16, tmp_h_16;
++    vint32m1_t tmp0, tmp10, tmp, tmp_l, tmp_h;
++
++    /* Load DCT coefficients. */
++    row0 = __riscv_vle16_v_i16mf2(coef_block + 0 * DCTSIZE, vl);
++    row1 = __riscv_vle16_v_i16mf2(coef_block + 1 * DCTSIZE, vl);
++    row3 = __riscv_vle16_v_i16mf2(coef_block + 3 * DCTSIZE, vl);
++    row5 = __riscv_vle16_v_i16mf2(coef_block + 5 * DCTSIZE, vl);
++    row7 = __riscv_vle16_v_i16mf2(coef_block + 7 * DCTSIZE, vl);
++
++    /* Load quantization table values. */
++    quant_row0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++    quant_row1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++    quant_row3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++    quant_row5 = __riscv_vle16_v_i16mf2(quantptr + 5 * DCTSIZE, vl);
++    quant_row7 = __riscv_vle16_v_i16mf2(quantptr + 7 * DCTSIZE, vl);
++
++    /* Dequantize DCT coefficients. */
++    row0 = __riscv_vmul_vv_i16mf2(row0, quant_row0, vl);
++    row1 = __riscv_vmul_vv_i16mf2(row1, quant_row1, vl);
++    row3 = __riscv_vmul_vv_i16mf2(row3, quant_row3, vl);
++    row5 = __riscv_vmul_vv_i16mf2(row5, quant_row5, vl);
++    row7 = __riscv_vmul_vv_i16mf2(row7, quant_row7, vl);
++
++    /* Pass 1: process columns from input, put results in vectors row0 and row1. */
++
++    /* Even part */
++    tmp10 = __riscv_vwcvt_x_x_v_i32m1(row0, vl);
++    tmp10 = __riscv_vsll_vx_i32m1(tmp10, CONST_BITS + 2, vl);
++
++    /* Odd part */
++    tmp0 = __riscv_vwmul_vx_i32m1(row1, F_3_624, vl);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_1_272, row3, vl);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_0_850, row5, vl);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_0_720, row7, vl);
++
++    /* Final output stage: descale and narrow to 16-bit. */
++    tmp = __riscv_vadd_vv_i32m1(tmp10, tmp0, vl);
++    tmp = __riscv_vadd_vx_i32m1(tmp, ONE_HALF, vl);
++    row0 = __riscv_vnsra_wx_i16mf2(tmp, CONST_BITS, vl);
++
++    tmp = __riscv_vsub_vv_i32m1(tmp10, tmp0, vl);
++    tmp = __riscv_vadd_vx_i32m1(tmp, ONE_HALF, vl);
++    row1 = __riscv_vnsra_wx_i16mf2(tmp, CONST_BITS, vl);
++
++    /* Transpose two rows through the local workspace, ready for second pass. */
++    __riscv_vse16_v_i16mf2(workspace + 0 * DCTSIZE, row0, vl);
++    __riscv_vse16_v_i16mf2(workspace + 1 * DCTSIZE, row1, vl);
++
++    bindex = __riscv_vle8_v_u8mf4(index_1155_low, vl_2);
++    cols_1155_low = __riscv_vloxei8_v_i16mf2(workspace, bindex, vl_2);
++    bindex = __riscv_vle8_v_u8mf4(index_1155_high, vl_2);
++    cols_1155_high = __riscv_vloxei8_v_i16mf2(workspace, bindex, vl_2);
++
++    bindex = __riscv_vle8_v_u8mf4(index_3377_low, vl_2);
++    cols_3377_low = __riscv_vloxei8_v_i16mf2(workspace, bindex, vl_2);
++    bindex = __riscv_vle8_v_u8mf4(index_3377_high, vl_2);
++    cols_3377_high = __riscv_vloxei8_v_i16mf2(workspace, bindex, vl_2);
++
++    bindex = __riscv_vle8_v_u8mf4(index_0246_low, vl_2);
++    cols_0246_low = __riscv_vloxei8_v_i16mf2(workspace, bindex, vl_2);
++
++    /* Pass 2: process two rows, store to output array. */
++
++    /* Even part: we're only interested in col0; the top half of tmp10 is "don't care." */
++    tmp10 = __riscv_vwcvt_x_x_v_i32m1(cols_0246_low, vl_2);
++    tmp10 = __riscv_vsll_vx_i32m1(tmp10, CONST_BITS + 2, vl_2);
++
++    /* Odd part: we're only interested in the bottom half of tmp0. */
++    tmp0 = __riscv_vwmul_vx_i32m1(cols_1155_low, F_3_624, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_1_272, cols_3377_low, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_0_850, cols_1155_high, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_0_720, cols_3377_high, vl_2);
++
++    /* Final output stage: descale and clamp to range [0-255]. */
++    tmp_l = __riscv_vadd_vv_i32m1(tmp10, tmp0, vl_2);
++    tmp_h = __riscv_vsub_vv_i32m1(tmp10, tmp0, vl_2);
++
++    tmp_l = __riscv_vadd_vx_i32m1(tmp_l, (1 << (CONST_BITS + PASS1_BITS + 3 + 2 - 1)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 2)), vl_2);
++    tmp_h = __riscv_vadd_vx_i32m1(tmp_h, (1 << (CONST_BITS + PASS1_BITS + 3 + 2 - 1)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 2)), vl_2);
++
++    tmp_l_16 = __riscv_vnsra_wx_i16mf2(tmp_l, CONST_BITS + PASS1_BITS + 3 + 2, vl_2);
++    tmp_h_16 = __riscv_vnsra_wx_i16mf2(tmp_h, CONST_BITS + PASS1_BITS + 3 + 2, vl_2);
++
++    CLIP(tmp_l_16, i16mf2);
++    CLIP(tmp_h_16, i16mf2);
++
++    tmp_l_8 = __riscv_vncvt_x_x_w_i8mf4(tmp_l_16, vl_2);
++    tmp_h_8 = __riscv_vncvt_x_x_w_i8mf4(tmp_h_16, vl_2);
++
++    __riscv_vsse8_v_i8mf4(output_buf[0] + output_col, (output_buf[1] - output_buf[0]), tmp_l_8, vl_2);
++    __riscv_vsse8_v_i8mf4(output_buf[0] + output_col + 1, (output_buf[1] - output_buf[0]), tmp_h_8, vl_2);
++}
++
++#define F_1_847 15137 /* 1.847759065 = 15137 * 2^-13 */
++#define F_0_765 6270 /* 0.765366865 = 6270 * 2^-13 */
++#define F_0_211 1730 /* 0.211164243 = 1730 * 2^-13 */
++#define F_1_451 11893 /* 1.451774981 = 11893 * 2^-13 */
++#define F_2_172 17799 /* 2.172734803 = 17799 * 2^-13 */
++#define F_1_061 8697 /* 1.061594337 = 8697 * 2^-13 */
++#define F_0_509 4176 /* 0.509795579 = 4176 * 2^-13 */
++#define F_0_601 4926 /* 0.601344887 = 4926 * 2^-13 */
++#define F_0_899 7373 /* 0.899976223 = 7373 * 2^-13 */
++#define F_2_562 20995 /* 2.562915447 = 20995 * 2^-13 */
++/* Reduced-size IDCT producing a 4x4 block: uses coefficient rows 0-3 and 5-7 and writes four samples to each of output_buf[0..3] at output_col. */
++void jsimd_idct_4x4_rvv(void *dct_table, JCOEFPTR coef_block,
++                        JSAMPARRAY output_buf, JDIMENSION output_col)
++{
++    int col_stride = DCTSIZE * sizeof(DCTELEM);
++    size_t stride[4]; /* byte offset of output row i relative to output_buf[0] */
++    size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE);
++    size_t vl_2 = vl >> 1; /* number of output rows (4 when vl == DCTSIZE) */
++    DCTELEM workspace[DCTSIZE2 / 2];
++    ISLOW_MULT_TYPE *quantptr = dct_table;
++    JSAMPROW output_row = output_buf[0] + output_col;
++
++    stride[0] = 0;
++    for (int i = 1; i < 4; ++i)
++    {
++        stride[i] = output_buf[i] - output_buf[0];
++    }
++
++    vbool32_t beq0;
++    vint8mf4_t cols_02_low_8, cols_02_high_8, cols_13_low_8, cols_13_high_8;
++    vint16mf2_t bit;
++    vint16mf2_t z1, z2, z3, z4;
++    vint16mf2_t row0, row1, row2, row3, row5, row6, row7;
++    vint16mf2_t col0, col1, col2, col3, col5, col6, col7;
++    vint16mf2_t quant0, quant1, quant2, quant3, quant5, quant6, quant7;
++    vint16mf2_t cols_02_low_16, cols_02_high_16, cols_13_low_16, cols_13_high_16;
++    vint32m1_t o0, o1, o2, o3;
++    vint32m1_t tmp0, tmp2, tmp10, tmp12;
++    vint32m1_t cols_02_low_32, cols_02_high_32, cols_13_low_32, cols_13_high_32;
++
++    /* Load DCT coefficients. */
++    row0 = __riscv_vle16_v_i16mf2(coef_block + 0 * DCTSIZE, vl);
++    row1 = __riscv_vle16_v_i16mf2(coef_block + 1 * DCTSIZE, vl);
++    row2 = __riscv_vle16_v_i16mf2(coef_block + 2 * DCTSIZE, vl);
++    row3 = __riscv_vle16_v_i16mf2(coef_block + 3 * DCTSIZE, vl);
++    row5 = __riscv_vle16_v_i16mf2(coef_block + 5 * DCTSIZE, vl);
++    row6 = __riscv_vle16_v_i16mf2(coef_block + 6 * DCTSIZE, vl);
++    row7 = __riscv_vle16_v_i16mf2(coef_block + 7 * DCTSIZE, vl);
++
++    bit = __riscv_vor_vv_i16mf2(row1, row2, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row3, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row5, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row6, vl);
++    bit = __riscv_vor_vv_i16mf2(bit, row7, vl);
++    beq0 = __riscv_vmseq_vx_i16mf2_b32(bit, 0, vl);
++    unsigned long row1234567_cpop0 = __riscv_vcpop_m_b32(beq0, vl);
++
++    if (8 == row1234567_cpop0)
++    {
++        /* DC-only shortcut: every AC row that feeds this transform is zero, so
++         * each pass-1 output row equals (row0 * quant0) << PASS1_BITS.  Only
++         * row 0 is computed; its elements are broadcast as pass-2 columns. */
++        /* Load quantization table values for DC coefficients. */
++        quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++        row0 = __riscv_vmul_vv_i16mf2(row0, quant0, vl);
++        row0 = __riscv_vsll_vx_i16mf2(row0, PASS1_BITS, vl);
++
++        /* Store row 0 so that its elements can be read back as scalars. */
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, row0, vl);
++
++        col0 = __riscv_vmv_v_x_i16mf2(workspace[0], vl);
++        col1 = __riscv_vmv_v_x_i16mf2(workspace[1], vl);
++        col2 = __riscv_vmv_v_x_i16mf2(workspace[2], vl);
++        col3 = __riscv_vmv_v_x_i16mf2(workspace[3], vl);
++        col5 = __riscv_vmv_v_x_i16mf2(workspace[5], vl);
++        col6 = __riscv_vmv_v_x_i16mf2(workspace[6], vl);
++        col7 = __riscv_vmv_v_x_i16mf2(workspace[7], vl);
++    }
++    else
++    {
++        quant0 = __riscv_vle16_v_i16mf2(quantptr + 0 * DCTSIZE, vl);
++        quant1 = __riscv_vle16_v_i16mf2(quantptr + 1 * DCTSIZE, vl);
++        quant2 = __riscv_vle16_v_i16mf2(quantptr + 2 * DCTSIZE, vl);
++        quant3 = __riscv_vle16_v_i16mf2(quantptr + 3 * DCTSIZE, vl);
++        quant5 = __riscv_vle16_v_i16mf2(quantptr + 5 * DCTSIZE, vl);
++        quant6 = __riscv_vle16_v_i16mf2(quantptr + 6 * DCTSIZE, vl);
++        quant7 = __riscv_vle16_v_i16mf2(quantptr + 7 * DCTSIZE, vl);
++
++        /* Even part */
++        tmp0 = __riscv_vwmul_vv_i32m1(row0, quant0, vl);
++        tmp0 = __riscv_vsll_vx_i32m1(tmp0, CONST_BITS + 1, vl);
++
++        z2 = __riscv_vmul_vv_i16mf2(row2, quant2, vl);
++        z3 = __riscv_vmul_vv_i16mf2(row6, quant6, vl);
++
++        tmp2 = __riscv_vwmul_vx_i32m1(z2, F_1_847, vl);
++        tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, -F_0_765, z3, vl);
++
++        tmp10 = __riscv_vadd_vv_i32m1(tmp0, tmp2, vl);
++        tmp12 = __riscv_vsub_vv_i32m1(tmp0, tmp2, vl);
++
++        /* Odd part */
++        z1 = __riscv_vmul_vv_i16mf2(row7, quant7, vl);
++        z2 = __riscv_vmul_vv_i16mf2(row5, quant5, vl);
++        z3 = __riscv_vmul_vv_i16mf2(row3, quant3, vl);
++        z4 = __riscv_vmul_vv_i16mf2(row1, quant1, vl);
++
++        tmp0 = __riscv_vwmul_vx_i32m1(z1, -F_0_211, vl);
++        tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_1_451, z2, vl);
++        tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_2_172, z3, vl);
++        tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_1_061, z4, vl);
++
++        tmp2 = __riscv_vwmul_vx_i32m1(z1, -F_0_509, vl);
++        tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, -F_0_601, z2, vl);
++        tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, F_0_899, z3, vl);
++        tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, F_2_562, z4, vl);
++
++        /* Final output stage: descale and narrow to 16-bit. */
++        o0 = __riscv_vadd_vv_i32m1(tmp10, tmp2, vl);
++        o3 = __riscv_vsub_vv_i32m1(tmp10, tmp2, vl);
++        o1 = __riscv_vadd_vv_i32m1(tmp12, tmp0, vl);
++        o2 = __riscv_vsub_vv_i32m1(tmp12, tmp0, vl);
++
++        o0 = __riscv_vadd_vx_i32m1(o0, 1 << (CONST_BITS - PASS1_BITS), vl);
++        o3 = __riscv_vadd_vx_i32m1(o3, 1 << (CONST_BITS - PASS1_BITS), vl);
++        o1 = __riscv_vadd_vx_i32m1(o1, 1 << (CONST_BITS - PASS1_BITS), vl);
++        o2 = __riscv_vadd_vx_i32m1(o2, 1 << (CONST_BITS - PASS1_BITS), vl);
++
++        row0 = __riscv_vnsra_wx_i16mf2(o0, CONST_BITS - PASS1_BITS + 1, vl);
++        row3 = __riscv_vnsra_wx_i16mf2(o3, CONST_BITS - PASS1_BITS + 1, vl);
++        row1 = __riscv_vnsra_wx_i16mf2(o1, CONST_BITS - PASS1_BITS + 1, vl);
++        row2 = __riscv_vnsra_wx_i16mf2(o2, CONST_BITS - PASS1_BITS + 1, vl);
++
++        /* Transpose 8x4 block to perform IDCT on rows in second pass. */
++        /* Store rows */
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 0, row0, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 1, row1, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 2, row2, vl);
++        __riscv_vse16_v_i16mf2(workspace + DCTSIZE * 3, row3, vl);
++
++        col0 = __riscv_vlse16_v_i16mf2(workspace + 0, col_stride, vl_2);
++        col1 = __riscv_vlse16_v_i16mf2(workspace + 1, col_stride, vl_2);
++        col2 = __riscv_vlse16_v_i16mf2(workspace + 2, col_stride, vl_2);
++        col3 = __riscv_vlse16_v_i16mf2(workspace + 3, col_stride, vl_2);
++        col5 = __riscv_vlse16_v_i16mf2(workspace + 5, col_stride, vl_2);
++        col6 = __riscv_vlse16_v_i16mf2(workspace + 6, col_stride, vl_2);
++        col7 = __riscv_vlse16_v_i16mf2(workspace + 7, col_stride, vl_2);
++    }
++
++    /* Commence second pass of IDCT. */
++
++    /* Even part */
++    tmp0 = __riscv_vwcvt_x_x_v_i32m1(col0, vl_2);
++    tmp0 = __riscv_vsll_vx_i32m1(tmp0, CONST_BITS + 1, vl_2);
++
++    tmp2 = __riscv_vwmul_vx_i32m1(col2, F_1_847, vl_2);
++    tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, -F_0_765, col6, vl_2);
++
++    tmp10 = __riscv_vadd_vv_i32m1(tmp0, tmp2, vl_2);
++    tmp12 = __riscv_vsub_vv_i32m1(tmp0, tmp2, vl_2);
++
++    /* Odd part */
++    tmp0 = __riscv_vwmul_vx_i32m1(col7, -F_0_211, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_1_451, col5, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, -F_2_172, col3, vl_2);
++    tmp0 = __riscv_vwmacc_vx_i32m1(tmp0, F_1_061, col1, vl_2);
++
++    tmp2 = __riscv_vwmul_vx_i32m1(col7, -F_0_509, vl_2);
++    tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, -F_0_601, col5, vl_2);
++    tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, F_0_899, col3, vl_2);
++    tmp2 = __riscv_vwmacc_vx_i32m1(tmp2, F_2_562, col1, vl_2);
++
++    /* Final output stage: descale and clamp to range [0-255]. */
++    cols_02_low_32 = __riscv_vadd_vv_i32m1(tmp10, tmp2, vl_2);
++    cols_02_high_32 = __riscv_vsub_vv_i32m1(tmp12, tmp0, vl_2);
++    cols_13_low_32 = __riscv_vadd_vv_i32m1(tmp12, tmp0, vl_2);
++    cols_13_high_32 = __riscv_vsub_vv_i32m1(tmp10, tmp2, vl_2);
++
++    cols_02_low_32 = __riscv_vadd_vx_i32m1(cols_02_low_32, (1 << (CONST_BITS + PASS1_BITS + 3)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 1)), vl_2);
++    cols_02_high_32 = __riscv_vadd_vx_i32m1(cols_02_high_32, (1 << (CONST_BITS + PASS1_BITS + 3)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 1)), vl_2);
++    cols_13_low_32 = __riscv_vadd_vx_i32m1(cols_13_low_32, (1 << (CONST_BITS + PASS1_BITS + 3)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 1)), vl_2);
++    cols_13_high_32 = __riscv_vadd_vx_i32m1(cols_13_high_32, (1 << (CONST_BITS + PASS1_BITS + 3)) + (CENTERJSAMPLE << (CONST_BITS + PASS1_BITS + 3 + 1)), vl_2);
++
++    cols_02_low_16 = __riscv_vnsra_wx_i16mf2(cols_02_low_32, CONST_BITS + PASS1_BITS + 3 + 1, vl_2);
++    cols_02_high_16 = __riscv_vnsra_wx_i16mf2(cols_02_high_32, CONST_BITS + PASS1_BITS + 3 + 1, vl_2);
++    cols_13_low_16 = __riscv_vnsra_wx_i16mf2(cols_13_low_32, CONST_BITS + PASS1_BITS + 3 + 1, vl_2);
++    cols_13_high_16 = __riscv_vnsra_wx_i16mf2(cols_13_high_32, CONST_BITS + PASS1_BITS + 3 + 1, vl_2);
++
++    CLIP(cols_02_low_16, i16mf2);
++    CLIP(cols_02_high_16, i16mf2);
++    CLIP(cols_13_low_16, i16mf2);
++    CLIP(cols_13_high_16, i16mf2);
++
++    cols_02_low_8 = __riscv_vncvt_x_x_w_i8mf4(cols_02_low_16, vl_2);
++    cols_02_high_8 = __riscv_vncvt_x_x_w_i8mf4(cols_02_high_16, vl_2);
++    cols_13_low_8 = __riscv_vncvt_x_x_w_i8mf4(cols_13_low_16, vl_2);
++    cols_13_high_8 = __riscv_vncvt_x_x_w_i8mf4(cols_13_high_16, vl_2);
++    /* Scatter the four output columns. stride[] has only four entries, so load exactly vl_2 offsets (loading vl would read past the array). */
++    vuint64m2_t vstride = __riscv_vle64_v_u64m2(stride, vl_2);
++    __riscv_vsoxei64_v_i8mf4(output_row + 0, vstride, cols_02_low_8, vl_2);
++    __riscv_vsoxei64_v_i8mf4(output_row + 1, vstride, cols_13_low_8, vl_2);
++    __riscv_vsoxei64_v_i8mf4(output_row + 2, vstride, cols_02_high_8, vl_2);
++    __riscv_vsoxei64_v_i8mf4(output_row + 3, vstride, cols_13_high_8, vl_2);
++}
+\ No newline at end of file
+diff --git a/simd/rvv/jquanti-rvv.c b/simd/rvv/jquanti-rvv.c
+new file mode 100644
+index 0000000..69b2c20
+--- /dev/null
++++ b/simd/rvv/jquanti-rvv.c
+@@ -0,0 +1,132 @@
++/*
++ * jquanti-rvv.c - sample data conversion and quantization
++ *
++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved.
++ * Contributed by Liang Junzhao
++ *
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ *
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ *
++ * 1. The origin of this software must not be misrepresented; you must not
++ *    claim that you wrote the original software.
If you use this software
++ *    in a product, an acknowledgment in the product documentation would be
++ *    appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ *    misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */
++
++/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */
++
++#define JPEG_INTERNALS
++#include "jsimd_rvv.h"
++/* Load an 8x8 block of samples from sample_data[0..7] + start_col, widen it to 16 bits, subtract CENTERJSAMPLE and store it row by row into workspace. */
++void jsimd_convsamp_rvv(JSAMPARRAY sample_data, JDIMENSION start_col,
++                        DCTELEM *workspace)
++{
++    size_t vl = __riscv_vsetvl_e16mf2(DCTSIZE);
++
++    vuint8mf4_t in0, in1, in2, in3, in4, in5, in6, in7;
++    vuint16mf2_t row0, row1, row2, row3, row4, row5, row6, row7;
++    vint16mf2_t out0, out1, out2, out3, out4, out5, out6, out7;
++
++    in0 = __riscv_vle8_v_u8mf4(sample_data[0] + start_col, vl);
++    in1 = __riscv_vle8_v_u8mf4(sample_data[1] + start_col, vl);
++    in2 = __riscv_vle8_v_u8mf4(sample_data[2] + start_col, vl);
++    in3 = __riscv_vle8_v_u8mf4(sample_data[3] + start_col, vl);
++    in4 = __riscv_vle8_v_u8mf4(sample_data[4] + start_col, vl);
++    in5 = __riscv_vle8_v_u8mf4(sample_data[5] + start_col, vl);
++    in6 = __riscv_vle8_v_u8mf4(sample_data[6] + start_col, vl);
++    in7 = __riscv_vle8_v_u8mf4(sample_data[7] + start_col, vl);
++
++    row0 = __riscv_vwcvtu_x_x_v_u16mf2(in0, vl);
++    row1 = __riscv_vwcvtu_x_x_v_u16mf2(in1, vl);
++    row2 = __riscv_vwcvtu_x_x_v_u16mf2(in2, vl);
++    row3 = __riscv_vwcvtu_x_x_v_u16mf2(in3, vl);
++    row4 = __riscv_vwcvtu_x_x_v_u16mf2(in4, vl);
++    row5 = __riscv_vwcvtu_x_x_v_u16mf2(in5, vl);
++    row6 = __riscv_vwcvtu_x_x_v_u16mf2(in6, vl);
++    row7 = __riscv_vwcvtu_x_x_v_u16mf2(in7, vl);
++
++    out0 = __riscv_vreinterpret_v_u16mf2_i16mf2(row0);
++    out1 = __riscv_vreinterpret_v_u16mf2_i16mf2(row1);
++    out2 = __riscv_vreinterpret_v_u16mf2_i16mf2(row2);
++    out3 = __riscv_vreinterpret_v_u16mf2_i16mf2(row3);
++    out4 = __riscv_vreinterpret_v_u16mf2_i16mf2(row4);
++    out5 = __riscv_vreinterpret_v_u16mf2_i16mf2(row5);
++    out6 = __riscv_vreinterpret_v_u16mf2_i16mf2(row6);
++    out7 = __riscv_vreinterpret_v_u16mf2_i16mf2(row7);
++
++    out0 = __riscv_vsub_vx_i16mf2(out0, CENTERJSAMPLE, vl);
++    out1 = __riscv_vsub_vx_i16mf2(out1, CENTERJSAMPLE, vl);
++    out2 = __riscv_vsub_vx_i16mf2(out2, CENTERJSAMPLE, vl);
++    out3 = __riscv_vsub_vx_i16mf2(out3, CENTERJSAMPLE, vl);
++    out4 = __riscv_vsub_vx_i16mf2(out4, CENTERJSAMPLE, vl);
++    out5 = __riscv_vsub_vx_i16mf2(out5, CENTERJSAMPLE, vl);
++    out6 = __riscv_vsub_vx_i16mf2(out6, CENTERJSAMPLE, vl);
++    out7 = __riscv_vsub_vx_i16mf2(out7, CENTERJSAMPLE, vl);
++
++    __riscv_vse16_v_i16mf2(workspace + 0 * DCTSIZE, out0, vl);
++    __riscv_vse16_v_i16mf2(workspace + 1 * DCTSIZE, out1, vl);
++    __riscv_vse16_v_i16mf2(workspace + 2 * DCTSIZE, out2, vl);
++    __riscv_vse16_v_i16mf2(workspace + 3 * DCTSIZE, out3, vl);
++    __riscv_vse16_v_i16mf2(workspace + 4 * DCTSIZE, out4, vl);
++    __riscv_vse16_v_i16mf2(workspace + 5 * DCTSIZE, out5, vl);
++    __riscv_vse16_v_i16mf2(workspace + 6 * DCTSIZE, out6, vl);
++    __riscv_vse16_v_i16mf2(workspace + 7 * DCTSIZE, out7, vl);
++}
++/* Quantize the DCTSIZE2 coefficients in workspace into coef_block: |x| plus a correction is multiplied by a reciprocal, shifted right, and then given back the sign of x. */
++void jsimd_quantize_rvv(JCOEFPTR coef_block, DCTELEM *divisors,
++                        DCTELEM *workspace)
++{
++    int cols_remaining;
++    size_t vl;
++    JCOEFPTR out_ptr = coef_block;
++    DCTELEM *in_ptr = workspace;
++    DCTELEM *shift_ptr = divisors + 3 * DCTSIZE2;         /* shift counts: fourth DCTSIZE2-sized table */
++    UDCTELEM *recip_ptr = (UDCTELEM *)divisors;            /* reciprocals: first table */
++    UDCTELEM *corr_ptr = (UDCTELEM *)divisors + DCTSIZE2;  /* corrections: second table */
++
++    vbool4_t mask;
++    vint16m4_t out, shift;
++    vuint16m4_t temp, recip, corr, ushift;
++    vuint32m8_t product;
++
++    for (cols_remaining = DCTSIZE2; cols_remaining > 0; cols_remaining -= vl)
++    {
++        /* Set vl for each iteration. */
++        vl = __riscv_vsetvl_e16m4(cols_remaining);
++
++        /* Load needed variables. */
++        out = __riscv_vle16_v_i16m4(in_ptr, vl);
++        recip = __riscv_vle16_v_u16m4(recip_ptr, vl);
++        corr = __riscv_vle16_v_u16m4(corr_ptr, vl);
++        shift = __riscv_vle16_v_i16m4(shift_ptr, vl);
++
++        /* Take |x|. The _mu form keeps the non-negative lanes; plain _m leaves masked-off lanes unspecified. */
++        mask = __riscv_vmslt_vx_i16m4_b4(out, 0, vl);
++        out = __riscv_vneg_v_i16m4_mu(mask, out, out, vl);
++        temp = __riscv_vreinterpret_v_i16m4_u16m4(out);
++
++        temp = __riscv_vadd_vv_u16m4(temp, corr, vl);
++        product = __riscv_vwmulu_vv_u32m8(temp, recip, vl);
++        shift = __riscv_vadd_vx_i16m4(shift, sizeof(DCTELEM) * 8, vl); /* also drop the low DCTELEM-width bits of the widened product */
++        ushift = __riscv_vreinterpret_v_i16m4_u16m4(shift);
++        temp = __riscv_vnsrl_wv_u16m4(product, ushift, vl);
++
++        out = __riscv_vreinterpret_v_u16m4_i16m4(temp);
++        out = __riscv_vneg_v_i16m4_mu(mask, out, out, vl); /* restore the sign on lanes that were negative */
++        __riscv_vse16_v_i16m4(out_ptr, out, vl);
++
++        in_ptr += vl;
++        out_ptr += vl;
++        recip_ptr += vl;
++        corr_ptr += vl;
++        shift_ptr += vl;
++    }
++}
+\ No newline at end of file
+diff --git a/simd/rvv/jsimd.c b/simd/rvv/jsimd.c
+new file mode 100644
+index 0000000..9277e76
+--- /dev/null
++++ b/simd/rvv/jsimd.c
+@@ -0,0 +1,940 @@
++/*
++ * jsimd_rvv.c
++ *
++ * Copyright 2009 Pierre Ossman for Cendio AB
++ * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
++ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
++ * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
++ * Copyright (C) 2020, Arm Limited.
++ * Copyright (C) 2023, Spacemit, Inc. Liang Junzhao.
++ *
++ * Based on the x86 SIMD extension for IJG JPEG library,
++ * Copyright (C) 1999-2006, MIYASAKA Masaru.
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc
++ *
++ * This file contains the interface between the "normal" portions
++ * of the library and the RVV implementations when running on a risc-v architecture.
++ */
++
++#define JPEG_INTERNALS
++#include "../../jinclude.h"
++#include "../../jpeglib.h"
++#include "../../jsimd.h"
++#include "../../jdct.h"
++#include "../../jsimddct.h"
++#include "../jsimd.h"
++
++static unsigned int simd_support = ~0; /* ~0 means "not initialized yet"; see init_simd() */
++
++#if defined(__linux__)
++
++#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
++/* TODO: Detect RVV support by examining /proc/cpuinfo (not implemented; the limit above is currently unused here). */
++
++#endif
++
++/* No runtime detection yet: init_simd() below unconditionally enables JSIMD_RVV. */
++/*
++ * Check what SIMD accelerations are supported.
++ *
++ * FIXME: This code is racy under a multi-threaded environment.
++ */
++LOCAL(void)
++init_simd(void)
++{
++#ifndef NO_GETENV
++    char *env = NULL;
++#endif
++#if defined(__linux__)
++    int bufsize = 1024; /* an initial guess for the line buffer size limit; NOTE(review): never read in this function */
++#endif
++
++    if (simd_support != ~0U)
++        return;
++
++    simd_support = 0;
++    simd_support |= JSIMD_RVV;
++
++#ifndef NO_GETENV
++    /* Environment overrides: JSIMD_FORCERVV=1 selects RVV, JSIMD_FORCENONE=1 disables SIMD (checked last, so it wins). */
++    env = getenv("JSIMD_FORCERVV");
++    if ((env != NULL) && (strcmp(env, "1") == 0))
++        simd_support = JSIMD_RVV;
++    env = getenv("JSIMD_FORCENONE");
++    if ((env != NULL) && (strcmp(env, "1") == 0))
++        simd_support = 0;
++#endif
++}
++
++//1 - returns 1 when the RVV RGB->YCC conversion may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_rgb_ycc(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++    if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++//2 - returns 1 when the RVV RGB->grayscale conversion may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_rgb_gray(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++    if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++//5 - returns 1 when the RVV YCC->RGB conversion may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_ycc_rgb(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++    if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++GLOBAL(int)
++jsimd_can_ycc_rgb565(void)
++{
++    return 0; /* always reports "not available" */
++}
++
++GLOBAL(int)
++jsimd_c_can_null_convert(void)
++{
++    return 0; /* always reports "not available" */
++}
++
++GLOBAL(void)
++jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
++                      JSAMPIMAGE output_buf, JDIMENSION output_row,
++                      int num_rows)
++{
++    void (*rvvfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); /* converter chosen from cinfo->in_color_space */
++
++    switch (cinfo->in_color_space)
++    {
++    case JCS_EXT_RGB:
++        rvvfct = jsimd_extrgb_ycc_convert_rvv;
++        break;
++    case JCS_EXT_RGBX:
++    case JCS_EXT_RGBA:
++        rvvfct = jsimd_extrgbx_ycc_convert_rvv;
++        break;
++    case JCS_EXT_BGR:
++        rvvfct = jsimd_extbgr_ycc_convert_rvv;
++        break;
++    case JCS_EXT_BGRX:
++    case JCS_EXT_BGRA:
++        rvvfct = jsimd_extbgrx_ycc_convert_rvv;
++        break;
++    case JCS_EXT_XBGR:
++    case JCS_EXT_ABGR:
++        rvvfct = jsimd_extxbgr_ycc_convert_rvv;
++        break;
++    case JCS_EXT_XRGB:
++    case JCS_EXT_ARGB:
++        rvvfct = jsimd_extxrgb_ycc_convert_rvv;
++        break;
++    default: /* every other color space value uses the generic routine */
++        rvvfct = jsimd_rgb_ycc_convert_rvv;
++        break;
++    }
++
++    rvvfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
++}
++
++GLOBAL(void)
++jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
++                       JSAMPIMAGE output_buf, JDIMENSION output_row,
++                       int num_rows)
++{
++    void (*rvvfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); /* converter chosen from cinfo->in_color_space */
++
++    switch (cinfo->in_color_space)
++    {
++    case JCS_EXT_RGB:
++        rvvfct = jsimd_extrgb_gray_convert_rvv;
++        break;
++    case JCS_EXT_RGBX:
++    case JCS_EXT_RGBA:
++        rvvfct = jsimd_extrgbx_gray_convert_rvv;
++        break;
++    case JCS_EXT_BGR:
++        rvvfct = jsimd_extbgr_gray_convert_rvv;
++        break;
++    case JCS_EXT_BGRX:
++    case JCS_EXT_BGRA:
++        rvvfct = jsimd_extbgrx_gray_convert_rvv;
++        break;
++    case JCS_EXT_XBGR:
++    case JCS_EXT_ABGR:
++        rvvfct = jsimd_extxbgr_gray_convert_rvv;
++        break;
++    case JCS_EXT_XRGB:
++    case JCS_EXT_ARGB:
++        rvvfct = jsimd_extxrgb_gray_convert_rvv;
++        break;
++    default: /* every other color space value uses the generic routine */
++        rvvfct = jsimd_rgb_gray_convert_rvv;
++        break;
++    }
++
++    rvvfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
++}
++
++GLOBAL(void)
++jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
++                      JDIMENSION input_row, JSAMPARRAY output_buf,
++                      int num_rows)
++{
++    void (*rvvfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); /* converter chosen from cinfo->out_color_space */
++
++    switch (cinfo->out_color_space)
++    {
++    case JCS_EXT_RGB:
++        rvvfct = jsimd_ycc_extrgb_convert_rvv;
++        break;
++    case JCS_EXT_RGBX:
++    case JCS_EXT_RGBA:
++        rvvfct = jsimd_ycc_extrgbx_convert_rvv;
++        break;
++    case JCS_EXT_BGR:
++        rvvfct = jsimd_ycc_extbgr_convert_rvv;
++        break;
++    case JCS_EXT_BGRX:
++    case JCS_EXT_BGRA:
++        rvvfct = jsimd_ycc_extbgrx_convert_rvv;
++        break;
++    case JCS_EXT_XBGR:
++    case JCS_EXT_ABGR:
++        rvvfct = jsimd_ycc_extxbgr_convert_rvv;
++        break;
++    case JCS_EXT_XRGB:
++    case JCS_EXT_ARGB:
++        rvvfct = jsimd_ycc_extxrgb_convert_rvv;
++        break;
++    default: /* every other color space value uses the generic routine */
++        rvvfct = jsimd_ycc_rgb_convert_rvv;
++        break;
++    }
++
++    rvvfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
++}
++
++GLOBAL(void)
++jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
++                         JDIMENSION input_row, JSAMPARRAY output_buf,
++                         int num_rows)
++{ /* intentionally empty; jsimd_can_ycc_rgb565() returns 0 */
++}
++
++GLOBAL(void)
++jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
++                     JSAMPIMAGE output_buf, JDIMENSION output_row,
++                     int num_rows)
++{ /* intentionally empty; jsimd_c_can_null_convert() returns 0 */
++}
++
++//4 - returns 1 when the RVV h2v2 downsampler may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_h2v2_downsample(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++GLOBAL(int)
++jsimd_can_h2v2_smooth_downsample(void)
++{
++    return 0; /* always reports "not available" */
++}
++
++//3 - returns 1 when the RVV h2v1 downsampler may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_h2v1_downsample(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++GLOBAL(void)
++jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
++                      JSAMPARRAY input_data, JSAMPARRAY output_data)
++{
++    jsimd_h2v2_downsample_rvv(cinfo->image_width, cinfo->max_v_samp_factor,
++                              compptr->v_samp_factor, compptr->width_in_blocks,
++                              input_data, output_data);
++}
++
++GLOBAL(void)
++jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
++                             jpeg_component_info *compptr,
++                             JSAMPARRAY input_data, JSAMPARRAY output_data)
++{ /* intentionally empty; jsimd_can_h2v2_smooth_downsample() returns 0 */
++}
++
++GLOBAL(void)
++jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
++                      JSAMPARRAY input_data, JSAMPARRAY output_data)
++{
++    jsimd_h2v1_downsample_rvv(cinfo->image_width, cinfo->max_v_samp_factor,
++                              compptr->v_samp_factor, compptr->width_in_blocks,
++                              input_data, output_data);
++}
++
++//11 - returns 1 when the RVV h2v2 upsampler may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_h2v2_upsample(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++//10 - returns 1 when the RVV h2v1 upsampler may be used, otherwise 0
++GLOBAL(int)
++jsimd_can_h2v1_upsample(void)
++{
++    init_simd();
++
++    /* The code is optimised for these values only */
++    if (BITS_IN_JSAMPLE != 8)
++        return 0;
++    if (sizeof(JDIMENSION) != 4)
++        return 0;
++
++    if (simd_support & JSIMD_RVV)
++        return 1;
++
++    return 0;
++}
++
++GLOBAL(int)
++jsimd_can_int_upsample(void)
++{
++    return 0; /* always reports "not available" */
++}
++
++GLOBAL(void)
++jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
++                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
++{
++    jsimd_h2v2_upsample_rvv(cinfo->max_v_samp_factor,
++                            cinfo->output_width, input_data,
++                            output_data_ptr);
++}
++
++GLOBAL(void)
++jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) ++{ ++ jsimd_h2v1_upsample_rvv(cinfo->max_v_samp_factor, ++ cinfo->output_width, input_data, ++ output_data_ptr); ++} ++ ++GLOBAL(void) ++jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) ++{ ++} ++ ++//8 ++GLOBAL(int) ++jsimd_can_h2v2_fancy_upsample(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++//7 ++GLOBAL(int) ++jsimd_can_h2v1_fancy_upsample(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++//9 ++GLOBAL(int) ++jsimd_can_h1v2_fancy_upsample(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) ++{ ++ jsimd_h2v2_fancy_upsample_rvv(cinfo->max_v_samp_factor, ++ compptr->downsampled_width, input_data, ++ output_data_ptr); ++} ++ ++GLOBAL(void) ++jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) ++{ ++ jsimd_h2v1_fancy_upsample_rvv(cinfo->max_v_samp_factor, ++ compptr->downsampled_width, input_data, ++ output_data_ptr); ++} ++ ++GLOBAL(void) ++jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JSAMPARRAY input_data, JSAMPARRAY 
*output_data_ptr) ++{ ++ jsimd_h1v2_fancy_upsample_rvv(cinfo->max_v_samp_factor, ++ compptr->downsampled_width, input_data, ++ output_data_ptr); ++} ++ ++GLOBAL(int) ++jsimd_can_h2v2_merged_upsample(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++//6 ++GLOBAL(int) ++jsimd_can_h2v1_merged_upsample(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, ++ JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) ++{ ++ void (*rvvfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); ++ ++ switch (cinfo->out_color_space) ++ { ++ case JCS_EXT_RGB: ++ rvvfct = jsimd_h2v2_extrgb_merged_upsample_rvv; ++ break; ++ case JCS_EXT_RGBX: ++ case JCS_EXT_RGBA: ++ rvvfct = jsimd_h2v2_extrgbx_merged_upsample_rvv; ++ break; ++ case JCS_EXT_BGR: ++ rvvfct = jsimd_h2v2_extbgr_merged_upsample_rvv; ++ break; ++ case JCS_EXT_BGRX: ++ case JCS_EXT_BGRA: ++ rvvfct = jsimd_h2v2_extbgrx_merged_upsample_rvv; ++ break; ++ case JCS_EXT_XBGR: ++ case JCS_EXT_ABGR: ++ rvvfct = jsimd_h2v2_extxbgr_merged_upsample_rvv; ++ break; ++ case JCS_EXT_XRGB: ++ case JCS_EXT_ARGB: ++ rvvfct = jsimd_h2v2_extxrgb_merged_upsample_rvv; ++ break; ++ default: ++ rvvfct = jsimd_h2v2_merged_upsample_rvv; ++ break; ++ } ++ ++ rvvfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); ++} ++ ++//6 ++GLOBAL(void) ++jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, ++ JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) ++{ ++ void (*rvvfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); ++ ++ switch (cinfo->out_color_space) ++ { 
++ case JCS_EXT_RGB: ++ rvvfct = jsimd_h2v1_extrgb_merged_upsample_rvv; ++ break; ++ case JCS_EXT_RGBX: ++ case JCS_EXT_RGBA: ++ rvvfct = jsimd_h2v1_extrgbx_merged_upsample_rvv; ++ break; ++ case JCS_EXT_BGR: ++ rvvfct = jsimd_h2v1_extbgr_merged_upsample_rvv; ++ break; ++ case JCS_EXT_BGRX: ++ case JCS_EXT_BGRA: ++ rvvfct = jsimd_h2v1_extbgrx_merged_upsample_rvv; ++ break; ++ case JCS_EXT_XBGR: ++ case JCS_EXT_ABGR: ++ rvvfct = jsimd_h2v1_extxbgr_merged_upsample_rvv; ++ break; ++ case JCS_EXT_XRGB: ++ case JCS_EXT_ARGB: ++ rvvfct = jsimd_h2v1_extxrgb_merged_upsample_rvv; ++ break; ++ default: ++ rvvfct = jsimd_h2v1_merged_upsample_rvv; ++ break; ++ } ++ ++ rvvfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); ++} ++ ++//16 ++GLOBAL(int) ++jsimd_can_convsamp(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(DCTELEM) != 2) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_convsamp_float(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, ++ DCTELEM *workspace) ++{ ++ jsimd_convsamp_rvv(sample_data, start_col, workspace); ++} ++ ++GLOBAL(void) ++jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, ++ FAST_FLOAT *workspace) ++{ ++} ++ ++//13 ++GLOBAL(int) ++jsimd_can_fdct_islow(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(DCTELEM) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++//12 ++GLOBAL(int) ++jsimd_can_fdct_ifast(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(DCTELEM) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) 
++jsimd_can_fdct_float(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_fdct_islow(DCTELEM *data) ++{ ++ jsimd_fdct_islow_rvv(data); ++} ++ ++GLOBAL(void) ++jsimd_fdct_ifast(DCTELEM *data) ++{ ++ jsimd_fdct_ifast_rvv(data); ++} ++ ++GLOBAL(void) ++jsimd_fdct_float(FAST_FLOAT *data) ++{ ++} ++ ++//17 ++GLOBAL(int) ++jsimd_can_quantize(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(JCOEF) != 2) ++ return 0; ++ if (sizeof(DCTELEM) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_quantize_float(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) ++{ ++ jsimd_quantize_rvv(coef_block, divisors, workspace); ++} ++ ++GLOBAL(void) ++jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, ++ FAST_FLOAT *workspace) ++{ ++} ++ ++GLOBAL(int) ++jsimd_can_idct_2x2(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(JCOEF) != 2) ++ return 0; ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ if (sizeof(ISLOW_MULT_TYPE) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_idct_4x4(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(JCOEF) != 2) ++ return 0; ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ if (sizeof(ISLOW_MULT_TYPE) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_idct_6x6(void) ++{ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_idct_12x12(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, 
JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++ jsimd_idct_2x2_rvv(compptr->dct_table, coef_block, output_buf, output_col); ++} ++ ++GLOBAL(void) ++jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++ jsimd_idct_4x4_rvv(compptr->dct_table, coef_block, output_buf, output_col); ++} ++ ++GLOBAL(void) ++jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++} ++ ++GLOBAL(void) ++jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++} ++ ++//15 ++GLOBAL(int) ++jsimd_can_idct_islow(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(JCOEF) != 2) ++ return 0; ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ if (sizeof(ISLOW_MULT_TYPE) != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++//14 ++GLOBAL(int) ++jsimd_can_idct_ifast(void) ++{ ++ init_simd(); ++ ++ /* The code is optimised for these values only */ ++ if (DCTSIZE != 8) ++ return 0; ++ if (sizeof(JCOEF) != 2) ++ return 0; ++ if (BITS_IN_JSAMPLE != 8) ++ return 0; ++ if (sizeof(JDIMENSION) != 4) ++ return 0; ++ if (sizeof(IFAST_MULT_TYPE) != 2) ++ return 0; ++ if (IFAST_SCALE_BITS != 2) ++ return 0; ++ ++ if (simd_support & JSIMD_RVV) ++ return 1; ++ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_can_idct_float(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++ jsimd_idct_islow_rvv(compptr->dct_table, coef_block, output_buf, output_col); ++} ++ ++GLOBAL(void) ++jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, 
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++ jsimd_idct_ifast_rvv(compptr->dct_table, coef_block, output_buf, output_col); ++} ++ ++GLOBAL(void) ++jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, ++ JCOEFPTR coef_block, JSAMPARRAY output_buf, ++ JDIMENSION output_col) ++{ ++} ++ ++GLOBAL(int) ++jsimd_can_huff_encode_one_block(void) ++{ ++ return 0; ++} ++ ++GLOBAL(JOCTET *) ++jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, ++ int last_dc_val, c_derived_tbl *dctbl, ++ c_derived_tbl *actbl) ++{ ++ return NULL; ++} ++ ++GLOBAL(int) ++jsimd_can_encode_mcu_AC_first_prepare(void) ++{ ++ return 0; ++} ++ ++GLOBAL(void) ++jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, ++ const int *jpeg_natural_order_start, int Sl, ++ int Al, UJCOEF *values, size_t *zerobits) ++{ ++} ++ ++GLOBAL(int) ++jsimd_can_encode_mcu_AC_refine_prepare(void) ++{ ++ return 0; ++} ++ ++GLOBAL(int) ++jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, ++ const int *jpeg_natural_order_start, int Sl, ++ int Al, UJCOEF *absvalues, size_t *bits) ++{ ++ return 0; ++} +diff --git a/simd/rvv/jsimd_rvv.h b/simd/rvv/jsimd_rvv.h +new file mode 100644 +index 0000000..e3bb7d8 +--- /dev/null ++++ b/simd/rvv/jsimd_rvv.h +@@ -0,0 +1,39 @@ ++/* ++ * Risc-V vector extension optimizations for libjpeg-turbo ++ * ++ * Copyright (C) 2023, Spacemit, Inc. All Rights Reserved. ++ * Contributed by Liang Junzhao ++ * ++ * This software is provided 'as-is', without any express or implied ++ * warranty. In no event will the authors be held liable for any damages ++ * arising from the use of this software. ++ * ++ * Permission is granted to anyone to use this software for any purpose, ++ * including commercial applications, and to alter it and redistribute it ++ * freely, subject to the following restrictions: ++ * ++ * 1. The origin of this software must not be misrepresented; you must not ++ * claim that you wrote the original software. 
If you use this software ++ * in a product, an acknowledgment in the product documentation would be ++ * appreciated but is not required. ++ * 2. Altered source versions must be plainly marked as such, and must not be ++ * misrepresented as being the original software. ++ * 3. This notice may not be removed or altered from any source distribution. ++ */ ++ ++#define JPEG_INTERNALS ++#include "../../jinclude.h" ++#include "../../jpeglib.h" ++#include "../../jsimd.h" ++#include "../../jdct.h" ++#include "../../jsimddct.h" ++#include "../jsimd.h" ++#include ++ ++#define BYTE_BIT 8 ++ ++#define CLIP(in, dtype) \ ++ { \ ++ in = __riscv_vmax_vx_##dtype(in, 0, vl); \ ++ in = __riscv_vmin_vx_##dtype(in, MAXJSAMPLE, vl); \ ++ } +diff --git a/simd/x86_64/jsimd.c b/simd/x86_64/jsimd.c +index 584a010..3f5ee77 100644 +--- a/simd/x86_64/jsimd.c ++++ b/simd/x86_64/jsimd.c +@@ -2,8 +2,8 @@ + * jsimd_x86_64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB +- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander. +- * Copyright (C) 2015-2016, 2018, Matthieu Darbois. ++ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander. ++ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. +@@ -21,7 +21,6 @@ + #include "../../jdct.h" + #include "../../jsimddct.h" + #include "../jsimd.h" +-#include "jconfigint.h" + + /* + * In the PIC cases, we have no guarantee that constants will keep +@@ -32,13 +31,11 @@ + #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ + #define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ + +-static unsigned int simd_support = (unsigned int)(~0); +-static unsigned int simd_huffman = 1; ++static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0); ++static THREAD_LOCAL unsigned int simd_huffman = 1; + + /* + * Check what SIMD accelerations are supported. 
+- * +- * FIXME: This code is racy under a multi-threaded environment. + */ + LOCAL(void) + init_simd(void) +@@ -148,6 +145,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_ycc_convert_avx2; +@@ -197,6 +197,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_gray_convert_avx2; +@@ -246,6 +249,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_ycc_extrgb_convert_avx2; +@@ -336,6 +342,9 @@ GLOBAL(void) + jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, +@@ -352,6 +361,9 @@ GLOBAL(void) + jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, +@@ -406,6 +418,9 @@ GLOBAL(void) + 
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +@@ -418,6 +433,9 @@ GLOBAL(void) + jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +@@ -472,6 +490,9 @@ GLOBAL(void) + jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, +@@ -486,6 +507,9 @@ GLOBAL(void) + jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, +@@ -545,6 +569,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; +@@ -593,6 +620,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); 
+ ++ if (simd_support == ~0U) ++ init_simd(); ++ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; +@@ -682,6 +712,9 @@ GLOBAL(void) + jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_convsamp_avx2(sample_data, start_col, workspace); + else +@@ -751,6 +784,9 @@ jsimd_can_fdct_float(void) + GLOBAL(void) + jsimd_fdct_islow(DCTELEM *data) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_fdct_islow_avx2(data); + else +@@ -812,6 +848,9 @@ jsimd_can_quantize_float(void) + GLOBAL(void) + jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_quantize_avx2(coef_block, divisors, workspace); + else +@@ -966,6 +1005,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) + { ++ if (simd_support == ~0U) ++ init_simd(); ++ + if (simd_support & JSIMD_AVX2) + jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, + output_col); +@@ -1036,7 +1078,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void) + GLOBAL(void) + jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *values, size_t *zerobits) ++ int Al, UJCOEF *values, size_t *zerobits) + { + jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, + Sl, Al, values, zerobits); +@@ -1060,7 +1102,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void) + GLOBAL(int) + jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, +- int Al, JCOEF *absvalues, size_t *bits) ++ int Al, UJCOEF *absvalues, size_t *bits) + { + return jsimd_encode_mcu_AC_refine_prepare_sse2(block, + 
jpeg_natural_order_start, +diff --git a/tjbench.c b/tjbench.c +index 90786cf..624127f 100644 +--- a/tjbench.c ++++ b/tjbench.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2009-2019, 2021-2022 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2009-2019, 2021-2023 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -82,7 +82,7 @@ int tjErrorLine = -1, tjErrorCode = -1; + } + + int flags = TJFLAG_NOREALLOC, compOnly = 0, decompOnly = 0, doYUV = 0, +- quiet = 0, doTile = 0, pf = TJPF_BGR, yuvPad = 1, doWrite = 1; ++ quiet = 0, doTile = 0, pf = TJPF_BGR, yuvAlign = 1, doWrite = 1; + char *ext = "ppm"; + const char *pixFormatStr[TJ_NUMPF] = { + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK" +@@ -182,7 +182,7 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf, + if (doYUV) { + int width = doTile ? tilew : scaledw; + int height = doTile ? 
tileh : scaledh; +- unsigned long yuvSize = tjBufSizeYUV2(width, yuvPad, height, subsamp); ++ unsigned long yuvSize = tjBufSizeYUV2(width, yuvAlign, height, subsamp); + + if (yuvSize == (unsigned long)-1) + THROW_TJ("allocating YUV buffer"); +@@ -209,10 +209,10 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf, + double startDecode; + + if (tjDecompressToYUV2(handle, jpegBuf[tile], jpegSize[tile], yuvBuf, +- width, yuvPad, height, flags) == -1) ++ width, yuvAlign, height, flags) == -1) + THROW_TJ("executing tjDecompressToYUV2()"); + startDecode = getTime(); +- if (tjDecodeYUV(handle, yuvBuf, yuvPad, subsamp, dstPtr2, width, ++ if (tjDecodeYUV(handle, yuvBuf, yuvAlign, subsamp, dstPtr2, width, + pitch, height, pf, flags) == -1) + THROW_TJ("executing tjDecodeYUV()"); + if (iter >= 0) elapsedDecode += getTime() - startDecode; +@@ -273,12 +273,12 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf, + qualStr, sizeStr, ext); + + if (tjSaveImage(tempStr, dstBuf, scaledw, 0, scaledh, pf, flags) == -1) +- THROW_TJG("saving bitmap"); ++ THROW_TJG("saving output image"); + ptr = strrchr(tempStr, '.'); + SNPRINTF(ptr, 1024 - (ptr - tempStr), "-err.%s", ext); + if (srcBuf && sf.num == 1 && sf.denom == 1) { + if (!quiet) printf("Compression error written to %s.\n", tempStr); +- if (subsamp == TJ_GRAYSCALE) { ++ if (subsamp == TJSAMP_GRAY) { + unsigned long index, index2; + + for (row = 0, index = 0; row < h; row++, index += pitch) { +@@ -305,7 +305,7 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf, + srcBuf[pitch * row + col]); + } + if (tjSaveImage(tempStr, dstBuf, w, 0, h, pf, flags) == -1) +- THROW_TJG("saving bitmap"); ++ THROW_TJG("saving output image"); + } + + bailout: +@@ -380,7 +380,7 @@ static int fullTest(unsigned char *srcBuf, int w, int h, int subsamp, + THROW_TJ("executing tjInitCompress()"); + + if (doYUV) { +- yuvSize = tjBufSizeYUV2(tilew, yuvPad, tileh, subsamp); ++ yuvSize = tjBufSizeYUV2(tilew, yuvAlign, 
tileh, subsamp); + if (yuvSize == (unsigned long)-1) + THROW_TJ("allocating YUV buffer"); + if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL) +@@ -407,10 +407,10 @@ static int fullTest(unsigned char *srcBuf, int w, int h, int subsamp, + double startEncode = getTime(); + + if (tjEncodeYUV3(handle, srcPtr2, width, pitch, height, pf, yuvBuf, +- yuvPad, subsamp, flags) == -1) ++ yuvAlign, subsamp, flags) == -1) + THROW_TJ("executing tjEncodeYUV3()"); + if (iter >= 0) elapsedEncode += getTime() - startEncode; +- if (tjCompressFromYUV(handle, yuvBuf, width, yuvPad, height, ++ if (tjCompressFromYUV(handle, yuvBuf, width, yuvAlign, height, + subsamp, &jpegBuf[tile], &jpegSize[tile], + jpegQual, flags) == -1) + THROW_TJ("executing tjCompressFromYUV()"); +@@ -568,7 +568,7 @@ static int decompTest(char *fileName) + + if (quiet == 1) { + printf("All performance values in Mpixels/sec\n\n"); +- printf("Bitmap JPEG JPEG %s %s Xform Comp Decomp ", ++ printf("Pixel JPEG JPEG %s %s Xform Comp Decomp ", + doTile ? "Tile " : "Image", doTile ? 
"Tile " : "Image"); + if (doYUV) printf("Decode"); + printf("\n"); +@@ -630,7 +630,7 @@ static int decompTest(char *fileName) + tw = h; th = w; ttilew = tileh; ttileh = tilew; + } + +- if (xformOpt & TJXOPT_GRAY) tsubsamp = TJ_GRAYSCALE; ++ if (xformOpt & TJXOPT_GRAY) tsubsamp = TJSAMP_GRAY; + if (xformOp == TJXOP_HFLIP || xformOp == TJXOP_ROT180) + tw = tw - (tw % tjMCUWidth[tsubsamp]); + if (xformOp == TJXOP_VFLIP || xformOp == TJXOP_ROT180) +@@ -750,38 +750,34 @@ static void usage(char *progName) + int i; + + printf("USAGE: %s\n", progName); +- printf(" [options]\n\n"); ++ printf(" [options]\n\n"); + printf(" %s\n", progName); +- printf(" [options]\n\n"); ++ printf(" [options]\n\n"); + printf("Options:\n\n"); +- printf("-alloc = Dynamically allocate JPEG image buffers\n"); +- printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n"); +- printf("-bottomup = Test bottom-up compression/decompression\n"); +- printf("-tile = Test performance of the codec when the image is encoded as separate\n"); +- printf(" tiles of varying sizes.\n"); ++ printf("-alloc = Dynamically allocate JPEG buffers\n"); ++ printf("-bmp = Use Windows Bitmap format for output images [default = PPM]\n"); ++ printf("-bottomup = Use bottom-up row order for packed-pixel source/destination buffers\n"); ++ printf("-tile = Compress/transform the input image into separate JPEG tiles of varying\n"); ++ printf(" sizes (useful for measuring JPEG overhead)\n"); + printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); +- printf(" Test the specified color conversion path in the codec (default = BGR)\n"); +- printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n"); +- printf(" and destination bitmaps are still RGB. 
The conversion is done\n"); +- printf(" internally prior to compression or after decompression.)\n"); +- printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); +- printf(" the underlying codec\n"); +- printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); +- printf(" codec\n"); +- printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); +- printf(" underlying codec\n"); ++ printf(" Use the specified pixel format for packed-pixel source/destination buffers\n"); ++ printf(" [default = BGR]\n"); ++ printf("-cmyk = Indirectly test YCCK JPEG compression/decompression\n"); ++ printf(" (use the CMYK pixel format for packed-pixel source/destination buffers)\n"); ++ printf("-fastupsample = Use the fastest chrominance upsampling algorithm available\n"); ++ printf("-fastdct = Use the fastest DCT/IDCT algorithm available\n"); ++ printf("-accuratedct = Use the most accurate DCT/IDCT algorithm available\n"); + printf("-progressive = Use progressive entropy coding in JPEG images generated by\n"); +- printf(" compression and transform operations.\n"); +- printf("-subsamp = When testing JPEG compression, this option specifies the level\n"); +- printf(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or\n"); +- printf(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n"); +- printf(" sequence.\n"); ++ printf(" compression and transform operations\n"); ++ printf("-subsamp = When compressing, use the specified level of chrominance\n"); ++ printf(" subsampling ( = 444, 422, 440, 420, 411, or GRAY) [default = test\n"); ++ printf(" Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence]\n"); + printf("-quiet = Output results in tabular rather than verbose format\n"); +- printf("-yuv = Test YUV encoding/decoding functions\n"); +- printf("-yuvpad

    = If testing YUV encoding/decoding, this specifies the number of\n"); +- printf(" bytes to which each row of each plane in the intermediate YUV image is\n"); +- printf(" padded (default = 1)\n"); +- printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n"); ++ printf("-yuv = Compress from/decompress to intermediate planar YUV images\n"); ++ printf("-yuvpad

    = The number of bytes by which each row in each plane of an\n"); ++ printf(" intermediate YUV image is evenly divisible (must be a power of 2)\n"); ++ printf(" [default = 1]\n"); ++ printf("-scale M/N = When decompressing, scale the width/height of the JPEG image by a\n"); + printf(" factor of M/N (M/N = "); + for (i = 0; i < nsf; i++) { + printf("%d/%d", scalingFactors[i].num, scalingFactors[i].denom); +@@ -794,24 +790,24 @@ static void usage(char *progName) + } + printf(")\n"); + printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); +- printf(" Perform the corresponding lossless transform prior to\n"); +- printf(" decompression (these options are mutually exclusive)\n"); +- printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n"); +- printf(" test (can be combined with the other transforms above)\n"); ++ printf(" Perform the specified lossless transform operation on the input image\n"); ++ printf(" prior to decompression (these operations are mutually exclusive)\n"); ++ printf("-grayscale = Transform the input image into a grayscale JPEG image prior to\n"); ++ printf(" decompression (can be combined with the other transform operations above)\n"); + printf("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)\n"); +- printf(" when transforming the image.\n"); +- printf("-benchtime = Run each benchmark for at least seconds (default = 5.0)\n"); +- printf("-warmup = Run each benchmark for seconds (default = 1.0) prior to\n"); ++ printf(" when transforming the input image\n"); ++ printf("-benchtime = Run each benchmark for at least seconds [default = 5.0]\n"); ++ printf("-warmup = Run each benchmark for seconds [default = 1.0] prior to\n"); + printf(" starting the timer, in order to prime the caches and thus improve the\n"); +- printf(" consistency of the results.\n"); ++ printf(" consistency of the benchmark results\n"); + printf("-componly = Stop after running compression tests. 
Do not test decompression.\n"); + printf("-nowrite = Do not write reference or output images (improves consistency of\n"); +- printf(" performance measurements.)\n"); ++ printf(" benchmark results)\n"); + printf("-limitscans = Refuse to decompress or transform progressive JPEG images that\n"); + printf(" have an unreasonably large number of scans\n"); + printf("-stoponwarning = Immediately discontinue the current\n"); +- printf(" compression/decompression/transform operation if the underlying codec\n"); +- printf(" throws a warning (non-fatal error)\n\n"); ++ printf(" compression/decompression/transform operation if a warning (non-fatal\n"); ++ printf(" error) occurs\n\n"); + printf("NOTE: If the quality is specified as a range (e.g. 90-100), a separate\n"); + printf("test will be performed for all quality values in the range.\n\n"); + exit(1); +@@ -857,7 +853,7 @@ int main(int argc, char *argv[]) + if (!strcasecmp(argv[i], "-tile")) { + doTile = 1; xformOpt |= TJXOPT_CROP; + } else if (!strcasecmp(argv[i], "-fastupsample")) { +- printf("Using fast upsampling code\n\n"); ++ printf("Using fastest upsampling algorithm\n\n"); + flags |= TJFLAG_FASTUPSAMPLE; + } else if (!strcasecmp(argv[i], "-fastdct")) { + printf("Using fastest DCT/IDCT algorithm\n\n"); +@@ -868,6 +864,7 @@ int main(int argc, char *argv[]) + } else if (!strcasecmp(argv[i], "-progressive")) { + printf("Using progressive entropy coding\n\n"); + flags |= TJFLAG_PROGRESSIVE; ++ xformOpt |= TJXOPT_PROGRESSIVE; + } else if (!strcasecmp(argv[i], "-rgb")) + pf = TJPF_RGB; + else if (!strcasecmp(argv[i], "-rgbx")) +@@ -940,12 +937,13 @@ int main(int argc, char *argv[]) + else if (!strcasecmp(argv[i], "-bmp")) + ext = "bmp"; + else if (!strcasecmp(argv[i], "-yuv")) { +- printf("Testing YUV planar encoding/decoding\n\n"); ++ printf("Testing planar YUV encoding/decoding\n\n"); + doYUV = 1; + } else if (!strcasecmp(argv[i], "-yuvpad") && i < argc - 1) { + int tempi = atoi(argv[++i]); + +- if (tempi >= 1) yuvPad = 
tempi; ++ if (tempi >= 1 && (tempi & (tempi - 1)) == 0) yuvAlign = tempi; ++ else usage(argv[0]); + } else if (!strcasecmp(argv[i], "-subsamp") && i < argc - 1) { + i++; + if (toupper(argv[i][0]) == 'G') subsamp = TJSAMP_GRAY; +@@ -958,6 +956,7 @@ int main(int argc, char *argv[]) + case 440: subsamp = TJSAMP_440; break; + case 420: subsamp = TJSAMP_420; break; + case 411: subsamp = TJSAMP_411; break; ++ default: usage(argv[0]); + } + } + } else if (!strcasecmp(argv[i], "-componly")) +@@ -974,26 +973,26 @@ int main(int argc, char *argv[]) + + if ((sf.num != 1 || sf.denom != 1) && doTile) { + printf("Disabling tiled compression/decompression tests, because those tests do not\n"); +- printf("work when scaled decompression is enabled.\n"); +- doTile = 0; ++ printf("work when scaled decompression is enabled.\n\n"); ++ doTile = 0; xformOpt &= (~TJXOPT_CROP); + } + + if ((flags & TJFLAG_NOREALLOC) == 0 && doTile) { + printf("Disabling tiled compression/decompression tests, because those tests do not\n"); + printf("work when dynamic JPEG buffer allocation is enabled.\n\n"); +- doTile = 0; ++ doTile = 0; xformOpt &= (~TJXOPT_CROP); + } + + if (!decompOnly) { + if ((srcBuf = tjLoadImage(argv[1], &w, 1, &h, &pf, flags)) == NULL) +- THROW_TJG("loading bitmap"); ++ THROW_TJG("loading input image"); + temp = strrchr(argv[1], '.'); + if (temp != NULL) *temp = '\0'; + } + + if (quiet == 1 && !decompOnly) { + printf("All performance values in Mpixels/sec\n\n"); +- printf("Bitmap JPEG JPEG %s %s ", ++ printf("Pixel JPEG JPEG %s %s ", + doTile ? "Tile " : "Image", doTile ? "Tile " : "Image"); + if (doYUV) printf("Encode "); + printf("Comp Comp Decomp "); +diff --git a/tjexample.c b/tjexample.c +index 505c9dd..0943725 100644 +--- a/tjexample.c ++++ b/tjexample.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2011-2012, 2014-2015, 2017, 2019, 2021-2022 ++ * Copyright (C)2011-2012, 2014-2015, 2017, 2019, 2021-2023 + * D. R. Commander. All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -149,14 +149,11 @@ static void usage(char *programName) + printf("General Options\n"); + printf("---------------\n\n"); + +- printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); +- printf(" the underlying codec.\n\n"); ++ printf("-fastupsample = Use the fastest chrominance upsampling algorithm available\n\n"); + +- printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); +- printf(" codec.\n\n"); ++ printf("-fastdct = Use the fastest DCT/IDCT algorithm available\n\n"); + +- printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); +- printf(" underlying codec.\n\n"); ++ printf("-accuratedct = Use the most accurate DCT/IDCT algorithm available\n\n"); + + exit(1); + } +diff --git a/tjunittest.c b/tjunittest.c +index b3f0311..0082149 100644 +--- a/tjunittest.c ++++ b/tjunittest.c +@@ -1,6 +1,6 @@ + /* +- * Copyright (C)2009-2014, 2017-2019, 2022 D. R. Commander. +- * All Rights Reserved. ++ * Copyright (C)2009-2014, 2017-2019, 2022-2023 D. R. Commander. ++ * All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include "tjutil.h" + #include "turbojpeg.h" +@@ -55,11 +56,11 @@ static void usage(char *progName) + { + printf("\nUSAGE: %s [options]\n\n", progName); + printf("Options:\n"); +- printf("-yuv = test YUV encoding/decoding support\n"); +- printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n"); +- printf(" 4-byte boundary\n"); +- printf("-alloc = test automatic buffer allocation\n"); +- printf("-bmp = tjLoadImage()/tjSaveImage() unit test\n\n"); ++ printf("-yuv = test YUV encoding/compression/decompression/decoding\n"); ++ printf("-noyuvpad = do not pad each row in each Y, U, and V plane to the nearest\n"); ++ printf(" multiple of 4 bytes\n"); ++ printf("-alloc = test automatic JPEG buffer allocation\n"); ++ printf("-bmp = test packed-pixel image I/O\n"); + exit(1); + } + +@@ -95,7 +96,7 @@ const int _4byteFormats[] = { + const int _onlyGray[] = { TJPF_GRAY }; + const int _onlyRGB[] = { TJPF_RGB }; + +-int doYUV = 0, alloc = 0, pad = 4; ++int doYUV = 0, alloc = 0, yuvAlign = 4; + + int exitStatus = 0; + #define BAILOUT() { exitStatus = -1; goto bailout; } +@@ -282,7 +283,7 @@ static int checkBufYUV(unsigned char *buf, int w, int h, int subsamp, + int hsf = tjMCUWidth[subsamp] / 8, vsf = tjMCUHeight[subsamp] / 8; + int pw = PAD(w, hsf), ph = PAD(h, vsf); + int cw = pw / hsf, ch = ph / vsf; +- int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad); ++ int ypitch = PAD(pw, yuvAlign), uvpitch = PAD(cw, yuvAlign); + int retval = 1; + int halfway = 16 * sf.num / sf.denom; + int blocksize = 8 * sf.num / sf.denom; +@@ -381,7 +382,7 @@ static void compTest(tjhandle handle, unsigned char **dstBuf, + + if (!alloc) flags |= TJFLAG_NOREALLOC; + if (doYUV) { +- unsigned long yuvSize = tjBufSizeYUV2(w, pad, h, subsamp); ++ 
unsigned long yuvSize = tjBufSizeYUV2(w, yuvAlign, h, subsamp); + tjscalingfactor sf = { 1, 1 }; + tjhandle handle2 = tjInitCompress(); + +@@ -392,15 +393,15 @@ static void compTest(tjhandle handle, unsigned char **dstBuf, + memset(yuvBuf, 0, yuvSize); + + printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]); +- TRY_TJ(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp, +- flags)); ++ TRY_TJ(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, yuvAlign, ++ subsamp, flags)); + tjDestroy(handle2); + if (checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong, + jpegQual); +- TRY_TJ(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf, ++ TRY_TJ(tjCompressFromYUV(handle, yuvBuf, w, yuvAlign, h, subsamp, dstBuf, + dstSize, jpegQual, flags)); + } else { + printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp], +@@ -442,7 +443,7 @@ static void _decompTest(tjhandle handle, unsigned char *jpegBuf, + memset(dstBuf, 0, dstSize); + + if (doYUV) { +- unsigned long yuvSize = tjBufSizeYUV2(scaledWidth, pad, scaledHeight, ++ unsigned long yuvSize = tjBufSizeYUV2(scaledWidth, yuvAlign, scaledHeight, + subsamp); + tjhandle handle2 = tjInitDecompress(); + +@@ -456,16 +457,20 @@ static void _decompTest(tjhandle handle, unsigned char *jpegBuf, + if (sf.num != 1 || sf.denom != 1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); +- TRY_TJ(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth, +- pad, scaledHeight, flags)); ++ /* We pass scaledWidth + 1 and scaledHeight + 1 to validate that ++ tjDecompressToYUV2() generates the largest possible scaled image that ++ fits within the desired dimensions, as documented. 
*/ ++ TRY_TJ(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, ++ scaledWidth + 1, yuvAlign, scaledHeight + 1, ++ flags)); + if (checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf)) + printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf], + (flags & TJFLAG_BOTTOMUP) ? "Bottom-Up" : "Top-Down "); +- TRY_TJ(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0, +- scaledHeight, pf, flags)); ++ TRY_TJ(tjDecodeYUV(handle2, yuvBuf, yuvAlign, subsamp, dstBuf, scaledWidth, ++ 0, scaledHeight, pf, flags)); + tjDestroy(handle2); + } else { + printf("JPEG -> %s %s ", pixFormatStr[pf], +@@ -473,8 +478,11 @@ static void _decompTest(tjhandle handle, unsigned char *jpegBuf, + if (sf.num != 1 || sf.denom != 1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); +- TRY_TJ(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0, +- scaledHeight, pf, flags)); ++ /* We pass scaledWidth + 1 and scaledHeight + 1 to validate that ++ tjDecompress2() generates the largest possible scaled image that fits ++ within the desired dimensions, as documented. 
*/ ++ TRY_TJ(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth + 1, 0, ++ scaledHeight + 1, pf, flags)); + } + + if (checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags)) +@@ -571,11 +579,16 @@ bailout: + THROW(#function " overflow"); \ + } + #endif ++#define CHECKSIZEINT(function) { \ ++ if (intsize != -1 || !strcmp(tjGetErrorStr2(NULL), "No error")) \ ++ THROW(#function " overflow"); \ ++} + + static void overflowTest(void) + { + /* Ensure that the various buffer size functions don't overflow */ + unsigned long size; ++ int intsize; + + size = tjBufSize(26755, 26755, TJSAMP_444); + CHECKSIZE(tjBufSize()); +@@ -583,12 +596,20 @@ static void overflowTest(void) + CHECKSIZE(TJBUFSIZE()); + size = tjBufSizeYUV2(37838, 1, 37838, TJSAMP_444); + CHECKSIZE(tjBufSizeYUV2()); ++ size = tjBufSizeYUV2(37837, 3, 37837, TJSAMP_444); ++ CHECKSIZE(tjBufSizeYUV2()); ++ size = tjBufSizeYUV2(37837, -1, 37837, TJSAMP_444); ++ CHECKSIZE(tjBufSizeYUV2()); + size = TJBUFSIZEYUV(37838, 37838, TJSAMP_444); + CHECKSIZE(TJBUFSIZEYUV()); + size = tjBufSizeYUV(37838, 37838, TJSAMP_444); + CHECKSIZE(tjBufSizeYUV()); + size = tjPlaneSizeYUV(0, 65536, 0, 65536, TJSAMP_444); + CHECKSIZE(tjPlaneSizeYUV()); ++ intsize = tjPlaneWidth(0, INT_MAX, TJSAMP_420); ++ CHECKSIZEINT(tjPlaneWidth()); ++ intsize = tjPlaneHeight(0, INT_MAX, TJSAMP_420); ++ CHECKSIZEINT(tjPlaneHeight()); + + bailout: + return; +@@ -614,7 +635,7 @@ static void bufSizeTest(void) + if ((srcBuf = (unsigned char *)malloc(w * h * 4)) == NULL) + THROW("Memory allocation failure"); + if (!alloc || doYUV) { +- if (doYUV) dstSize = tjBufSizeYUV2(w, pad, h, subsamp); ++ if (doYUV) dstSize = tjBufSizeYUV2(w, yuvAlign, h, subsamp); + else dstSize = tjBufSize(w, h, subsamp); + if ((dstBuf = (unsigned char *)tjAlloc(dstSize)) == NULL) + THROW("Memory allocation failure"); +@@ -626,8 +647,8 @@ static void bufSizeTest(void) + } + + if (doYUV) { +- TRY_TJ(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, pad, +- 
subsamp, 0)); ++ TRY_TJ(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, ++ yuvAlign, subsamp, 0)); + } else { + TRY_TJ(tjCompress2(handle, srcBuf, w, 0, h, TJPF_BGRX, &dstBuf, + &dstSize, subsamp, 100, +@@ -641,7 +662,7 @@ static void bufSizeTest(void) + if ((srcBuf = (unsigned char *)malloc(h * w * 4)) == NULL) + THROW("Memory allocation failure"); + if (!alloc || doYUV) { +- if (doYUV) dstSize = tjBufSizeYUV2(h, pad, w, subsamp); ++ if (doYUV) dstSize = tjBufSizeYUV2(h, yuvAlign, w, subsamp); + else dstSize = tjBufSize(h, w, subsamp); + if ((dstBuf = (unsigned char *)tjAlloc(dstSize)) == NULL) + THROW("Memory allocation failure"); +@@ -653,8 +674,8 @@ static void bufSizeTest(void) + } + + if (doYUV) { +- TRY_TJ(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, pad, +- subsamp, 0)); ++ TRY_TJ(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, ++ yuvAlign, subsamp, 0)); + } else { + TRY_TJ(tjCompress2(handle, srcBuf, h, 0, w, TJPF_BGRX, &dstBuf, + &dstSize, subsamp, 100, +@@ -898,7 +919,7 @@ int main(int argc, char *argv[]) + if (argc > 1) { + for (i = 1; i < argc; i++) { + if (!strcasecmp(argv[i], "-yuv")) doYUV = 1; +- else if (!strcasecmp(argv[i], "-noyuvpad")) pad = 1; ++ else if (!strcasecmp(argv[i], "-noyuvpad")) yuvAlign = 1; + else if (!strcasecmp(argv[i], "-alloc")) alloc = 1; + else if (!strcasecmp(argv[i], "-bmp")) return bmpTest(); + else usage(argv[0]); +diff --git a/transupp.c b/transupp.c +index a3d878c..78dc91b 100644 +--- a/transupp.c ++++ b/transupp.c +@@ -143,7 +143,7 @@ requant_comp(j_decompress_ptr cinfo, jpeg_component_info *compptr, + for (k = 0; k < DCTSIZE2; k++) { + temp = qtblptr->quantval[k]; + qval = qtblptr1->quantval[k]; +- if (temp != qval) { ++ if (temp != qval && qval != 0) { + temp *= ptr[k]; + /* The following quantization code is copied from jcdctmgr.c */ + #ifdef FAST_DIVIDE +diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c +index 0cf5f70..446cbd2 100644 +--- a/turbojpeg-jni.c ++++ b/turbojpeg-jni.c +@@ -1,5 
+1,5 @@ + /* +- * Copyright (C)2011-2022 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2011-2023 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -26,6 +26,7 @@ + * POSSIBILITY OF SUCH DAMAGE. + */ + ++#include + #include "turbojpeg.h" + #include "jinclude.h" + #include +@@ -132,24 +133,28 @@ bailout: + JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize + (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp) + { +- jint retval = (jint)tjBufSize(width, height, jpegSubsamp); ++ unsigned long retval = tjBufSize(width, height, jpegSubsamp); + +- if (retval == -1) THROW_ARG(tjGetErrorStr()); ++ if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr()); ++ if (retval > (unsigned long)INT_MAX) ++ THROW_ARG("Image is too large"); + + bailout: +- return retval; ++ return (jint)retval; + } + + /* TurboJPEG 1.4.x: TJ::bufSizeYUV() */ + JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII +- (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp) ++ (JNIEnv *env, jclass cls, jint width, jint align, jint height, jint subsamp) + { +- jint retval = (jint)tjBufSizeYUV2(width, pad, height, subsamp); ++ unsigned long retval = tjBufSizeYUV2(width, align, height, subsamp); + +- if (retval == -1) THROW_ARG(tjGetErrorStr()); ++ if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr()); ++ if (retval > (unsigned long)INT_MAX) ++ THROW_ARG("Image is too large"); + + bailout: +- return retval; ++ return (jint)retval; + } + + /* TurboJPEG 1.2.x: TJ::bufSizeYUV() */ +@@ -166,13 +171,15 @@ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII + (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride, + jint height, jint subsamp) + { +- jint retval = (jint)tjPlaneSizeYUV(componentID, width, stride, height, +- subsamp); ++ 
unsigned long retval = tjPlaneSizeYUV(componentID, width, stride, height, ++ subsamp); + +- if (retval == -1) THROW_ARG(tjGetErrorStr()); ++ if (retval == (unsigned long)-1) THROW_ARG(tjGetErrorStr()); ++ if (retval > (unsigned long)INT_MAX) ++ THROW_ARG("Image is too large"); + + bailout: +- return retval; ++ return (jint)retval; + } + + /* TurboJPEG 1.4.x: TJ::planeWidth() */ +diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile +index 5477fed..07a429b 100644 +--- a/turbojpeg-mapfile ++++ b/turbojpeg-mapfile +@@ -1,14 +1,14 @@ + TURBOJPEG_1.0 + { + global: +- tjInitCompress; +- tjCompress; + TJBUFSIZE; +- tjInitDecompress; +- tjDecompressHeader; ++ tjCompress; + tjDecompress; ++ tjDecompressHeader; + tjDestroy; + tjGetErrorStr; ++ tjInitCompress; ++ tjInitDecompress; + local: + *; + }; +diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni +index 4432791..4ae25aa 100644 +--- a/turbojpeg-mapfile.jni ++++ b/turbojpeg-mapfile.jni +@@ -1,14 +1,14 @@ + TURBOJPEG_1.0 + { + global: +- tjInitCompress; +- tjCompress; + TJBUFSIZE; +- tjInitDecompress; +- tjDecompressHeader; ++ tjCompress; + tjDecompress; ++ tjDecompressHeader; + tjDestroy; + tjGetErrorStr; ++ tjInitCompress; ++ tjInitDecompress; + local: + *; + }; +diff --git a/turbojpeg.c b/turbojpeg.c +index a1544f2..b5498dc 100644 +--- a/turbojpeg.c ++++ b/turbojpeg.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C)2009-2022 D. R. Commander. All Rights Reserved. ++ * Copyright (C)2009-2023 D. R. Commander. All Rights Reserved. + * Copyright (C)2021 Alex Richardson. All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -31,6 +31,7 @@ + libjpeg-turbo */ + + #include ++#include + #include + #define JPEG_INTERNALS + #include +@@ -42,7 +43,6 @@ + #include "transupp.h" + #include "./jpegcomp.h" + #include "./cdjpeg.h" +-#include "jconfigint.h" + + extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, unsigned long *, + boolean); +@@ -98,7 +98,7 @@ static void my_emit_message(j_common_ptr cinfo, int msg_level) + } + + +-/* Global structures, macros, etc. */ ++/********************** Global structures, macros, etc. **********************/ + + enum { COMPRESS = 1, DECOMPRESS = 2 }; + +@@ -324,11 +324,13 @@ static void setCompDefaults(struct jpeg_compress_struct *cinfo, + else + jpeg_set_colorspace(cinfo, JCS_YCbCr); + ++#ifdef C_PROGRESSIVE_SUPPORTED + if (flags & TJFLAG_PROGRESSIVE) + jpeg_simple_progression(cinfo); + #ifndef NO_GETENV + else if (!GETENV_S(env, 7, "TJ_PROGRESSIVE") && !strcmp(env, "1")) + jpeg_simple_progression(cinfo); ++#endif + #endif + + cinfo->comp_info[0].h_samp_factor = tjMCUWidth[subsamp] / 8; +@@ -355,7 +357,7 @@ static int getSubsamp(j_decompress_ptr dinfo) + if (dinfo->num_components == 1 && dinfo->jpeg_color_space == JCS_GRAYSCALE) + return TJSAMP_GRAY; + +- for (i = 0; i < NUMSUBOPT; i++) { ++ for (i = 0; i < TJ_NUMSAMP; i++) { + if (dinfo->num_components == pixelsize[i] || + ((dinfo->jpeg_color_space == JCS_YCCK || + dinfo->jpeg_color_space == JCS_CMYK) && +@@ -424,8 +426,9 @@ static int getSubsamp(j_decompress_ptr dinfo) + } + + +-/* General API functions */ ++/*************************** General API functions ***************************/ + ++/* TurboJPEG 2.0+ */ + DLLEXPORT char *tjGetErrorStr2(tjhandle handle) + { + tjinstance *this = (tjinstance *)handle; +@@ -438,12 +441,14 @@ DLLEXPORT char *tjGetErrorStr2(tjhandle handle) + } + + ++/* TurboJPEG 1.0+ */ + DLLEXPORT char *tjGetErrorStr(void) + { + return errStr; + } + + ++/* TurboJPEG 2.0+ */ + DLLEXPORT int 
tjGetErrorCode(tjhandle handle) + { + tjinstance *this = (tjinstance *)handle; +@@ -453,6 +458,7 @@ DLLEXPORT int tjGetErrorCode(tjhandle handle) + } + + ++/* TurboJPEG 1.0+ */ + DLLEXPORT int tjDestroy(tjhandle handle) + { + GET_INSTANCE(handle); +@@ -470,19 +476,21 @@ DLLEXPORT int tjDestroy(tjhandle handle) + with turbojpeg.dll for compatibility reasons. However, these functions + can potentially be used for other purposes by different implementations. */ + ++/* TurboJPEG 1.2+ */ + DLLEXPORT void tjFree(unsigned char *buf) + { + free(buf); + } + + ++/* TurboJPEG 1.2+ */ + DLLEXPORT unsigned char *tjAlloc(int bytes) + { + return (unsigned char *)malloc(bytes); + } + + +-/* Compressor */ ++/******************************** Compressor *********************************/ + + static tjhandle _tjInitCompress(tjinstance *this) + { +@@ -514,6 +522,7 @@ static tjhandle _tjInitCompress(tjinstance *this) + return (tjhandle)this; + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT tjhandle tjInitCompress(void) + { + tjinstance *this = NULL; +@@ -529,12 +538,13 @@ DLLEXPORT tjhandle tjInitCompress(void) + } + + ++/* TurboJPEG 1.2+ */ + DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp) + { + unsigned long long retval = 0; + int mcuw, mcuh, chromasf; + +- if (width < 1 || height < 1 || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT) ++ if (width < 1 || height < 1 || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP) + THROWG("tjBufSize(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be +@@ -551,6 +561,7 @@ bailout: + return (unsigned long)retval; + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT unsigned long TJBUFSIZE(int width, int height) + { + unsigned long long retval = 0; +@@ -570,19 +581,20 @@ bailout: + } + + +-DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, ++/* TurboJPEG 1.4+ */ ++DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height, + int subsamp) + { + unsigned long long retval = 
0; + int nc, i; + +- if (subsamp < 0 || subsamp >= NUMSUBOPT) ++ if (align < 1 || !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROWG("tjBufSizeYUV2(): Invalid argument"); + + nc = (subsamp == TJSAMP_GRAY ? 1 : 3); + for (i = 0; i < nc; i++) { + int pw = tjPlaneWidth(i, width, subsamp); +- int stride = PAD(pw, pad); ++ int stride = PAD(pw, align); + int ph = tjPlaneHeight(i, height, subsamp); + + if (pw < 0 || ph < 0) return -1; +@@ -595,20 +607,24 @@ bailout: + return (unsigned long)retval; + } + ++/* TurboJPEG 1.2+ */ + DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp) + { + return tjBufSizeYUV2(width, 4, height, subsamp); + } + ++/* TurboJPEG 1.1+ */ + DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int subsamp) + { + return tjBufSizeYUV(width, height, subsamp); + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp) + { +- int pw, nc, retval = 0; ++ unsigned long long pw, retval = 0; ++ int nc; + + if (width < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROWG("tjPlaneWidth(): Invalid argument"); +@@ -616,20 +632,25 @@ DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp) + if (componentID < 0 || componentID >= nc) + THROWG("tjPlaneWidth(): Invalid argument"); + +- pw = PAD(width, tjMCUWidth[subsamp] / 8); ++ pw = PAD((unsigned long long)width, tjMCUWidth[subsamp] / 8); + if (componentID == 0) + retval = pw; + else + retval = pw * 8 / tjMCUWidth[subsamp]; + ++ if (retval > (unsigned long long)INT_MAX) ++ THROWG("tjPlaneWidth(): Width is too large"); ++ + bailout: +- return retval; ++ return (int)retval; + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp) + { +- int ph, nc, retval = 0; ++ unsigned long long ph, retval = 0; ++ int nc; + + if (height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROWG("tjPlaneHeight(): Invalid argument"); +@@ -637,24 +658,28 @@ DLLEXPORT int tjPlaneHeight(int componentID, 
int height, int subsamp) + if (componentID < 0 || componentID >= nc) + THROWG("tjPlaneHeight(): Invalid argument"); + +- ph = PAD(height, tjMCUHeight[subsamp] / 8); ++ ph = PAD((unsigned long long)height, tjMCUHeight[subsamp] / 8); + if (componentID == 0) + retval = ph; + else + retval = ph * 8 / tjMCUHeight[subsamp]; + ++ if (retval > (unsigned long long)INT_MAX) ++ THROWG("tjPlaneHeight(): Height is too large"); ++ + bailout: +- return retval; ++ return (int)retval; + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride, + int height, int subsamp) + { + unsigned long long retval = 0; + int pw, ph; + +- if (width < 1 || height < 1 || subsamp < 0 || subsamp >= NUMSUBOPT) ++ if (width < 1 || height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROWG("tjPlaneSizeYUV(): Invalid argument"); + + pw = tjPlaneWidth(componentID, width, subsamp); +@@ -673,6 +698,7 @@ bailout: + } + + ++/* TurboJPEG 1.2+ */ + DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char **jpegBuf, unsigned long *jpegSize, +@@ -689,7 +715,7 @@ DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + + if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL || +- jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT || ++ jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP || + jpegQual < 0 || jpegQual > 100) + THROW("tjCompress2(): Invalid argument"); + +@@ -741,6 +767,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *jpegBuf, unsigned long *jpegSize, +@@ -764,6 +791,7 @@ DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int 
tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, +@@ -790,13 +818,13 @@ DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + + if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes || +- !dstPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT) ++ !dstPlanes[0] || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROW("tjEncodeYUVPlanes(): Invalid argument"); + if (subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + THROW("tjEncodeYUVPlanes(): Invalid argument"); + + if (pixelFormat == TJPF_CMYK) +- THROW("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels"); ++ THROW("tjEncodeYUVPlanes(): Cannot generate YUV images from packed-pixel CMYK images"); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + +@@ -922,9 +950,10 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, +- unsigned char *dstBuf, int pad, int subsamp, ++ unsigned char *dstBuf, int align, int subsamp, + int flags) + { + unsigned char *dstPlanes[3]; +@@ -934,14 +963,14 @@ DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + if (!this) THROWG("tjEncodeYUV3(): Invalid handle"); + this->isInstanceError = FALSE; + +- if (width <= 0 || height <= 0 || dstBuf == NULL || pad < 0 || +- !IS_POW2(pad) || subsamp < 0 || subsamp >= NUMSUBOPT) ++ if (width <= 0 || height <= 0 || dstBuf == NULL || align < 1 || ++ !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROW("tjEncodeYUV3(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + dstPlanes[0] = dstBuf; +- strides[0] = PAD(pw0, pad); ++ strides[0] = PAD(pw0, align); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + 
dstPlanes[1] = dstPlanes[2] = NULL; +@@ -949,7 +978,7 @@ DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + +- strides[1] = strides[2] = PAD(pw1, pad); ++ strides[1] = strides[2] = PAD(pw1, align); + dstPlanes[1] = dstPlanes[0] + strides[0] * ph0; + dstPlanes[2] = dstPlanes[1] + strides[1] * ph1; + } +@@ -961,6 +990,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.2+ */ + DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int subsamp, int flags) +@@ -969,6 +999,7 @@ DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, + dstBuf, 4, subsamp, flags); + } + ++/* TurboJPEG 1.1+ */ + DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *dstBuf, int subsamp, int flags) +@@ -979,6 +1010,7 @@ DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + int width, const int *strides, +@@ -1005,7 +1037,7 @@ DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + THROW("tjCompressFromYUVPlanes(): Instance has not been initialized for compression"); + + if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 || +- subsamp < 0 || subsamp >= NUMSUBOPT || jpegBuf == NULL || ++ subsamp < 0 || subsamp >= TJ_NUMSAMP || jpegBuf == NULL || + jpegSize == NULL || jpegQual < 0 || jpegQual > 100) + THROW("tjCompressFromYUVPlanes(): Invalid argument"); + if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) +@@ -1117,8 +1149,9 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, +- int width, int pad, int height, int subsamp, ++ int width, 
int align, int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags) +@@ -1130,14 +1163,14 @@ DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + if (!this) THROWG("tjCompressFromYUV(): Invalid handle"); + this->isInstanceError = FALSE; + +- if (srcBuf == NULL || width <= 0 || pad < 1 || height <= 0 || subsamp < 0 || +- subsamp >= NUMSUBOPT) ++ if (srcBuf == NULL || width <= 0 || align < 1 || !IS_POW2(align) || ++ height <= 0 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + THROW("tjCompressFromYUV(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + srcPlanes[0] = srcBuf; +- strides[0] = PAD(pw0, pad); ++ strides[0] = PAD(pw0, align); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + srcPlanes[1] = srcPlanes[2] = NULL; +@@ -1145,7 +1178,7 @@ DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + +- strides[1] = strides[2] = PAD(pw1, pad); ++ strides[1] = strides[2] = PAD(pw1, align); + srcPlanes[1] = srcPlanes[0] + strides[0] * ph0; + srcPlanes[2] = srcPlanes[1] + strides[1] * ph1; + } +@@ -1158,7 +1191,7 @@ bailout: + } + + +-/* Decompressor */ ++/******************************* Decompressor ********************************/ + + static tjhandle _tjInitDecompress(tjinstance *this) + { +@@ -1188,6 +1221,7 @@ static tjhandle _tjInitDecompress(tjinstance *this) + return (tjhandle)this; + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT tjhandle tjInitDecompress(void) + { + tjinstance *this; +@@ -1203,6 +1237,7 @@ DLLEXPORT tjhandle tjInitDecompress(void) + } + + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, int *width, +@@ -1259,6 +1294,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.1+ */ + DLLEXPORT int 
tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp) +@@ -1269,6 +1305,7 @@ DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, + jpegSubsamp, &jpegColorspace); + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height) +@@ -1280,19 +1317,21 @@ DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, + } + + +-DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors) ++/* TurboJPEG 1.2+ */ ++DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors) + { +- if (numscalingfactors == NULL) { ++ if (numScalingFactors == NULL) { + SNPRINTF(errStr, JMSG_LENGTH_MAX, + "tjGetScalingFactors(): Invalid argument"); + return NULL; + } + +- *numscalingfactors = NUMSF; ++ *numScalingFactors = NUMSF; + return (tjscalingfactor *)sf; + } + + ++/* TurboJPEG 1.2+ */ + DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, +@@ -1380,6 +1419,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.0+ */ + DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelSize, +@@ -1393,8 +1433,8 @@ DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, + } + + +-static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo, +- int pixelFormat, int subsamp, int flags) ++static void setDecodeDefaults(struct jpeg_decompress_struct *dinfo, ++ int pixelFormat, int subsamp, int flags) + { + int i; + +@@ -1429,8 +1469,6 @@ static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo, + if (dinfo->quant_tbl_ptrs[i] == NULL) + dinfo->quant_tbl_ptrs[i] = jpeg_alloc_quant_table((j_common_ptr)dinfo); + } +- +- return 0; + } + 
+ +@@ -1443,6 +1481,7 @@ static void my_reset_marker_reader(j_decompress_ptr dinfo) + { + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + const int *strides, int subsamp, +@@ -1468,7 +1507,7 @@ DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + if ((this->init & DECOMPRESS) == 0) + THROW("tjDecodeYUVPlanes(): Instance has not been initialized for decompression"); + +- if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT || ++ if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= TJ_NUMSAMP || + dstBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + THROW("tjDecodeYUVPlanes(): Invalid argument"); +@@ -1481,7 +1520,7 @@ DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + } + + if (pixelFormat == TJPF_CMYK) +- THROW("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels."); ++ THROW("tjDecodeYUVPlanes(): Cannot decode YUV images into packed-pixel CMYK images."); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + dinfo->image_width = width; +@@ -1496,9 +1535,7 @@ DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + dinfo->progressive_mode = dinfo->inputctl->has_multiple_scans = FALSE; + dinfo->Ss = dinfo->Ah = dinfo->Al = 0; + dinfo->Se = DCTSIZE2 - 1; +- if (setDecodeDefaults(dinfo, pixelFormat, subsamp, flags) == -1) { +- retval = -1; goto bailout; +- } ++ setDecodeDefaults(dinfo, pixelFormat, subsamp, flags); + old_read_markers = dinfo->marker->read_markers; + dinfo->marker->read_markers = my_read_markers; + old_reset_marker_reader = dinfo->marker->reset_marker_reader; +@@ -1591,8 +1628,9 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, +- int pad, int subsamp, unsigned char *dstBuf, ++ int align, int subsamp, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags) + { +@@ -1603,14 +1641,14 
@@ DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + if (!this) THROWG("tjDecodeYUV(): Invalid handle"); + this->isInstanceError = FALSE; + +- if (srcBuf == NULL || pad < 0 || !IS_POW2(pad) || subsamp < 0 || +- subsamp >= NUMSUBOPT || width <= 0 || height <= 0) ++ if (srcBuf == NULL || align < 1 || !IS_POW2(align) || subsamp < 0 || ++ subsamp >= TJ_NUMSAMP || width <= 0 || height <= 0) + THROW("tjDecodeYUV(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + srcPlanes[0] = srcBuf; +- strides[0] = PAD(pw0, pad); ++ strides[0] = PAD(pw0, align); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + srcPlanes[1] = srcPlanes[2] = NULL; +@@ -1618,7 +1656,7 @@ DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + +- strides[1] = strides[2] = PAD(pw1, pad); ++ strides[1] = strides[2] = PAD(pw1, align); + srcPlanes[1] = srcPlanes[0] + strides[0] * ph0; + srcPlanes[2] = srcPlanes[1] + strides[1] * ph1; + } +@@ -1630,6 +1668,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, +@@ -1763,7 +1802,7 @@ DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + for (i = 0; i < dinfo->num_components; i++) { + jpeg_component_info *compptr = &dinfo->comp_info[i]; + +- if (jpegSubsamp == TJ_420) { ++ if (jpegSubsamp == TJSAMP_420) { + /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try + to be clever and use the IDCT to perform upsampling on the U and V + planes. 
For instance, if the output image is to be scaled by 1/2 +@@ -1810,9 +1849,10 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.4+ */ + DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, +- int width, int pad, int height, int flags) ++ int width, int align, int height, int flags) + { + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval = -1, jpegSubsamp = -1; +@@ -1822,7 +1862,7 @@ DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || +- pad < 1 || !IS_POW2(pad) || height < 0) ++ align < 1 || !IS_POW2(align) || height < 0) + THROW("tjDecompressToYUV2(): Invalid argument"); + + if (setjmp(this->jerr.setjmp_buffer)) { +@@ -1839,7 +1879,6 @@ DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + jpegwidth = dinfo->image_width; jpegheight = dinfo->image_height; + if (width == 0) width = jpegwidth; + if (height == 0) height = jpegheight; +- + for (i = 0; i < NUMSF; i++) { + scaledw = TJSCALED(jpegwidth, sf[i]); + scaledh = TJSCALED(jpegheight, sf[i]); +@@ -1849,10 +1888,12 @@ DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + if (i >= NUMSF) + THROW("tjDecompressToYUV2(): Could not scale down to desired image dimensions"); + ++ width = scaledw; height = scaledh; ++ + pw0 = tjPlaneWidth(0, width, jpegSubsamp); + ph0 = tjPlaneHeight(0, height, jpegSubsamp); + dstPlanes[0] = dstBuf; +- strides[0] = PAD(pw0, pad); ++ strides[0] = PAD(pw0, align); + if (jpegSubsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + dstPlanes[1] = dstPlanes[2] = NULL; +@@ -1860,7 +1901,7 @@ DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + int pw1 = tjPlaneWidth(1, width, jpegSubsamp); + int ph1 = tjPlaneHeight(1, height, jpegSubsamp); + 
+- strides[1] = strides[2] = PAD(pw1, pad); ++ strides[1] = strides[2] = PAD(pw1, align); + dstPlanes[1] = dstPlanes[0] + strides[0] * ph0; + dstPlanes[2] = dstPlanes[1] + strides[1] * ph1; + } +@@ -1874,6 +1915,7 @@ bailout: + return retval; + } + ++/* TurboJPEG 1.1+ */ + DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int flags) +@@ -1882,8 +1924,9 @@ DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, + } + + +-/* Transformer */ ++/******************************** Transformer ********************************/ + ++/* TurboJPEG 1.2+ */ + DLLEXPORT tjhandle tjInitTransform(void) + { + tjinstance *this = NULL; +@@ -1903,6 +1946,7 @@ DLLEXPORT tjhandle tjInitTransform(void) + } + + ++/* TurboJPEG 1.2+ */ + DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, +@@ -2013,8 +2057,10 @@ DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf, + jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc); + jpeg_copy_critical_parameters(dinfo, cinfo); + dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]); ++#ifdef C_PROGRESSIVE_SUPPORTED + if (flags & TJFLAG_PROGRESSIVE || t[i].options & TJXOPT_PROGRESSIVE) + jpeg_simple_progression(cinfo); ++#endif + if (!(t[i].options & TJXOPT_NOOUTPUT)) { + jpeg_write_coefficients(cinfo, dstcoefs); + jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ? 
+@@ -2069,6 +2115,9 @@ bailout: + } + + ++/*************************** Packed-Pixel Image I/O **************************/ ++ ++/* TurboJPEG 2.0+ */ + DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, + int align, int *height, int *pixelFormat, + int flags) +@@ -2118,7 +2167,7 @@ DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, + invert = (flags & TJFLAG_BOTTOMUP) == 0; + } else if (tempc == 'P') { + if ((src = jinit_read_ppm(cinfo)) == NULL) +- THROWG("tjLoadImage(): Could not initialize bitmap loader"); ++ THROWG("tjLoadImage(): Could not initialize PPM loader"); + invert = (flags & TJFLAG_BOTTOMUP) != 0; + } else + THROWG("tjLoadImage(): Unsupported file type"); +@@ -2171,6 +2220,7 @@ bailout: + } + + ++/* TurboJPEG 2.0+ */ + DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer, + int width, int pitch, int height, int pixelFormat, + int flags) +diff --git a/turbojpeg.h b/turbojpeg.h +index 02b54ca..1f8756a 100644 +--- a/turbojpeg.h ++++ b/turbojpeg.h +@@ -1,6 +1,6 @@ + /* +- * Copyright (C)2009-2015, 2017, 2020-2021 D. R. Commander. +- * All Rights Reserved. ++ * Copyright (C)2009-2015, 2017, 2020-2021, 2023 D. R. Commander. ++ * All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: +@@ -54,23 +54,24 @@ + * Each plane is simply a 2D array of bytes, each byte representing the value + * of one of the components (Y, Cb, or Cr) at a particular location in the + * image. The width and height of each plane are determined by the image +- * width, height, and level of chrominance subsampling. The luminance plane ++ * width, height, and level of chrominance subsampling. The luminance plane + * width is the image width padded to the nearest multiple of the horizontal +- * subsampling factor (2 in the case of 4:2:0 and 4:2:2, 4 in the case of +- * 4:1:1, 1 in the case of 4:4:4 or grayscale.) 
Similarly, the luminance plane +- * height is the image height padded to the nearest multiple of the vertical +- * subsampling factor (2 in the case of 4:2:0 or 4:4:0, 1 in the case of 4:4:4 +- * or grayscale.) This is irrespective of any additional padding that may be +- * specified as an argument to the various YUV functions. The chrominance +- * plane width is equal to the luminance plane width divided by the horizontal +- * subsampling factor, and the chrominance plane height is equal to the +- * luminance plane height divided by the vertical subsampling factor. ++ * subsampling factor (1 in the case of 4:4:4, grayscale, or 4:4:0; 2 in the ++ * case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.) Similarly, the luminance ++ * plane height is the image height padded to the nearest multiple of the ++ * vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale, or ++ * 4:1:1; 2 in the case of 4:2:0 or 4:4:0.) This is irrespective of any ++ * additional padding that may be specified as an argument to the various YUV ++ * functions. The chrominance plane width is equal to the luminance plane ++ * width divided by the horizontal subsampling factor, and the chrominance ++ * plane height is equal to the luminance plane height divided by the vertical ++ * subsampling factor. + * + * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is + * used, then the luminance plane would be 36 x 35 bytes, and each of the +- * chrominance planes would be 18 x 35 bytes. If you specify a line padding of +- * 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, and +- * each of the chrominance planes would be 20 x 35 bytes. ++ * chrominance planes would be 18 x 35 bytes. If you specify a row alignment ++ * of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes, ++ * and each of the chrominance planes would be 20 x 35 bytes. 
+ * + * @{ + */ +@@ -86,8 +87,8 @@ + * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK + * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of + * the Cb and Cr (chrominance) components can be discarded or averaged together +- * to produce a smaller image with little perceptible loss of image clarity +- * (the human eye is more sensitive to small changes in brightness than to ++ * to produce a smaller image with little perceptible loss of image clarity. ++ * (The human eye is more sensitive to small changes in brightness than to + * small changes in color.) This is called "chrominance subsampling". + */ + enum TJSAMP { +@@ -245,8 +246,8 @@ enum TJPF { + * vice versa, but the mapping is typically not 1:1 or reversible, nor can it + * be defined with a simple formula. Thus, such a conversion is out of scope + * for a codec library. However, the TurboJPEG API allows for compressing +- * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK +- * JPEG images into CMYK pixels. ++ * packed-pixel CMYK images into YCCK JPEG images (see #TJCS_YCCK) and ++ * decompressing YCCK JPEG images into packed-pixel CMYK images. + */ + TJPF_CMYK, + /** +@@ -258,9 +259,10 @@ enum TJPF { + /** + * Red offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the red component is offset from the start of the pixel. For +- * instance, if a pixel of format TJ_BGRX is stored in char pixel[], +- * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. This +- * will be -1 if the pixel format does not have a red component. ++ * instance, if a pixel of format TJPF_BGRX is stored in ++ * `unsigned char pixel[]`, then the red component will be ++ *`pixel[tjRedOffset[TJPF_BGRX]]`. This will be -1 if the pixel format does ++ * not have a red component. 
+ */ + static const int tjRedOffset[TJ_NUMPF] = { + 0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1 +@@ -268,31 +270,32 @@ static const int tjRedOffset[TJ_NUMPF] = { + /** + * Green offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the green component is offset from the start of the pixel. +- * For instance, if a pixel of format TJ_BGRX is stored in +- * char pixel[], then the green component will be +- * pixel[tjGreenOffset[TJ_BGRX]]. This will be -1 if the pixel format +- * does not have a green component. ++ * For instance, if a pixel of format TJPF_BGRX is stored in ++ * `unsigned char pixel[]`, then the green component will be ++ * `pixel[tjGreenOffset[TJPF_BGRX]]`. This will be -1 if the pixel format does ++ * not have a green component. + */ + static const int tjGreenOffset[TJ_NUMPF] = { + 1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1 + }; + /** + * Blue offset (in bytes) for a given pixel format. This specifies the number +- * of bytes that the Blue component is offset from the start of the pixel. For +- * instance, if a pixel of format TJ_BGRX is stored in char pixel[], +- * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. This +- * will be -1 if the pixel format does not have a blue component. ++ * of bytes that the blue component is offset from the start of the pixel. For ++ * instance, if a pixel of format TJPF_BGRX is stored in ++ * `unsigned char pixel[]`, then the blue component will be ++ * `pixel[tjBlueOffset[TJPF_BGRX]]`. This will be -1 if the pixel format does ++ * not have a blue component. + */ + static const int tjBlueOffset[TJ_NUMPF] = { + 2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1 + }; + /** + * Alpha offset (in bytes) for a given pixel format. This specifies the number +- * of bytes that the Alpha component is offset from the start of the pixel. +- * For instance, if a pixel of format TJ_BGRA is stored in +- * char pixel[], then the alpha component will be +- * pixel[tjAlphaOffset[TJ_BGRA]]. 
This will be -1 if the pixel format +- * does not have an alpha component. ++ * of bytes that the alpha component is offset from the start of the pixel. ++ * For instance, if a pixel of format TJPF_BGRA is stored in ++ * `unsigned char pixel[]`, then the alpha component will be ++ * `pixel[tjAlphaOffset[TJPF_BGRA]]`. This will be -1 if the pixel format does ++ * not have an alpha component. + */ + static const int tjAlphaOffset[TJ_NUMPF] = { + -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 +@@ -318,8 +321,9 @@ enum TJCS { + * RGB colorspace. When compressing the JPEG image, the R, G, and B + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. RGB JPEG images can be +- * decompressed to any of the extended RGB pixel formats or grayscale, but +- * they cannot be decompressed to YUV images. ++ * decompressed to packed-pixel images with any of the extended RGB or ++ * grayscale pixel formats, but they cannot be decompressed to planar YUV ++ * images. + */ + TJCS_RGB = 0, + /** +@@ -332,25 +336,27 @@ enum TJCS { + * original image. Originally, the analog equivalent of this transformation + * allowed the same signal to drive both black & white and color televisions, + * but JPEG images use YCbCr primarily because it allows the color data to be +- * optionally subsampled for the purposes of reducing bandwidth or disk +- * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images +- * can be compressed from and decompressed to any of the extended RGB pixel +- * formats or grayscale, or they can be decompressed to YUV planar images. ++ * optionally subsampled for the purposes of reducing network or disk usage. ++ * YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be ++ * compressed from and decompressed to packed-pixel images with any of the ++ * extended RGB or grayscale pixel formats. 
YCbCr JPEG images can also be ++ * compressed from and decompressed to planar YUV images. + */ + TJCS_YCbCr, + /** + * Grayscale colorspace. The JPEG image retains only the luminance data (Y + * component), and any color data from the source image is discarded. +- * Grayscale JPEG images can be compressed from and decompressed to any of +- * the extended RGB pixel formats or grayscale, or they can be decompressed +- * to YUV planar images. ++ * Grayscale JPEG images can be compressed from and decompressed to ++ * packed-pixel images with any of the extended RGB or grayscale pixel ++ * formats, or they can be compressed from and decompressed to planar YUV ++ * images. + */ + TJCS_GRAY, + /** + * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K + * components in the source image are reordered into image planes, but no + * colorspace conversion or subsampling is performed. CMYK JPEG images can +- * only be decompressed to CMYK pixels. ++ * only be decompressed to packed-pixel images with the CMYK pixel format. + */ + TJCS_CMYK, + /** +@@ -360,56 +366,54 @@ enum TJCS { + * reversibly transformed into YCCK, and as with YCbCr, the chrominance + * components in the YCCK pixels can be subsampled without incurring major + * perceptual loss. YCCK JPEG images can only be compressed from and +- * decompressed to CMYK pixels. ++ * decompressed to packed-pixel images with the CMYK pixel format. + */ + TJCS_YCCK + }; + + + /** +- * The uncompressed source/destination image is stored in bottom-up (Windows, +- * OpenGL) order, not top-down (X11) order. ++ * Rows in the packed-pixel source/destination image are stored in bottom-up ++ * (Windows, OpenGL) order rather than in top-down (X11) order. + */ + #define TJFLAG_BOTTOMUP 2 + /** + * When decompressing an image that was compressed using chrominance +- * subsampling, use the fastest chrominance upsampling algorithm available in +- * the underlying codec. 
The default is to use smooth upsampling, which +- * creates a smooth transition between neighboring chrominance components in +- * order to reduce upsampling artifacts in the decompressed image. ++ * subsampling, use the fastest chrominance upsampling algorithm available. ++ * The default is to use smooth upsampling, which creates a smooth transition ++ * between neighboring chrominance components in order to reduce upsampling ++ * artifacts in the decompressed image. + */ + #define TJFLAG_FASTUPSAMPLE 256 + /** +- * Disable buffer (re)allocation. If passed to one of the JPEG compression or +- * transform functions, this flag will cause those functions to generate an +- * error if the JPEG image buffer is invalid or too small rather than +- * attempting to allocate or reallocate that buffer. This reproduces the +- * behavior of earlier versions of TurboJPEG. ++ * Disable JPEG buffer (re)allocation. If passed to one of the JPEG ++ * compression or transform functions, this flag will cause those functions to ++ * generate an error if the JPEG destination buffer is invalid or too small, ++ * rather than attempt to allocate or reallocate that buffer. + */ + #define TJFLAG_NOREALLOC 1024 + /** +- * Use the fastest DCT/IDCT algorithm available in the underlying codec. The +- * default if this flag is not specified is implementation-specific. For +- * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast +- * algorithm by default when compressing, because this has been shown to have +- * only a very slight effect on accuracy, but it uses the accurate algorithm +- * when decompressing, because this has been shown to have a larger effect. ++ * Use the fastest DCT/IDCT algorithm available. The default if this flag is ++ * not specified is implementation-specific. 
For example, the implementation ++ * of the TurboJPEG API in libjpeg-turbo uses the fast algorithm by default ++ * when compressing, because this has been shown to have only a very slight ++ * effect on accuracy, but it uses the accurate algorithm when decompressing, ++ * because this has been shown to have a larger effect. + */ + #define TJFLAG_FASTDCT 2048 + /** +- * Use the most accurate DCT/IDCT algorithm available in the underlying codec. +- * The default if this flag is not specified is implementation-specific. For +- * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast +- * algorithm by default when compressing, because this has been shown to have +- * only a very slight effect on accuracy, but it uses the accurate algorithm +- * when decompressing, because this has been shown to have a larger effect. ++ * Use the most accurate DCT/IDCT algorithm available. The default if this ++ * flag is not specified is implementation-specific. For example, the ++ * implementation of the TurboJPEG API in libjpeg-turbo uses the fast algorithm ++ * by default when compressing, because this has been shown to have only a very ++ * slight effect on accuracy, but it uses the accurate algorithm when ++ * decompressing, because this has been shown to have a larger effect. + */ + #define TJFLAG_ACCURATEDCT 4096 + /** + * Immediately discontinue the current compression/decompression/transform +- * operation if the underlying codec throws a warning (non-fatal error). The +- * default behavior is to allow the operation to complete unless a fatal error +- * is encountered. ++ * operation if a warning (non-fatal error) occurs. The default behavior is to ++ * allow the operation to complete unless a fatal error is encountered. + */ + #define TJFLAG_STOPONWARNING 8192 + /** +@@ -441,8 +445,8 @@ enum TJCS { + */ + enum TJERR { + /** +- * The error was non-fatal and recoverable, but the image may still be +- * corrupt. 
++ * The error was non-fatal and recoverable, but the destination image may ++ * still be corrupt. + */ + TJERR_WARNING = 0, + /** +@@ -509,9 +513,9 @@ enum TJXOP { + /** + * This option will cause #tjTransform() to return an error if the transform is + * not perfect. Lossless transforms operate on MCU blocks, whose size depends +- * on the level of chrominance subsampling used (see #tjMCUWidth +- * and #tjMCUHeight.) If the image's width or height is not evenly divisible +- * by the MCU block size, then there will be partial MCU blocks on the right ++ * on the level of chrominance subsampling used (see #tjMCUWidth and ++ * #tjMCUHeight.) If the image's width or height is not evenly divisible by ++ * the MCU block size, then there will be partial MCU blocks on the right + * and/or bottom edges. It is not possible to move these partial MCU blocks to + * the top or left of the image, so any transform that would require that is + * "imperfect." If this option is not specified, then any partial MCU blocks +@@ -530,29 +534,28 @@ enum TJXOP { + */ + #define TJXOPT_CROP 4 + /** +- * This option will discard the color data in the input image and produce +- * a grayscale output image. ++ * This option will discard the color data in the source image and produce a ++ * grayscale destination image. + */ + #define TJXOPT_GRAY 8 + /** + * This option will prevent #tjTransform() from outputting a JPEG image for +- * this particular transform (this can be used in conjunction with a custom ++ * this particular transform. (This can be used in conjunction with a custom + * filter to capture the transformed DCT coefficients without transcoding + * them.) + */ + #define TJXOPT_NOOUTPUT 16 + /** +- * This option will enable progressive entropy coding in the output image ++ * This option will enable progressive entropy coding in the JPEG image + * generated by this particular transform. 
Progressive entropy coding will + * generally improve compression relative to baseline entropy coding (the +- * default), but it will reduce compression and decompression performance +- * considerably. ++ * default), but it will reduce decompression performance considerably. + */ + #define TJXOPT_PROGRESSIVE 32 + /** + * This option will prevent #tjTransform() from copying any extra markers +- * (including EXIF and ICC profile data) from the source image to the output +- * image. ++ * (including EXIF and ICC profile data) from the source image to the ++ * destination image. + */ + #define TJXOPT_COPYNONE 64 + +@@ -586,12 +589,12 @@ typedef struct { + */ + int y; + /** +- * The width of the cropping region. Setting this to 0 is the equivalent of ++ * The width of the cropping region. Setting this to 0 is the equivalent of + * setting it to the width of the source JPEG image - x. + */ + int w; + /** +- * The height of the cropping region. Setting this to 0 is the equivalent of ++ * The height of the cropping region. Setting this to 0 is the equivalent of + * setting it to the height of the source JPEG image - y. + */ + int h; +@@ -610,7 +613,8 @@ typedef struct tjtransform { + */ + int op; + /** +- * The bitwise OR of one of more of the @ref TJXOPT_CROP "transform options" ++ * The bitwise OR of one of more of the @ref TJXOPT_COPYNONE ++ * "transform options" + */ + int options; + /** +@@ -619,10 +623,10 @@ typedef struct tjtransform { + */ + void *data; + /** +- * A callback function that can be used to modify the DCT coefficients +- * after they are losslessly transformed but before they are transcoded to a +- * new JPEG image. This allows for custom filters or other transformations +- * to be applied in the frequency domain. ++ * A callback function that can be used to modify the DCT coefficients after ++ * they are losslessly transformed but before they are transcoded to a new ++ * JPEG image. 
This allows for custom filters or other transformations to be ++ * applied in the frequency domain. + * + * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE: + * this pointer is not guaranteed to be valid once the callback returns, so +@@ -630,21 +634,21 @@ typedef struct tjtransform { + * or library should make a copy of them within the body of the callback.) + * + * @param arrayRegion #tjregion structure containing the width and height of +- * the array pointed to by coeffs as well as its offset relative to +- * the component plane. TurboJPEG implementations may choose to split each ++ * the array pointed to by `coeffs` as well as its offset relative to the ++ * component plane. TurboJPEG implementations may choose to split each + * component plane into multiple DCT coefficient arrays and call the callback + * function once for each array. + * + * @param planeRegion #tjregion structure containing the width and height of +- * the component plane to which coeffs belongs ++ * the component plane to which `coeffs` belongs + * +- * @param componentID ID number of the component plane to which +- * coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, +- * and 2 in typical JPEG images.) ++ * @param componentID ID number of the component plane to which `coeffs` ++ * belongs. (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in ++ * typical JPEG images.) + * +- * @param transformID ID number of the transformed image to which +- * coeffs belongs. This is the same as the index of the transform +- * in the transforms array that was passed to #tjTransform(). ++ * @param transformID ID number of the transformed image to which `coeffs` ++ * belongs. This is the same as the index of the transform in the ++ * `transforms` array that was passed to #tjTransform(). 
+ * + * @param transform a pointer to a #tjtransform structure that specifies the + * parameters and/or cropping region for this transform +@@ -663,14 +667,14 @@ typedef void *tjhandle; + + + /** +- * Pad the given width to the nearest 32-bit boundary ++ * Pad the given width to the nearest multiple of 4 + */ + #define TJPAD(width) (((width) + 3) & (~3)) + + /** +- * Compute the scaled value of dimension using the given scaling +- * factor. This macro performs the integer equivalent of ceil(dimension * +- * scalingFactor). ++ * Compute the scaled value of `dimension` using the given scaling factor. ++ * This macro performs the integer equivalent of `ceil(dimension * ++ * scalingFactor)`. + */ + #define TJSCALED(dimension, scalingFactor) \ + (((dimension) * scalingFactor.num + scalingFactor.denom - 1) / \ +@@ -685,27 +689,27 @@ extern "C" { + /** + * Create a TurboJPEG compressor instance. + * +- * @return a handle to the newly-created instance, or NULL if an error +- * occurred (see #tjGetErrorStr2().) ++ * @return a handle to the newly-created instance, or NULL if an error occurred ++ * (see #tjGetErrorStr2().) + */ + DLLEXPORT tjhandle tjInitCompress(void); + + + /** +- * Compress an RGB, grayscale, or CMYK image into a JPEG image. ++ * Compress a packed-pixel RGB, grayscale, or CMYK image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * +- * @param srcBuf pointer to an image buffer containing RGB, grayscale, or +- * CMYK pixels to be compressed ++ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale, ++ * or CMYK source image to be compressed + * + * @param width width (in pixels) of the source image + * +- * @param pitch bytes per line in the source image. 
Normally, this should be +- * width * #tjPixelSize[pixelFormat] if the image is unpadded, or +- * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image +- * is padded to the nearest 32-bit boundary, as is the case for Windows +- * bitmaps. You can also be clever and use this parameter to skip lines, etc. ++ * @param pitch bytes per row in the source image. Normally this should be ++ * width * #tjPixelSize[pixelFormat], if the image is unpadded, or ++ * #TJPAD(width * #tjPixelSize[pixelFormat]) if each row of the image ++ * is padded to the nearest multiple of 4 bytes, as is the case for Windows ++ * bitmaps. You can also be clever and use this parameter to skip rows, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * +@@ -714,29 +718,28 @@ DLLEXPORT tjhandle tjInitCompress(void); + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * +- * @param jpegBuf address of a pointer to an image buffer that will receive the +- * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer +- * to accommodate the size of the JPEG image. Thus, you can choose to: ++ * @param jpegBuf address of a pointer to a byte buffer that will receive the ++ * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to ++ * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, +- * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer +- * for you, or ++ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you, ++ * or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be +- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) ++ * re-allocated. 
(Setting #TJFLAG_NOREALLOC guarantees that it won't be.) + * . +- * If you choose option 1, *jpegSize should be set to the size of your ++ * If you choose option 1, then `*jpegSize` should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, +- * you should always check *jpegBuf upon return from this function, as +- * it may have changed. ++ * you should always check `*jpegBuf` upon return from this function, as it may ++ * have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of +- * the JPEG image buffer. If *jpegBuf points to a pre-allocated +- * buffer, then *jpegSize should be set to the size of the buffer. +- * Upon return, *jpegSize will contain the size of the JPEG image (in +- * bytes.) If *jpegBuf points to a JPEG image buffer that is being +- * reused from a previous call to one of the JPEG compression functions, then +- * *jpegSize is ignored. ++ * the JPEG buffer. If `*jpegBuf` points to a pre-allocated buffer, then ++ * `*jpegSize` should be set to the size of the buffer. Upon return, ++ * `*jpegSize` will contain the size of the JPEG image (in bytes.) If ++ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call ++ * to one of the JPEG compression functions, then `*jpegSize` is ignored. + * + * @param jpegSubsamp the level of chrominance subsampling to be used when + * generating the JPEG image (see @ref TJSAMP +@@ -750,7 +753,7 @@ DLLEXPORT tjhandle tjInitCompress(void); + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) +-*/ ++ */ + DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char **jpegBuf, unsigned long *jpegSize, +@@ -758,55 +761,55 @@ DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + + + /** +- * Compress a YUV planar image into a JPEG image. 
++ * Compress a unified planar YUV image into a JPEG image. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * +- * @param srcBuf pointer to an image buffer containing a YUV planar image to be +- * compressed. The size of this buffer should match the value returned by +- * #tjBufSizeYUV2() for the given image width, height, padding, and level of +- * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be +- * stored sequentially in the source buffer (refer to @ref YUVnotes +- * "YUV Image Format Notes".) ++ * @param srcBuf pointer to a buffer containing a unified planar YUV source ++ * image to be compressed. The size of this buffer should match the value ++ * returned by #tjBufSizeYUV2() for the given image width, height, row ++ * alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) ++ * image planes should be stored sequentially in the buffer. (Refer to ++ * @ref YUVnotes "YUV Image Format Notes".) + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate +- * buffer copy will be performed within TurboJPEG. ++ * buffer copy will be performed. + * +- * @param pad the line padding used in the source image. For instance, if each +- * line in each plane of the YUV image is padded to the nearest multiple of 4 +- * bytes, then pad should be set to 4. ++ * @param align row alignment (in bytes) of the source image (must be a power ++ * of 2.) Setting this parameter to n indicates that each row in each plane of ++ * the source image is padded to the nearest multiple of n bytes ++ * (1 = unpadded.) + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an +- * intermediate buffer copy will be performed within TurboJPEG. ++ * intermediate buffer copy will be performed. 
+ * +- * @param subsamp the level of chrominance subsampling used in the source +- * image (see @ref TJSAMP "Chrominance subsampling options".) ++ * @param subsamp the level of chrominance subsampling used in the source image ++ * (see @ref TJSAMP "Chrominance subsampling options".) + * +- * @param jpegBuf address of a pointer to an image buffer that will receive the ++ * @param jpegBuf address of a pointer to a byte buffer that will receive the + * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, +- * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer +- * for you, or ++ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you, ++ * or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be +- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) ++ * re-allocated. (Setting #TJFLAG_NOREALLOC guarantees that it won't be.) + * . +- * If you choose option 1, *jpegSize should be set to the size of your ++ * If you choose option 1, then `*jpegSize` should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, +- * you should always check *jpegBuf upon return from this function, as +- * it may have changed. ++ * you should always check `*jpegBuf` upon return from this function, as it may ++ * have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of +- * the JPEG image buffer. If *jpegBuf points to a pre-allocated +- * buffer, then *jpegSize should be set to the size of the buffer. +- * Upon return, *jpegSize will contain the size of the JPEG image (in +- * bytes.) 
If *jpegBuf points to a JPEG image buffer that is being +- * reused from a previous call to one of the JPEG compression functions, then +- * *jpegSize is ignored. ++ * the JPEG buffer. If `*jpegBuf` points to a pre-allocated buffer, then ++ * `*jpegSize` should be set to the size of the buffer. Upon return, ++ * `*jpegSize` will contain the size of the JPEG image (in bytes.) If ++ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call ++ * to one of the JPEG compression functions, then `*jpegSize` is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) +@@ -816,9 +819,9 @@ DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) +-*/ ++ */ + DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, +- int width, int pad, int height, int subsamp, ++ int width, int align, int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags); +@@ -831,55 +834,54 @@ DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + * + * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if compressing a grayscale image) that contain a YUV +- * image to be compressed. These planes can be contiguous or non-contiguous in +- * memory. The size of each plane should match the value returned by +- * #tjPlaneSizeYUV() for the given image width, height, strides, and level of +- * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" +- * for more details. ++ * source image to be compressed. These planes can be contiguous or ++ * non-contiguous in memory. The size of each plane should match the value ++ * returned by #tjPlaneSizeYUV() for the given image width, height, strides, ++ * and level of chrominance subsampling. 
Refer to @ref YUVnotes ++ * "YUV Image Format Notes" for more details. + * + * @param width width (in pixels) of the source image. If the width is not an + * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate +- * buffer copy will be performed within TurboJPEG. ++ * buffer copy will be performed. + * + * @param strides an array of integers, each specifying the number of bytes per +- * line in the corresponding plane of the YUV source image. Setting the stride ++ * row in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see +- * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then +- * the strides for all planes will be set to their respective plane widths. +- * You can adjust the strides in order to specify an arbitrary amount of line ++ * @ref YUVnotes "YUV Image Format Notes".) If `strides` is NULL, then the ++ * strides for all planes will be set to their respective plane widths. You ++ * can adjust the strides in order to specify an arbitrary amount of row + * padding in each plane or to create a JPEG image from a subregion of a larger +- * YUV planar image. ++ * planar YUV image. + * + * @param height height (in pixels) of the source image. If the height is not + * an even multiple of the MCU block height (see #tjMCUHeight), then an +- * intermediate buffer copy will be performed within TurboJPEG. ++ * intermediate buffer copy will be performed. + * +- * @param subsamp the level of chrominance subsampling used in the source +- * image (see @ref TJSAMP "Chrominance subsampling options".) ++ * @param subsamp the level of chrominance subsampling used in the source image ++ * (see @ref TJSAMP "Chrominance subsampling options".) + * +- * @param jpegBuf address of a pointer to an image buffer that will receive the ++ * @param jpegBuf address of a pointer to a byte buffer that will receive the + * JPEG image. 
TurboJPEG has the ability to reallocate the JPEG buffer to + * accommodate the size of the JPEG image. Thus, you can choose to: + * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and + * let TurboJPEG grow the buffer as needed, +- * -# set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer +- * for you, or ++ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you, ++ * or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize(). This should ensure that the buffer never has to be +- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) ++ * re-allocated. (Setting #TJFLAG_NOREALLOC guarantees that it won't be.) + * . +- * If you choose option 1, *jpegSize should be set to the size of your ++ * If you choose option 1, then `*jpegSize` should be set to the size of your + * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, +- * you should always check *jpegBuf upon return from this function, as +- * it may have changed. ++ * you should always check `*jpegBuf` upon return from this function, as it may ++ * have changed. + * + * @param jpegSize pointer to an unsigned long variable that holds the size of +- * the JPEG image buffer. If *jpegBuf points to a pre-allocated +- * buffer, then *jpegSize should be set to the size of the buffer. +- * Upon return, *jpegSize will contain the size of the JPEG image (in +- * bytes.) If *jpegBuf points to a JPEG image buffer that is being +- * reused from a previous call to one of the JPEG compression functions, then +- * *jpegSize is ignored. ++ * the JPEG buffer. If `*jpegBuf` points to a pre-allocated buffer, then ++ * `*jpegSize` should be set to the size of the buffer. Upon return, ++ * `*jpegSize` will contain the size of the JPEG image (in bytes.) 
If ++ * `*jpegBuf` points to a JPEG buffer that is being reused from a previous call ++ * to one of the JPEG compression functions, then `*jpegSize` is ignored. + * + * @param jpegQual the image quality of the generated JPEG image (1 = worst, + * 100 = best) +@@ -889,7 +891,7 @@ DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) +-*/ ++ */ + DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + int width, const int *strides, +@@ -903,11 +905,11 @@ DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + * The maximum size of the buffer (in bytes) required to hold a JPEG image with + * the given parameters. The number of bytes returned by this function is + * larger than the size of the uncompressed source image. The reason for this +- * is that the JPEG format uses 16-bit coefficients, and it is thus possible +- * for a very high-quality JPEG image with very high-frequency content to +- * expand rather than compress when converted to the JPEG format. Such images +- * represent a very rare corner case, but since there is no way to predict the +- * size of a JPEG image prior to compression, the corner case has to be ++ * is that the JPEG format uses 16-bit coefficients, so it is possible for a ++ * very high-quality source image with very high-frequency content to expand ++ * rather than compress when converted to the JPEG format. Such images ++ * represent very rare corner cases, but since there is no way to predict the ++ * size of a JPEG image prior to compression, the corner cases have to be + * handled. + * + * @param width width (in pixels) of the image +@@ -925,23 +927,24 @@ DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp); + + + /** +- * The size of the buffer (in bytes) required to hold a YUV planar image with +- * the given parameters. 
++ * The size of the buffer (in bytes) required to hold a unified planar YUV ++ * image with the given parameters. + * + * @param width width (in pixels) of the image + * +- * @param pad the width of each line in each plane of the image is padded to +- * the nearest multiple of this number of bytes (must be a power of 2.) ++ * @param align row alignment (in bytes) of the image (must be a power of 2.) ++ * Setting this parameter to n specifies that each row in each plane of the ++ * image will be padded to the nearest multiple of n bytes (1 = unpadded.) + * + * @param height height (in pixels) of the image + * + * @param subsamp level of chrominance subsampling in the image (see + * @ref TJSAMP "Chrominance subsampling options".) + * +- * @return the size of the buffer (in bytes) required to hold the image, or +- * -1 if the arguments are out of bounds. ++ * @return the size of the buffer (in bytes) required to hold the image, or -1 ++ * if the arguments are out of bounds. + */ +-DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, ++DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height, + int subsamp); + + +@@ -954,7 +957,7 @@ DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, + * @param width width (in pixels) of the YUV image. NOTE: this is the width of + * the whole image, not the plane width. + * +- * @param stride bytes per line in the image plane. Setting this to 0 is the ++ * @param stride bytes per row in the image plane. Setting this to 0 is the + * equivalent of setting it to the plane width. + * + * @param height height (in pixels) of the YUV image. NOTE: this is the height +@@ -1005,23 +1008,23 @@ DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); + + + /** +- * Encode an RGB or grayscale image into a YUV planar image. 
This function +- * uses the accelerated color conversion routines in the underlying +- * codec but does not execute any of the other steps in the JPEG compression +- * process. ++ * Encode a packed-pixel RGB or grayscale image into a unified planar YUV ++ * image. This function performs color conversion (which is accelerated in the ++ * libjpeg-turbo implementation) but does not execute any of the other steps in ++ * the JPEG compression process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * +- * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels +- * to be encoded ++ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale ++ * source image to be encoded + * + * @param width width (in pixels) of the source image + * +- * @param pitch bytes per line in the source image. Normally, this should be +- * width * #tjPixelSize[pixelFormat] if the image is unpadded, or +- * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image +- * is padded to the nearest 32-bit boundary, as is the case for Windows +- * bitmaps. You can also be clever and use this parameter to skip lines, etc. ++ * @param pitch bytes per row in the source image. Normally this should be ++ * width * #tjPixelSize[pixelFormat], if the image is unpadded, or ++ * #TJPAD(width * #tjPixelSize[pixelFormat]) if each row of the image ++ * is padded to the nearest multiple of 4 bytes, as is the case for Windows ++ * bitmaps. You can also be clever and use this parameter to skip rows, etc. + * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * +@@ -1030,53 +1033,54 @@ DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); + * @param pixelFormat pixel format of the source image (see @ref TJPF + * "Pixel formats".) + * +- * @param dstBuf pointer to an image buffer that will receive the YUV image. 
+- * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based +- * on the image width, height, padding, and level of chrominance subsampling. +- * The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the +- * buffer (refer to @ref YUVnotes "YUV Image Format Notes".) ++ * @param dstBuf pointer to a buffer that will receive the unified planar YUV ++ * image. Use #tjBufSizeYUV2() to determine the appropriate size for this ++ * buffer based on the image width, height, row alignment, and level of ++ * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be ++ * stored sequentially in the buffer. (Refer to @ref YUVnotes ++ * "YUV Image Format Notes".) + * +- * @param pad the width of each line in each plane of the YUV image will be +- * padded to the nearest multiple of this number of bytes (must be a power of +- * 2.) To generate images suitable for X Video, pad should be set to +- * 4. ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n will cause each row in each plane of the ++ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.) ++ * To generate images suitable for X Video, `align` should be set to 4. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X +- * Video, subsamp should be set to @ref TJSAMP_420. This produces an +- * image compatible with the I420 (AKA "YUV420P") format. ++ * Video, `subsamp` should be set to @ref TJSAMP_420. This produces an image ++ * compatible with the I420 (AKA "YUV420P") format. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) 
+-*/ ++ */ + DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, +- unsigned char *dstBuf, int pad, int subsamp, ++ unsigned char *dstBuf, int align, int subsamp, + int flags); + + + /** +- * Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image +- * planes. This function uses the accelerated color conversion routines in the +- * underlying codec but does not execute any of the other steps in the JPEG +- * compression process. ++ * Encode a packed-pixel RGB or grayscale image into separate Y, U (Cb), and ++ * V (Cr) image planes. This function performs color conversion (which is ++ * accelerated in the libjpeg-turbo implementation) but does not execute any of ++ * the other steps in the JPEG compression process. + * + * @param handle a handle to a TurboJPEG compressor or transformer instance + * +- * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels +- * to be encoded ++ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale ++ * source image to be encoded + * + * @param width width (in pixels) of the source image + * +- * @param pitch bytes per line in the source image. Normally, this should be +- * width * #tjPixelSize[pixelFormat] if the image is unpadded, or +- * #TJPAD(width * #tjPixelSize[pixelFormat]) if each line of the image +- * is padded to the nearest 32-bit boundary, as is the case for Windows +- * bitmaps. You can also be clever and use this parameter to skip lines, etc. ++ * @param pitch bytes per row in the source image. Normally this should be ++ * width * #tjPixelSize[pixelFormat], if the image is unpadded, or ++ * #TJPAD(width * #tjPixelSize[pixelFormat]) if each row of the image ++ * is padded to the nearest multiple of 4 bytes, as is the case for Windows ++ * bitmaps. You can also be clever and use this parameter to skip rows, etc. 
+ * Setting this parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * +@@ -1093,26 +1097,26 @@ DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + * Refer to @ref YUVnotes "YUV Image Format Notes" for more details. + * + * @param strides an array of integers, each specifying the number of bytes per +- * line in the corresponding plane of the output image. Setting the stride for +- * any plane to 0 is the same as setting it to the plane width (see +- * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then +- * the strides for all planes will be set to their respective plane widths. +- * You can adjust the strides in order to add an arbitrary amount of line +- * padding to each plane or to encode an RGB or grayscale image into a +- * subregion of a larger YUV planar image. ++ * row in the corresponding plane of the YUV image. Setting the stride for any ++ * plane to 0 is the same as setting it to the plane width (see @ref YUVnotes ++ * "YUV Image Format Notes".) If `strides` is NULL, then the strides for all ++ * planes will be set to their respective plane widths. You can adjust the ++ * strides in order to add an arbitrary amount of row padding to each plane or ++ * to encode an RGB or grayscale image into a subregion of a larger planar YUV ++ * image. + * + * @param subsamp the level of chrominance subsampling to be used when + * generating the YUV image (see @ref TJSAMP + * "Chrominance subsampling options".) To generate images suitable for X +- * Video, subsamp should be set to @ref TJSAMP_420. This produces an +- * image compatible with the I420 (AKA "YUV420P") format. ++ * Video, `subsamp` should be set to @ref TJSAMP_420. This produces an image ++ * compatible with the I420 (AKA "YUV420P") format. 
+ * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT + * "flags" + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) +-*/ ++ */ + DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, +@@ -1122,9 +1126,9 @@ DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + /** + * Create a TurboJPEG decompressor instance. + * +- * @return a handle to the newly-created instance, or NULL if an error +- * occurred (see #tjGetErrorStr2().) +-*/ ++ * @return a handle to the newly-created instance, or NULL if an error occurred ++ * (see #tjGetErrorStr2().) ++ */ + DLLEXPORT tjhandle tjInitDecompress(void); + + +@@ -1134,7 +1138,7 @@ DLLEXPORT tjhandle tjInitDecompress(void); + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +- * @param jpegBuf pointer to a buffer containing a JPEG image or an ++ * @param jpegBuf pointer to a byte buffer containing a JPEG image or an + * "abbreviated table specification" (AKA "tables-only") datastream. Passing a + * tables-only datastream to this function primes the decompressor with + * quantization and Huffman tables that can be used when decompressing +@@ -1145,26 +1149,26 @@ DLLEXPORT tjhandle tjInitDecompress(void); + * @param jpegSize size of the JPEG image or tables-only datastream (in bytes) + * + * @param width pointer to an integer variable that will receive the width (in +- * pixels) of the JPEG image. If jpegBuf points to a tables-only +- * datastream, then width is ignored. ++ * pixels) of the JPEG image. If `jpegBuf` points to a tables-only datastream, ++ * then `width` is ignored. + * + * @param height pointer to an integer variable that will receive the height +- * (in pixels) of the JPEG image. If jpegBuf points to a tables-only +- * datastream, then height is ignored. 
++ * (in pixels) of the JPEG image. If `jpegBuf` points to a tables-only ++ * datastream, then `height` is ignored. + * + * @param jpegSubsamp pointer to an integer variable that will receive the + * level of chrominance subsampling used when the JPEG image was compressed +- * (see @ref TJSAMP "Chrominance subsampling options".) If jpegBuf +- * points to a tables-only datastream, then jpegSubsamp is ignored. ++ * (see @ref TJSAMP "Chrominance subsampling options".) If `jpegBuf` points to ++ * a tables-only datastream, then `jpegSubsamp` is ignored. + * + * @param jpegColorspace pointer to an integer variable that will receive one + * of the JPEG colorspace constants, indicating the colorspace of the JPEG +- * image (see @ref TJCS "JPEG colorspaces".) If jpegBuf +- * points to a tables-only datastream, then jpegColorspace is ignored. ++ * image (see @ref TJCS "JPEG colorspaces".) If `jpegBuf` points to a ++ * tables-only datastream, then `jpegColorspace` is ignored. + * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) +-*/ ++ */ + DLLEXPORT int tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, int *width, +@@ -1173,58 +1177,60 @@ DLLEXPORT int tjDecompressHeader3(tjhandle handle, + + + /** +- * Returns a list of fractional scaling factors that the JPEG decompressor in +- * this implementation of TurboJPEG supports. ++ * Returns a list of fractional scaling factors that the JPEG decompressor ++ * supports. + * +- * @param numscalingfactors pointer to an integer variable that will receive ++ * @param numScalingFactors pointer to an integer variable that will receive + * the number of elements in the list + * + * @return a pointer to a list of fractional scaling factors, or NULL if an + * error is encountered (see #tjGetErrorStr2().) 
+-*/ +-DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors); ++ */ ++DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors); + + + /** +- * Decompress a JPEG image to an RGB, grayscale, or CMYK image. ++ * Decompress a JPEG image into a packed-pixel RGB, grayscale, or CMYK image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress ++ * @param jpegBuf pointer to a byte buffer containing the JPEG image to ++ * decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * +- * @param dstBuf pointer to an image buffer that will receive the decompressed +- * image. This buffer should normally be pitch * scaledHeight bytes +- * in size, where scaledHeight can be determined by calling +- * #TJSCALED() with the JPEG image height and one of the scaling factors +- * returned by #tjGetScalingFactors(). The dstBuf pointer may also be +- * used to decompress into a specific region of a larger buffer. ++ * @param dstBuf pointer to a buffer that will receive the packed-pixel ++ * decompressed image. This buffer should normally be `pitch * scaledHeight` ++ * bytes in size, where `scaledHeight` can be determined by calling #TJSCALED() ++ * with the JPEG image height and one of the scaling factors returned by ++ * #tjGetScalingFactors(). The `dstBuf` pointer may also be used to decompress ++ * into a specific region of a larger buffer. + * + * @param width desired width (in pixels) of the destination image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired width. If width is +- * set to 0, then only the height will be considered when determining the +- * scaled image size. +- * +- * @param pitch bytes per line in the destination image. 
Normally, this is +- * scaledWidth * #tjPixelSize[pixelFormat] if the decompressed image +- * is unpadded, else #TJPAD(scaledWidth * #tjPixelSize[pixelFormat]) +- * if each line of the decompressed image is padded to the nearest 32-bit +- * boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth +- * can be determined by calling #TJSCALED() with the JPEG image width and one +- * of the scaling factors returned by #tjGetScalingFactors().) You can also be +- * clever and use the pitch parameter to skip lines, etc. Setting this +- * parameter to 0 is the equivalent of setting it to ++ * possible image that will fit within the desired width. If `width` is set to ++ * 0, then only the height will be considered when determining the scaled image ++ * size. ++ * ++ * @param pitch bytes per row in the destination image. Normally this should ++ * be set to scaledWidth * #tjPixelSize[pixelFormat], if the ++ * destination image should be unpadded, or ++ * #TJPAD(scaledWidth * #tjPixelSize[pixelFormat]) if each row of the ++ * destination image should be padded to the nearest multiple of 4 bytes, as is ++ * the case for Windows bitmaps. (NOTE: `scaledWidth` can be determined by ++ * calling #TJSCALED() with the JPEG image width and one of the scaling factors ++ * returned by #tjGetScalingFactors().) You can also be clever and use the ++ * pitch parameter to skip rows, etc. Setting this parameter to 0 is the ++ * equivalent of setting it to + * scaledWidth * #tjPixelSize[pixelFormat]. + * + * @param height desired height (in pixels) of the destination image. If this + * is different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired height. If height +- * is set to 0, then only the width will be considered when determining the +- * scaled image size. ++ * possible image that will fit within the desired height. 
If `height` is set ++ * to 0, then only the width will be considered when determining the scaled ++ * image size. + * + * @param pixelFormat pixel format of the destination image (see @ref + * TJPF "Pixel formats".) +@@ -1242,44 +1248,45 @@ DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, + + + /** +- * Decompress a JPEG image to a YUV planar image. This function performs JPEG +- * decompression but leaves out the color conversion step, so a planar YUV +- * image is generated instead of an RGB image. ++ * Decompress a JPEG image into a unified planar YUV image. This function ++ * performs JPEG decompression but leaves out the color conversion step, so a ++ * planar YUV image is generated instead of a packed-pixel image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress ++ * @param jpegBuf pointer to a byte buffer containing the JPEG image to ++ * decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * +- * @param dstBuf pointer to an image buffer that will receive the YUV image. +- * Use #tjBufSizeYUV2() to determine the appropriate size for this buffer based +- * on the image width, height, padding, and level of subsampling. The Y, +- * U (Cb), and V (Cr) image planes will be stored sequentially in the buffer +- * (refer to @ref YUVnotes "YUV Image Format Notes".) ++ * @param dstBuf pointer to a buffer that will receive the unified planar YUV ++ * decompressed image. Use #tjBufSizeYUV2() to determine the appropriate size ++ * for this buffer based on the scaled image width, scaled image height, row ++ * alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) ++ * image planes will be stored sequentially in the buffer. (Refer to ++ * @ref YUVnotes "YUV Image Format Notes".) + * + * @param width desired width (in pixels) of the YUV image. 
If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired width. If width is +- * set to 0, then only the height will be considered when determining the +- * scaled image size. If the scaled width is not an even multiple of the MCU +- * block width (see #tjMCUWidth), then an intermediate buffer copy will be +- * performed within TurboJPEG. ++ * possible image that will fit within the desired width. If `width` is set to ++ * 0, then only the height will be considered when determining the scaled image ++ * size. If the scaled width is not an even multiple of the MCU block width ++ * (see #tjMCUWidth), then an intermediate buffer copy will be performed. + * +- * @param pad the width of each line in each plane of the YUV image will be +- * padded to the nearest multiple of this number of bytes (must be a power of +- * 2.) To generate images suitable for X Video, pad should be set to +- * 4. ++ * @param align row alignment (in bytes) of the YUV image (must be a power of ++ * 2.) Setting this parameter to n will cause each row in each plane of the ++ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.) ++ * To generate images suitable for X Video, `align` should be set to 4. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired height. If height +- * is set to 0, then only the width will be considered when determining the +- * scaled image size. If the scaled height is not an even multiple of the MCU +- * block height (see #tjMCUHeight), then an intermediate buffer copy will be +- * performed within TurboJPEG. 
++ * possible image that will fit within the desired height. If `height` is set ++ * to 0, then only the width will be considered when determining the scaled ++ * image size. If the scaled height is not an even multiple of the MCU block ++ * height (see #tjMCUHeight), then an intermediate buffer copy will be ++ * performed. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT + * "flags" +@@ -1289,54 +1296,55 @@ DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, + */ + DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, +- int width, int pad, int height, int flags); ++ int width, int align, int height, int flags); + + + /** + * Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image + * planes. This function performs JPEG decompression but leaves out the color +- * conversion step, so a planar YUV image is generated instead of an RGB image. ++ * conversion step, so a planar YUV image is generated instead of a ++ * packed-pixel image. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +- * @param jpegBuf pointer to a buffer containing the JPEG image to decompress ++ * @param jpegBuf pointer to a byte buffer containing the JPEG image to ++ * decompress + * + * @param jpegSize size of the JPEG image (in bytes) + * + * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes + * (or just a Y plane, if decompressing a grayscale image) that will receive +- * the YUV image. These planes can be contiguous or non-contiguous in memory. +- * Use #tjPlaneSizeYUV() to determine the appropriate size for each plane based +- * on the scaled image width, scaled image height, strides, and level of +- * chrominance subsampling. Refer to @ref YUVnotes "YUV Image Format Notes" +- * for more details. ++ * the decompressed image. These planes can be contiguous or non-contiguous in ++ * memory. 
Use #tjPlaneSizeYUV() to determine the appropriate size for each ++ * plane based on the scaled image width, scaled image height, strides, and ++ * level of chrominance subsampling. Refer to @ref YUVnotes ++ * "YUV Image Format Notes" for more details. + * + * @param width desired width (in pixels) of the YUV image. If this is + * different than the width of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired width. If width is +- * set to 0, then only the height will be considered when determining the +- * scaled image size. If the scaled width is not an even multiple of the MCU +- * block width (see #tjMCUWidth), then an intermediate buffer copy will be +- * performed within TurboJPEG. ++ * possible image that will fit within the desired width. If `width` is set to ++ * 0, then only the height will be considered when determining the scaled image ++ * size. If the scaled width is not an even multiple of the MCU block width ++ * (see #tjMCUWidth), then an intermediate buffer copy will be performed. + * + * @param strides an array of integers, each specifying the number of bytes per +- * line in the corresponding plane of the output image. Setting the stride for +- * any plane to 0 is the same as setting it to the scaled plane width (see +- * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then +- * the strides for all planes will be set to their respective scaled plane +- * widths. You can adjust the strides in order to add an arbitrary amount of +- * line padding to each plane or to decompress the JPEG image into a subregion +- * of a larger YUV planar image. ++ * row in the corresponding plane of the YUV image. Setting the stride for any ++ * plane to 0 is the same as setting it to the scaled plane width (see ++ * @ref YUVnotes "YUV Image Format Notes".) 
If `strides` is NULL, then the ++ * strides for all planes will be set to their respective scaled plane widths. ++ * You can adjust the strides in order to add an arbitrary amount of row ++ * padding to each plane or to decompress the JPEG image into a subregion of a ++ * larger planar YUV image. + * + * @param height desired height (in pixels) of the YUV image. If this is + * different than the height of the JPEG image being decompressed, then + * TurboJPEG will use scaling in the JPEG decompressor to generate the largest +- * possible image that will fit within the desired height. If height +- * is set to 0, then only the width will be considered when determining the +- * scaled image size. If the scaled height is not an even multiple of the MCU +- * block height (see #tjMCUHeight), then an intermediate buffer copy will be +- * performed within TurboJPEG. ++ * possible image that will fit within the desired height. If `height` is set ++ * to 0, then only the width will be considered when determining the scaled ++ * image size. If the scaled height is not an even multiple of the MCU block ++ * height (see #tjMCUHeight), then an intermediate buffer copy will be ++ * performed. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT + * "flags" +@@ -1352,40 +1360,42 @@ DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + + + /** +- * Decode a YUV planar image into an RGB or grayscale image. This function +- * uses the accelerated color conversion routines in the underlying +- * codec but does not execute any of the other steps in the JPEG decompression +- * process. ++ * Decode a unified planar YUV image into a packed-pixel RGB or grayscale ++ * image. This function performs color conversion (which is accelerated in the ++ * libjpeg-turbo implementation) but does not execute any of the other steps in ++ * the JPEG decompression process. 
+ * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +- * @param srcBuf pointer to an image buffer containing a YUV planar image to be +- * decoded. The size of this buffer should match the value returned by +- * #tjBufSizeYUV2() for the given image width, height, padding, and level of +- * chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be +- * stored sequentially in the source buffer (refer to @ref YUVnotes +- * "YUV Image Format Notes".) ++ * @param srcBuf pointer to a buffer containing a unified planar YUV source ++ * image to be decoded. The size of this buffer should match the value ++ * returned by #tjBufSizeYUV2() for the given image width, height, row ++ * alignment, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) ++ * image planes should be stored sequentially in the source buffer. (Refer to ++ * @ref YUVnotes "YUV Image Format Notes".) + * +- * @param pad Use this parameter to specify that the width of each line in each +- * plane of the YUV source image is padded to the nearest multiple of this +- * number of bytes (must be a power of 2.) ++ * @param align row alignment (in bytes) of the YUV source image (must be a ++ * power of 2.) Setting this parameter to n indicates that each row in each ++ * plane of the YUV source image is padded to the nearest multiple of n bytes ++ * (1 = unpadded.) + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * +- * @param dstBuf pointer to an image buffer that will receive the decoded +- * image. This buffer should normally be pitch * height bytes in +- * size, but the dstBuf pointer can also be used to decode into a +- * specific region of a larger buffer. ++ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded ++ * image. 
This buffer should normally be `pitch * height` bytes in size, but ++ * the `dstBuf` pointer can also be used to decode into a specific region of a ++ * larger buffer. + * + * @param width width (in pixels) of the source and destination images + * +- * @param pitch bytes per line in the destination image. Normally, this should +- * be width * #tjPixelSize[pixelFormat] if the destination image is +- * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line +- * of the destination image should be padded to the nearest 32-bit boundary, as +- * is the case for Windows bitmaps. You can also be clever and use the pitch +- * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent ++ * @param pitch bytes per row in the destination image. Normally this should ++ * be set to width * #tjPixelSize[pixelFormat], if the destination ++ * image should be unpadded, or ++ * #TJPAD(width * #tjPixelSize[pixelFormat]) if each row of the ++ * destination image should be padded to the nearest multiple of 4 bytes, as is ++ * the case for Windows bitmaps. You can also be clever and use the pitch ++ * parameter to skip rows, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images +@@ -1400,16 +1410,16 @@ DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + * and #tjGetErrorCode().) + */ + DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, +- int pad, int subsamp, unsigned char *dstBuf, ++ int align, int subsamp, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags); + + + /** +- * Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale +- * image. This function uses the accelerated color conversion routines in the +- * underlying codec but does not execute any of the other steps in the JPEG +- * decompression process. 
++ * Decode a set of Y, U (Cb), and V (Cr) image planes into a packed-pixel RGB ++ * or grayscale image. This function performs color conversion (which is ++ * accelerated in the libjpeg-turbo implementation) but does not execute any of ++ * the other steps in the JPEG decompression process. + * + * @param handle a handle to a TurboJPEG decompressor or transformer instance + * +@@ -1422,29 +1432,30 @@ DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + * details. + * + * @param strides an array of integers, each specifying the number of bytes per +- * line in the corresponding plane of the YUV source image. Setting the stride ++ * row in the corresponding plane of the YUV source image. Setting the stride + * for any plane to 0 is the same as setting it to the plane width (see +- * @ref YUVnotes "YUV Image Format Notes".) If strides is NULL, then +- * the strides for all planes will be set to their respective plane widths. +- * You can adjust the strides in order to specify an arbitrary amount of line +- * padding in each plane or to decode a subregion of a larger YUV planar image. ++ * @ref YUVnotes "YUV Image Format Notes".) If `strides` is NULL, then the ++ * strides for all planes will be set to their respective plane widths. You ++ * can adjust the strides in order to specify an arbitrary amount of row ++ * padding in each plane or to decode a subregion of a larger planar YUV image. + * + * @param subsamp the level of chrominance subsampling used in the YUV source + * image (see @ref TJSAMP "Chrominance subsampling options".) + * +- * @param dstBuf pointer to an image buffer that will receive the decoded +- * image. This buffer should normally be pitch * height bytes in +- * size, but the dstBuf pointer can also be used to decode into a +- * specific region of a larger buffer. ++ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded ++ * image. 
This buffer should normally be `pitch * height` bytes in size, but ++ * the `dstBuf` pointer can also be used to decode into a specific region of a ++ * larger buffer. + * + * @param width width (in pixels) of the source and destination images + * +- * @param pitch bytes per line in the destination image. Normally, this should +- * be width * #tjPixelSize[pixelFormat] if the destination image is +- * unpadded, or #TJPAD(width * #tjPixelSize[pixelFormat]) if each line +- * of the destination image should be padded to the nearest 32-bit boundary, as +- * is the case for Windows bitmaps. You can also be clever and use the pitch +- * parameter to skip lines, etc. Setting this parameter to 0 is the equivalent ++ * @param pitch bytes per row in the destination image. Normally this should ++ * be set to width * #tjPixelSize[pixelFormat], if the destination ++ * image should be unpadded, or ++ * #TJPAD(width * #tjPixelSize[pixelFormat]) if each row of the ++ * destination image should be padded to the nearest multiple of 4 bytes, as is ++ * the case for Windows bitmaps. You can also be clever and use the pitch ++ * parameter to skip rows, etc. Setting this parameter to 0 is the equivalent + * of setting it to width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the source and destination images +@@ -1483,50 +1494,51 @@ DLLEXPORT tjhandle tjInitTransform(void); + * transform requires reading and performing Huffman decoding on all of the + * coefficients in the source image, regardless of the size of the destination + * image. Thus, this function provides a means of generating multiple +- * transformed images from the same source or applying multiple +- * transformations simultaneously, in order to eliminate the need to read the +- * source coefficients multiple times. 
++ * transformed images from the same source or applying multiple transformations ++ * simultaneously, in order to eliminate the need to read the source ++ * coefficients multiple times. + * + * @param handle a handle to a TurboJPEG transformer instance + * +- * @param jpegBuf pointer to a buffer containing the JPEG source image to ++ * @param jpegBuf pointer to a byte buffer containing the JPEG source image to + * transform + * + * @param jpegSize size of the JPEG source image (in bytes) + * + * @param n the number of transformed JPEG images to generate + * +- * @param dstBufs pointer to an array of n image buffers. dstBufs[i] +- * will receive a JPEG image that has been transformed using the parameters in +- * transforms[i]. TurboJPEG has the ability to reallocate the JPEG +- * buffer to accommodate the size of the JPEG image. Thus, you can choose to: +- * -# pre-allocate the JPEG buffer with an arbitrary size using #tjAlloc() and +- * let TurboJPEG grow the buffer as needed, +- * -# set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer +- * for you, or ++ * @param dstBufs pointer to an array of n byte buffers. `dstBufs[i]` will ++ * receive a JPEG image that has been transformed using the parameters in ++ * `transforms[i]`. TurboJPEG has the ability to reallocate the JPEG ++ * destination buffer to accommodate the size of the transformed JPEG image. ++ * Thus, you can choose to: ++ * -# pre-allocate the JPEG destination buffer with an arbitrary size using ++ * #tjAlloc() and let TurboJPEG grow the buffer as needed, ++ * -# set `dstBufs[i]` to NULL to tell TurboJPEG to allocate the buffer for ++ * you, or + * -# pre-allocate the buffer to a "worst case" size determined by calling + * #tjBufSize() with the transformed or cropped width and height. Under normal + * circumstances, this should ensure that the buffer never has to be +- * re-allocated (setting #TJFLAG_NOREALLOC guarantees that it won't be.) 
Note, +- * however, that there are some rare cases (such as transforming images with a +- * large amount of embedded EXIF or ICC profile data) in which the output image +- * will be larger than the worst-case size, and #TJFLAG_NOREALLOC cannot be +- * used in those cases. ++ * re-allocated. (Setting #TJFLAG_NOREALLOC guarantees that it won't be.) ++ * Note, however, that there are some rare cases (such as transforming images ++ * with a large amount of embedded EXIF or ICC profile data) in which the ++ * transformed JPEG image will be larger than the worst-case size, and ++ * #TJFLAG_NOREALLOC cannot be used in those cases. + * . +- * If you choose option 1, dstSizes[i] should be set to the size of +- * your pre-allocated buffer. In any case, unless you have set +- * #TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return +- * from this function, as it may have changed. ++ * If you choose option 1, then `dstSizes[i]` should be set to the size of your ++ * pre-allocated buffer. In any case, unless you have set #TJFLAG_NOREALLOC, ++ * you should always check `dstBufs[i]` upon return from this function, as it ++ * may have changed. + * + * @param dstSizes pointer to an array of n unsigned long variables that will + * receive the actual sizes (in bytes) of each transformed JPEG image. If +- * dstBufs[i] points to a pre-allocated buffer, then +- * dstSizes[i] should be set to the size of the buffer. Upon return, +- * dstSizes[i] will contain the size of the JPEG image (in bytes.) ++ * `dstBufs[i]` points to a pre-allocated buffer, then `dstSizes[i]` should be ++ * set to the size of the buffer. Upon return, `dstSizes[i]` will contain the ++ * size of the transformed JPEG image (in bytes.) + * + * @param transforms pointer to an array of n #tjtransform structures, each of + * which specifies the transform parameters and/or cropping region for the +- * corresponding transformed output image. ++ * corresponding transformed JPEG image. 
+ * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT + * "flags" +@@ -1552,10 +1564,10 @@ DLLEXPORT int tjDestroy(tjhandle handle); + + + /** +- * Allocate an image buffer for use with TurboJPEG. You should always use +- * this function to allocate the JPEG destination buffer(s) for the compression +- * and transform functions unless you are disabling automatic buffer +- * (re)allocation (by setting #TJFLAG_NOREALLOC.) ++ * Allocate a byte buffer for use with TurboJPEG. You should always use this ++ * function to allocate the JPEG destination buffer(s) for the compression and ++ * transform functions unless you are disabling automatic buffer (re)allocation ++ * (by setting #TJFLAG_NOREALLOC.) + * + * @param bytes the number of bytes to allocate + * +@@ -1568,44 +1580,43 @@ DLLEXPORT unsigned char *tjAlloc(int bytes); + + + /** +- * Load an uncompressed image from disk into memory. ++ * Load a packed-pixel image from disk into memory. + * +- * @param filename name of a file containing an uncompressed image in Windows ++ * @param filename name of a file containing a packed-pixel image in Windows + * BMP or PBMPLUS (PPM/PGM) format + * + * @param width pointer to an integer variable that will receive the width (in +- * pixels) of the uncompressed image ++ * pixels) of the packed-pixel image + * +- * @param align row alignment of the image buffer to be returned (must be a +- * power of 2.) For instance, setting this parameter to 4 will cause all rows +- * in the image buffer to be padded to the nearest 32-bit boundary, and setting +- * this parameter to 1 will cause all rows in the image buffer to be unpadded. ++ * @param align row alignment of the packed-pixel buffer to be returned (must ++ * be a power of 2.) Setting this parameter to n will cause all rows in the ++ * buffer to be padded to the nearest multiple of n bytes (1 = unpadded.) 
+ * + * @param height pointer to an integer variable that will receive the height +- * (in pixels) of the uncompressed image ++ * (in pixels) of the packed-pixel image + * + * @param pixelFormat pointer to an integer variable that specifies or will +- * receive the pixel format of the uncompressed image buffer. The behavior of +- * #tjLoadImage() will vary depending on the value of *pixelFormat +- * passed to the function: +- * - @ref TJPF_UNKNOWN : The uncompressed image buffer returned by the function +- * will use the most optimal pixel format for the file type, and +- * *pixelFormat will contain the ID of this pixel format upon +- * successful return from the function. +- * - @ref TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale +- * colormap can be loaded. ++ * receive the pixel format of the packed-pixel buffer. The behavior of ++ * #tjLoadImage() will vary depending on the value of `*pixelFormat` passed to ++ * the function: ++ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will ++ * use the most optimal pixel format for the file type, and `*pixelFormat` will ++ * contain the ID of that pixel format upon successful return from this ++ * function. ++ * - @ref TJPF_GRAY : Only PGM files and 8-bit-per-pixel BMP files with a ++ * grayscale colormap can be loaded. + * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be + * converted using a quick & dirty algorithm that is suitable only for testing +- * purposes (proper conversion between CMYK and other formats requires a color +- * management system.) +- * - Other @ref TJPF "pixel formats" : The uncompressed image buffer will use +- * the specified pixel format, and pixel format conversion will be performed if ++ * purposes. (Proper conversion between CMYK and other formats requires a ++ * color management system.) 
++ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the ++ * specified pixel format, and pixel format conversion will be performed if + * necessary. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags". + * +- * @return a pointer to a newly-allocated buffer containing the uncompressed ++ * @return a pointer to a newly-allocated buffer containing the packed-pixel + * image, converted to the chosen pixel format and with the chosen row + * alignment, or NULL if an error occurred (see #tjGetErrorStr2().) This + * buffer should be freed using #tjFree(). +@@ -1616,31 +1627,31 @@ DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, + + + /** +- * Save an uncompressed image from memory to disk. ++ * Save a packed-pixel image from memory to disk. + * +- * @param filename name of a file to which to save the uncompressed image. +- * The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, +- * depending on the file extension. ++ * @param filename name of a file to which to save the packed-pixel image. The ++ * image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending ++ * on the file extension. + * +- * @param buffer pointer to an image buffer containing RGB, grayscale, or +- * CMYK pixels to be saved ++ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale, ++ * or CMYK image to be saved + * +- * @param width width (in pixels) of the uncompressed image ++ * @param width width (in pixels) of the packed-pixel image + * +- * @param pitch bytes per line in the image buffer. Setting this parameter to +- * 0 is the equivalent of setting it to ++ * @param pitch bytes per row in the packed-pixel image. Setting this ++ * parameter to 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. 
+ * +- * @param height height (in pixels) of the uncompressed image ++ * @param height height (in pixels) of the packed-pixel image + * +- * @param pixelFormat pixel format of the image buffer (see @ref TJPF ++ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF + * "Pixel formats".) If this parameter is set to @ref TJPF_GRAY, then the +- * image will be stored in PGM or 8-bit (indexed color) BMP format. Otherwise, +- * the image will be stored in PPM or 24-bit BMP format. If this parameter +- * is set to @ref TJPF_CMYK, then the CMYK pixels will be converted to RGB +- * using a quick & dirty algorithm that is suitable only for testing (proper +- * conversion between CMYK and other formats requires a color management +- * system.) ++ * image will be stored in PGM or 8-bit-per-pixel (indexed color) BMP format. ++ * Otherwise, the image will be stored in PPM or 24-bit-per-pixel BMP format. ++ * If this parameter is set to @ref TJPF_CMYK, then the CMYK pixels will be ++ * converted to RGB using a quick & dirty algorithm that is suitable only for ++ * testing purposes. (Proper conversion between CMYK and other formats ++ * requires a color management system.) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags". +@@ -1653,8 +1664,8 @@ DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer, + + + /** +- * Free an image buffer previously allocated by TurboJPEG. You should always +- * use this function to free JPEG destination buffer(s) that were automatically ++ * Free a byte buffer previously allocated by TurboJPEG. You should always use ++ * this function to free JPEG destination buffer(s) that were automatically + * (re)allocated by the compression and transform functions or that were + * manually allocated using #tjAlloc(). 
+ * +@@ -1692,14 +1703,10 @@ DLLEXPORT char *tjGetErrorStr2(tjhandle handle); + DLLEXPORT int tjGetErrorCode(tjhandle handle); + + +-/* Deprecated functions and macros */ +-#define TJFLAG_FORCEMMX 8 +-#define TJFLAG_FORCESSE 16 +-#define TJFLAG_FORCESSE2 32 +-#define TJFLAG_FORCESSE3 128 ++/* Backward compatibility functions and macros (nothing to see here) */ + ++/* TurboJPEG 1.0+ */ + +-/* Backward compatibility functions and macros (nothing to see here) */ + #define NUMSUBOPT TJ_NUMSAMP + #define TJ_444 TJSAMP_444 + #define TJ_422 TJSAMP_422 +@@ -1715,46 +1722,55 @@ DLLEXPORT int tjGetErrorCode(tjhandle handle); + #define TJ_ALPHAFIRST 64 + #define TJ_FORCESSE3 TJFLAG_FORCESSE3 + #define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE +-#define TJ_YUV 512 + + DLLEXPORT unsigned long TJBUFSIZE(int width, int height); + +-DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp); +- +-DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp); +- + DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *dstBuf, unsigned long *compressedSize, + int jpegSubsamp, int jpegQual, int flags); + +-DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, +- int pitch, int height, int pixelSize, +- unsigned char *dstBuf, int subsamp, int flags); +- +-DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, +- int pitch, int height, int pixelFormat, +- unsigned char *dstBuf, int subsamp, int flags); ++DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, ++ unsigned long jpegSize, unsigned char *dstBuf, ++ int width, int pitch, int height, int pixelSize, ++ int flags); + + DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height); + ++DLLEXPORT char *tjGetErrorStr(void); ++ ++/* TurboJPEG 1.1+ */ ++ ++#define TJ_YUV 512 ++ ++DLLEXPORT unsigned long 
TJBUFSIZEYUV(int width, int height, int jpegSubsamp); ++ + DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp); + +-DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, +- unsigned long jpegSize, unsigned char *dstBuf, +- int width, int pitch, int height, int pixelSize, +- int flags); +- + DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int flags); + +-DLLEXPORT char *tjGetErrorStr(void); ++DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, ++ int pitch, int height, int pixelSize, ++ unsigned char *dstBuf, int subsamp, int flags); + ++/* TurboJPEG 1.2+ */ ++ ++#define TJFLAG_FORCEMMX 8 ++#define TJFLAG_FORCESSE 16 ++#define TJFLAG_FORCESSE2 32 ++#define TJFLAG_FORCESSE3 128 ++ ++DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp); ++ ++DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, ++ int pitch, int height, int pixelFormat, ++ unsigned char *dstBuf, int subsamp, int flags); + + /** + * @} +diff --git a/win/jpeg.rc.in b/win/jpeg.rc.in +index fca72b7..650fbe9 100644 +--- a/win/jpeg.rc.in ++++ b/win/jpeg.rc.in +@@ -24,7 +24,7 @@ BEGIN + VALUE "ProductVersion", "@VERSION@" + VALUE "ProductName", "@CMAKE_PROJECT_NAME@" + VALUE "InternalName", "jpeg@SO_MAJOR_VERSION@" +- VALUE "LegalCopyright", "Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others" ++ VALUE "LegalCopyright", L"Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others" + VALUE "OriginalFilename", "jpeg@SO_MAJOR_VERSION@.dll" + END + END +diff --git a/win/turbojpeg.rc.in b/win/turbojpeg.rc.in +index cc7ab3a..c6cfc2d 100644 +--- a/win/turbojpeg.rc.in ++++ b/win/turbojpeg.rc.in +@@ -24,7 +24,7 @@ BEGIN + VALUE "ProductVersion", "@VERSION@" + VALUE "ProductName", "@CMAKE_PROJECT_NAME@" + VALUE "InternalName", 
"turbojpeg" +- VALUE "LegalCopyright", "Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others" ++ VALUE "LegalCopyright", L"Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others" + VALUE "OriginalFilename", "turbojpeg.dll" + END + END +diff --git a/wizard.txt b/wizard.txt +index c57fe38..0e155f9 100644 +--- a/wizard.txt ++++ b/wizard.txt +@@ -149,7 +149,15 @@ the script represents a progressive or sequential file, by observing whether + Ss and Se values other than 0 and 63 appear. (The -progressive switch is + not needed to specify this; in fact, it is ignored when -scans appears.) + The scan script must meet the JPEG restrictions on progression sequences. +-(cjpeg checks that the spec's requirements are obeyed.) ++(cjpeg checks that the spec's requirements are obeyed.) More specifically: ++ ++ * An AC scan cannot include coefficients from more than one component. ++ ++ * An AC scan for a particular component must be preceded by a DC scan ++ that includes the same component. ++ ++ * Only the first AC scan that includes a particular coefficient for a ++ particular component can include more than one bit from that coefficient. + + Scan script files are free format, in that arbitrary whitespace can appear + between numbers and around punctuation. 
Also, comments can be included: a +-- +2.25.1 + diff --git a/package/jpeg-turbo/0002-fix-thumbnail-bug.patch b/package/jpeg-turbo/0002-fix-thumbnail-bug.patch new file mode 100644 index 00000000..c2540c3c --- /dev/null +++ b/package/jpeg-turbo/0002-fix-thumbnail-bug.patch @@ -0,0 +1,162 @@ +From 8b1fed41efd82f4843899b02764612bc22fc0a0b Mon Sep 17 00:00:00 2001 +From: "lff@Snode" +Date: Fri, 12 Apr 2024 14:32:03 +0800 +Subject: [PATCH] [fix] thumbnail bug + +--- + simd/rvv/jquanti-rvv.c | 27 ++++++++++++------- + simd/rvv/jsimd.c | 60 +++++++++++++++++++++--------------------- + 2 files changed, 48 insertions(+), 39 deletions(-) + +diff --git a/simd/rvv/jquanti-rvv.c b/simd/rvv/jquanti-rvv.c +index 69b2c20..66b72e6 100644 +--- a/simd/rvv/jquanti-rvv.c ++++ b/simd/rvv/jquanti-rvv.c +@@ -84,7 +84,6 @@ void jsimd_convsamp_rvv(JSAMPARRAY sample_data, JDIMENSION start_col, + void jsimd_quantize_rvv(JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) + { +- int cols_remaining; + size_t vl; + JCOEFPTR out_ptr = coef_block; + DCTELEM *in_ptr = workspace; +@@ -92,12 +91,11 @@ void jsimd_quantize_rvv(JCOEFPTR coef_block, DCTELEM *divisors, + UDCTELEM *recip_ptr = (UDCTELEM *)divisors; + UDCTELEM *corr_ptr = (UDCTELEM *)divisors + DCTSIZE2; + +- vbool4_t mask; +- vint16m4_t out, shift; ++ vint16m4_t out, nout, shift, sign; + vuint16m4_t temp, recip, corr, ushift; + vuint32m8_t product; + +- for (cols_remaining = DCTSIZE2; cols_remaining > 0; cols_remaining -= vl) ++ for (int cols_remaining = DCTSIZE2; cols_remaining > 0; cols_remaining -= vl) + { + /* Set vl for each iteration. */ + vl = __riscv_vsetvl_e16m4(cols_remaining); +@@ -108,19 +106,30 @@ void jsimd_quantize_rvv(JCOEFPTR coef_block, DCTELEM *divisors, + corr = __riscv_vle16_v_u16m4(corr_ptr, vl); + shift = __riscv_vle16_v_i16m4(shift_ptr, vl); + +- /* Mask set to 1 where elements are negative. 
*/ +- mask = __riscv_vmslt_vx_i16m4_b4(out, 0, vl); +- out = __riscv_vneg_v_i16m4_m(mask, out, vl); ++ /* Extract sign from coefficients. */ ++ sign = __riscv_vsra_vx_i16m4(out, 15, vl); ++ ++ /* Get absolute value of DCT coefficients. */ ++ nout = __riscv_vneg_v_i16m4(out, vl); ++ out = __riscv_vmax_vv_i16m4(out, nout, vl); + temp = __riscv_vreinterpret_v_i16m4_u16m4(out); + ++ /* Add correction. */ + temp = __riscv_vadd_vv_u16m4(temp, corr, vl); ++ ++ /* Multiply DCT coefficients by quantization reciprocals. */ + product = __riscv_vwmulu_vv_u32m8(temp, recip, vl); ++ ++ /* Narrow back to 16-bit. */ + shift = __riscv_vadd_vx_i16m4(shift, sizeof(DCTELEM) * 8, vl); + ushift = __riscv_vreinterpret_v_i16m4_u16m4(shift); + temp = __riscv_vnsrl_wv_u16m4(product, ushift, vl); + ++ /* Restore sign to original product. */ + out = __riscv_vreinterpret_v_u16m4_i16m4(temp); +- out = __riscv_vneg_v_i16m4_m(mask, out, vl); ++ out = __riscv_vxor_vv_i16m4(out, sign, vl); ++ out = __riscv_vsub_vv_i16m4(out, sign, vl); ++ + __riscv_vse16_v_i16m4(out_ptr, out, vl); + + in_ptr += vl; +@@ -129,4 +138,4 @@ void jsimd_quantize_rvv(JCOEFPTR coef_block, DCTELEM *divisors, + corr_ptr += vl; + shift_ptr += vl; + } +-} +\ No newline at end of file ++} +diff --git a/simd/rvv/jsimd.c b/simd/rvv/jsimd.c +index 9277e76..1627ab0 100644 +--- a/simd/rvv/jsimd.c ++++ b/simd/rvv/jsimd.c +@@ -734,22 +734,22 @@ jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + GLOBAL(int) + jsimd_can_idct_2x2(void) + { +- init_simd(); ++ // init_simd(); + +- /* The code is optimised for these values only */ +- if (DCTSIZE != 8) +- return 0; +- if (sizeof(JCOEF) != 2) +- return 0; +- if (BITS_IN_JSAMPLE != 8) +- return 0; +- if (sizeof(JDIMENSION) != 4) +- return 0; +- if (sizeof(ISLOW_MULT_TYPE) != 2) +- return 0; ++ // /* The code is optimised for these values only */ ++ // if (DCTSIZE != 8) ++ // return 0; ++ // if (sizeof(JCOEF) != 2) ++ // return 0; ++ // if (BITS_IN_JSAMPLE != 8) ++ // return 0; ++ 
// if (sizeof(JDIMENSION) != 4) ++ // return 0; ++ // if (sizeof(ISLOW_MULT_TYPE) != 2) ++ // return 0; + +- if (simd_support & JSIMD_RVV) +- return 1; ++ // if (simd_support & JSIMD_RVV) ++ // return 1; + + return 0; + } +@@ -757,22 +757,22 @@ jsimd_can_idct_2x2(void) + GLOBAL(int) + jsimd_can_idct_4x4(void) + { +- init_simd(); +- +- /* The code is optimised for these values only */ +- if (DCTSIZE != 8) +- return 0; +- if (sizeof(JCOEF) != 2) +- return 0; +- if (BITS_IN_JSAMPLE != 8) +- return 0; +- if (sizeof(JDIMENSION) != 4) +- return 0; +- if (sizeof(ISLOW_MULT_TYPE) != 2) +- return 0; +- +- if (simd_support & JSIMD_RVV) +- return 1; ++ // init_simd(); ++ ++ // /* The code is optimised for these values only */ ++ // if (DCTSIZE != 8) ++ // return 0; ++ // if (sizeof(JCOEF) != 2) ++ // return 0; ++ // if (BITS_IN_JSAMPLE != 8) ++ // return 0; ++ // if (sizeof(JDIMENSION) != 4) ++ // return 0; ++ // if (sizeof(ISLOW_MULT_TYPE) != 2) ++ // return 0; ++ ++ // if (simd_support & JSIMD_RVV) ++ // return 1; + + return 0; + } +-- +2.25.1 + diff --git a/package/jpeg-turbo/jpeg-turbo.mk b/package/jpeg-turbo/jpeg-turbo.mk index e5bc8e7c..620faa8e 100644 --- a/package/jpeg-turbo/jpeg-turbo.mk +++ b/package/jpeg-turbo/jpeg-turbo.mk @@ -23,10 +23,13 @@ else ifeq ($(BR2_SHARED_STATIC_LIBS),y) JPEG_TURBO_CONF_OPTS += -DENABLE_STATIC=ON -DENABLE_SHARED=ON else ifeq ($(BR2_SHARED_LIBS),y) JPEG_TURBO_CONF_OPTS += -DENABLE_STATIC=OFF -DENABLE_SHARED=ON -endif +endif ifeq ($(BR2_PACKAGE_JPEG_SIMD_SUPPORT),y) JPEG_TURBO_CONF_OPTS += -DWITH_SIMD=ON + ifeq ($(BR2_RISCV_64),y) + JPEG_TURBO_CONF_OPTS += -DCMAKE_C_FLAGS="-march=rv64gcv" + endif # x86 simd support needs nasm JPEG_TURBO_DEPENDENCIES += $(if $(BR2_X86_CPU_HAS_MMX),host-nasm) else diff --git a/package/jpeg/Config.in b/package/jpeg/Config.in index 371f89aa..69014ecd 100644 --- a/package/jpeg/Config.in +++ b/package/jpeg/Config.in @@ -6,6 +6,7 @@ config BR2_PACKAGE_JPEG_SIMD_SUPPORT default y if BR2_POWERPC_CPU_HAS_ALTIVEC && 
!BR2_powerpc64le default y if BR2_POWERPC_CPU_HAS_VSX && BR2_powerpc64le default y if BR2_aarch64 || BR2_aarch64_be + default y if BR2_RISCV_64 config BR2_PACKAGE_JPEG bool "jpeg support" diff --git a/package/libopenssl/0009-RVV-optimized-chacha20.patch b/package/libopenssl/0009-RVV-optimized-chacha20.patch new file mode 100644 index 00000000..1f34473a --- /dev/null +++ b/package/libopenssl/0009-RVV-optimized-chacha20.patch @@ -0,0 +1,312 @@ +From 928cc0e2a0b1bbf48b4f4047708f04c74f1edc1a Mon Sep 17 00:00:00 2001 +From: "lff@Snode" +Date: Mon, 25 Mar 2024 10:57:54 +0800 +Subject: [PATCH] RVV optimized chacha20 + +--- + crypto/chacha/chacha_enc.c | 223 ++++++++++++++++++++++++++++++- + crypto/evp/e_chacha20_poly1305.c | 11 +- + include/crypto/chacha.h | 7 + + 3 files changed, 239 insertions(+), 2 deletions(-) + +diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c +index 18251ea..0231b8f 100644 +--- a/crypto/chacha/chacha_enc.c ++++ b/crypto/chacha/chacha_enc.c +@@ -11,7 +11,7 @@ + + #include + +-#include "crypto/chacha.h" ++#include "include/crypto/chacha.h" + #include "crypto/ctype.h" + + typedef unsigned int u32; +@@ -128,3 +128,224 @@ void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp, + input[12]++; + } + } ++ ++#if defined(__riscv_vector) ++#include ++#define QUARTERROUND_RVV(n, vl) \ ++ { \ ++ va = __riscv_vadd_vv_u32m##n(va, vb, vl); \ ++ vd = __riscv_vxor_vv_u32m##n(vd, va, vl); \ ++ vd_t = __riscv_vsll_vx_u32m##n(vd, 16, vl); \ ++ vd = __riscv_vsrl_vx_u32m##n(vd, 16, vl); \ ++ vd = __riscv_vor_vv_u32m##n(vd, vd_t, vl); \ ++ \ ++ vc = __riscv_vadd_vv_u32m##n(vc, vd, vl); \ ++ vb = __riscv_vxor_vv_u32m##n(vb, vc, vl); \ ++ vb_t = __riscv_vsll_vx_u32m##n(vb, 12, vl); \ ++ vb = __riscv_vsrl_vx_u32m##n(vb, 20, vl); \ ++ vb = __riscv_vor_vv_u32m##n(vb, vb_t, vl); \ ++ \ ++ va = __riscv_vadd_vv_u32m##n(va, vb, vl); \ ++ vd = __riscv_vxor_vv_u32m##n(vd, va, vl); \ ++ vd_t = __riscv_vsll_vx_u32m##n(vd, 8, vl); \ ++ vd = 
__riscv_vsrl_vx_u32m##n(vd, 24, vl); \ ++ vd = __riscv_vor_vv_u32m##n(vd, vd_t, vl); \ ++ \ ++ vc = __riscv_vadd_vv_u32m##n(vc, vd, vl); \ ++ vb = __riscv_vxor_vv_u32m##n(vb, vc, vl); \ ++ vb_t = __riscv_vsll_vx_u32m##n(vb, 7, vl); \ ++ vb = __riscv_vsrl_vx_u32m##n(vb, 25, vl); \ ++ vb = __riscv_vor_vv_u32m##n(vb, vb_t, vl); \ ++ } ++ ++void ChaCha20_ctr32_r(unsigned char *out, const unsigned char *inp, ++ size_t len, size_t blocks, const unsigned int key[8], ++ const unsigned int counter[4]) ++{ ++ size_t i, vl; ++ u8 outbuf[4*16*8]; // 4Bytes x 16elems x 8blocks ++ ++ vuint32m1_t v00, v01, v02, v03, v04, v05, v06, v07, v08, v09, v10, v11, v12, v13, v14, v15; ++ vuint8m8_t vkey, vsrc; ++ vuint32m4_t va, vb, vc, vd, vb_t, vd_t; ++ vuint32m1_t vtmp0, vtmp1, vtmp2, vtmp3; ++ ++ /* deal with 8 blocks at a time */ ++ vuint32m1_t v12_og = __riscv_vid_v_u32m1(8); ++ v12_og = __riscv_vadd_vx_u32m1(v12_og, counter[0], 8); ++ ++ while (len > 0) { ++ /* prepare 16 vectors for each elements */ ++ v00 = __riscv_vmv_v_x_u32m1(0x61707865, 8); ++ v01 = __riscv_vmv_v_x_u32m1(0x3320646e, 8); ++ v02 = __riscv_vmv_v_x_u32m1(0x79622d32, 8); ++ v03 = __riscv_vmv_v_x_u32m1(0x6b206574, 8); ++ v04 = __riscv_vmv_v_x_u32m1(key[0], 8); ++ v05 = __riscv_vmv_v_x_u32m1(key[1], 8); ++ v06 = __riscv_vmv_v_x_u32m1(key[2], 8); ++ v07 = __riscv_vmv_v_x_u32m1(key[3], 8); ++ v08 = __riscv_vmv_v_x_u32m1(key[4], 8); ++ v09 = __riscv_vmv_v_x_u32m1(key[5], 8); ++ v10 = __riscv_vmv_v_x_u32m1(key[6], 8); ++ v11 = __riscv_vmv_v_x_u32m1(key[7], 8); ++ v12 = v12_og; ++ v13 = __riscv_vmv_v_x_u32m1(counter[1], 8); ++ v14 = __riscv_vmv_v_x_u32m1(counter[2], 8); ++ v15 = __riscv_vmv_v_x_u32m1(counter[3], 8); ++ ++ /* combine and compute 4 vectors simultaneously */ ++ va = __riscv_vset_v_u32m1_u32m4(va, 0, v00); ++ va = __riscv_vset_v_u32m1_u32m4(va, 1, v01); ++ va = __riscv_vset_v_u32m1_u32m4(va, 2, v02); ++ va = __riscv_vset_v_u32m1_u32m4(va, 3, v03); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 0, v04); ++ vb = 
__riscv_vset_v_u32m1_u32m4(vb, 1, v05); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 2, v06); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 3, v07); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 0, v08); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 1, v09); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 2, v10); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 3, v11); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 0, v12); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 1, v13); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 2, v14); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 3, v15); ++ ++ for (i = 0; i < 10; ++i) { ++ /* fisrt half quarter round */ ++ QUARTERROUND_RVV(4, 32); ++ ++ /* rerange */ ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vb, 0); ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vb, 1); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vb, 2); ++ vtmp3 = __riscv_vget_v_u32m4_u32m1(vb, 3); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 0, vtmp1); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 1, vtmp2); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 2, vtmp3); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 3, vtmp0); ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vc, 0); ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vc, 1); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vc, 2); ++ vtmp3 = __riscv_vget_v_u32m4_u32m1(vc, 3); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 0, vtmp2); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 1, vtmp3); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 2, vtmp0); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 3, vtmp1); ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vd, 0); ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vd, 1); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vd, 2); ++ vtmp3 = __riscv_vget_v_u32m4_u32m1(vd, 3); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 0, vtmp3); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 1, vtmp0); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 2, vtmp1); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 3, vtmp2); ++ ++ /* second half quarter round */ ++ QUARTERROUND_RVV(4, 32); ++ ++ /* recover */ ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vb, 0); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vb, 1); ++ vtmp3 = 
__riscv_vget_v_u32m4_u32m1(vb, 2); ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vb, 3); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 0, vtmp0); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 1, vtmp1); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 2, vtmp2); ++ vb = __riscv_vset_v_u32m1_u32m4(vb, 3, vtmp3); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vc, 0); ++ vtmp3 = __riscv_vget_v_u32m4_u32m1(vc, 1); ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vc, 2); ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vc, 3); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 0, vtmp0); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 1, vtmp1); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 2, vtmp2); ++ vc = __riscv_vset_v_u32m1_u32m4(vc, 3, vtmp3); ++ vtmp3 = __riscv_vget_v_u32m4_u32m1(vd, 0); ++ vtmp0 = __riscv_vget_v_u32m4_u32m1(vd, 1); ++ vtmp1 = __riscv_vget_v_u32m4_u32m1(vd, 2); ++ vtmp2 = __riscv_vget_v_u32m4_u32m1(vd, 3); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 0, vtmp0); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 1, vtmp1); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 2, vtmp2); ++ vd = __riscv_vset_v_u32m1_u32m4(vd, 3, vtmp3); ++ ++ } ++ ++ /* split */ ++ v00 = __riscv_vget_v_u32m4_u32m1(va, 0); ++ v01 = __riscv_vget_v_u32m4_u32m1(va, 1); ++ v02 = __riscv_vget_v_u32m4_u32m1(va, 2); ++ v03 = __riscv_vget_v_u32m4_u32m1(va, 3); ++ v04 = __riscv_vget_v_u32m4_u32m1(vb, 0); ++ v05 = __riscv_vget_v_u32m4_u32m1(vb, 1); ++ v06 = __riscv_vget_v_u32m4_u32m1(vb, 2); ++ v07 = __riscv_vget_v_u32m4_u32m1(vb, 3); ++ v08 = __riscv_vget_v_u32m4_u32m1(vc, 0); ++ v09 = __riscv_vget_v_u32m4_u32m1(vc, 1); ++ v10 = __riscv_vget_v_u32m4_u32m1(vc, 2); ++ v11 = __riscv_vget_v_u32m4_u32m1(vc, 3); ++ v12 = __riscv_vget_v_u32m4_u32m1(vd, 0); ++ v13 = __riscv_vget_v_u32m4_u32m1(vd, 1); ++ v14 = __riscv_vget_v_u32m4_u32m1(vd, 2); ++ v15 = __riscv_vget_v_u32m4_u32m1(vd, 3); ++ ++ /* x[i] + input[i] */ ++ v00 = __riscv_vadd_vx_u32m1(v00, 0x61707865, 8); ++ v01 = __riscv_vadd_vx_u32m1(v01, 0x3320646e, 8); ++ v02 = __riscv_vadd_vx_u32m1(v02, 0x79622d32, 8); ++ v03 = __riscv_vadd_vx_u32m1(v03, 
0x6b206574, 8); ++ v04 = __riscv_vadd_vx_u32m1(v04, key[0], 8); ++ v05 = __riscv_vadd_vx_u32m1(v05, key[1], 8); ++ v06 = __riscv_vadd_vx_u32m1(v06, key[2], 8); ++ v07 = __riscv_vadd_vx_u32m1(v07, key[3], 8); ++ v08 = __riscv_vadd_vx_u32m1(v08, key[4], 8); ++ v09 = __riscv_vadd_vx_u32m1(v09, key[5], 8); ++ v10 = __riscv_vadd_vx_u32m1(v10, key[6], 8); ++ v11 = __riscv_vadd_vx_u32m1(v11, key[7], 8); ++ v12 = __riscv_vadd_vv_u32m1(v12, v12_og, 8); ++ v13 = __riscv_vadd_vx_u32m1(v13, counter[1], 8); ++ v14 = __riscv_vadd_vx_u32m1(v14, counter[2], 8); ++ v15 = __riscv_vadd_vx_u32m1(v15, counter[3], 8); ++ ++ /* counter++ */ ++ v12_og = __riscv_vadd_vx_u32m1(v12_og, 8, 8); ++ ++ /* XOR input and store */ ++ int blk = blocks > 8 ? 8 : blocks; ++ ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 0, 64, v00, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 1, 64, v01, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 2, 64, v02, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 3, 64, v03, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 4, 64, v04, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 5, 64, v05, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 6, 64, v06, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 7, 64, v07, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 8, 64, v08, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 9, 64, v09, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 10, 64, v10, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 11, 64, v11, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 12, 64, v12, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 13, 64, v13, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 14, 64, v14, blk); ++ __riscv_vsse32_v_u32m1((u32 *)outbuf + 15, 64, v15, blk); ++ ++ blocks -= blk; ++ ++ /* e32m1*16 = e8m8*2 */ ++ for (i = 0; (len > 0) && (i < 2); ++i) { ++ vl = __riscv_vsetvl_e8m8(len); ++ vsrc = __riscv_vle8_v_u8m8(inp, vl); ++ vkey = __riscv_vle8_v_u8m8(outbuf + i * 256, vl); ++ vsrc = __riscv_vxor_vv_u8m8(vsrc, vkey, vl); ++ ++ 
__riscv_vse8_v_u8m8(out, vsrc, vl); ++ ++ out += vl; ++ inp += vl; ++ len -= vl; ++ } ++ } ++} ++#endif /* __riscv_vector */ +\ No newline at end of file +diff --git a/crypto/evp/e_chacha20_poly1305.c b/crypto/evp/e_chacha20_poly1305.c +index bdc406b..feaf7a6 100644 +--- a/crypto/evp/e_chacha20_poly1305.c ++++ b/crypto/evp/e_chacha20_poly1305.c +@@ -8,6 +8,7 @@ + */ + + #include ++#include + #include "internal/cryptlib.h" + + #ifndef OPENSSL_NO_CHACHA +@@ -16,7 +17,7 @@ + # include + # include "evp_local.h" + # include "crypto/evp.h" +-# include "crypto/chacha.h" ++# include "include/crypto/chacha.h" + + typedef struct { + union { +@@ -102,11 +103,19 @@ static int chacha_cipher(EVP_CIPHER_CTX * ctx, unsigned char *out, + blocks -= ctr32; + ctr32 = 0; + } ++ ++#if defined(__riscv_vector) ++ ChaCha20_ctr32_r(out, inp, len, blocks, key->key.d, key->counter); ++ inp += len; ++ out += len; ++ len -= len; ++#else + blocks *= CHACHA_BLK_SIZE; + ChaCha20_ctr32(out, inp, blocks, key->key.d, key->counter); + len -= blocks; + inp += blocks; + out += blocks; ++#endif + + key->counter[0] = ctr32; + if (ctr32 == 0) key->counter[1]++; +diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h +index 4029400..7ebf4d8 100644 +--- a/include/crypto/chacha.h ++++ b/include/crypto/chacha.h +@@ -26,6 +26,13 @@ + void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp, + size_t len, const unsigned int key[8], + const unsigned int counter[4]); ++ ++#if defined(__riscv_vector) ++void ChaCha20_ctr32_r(unsigned char *out, const unsigned char *inp, ++ size_t len, size_t blocks, const unsigned int key[8], ++ const unsigned int counter[4]); ++#endif ++ + /* + * You can notice that there is no key setup procedure. 
Because it's + * as trivial as collecting bytes into 32-bit elements, it's reckoned +-- +2.25.1 + diff --git a/package/libopenssl/0010-fix-x86-host-compile-err-which-has-no-header-file-ri.patch b/package/libopenssl/0010-fix-x86-host-compile-err-which-has-no-header-file-ri.patch new file mode 100644 index 00000000..a5e32b05 --- /dev/null +++ b/package/libopenssl/0010-fix-x86-host-compile-err-which-has-no-header-file-ri.patch @@ -0,0 +1,27 @@ +From f14b1064fcc9ea570726034ac3766d742503c150 Mon Sep 17 00:00:00 2001 +From: maguoqun +Date: Fri, 29 Mar 2024 17:23:27 +0800 +Subject: [PATCH] fix x86 host compile err which has no header file + riscv_vector.h + +--- + crypto/evp/e_chacha20_poly1305.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/crypto/evp/e_chacha20_poly1305.c b/crypto/evp/e_chacha20_poly1305.c +index feaf7a6..e56f232 100644 +--- a/crypto/evp/e_chacha20_poly1305.c ++++ b/crypto/evp/e_chacha20_poly1305.c +@@ -8,7 +8,9 @@ + */ + + #include ++#if defined(__riscv_vector) + #include ++#endif + #include "internal/cryptlib.h" + + #ifndef OPENSSL_NO_CHACHA +-- +2.34.1 + diff --git a/package/libopenssl/libopenssl.mk b/package/libopenssl/libopenssl.mk index 87a91caa..b68f8ca0 100644 --- a/package/libopenssl/libopenssl.mk +++ b/package/libopenssl/libopenssl.mk @@ -64,6 +64,10 @@ define HOST_LIBOPENSSL_CONFIGURE_CMDS zlib-dynamic endef +ifeq ($(BR2_RISCV_64),y) +LIBOPENSSL_CFLAGS += -march=rv64gcv +endif + define LIBOPENSSL_CONFIGURE_CMDS cd $(@D); \ $(TARGET_CONFIGURE_ARGS) \ diff --git a/package/libpng/0002-RVV-optimized-libpng.patch b/package/libpng/0002-RVV-optimized-libpng.patch new file mode 100644 index 00000000..7a830338 --- /dev/null +++ b/package/libpng/0002-RVV-optimized-libpng.patch @@ -0,0 +1,1359 @@ +From 4aa2a015ac226e7b56207f342bfc5938f54e3d4a Mon Sep 17 00:00:00 2001 +From: "lff@Snode" +Date: Fri, 29 Mar 2024 11:22:34 +0800 +Subject: [PATCH] [RVV] optimized libpng + +--- + CMakeLists.txt | 30 +++ + INSTALL | 10 +- + Makefile.am | 5 + + 
Makefile.in | 49 ++++- + README | 1 + + config.h.in | 13 ++ + configure | 121 +++++++++++ + configure.ac | 85 ++++++++ + contrib/riscv_vector/README | 82 +++++++ + contrib/riscv_vector/linux.c | 84 ++++++++ + png.h | 5 +- + pngpriv.h | 29 +++ + riscv/filter_vector_intrinsics.c | 354 +++++++++++++++++++++++++++++++ + riscv/riscv_init.c | 127 +++++++++++ + 14 files changed, 981 insertions(+), 14 deletions(-) + create mode 100644 contrib/riscv_vector/README + create mode 100644 contrib/riscv_vector/linux.c + create mode 100644 riscv/filter_vector_intrinsics.c + create mode 100644 riscv/riscv_init.c + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 86e0aff..32512c9 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -123,6 +123,30 @@ if(TARGET_ARCH MATCHES "^arm" OR + endif() + endif() + ++# Set definitions and sources for RISC-V. ++if(CMAKE_SYSTEM_PROCESSOR MATCHES "^riscv*") ++ set(PNG_RISCV_VECTOR_POSSIBLE_VALUES check on off) ++ set(PNG_RISCV_VECTOR "check" ++ CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; check is default") ++ set_property(CACHE PNG_RISCV_VECTOR ++ PROPERTY STRINGS ${PNG_RISCV_VECTOR_POSSIBLE_VALUES}) ++ list(FIND PNG_RISCV_VECTOR_POSSIBLE_VALUES ${PNG_RISCV_VECTOR} index) ++ if(index EQUAL -1) ++ message(FATAL_ERROR "PNG_RISCV_VECTOR must be one of [${PNG_RISCV_VECTOR_POSSIBLE_VALUES}]") ++ elseif(NOT ${PNG_RISCV_VECTOR} STREQUAL "off") ++ set(libpng_riscv_sources ++ riscv/riscv_init.c ++ riscv/filter_vector_intrinsics.c) ++ if(${PNG_RISCV_VECTOR} STREQUAL "on") ++ add_definitions(-DPNG_RISCV_VECTOR_OPT=2) ++ elseif(${PNG_RISCV_VECTOR} STREQUAL "check") ++ add_definitions(-DPNG_RISCV_VECTOR_CHECK_SUPPORTED) ++ endif() ++ else() ++ add_definitions(-DPNG_RISCV_VECTOR_OPT=0) ++ endif() ++endif() ++ + # Set definitions and sources for PowerPC. 
+ if(TARGET_ARCH MATCHES "^powerpc*" OR + TARGET_ARCH MATCHES "^ppc64*") +@@ -200,6 +224,11 @@ if(TARGET_ARCH MATCHES "^arm" OR + add_definitions(-DPNG_ARM_NEON_OPT=0) + endif() + ++# Set definitions and sources for RISC-V. ++if(TARGET_ARCH MATCHES "^riscv") ++ add_definitions(-DPNG_RISCV_VECTOR_OPT=0) ++endif() ++ + # Set definitions and sources for PowerPC. + if(TARGET_ARCH MATCHES "^powerpc*" OR + TARGET_ARCH MATCHES "^ppc64*") +@@ -545,6 +574,7 @@ set(libpng_sources + pngwtran.c + pngwutil.c + ${libpng_arm_sources} ++ ${libpng_riscv_sources} + ${libpng_intel_sources} + ${libpng_mips_sources} + ${libpng_powerpc_sources} +diff --git a/INSTALL b/INSTALL +index 042d729..65a6ea9 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -137,7 +137,7 @@ Your directory structure should look like this: + depcomp, install-sh, mkinstalldirs, test-pngtest.sh, etc. + contrib + arm-neon, conftest, examples, gregbook, libtests, pngminim, +- pngminus, pngsuite, tools, visupng ++ pngminus, pngsuite, tools, visupng, riscv-vector + projects + owatcom, visualc71, vstudio + scripts +@@ -287,6 +287,7 @@ are normally detected at run time. Enable them with configure options + such as one of + + --enable-arm-neon=yes ++ --enable-riscv-vector=yes + --enable-mips-msa=yes + --enable-intel-sse=yes + --enable-powerpc-vsx=yes +@@ -299,6 +300,7 @@ or, if you are not using "configure", you can use one + or more of + + CPPFLAGS += "-DPNG_ARM_NEON" ++ CPPFLAGS += "-DPNG_RISCV_VECTOR" + CPPFLAGS += "-DPNG_MIPS_MSA" + CPPFLAGS += "-DPNG_INTEL_SSE" + CPPFLAGS += "-DPNG_POWERPC_VSX" +@@ -318,13 +320,15 @@ to disable a particular one, + or via compiler-command options such as + + CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0, +- -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0" ++ -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0, ++ -DPNG_RISCV_VECTOR_OPT=0" + + If you are using cmake, hardware optimizations are "on" + by default. To disable them, use + + cmake . 
-DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \ +- -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no ++ -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \ ++ -DPNG_RISCV_VECTOR=no + + or disable them all at once with + +diff --git a/Makefile.am b/Makefile.am +index f0ca8d4..86e3da3 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -98,6 +98,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\ + arm/palette_neon_intrinsics.c + endif + ++if PNG_RISCV_VECTOR ++libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\ ++ riscv/filter_vector_intrinsics.c ++endif ++ + if PNG_MIPS_MSA + libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\ + mips/filter_msa_intrinsics.c +diff --git a/Makefile.in b/Makefile.in +index ba57014..a12a9c9 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -118,13 +118,15 @@ bin_PROGRAMS = + @PNG_POWERPC_VSX_TRUE@am__append_5 = powerpc/powerpc_init.c\ + @PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.c + ++@PNG_RISCV_VECTOR_TRUE@am__append_6 = riscv/riscv_init.c\ ++@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.c + + # Versioned symbols and restricted exports +-@HAVE_LD_VERSION_SCRIPT_TRUE@@HAVE_SOLARIS_LD_TRUE@am__append_6 = -Wl,-M -Wl,libpng.vers +-@HAVE_LD_VERSION_SCRIPT_TRUE@@HAVE_SOLARIS_LD_FALSE@am__append_7 = -Wl,--version-script=libpng.vers ++@HAVE_LD_VERSION_SCRIPT_TRUE@@HAVE_SOLARIS_LD_TRUE@am__append_7 = -Wl,-M -Wl,libpng.vers ++@HAVE_LD_VERSION_SCRIPT_TRUE@@HAVE_SOLARIS_LD_FALSE@am__append_8 = -Wl,--version-script=libpng.vers + # Only restricted exports when possible +-@HAVE_LD_VERSION_SCRIPT_FALSE@am__append_8 = -export-symbols libpng.sym +-@DO_PNG_PREFIX_TRUE@am__append_9 = -DPNG_PREFIX='@PNG_PREFIX@' ++@HAVE_LD_VERSION_SCRIPT_FALSE@am__append_9 = -export-symbols libpng.sym ++@DO_PNG_PREFIX_TRUE@am__append_10 = -DPNG_PREFIX='@PNG_PREFIX@' + subdir = . 
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/scripts/libtool.m4 \ +@@ -187,7 +189,8 @@ am__libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES_DIST = png.c \ + arm/palette_neon_intrinsics.c mips/mips_init.c \ + mips/filter_msa_intrinsics.c intel/intel_init.c \ + intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \ +- powerpc/filter_vsx_intrinsics.c ++ powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \ ++ riscv/filter_vector_intrinsics.c + am__dirstamp = $(am__leading_dot)dirstamp + @PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo arm/filter_neon.lo \ + @PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \ +@@ -198,11 +201,14 @@ am__dirstamp = $(am__leading_dot)dirstamp + @PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo + @PNG_POWERPC_VSX_TRUE@am__objects_4 = powerpc/powerpc_init.lo \ + @PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo ++@PNG_RISCV_VECTOR_TRUE@am__objects_5 = riscv/riscv_init.lo \ ++@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.lo + am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \ + pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \ + pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \ + pngwrite.lo pngwtran.lo pngwutil.lo $(am__objects_1) \ +- $(am__objects_2) $(am__objects_3) $(am__objects_4) ++ $(am__objects_2) $(am__objects_3) $(am__objects_4) \ ++ $(am__objects_5) + nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = + libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = \ + $(am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) \ +@@ -283,7 +289,9 @@ am__depfiles_remade = ./$(DEPDIR)/png.Plo ./$(DEPDIR)/pngerror.Plo \ + mips/$(DEPDIR)/filter_msa_intrinsics.Plo \ + mips/$(DEPDIR)/mips_init.Plo \ + powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \ +- powerpc/$(DEPDIR)/powerpc_init.Plo ++ powerpc/$(DEPDIR)/powerpc_init.Plo \ ++ riscv/$(DEPDIR)/filter_vector_intrinsics.Plo \ ++ riscv/$(DEPDIR)/riscv_init.Plo + am__mv = mv -f + CPPASCOMPILE = $(CCAS) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) +@@ -771,12 +779,12 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c \ + pngrutil.c pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c \ + pngwutil.c png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h \ + pngstruct.h pngusr.dfa $(am__append_2) $(am__append_3) \ +- $(am__append_4) $(am__append_5) ++ $(am__append_4) $(am__append_5) $(am__append_6) + nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h + libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined \ + -export-dynamic -version-number \ + @PNGLIB_MAJOR@@PNGLIB_MINOR@:@PNGLIB_RELEASE@:0 \ +- $(am__append_6) $(am__append_7) $(am__append_8) ++ $(am__append_7) $(am__append_8) $(am__append_9) + @HAVE_LD_VERSION_SCRIPT_FALSE@libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng.sym + @HAVE_LD_VERSION_SCRIPT_TRUE@libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES = libpng.vers + pkginclude_HEADERS = png.h pngconf.h +@@ -812,7 +820,7 @@ SUFFIXES = .chk .out + SYMBOL_CFLAGS = -DPNGLIB_LIBNAME='PNG@PNGLIB_MAJOR@@PNGLIB_MINOR@_0' \ + -DPNGLIB_VERSION='@PNGLIB_VERSION@' \ + -DSYMBOL_PREFIX='$(SYMBOL_PREFIX)' -DPNG_NO_USE_READ_MACROS \ +- -DPNG_BUILDING_SYMBOL_TABLE $(am__append_9) ++ -DPNG_BUILDING_SYMBOL_TABLE $(am__append_10) + + # EXT_LIST is a list of the possibly library directory extensions, this exists + # because we can't find a good way of discovering the file extensions that are +@@ -1010,6 +1018,16 @@ powerpc/powerpc_init.lo: powerpc/$(am__dirstamp) \ + powerpc/$(DEPDIR)/$(am__dirstamp) + powerpc/filter_vsx_intrinsics.lo: powerpc/$(am__dirstamp) \ + powerpc/$(DEPDIR)/$(am__dirstamp) ++riscv/$(am__dirstamp): ++ @$(MKDIR_P) riscv ++ @: > riscv/$(am__dirstamp) ++riscv/$(DEPDIR)/$(am__dirstamp): ++ @$(MKDIR_P) riscv/$(DEPDIR) ++ @: > riscv/$(DEPDIR)/$(am__dirstamp) ++riscv/riscv_init.lo: riscv/$(am__dirstamp) \ ++ riscv/$(DEPDIR)/$(am__dirstamp) ++riscv/filter_vector_intrinsics.lo: 
riscv/$(am__dirstamp) \ ++ riscv/$(DEPDIR)/$(am__dirstamp) + + libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LINK) -rpath $(libdir) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LIBADD) $(LIBS) +@@ -1126,6 +1144,8 @@ mostlyclean-compile: + -rm -f mips/*.lo + -rm -f powerpc/*.$(OBJEXT) + -rm -f powerpc/*.lo ++ -rm -f riscv/*.$(OBJEXT) ++ -rm -f riscv/*.lo + + distclean-compile: + -rm -f *.tab.c +@@ -1164,6 +1184,8 @@ distclean-compile: + @AMDEP_TRUE@@am__include@ @am__quote@mips/$(DEPDIR)/mips_init.Plo@am__quote@ # am--include-marker + @AMDEP_TRUE@@am__include@ @am__quote@powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo@am__quote@ # am--include-marker + @AMDEP_TRUE@@am__include@ @am__quote@powerpc/$(DEPDIR)/powerpc_init.Plo@am__quote@ # am--include-marker ++@AMDEP_TRUE@@am__include@ @am__quote@riscv/$(DEPDIR)/filter_vector_intrinsics.Plo@am__quote@ # am--include-marker ++@AMDEP_TRUE@@am__include@ @am__quote@riscv/$(DEPDIR)/riscv_init.Plo@am__quote@ # am--include-marker + + $(am__depfiles_remade): + @$(MKDIR_P) $(@D) +@@ -1228,6 +1250,7 @@ clean-libtool: + -rm -rf intel/.libs intel/_libs + -rm -rf mips/.libs mips/_libs + -rm -rf powerpc/.libs powerpc/_libs ++ -rm -rf riscv/.libs riscv/_libs + + distclean-libtool: + -rm -f libtool config.lt +@@ -2052,6 +2075,8 @@ distclean-generic: + -rm -f mips/$(am__dirstamp) + -rm -f powerpc/$(DEPDIR)/$(am__dirstamp) + -rm -f powerpc/$(am__dirstamp) ++ -rm -f riscv/$(DEPDIR)/$(am__dirstamp) ++ -rm -f riscv/$(am__dirstamp) + + maintainer-clean-generic: + @echo "This command is intended for maintainers to use" +@@ -2102,6 +2127,8 @@ distclean: distclean-am + -rm -f mips/$(DEPDIR)/mips_init.Plo + -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo + -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo 
++ -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo ++ -rm -f riscv/$(DEPDIR)/riscv_init.Plo + -rm -f Makefile + distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-tags +@@ -2187,6 +2214,8 @@ maintainer-clean: maintainer-clean-am + -rm -f mips/$(DEPDIR)/mips_init.Plo + -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo + -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo ++ -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo ++ -rm -f riscv/$(DEPDIR)/riscv_init.Plo + -rm -f Makefile + maintainer-clean-am: distclean-am maintainer-clean-generic + +diff --git a/README b/README +index 097a3c2..d833622 100644 +--- a/README ++++ b/README +@@ -148,6 +148,7 @@ Files in this distribution: + intel/ => Optimized code for the INTEL-SSE2 platform + mips/ => Optimized code for the MIPS platform + powerpc/ => Optimized code for the PowerPC platform ++ riscv/ => Optimized code for the riscv platform + ci/ => Scripts for continuous integration + contrib/ => External contributions + arm-neon/ => Optimized code for the ARM-NEON platform +diff --git a/config.h.in b/config.h.in +index 3309c9f..9b501aa 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -96,6 +96,19 @@ + /* Enable POWERPC VSX optimizations */ + #undef PNG_POWERPC_VSX_OPT + ++/* Turn on RISC-V Vector optimizations at run-time */ ++#undef PNG_RISCV_VECTOR_API_SUPPORTED ++ ++/* Check for RISC-V Vector support at run-time */ ++#undef PNG_RISCV_VECTOR_CHECK_SUPPORTED ++ ++/* Enable RISC-V Vector optimizations */ ++#undef PNG_RISCV_VECTOR_OPT ++ ++/* Enable RISC-V Vector compatibility for drafts 0.7.1, 0.8, 0.9, 0.10 ++ * and release 1.0 */ ++#undef PNG_RISCV_VECTOR_COMPAT ++ + /* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. 
*/ +diff --git a/configure b/configure +index 8ad4092..636c336 100755 +--- a/configure ++++ b/configure +@@ -663,6 +663,8 @@ ac_subst_vars='am__EXEEXT_FALSE + am__EXEEXT_TRUE + LTLIBOBJS + LIBOBJS ++PNG_RISCV_VECTOR_FALSE ++PNG_RISCV_VECTOR_TRUE + PNG_POWERPC_VSX_FALSE + PNG_POWERPC_VSX_TRUE + PNG_INTEL_SSE_FALSE +@@ -849,6 +851,8 @@ enable_arm_neon + enable_mips_msa + enable_intel_sse + enable_powerpc_vsx ++enable_riscv_vector ++enable_riscv_vector_compat + ' + ac_precious_vars='build_alias + host_alias +@@ -1560,6 +1564,24 @@ Optional Features: + default, enable by a call to png_set_option yes/on: + turn on unconditionally. If not specified: + determined by the compiler. ++ --enable-riscv-vector Enable RISC-V Vector optimizations: =no/off, check, ++ api, yes/on: no/off: disable the optimizations; ++ check: use internal checking code (experimental); ++ api: disable by default, enable by a call to ++ png_set_option; yes/on: turn on unconditionally. ++ Note: The compiler must support riscv-vector (e.g. ++ -march=rv64gv). If not specified: determined by the ++ compiler. ++ --enable-riscv-vector-compat ++ Enable compatibility for RISC-V Vector drafts 0.7.1, ++ 0.8, 0.9, or 0.10 (only used if RISC-V Vector ++ optimizations are enabled): =no/off, 0.7.1, 0.8, ++ 0.9, 0.10: no/off: Support RISC-V Vector release ++ 1.0; 0.7.1: Support RISC-V Vector draft 0.7.1; 0.8: ++ Support RISC-V Vector draft 0.8; 0.9: Support RISC-V ++ Vector draft 0.9; 0.10: Support RISC-V Vector draft ++ 0.10. If not specified: off (Support RISC-V Vector ++ release 1.0). 
+ + Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] +@@ -14061,6 +14083,10 @@ printf "%s\n" "#define PNG_POWERPC_VSX_OPT 0" >>confdefs.h + + printf "%s\n" "#define PNG_INTEL_SSE_OPT 0" >>confdefs.h + ++ enable_riscv_vector=no ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_OPT 0" >>confdefs.h ++ + ;; + *) + # allow enabling hardware optimization on any system: +@@ -14089,6 +14115,12 @@ printf "%s\n" "#define PNG_INTEL_SSE_OPT 1" >>confdefs.h + printf "%s\n" "#define PNG_POWERPC_VSX_OPT 2" >>confdefs.h + + ;; ++ riscv*) ++ enable_riscv_vector=yes ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_OPT 2" >>confdefs.h ++ ++ ;; + esac + ;; + esac +@@ -14309,6 +14341,91 @@ else + PNG_POWERPC_VSX_FALSE= + fi + ++# RISC-V ++# === ++# ++# RISC-V Vector support. ++ ++# Check whether --enable-riscv-vector was given. ++if test ${enable_riscv_vector+y} ++then : ++ enableval=$enable_riscv_vector; case "$enableval" in ++ no|off) ++ # disable the default enabling on __RISCV_VECTOR__ systems: ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_OPT 0" >>confdefs.h ++ ++ # Prevent inclusion of the assembler files below: ++ enable_riscv_vector=no;; ++ check) ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_CHECK_SUPPORTED /**/" >>confdefs.h ++;; ++ api) ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_API_SUPPORTED /**/" >>confdefs.h ++;; ++ yes|on) ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_OPT 2" >>confdefs.h ++ ++ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: --enable-riscv-vector: please specify 'check' or 'api', if ++ you want the optimizations unconditionally pass '-march=rv...v' (e.g. '-march=rv64gv') ++ to the compiler." >&5 ++printf "%s\n" "$as_me: WARNING: --enable-riscv-vector: please specify 'check' or 'api', if ++ you want the optimizations unconditionally pass '-march=rv...v' (e.g. '-march=rv64gv') ++ to the compiler." >&2;};; ++ *) ++ as_fn_error $? 
"--enable-riscv-vector=${enable_riscv_vector}: invalid value" "$LINENO" 5 ++ esac ++fi ++ ++ ++# compatibility to RVV draft 0.7.1, 0.8, 0.9, 0.10 and release 1.0 ++# Check whether --enable-riscv-vector-compat was given. ++if test "${enable_riscv_vector_compat+set}" = set; then : ++ enableval=$enable_riscv_vector_compat; case "$enableval" in ++ no|off);; ++ yes|on) ++ as_fn_error $? "--enable-riscv-vector-compat: a specific draft version must be given" "$LINENO" 5;; ++ "0.7.1") ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_COMPAT 7" >>confdefs.h ++;; ++ "0.8") ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_COMPAT 8" >>confdefs.h ++;; ++ "0.9") ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_COMPAT 9" >>confdefs.h ++;; ++ "0.10") ++ ++printf "%s\n" "#define PNG_RISCV_VECTOR_COMPAT 10" >>confdefs.h ++;; ++ *) ++ as_fn_error $? "--enable-riscv-vector-compat=${enable_riscv_vector_compat}: invalid value" "$LINENO" 5 ++ esac ++fi ++ ++ ++# Add RISC-V specific files to all builds where the host_cpu is riscv ('riscv*') or ++# where RISC-V optimizations were explicitly requested (this allows a fallback if a ++# future host CPU does not match 'riscv*') ++ ++ if test "$enable_riscv_vector" != 'no' && ++ case "$host_cpu" in ++ riscv*) :;; ++ *) test "$enable_riscv_vector" != '';; ++ esac; then ++ PNG_RISCV_VECTOR_TRUE= ++ PNG_RISCV_VECTOR_FALSE='#' ++else ++ PNG_RISCV_VECTOR_TRUE='#' ++ PNG_RISCV_VECTOR_FALSE= ++fi ++ + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Extra options for compiler: $PNG_COPTS" >&5 + printf "%s\n" "$as_me: Extra options for compiler: $PNG_COPTS" >&6;} +@@ -14504,6 +14621,10 @@ if test -z "${PNG_POWERPC_VSX_TRUE}" && test -z "${PNG_POWERPC_VSX_FALSE}"; then + as_fn_error $? "conditional \"PNG_POWERPC_VSX\" was never defined. + Usually this means the macro was only invoked conditionally." "$LINENO" 5 + fi ++if test -z "${PNG_RISCV_VECTOR_TRUE}" && test -z "${PNG_RISCV_VECTOR_FALSE}"; then ++ as_fn_error $? 
"conditional \"PNG_RISCV_VECTOR\" was never defined. ++Usually this means the macro was only invoked conditionally." "$LINENO" 5 ++fi + + : "${CONFIG_STATUS=./config.status}" + ac_write_fail=0 +diff --git a/configure.ac b/configure.ac +index 986c774..4ab447b 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -316,6 +316,9 @@ AC_ARG_ENABLE([hardware-optimizations], + enable_intel_sse=no + AC_DEFINE([PNG_INTEL_SSE_OPT], [0], + [Disable INTEL_SSE optimizations]) ++ enable_riscv_vector=no ++ AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], ++ [Disable RISCV_VECTOR optimizations]) + ;; + *) + # allow enabling hardware optimization on any system: +@@ -340,6 +343,11 @@ AC_ARG_ENABLE([hardware-optimizations], + AC_DEFINE([PNG_POWERPC_VSX_OPT], [2], + [Enable POWERPC VSX optimizations]) + ;; ++ riscv*) ++ enable_riscv_vector=yes ++ AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], ++ [Enable RISCV VECTOR optimizations]) ++ ;; + esac + ;; + esac]) +@@ -517,6 +525,83 @@ AM_CONDITIONAL([PNG_POWERPC_VSX], + powerpc*|ppc64*) : ;; + esac]) + ++# RISC-V ++# === ++# ++# RISC-V Vector support. ++ ++AC_ARG_ENABLE([riscv-vector], ++ AS_HELP_STRING([[[--enable-riscv-vector]]], ++ [Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:] ++ [no/off: disable the optimizations; check: use internal checking code] ++ [(experimental); api: disable by default, enable by] ++ [a call to png_set_option; yes/on: turn on unconditionally.] ++ [Note: The compiler must support riscv-vector (e.g. -march=rv64gv).] 
++ [If not specified: determined by the compiler.]), ++ [case "$enableval" in ++ no|off) ++ # disable the default enabling on __RISCV_VECTOR__ systems: ++ AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], ++ [Disable RISC-V Vector optimizations]) ++ # Prevent inclusion of the assembler files below: ++ enable_riscv_vector=no;; ++ check) ++ AC_DEFINE([PNG_RISCV_VECTOR_CHECK_SUPPORTED], [], ++ [Check for RISC-V Vector support at run-time]);; ++ api) ++ AC_DEFINE([PNG_RISCV_VECTOR_API_SUPPORTED], [], ++ [Turn on RISC-V Vector optimizations at run-time]);; ++ yes|on) ++ AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], ++ [Enable RISC-V Vector optimizations]) ++ AC_MSG_WARN([--enable-riscv_vector: please specify 'check' or 'api', if] ++ [you want the optimizations unconditionally pass '-march=rv...v' (e.g. '-march=rv64gv')] ++ [to the compiler.]);; ++ *) ++ AC_MSG_ERROR([--enable-riscv-vector=${enable_riscv_vector}: invalid value]) ++ esac]) ++ ++# compatibility to RVV draft 0.7.1, 0.8, 0.9, 0.10 and release 1.0 ++AC_ARG_ENABLE([riscv-vector-compat], ++ AS_HELP_STRING([[[--enable-riscv-vector-compat]]], ++ [Enable compatibility for RISC-V Vector drafts 0.7.1, 0.8, 0.9, or 0.10 (only used if RISC-V Vector optimizations are enabled): =no/off, 0.7.1, 0.8, 0.9, 0.10:] ++ [no/off: Support RISC-V Vector release 1.0;] ++ [0.7.1: Support RISC-V Vector draft 0.7.1;] ++ [0.8: Support RISC-V Vector draft 0.8;] ++ [0.9: Support RISC-V Vector draft 0.9;] ++ [0.10: Support RISC-V Vector draft 0.10.] 
++ [If not specified: off (Support RISC-V Vector release 1.0).]), ++ [case "$enableval" in ++ no|off);; ++ yes|on) ++ AC_MSG_ERROR([--enable-riscv-vector-compat: a specific draft version must be given]);; ++ "0.7.1") ++ AC_DEFINE([PNG_RISCV_VECTOR_COMPAT], [7], ++ [Support RISC-V Vector draft 0.7.1]);; ++ "0.8") ++ AC_DEFINE([PNG_RISCV_VECTOR_COMPAT], [8], ++ [Support RISC-V Vector draft 0.8]);; ++ "0.9") ++ AC_DEFINE([PNG_RISCV_VECTOR_COMPAT], [9], ++ [Support RISC-V Vector draft 0.9]);; ++ "0.10") ++ AC_DEFINE([PNG_RISCV_VECTOR_COMPAT], [10], ++ [Support RISC-V Vector draft 0.10]);; ++ *) ++ AC_MSG_ERROR([--enable-riscv-vector-compat=${enable_riscv_vector_compat}: invalid value]) ++ esac]) ++ ++# Add RISC-V specific files to all builds where the host_cpu is riscv ('riscv*') or ++# where RISC-V optimizations were explicitly requested (this allows a fallback if a ++# future host CPU does not match 'riscv*') ++ ++AM_CONDITIONAL([PNG_RISCV_VECTOR], ++ [test "$enable_riscv_vector" != 'no' && ++ case "$host_cpu" in ++ riscv*) :;; ++ *) test "$enable_riscv_vector" != '';; ++ esac]) ++ + AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]]) + + # Config files, substituting as above +diff --git a/contrib/riscv_vector/README b/contrib/riscv_vector/README +new file mode 100644 +index 0000000..2b8ad7d +--- /dev/null ++++ b/contrib/riscv_vector/README +@@ -0,0 +1,82 @@ ++OPERATING SYSTEM SPECIFIC RISC-V VECTOR DETECTION ++------------------------------------------------- ++ ++Detection of the ability to execute RISC-V Vector on an RISC-V processor requires ++operating system support. (The information is not available in user mode.) ++ ++HOW TO USE THIS ++--------------- ++ ++This directory contains C code fragments that can be included in riscv/riscv_init.c ++by setting the macro PNG_RISCV_VECTOR_FILE to the file name in "" or <> at build ++time. 
This setting is not recorded in pnglibconf.h and can be changed simply by ++rebuilding riscv/riscv_init.o with the required macro definition. ++ ++For any of this code to be used the RISC-V Vector code must be enabled and run-time ++checks must be supported. I.e.: ++ ++#if PNG_RISCV_VECTOR_OPT > 0 ++#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED ++ ++This is done in a 'configure' build by passing configure the argument: ++ ++ --enable-riscv-vector=check ++ ++Furthermore the compiler must have support for the RISC-V Vector extension enabled ++(e.g. -march=rv64gv). ++ ++FILE FORMAT ++----------- ++ ++Each file documents its testing status as of the last time it was tested (which ++may have been a long time ago): ++ ++STATUS: one of: ++ SUPPORTED: This indicates that the file is included in the regularly ++ performed test builds and bugs are fixed when discovered. ++ COMPILED: This indicates that the code did compile at least once. See the ++ more detailed description for the extent to which the result was ++ successful. ++ TESTED: This means the code was fully compiled into the libpng test programs ++ and these were run at least once. ++ ++BUG REPORTS: an email address to which to send reports of problems ++ ++The file is a fragment of C code. It should not define any 'extern' symbols; ++everything should be static. It must define the function: ++ ++static int png_have_vector(png_structp png_ptr); ++ ++That function must return 1 if RISC-V Vector instructions are supported, 0 if not. ++It must not execute png_error unless it detects a bug. A png_error will prevent ++the reading of the PNG and in the future, writing too. ++ ++BUG REPORTS ++----------- ++ ++If you mail a bug report for any file that is not SUPPORTED there may only be ++limited response. Consider fixing it and sending a patch to fix the problem - ++this is more likely to result in action.
++ ++CONTRIBUTIONS ++------------- ++ ++You may send contributions of new implementations to ++png-mng-implement@sourceforge.net. Please write code in strict C90 C where ++possible. Obviously OS dependencies are to be expected. If you submit code you ++must have the author's permission and it must have a license that is acceptable ++to the current maintainer; in particular that license must permit modification ++and redistribution. ++ ++Please try to make the contribution a single file and give the file a clear and ++unambiguous name that identifies the target OS. If multiple files really are ++required put them all in a sub-directory. ++ ++You must also be prepared to handle bug reports from users of the code, either ++by joining the png-mng-implement mailing list or by providing an email for the ++"BUG REPORTS" entry or both. Please make sure that the header of the file ++contains the STATUS and BUG REPORTS fields as above. ++ ++Please list the OS requirements as precisely as possible. Ideally you should ++also list the environment in which the code has been tested and certainly list ++any environments where you suspect it might not work. +\ No newline at end of file +diff --git a/contrib/riscv_vector/linux.c b/contrib/riscv_vector/linux.c +new file mode 100644 +index 0000000..43a0333 +--- /dev/null ++++ b/contrib/riscv_vector/linux.c +@@ -0,0 +1,84 @@ ++/* contrib/riscv_vector/linux.c ++ * ++ * Copyright (c) 2021 Manfred Schlaegl ++ * Copyright (c) 2020 Cosmin Truta ++ * Copyright (c) 2016 Glenn Randers-Pehrson ++ * Written by Manfred Schlaegl, October 2021. ++ * ++ * This code is released under the libpng license. ++ * For conditions of distribution and use, see the disclaimer ++ * and license in png.h ++ * ++ * SEE contrib/riscv_vector/README before reporting bugs ++ * ++ * STATUS: SUPPORTED ++ * BUG REPORTS: png-mng-implement@sourceforge.net ++ * ++ * png_have_vector implemented for Linux by reading the widely available ++ * pseudo-file /proc/cpuinfo.
++ * ++ * This code is strict ANSI-C and is probably moderately portable; it does ++ * however use <stdio.h>, <string.h> and it assumes that /proc/cpuinfo is ++ * never localized. ++ */ ++ ++#include <stdio.h> ++#include <string.h> ++ ++#define MAX_LINE_SIZE 256 ++ ++/* Return 1 if an "isa" line of /proc/cpuinfo lists the single-letter 'v' ++ * (vector) extension, 0 if not or if the file cannot be opened. */ ++static int ++png_have_vector(png_structp png_ptr) ++{ ++ int ret = 0; ++ ++ FILE *f = fopen("/proc/cpuinfo", "rb"); ++ if (f == NULL) { ++#ifdef PNG_WARNINGS_SUPPORTED ++ png_warning(png_ptr, "/proc/cpuinfo open failed"); ++#endif ++ return 0; ++ } ++ ++ while (!feof(f) && !ferror(f)) ++ { ++ char line[MAX_LINE_SIZE]; ++ char *val; ++ /* read line */ ++ int i = 0; ++ while (i < (MAX_LINE_SIZE - 1)) ++ { ++ int ch = fgetc(f); ++ if (ch == '\n' || ch == EOF) ++ break; ++ line[i++] = (char)ch; ++ } ++ line[i] = '\0'; ++ ++ /* does line start with "isa"? */ ++ if (strncmp("isa", line, 3) != 0) ++ continue; ++ ++ /* find value starting with "rv" */ ++ val = strstr(line, "rv"); ++ if (val == NULL) ++ continue; ++ ++ /* skip "rv" */ ++ val += 2; ++ ++ /* 'v' only counts among the single-letter extensions, which end at ++ * the first '_' (multi-letter names such as "svinval" follow it) */ ++ val[strcspn(val, "_")] = '\0'; ++ if (strchr(val, 'v') != NULL) ++ { ++ ret = 1; ++ break; ++ } ++ } ++ ++ fclose(f); ++ return ret; ++} +\ No newline at end of file +diff --git a/png.h b/png.h +index f109cdf..84a4fb5 100644 +--- a/png.h ++++ b/png.h +@@ -3211,7 +3211,10 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory, + #ifdef PNG_POWERPC_VSX_API_SUPPORTED + # define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions supported */ + #endif +-#define PNG_OPTION_NEXT 12 /* Next option - numbers must be even */ ++#ifdef PNG_RISCV_VECTOR_API_SUPPORTED ++# define PNG_RISCV_VECTOR 12 /* HARDWARE: RISC-V Vector extension supported */ ++#endif ++#define PNG_OPTION_NEXT 14 /* Next option - numbers must be even */ + + /* Return values: NOTE: there are four values and 'off' is *not* zero */ + #define PNG_OPTION_UNSET 0 /* Unset - defaults to off */ +diff --git a/pngpriv.h b/pngpriv.h +index b8a73b6..9a82b7f 100644 +---
a/pngpriv.h ++++ b/pngpriv.h +@@ -276,6 +276,18 @@ + # define PNG_POWERPC_VSX_IMPLEMENTATION 0 + #endif + ++#ifndef PNG_RISCV_VECTOR_OPT ++# if defined(__riscv_vector) ++# define PNG_RISCV_VECTOR_OPT 2 ++# else ++# define PNG_RISCV_VECTOR_OPT 0 ++# endif ++#endif ++ ++#if PNG_RISCV_VECTOR_OPT > 0 ++# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vector ++# define PNG_RISCV_VECTOR_IMPLEMENTATION 1 // intrinsics implementation only ++#endif + + /* Is this a build of a DLL where compilation of the object modules requires + * different preprocessor settings to those required for a simple library? If +@@ -1355,6 +1367,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + #endif + ++#if PNG_RISCV_VECTOR_IMPLEMENTATION > 0 ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vector,(png_row_infop row_info, ++ png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vector,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++#endif ++ + /* Choose the best filter to use and filter the row data */ + PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, + png_row_infop 
row_info),PNG_EMPTY); +diff --git a/riscv/filter_vector_intrinsics.c b/riscv/filter_vector_intrinsics.c +new file mode 100644 +index 0000000..bbcfabd +--- /dev/null ++++ b/riscv/filter_vector_intrinsics.c +@@ -0,0 +1,354 @@ ++/* filter_vector_intrinsics.c - Vector extension optimised filter functions ++ * ++ * Copyright (c) 2021 Manfred Schlaegl ++ * Copyright (c) 2018 Cosmin Truta ++ * Copyright (c) 2016 Glenn Randers-Pehrson ++ * Written by Manfred Schlaegl, October 2021. ++ * ++ * This code is released under the libpng license. ++ * For conditions of distribution and use, see the disclaimer ++ * and license in png.h ++ */ ++ ++#include "../pngpriv.h" ++#include ++ ++#ifdef PNG_READ_SUPPORTED ++ ++/* This code requires -march containing 'v' on the command line: */ ++#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ ++ ++#if PNG_RISCV_VECTOR_OPT > 0 ++ ++void png_read_filter_row_up_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ size_t rowbytes = row_info->rowbytes; ++ ++ /* ++ * row: | b | ++ * prow: | x | ++ * ++ * b = b + x ++ * ++ */ ++ ++ for (size_t vl; rowbytes > 0; rowbytes -= vl, row += vl, prev_row += vl) ++ { ++ vl = __riscv_vsetvl_e8m8(rowbytes); // set vl ++ vuint8m8_t x = __riscv_vle8_v_u8m8(row, vl); // load b=row ++ vuint8m8_t b = __riscv_vle8_v_u8m8(prev_row, vl); // load x=prev_row ++ x = __riscv_vadd_vv_u8m8(x, b, vl); // row += prev_row ++ __riscv_vse8_v_u8m8(row, x, vl); // store x to row ++ } ++} ++ ++void png_read_filter_row_sub3_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ png_bytep rp = row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ /* ++ * |<- bbp ->|<- bbp ->| ++ * row: | a | x | ++ * ++ * x = x + a ++ * ++ */ ++ ++ vuint8m1_t a = __riscv_vmv_v_x_u8m1(0, 3); ++ ++ for (; rp < rp_stop; rp += 12) ++ { ++ vuint8m1_t x = __riscv_vle8_v_u8m1(rp, 16); ++ vuint8m1_t x_1 = __riscv_vslidedown_vx_u8m1(x, 3, 16); ++ vuint8m1_t x_2 = 
__riscv_vslidedown_vx_u8m1(x_1, 3, 16); ++ vuint8m1_t x_3 = __riscv_vslidedown_vx_u8m1(x_2, 3, 16); ++ ++ // ========================================================================== ++ a = __riscv_vadd_vv_u8m1(x, a, 12); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 12); // rgb|000|000|000 ++ ++ a = __riscv_vadd_vv_u8m1(x_1, a, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 3, 12); // rgb|rgb|000|000 ++ ++ a = __riscv_vadd_vv_u8m1(x_2, a, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 6, 12); // rgb|rgb|rgb|000 ++ ++ a = __riscv_vadd_vv_u8m1(x_3, a, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 9, 12); // rgb|rgb|rgb|rgb ++ ++ __riscv_vse8_v_u8m1(rp, dest, 12); ++ } ++} ++ ++void png_read_filter_row_sub4_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ png_bytep rp = row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ /* ++ * |<- bbp ->|<- bbp ->| ++ * row: | a | x | ++ * ++ * x = x + a ++ * ++ */ ++ ++ vuint8m1_t a = __riscv_vmv_v_x_u8m1(0, 4); ++ ++ for (; rp < rp_stop; rp += 16) ++ { ++ vuint8m1_t x = __riscv_vle8_v_u8m1(rp, 16); ++ vuint8m1_t x_1 = __riscv_vslidedown_vx_u8m1(x, 4, 16); ++ vuint8m1_t x_2 = __riscv_vslidedown_vx_u8m1(x_1, 4, 16); ++ vuint8m1_t x_3 = __riscv_vslidedown_vx_u8m1(x_2, 4, 16); ++ ++ // ========================================================================== ++ a = __riscv_vadd_vv_u8m1(x, a, 16); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 16); // rgba|0000|0000|0000 ++ ++ a = __riscv_vadd_vv_u8m1(x_1, a, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 4, 16); // rgba|rgba|0000|0000 ++ ++ a = __riscv_vadd_vv_u8m1(x_2, a, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 8, 16); // rgba|rgba|rgba|0000 ++ ++ a = __riscv_vadd_vv_u8m1(x_3, a, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 12, 16); // rgba|rgba|rgba|rgba ++ ++ __riscv_vse8_v_u8m1(rp, dest, 16); ++ } ++} ++ ++void png_read_filter_row_avg3_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ 
png_bytep rp = row; ++ png_const_bytep pp = prev_row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ asm volatile("csrwi vxrm, 2"); ++ ++ vuint8m1_t x, a, b; ++ a = __riscv_vmv_v_x_u8m1(0, 3); ++ ++ for (; rp < rp_stop; rp += 12, pp += 12) ++ { ++ vuint8m1_t x = __riscv_vle8_v_u8m1(rp, 12); ++ vuint8m1_t x_1 = __riscv_vslidedown_vx_u8m1(x, 3, 12); ++ vuint8m1_t x_2 = __riscv_vslidedown_vx_u8m1(x_1, 3, 12); ++ vuint8m1_t x_3 = __riscv_vslidedown_vx_u8m1(x_2, 3, 12); ++ ++ vuint8m1_t b = __riscv_vle8_v_u8m1(pp, 12); ++ vuint8m1_t b_1 = __riscv_vslidedown_vx_u8m1(b, 3, 12); ++ vuint8m1_t b_2 = __riscv_vslidedown_vx_u8m1(b_1, 3, 12); ++ vuint8m1_t b_3 = __riscv_vslidedown_vx_u8m1(b_2, 3, 12); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b, 12); ++ a = __riscv_vadd_vv_u8m1(a, x, 12); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 12); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_1, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_1, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 3, 12); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_2, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_2, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 6, 12); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_3, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_3, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 9, 12); ++ ++ __riscv_vse8_v_u8m1(rp, dest, 12); ++ } ++ asm volatile("csrwi vxrm, 0"); ++} ++ ++void png_read_filter_row_avg4_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ png_bytep rp = row; ++ png_const_bytep pp = prev_row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ asm volatile("csrwi vxrm, 2"); ++ ++ vuint8m1_t x, a, b; ++ a = __riscv_vmv_v_x_u8m1(0, 4); ++ ++ for (; rp < rp_stop; rp += 16, pp += 16) ++ { ++ vuint8m1_t x = __riscv_vle8_v_u8m1(rp, 16); ++ vuint8m1_t x_1 = __riscv_vslidedown_vx_u8m1(x, 4, 16); ++ vuint8m1_t x_2 = __riscv_vslidedown_vx_u8m1(x_1, 4, 16); ++ vuint8m1_t x_3 = __riscv_vslidedown_vx_u8m1(x_2, 4, 16); ++ ++ vuint8m1_t b = __riscv_vle8_v_u8m1(pp, 16); ++ vuint8m1_t 
b_1 = __riscv_vslidedown_vx_u8m1(b, 4, 16); ++ vuint8m1_t b_2 = __riscv_vslidedown_vx_u8m1(b_1, 4, 16); ++ vuint8m1_t b_3 = __riscv_vslidedown_vx_u8m1(b_2, 4, 16); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b, 16); ++ a = __riscv_vadd_vv_u8m1(a, x, 16); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 16); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_1, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_1, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 4, 16); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_2, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_2, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 8, 16); ++ ++ a = __riscv_vaaddu_vv_u8m1(a, b_3, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_3, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 12, 16); ++ ++ __riscv_vse8_v_u8m1(rp, dest, 16); ++ } ++ asm volatile("csrwi vxrm, 0"); ++} ++ ++#define paeth(n, vl) \ ++ { \ ++ pa = __riscv_vwsubu_vv_u16m2(b_##n, c, vl); \ ++ pb = __riscv_vwsubu_vv_u16m2(a, c, vl); \ ++ \ ++ pc = __riscv_vadd_vv_u16m2(pa, pb, vl); \ ++ tmp = __riscv_vrsub_vx_u16m2(pc, 0, vl); \ ++ pc = __riscv_vminu_vv_u16m2(pc, tmp, vl); \ ++ \ ++ tmp = __riscv_vrsub_vx_u16m2(pa, 0, vl); \ ++ pa = __riscv_vminu_vv_u16m2(pa, tmp, vl); \ ++ \ ++ tmp = __riscv_vrsub_vx_u16m2(pb, 0, vl); \ ++ pb = __riscv_vminu_vv_u16m2(pb, tmp, vl); \ ++ \ ++ m1 = __riscv_vmsltu_vv_u16m2_b8(pb, pa, vl); \ ++ pa = __riscv_vmerge_vvm_u16m2(pa, pb, m1, vl); \ ++ m2 = __riscv_vmsltu_vv_u16m2_b8(pc, pa, vl); \ ++ \ ++ a = __riscv_vmerge_vvm_u8m1(a, b_##n, m1, vl); \ ++ a = __riscv_vmerge_vvm_u8m1(a, c, m2, vl); \ ++ } ++ ++void png_read_filter_row_paeth3_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ png_bytep rp = row; ++ png_const_bytep pp = prev_row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ vbool8_t m1, m2; ++ vuint8m1_t x, a, b, c; ++ vuint16m2_t p, pa, pb, pc, tmp; ++ ++ a = __riscv_vmv_v_x_u8m1(0, 3); ++ c = __riscv_vmv_v_x_u8m1(0, 3); ++ ++ for (; rp < rp_stop; rp += 12, pp += 12) ++ { ++ vuint8m1_t x = 
__riscv_vle8_v_u8m1(rp, 12); ++ vuint8m1_t x_1 = __riscv_vslidedown_vx_u8m1(x, 3, 12); ++ vuint8m1_t x_2 = __riscv_vslidedown_vx_u8m1(x_1, 3, 12); ++ vuint8m1_t x_3 = __riscv_vslidedown_vx_u8m1(x_2, 3, 12); ++ ++ vuint8m1_t b_0 = __riscv_vle8_v_u8m1(pp, 12); ++ vuint8m1_t b_1 = __riscv_vslidedown_vx_u8m1(b_0, 3, 12); ++ vuint8m1_t b_2 = __riscv_vslidedown_vx_u8m1(b_1, 3, 12); ++ vuint8m1_t b_3 = __riscv_vslidedown_vx_u8m1(b_2, 3, 12); ++ ++ // a = paeth(a, b_0, c, 12); ++ paeth(0, 12); ++ a = __riscv_vadd_vv_u8m1(a, x, 12); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 12); ++ c = b_0; ++ ++ // a = paeth(a, b_1, c, 12); ++ paeth(1, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_1, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 3, 12); ++ c = b_1; ++ ++ // a = paeth(a, b_2, c, 12); ++ paeth(2, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_2, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 6, 12); ++ c = b_2; ++ ++ // a = paeth(a, b_3, c, 12); ++ paeth(3, 12); ++ a = __riscv_vadd_vv_u8m1(a, x_3, 12); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 9, 12); ++ c = b_3; ++ ++ __riscv_vse8_v_u8m1(rp, dest, 12); ++ } ++} ++ ++void png_read_filter_row_paeth4_vector(png_row_infop row_info, png_bytep row, ++ png_const_bytep prev_row) ++{ ++ png_bytep rp = row; ++ png_const_bytep pp = prev_row; ++ png_bytep rp_stop = row + row_info->rowbytes; ++ ++ vbool8_t m1, m2; ++ vuint8m1_t x, a, b, c; ++ vuint16m2_t p, pa, pb, pc, tmp; ++ ++ vuint8m1_t bindex0 = __riscv_vid_v_u8m1(4); ++ vuint8m1_t bindex1 = __riscv_vadd_vx_u8m1(bindex0, 4, 4); ++ vuint8m1_t bindex2 = __riscv_vadd_vx_u8m1(bindex1, 4, 4); ++ vuint8m1_t bindex3 = __riscv_vadd_vx_u8m1(bindex2, 4, 4); ++ ++ a = __riscv_vmv_v_x_u8m1(0, 4); ++ c = __riscv_vmv_v_x_u8m1(0, 4); ++ ++ for (; rp < rp_stop; rp += 16, pp += 16) ++ { ++ vuint8m1_t x = __riscv_vle8_v_u8m1(rp, 16); ++ vuint8m1_t x_1 = __riscv_vrgather_vv_u8m1(x, bindex1, 16); ++ vuint8m1_t x_2 = __riscv_vrgather_vv_u8m1(x, bindex2, 16); ++ vuint8m1_t x_3 = __riscv_vrgather_vv_u8m1(x, 
bindex3, 16); ++ ++ vuint8m1_t b_0 = __riscv_vle8_v_u8m1(pp, 16); ++ vuint8m1_t b_1 = __riscv_vrgather_vv_u8m1(b_0, bindex1, 16); ++ vuint8m1_t b_2 = __riscv_vrgather_vv_u8m1(b_0, bindex2, 16); ++ vuint8m1_t b_3 = __riscv_vrgather_vv_u8m1(b_0, bindex3, 16); ++ ++ // a = paeth(a, b_0, c, 16); ++ paeth(0, 16); ++ a = __riscv_vadd_vv_u8m1(a, x, 16); ++ vuint8m1_t dest = __riscv_vmv_v_v_u8m1(a, 16); ++ c = b_0; ++ ++ // a = paeth(a, b_1, c, 16); ++ paeth(1, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_1, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 4, 16); ++ c = b_1; ++ ++ // a = paeth(a, b_2, c, 16); ++ paeth(2, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_2, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 8, 16); ++ c = b_2; ++ ++ // a = paeth(a, b_3, c, 16); ++ paeth(3, 16); ++ a = __riscv_vadd_vv_u8m1(a, x_3, 16); ++ dest = __riscv_vslideup_vx_u8m1(dest, a, 12, 16); ++ c = b_3; ++ ++ __riscv_vse8_v_u8m1(rp, dest, 16); ++ } ++} ++ ++#endif /* PNG_RISCV_VECTOR_OPT > 0 */ ++#endif /* PNG_RISCV_VECTOR_IMPLEMENTATION == 1 (intrinsics) */ ++#endif /* READ */ +diff --git a/riscv/riscv_init.c b/riscv/riscv_init.c +new file mode 100644 +index 0000000..df3ac45 +--- /dev/null ++++ b/riscv/riscv_init.c +@@ -0,0 +1,127 @@ ++/* riscv_init.c - Vector optimised filter functions ++ * ++ * Copyright (c) 2021 Manfred Schlaegl ++ * Copyright (c) 2018 Cosmin Truta ++ * Copyright (c) 2014,2016 Glenn Randers-Pehrson ++ * Written by Manfred Schlaegl, October 2021. ++ * ++ * This code is released under the libpng license. ++ * For conditions of distribution and use, see the disclaimer ++ * and license in png.h ++ */ ++ ++/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are ++ * called. 
++ */ ++#define _POSIX_SOURCE 1 ++ ++#include "../pngpriv.h" ++ ++#ifdef PNG_READ_SUPPORTED ++ ++#if PNG_RISCV_VECTOR_OPT > 0 ++#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED /* Do run-time checks */ ++/* WARNING: it is strongly recommended that you do not build libpng with ++ * run-time checks for CPU features if at all possible. In the case of the RISC-V ++ * Vector instructions there is no processor-specific way of detecting the ++ * presence of the required support, therefore run-time detection is extremely ++ * OS specific. ++ * ++ * You may set the macro PNG_RISCV_VECTOR_FILE to the file name of file containing ++ * a fragment of C source code which defines the png_have_vector function. There ++ * may be number of implementations in contrib/riscv-vector, but the only one that ++ * has partial support is contrib/riscv-vector/linux.c - a generic Linux ++ * implementation which reads /proc/cpufino. ++ */ ++#ifndef PNG_RISCV_VECTOR_FILE ++# ifdef __linux__ ++# define PNG_RISCV_VECTOR_FILE "contrib/riscv-vector/linux.c" ++# endif ++#endif ++ ++#ifdef PNG_RISCV_VECTOR_FILE ++ ++#include /* for sig_atomic_t */ ++static int png_have_vector(png_structp png_ptr); ++#include PNG_RISCV_VECTOR_FILE ++ ++#else /* PNG_RISCV_VECTOR_FILE */ ++# error "PNG_RISCV_VECTOR_FILE undefined: no support for run-time RISC-V Vector checks" ++#endif /* PNG_RISCV_VECTOR_FILE */ ++#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ ++ ++void ++png_init_filter_functions_vector(png_structp pp, unsigned int bpp) ++{ ++ /* The switch statement is compiled in for RISCV_VECTOR_API, the call to ++ * png_have_vector is compiled in for RISCV_VECTOR_CHECK. If both are defined ++ * the check is only performed if the API has not set the Vector option on ++ * or off explicitly. In this case the check controls what happens. 
++ */ ++ png_debug(1, "in png_init_filter_functions_vector"); ++#ifdef PNG_RISCV_VECTOR_API_SUPPORTED ++ switch ((pp->options >> PNG_RISCV_VECTOR) & 3) ++ { ++ case PNG_OPTION_UNSET: ++ /* Allow the run-time check to execute if it has been enabled - ++ * thus both API and CHECK can be turned on. If it isn't supported ++ * this case will fall through to the 'default' below, which just ++ * returns. ++ */ ++#endif /* PNG_RISCV_VECTOR_API_SUPPORTED */ ++#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED ++ { ++ static volatile sig_atomic_t no_vector = -1; /* not checked */ ++ ++ if (no_vector < 0) ++ no_vector = !png_have_vector(pp); ++ ++ if (no_vector) ++ return; ++ } ++#ifdef PNG_RISCV_VECTOR_API_SUPPORTED ++ break; ++#endif ++#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ ++ ++#ifdef PNG_RISCV_VECTOR_API_SUPPORTED ++ default: /* OFF or INVALID */ ++ return; ++ ++ case PNG_OPTION_ON: ++ /* Option turned on */ ++ break; ++ } ++#endif ++ ++ /* IMPORTANT: any new external functions used here must be declared using ++ * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the ++ * 'prefix' option to configure works: ++ * ++ * ./configure --with-libpng-prefix=foobar_ ++ * ++ * Verify you have got this right by running the above command, doing a build ++ * and examining pngprefix.h; it must contain a #define for every external ++ * function you add. (Notice that this happens automatically for the ++ * initialization function.) 
++ */ ++ pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vector; ++ ++ if (bpp == 3) ++ { ++ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector; ++ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vector; ++ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = ++ png_read_filter_row_paeth3_vector; ++ } ++ ++ else if (bpp == 4) ++ { ++ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector; ++ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vector; ++ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = ++ png_read_filter_row_paeth4_vector; ++ } ++} ++#endif /* PNG_RISCV_VECTOR_OPT > 0 */ ++#endif /* READ */ +\ No newline at end of file +-- +2.25.1 + diff --git a/package/libpng/libpng.mk b/package/libpng/libpng.mk index a18c36fe..ea5c0edb 100644 --- a/package/libpng/libpng.mk +++ b/package/libpng/libpng.mk @@ -22,6 +22,11 @@ else LIBPNG_CONF_OPTS += --disable-arm-neon endif +ifeq ($(BR2_RISCV_64),y) +LIBPNG_CONF_OPTS += --enable-riscv-vector +LIBPNG_CONF_OPTS += CFLAGS="-march=rv64gcv" +endif + ifeq ($(BR2_X86_CPU_HAS_SSE2),y) LIBPNG_CONF_OPTS += --enable-intel-sse else diff --git a/package/sdl2/0001-fix-compile-error-when-open-wayland-can-not-find-way.patch b/package/sdl2/0001-fix-compile-error-when-open-wayland-can-not-find-way.patch new file mode 100644 index 00000000..36d377bd --- /dev/null +++ b/package/sdl2/0001-fix-compile-error-when-open-wayland-can-not-find-way.patch @@ -0,0 +1,27 @@ +From fcc00b5cfd9ba11b969c3884024fae6792474cfb Mon Sep 17 00:00:00 2001 +From: fuqiang +Date: Mon, 11 Mar 2024 19:44:13 +0800 +Subject: [PATCH] fix compile error when open wayland: can not find + wayland-scanner + +--- + configure | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/configure b/configure +index 108ba53..a42b5ae 100755 +--- a/configure ++++ b/configure +@@ -23128,7 +23128,8 @@ printf %s "checking for Wayland support... 
" >&6; } + if $PKG_CONFIG --exists 'wayland-client >= 1.18' wayland-scanner wayland-egl wayland-cursor egl 'xkbcommon >= 0.5.0'; then + WAYLAND_CFLAGS=`$PKG_CONFIG --cflags wayland-client wayland-egl wayland-cursor xkbcommon` + WAYLAND_LIBS=`$PKG_CONFIG --libs wayland-client wayland-egl wayland-cursor xkbcommon` +- WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner` ++ #WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner` ++ WAYLAND_SCANNER="$srcdir/../../host/bin/wayland-scanner" + if $PKG_CONFIG --exists 'wayland-scanner >= 1.15' + then : + WAYLAND_SCANNER_CODE_MODE=private-code +-- +2.25.1 + diff --git a/package/sdl2/sdl2.mk b/package/sdl2/sdl2.mk index 5f2e1e0e..c8806ea7 100644 --- a/package/sdl2/sdl2.mk +++ b/package/sdl2/sdl2.mk @@ -23,7 +23,6 @@ SDL2_CONF_OPTS += \ --disable-video-vivante \ --disable-video-cocoa \ --disable-video-metal \ - --disable-video-wayland \ --disable-video-dummy \ --disable-video-offscreen \ --disable-video-vulkan \ @@ -37,7 +36,8 @@ SDL2_CONF_OPTS += \ --disable-hidapi-joystick \ --disable-hidapi-libusb \ --disable-joystick-virtual \ - --disable-render-d3d + --disable-render-d3d \ + --disable-oss # We are using autotools build system for sdl2, so the sdl2-config.cmake # include path are not resolved like for sdl2-config script. 
@@ -181,4 +181,7 @@ else SDL2_CONF_OPTS += --disable-video-kmsdrm endif +SDL2_DEPENDENCIES += wayland libxkbcommon +SDL2_CONF_OPTS += --enable-video-wayland + $(eval $(autotools-package)) diff --git a/package/zlib-ng/0004-RVV-optimized-zlib-ng.patch b/package/zlib-ng/0004-RVV-optimized-zlib-ng.patch new file mode 100644 index 00000000..5d26c9d7 --- /dev/null +++ b/package/zlib-ng/0004-RVV-optimized-zlib-ng.patch @@ -0,0 +1,37255 @@ +From 085f5dfde5a27365e3491718ec2f6bb17b9169a3 Mon Sep 17 00:00:00 2001 +From: "lff@Snode" +Date: Wed, 27 Mar 2024 11:29:31 +0800 +Subject: [PATCH] [RVV] optimized zlib-ng + +--- + .gitignore | 2 + + CMakeLists.txt | 1356 ++- + Makefile.in | 190 +- + PORTING.md | 21 +- + README.md | 172 +- + adler32.c | 28 +- + adler32_fold.c | 16 + + adler32_fold.h | 11 + + adler32_p.h | 35 +- + arch/arm/Makefile.in | 37 +- + arch/arm/acle_intrins.h | 35 + + arch/arm/adler32_neon.c | 183 +- + arch/arm/arm_features.c | 100 + + arch/arm/arm_features.h | 16 + + arch/arm/chunkset_neon.c | 67 +- + arch/arm/compare256_neon.c | 59 + + arch/arm/crc32_acle.c | 96 +- + arch/arm/insert_string_acle.c | 18 +- + arch/arm/neon_intrins.h | 58 + + arch/arm/slide_hash_armv6.c | 47 + + arch/arm/slide_hash_neon.c | 46 + + arch/generic/Makefile.in | 3 + + arch/generic/chunk_permute_table.h | 53 + + arch/power/Makefile.in | 59 +- + arch/power/adler32_power8.c | 9 +- + arch/power/adler32_vmx.c | 186 + + arch/power/chunkset_power8.c | 55 + + arch/power/compare256_power9.c | 64 + + arch/power/crc32_constants.h | 1123 ++ + arch/power/crc32_power8.c | 589 + + arch/power/fallback_builtins.h | 31 + + arch/power/power_features.c | 46 + + arch/power/power_features.h | 18 + + arch/power/slide_hash_power8.c | 56 +- + arch/power/slide_hash_vmx.c | 10 + + arch/power/slide_ppc_tpl.h | 31 + + arch/riscv/README.md | 45 + + arch/riscv/adler32_rvv.c | 155 + + arch/riscv/chunkset_rvv.c | 125 + + arch/riscv/compare256_rvv.c | 47 + + arch/riscv/riscv_features.c | 45 + + 
arch/riscv/riscv_features.h | 18 + + arch/riscv/slide_hash_rvv.c | 34 + + arch/s390/Makefile.in | 16 +- + arch/s390/README.md | 21 +- + arch/s390/crc32-vx.c | 222 + + arch/s390/dfltcc_common.c | 65 +- + arch/s390/dfltcc_common.h | 23 +- + arch/s390/dfltcc_deflate.c | 122 +- + arch/s390/dfltcc_deflate.h | 45 +- + arch/s390/dfltcc_detail.h | 267 +- + arch/s390/dfltcc_inflate.c | 104 +- + arch/s390/dfltcc_inflate.h | 47 +- + arch/s390/s390_features.c | 14 + + arch/s390/s390_features.h | 10 + + .../actions-runner.Dockerfile | 7 +- + .../actions-runner.service | 1 + + arch/x86/Makefile.in | 145 +- + arch/x86/adler32_avx2.c | 154 + + arch/x86/adler32_avx2_p.h | 32 + + arch/x86/adler32_avx512.c | 115 + + arch/x86/adler32_avx512_p.h | 46 + + arch/x86/adler32_avx512_vnni.c | 225 + + arch/x86/adler32_sse42.c | 121 + + arch/x86/adler32_ssse3.c | 198 +- + arch/x86/adler32_ssse3_p.h | 29 + + arch/x86/chunkset_avx2.c | 133 + + arch/x86/chunkset_sse2.c | 56 + + arch/x86/chunkset_ssse3.c | 101 + + arch/x86/compare256_avx2.c | 63 + + arch/x86/compare256_sse2.c | 96 + + arch/x86/crc32_fold_pclmulqdq_tpl.h | 186 + + arch/x86/crc32_fold_vpclmulqdq_tpl.h | 107 + + arch/x86/crc32_pclmulqdq.c | 30 + + arch/x86/crc32_pclmulqdq_tpl.h | 363 + + arch/x86/crc32_vpclmulqdq.c | 17 + + arch/x86/insert_string_sse42.c | 24 + + arch/x86/slide_hash_avx2.c | 39 + + arch/x86/slide_hash_sse2.c | 62 + + arch/x86/x86_features.c | 97 + + arch/x86/x86_features.h | 24 + + arch/x86/x86_intrins.h | 87 + + chunkset.c | 61 +- + chunkset_tpl.h | 176 +- + cmake/detect-arch.c | 10 +- + cmake/detect-arch.cmake | 12 +- + cmake/detect-coverage.cmake | 20 +- + cmake/detect-install-dirs.cmake | 42 +- + cmake/detect-intrinsics.cmake | 543 + + cmake/detect-sanitizer.cmake | 57 +- + cmake/fallback-macros.cmake | 19 + + cmake/toolchain-aarch64.cmake | 8 +- + cmake/toolchain-arm.cmake | 13 +- + cmake/toolchain-armhf.cmake | 25 + + cmake/toolchain-llvm-mingw-aarch64.cmake | 41 + + cmake/toolchain-llvm-mingw-armv7.cmake | 41 
+ + cmake/toolchain-llvm-mingw-i686.cmake | 41 + + cmake/toolchain-llvm-mingw-x86_64.cmake | 41 + + cmake/toolchain-mingw-i686.cmake | 31 +- + cmake/toolchain-mingw-x86_64.cmake | 30 +- + cmake/toolchain-mips.cmake | 29 + + cmake/toolchain-mips64.cmake | 29 + + cmake/toolchain-powerpc.cmake | 12 +- + cmake/toolchain-powerpc64-clang.cmake | 16 + + cmake/toolchain-powerpc64-power9.cmake | 25 + + cmake/toolchain-powerpc64.cmake | 12 +- + cmake/toolchain-powerpc64le-clang.cmake | 16 + + cmake/toolchain-powerpc64le-power9.cmake | 25 + + cmake/toolchain-powerpc64le.cmake | 12 +- + cmake/toolchain-riscv.cmake | 28 + + cmake/toolchain-s390x.cmake | 10 +- + cmake/toolchain-sparc64.cmake | 10 +- + compare256.c | 180 + + compare256_rle.h | 134 + + compress.c | 19 +- + cpu_features.c | 23 + + cpu_features.h | 303 + + crc32_braid.c | 267 + + crc32_braid_comb.c | 57 + + crc32_braid_comb_p.h | 42 + + crc32_braid_p.h | 50 + + crc32_braid_tbl.h | 9446 +++++++++++++++++ + crc32_fold.c | 33 + + crc32_fold.h | 21 + + deflate.c | 704 +- + deflate.h | 127 +- + deflate_fast.c | 27 +- + deflate_huff.c | 45 + + deflate_medium.c | 48 +- + deflate_p.h | 49 +- + deflate_quick.c | 36 +- + deflate_rle.c | 85 + + deflate_slow.c | 40 +- + deflate_stored.c | 186 + + fallback_builtins.h | 54 +- + functable.c | 715 +- + functable.h | 39 +- + gzguts.h | 20 +- + gzlib.c | 24 +- + gzread.c.in | 606 ++ + gzwrite.c | 6 +- + infback.c | 69 +- + inffast_tpl.h | 326 + + inflate.c | 322 +- + inflate.h | 12 +- + inflate_p.h | 133 +- + inftrees.c | 34 +- + inftrees.h | 12 +- + insert_string.c | 20 +- + insert_string_roll.c | 24 + + insert_string_tpl.h | 85 +- + match_tpl.h | 183 +- + slide_hash.c | 52 + + test/Makefile.in | 76 +- + test/README.md | 7 +- + test/abi/ignore | 2 +- + test/abicheck.md | 2 +- + test/abicheck.sh | 29 +- + test/example.c | 52 +- + test/fuzz/standalone_fuzz_target_runner.c | 1 - + test/infcover.c | 18 +- + test/minideflate.c | 129 +- + test/minigzip.c | 46 +- + test/pigz/CMakeLists.txt 
| 15 +- + test/pkgcheck.sh | 19 +- + test/switchlevels.c | 6 +- + tools/makecrct.c | 321 +- + tools/maketrees.c | 16 +- + trees.c | 24 +- + trees_emit.h | 5 +- + trees_tbl.h | 2 +- + uncompr.c | 13 +- + win32/Makefile.a64 | 81 +- + win32/Makefile.arm | 87 +- + win32/Makefile.msc | 116 +- + win32/replace.vbs | 15 + + win32/zlib-ng.def.in | 60 + + win32/zlib-ng1.rc | 6 +- + win32/zlib.def.in | 64 + + win32/zlib1.rc | 8 +- + win32/zlibcompat.def.in | 97 + + zbuild.h | 226 +- + zconf-ng.h.in | 17 +- + zconf.h.in | 26 +- + zendian.h | 4 +- + zlib-ng.h.in | 1871 ++++ + zlib-ng.map | 28 +- + zlib.h.in | 1859 ++++ + zlib.map | 3 - + zlib.pc.cmakein | 3 +- + zlib.pc.in | 3 +- + zlib_name_mangling-ng.h.in | 178 + + zlib_name_mangling.h.empty | 8 + + zlib_name_mangling.h.in | 170 + + zutil.c | 62 +- + zutil.h | 180 +- + zutil_p.h | 39 +- + 197 files changed, 27114 insertions(+), 3882 deletions(-) + create mode 100644 adler32_fold.c + create mode 100644 adler32_fold.h + create mode 100644 arch/arm/acle_intrins.h + create mode 100644 arch/arm/arm_features.c + create mode 100644 arch/arm/arm_features.h + create mode 100644 arch/arm/compare256_neon.c + create mode 100644 arch/arm/neon_intrins.h + create mode 100644 arch/arm/slide_hash_armv6.c + create mode 100644 arch/arm/slide_hash_neon.c + create mode 100644 arch/generic/chunk_permute_table.h + create mode 100644 arch/power/adler32_vmx.c + create mode 100644 arch/power/chunkset_power8.c + create mode 100644 arch/power/compare256_power9.c + create mode 100644 arch/power/crc32_constants.h + create mode 100644 arch/power/crc32_power8.c + create mode 100644 arch/power/fallback_builtins.h + create mode 100644 arch/power/power_features.c + create mode 100644 arch/power/power_features.h + create mode 100644 arch/power/slide_hash_vmx.c + create mode 100644 arch/power/slide_ppc_tpl.h + create mode 100644 arch/riscv/README.md + create mode 100644 arch/riscv/adler32_rvv.c + create mode 100644 arch/riscv/chunkset_rvv.c + create mode 100644 
arch/riscv/compare256_rvv.c + create mode 100644 arch/riscv/riscv_features.c + create mode 100644 arch/riscv/riscv_features.h + create mode 100644 arch/riscv/slide_hash_rvv.c + create mode 100644 arch/s390/crc32-vx.c + create mode 100644 arch/s390/s390_features.c + create mode 100644 arch/s390/s390_features.h + create mode 100644 arch/x86/adler32_avx2.c + create mode 100644 arch/x86/adler32_avx2_p.h + create mode 100644 arch/x86/adler32_avx512.c + create mode 100644 arch/x86/adler32_avx512_p.h + create mode 100644 arch/x86/adler32_avx512_vnni.c + create mode 100644 arch/x86/adler32_sse42.c + create mode 100644 arch/x86/adler32_ssse3_p.h + create mode 100644 arch/x86/chunkset_avx2.c + create mode 100644 arch/x86/chunkset_sse2.c + create mode 100644 arch/x86/chunkset_ssse3.c + create mode 100644 arch/x86/compare256_avx2.c + create mode 100644 arch/x86/compare256_sse2.c + create mode 100644 arch/x86/crc32_fold_pclmulqdq_tpl.h + create mode 100644 arch/x86/crc32_fold_vpclmulqdq_tpl.h + create mode 100644 arch/x86/crc32_pclmulqdq.c + create mode 100644 arch/x86/crc32_pclmulqdq_tpl.h + create mode 100644 arch/x86/crc32_vpclmulqdq.c + create mode 100644 arch/x86/insert_string_sse42.c + create mode 100644 arch/x86/slide_hash_avx2.c + create mode 100644 arch/x86/slide_hash_sse2.c + create mode 100644 arch/x86/x86_features.c + create mode 100644 arch/x86/x86_features.h + create mode 100644 arch/x86/x86_intrins.h + create mode 100644 cmake/detect-intrinsics.cmake + create mode 100644 cmake/fallback-macros.cmake + create mode 100644 cmake/toolchain-armhf.cmake + create mode 100644 cmake/toolchain-llvm-mingw-aarch64.cmake + create mode 100644 cmake/toolchain-llvm-mingw-armv7.cmake + create mode 100644 cmake/toolchain-llvm-mingw-i686.cmake + create mode 100644 cmake/toolchain-llvm-mingw-x86_64.cmake + create mode 100644 cmake/toolchain-mips.cmake + create mode 100644 cmake/toolchain-mips64.cmake + create mode 100644 cmake/toolchain-powerpc64-clang.cmake + create mode 100644 
cmake/toolchain-powerpc64-power9.cmake + create mode 100644 cmake/toolchain-powerpc64le-clang.cmake + create mode 100644 cmake/toolchain-powerpc64le-power9.cmake + create mode 100644 cmake/toolchain-riscv.cmake + create mode 100644 compare256.c + create mode 100644 compare256_rle.h + create mode 100644 cpu_features.c + create mode 100644 cpu_features.h + create mode 100644 crc32_braid.c + create mode 100644 crc32_braid_comb.c + create mode 100644 crc32_braid_comb_p.h + create mode 100644 crc32_braid_p.h + create mode 100644 crc32_braid_tbl.h + create mode 100644 crc32_fold.c + create mode 100644 crc32_fold.h + create mode 100644 deflate_huff.c + create mode 100644 deflate_rle.c + create mode 100644 deflate_stored.c + create mode 100644 gzread.c.in + create mode 100644 inffast_tpl.h + create mode 100644 insert_string_roll.c + create mode 100644 slide_hash.c + create mode 100644 win32/replace.vbs + create mode 100644 win32/zlib-ng.def.in + create mode 100644 win32/zlib.def.in + create mode 100644 win32/zlibcompat.def.in + create mode 100644 zlib-ng.h.in + create mode 100644 zlib.h.in + create mode 100644 zlib_name_mangling-ng.h.in + create mode 100644 zlib_name_mangling.h.empty + create mode 100644 zlib_name_mangling.h.in + +diff --git a/.gitignore b/.gitignore +index bf420e5..1d77bb0 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -1,3 +1,5 @@ ++doc ++test + *.diff + *.patch + *.orig +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 72008a0..58ed3a1 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -6,13 +6,11 @@ else() + endif() + message(STATUS "Using CMake version ${CMAKE_VERSION}") + +-set(CMAKE_MACOSX_RPATH 1) +- +-# If not specified on the command line, enable C99 as the default ++# If not specified on the command line, enable C11 as the default + # Configuration items that affect the global compiler environment standards + # should be issued before the "project" command. 
+ if(NOT CMAKE_C_STANDARD) +- set(CMAKE_C_STANDARD 99) # The C standard whose features are requested to build this target ++ set(CMAKE_C_STANDARD 11) # The C standard whose features are requested to build this target + endif() + if(NOT CMAKE_C_STANDARD_REQUIRED) + set(CMAKE_C_STANDARD_REQUIRED ON) # Boolean describing whether the value of C_STANDARD is a requirement +@@ -22,11 +20,11 @@ if(NOT CMAKE_C_EXTENSIONS) + endif() + set(VALID_C_STANDARDS "99" "11") + if(NOT CMAKE_C_STANDARD IN_LIST VALID_C_STANDARDS) +- MESSAGE(FATAL_ERROR "CMAKE_C_STANDARD:STRING=${CMAKE_C_STANDARD} not in know standards list\n ${VALID_C_STANDARDS}") ++ MESSAGE(FATAL_ERROR "CMAKE_C_STANDARD:STRING=${CMAKE_C_STANDARD} not in known standards list\n ${VALID_C_STANDARDS}") + endif() + +-# Parse the full version number from zlib.h and include in ZLIB_FULL_VERSION +-file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.h _zlib_h_contents) ++# Parse the full version number from zlib.h.in and include in ZLIB_FULL_VERSION ++file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h.in _zlib_h_contents) + string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([0-9]+.[0-9]+.[0-9]+).*\".*" + "\\1" ZLIB_HEADER_VERSION ${_zlib_h_contents}) + string(REGEX REPLACE ".*#define[ \t]+ZLIBNG_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*" +@@ -49,7 +47,9 @@ include(FeatureSummary) + include(cmake/detect-arch.cmake) + include(cmake/detect-install-dirs.cmake) + include(cmake/detect-coverage.cmake) ++include(cmake/detect-intrinsics.cmake) + include(cmake/detect-sanitizer.cmake) ++include(cmake/fallback-macros.cmake) + + if(CMAKE_TOOLCHAIN_FILE) + message(STATUS "Using CMake toolchain: ${CMAKE_TOOLCHAIN_FILE}") +@@ -57,68 +57,90 @@ endif() + + # Make sure we use an appropriate BUILD_TYPE by default, "Release" to be exact + # this should select the maximum generic optimisation on the current platform (i.e. 
-O3 for gcc/clang) +-if(NOT CMAKE_BUILD_TYPE) +- set(CMAKE_BUILD_TYPE "Release" CACHE STRING +- "Choose the type of build, standard options are: Debug Release RelWithDebInfo MinSizeRel." +- FORCE) +- add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (default)") +-else() +- add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (selected)") ++get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) ++if(NOT GENERATOR_IS_MULTI_CONFIG) ++ if(NOT CMAKE_BUILD_TYPE) ++ set(CMAKE_BUILD_TYPE "Release" CACHE STRING ++ "Choose the type of build, standard options are: Debug Release RelWithDebInfo MinSizeRel." ++ FORCE) ++ add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (default)") ++ else() ++ add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (selected)") ++ endif() + endif() + + # + # Options parsing + # +-macro(add_option name description value) +- option(${name} ${description} ${value}) +- add_feature_info(${name} ${name} ${description}) +-endmacro() +- +-add_option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON) +-add_option(ZLIB_COMPAT "Compile with zlib compatible API" OFF) +-add_option(ZLIB_ENABLE_TESTS "Build test binaries" ON) +-add_option(ZLIB_DUAL_LINK "Dual link tests against system zlib" OFF) +-add_option(WITH_FUZZERS "Build test/fuzz" OFF) +-add_option(WITH_OPTIM "Build with optimisation" ON) +-add_option(WITH_NEW_STRATEGIES "Use new strategies" ON) +-add_option(WITH_NATIVE_INSTRUCTIONS ++option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON) ++option(ZLIB_COMPAT "Compile with zlib compatible API" OFF) ++option(ZLIB_ENABLE_TESTS "Build test binaries" ON) ++option(ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API" ON) ++option(WITH_GTEST "Build gtest_zlib" ON) ++option(WITH_FUZZERS "Build test/fuzz" OFF) ++option(WITH_BENCHMARKS "Build test/benchmarks" OFF) ++option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF) 
++option(WITH_OPTIM "Build with optimisation" ON) ++option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF) ++option(WITH_NEW_STRATEGIES "Use new strategies" ON) ++option(WITH_NATIVE_INSTRUCTIONS + "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF) +-add_option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF) +-add_option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF) +-add_option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF) +-add_option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF) +-add_option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON) ++option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF) ++option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF) ++option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF) ++option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF) ++option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON) ++ ++set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols. ++Useful when embedding into a larger library. 
++Default is no prefix (empty prefix).") + + # Add multi-choice option + set(WITH_SANITIZER AUTO CACHE STRING "Enable sanitizer support") + set_property(CACHE WITH_SANITIZER PROPERTY STRINGS "Memory" "Address" "Undefined" "Thread") + + if(BASEARCH_ARM_FOUND) +- add_option(WITH_ACLE "Build with ACLE" ON) +- add_option(WITH_NEON "Build with NEON intrinsics" ON) ++ option(WITH_ACLE "Build with ACLE" ON) ++ option(WITH_NEON "Build with NEON intrinsics" ON) ++ option(WITH_ARMV6 "Build with ARMv6 SIMD" ON) + elseif(BASEARCH_PPC_FOUND) +- add_option(WITH_POWER8 "Build with optimisations for POWER8" ON) ++ option(WITH_ALTIVEC "Build with AltiVec (VMX) optimisations for PowerPC" ON) ++ option(WITH_POWER8 "Build with optimisations for POWER8" ON) ++ option(WITH_POWER9 "Build with optimisations for POWER9" ON) ++elseif(BASEARCH_RISCV_FOUND) ++ option(WITH_RVV "Build with RVV intrinsics" ON) + elseif(BASEARCH_S360_FOUND) +- add_option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) +- add_option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) ++ option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) ++ option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) ++ option(WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z" ON) + elseif(BASEARCH_X86_FOUND) +- add_option(WITH_AVX2 "Build with AVX2" ON) +- add_option(WITH_SSE2 "Build with SSE2" ON) +- add_option(WITH_SSSE3 "Build with SSSE3" ON) +- add_option(WITH_SSE4 "Build with SSE4" ON) +- add_option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON) ++ option(WITH_AVX2 "Build with AVX2" ON) ++ option(WITH_AVX512 "Build with AVX512" ON) ++ option(WITH_AVX512VNNI "Build with AVX512 VNNI extensions" ON) ++ option(WITH_SSE2 "Build with SSE2" ON) ++ option(WITH_SSSE3 "Build with SSSE3" ON) ++ option(WITH_SSE42 "Build with SSE42" ON) ++ option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON) ++ option(WITH_VPCLMULQDQ 
"Build with VPCLMULQDQ" ON) + endif() +-add_option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF) ++ ++option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF) + + mark_as_advanced(FORCE +- ZLIB_DUAL_LINK ++ ZLIB_SYMBOL_PREFIX ++ WITH_REDUCED_MEM + WITH_ACLE WITH_NEON ++ WITH_ARMV6 + WITH_DFLTCC_DEFLATE + WITH_DFLTCC_INFLATE ++ WITH_CRC32_VX + WITH_AVX2 WITH_SSE2 +- WITH_SSSE3 WITH_SSE4 ++ WITH_SSSE3 WITH_SSE42 + WITH_PCLMULQDQ ++ WITH_ALTIVEC + WITH_POWER8 ++ WITH_POWER9 ++ WITH_RVV + WITH_INFLATE_STRICT + WITH_INFLATE_ALLOW_INVALID_DIST + WITH_UNALIGNED +@@ -130,39 +152,26 @@ if(ZLIB_COMPAT) + set(WITH_GZFILEOP ON) + set(SUFFIX "") + set(ZLIB_FULL_VERSION ${ZLIB_HEADER_VERSION}.zlib-ng) ++ set(EXPORT_NAME ZLIB) + else() + set(SUFFIX "-ng") + set(ZLIB_FULL_VERSION ${ZLIBNG_HEADER_VERSION}) ++ set(EXPORT_NAME zlib-ng) + endif() + + if(WITH_GZFILEOP) + add_definitions(-DWITH_GZFILEOP) + endif() + +-if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++if(CMAKE_C_COMPILER_ID MATCHES "^Intel") + if(CMAKE_HOST_UNIX) +- set(WARNFLAGS "-w3") +- set(WARNFLAGS_MAINTAINER "-w3 -Wcheck -Wremarks") +- set(WARNFLAGS_DISABLE "") +- if(BASEARCH_X86_FOUND) +- set(AVX2FLAG "-mavx2") +- set(SSE2FLAG "-msse2") +- set(SSSE3FLAG "-mssse3") +- set(SSE4FLAG "-msse4.2") +- endif() ++ set(WARNFLAGS -Wall) ++ set(WARNFLAGS_MAINTAINER -Wall -Wcheck -Wremarks) ++ set(WARNFLAGS_DISABLE) + else() +- set(WARNFLAGS "/W3") +- set(WARNFLAGS_MAINTAINER "/W5") +- set(WARNFLAGS_DISABLE "") +- if(BASEARCH_X86_FOUND) +- set(AVX2FLAG "/arch:AVX2") +- set(SSE2FLAG "/arch:SSE2") +- set(SSSE3FLAG "/arch:SSSE3") +- set(SSE4FLAG "/arch:SSE4.2") +- endif() +- endif() +- if(WITH_NATIVE_INSTRUCTIONS) +- message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration") ++ set(WARNFLAGS /Wall) ++ set(WARNFLAGS_MAINTAINER /W5) ++ set(WARNFLAGS_DISABLE) + endif() + elseif(MSVC) + # Minimum supported MSVC version is 1800 = Visual Studio 12.0/2013 +@@ -174,154 +183,119 @@ 
elseif(MSVC) + # (who'd use cmake from an IDE...) but checking for ICC before checking for MSVC should + # avoid mistakes. + # /Oi ? +- set(WARNFLAGS "/W3") +- set(WARNFLAGS_MAINTAINER "/W4") +- set(WARNFLAGS_DISABLE "") ++ set(WARNFLAGS /W3) ++ set(WARNFLAGS_MAINTAINER /W4) ++ set(WARNFLAGS_DISABLE) + if(BASEARCH_ARM_FOUND) + add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) + if(NOT "${ARCH}" MATCHES "aarch64") + set(NEONFLAG "/arch:VFPv4") + endif() +- elseif(BASEARCH_X86_FOUND) +- if(NOT "${ARCH}" MATCHES "x86_64") +- set(SSE2FLAG "/arch:SSE2") +- endif() +- endif() +- if(WITH_NATIVE_INSTRUCTIONS) +- message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not supported on this configuration") +- endif() +-else() +- # catch all GNU C compilers as well as Clang and AppleClang +- if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") +- set(__GNUC__ ON) + endif() ++elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + # Enable warnings in GCC and Clang +- if(__GNUC__) +- set(WARNFLAGS "-Wall") +- set(WARNFLAGS_MAINTAINER "-Wextra -Wpedantic") +- set(WARNFLAGS_DISABLE "-Wno-implicit-fallthrough") +- endif() +- if(WITH_NATIVE_INSTRUCTIONS) +- if(__GNUC__) +- if(BASEARCH_PPC_FOUND) +- set(NATIVEFLAG "-mcpu=native") +- else() +- set(NATIVEFLAG "-march=native") +- endif() +- else() +- message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") +- endif() ++ set(WARNFLAGS -Wall) ++ set(WARNFLAGS_MAINTAINER -Wextra) ++ set(WARNFLAGS_DISABLE) ++ # Check whether -fno-lto is available ++ set(CMAKE_REQUIRED_FLAGS "-fno-lto") ++ check_c_source_compiles( ++ "int main() { return 0; }" ++ FNO_LTO_AVAILABLE FAIL_REGEX "not supported") ++ set(CMAKE_REQUIRED_FLAGS) ++ if(FNO_LTO_AVAILABLE) ++ set(ZNOLTOFLAG "-fno-lto") + endif() +- if(NOT NATIVEFLAG) +- if(__GNUC__) +- if(BASEARCH_ARM_FOUND) +- if("${ARCH}" MATCHES "arm" AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi") +- # Auto-detect support 
for ARM floating point ABI +- check_c_compiler_flag(-mfloat-abi=softfp HAVE_FLOATABI_SOFTFP) ++ if(NOT WITH_NATIVE_INSTRUCTIONS) ++ if(BASEARCH_ARM_FOUND) ++ if("${ARCH}" MATCHES "arm" AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi") ++ # Auto-detect support for ARM floating point ABI ++ check_include_file(features.h HAVE_FEATURES_H) ++ if(HAVE_FEATURES_H) ++ set(CMAKE_REQUIRED_FLAGS -mfloat-abi=softfp) ++ check_c_source_compiles( ++ "#include <features.h> ++ int main() { return 0; }" ++ HAVE_FLOATABI_SOFTFP) + if(HAVE_FLOATABI_SOFTFP) +- set(FLOATABI "-mfloat-abi=softfp") ++ set(FLOATABI -mfloat-abi=softfp) + else() +- check_c_compiler_flag(-mfloat-abi=hard HAVE_FLOATABI_HARD) ++ set(CMAKE_REQUIRED_FLAGS -mfloat-abi=hard) ++ check_c_source_compiles( ++ "#include <features.h> ++ int main() { return 0; }" ++ HAVE_FLOATABI_HARD) + if(HAVE_FLOATABI_HARD) +- set(FLOATABI "-mfloat-abi=hard") ++ set(FLOATABI -mfloat-abi=hard) + endif() + endif() +- if(FLOATABI) +- message(STATUS "ARM floating point arch: ${FLOATABI}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLOATABI}") +- else() +- message(STATUS "ARM floating point arch not auto-detected") +- endif() ++ set(CMAKE_REQUIRED_FLAGS) + endif() +- # NEON +- if("${ARCH}" MATCHES "aarch64") +- set(NEONFLAG "-march=armv8-a+simd") ++ if(FLOATABI) ++ message(STATUS "ARM floating point arch: ${FLOATABI}") ++ add_compile_options(${FLOATABI}) + else() +- # Check whether -mfpu=neon is available +- set(CMAKE_REQUIRED_FLAGS "-mfpu=neon") +- check_c_source_compiles( +- "int main() { return 0; }" +- MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") +- set(CMAKE_REQUIRED_FLAGS) +- if(MFPU_NEON_AVAILABLE) +- set(NEONFLAG "-mfpu=neon") +- endif() ++ message(STATUS "ARM floating point arch not auto-detected") + endif() +- # ACLE +- set(ACLEFLAG "-march=armv8-a+crc") +- elseif(BASEARCH_PPC_FOUND) +- set(POWER8FLAG "-mcpu=power8") +- elseif(BASEARCH_X86_FOUND) +- set(AVX2FLAG "-mavx2") +- set(SSE2FLAG "-msse2") +- set(SSSE3FLAG "-mssse3") +- 
set(SSE4FLAG "-msse4") +- 
set(PCLMULFLAG "-mpclmul") +- endif() +- # Check whether -fno-lto is available +- set(CMAKE_REQUIRED_FLAGS "-fno-lto") +- check_c_source_compiles( +- "int main() { return 0; }" +- FNO_LTO_AVAILABLE FAIL_REGEX "not supported") +- set(CMAKE_REQUIRED_FLAGS) +- if(FNO_LTO_AVAILABLE) +- set(NOLTOFLAG "-fno-lto") + endif() + endif() +- endif() +-endif() +- +-# Replace optimization level 3 added by default with level 2 +-if(NOT MSVC AND NOT CMAKE_C_FLAGS MATCHES "([\\/\\-]O)3") +- string(REGEX REPLACE "([\\/\\-]O)3" "\\12" +- CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") +-endif() +- +-# Set architecture alignment requirements +-if(WITH_UNALIGNED) +- if((BASEARCH_ARM_FOUND AND NOT "${ARCH}" MATCHES "armv[2-7]") OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "powerpc64le") OR BASEARCH_X86_FOUND) +- if(NOT DEFINED UNALIGNED_OK) +- set(UNALIGNED_OK TRUE) ++ # Disable LTO unless Native Instructions are enabled ++ if(FNO_LTO_AVAILABLE) ++ set(NOLTOFLAG ${ZNOLTOFLAG}) + endif() + endif() +- if(UNALIGNED_OK) +- add_definitions(-DUNALIGNED_OK) +- message(STATUS "Architecture supports unaligned reads") +- endif() +- if(BASEARCH_ARM_FOUND) +- if(NOT DEFINED UNALIGNED64_OK) +- if("${ARCH}" MATCHES "armv[2-7]") +- set(UNALIGNED64_OK FALSE) +- elseif("${ARCH}" MATCHES "(arm(v[8-9])?|aarch64)") +- set(UNALIGNED64_OK TRUE) +- endif() ++ if(MINGW) ++ # Add `-Wno-pedantic-ms-format` only if the toolchain supports it ++ check_c_compiler_flag(-Wno-pedantic-ms-format HAVE_NO_PEDANTIC_MS_FORMAT) ++ if(HAVE_NO_PEDANTIC_MS_FORMAT) ++ list(APPEND WARNFLAGS_DISABLE -Wno-pedantic-ms-format) + endif() + endif() +- if(BASEARCH_PPC_FOUND) +- if(NOT DEFINED UNALIGNED64_OK) +- if("${ARCH}" MATCHES "powerpc64le") +- set(UNALIGNED64_OK TRUE) ++endif() ++ ++# Set native march/mcpu ++if(WITH_NATIVE_INSTRUCTIONS) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ check_c_compiler_flag(-march=native HAVE_MARCH_NATIVE) ++ if(HAVE_MARCH_NATIVE) ++ set(NATIVEFLAG 
"-march=native") ++ else() ++ check_c_compiler_flag(-mcpu=native HAVE_MCPU_NATIVE) ++ if(HAVE_MCPU_NATIVE) ++ set(NATIVEFLAG "-mcpu=native") + endif() + endif() ++ # Fall through + endif() +- if(BASEARCH_X86_FOUND) +- if(NOT DEFINED UNALIGNED64_OK) +- set(UNALIGNED64_OK TRUE) +- endif() +- endif() +- if(UNALIGNED64_OK) +- add_definitions(-DUNALIGNED64_OK) +- message(STATUS "Architecture supports unaligned reads of > 4 bytes") ++ if(NATIVEFLAG) ++ # Apply flag to all source files and compilation checks ++ add_compile_options(${NATIVEFLAG}) ++ else() ++ message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") ++ set(WITH_NATIVE_INSTRUCTIONS OFF) + endif() +-else() ++endif() ++ ++# Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active ++if(NOT WITH_NATIVE_INSTRUCTIONS) ++ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) ++ foreach(_cfg_name IN LISTS CMAKE_CONFIGURATION_TYPES) ++ string(TOUPPER "${_cfg_name}" _cfg_name_uc) ++ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_${_cfg_name_uc} OFF) ++ endforeach() ++endif() ++ ++# Set architecture alignment requirements ++if(NOT WITH_UNALIGNED) ++ add_definitions(-DNO_UNALIGNED) + message(STATUS "Unaligned reads manually disabled") + endif() + + # Apply warning compiler flags + if(WITH_MAINTAINER_WARNINGS) +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE}") ++ add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE}) + else() +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WARNFLAGS} ${WARNFLAGS_DISABLE}") ++ add_compile_options(${WARNFLAGS} ${WARNFLAGS_DISABLE}) + endif() + + # Set code coverage compiler flags +@@ -329,15 +303,19 @@ if(WITH_CODE_COVERAGE) + add_code_coverage() + endif() + +-# Set native instruction set compiler flag +-if(WITH_NATIVE_INSTRUCTIONS AND DEFINED NATIVEFLAG) +- # Apply flag to all source files and compilation checks +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NATIVEFLAG}") ++# Replace optimization level 3 
added by default with level 2 ++if(NOT WITH_CODE_COVERAGE AND NOT MSVC AND NOT CMAKE_C_FLAGS MATCHES "([\\/\\-]O)3") ++ string(REGEX REPLACE "([\\/\\-]O)3" "\\12" ++ CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + endif() + + # + # Check for standard/system includes + # ++check_include_file(arm_acle.h HAVE_ARM_ACLE_H) ++if(HAVE_ARM_ACLE_H) ++ add_definitions(-DHAVE_ARM_ACLE_H) ++endif() + check_include_file(sys/auxv.h HAVE_SYS_AUXV_H) + if(HAVE_SYS_AUXV_H) + add_definitions(-DHAVE_SYS_AUXV_H) +@@ -372,17 +350,26 @@ check_function_exists(fseeko HAVE_FSEEKO) + if(NOT HAVE_FSEEKO) + add_definitions(-DNO_FSEEKO) + endif() ++ + check_function_exists(strerror HAVE_STRERROR) + if(NOT HAVE_STRERROR) + add_definitions(-DNO_STRERROR) + endif() +-set(CMAKE_REQUIRED_DEFINITIONS -D _POSIX_C_SOURCE=200112L) +-check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN) ++ ++set(CMAKE_REQUIRED_DEFINITIONS -D_POSIX_C_SOURCE=200112L) ++check_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) + if(HAVE_POSIX_MEMALIGN) + add_definitions(-DHAVE_POSIX_MEMALIGN) + endif() + set(CMAKE_REQUIRED_DEFINITIONS) + ++set(CMAKE_REQUIRED_DEFINITIONS -D_ISOC11_SOURCE=1) ++check_symbol_exists(aligned_alloc stdlib.h HAVE_ALIGNED_ALLOC) ++if(HAVE_ALIGNED_ALLOC) ++ add_definitions(-DHAVE_ALIGNED_ALLOC) ++endif() ++set(CMAKE_REQUIRED_DEFINITIONS) ++ + if(WITH_SANITIZER STREQUAL "Address") + add_address_sanitizer() + elseif(WITH_SANITIZER STREQUAL "Memory") +@@ -407,7 +394,7 @@ check_c_source_compiles( + int main() { + return 0; + }" +- HAVE_ATTRIBUTE_VISIBILITY_HIDDEN FAIL_REGEX "not supported") ++ HAVE_ATTRIBUTE_VISIBILITY_HIDDEN FAIL_REGEX "visibility") + if(HAVE_ATTRIBUTE_VISIBILITY_HIDDEN) + add_definitions(-DHAVE_VISIBILITY_HIDDEN) + endif() +@@ -421,11 +408,25 @@ check_c_source_compiles( + int main() { + return 0; + }" +- HAVE_ATTRIBUTE_VISIBILITY_INTERNAL FAIL_REGEX "not supported") ++ HAVE_ATTRIBUTE_VISIBILITY_INTERNAL FAIL_REGEX "visibility") + if(HAVE_ATTRIBUTE_VISIBILITY_INTERNAL) + 
add_definitions(-DHAVE_VISIBILITY_INTERNAL) + endif() + ++# ++# Check for __attribute__((aligned(x))) support in the compiler ++# ++check_c_source_compiles( ++ "int main(void) { ++ __attribute__((aligned(8))) int test = 0; ++ (void)test; ++ return 0; ++ }" ++ HAVE_ATTRIBUTE_ALIGNED FAIL_REGEX "aligned") ++if(HAVE_ATTRIBUTE_ALIGNED) ++ add_definitions(-DHAVE_ATTRIBUTE_ALIGNED) ++endif() ++ + # + # check for __builtin_ctz() support in the compiler + # +@@ -441,6 +442,7 @@ check_c_source_compiles( + if(HAVE_BUILTIN_CTZ) + add_definitions(-DHAVE_BUILTIN_CTZ) + endif() ++ + # + # check for __builtin_ctzll() support in the compiler + # +@@ -484,17 +486,7 @@ if(NOT HAVE_PTRDIFF_T) + endif() + endif() + +-# Macro to check if source compiles +-# (and, when compiling very natively, also runs). +-macro(check_c_source_compile_or_run source flag) +- if(CMAKE_CROSSCOMPILING OR NOT WITH_NATIVE_INSTRUCTIONS) +- check_c_source_compiles("${source}" ${flag}) +- else() +- check_c_source_runs("${source}" ${flag}) +- endif() +-endmacro() +- +-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DZLIB_DEBUG") ++add_compile_options($<$:-DZLIB_DEBUG>) + + if(MSVC) + set(CMAKE_DEBUG_POSTFIX "d") +@@ -502,119 +494,7 @@ if(MSVC) + add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) + endif() + +-if(BASEARCH_PPC_FOUND) +- # Check if we have what we need for POWER8 optimizations +- set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}") +- check_c_source_compiles( +- "#include +- int main() { +- return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); +- }" +- HAVE_POWER8 +- ) +- set(CMAKE_REQUIRED_FLAGS) +-elseif(BASEARCH_X86_FOUND) +- # Check whether compiler supports SSE2 instrinics +- set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG}") +- check_c_source_compile_or_run( +- "#include +- int main(void) { +- __m128i zero = _mm_setzero_si128(); +- (void)zero; +- return 0; +- }" +- HAVE_SSE2_INTRIN +- ) +- # Check whether compiler supports SSSE3 intrinsics +- set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG}") +- check_c_source_compile_or_run( 
+- "#include +- int main(void) { +- __m128i u, v, w; +- u = _mm_set1_epi32(1); +- v = _mm_set1_epi32(2); +- w = _mm_hadd_epi32(u, v); +- (void)w; +- return 0; +- }" +- HAVE_SSSE3_INTRIN +- ) +- # Check whether compiler supports SSE4 CRC inline asm +- set(CMAKE_REQUIRED_FLAGS "${SSE4FLAG}") +- check_c_source_compile_or_run( +- "int main(void) { +- unsigned val = 0, h = 0; +- #if defined(_MSC_VER) +- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov val, eax } +- #else +- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) ); +- #endif +- return (int)h; +- }" +- HAVE_SSE42CRC_INLINE_ASM +- ) +- # Check whether compiler supports SSE4 CRC intrinsics +- check_c_source_compile_or_run( +- "#include +- int main(void) { +- unsigned crc = 0; +- char c = 'c'; +- #if defined(_MSC_VER) +- crc = _mm_crc32_u32(crc, c); +- #else +- crc = __builtin_ia32_crc32qi(crc, c); +- #endif +- (void)crc; +- return 0; +- }" +- HAVE_SSE42CRC_INTRIN +- ) +- # Check whether compiler supports SSE4.2 compare string instrinics +- check_c_source_compile_or_run( +- "#include +- int main(void) { +- unsigned char a[64] = { 0 }; +- unsigned char b[64] = { 0 }; +- __m128i xmm_src0, xmm_src1; +- xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a); +- xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b); +- return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0); +- }" +- HAVE_SSE42CMPSTR_INTRIN +- ) +- # Check whether compiler supports PCLMULQDQ intrinsics +- set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG}") +- if(NOT (APPLE AND "${ARCH}" MATCHES "i386")) +- # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now. 
+- check_c_source_compile_or_run( +- "#include +- int main(void) { +- __m128i a = _mm_setzero_si128(); +- __m128i b = _mm_setzero_si128(); +- __m128i c = _mm_clmulepi64_si128(a, b, 0x10); +- (void)c; +- return 0; +- }" +- HAVE_PCLMULQDQ_INTRIN +- ) +- else() +- set(HAVE_PCLMULQDQ_INTRIN NO) +- endif() +- # Check whether compiler supports AVX2 intrinics +- set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG}") +- check_c_source_compile_or_run( +- "#include +- int main(void) { +- __m256i x = _mm256_set1_epi16(2); +- const __m256i y = _mm256_set1_epi16(1); +- x = _mm256_subs_epu16(x, y); +- (void)x; +- return 0; +- }" +- HAVE_AVX2_INTRIN +- ) +- set(CMAKE_REQUIRED_FLAGS) +- ++if(BASEARCH_X86_FOUND) + # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true + if("${ARCH}" MATCHES "i[3-6]86") + cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF) +@@ -644,6 +524,13 @@ if(WITH_INFLATE_ALLOW_INVALID_DIST) + add_definitions(-DINFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR) + message(STATUS "Inflate zero data for invalid distances enabled") + endif() ++# ++# Enable reduced memory configuration ++# ++if(WITH_REDUCED_MEM) ++ add_definitions(-DHASH_SIZE=32768u -DGZBUFSIZE=8192) ++ message(STATUS "Configured for reduced memory environment") ++endif() + + + set(ZLIB_ARCH_SRCS) +@@ -653,6 +540,8 @@ if(BASEARCH_ARM_FOUND) + set(ARCHDIR "arch/arm") + elseif(BASEARCH_PPC_FOUND) + set(ARCHDIR "arch/power") ++elseif(BASEARCH_RISCV_FOUND) ++ set(ARCHDIR "arch/riscv") + elseif(BASEARCH_S360_FOUND) + set(ARCHDIR "arch/s390") + elseif(BASEARCH_X86_FOUND) +@@ -668,7 +557,20 @@ if(WITH_OPTIM) + if(BASEARCH_ARM_FOUND) + add_definitions(-DARM_FEATURES) + if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") +- if(NOT "${ARCH}" MATCHES "aarch64") ++ if("${ARCH}" MATCHES "aarch64") ++ check_c_source_compiles( ++ "#include ++ int main() { ++ return (getauxval(AT_HWCAP) & HWCAP_CRC32); ++ }" ++ ARM_AUXV_HAS_CRC32 ++ ) ++ if(ARM_AUXV_HAS_CRC32) ++ 
add_definitions(-DARM_AUXV_HAS_CRC32) ++ else() ++ message(STATUS "HWCAP_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") ++ endif() ++ else() + check_c_source_compiles( + "#include + int main() { +@@ -693,21 +595,6 @@ if(WITH_OPTIM) + message(STATUS "HWCAP2_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") + endif() + endif() +- else() +- check_c_source_compiles( +- "#include +- int main() { +- return (getauxval(AT_HWCAP) & HWCAP_CRC32); +- }" +- ARM_AUXV_HAS_CRC32 +- ) +- if(ARM_AUXV_HAS_CRC32) +- add_definitions(-DARM_AUXV_HAS_CRC32) +- else() +- message(STATUS "HWCAP_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") +- endif() +- endif() +- if(NOT "${ARCH}" MATCHES "aarch64") + check_c_source_compiles( + "#include + int main() { +@@ -733,42 +620,143 @@ if(WITH_OPTIM) + endif() + endif() + endif() +- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm.h) +- list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/armfeature.c) +- if(WITH_ACLE AND NOT MSVC AND NOT "${ARCH}" MATCHES "armv[2-7]") +- add_definitions(-DARM_ACLE_CRC_HASH) +- set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) +- set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}") +- list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS}) +- add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"") ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h) ++ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c) ++ if(WITH_ACLE) ++ check_acle_compiler_flag() ++ if(HAVE_ACLE_FLAG) ++ add_definitions(-DARM_ACLE) ++ set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) ++ set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}") ++ list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS}) ++ add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"") ++ else() ++ set(WITH_ACLE OFF) ++ endif() ++ else() ++ set(WITH_ACLE OFF) + endif() + 
if(WITH_NEON) +- add_definitions(-DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH) +- set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ${ARCHDIR}/slide_neon.c) +- list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS}) +- set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG} ${NOLTOFLAG}") +- if(MSVC) +- add_definitions(-D__ARM_NEON__) ++ check_neon_compiler_flag() ++ if(NEON_AVAILABLE) ++ add_definitions(-DARM_NEON) ++ set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c ++ ${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c) ++ list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS}) ++ set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG} ${NOLTOFLAG}") ++ if(MSVC) ++ add_definitions(-D__ARM_NEON__) ++ endif() ++ add_feature_info(NEON_ADLER32 1 "Support NEON instructions in adler32, using \"${NEONFLAG}\"") ++ add_feature_info(NEON_SLIDEHASH 1 "Support NEON instructions in slide_hash, using \"${NEONFLAG}\"") ++ check_neon_ld4_intrinsics() ++ if(NEON_HAS_LD4) ++ add_definitions(-DARM_NEON_HASLD4) ++ endif() ++ else() ++ set(WITH_NEON OFF) ++ endif() ++ endif() ++ if(WITH_ARMV6) ++ check_armv6_compiler_flag() ++ if(HAVE_ARMV6_INLINE_ASM OR HAVE_ARMV6_INTRIN) ++ add_definitions(-DARM_SIMD) ++ set(ARMV6_SRCS ${ARCHDIR}/slide_hash_armv6.c) ++ set_property(SOURCE ${ARMV6_SRCS} PROPERTY COMPILE_FLAGS "${ARMV6FLAG} ${NOLTOFLAG}") ++ list(APPEND ZLIB_ARCH_SRCS ${ARMV6_SRCS}) ++ add_feature_info(ARMV6 1 "Support ARMv6 SIMD instructions in slide_hash, using \"${ARMV6FLAG}\"") ++ if(HAVE_ARMV6_INTRIN) ++ add_definitions(-DARM_SIMD_INTRIN) ++ endif() ++ else() ++ set(WITH_ARMV6 OFF) + endif() +- add_feature_info(NEON_ADLER32 1 "Support NEON instructions in adler32, using \"${NEONFLAG}\"") +- add_feature_info(NEON_SLIDEHASH 1 "Support NEON instructions in slide_hash, using \"${NEONFLAG}\"") ++ else() ++ set(WITH_ARMV6 OFF) + endif() + elseif(BASEARCH_PPC_FOUND) +- if(WITH_POWER8 AND HAVE_POWER8) +- add_definitions(-DPOWER8) ++ # 
Common arch detection code ++ if(WITH_ALTIVEC) ++ check_ppc_intrinsics() ++ endif() ++ if(WITH_POWER8) ++ check_power8_intrinsics() ++ endif() ++ if(WITH_POWER9) ++ check_power9_intrinsics() ++ endif() ++ if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN) + add_definitions(-DPOWER_FEATURES) +- add_definitions(-DPOWER8_VSX_ADLER32) +- add_definitions(-DPOWER8_VSX_SLIDEHASH) +- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h) +- list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c) +- set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/slide_hash_power8.c) +- list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS}) +- set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}") ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h) ++ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c) ++ endif() ++ # VMX specific options and files ++ if(WITH_ALTIVEC) ++ if(HAVE_VMX) ++ add_definitions(-DPPC_FEATURES) ++ if(HAVE_ALTIVEC) ++ add_definitions(-DPPC_VMX) ++ set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c) ++ list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS}) ++ add_feature_info(ALTIVEC 1 "Support the AltiVec instruction set, using \"-maltivec\"") ++ set_property(SOURCE ${PPC_SRCS} PROPERTY COMPILE_FLAGS "${PPCFLAGS}") ++ else() ++ set(WITH_ALTIVEC OFF) ++ endif() ++ endif() ++ endif() ++ # Power8 specific options and files ++ if(WITH_POWER8) ++ if(HAVE_POWER8_INTRIN) ++ add_definitions(-DPOWER8_VSX) ++ set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c) ++ if("${ARCH}" MATCHES "powerpc64(le)?") ++ add_definitions(-DPOWER8_VSX_CRC32) ++ list(APPEND POWER8_SRCS ${ARCHDIR}/crc32_power8.c) ++ endif() ++ list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS}) ++ set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_POWER8 OFF) ++ endif() ++ endif() ++ # Power9 specific options and files ++ if(WITH_POWER9) ++ if(HAVE_POWER9_INTRIN) ++ 
add_definitions(-DPOWER9) ++ set(POWER9_SRCS ${ARCHDIR}/compare256_power9.c) ++ list(APPEND ZLIB_ARCH_SRCS ${POWER9_SRCS}) ++ set_property(SOURCE ${POWER9_SRCS} PROPERTY COMPILE_FLAGS "${POWER9FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_POWER9 OFF) ++ endif() ++ endif() ++ elseif(BASEARCH_RISCV_FOUND) ++ if(WITH_RVV) ++ check_rvv_intrinsics() ++ if(HAVE_RVV_INTRIN) ++ add_definitions(-DRISCV_FEATURES) ++ add_definitions(-DRISCV_RVV) ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h) ++ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) ++ # FIXME: we will not set compile flags for riscv_features.c when ++ # the kernels update hwcap or hwprobe for riscv ++ set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) ++ list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) ++ set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_RVV OFF) ++ endif() + endif() + elseif(BASEARCH_S360_FOUND) ++ check_s390_intrinsics() ++ if(HAVE_S390_INTRIN) ++ add_definitions(-DS390_FEATURES) ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h) ++ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c) ++ endif() + if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c) +- add_definitions(-DGZBUFSIZE=262144) + endif() + if(WITH_DFLTCC_DEFLATE) + add_definitions(-DS390_DFLTCC_DEFLATE) +@@ -778,71 +766,150 @@ if(WITH_OPTIM) + add_definitions(-DS390_DFLTCC_INFLATE) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_inflate.c) + endif() ++ if(WITH_CRC32_VX) ++ check_vgfma_intrinsics() ++ if(HAVE_VGFMA_INTRIN) ++ add_definitions(-DS390_CRC32_VX) ++ set(CRC32_VX_SRCS ${ARCHDIR}/crc32-vx.c) ++ list(APPEND ZLIB_ARCH_SRCS ${CRC32_VX_SRCS}) ++ set_property(SOURCE ${CRC32_VX_SRCS} PROPERTY COMPILE_FLAGS "${VGFMAFLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_CRC32_VX OFF) ++ endif() ++ endif() + 
elseif(BASEARCH_X86_FOUND) + add_definitions(-DX86_FEATURES) +- list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86.h) +- list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86.c) ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h) ++ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c) + if(MSVC) + list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h) + endif() +- if(WITH_AVX2 AND HAVE_AVX2_INTRIN) +- add_definitions(-DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET) +- set(AVX2_SRCS ${ARCHDIR}/slide_avx.c) +- add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"") +- list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx.c) +- add_feature_info(AVX_CHUNKSET 1 "Support AVX optimized chunkset, using \"${AVX2FLAG}\"") +- list(APPEND AVX2_SRCS ${ARCHDIR}/compare258_avx.c) +- add_feature_info(AVX2_COMPARE258 1 "Support AVX2 optimized compare258, using \"${AVX2FLAG}\"") +- list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx.c) +- add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"") +- list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS}) +- set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}") ++ if(WITH_AVX2) ++ check_avx2_intrinsics() ++ if(HAVE_AVX2_INTRIN) ++ add_definitions(-DX86_AVX2) ++ set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c) ++ add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"") ++ list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c) ++ add_feature_info(AVX2_CHUNKSET 1 "Support AVX2 optimized chunkset, using \"${AVX2FLAG}\"") ++ list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c) ++ add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"") ++ list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c) ++ add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS}) ++ set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}") ++ else() ++ 
set(WITH_AVX2 OFF) ++ endif() + endif() +- if(WITH_SSE4 AND (HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN)) +- add_definitions(-DX86_SSE42_CRC_HASH) +- set(SSE42_SRCS ${ARCHDIR}/insert_string_sse.c) +- add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE4FLAG}\"") +- list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) +- set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG} ${NOLTOFLAG}") +- if(HAVE_SSE42CRC_INTRIN) +- add_definitions(-DX86_SSE42_CRC_INTRIN) ++ if(WITH_AVX512) ++ check_avx512_intrinsics() ++ if(HAVE_AVX512_INTRIN) ++ add_definitions(-DX86_AVX512) ++ list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c) ++ add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS}) ++ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/adler32_avx512_p.h) ++ if(HAVE_MASK_INTRIN) ++ add_definitions(-DX86_MASK_INTRIN) ++ endif() ++ set_property(SOURCE ${AVX512_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_AVX512 OFF) ++ endif() ++ endif() ++ if(WITH_AVX512VNNI) ++ check_avx512vnni_intrinsics() ++ if(HAVE_AVX512VNNI_INTRIN) ++ add_definitions(-DX86_AVX512VNNI) ++ add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"") ++ list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c) ++ list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS}) ++ set_property(SOURCE ${AVX512VNNI_SRCS} PROPERTY COMPILE_FLAGS "${AVX512VNNIFLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_AVX512VNNI OFF) + endif() + endif() +- if(WITH_SSE4 AND HAVE_SSE42CMPSTR_INTRIN) +- add_definitions(-DX86_SSE42_CMP_STR) +- set(SSE42_SRCS ${ARCHDIR}/compare258_sse.c) +- add_feature_info(SSE42_COMPARE258 1 "Support SSE4.2 optimized compare258, using \"${SSE4FLAG}\"") +- list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) +- set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE4FLAG} ${NOLTOFLAG}") ++ if(WITH_SSE42) ++ 
check_sse42_intrinsics() ++ if(HAVE_SSE42_INTRIN) ++ add_definitions(-DX86_SSE42) ++ set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c) ++ add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) ++ set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_SSE42 OFF) ++ endif() + endif() +- if(WITH_SSE2 AND HAVE_SSE2_INTRIN) +- add_definitions(-DX86_SSE2 -DX86_SSE2_CHUNKSET -DX86_SSE2_SLIDEHASH) +- set(SSE2_SRCS ${ARCHDIR}/chunkset_sse.c ${ARCHDIR}/slide_sse.c) +- list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS}) +- if(NOT ${ARCH} MATCHES "x86_64") +- set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}") +- add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable") +- if(FORCE_SSE2) +- add_definitions(-DX86_NOCHECK_SSE2) ++ if(WITH_SSE2) ++ check_sse2_intrinsics() ++ if(HAVE_SSE2_INTRIN) ++ add_definitions(-DX86_SSE2) ++ set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c) ++ list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS}) ++ if(NOT ${ARCH} MATCHES "x86_64") ++ set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}") ++ add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable") ++ if(FORCE_SSE2) ++ add_definitions(-DX86_NOCHECK_SSE2) ++ endif() + endif() ++ else() ++ set(WITH_SSE2 OFF) + endif() + endif() +- if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN) +- add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32) +- set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c) +- add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"") +- list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) +- set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") ++ if(WITH_SSSE3) ++ check_ssse3_intrinsics() ++ if(HAVE_SSSE3_INTRIN) ++ add_definitions(-DX86_SSSE3) ++ set(SSSE3_SRCS 
${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c) ++ add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) ++ set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_SSSE3 OFF) ++ endif() + endif() +- if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4) +- add_definitions(-DX86_PCLMULQDQ_CRC) +- set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c) +- add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG}\"") +- list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS}) +- set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE4FLAG} ${PCLMULFLAG} ${NOLTOFLAG}") ++ if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42) ++ check_pclmulqdq_intrinsics() ++ if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN) ++ add_definitions(-DX86_PCLMULQDQ_CRC) ++ set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c) ++ add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS}) ++ set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}") ++ ++ if(WITH_VPCLMULQDQ AND WITH_AVX512) ++ check_vpclmulqdq_intrinsics() ++ if(HAVE_VPCLMULQDQ_INTRIN AND HAVE_AVX512_INTRIN) ++ add_definitions(-DX86_VPCLMULQDQ_CRC) ++ set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c) ++ add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${VPCLMULFLAG} ${AVX512FLAG}\"") ++ list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS}) ++ set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}") ++ else() ++ set(WITH_VPCLMULQDQ OFF) ++ endif() ++ else() ++ set(WITH_VPCLMULQDQ OFF) ++ endif() ++ else() ++ 
set(WITH_PCLMULQDQ OFF) ++ set(WITH_VPCLMULQDQ OFF) ++ endif() ++ else() ++ set(WITH_PCLMULQDQ OFF) ++ set(WITH_VPCLMULQDQ OFF) ++ endif() ++ check_xsave_intrinsics() ++ if(HAVE_XSAVE_INTRIN) ++ add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"") ++ set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}") + endif() + endif() + endif() ++ + message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}") + + #============================================================================ +@@ -886,17 +953,18 @@ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) + endif() + endif() + +-# Refer to prefix symbolically to ease relocation by end user, +-# as Makefile-generated .pc file does. +-if(INC_INSTALL_DIR STREQUAL "${CMAKE_INSTALL_PREFIX}/include") +- set(PC_INC_INSTALL_DIR "\${prefix}/include") ++# The user is allowed (but discouraged) to set absolute CMAKE_INSTALL_*DIR paths. ++# If they do, we copy these non-relocatable paths into the pkg-config file. 
++if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") ++ set(PC_INC_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}") + else() +- set(PC_INC_INSTALL_DIR "${INC_INSTALL_DIR}") ++ set(PC_INC_INSTALL_DIR "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") + endif() +-if(LIB_INSTALL_DIR STREQUAL "${CMAKE_INSTALL_PREFIX}/lib") +- set(PC_LIB_INSTALL_DIR "\${exec_prefix}/lib") ++ ++if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") ++ set(PC_LIB_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}") + else() +- set(PC_LIB_INSTALL_DIR "${LIB_INSTALL_DIR}") ++ set(PC_LIB_INSTALL_DIR "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") + endif() + + #============================================================================ +@@ -905,18 +973,22 @@ endif() + + set(ZLIB_PUBLIC_HDRS + ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h +- zlib${SUFFIX}.h ++ ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h ++ ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h + ) + set(ZLIB_PRIVATE_HDRS + adler32_p.h + chunkset_tpl.h +- crc32_p.h +- crc32_tbl.h +- crc32_comb_tbl.h ++ compare256_rle.h ++ cpu_features.h ++ crc32_braid_p.h ++ crc32_braid_comb_p.h ++ crc32_braid_tbl.h ++ crc32_fold.h + deflate.h + deflate_p.h + functable.h +- inffast.h ++ inffast_tpl.h + inffixed_tbl.h + inflate.h + inflate_p.h +@@ -932,22 +1004,29 @@ set(ZLIB_PRIVATE_HDRS + ) + set(ZLIB_SRCS + adler32.c ++ adler32_fold.c + chunkset.c +- compare258.c ++ compare256.c + compress.c +- crc32.c +- crc32_comb.c ++ cpu_features.c ++ crc32_braid.c ++ crc32_braid_comb.c ++ crc32_fold.c + deflate.c + deflate_fast.c ++ deflate_huff.c + deflate_medium.c + deflate_quick.c ++ deflate_rle.c + deflate_slow.c ++ deflate_stored.c + functable.c + infback.c +- inffast.c + inflate.c + inftrees.c + insert_string.c ++ insert_string_roll.c ++ slide_hash.c + trees.c + uncompr.c + zutil.c +@@ -958,47 +1037,30 @@ set(ZLIB_GZFILE_PRIVATE_HDRS + ) + set(ZLIB_GZFILE_SRCS + gzlib.c +- gzread.c ++ ${CMAKE_CURRENT_BINARY_DIR}/gzread.c + gzwrite.c + ) + +-if(NOT MINGW AND NOT MSYS AND NOT CYGWIN) +- 
set(ZLIB_DLL_SRCS +- win32/zlib${SUFFIX}1.rc # If present will override custom build rule below. +- ) +-endif() +- +-if(MINGW OR MSYS OR CYGWIN) +- # This gets us DLL resource information when compiling on MinGW. +- if(NOT CMAKE_RC_COMPILER) +- set(CMAKE_RC_COMPILER windres.exe) +- endif() +- +- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj +- COMMAND ${CMAKE_RC_COMPILER} +- -D GCC_WINDRES +- -I ${CMAKE_CURRENT_SOURCE_DIR} +- -I ${CMAKE_CURRENT_BINARY_DIR} +- -o ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj +- -i ${CMAKE_CURRENT_SOURCE_DIR}/win32/zlib${SUFFIX}1.rc) +- set(ZLIB_DLL_SRCS ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj) +-endif() +- +-set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_DLL_SRCS} +- ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) ++set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) + if(WITH_GZFILEOP) + list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS}) + endif() + ++if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) ++ set(ZLIB_DLL_SRCS win32/zlib${SUFFIX}1.rc) ++endif() ++ + if(NOT DEFINED BUILD_SHARED_LIBS) +- add_library(zlib SHARED ${ZLIB_ALL_SRCS}) ++ add_library(zlib SHARED ${ZLIB_ALL_SRCS} ${ZLIB_DLL_SRCS}) + add_library(zlibstatic STATIC ${ZLIB_ALL_SRCS}) + + set(ZLIB_INSTALL_LIBRARIES zlib zlibstatic) + else() + add_library(zlib ${ZLIB_ALL_SRCS}) + +- if(NOT BUILD_SHARED_LIBS) ++ if(BUILD_SHARED_LIBS) ++ target_sources(zlib PRIVATE ${ZLIB_DLL_SRCS}) ++ else() + add_library(zlibstatic ALIAS zlib) + endif() + +@@ -1006,9 +1068,12 @@ else() + endif() + + foreach(ZLIB_INSTALL_LIBRARY ${ZLIB_INSTALL_LIBRARIES}) ++ if(NOT ZLIB_COMPAT) ++ target_compile_definitions(${ZLIB_INSTALL_LIBRARY} PUBLIC ZLIBNG_NATIVE_API) ++ endif() + target_include_directories(${ZLIB_INSTALL_LIBRARY} PUBLIC + "$" +- "$") ++ "$") + endforeach() + + if(WIN32) +@@ -1060,14 +1125,14 @@ if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + 
set_target_properties(zlib PROPERTIES COMPILE_FLAGS "-fno-semantic-interposition") + endif() + if(NOT APPLE) ++ if(NOT ZLIB_COMPAT) ++ add_definitions(-DHAVE_SYMVER) ++ endif() + set_target_properties(zlib PROPERTIES LINK_FLAGS + "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.map\"") +- else() +- # Match configure/make's behavior (i.e. don't use @rpath on mac). +- set_target_properties(zlib PROPERTIES INSTALL_NAME_DIR "${LIB_INSTALL_DIR}") + endif() + endif() +- if(MSYS OR CYGWIN) ++ if(MSYS) + # Suppress version number from shared library name + set(CMAKE_SHARED_LIBRARY_NAME_WITH_VERSION 0) + elseif(WIN32) +@@ -1092,308 +1157,147 @@ else() + endif() + + set(ZLIB_PC ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.pc) ++if(WITH_GZFILEOP) ++ set(PKG_CONFIG_CFLAGS "-DWITH_GZFILEOP") ++endif() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib.pc.cmakein + ${ZLIB_PC} @ONLY) + configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h.cmakein + ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h @ONLY) ++configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.h.in ++ ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h @ONLY) ++configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gzread.c.in ++ ${CMAKE_CURRENT_BINARY_DIR}/gzread.c @ONLY) ++ ++if (NOT ZLIB_SYMBOL_PREFIX STREQUAL "") ++ add_feature_info(ZLIB_SYMBOL_PREFIX ON "Publicly exported symbols have a custom prefix") ++ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib_name_mangling${SUFFIX}.h.in ++ ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h @ONLY) ++else() ++ add_feature_info(ZLIB_SYMBOL_PREFIX OFF "Publicly exported symbols DO NOT have a custom prefix") ++ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib_name_mangling.h.empty ++ ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h COPYONLY) ++endif() ++# add_definitions(-DZLIB_SYMBOL_PREFIX=${ZLIB_SYMBOL_PREFIX}) # not needed ++ + + if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL) + install(TARGETS ${ZLIB_INSTALL_LIBRARIES} +- RUNTIME DESTINATION 
"${BIN_INSTALL_DIR}" +- ARCHIVE DESTINATION "${LIB_INSTALL_DIR}" +- LIBRARY DESTINATION "${LIB_INSTALL_DIR}") ++ RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ++ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" ++ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") + endif() + if(NOT SKIP_INSTALL_HEADERS AND NOT SKIP_INSTALL_ALL) +- install(FILES zlib${SUFFIX}.h +- DESTINATION "${INC_INSTALL_DIR}" RENAME zlib${SUFFIX}.h) ++ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h ++ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zlib${SUFFIX}.h) ++ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h ++ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zlib_name_mangling${SUFFIX}.h) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h +- DESTINATION "${INC_INSTALL_DIR}" RENAME zconf${SUFFIX}.h) ++ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zconf${SUFFIX}.h) + endif() + if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL) + install(FILES ${ZLIB_PC} DESTINATION "${PKGCONFIG_INSTALL_DIR}") ++ # INFO: Mimics official zlib CMake target ++ # Generates ZLIB.cmake in case ZLIB_COMPAT=ON and always exports the CMake target ZLIB::ZLIB ++ # In case ZLIB_COMPAT=OFF, the CMake target and file follows zlib-ng naming convention ++ if (ZLIB_COMPAT) ++ if (TARGET zlib) ++ set_target_properties(zlib PROPERTIES EXPORT_NAME ZLIB) ++ else() ++ set_target_properties(zlibstatic PROPERTIES EXPORT_NAME ZLIB) ++ endif() ++ endif() ++ install(TARGETS ${ZLIB_INSTALL_LIBRARIES} ++ EXPORT ${EXPORT_NAME} ++ DESTINATION "${CMAKE_INSTALL_LIBDIR}") ++ install(EXPORT ${EXPORT_NAME} ++ DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME}" ++ NAMESPACE ${EXPORT_NAME}::) + endif() + + #============================================================================ + # Example binaries + #============================================================================ + +-option(ZLIB_ENABLE_TESTS "Build test binaries" ON) + if(ZLIB_ENABLE_TESTS) + enable_testing() +- 
macro(configure_test_executable target) +- target_include_directories(${target} PUBLIC +- "$" +- "$") +- if(NOT WITH_GZFILEOP) +- target_compile_definitions(${target} PUBLIC -DWITH_GZFILEOP) +- target_sources(${target} PRIVATE ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS}) +- endif() +- if(ZLIB_DUAL_LINK) +- find_package(ZLIB) +- if(ZLIB_FOUND) +- target_link_libraries(${target} ${ZLIB_LIBRARIES}) +- endif() +- endif() +- endmacro() +- +- macro(add_simple_test_executable target) +- add_executable(${target} test/${target}.c) +- configure_test_executable(${target}) +- target_link_libraries(${target} zlib) +- add_test(NAME ${target} COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- endmacro() +- +- add_simple_test_executable(adler32_test) +- add_simple_test_executable(crc32_test) +- add_simple_test_executable(example) +- +- set(MINIGZIP_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_executable(minigzip test/minigzip.c) +- configure_test_executable(minigzip) +- if(NOT DEFINED BUILD_SHARED_LIBS) +- target_link_libraries(minigzip zlibstatic) +- else() +- target_link_libraries(minigzip zlib) +- endif() +- if(BASEARCH_S360_FOUND) +- if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE) +- set_source_files_properties(test/minigzip.c PROPERTIES COMPILE_DEFINITIONS BUFLEN=262144) +- endif() +- endif() + +- set(MINIDEFLATE_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_executable(minideflate test/minideflate.c) +- configure_test_executable(minideflate) +- target_link_libraries(minideflate zlib) ++ if(BUILD_SHARED_LIBS) ++ if(ZLIBNG_ENABLE_TESTS) ++ message(STATUS "Disabling zlib-ng tests because shared libraries are enabled") ++ set(ZLIBNG_ENABLE_TESTS OFF) ++ endif() + +- if(INSTALL_UTILS) +- install(TARGETS minigzip minideflate +- RUNTIME DESTINATION "${BIN_INSTALL_DIR}" +- ARCHIVE DESTINATION "${LIB_INSTALL_DIR}" +- LIBRARY DESTINATION "${LIB_INSTALL_DIR}") ++ if(WITH_BENCHMARKS OR WITH_BENCHMARK_APPS) ++ message(STATUS "Disabling benchmarks because shared libraries are 
enabled") ++ set(WITH_BENCHMARKS OFF) ++ set(WITH_BENCHMARK_APPS OFF) ++ endif() + endif() + +- set(SWITCHLEVELS_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_executable(switchlevels test/switchlevels.c) +- configure_test_executable(switchlevels) +- target_link_libraries(switchlevels zlib) +- +- add_simple_test_executable(infcover) +- target_sources(infcover PRIVATE inftrees.c) +- +- add_executable(makefixed tools/makefixed.c inftrees.c) +- target_include_directories(makefixed PUBLIC +- "$" +- "$") +- +- set(MAKEFIXED_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_test(NAME makefixed +- COMMAND ${CMAKE_COMMAND} +- "-DCOMMAND=${MAKEFIXED_COMMAND}" +- -DOUTPUT=${CMAKE_CURRENT_BINARY_DIR}/inffixed_tbl._h +- -DCOMPARE=${CMAKE_CURRENT_SOURCE_DIR}/inffixed_tbl.h +- -DIGNORE_LINE_ENDINGS=ON +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run-and-compare.cmake) +- +- add_executable(maketrees tools/maketrees.c trees.c zutil.c) +- target_include_directories(maketrees PUBLIC +- "$" +- "$") +- +- set(MAKETREES_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_test(NAME maketrees +- COMMAND ${CMAKE_COMMAND} +- "-DCOMMAND=${MAKETREES_COMMAND}" +- -DOUTPUT=${CMAKE_CURRENT_BINARY_DIR}/trees_tbl._h +- -DCOMPARE=${CMAKE_CURRENT_SOURCE_DIR}/trees_tbl.h +- -DIGNORE_LINE_ENDINGS=ON +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run-and-compare.cmake) +- +- add_executable(makecrct tools/makecrct.c) +- target_include_directories(makecrct PUBLIC +- "$" +- "$") +- +- set(MAKECRCT_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $) +- add_test(NAME makecrct-crc32 +- COMMAND ${CMAKE_COMMAND} +- "-DCOMMAND=${MAKECRCT_COMMAND}" +- -DOUTPUT=${CMAKE_CURRENT_BINARY_DIR}/crc32_tbl._h +- -DCOMPARE=${CMAKE_CURRENT_SOURCE_DIR}/crc32_tbl.h +- -DIGNORE_LINE_ENDINGS=ON +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run-and-compare.cmake) +- +- set(MAKECRCT_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $ -c) +- add_test(NAME makecrct-crc32-combine +- COMMAND ${CMAKE_COMMAND} +- "-DCOMMAND=${MAKECRCT_COMMAND}" +- 
-DOUTPUT=${CMAKE_CURRENT_BINARY_DIR}/crc32_comb_tbl._h +- -DCOMPARE=${CMAKE_CURRENT_SOURCE_DIR}/crc32_comb_tbl.h +- -DIGNORE_LINE_ENDINGS=ON +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run-and-compare.cmake) +- +- if(WITH_FUZZERS) +- set(FUZZERS checksum compress example_small example_large example_flush example_dict minigzip) +- file(GLOB ALL_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*") +- foreach(FUZZER ${FUZZERS}) +- add_executable(${FUZZER}_fuzzer test/fuzz/${FUZZER}_fuzzer.c test/fuzz/standalone_fuzz_target_runner.c) +- configure_test_executable(${FUZZER}_fuzzer) +- target_link_libraries(${FUZZER}_fuzzer zlib) +- set(FUZZER_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $ ${ALL_SRC_FILES}) +- add_test(NAME ${FUZZER}_fuzzer COMMAND ${FUZZER_COMMAND}) +- endforeach() +- endif() ++ add_subdirectory(test) ++endif() + +- macro(test_minigzip name path) +- # Construct compression arguments for minigzip +- set(compress_args -k -c) +- foreach(extra_arg IN ITEMS "${ARGN}") +- list(APPEND compress_args ${extra_arg}) +- endforeach() +- +- # Create unique friendly string for test +- string(REPLACE ";" "" arg_list "${ARGN}") +- string(REPLACE " " "" arg_list "${arg_list}") +- string(REPLACE "-" "" arg_list "${arg_list}") +- +- set(test_id minigzip-${name}-${arg_list}) +- +- if(NOT TEST ${test_id}) +- add_test(NAME ${test_id} +- COMMAND ${CMAKE_COMMAND} +- "-DTARGET=${MINIGZIP_COMMAND}" +- "-DCOMPRESS_ARGS=${compress_args}" +- "-DDECOMPRESS_ARGS=-d;-c" +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/${path} +- -DTEST_NAME=${test_id} +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- endif() +- endmacro() +- +- set(TEST_CONFIGS +- -R # Z_RLE +- -h # Z_HUFFMAN_ONLY +- -T # Direct store +- -0 # No compression +- -1 # Deflate quick +- -4 # Deflate medium (lazy matches) +- "-5;-F" # Deflate medium (Z_FIXED) +- -6 # Deflate medium +- -9 # Deflate slow +- "-9;-f" # Deflate slow (Z_FILTERED) +- ) ++add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile related 
functions") ++add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Compile with zlib compatible API") ++add_feature_info(ZLIB_ENABLE_TESTS ZLIB_ENABLE_TESTS "Build test binaries") ++add_feature_info(ZLIBNG_ENABLE_TESTS ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API") ++add_feature_info(WITH_SANITIZER WITH_SANITIZER "Enable sanitizer support") ++add_feature_info(WITH_GTEST WITH_GTEST "Build gtest_zlib") ++add_feature_info(WITH_FUZZERS WITH_FUZZERS "Build test/fuzz") ++add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks") ++add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks") ++add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation") ++add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") ++add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS ++ "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)") ++add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings") ++add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting") ++add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking") ++add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances") + +- file(GLOB_RECURSE TEST_FILE_PATHS +- LIST_DIRECTORIES false +- RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} +- ${CMAKE_CURRENT_SOURCE_DIR}/test/data/*) ++if(BASEARCH_ARM_FOUND) ++ add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE") ++ add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics") ++ add_feature_info(WITH_ARMV6 WITH_ARMV6 "Build with ARMv6 SIMD") ++elseif(BASEARCH_PPC_FOUND) ++ add_feature_info(WITH_ALTIVEC WITH_ALTIVEC "Build with AltiVec optimisations") ++ add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8") ++ add_feature_info(WITH_POWER9 WITH_POWER9 
"Build with optimisations for POWER9") ++elseif(BASEARCH_RISCV_FOUND) ++ add_feature_info(WITH_RVV WITH_RVV "Build with RVV intrinsics") ++elseif(BASEARCH_S360_FOUND) ++ add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z") ++ add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z") ++ add_feature_info(WITH_CRC32_VX WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z") ++elseif(BASEARCH_X86_FOUND) ++ add_feature_info(WITH_AVX2 WITH_AVX2 "Build with AVX2") ++ add_feature_info(WITH_AVX512 WITH_AVX512 "Build with AVX512") ++ add_feature_info(WITH_AVX512VNNI WITH_AVX512VNNI "Build with AVX512 VNNI") ++ add_feature_info(WITH_SSE2 WITH_SSE2 "Build with SSE2") ++ add_feature_info(WITH_SSSE3 WITH_SSSE3 "Build with SSSE3") ++ add_feature_info(WITH_SSE42 WITH_SSE42 "Build with SSE42") ++ add_feature_info(WITH_PCLMULQDQ WITH_PCLMULQDQ "Build with PCLMULQDQ") ++ add_feature_info(WITH_VPCLMULQDQ WITH_VPCLMULQDQ "Build with VPCLMULQDQ") ++endif() + +- foreach(TEST_FILE_PATH ${TEST_FILE_PATHS}) +- if("${TEST_FILE_PATH}" MATCHES ".gz$" OR "${TEST_FILE_PATH}" MATCHES ".out$" OR +- "${TEST_FILE_PATH}" MATCHES "/.git/" OR "${TEST_FILE_PATH}" MATCHES ".md$") +- continue() +- endif() +- foreach(TEST_CONFIG ${TEST_CONFIGS}) +- get_filename_component(TEST_NAME ${TEST_FILE_PATH} NAME) +- if (TEST_NAME STREQUAL "") +- continue() +- endif() +- test_minigzip(${TEST_NAME} ${TEST_FILE_PATH} ${TEST_CONFIG}) +- endforeach() +- endforeach() ++add_feature_info(INSTALL_UTILS INSTALL_UTILS "Copy minigzip and minideflate during install") + +- test_minigzip("detect-text" "test/data/lcet10.txt" -A) +- test_minigzip("detect-binary" "test/data/paper-100k.pdf" -A) +- +- set(CVES CVE-2002-0059 CVE-2004-0797 CVE-2005-1849 CVE-2005-2096) +- foreach(CVE ${CVES}) +- set(CVE_COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $ -d) +- add_test(NAME ${CVE} +- COMMAND ${CMAKE_COMMAND} +- 
"-DCOMMAND=${CVE_COMMAND}" +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/${CVE}/test.gz +- "-DSUCCESS_EXIT=0;1" +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/run-and-redirect.cmake) +- endforeach() ++FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES) + +- # Run tests targeting tools +- include(cmake/test-tools.cmake) ++#============================================================================ ++# CPack ++#============================================================================ ++set(CPACK_GENERATOR "TGZ") ++set(CPACK_SOURCE_GENERATOR "TGZ") ++set(CPACK_SOURCE_IGNORE_FILES .git/ _CPack_Packages/ "${PROJECT_BINARY_DIR}/") + +- if(NOT WIN32 AND ZLIB_COMPAT) +- add_simple_test_executable(CVE-2003-0107) +- endif() ++set(CPACK_PACKAGE_NAME "zlib${SUFFIX}") ++set(CPACK_PACKAGE_VERSION ${ZLIB_FULL_VERSION}) ++set(CPACK_PACKAGE_DIRECTORY "${PROJECT_BINARY_DIR}/package") + +- add_test(NAME GH-361 +- COMMAND ${CMAKE_COMMAND} +- "-DTARGET=${MINIGZIP_COMMAND}" +- "-DCOMPRESS_ARGS=-c;-k;-4" +- -DTEST_NAME=GH-361-test-txt +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-361/test.txt +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-364 +- COMMAND ${CMAKE_COMMAND} +- "-DCOMPRESS_TARGET=${SWITCHLEVELS_COMMAND}" +- "-DCOMPRESS_ARGS=1;5;9;3" +- "-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}" +- -DTEST_NAME=GH-364-test-bin +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-364/test.bin +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-382 +- COMMAND ${CMAKE_COMMAND} +- "-DTARGET=${MINIDEFLATE_COMMAND}" +- "-DCOMPRESS_ARGS=-c;-m;1;-w;-15;-1;-s;4" +- "-DDECOMPRESS_ARGS=-c;-d;-m;1;-w;-15" +- -DGZIP_VERIFY=OFF +- -DTEST_NAME=GH-382-defneg3-dat +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-382/defneg3.dat +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-536-segfault +- COMMAND ${CMAKE_COMMAND} +- "-DCOMPRESS_TARGET=${SWITCHLEVELS_COMMAND}" +- "-DCOMPRESS_ARGS=6;9744;1;91207" +- 
"-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}" +- -DCOMPARE=OFF +- -DGZIP_VERIFY=OFF +- -DTEST_NAME=GH-536-segfault-lcet10-txt +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-536-incomplete-read +- COMMAND ${CMAKE_COMMAND} +- "-DCOMPRESS_TARGET=${SWITCHLEVELS_COMMAND}" +- "-DCOMPRESS_ARGS=6;88933;1;195840;2;45761" +- "-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}" +- -DCOMPARE=OFF +- -DGZIP_VERIFY=OFF +- -DTEST_NAME=GH-536-incomplete-read-lcet10-txt +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-536-zero-stored-block +- COMMAND ${CMAKE_COMMAND} +- "-DCOMPRESS_TARGET=${SWITCHLEVELS_COMMAND}" +- "-DCOMPRESS_ARGS=6;15248;1;1050;2;25217" +- "-DDECOMPRESS_TARGET=${MINIGZIP_COMMAND}" +- -DCOMPARE=OFF +- -DGZIP_VERIFY=OFF +- -DTEST_NAME=GH-536-zero-stored-block-lcet10-txt +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/data/lcet10.txt +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_test(NAME GH-751 +- COMMAND ${CMAKE_COMMAND} +- "-DTARGET=${MINIGZIP_COMMAND}" +- -DTEST_NAME=GH-751-test-txt +- -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/test/GH-751/test.txt +- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-compress.cmake) +- +- add_simple_test_executable(deflate_quick_bi_valid) +- add_simple_test_executable(deflate_quick_block_open) +- add_simple_test_executable(inflate_adler32) +- add_simple_test_executable(hash_head_0) ++if("${PROJECT_BINARY_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") ++ message(WARNING "Building to source folder is not recommended. 
Cpack will be unable to generate source package.") + endif() + +-FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES) ++include(CPack) +diff --git a/Makefile.in b/Makefile.in +index 8d4e42f..d2bd47f 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -29,7 +29,7 @@ TEST_LIBS=$(LIBNAME1).a + LDSHARED=$(CC) + LDSHAREDFLAGS=-shared + +-VER=2.0.6 ++VER=2.1.5 + VER1=2 + + STATICLIB=$(LIBNAME1).a +@@ -57,6 +57,8 @@ EXE= + SRCDIR=. + INCLUDES=-I$(SRCDIR) + ++BUILDDIR=. ++ + ARCHDIR=arch/generic + ARCH_STATIC_OBJS= + ARCH_SHARED_OBJS= +@@ -73,22 +75,29 @@ pkgconfigdir = ${libdir}/pkgconfig + + OBJZ = \ + adler32.o \ ++ adler32_fold.o \ + chunkset.o \ +- compare258.o \ ++ compare256.o \ + compress.o \ +- crc32.o \ +- crc32_comb.o \ ++ cpu_features.o \ ++ crc32_braid.o \ ++ crc32_braid_comb.o \ ++ crc32_fold.o \ + deflate.o \ + deflate_fast.o \ ++ deflate_huff.o \ + deflate_medium.o \ + deflate_quick.o \ ++ deflate_rle.o \ + deflate_slow.o \ ++ deflate_stored.o \ + functable.o \ + infback.o \ +- inffast.o \ + inflate.o \ + inftrees.o \ + insert_string.o \ ++ insert_string_roll.o \ ++ slide_hash.o \ + trees.o \ + uncompr.o \ + zutil.o \ +@@ -99,26 +108,34 @@ OBJG = \ + gzread.o \ + gzwrite.o + ++TESTOBJG = + OBJC = $(OBJZ) $(OBJG) + + PIC_OBJZ = \ + adler32.lo \ ++ adler32_fold.lo \ + chunkset.lo \ +- compare258.lo \ ++ compare256.lo \ + compress.lo \ +- crc32.lo \ +- crc32_comb.lo \ ++ cpu_features.lo \ ++ crc32_braid.lo \ ++ crc32_braid_comb.lo \ ++ crc32_fold.lo \ + deflate.lo \ + deflate_fast.lo \ ++ deflate_huff.lo \ + deflate_medium.lo \ + deflate_quick.lo \ ++ deflate_rle.lo \ + deflate_slow.lo \ ++ deflate_stored.lo \ + functable.lo \ + infback.lo \ +- inffast.lo \ + inflate.lo \ + inftrees.lo \ + insert_string.lo \ ++ insert_string_roll.lo \ ++ slide_hash.lo \ + trees.lo \ + uncompr.lo \ + zutil.lo \ +@@ -129,6 +146,7 @@ PIC_OBJG = \ + gzread.lo \ + gzwrite.lo + ++PIC_TESTOBJG = + PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG) + + OBJS = $(OBJC) +@@ -137,9 +155,9 @@ PIC_OBJS = 
$(PIC_OBJC) + + all: static shared + +-static: adler32_test$(EXE) crc32_test$(EXE) example$(EXE) minigzip$(EXE) fuzzers makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) ++static: example$(EXE) minigzip$(EXE) makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) + +-shared: adler32_testsh$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) ++shared: examplesh$(EXE) minigzipsh$(EXE) + + check: test + +@@ -160,60 +178,7 @@ $(ARCHDIR)/%.lo: $(SRCDIR)/$(ARCHDIR)/%.c + test: all + $(MAKE) -C test + +-# This variable is set by configure. +-WITH_FUZZERS= +- +-# By default, use our own standalone_fuzz_target_runner. +-# This runner does no fuzzing, but simply executes the inputs +-# provided via parameters. +-# Run e.g. "make all LIB_FUZZING_ENGINE=/path/to/libFuzzer.a" +-# to link the fuzzer(s) against a real fuzzing engine. +-ifeq (,$(LIB_FUZZING_ENGINE)) +- LIB_FUZZING_ENGINE = standalone_fuzz_target_runner.o +-else +- # OSS-Fuzz will define its own value for LIB_FUZZING_ENGINE. +- WITH_FUZZERS=1 +-endif +- +-ifeq (1,$(WITH_FUZZERS)) +-fuzzers: checksum_fuzzer$(EXE) compress_fuzzer$(EXE) example_small_fuzzer$(EXE) example_large_fuzzer$(EXE) example_flush_fuzzer$(EXE) example_dict_fuzzer$(EXE) minigzip_fuzzer$(EXE) +-else +-fuzzers: +-endif +- +-# The standalone fuzz target runner. 
+-standalone_fuzz_target_runner.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-checksum_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-compress_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-example_small_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-example_large_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-example_flush_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-example_dict_fuzzer.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $< +-minigzip_fuzzer.o: +- $(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< +-checksum_fuzzer$(EXE): checksum_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) checksum_fuzzer.o $(STATICLIB) -lpthread +-compress_fuzzer$(EXE): compress_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) compress_fuzzer.o $(STATICLIB) -lpthread +-example_small_fuzzer$(EXE): example_small_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) example_small_fuzzer.o $(STATICLIB) -lpthread +-example_large_fuzzer$(EXE): example_large_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) example_large_fuzzer.o $(STATICLIB) -lpthread +-example_flush_fuzzer$(EXE): example_flush_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) example_flush_fuzzer.o $(STATICLIB) -lpthread +-example_dict_fuzzer$(EXE): example_dict_fuzzer.o standalone_fuzz_target_runner.o $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) example_dict_fuzzer.o $(STATICLIB) -lpthread +-minigzip_fuzzer$(EXE): minigzip_fuzzer.o standalone_fuzz_target_runner.o $(OBJG) $(STATICLIB) +- $(CC) $(LDFLAGS) -o $@ $(LIB_FUZZING_ENGINE) minigzip_fuzzer.o $(OBJG) $(STATICLIB) -lpthread +- +-infcover.o: $(SRCDIR)/test/infcover.c $(SRCDIR)/zlib$(SUFFIX).h zconf$(SUFFIX).h ++infcover.o: $(SRCDIR)/test/infcover.c 
zlib$(SUFFIX).h zconf$(SUFFIX).h zlib_name_mangling$(SUFFIX).h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/infcover.c + + infcover$(EXE): infcover.o $(STATICLIB) +@@ -231,12 +196,6 @@ $(STATICLIB): $(OBJS) + $(AR) $(ARFLAGS) $@ $(OBJS) + -@ ($(RANLIB) $@ || true) >/dev/null 2>&1 + +-adler32_test.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/adler32_test.c +- +-crc32_test.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/crc32_test.c +- + example.o: + $(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c + +@@ -252,8 +211,8 @@ maketrees.o: + makecrct.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makecrct.c + +-zlibrc.o: win32/zlib$(SUFFIX)1.rc +- $(RC) $(RCFLAGS) -o $@ win32/zlib$(SUFFIX)1.rc ++zlibrc.o: $(SRCDIR)/win32/zlib$(SUFFIX)1.rc ++ $(RC) $(RCFLAGS) -o $@ $(SRCDIR)/win32/zlib$(SUFFIX)1.rc + + .SUFFIXES: .lo + +@@ -263,9 +222,24 @@ zlibrc.o: win32/zlib$(SUFFIX)1.rc + %.lo: $(SRCDIR)/%.c + $(CC) $(SFLAGS) -DPIC $(INCLUDES) -c -o $@ $< + +-$(OBJG): %.o: $(SRCDIR)/%.c ++gzlib.o: $(SRCDIR)/gzlib.c + $(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< + ++gzlib.lo: $(SRCDIR)/gzlib.c ++ $(CC) $(SFLAGS) -DPIC -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< ++ ++gzread.o: gzread.c ++ $(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< ++ ++gzread.lo: gzread.c ++ $(CC) $(SFLAGS) -DPIC -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< ++ ++gzwrite.o: $(SRCDIR)/gzwrite.c ++ $(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< ++ ++gzwrite.lo: $(SRCDIR)/gzwrite.c ++ $(CC) $(SFLAGS) -DPIC -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $< ++ + $(SHAREDTARGET): $(PIC_OBJS) $(DEFFILE) $(RCOBJS) + ifneq ($(SHAREDTARGET),) + $(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC) +@@ -279,68 +253,45 @@ ifneq ($(SHAREDLIB),$(SHAREDTARGET)) + endif + endif + +-adler32_test$(EXE): adler32_test.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) 
$(TEST_LIBS) $(LDSHAREDLIBC) +-ifneq ($(STRIP),) +- $(STRIP) $@ +-endif +- +-crc32_test$(EXE): crc32_test.o $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ crc32_test.o $(TEST_LIBS) $(LDSHAREDLIBC) +-ifneq ($(STRIP),) +- $(STRIP) $@ +-endif +- +-example$(EXE): example.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC) +-ifneq ($(STRIP),) +- $(STRIP) $@ +-endif +- +-minigzip$(EXE): minigzip.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC) ++example$(EXE): example.o $(TESTOBJG) $(STATICLIB) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(TESTOBJG) $(TEST_LIBS) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-adler32_testsh$(EXE): adler32_test.o $(OBJG) $(SHAREDTARGET) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC) ++minigzip$(EXE): minigzip.o $(TESTOBJG) $(STATICLIB) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(TESTOBJG) $(TEST_LIBS) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-crc32_testsh$(EXE): crc32_test.o $(SHAREDTARGET) +- $(CC) $(LDFLAGS) -o $@ crc32_test.o $(SHAREDTARGET) $(LDSHAREDLIBC) ++minigzipsh$(EXE): minigzip.o $(PIC_TESTOBJG) $(SHAREDTARGET) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(PIC_TESTOBJG) $(SHAREDLIB) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-examplesh$(EXE): example.o $(OBJG) $(SHAREDTARGET) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC) +-ifneq ($(STRIP),) +- $(STRIP) $@ +-endif + +-minigzipsh$(EXE): minigzip.o $(OBJG) $(SHAREDTARGET) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC) ++examplesh$(EXE): example.o $(PIC_TESTOBJG) $(SHAREDTARGET) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(PIC_TESTOBJG) $(SHAREDLIB) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-makefixed$(EXE): makefixed.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) 
$(LDFLAGS) -o $@ makefixed.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC) ++makefixed$(EXE): makefixed.o $(STATICLIB) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ makefixed.o $(TEST_LIBS) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-maketrees$(EXE): maketrees.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ maketrees.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC) ++maketrees$(EXE): maketrees.o $(STATICLIB) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ maketrees.o $(TEST_LIBS) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif + +-makecrct$(EXE): makecrct.o $(OBJG) $(STATICLIB) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ makecrct.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC) ++makecrct$(EXE): makecrct.o $(STATICLIB) ++ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ makecrct.o $(TEST_LIBS) $(LDSHAREDLIBC) + ifneq ($(STRIP),) + $(STRIP) $@ + endif +@@ -381,10 +332,11 @@ install-libs: install-shared install-static + + install: install-libs + -@if [ ! -d $(DESTDIR)$(includedir) ]; then mkdir -p $(DESTDIR)$(includedir); fi +- rm -f $(DESTDIR)$(includedir)/zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zconf$(SUFFIX).h +- cp $(SRCDIR)/zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zlib$(SUFFIX).h ++ rm -f $(DESTDIR)$(includedir)/zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zconf$(SUFFIX).h $(DESTDIR)$(includedir)/zlib_name_mangling$(SUFFIX).h ++ cp zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zlib$(SUFFIX).h + cp zconf$(SUFFIX).h $(DESTDIR)$(includedir)/zconf$(SUFFIX).h +- chmod 644 $(DESTDIR)$(includedir)/zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zconf$(SUFFIX).h ++ cp zlib_name_mangling$(SUFFIX).h $(DESTDIR)$(includedir)/zlib_name_mangling$(SUFFIX).h ++ chmod 644 $(DESTDIR)$(includedir)/zlib$(SUFFIX).h $(DESTDIR)$(includedir)/zconf$(SUFFIX).h $(DESTDIR)$(includedir)/zlib_name_mangling$(SUFFIX).h + + uninstall-static: + cd $(DESTDIR)$(libdir) && rm -f $(STATICLIB) +@@ -398,7 +350,7 @@ ifneq ($(IMPORTLIB),) + endif + + uninstall: uninstall-static uninstall-shared +- cd $(DESTDIR)$(includedir) && rm -f zlib$(SUFFIX).h 
zconf$(SUFFIX).h ++ cd $(DESTDIR)$(includedir) && rm -f zlib$(SUFFIX).h zconf$(SUFFIX).h zlib_name_mangling$(SUFFIX).h + cd $(DESTDIR)$(pkgconfigdir) && rm -f $(PKGFILE) + + mostlyclean: clean +@@ -406,10 +358,7 @@ clean: + @if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) clean; fi + @if [ -f test/Makefile ]; then $(MAKE) -C test clean; fi + rm -f *.o *.lo *~ \ +- adler32_test$(EXE) crc32_test$(EXE) example$(EXE) minigzip$(EXE) \ +- adler32_testsh$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ +- checksum_fuzzer$(EXE) compress_fuzzer$(EXE) example_small_fuzzer$(EXE) example_large_fuzzer$(EXE) \ +- example_flush_fuzzer$(EXE) example_dict_fuzzer$(EXE) minigzip_fuzzer$(EXE) \ ++ example$(EXE) minigzip$(EXE) minigzipsh$(EXE) \ + infcover makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) \ + $(STATICLIB) $(IMPORTLIB) $(SHAREDLIB) $(SHAREDLIBV) $(SHAREDLIBM) \ + foo.gz so_locations \ +@@ -417,7 +366,6 @@ clean: + rm -rf objs + rm -f *.gcda *.gcno *.gcov + rm -f a.out a.exe +- rm -f *.pc + rm -f *._h + rm -rf btmp1 btmp2 pkgtmp1 pkgtmp2 + +@@ -425,7 +373,7 @@ maintainer-clean: distclean + distclean: clean + @if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) distclean; fi + @if [ -f test/Makefile ]; then $(MAKE) -C test distclean; fi +- rm -f $(PKGFILE) configure.log zconf.h zconf.h.cmakein ++ rm -f $(PKGFILE) configure.log zconf.h zconf.h.cmakein zlib$(SUFFIX).h zlib_name_mangling$(SUFFIX).h *.pc gzread.c + -@rm -f .DS_Store + # Reset Makefile if building inside source tree + @if [ -f Makefile.in ]; then \ +diff --git a/PORTING.md b/PORTING.md +index eda2608..c48522e 100644 +--- a/PORTING.md ++++ b/PORTING.md +@@ -9,11 +9,19 @@ zlib-compat mode + Zlib-ng can be compiled in zlib-compat mode, suitable for zlib-replacement + in a single application or system-wide. 
+ +-Please note that zlib-ng in zlib-compat mode is API-compatible but not +-ABI-compatible, meaning that you cannot simply replace the zlib library/dll +-files and expect the application to work. The application will need to be ++Please note that zlib-ng in zlib-compat mode tries to maintain both API and ++ABI compatibility with the original zlib. Any issues regarding compatibility ++can be reported as bugs. ++ ++In certain instances you may not be able to simply replace the zlib library/dll ++files and expect the application to work. The application may need to be + recompiled against the zlib-ng headers and libs to ensure full compatibility. + ++It is also possible for the deflate output stream to differ from the original ++zlib due to algorithmic differences between the two libraries. Any tests or ++applications that depend on the exact length of the deflate stream being a ++certain value will need to be updated. ++ + **Advantages:** + - Easy to port to, since it only requires a recompile of the application and + no changes to the application code. +@@ -25,8 +33,8 @@ recompiled against the zlib-ng headers and libs to ensure full compatibility. + - If your application is pre-allocating a memory buffer and you are providing + deflate/inflate init with your own allocator that allocates from that buffer + (looking at you nginx), you should be aware that zlib-ng needs to allocate +- more memory than stock zlib needs. The same problem exists with Intels and +- Cloudflares zlib forks. Doing this is not recommended since it makes it ++ more memory than stock zlib needs. The same problem exists with Intel’s and ++ Cloudflare’s zlib forks. Doing this is not recommended since it makes it + very hard to maintain compatibility over time. + + **Build Considerations:** +@@ -35,7 +43,8 @@ recompiled against the zlib-ng headers and libs to ensure full compatibility. 
+ - Static library is *libz.a* on Unix and macOS, or *zlib.lib* on Windows + - Shared library is *libz.so* on Unix, *libz.dylib* on macOS, or *zlib1.dll* + on Windows +-- Type `z_size_t` is *unsigned long* ++- Type `z_size_t` is *unsigned __int64* on 64-bit Windows, and *unsigned long* on 32-bit Windows, Unix and macOS ++- Type `z_uintmax_t` is *unsigned long* in zlib-compat mode, and *size_t* with zlib-ng API + + zlib-ng native mode + ------------------- +diff --git a/README.md b/README.md +index 8528f28..4f9fe09 100644 +--- a/README.md ++++ b/README.md +@@ -1,32 +1,30 @@ ++| CI | Stable | Develop | ++|:---|:-------|:--------| ++| GitHub Actions | [![Stable CMake](https://github.com/zlib-ng/zlib-ng/actions/workflows/cmake.yml/badge.svg?branch=stable)](https://github.com/zlib-ng/zlib-ng/actions/workflows/cmake.yml?query=branch%3Astable)
    [![Stable Configure](https://github.com/zlib-ng/zlib-ng/actions/workflows/configure.yml/badge.svg?branch=stable)](https://github.com/zlib-ng/zlib-ng/actions/workflows/configure.yml?query=branch%3Astable)
    [![Stable NMake](https://github.com/zlib-ng/zlib-ng/actions/workflows/nmake.yml/badge.svg?branch=stable)](https://github.com/zlib-ng/zlib-ng/actions/workflows/nmake.yml?query=branch%3Astable) | [![Develop CMake](https://github.com/zlib-ng/zlib-ng/actions/workflows/cmake.yml/badge.svg?branch=develop)](https://github.com/zlib-ng/zlib-ng/actions/workflows/cmake.yml?query=branch%3Adevelop)
    [![Develop Configure](https://github.com/zlib-ng/zlib-ng/actions/workflows/configure.yml/badge.svg?branch=develop)](https://github.com/zlib-ng/zlib-ng/actions/workflows/configure.yml?query=branch%3Adevelop)
    [![Develop NMake](https://github.com/zlib-ng/zlib-ng/actions/workflows/nmake.yml/badge.svg?branch=develop)](https://github.com/zlib-ng/zlib-ng/actions/workflows/nmake.yml?query=branch%3Adevelop) | ++| CodeFactor | [![CodeFactor](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/badge/stable)](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/overview/stable) | [![CodeFactor](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/badge/develop)](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/overview/develop) | ++| OSS-Fuzz | [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/zlib-ng.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng) | [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/zlib-ng.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng) | ++| Codecov | [![codecov](https://codecov.io/github/zlib-ng/zlib-ng/branch/stable/graph/badge.svg?token=uKsgK9LIuC)](https://codecov.io/github/zlib-ng/zlib-ng/tree/stable) | [![codecov](https://codecov.io/github/zlib-ng/zlib-ng/branch/develop/graph/badge.svg?token=uKsgK9LIuC)](https://codecov.io/github/zlib-ng/zlib-ng/tree/develop) | ++ + ## zlib-ng + *zlib data compression library for the next generation systems* + + Maintained by Hans Kristian Rosbach + aka Dead2 (zlib-ng àt circlestorm dót org) + +-|CI|Status| +-|:-|-| +-|GitHub Actions|[![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20CMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20Configure/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20NMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions)| +-|Buildkite|[![Build 
status](https://badge.buildkite.com/7bb1ef84356d3baee26202706cc053ee1de871c0c712b65d26.svg?branch=develop)](https://buildkite.com/circlestorm-productions/zlib-ng)| +-|CodeFactor|[![CodeFactor](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/badge)](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng)| +-|OSS-Fuzz|[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/zlib-ng.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng) +-|Codecov|[![codecov.io](https://codecov.io/github/zlib-ng/zlib-ng/coverage.svg?branch=develop)](https://codecov.io/github/zlib-ng/zlib-ng/)| +- +- + Features + -------- + + * Zlib compatible API with support for dual-linking + * Modernized native API based on zlib API for ease of porting +-* Modern C99 syntax and a clean code layout +-* Deflate medium and quick algorithms based on Intels zlib fork ++* Modern C11 syntax and a clean code layout ++* Deflate medium and quick algorithms based on Intel’s zlib fork + * Support for CPU intrinsics when available +- * Adler32 implementation using SSSE3, AVX2, Neon & VSX +- * CRC32-B implementation using PCLMULQDQ & ACLE ++ * Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX ++ * CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z + * Hash table implementation using CRC32-C intrinsics on x86 and ARM +- * Slide hash implementations using SSE2, AVX2, Neon & VSX +- * Compare256/258 implementations using SSE4.2 & AVX2 +- * Inflate chunk copying using SSE2, AVX2 & Neon ++ * Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX ++ * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV ++ * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX + * Support for hardware-accelerated deflate using IBM Z DFLTCC + * Unaligned memory read/writes and large bit buffer improvements + * Includes improvements from Cloudflare and Intel forks +@@ -34,51 +32,37 @@ Features + 
* Comprehensive set of CMake unit tests + * Code sanitizers, fuzzing, and coverage + * GitHub Actions continuous integration on Windows, macOS, and Linux +- * Emulated CI for ARM, AARCH64, PPC, PPC64, SPARC64, S390x using qemu ++ * Emulated CI for ARM, AARCH64, PPC, PPC64, RISCV, SPARC64, S390x using qemu + + + History + ------- + +-The motivation for this fork came after seeing several 3rd party +-contributions containing new optimizations not getting implemented +-into the official zlib repository. ++The motivation for this fork was seeing several 3rd party contributions with new optimizations not getting ++implemented into the official zlib repository. + +-Mark Adler has been maintaining zlib for a very long time, and he has +-done a great job and hopefully he will continue for a long time yet. +-The idea of zlib-ng is not to replace zlib, but to co-exist as a +-drop-in replacement with a lower threshold for code change. ++Mark Adler has been maintaining zlib for a very long time, and he has done a great job and hopefully he will continue ++for a long time yet. The idea of zlib-ng is not to replace zlib, but to co-exist as a drop-in replacement with a ++lower threshold for code change. + +-zlib has a long history and is incredibly portable, even supporting +-lots of systems that predate the Internet. This is great, but it does +-complicate further development and maintainability. +-The zlib code has numerous workarounds for old compilers that do not +-understand ANSI-C or to accommodate systems with limitations such as +-operating in a 16-bit environment. ++zlib has a long history and is incredibly portable, even supporting many systems that predate the Internet.
    ++That is great, but it can complicate further development and maintainability. The zlib code contains many workarounds ++for really old compilers or to accommodate systems with limitations such as operating in a 16-bit environment. + +-Many of these workarounds are only maintenance burdens, some of them +-are pretty huge code-wise. For example, the [v]s[n]printf workaround +-code has a whopping 8 different implementations just to cater to +-various old compilers. With this many workarounds cluttered throughout +-the code, new programmers with an idea/interest for zlib will need +-to take some time to figure out why all of these seemingly strange +-things are used, and how to work within those confines. ++Many of these workarounds are only maintenance burdens, some of them are pretty huge code-wise. With many workarounds ++cluttered throughout the code, it makes it harder for new programmers with an idea/interest for zlib to contribute. + +-So I decided to make a fork, merge all the Intel optimizations, merge +-the Cloudflare optimizations that did not conflict, plus a couple +-of other smaller patches. Then I started cleaning out workarounds, +-various dead code, all contrib and example code as there is little +-point in having those in this fork for various reasons. ++I decided to make a fork, merge all the Intel optimizations, some of the Cloudflare optimizations, plus a couple other ++smaller patches. Then started cleaning out workarounds, various dead code, all contrib and example code.
    ++The result is a better performing and easier to maintain zlib-ng. + +-A lot of improvements have gone into zlib-ng since its start, and +-numerous people and companies have contributed both small and big +-improvements, or valuable testing. +- +-Please read LICENSE.md, it is very simple and very liberal. ++A lot of improvements have gone into zlib-ng since its start, and numerous people and companies have contributed both ++small and big improvements, or valuable testing. + + + Build + ----- ++Please read LICENSE.md, it is very simple and very liberal. + + There are two ways to build zlib-ng: + +@@ -118,9 +102,11 @@ Build Options + | WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON | + | WITH_OPTIM | --without-optimizations | Build with optimisations | ON | + | WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON | +-| WITH_NATIVE_INSTRUCTIONS | --native | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | +-| WITH_SANITIZER | --with-sanitizer | Build with sanitizer (memory, address, undefined) | OFF | +-| WITH_FUZZERS | --with-fuzzers | Build test/fuzz | OFF | ++| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | ++| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF | ++| WITH_GTEST | | Build gtest_zlib | ON | ++| WITH_FUZZERS | | Build test/fuzz | OFF | ++| WITH_BENCHMARKS | | Build test/benchmarks | OFF | + | WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF | + | WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF | + +@@ -128,27 +114,24 @@ Build Options + Install + ------- + +-WARNING: We do not recommend manually installing unless you really +-know what you are doing, because this can potentially override the system +-default zlib library, and any incompatibility or wrong configuration of +-zlib-ng can make the whole system 
unusable, requiring recovery or reinstall. ++WARNING: We do not recommend manually installing unless you really know what you are doing, because this can ++potentially override the system default zlib library, and any incompatibility or wrong configuration of zlib-ng ++can make the whole system unusable, requiring recovery or reinstall. + If you still want a manual install, we recommend using the /opt/ path prefix. + +-For Linux distros, an alternative way to use zlib-ng (if compiled in +-zlib-compat mode) instead of zlib, is through the use of the +-_LD_PRELOAD_ environment variable. If the program is dynamically linked +-with zlib, then zlib-ng will temporarily be used instead by the program, +-without risking system-wide instability. ++For Linux distros, an alternative way to use zlib-ng (if compiled in zlib-compat mode) instead of zlib, is through ++the use of the _LD_PRELOAD_ environment variable. If the program is dynamically linked with zlib, then the program ++will temporarily attempt to use zlib-ng instead, without risking system-wide instability. + + ``` +-LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program ++LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.13.zlib-ng /usr/bin/program + ``` + + ### Cmake + + To install zlib-ng system-wide using cmake: + +-``` ++```sh or powershell + cmake --build . --target install + ``` + +@@ -156,35 +139,55 @@ cmake --build . --target install + + To install zlib-ng system-wide using the configure script: + +-``` ++```sh + make install + ``` + ++### CPack ++ ++After building with cmake, an installation package can be created using cpack. By default a tgz package is created, ++but you can append `-G ` to each command to generate alternative packages types (TGZ, ZIP, RPM, DEB). To easily ++create a rpm or deb package, you would use `-G RPM` or `-G DEB` respectively. 
++ ++```sh or powershell ++cd build ++cpack --config CPackConfig.cmake ++cpack --config CPackSourceConfig.cmake ++``` ++ ++### Vcpkg ++ ++Alternatively, you can build and install zlib-ng using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager: ++ ++```sh or powershell ++git clone https://github.com/Microsoft/vcpkg.git ++cd vcpkg ++./bootstrap-vcpkg.sh # "./bootstrap-vcpkg.bat" for powershell ++./vcpkg integrate install ++./vcpkg install zlib-ng ++``` ++ ++The zlib-ng port in vcpkg is kept up to date by Microsoft team members and community contributors. ++If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. ++ + Contributing + ------------ + +-Zlib-ng is a aiming to be open to contributions, and we would be delighted to +-receive pull requests on github. +-Just remember that any code you submit must be your own and it must be zlib licensed. +-Help with testing and reviewing of pull requests etc is also very much appreciated. +- +-If you are interested in contributing, please consider joining our +-IRC channel #zlib-ng on the Freenode IRC network. ++Zlib-ng is aiming to be open to contributions, and we would be delighted to receive pull requests on github. ++Help with testing and reviewing pull requests etc is also very much appreciated. + ++Please check the Wiki for more info: [Contributing](https://github.com/zlib-ng/zlib-ng/wiki/Contributing) + + Acknowledgments + ---------------- + +-Thanks to Servebolt.com for sponsoring my maintainership of zlib-ng. +- + Thanks go out to all the people and companies who have taken the time to contribute + code reviews, testing and/or patches. Zlib-ng would not have been nearly as good without you. + +-The deflate format used by zlib was defined by Phil Katz. ++The deflate format used by zlib was defined by Phil Katz.
    + The deflate and zlib specifications were written by L. Peter Deutsch. + +-zlib was originally created by Jean-loup Gailly (compression) +-and Mark Adler (decompression). ++zlib was originally created by Jean-loup Gailly (compression) and Mark Adler (decompression). + + + Advanced Build Options +@@ -192,28 +195,35 @@ Advanced Build Options + + | CMake | configure | Description | Default | + |:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------| +-| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF | +-| UNALIGNED_OK | | Allow unaligned reads | ON (x86, arm) | +-| | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) | ++| FORCE_SSE2 | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) | + | WITH_AVX2 | | Build with AVX2 intrinsics | ON | ++| WITH_AVX512 | | Build with AVX512 intrinsics | ON | ++| WITH_AVX512VNNI | | Build with AVX512VNNI intrinsics | ON | + | WITH_SSE2 | | Build with SSE2 intrinsics | ON | +-| WITH_SSE4 | | Build with SSE4 intrinsics | ON | ++| WITH_SSSE3 | | Build with SSSE3 intrinsics | ON | ++| WITH_SSE42 | | Build with SSE42 intrinsics | ON | + | WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON | ++| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON | + | WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON | + | WITH_NEON | --without-neon | Build with NEON intrinsics | ON | +-| WITH_POWER8 | | Build with POWER8 optimisations | ON | ++| WITH_ARMV6 | --without-armv6 | Build with ARMv6 intrinsics | ON | ++| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON | ++| WITH_POWER8 | --without-power8 | Build with POWER8 optimisations | ON | ++| WITH_RVV | | Build with RVV intrinsics | ON | ++| WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON | + | WITH_DFLTCC_DEFLATE 
| --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF | + | WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF | +-| WITH_UNALIGNED | | Allow optimizations that use unaligned reads if safe on current arch| ON | ++| WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch| ON | + | WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF | + | WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF | + | INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF | ++| ZLIBNG_ENABLE_TESTS | | Test zlib-ng specific API | ON | + + + Related Projects + ---------------- + +-* Fork of the popular minigzip https://github.com/zlib-ng/minizip-ng ++* Fork of the popular minizip https://github.com/zlib-ng/minizip-ng + * Python tool to benchmark minigzip/minideflate https://github.com/zlib-ng/deflatebench + * Python tool to benchmark pigz https://github.com/zlib-ng/pigzbench + * 3rd party patches for zlib-ng compatibility https://github.com/zlib-ng/patches +diff --git a/adler32.c b/adler32.c +index 7b245fc..95ac13c 100644 +--- a/adler32.c ++++ b/adler32.c +@@ -4,12 +4,11 @@ + */ + + #include "zbuild.h" +-#include "zutil.h" + #include "functable.h" + #include "adler32_p.h" + + /* ========================================================================= */ +-Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) { ++Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t sum2; + unsigned n; + +@@ -51,30 +50,7 @@ Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t l + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ +- if (len) { /* avoid modulos if none remaining */ +-#ifdef UNROLL_MORE +- while (len >= 16) { +- len -= 16; +- DO16(adler, sum2, buf); +- buf += 16; 
+-#else +- while (len >= 8) { +- len -= 8; +- DO8(adler, sum2, buf, 0); +- buf += 8; +-#endif +- } +- while (len) { +- --len; +- adler += *buf++; +- sum2 += adler; +- } +- adler %= BASE; +- sum2 %= BASE; +- } +- +- /* return recombined sums */ +- return adler | (sum2 << 16); ++ return adler32_len_64(adler, buf, len, sum2); + } + + #ifdef ZLIB_COMPAT +diff --git a/adler32_fold.c b/adler32_fold.c +new file mode 100644 +index 0000000..e2f6f9a +--- /dev/null ++++ b/adler32_fold.c +@@ -0,0 +1,16 @@ ++/* adler32_fold.c -- adler32 folding interface ++ * Copyright (C) 2022 Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "functable.h" ++#include "adler32_fold.h" ++ ++#include ++ ++Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ adler = functable.adler32(adler, src, len); ++ memcpy(dst, src, len); ++ return adler; ++} +diff --git a/adler32_fold.h b/adler32_fold.h +new file mode 100644 +index 0000000..20aa1c7 +--- /dev/null ++++ b/adler32_fold.h +@@ -0,0 +1,11 @@ ++/* adler32_fold.h -- adler32 folding interface ++ * Copyright (C) 2022 Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef ADLER32_FOLD_H_ ++#define ADLER32_FOLD_H_ ++ ++Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++ ++#endif +diff --git a/adler32_p.h b/adler32_p.h +index 7f75c71..38ba2ad 100644 +--- a/adler32_p.h ++++ b/adler32_p.h +@@ -18,33 +18,50 @@ + #define DO8(sum1, sum2, buf, i) {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, i+4);} + #define DO16(sum1, sum2, buf) {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);} + +-static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) { ++static inline uint32_t adler32_len_1(uint32_t adler, const uint8_t *buf, uint32_t sum2) { + adler += buf[0]; +- if (adler >= BASE) +- adler 
-= BASE; ++ adler %= BASE; + sum2 += adler; +- if (sum2 >= BASE) +- sum2 -= BASE; ++ sum2 %= BASE; + return adler | (sum2 << 16); + } + +-static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) { ++static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) { + while (len) { + --len; + adler += *buf++; + sum2 += adler; + } +- if (adler >= BASE) +- adler -= BASE; ++ adler %= BASE; + sum2 %= BASE; /* only added so many BASE's */ ++ /* return recombined sums */ + return adler | (sum2 << 16); + } + +-static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) { ++static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, size_t len, uint32_t sum2) { ++ while (len--) { ++ *dst = *buf++; ++ adler += *dst++; ++ sum2 += adler; ++ } ++ adler %= BASE; ++ sum2 %= BASE; /* only added so many BASE's */ ++ /* return recombined sums */ ++ return adler | (sum2 << 16); ++} ++ ++static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) { ++#ifdef UNROLL_MORE + while (len >= 16) { + len -= 16; + DO16(adler, sum2, buf); + buf += 16; ++#else ++ while (len >= 8) { ++ len -= 8; ++ DO8(adler, sum2, buf, 0); ++ buf += 8; ++#endif + } + /* Process tail (len < 16). */ + return adler32_len_16(adler, buf, len, sum2); +diff --git a/arch/arm/Makefile.in b/arch/arm/Makefile.in +index d383ba0..9d05b00 100644 +--- a/arch/arm/Makefile.in ++++ b/arch/arm/Makefile.in +@@ -10,6 +10,7 @@ SUFFIX= + + ACLEFLAG= + NEONFLAG= ++ARMV6FLAG= + NOLTOFLAG= + + SRCDIR=. 
+@@ -18,10 +19,12 @@ TOPDIR=$(SRCTOP) + + all: \ + adler32_neon.o adler32_neon.lo \ +- armfeature.o armfeature.lo \ ++ arm_features.o arm_features.lo \ + chunkset_neon.o chunkset_neon.lo \ ++ compare256_neon.o compare256_neon.lo \ + crc32_acle.o crc32_acle.lo \ +- slide_neon.o slide_neon.lo \ ++ slide_hash_neon.o slide_hash_neon.lo \ ++ slide_hash_armv6.o slide_hash_armv6.lo \ + insert_string_acle.o insert_string_acle.lo + + adler32_neon.o: +@@ -30,11 +33,11 @@ adler32_neon.o: + adler32_neon.lo: + $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c + +-armfeature.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c ++arm_features.o: ++ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/arm_features.c + +-armfeature.lo: +- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c ++arm_features.lo: ++ $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/arm_features.c + + chunkset_neon.o: + $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c +@@ -42,17 +45,29 @@ chunkset_neon.o: + chunkset_neon.lo: + $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c + ++compare256_neon.o: ++ $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_neon.c ++ ++compare256_neon.lo: ++ $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_neon.c ++ + crc32_acle.o: + $(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c + + crc32_acle.lo: + $(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c + +-slide_neon.o: +- $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c ++slide_hash_neon.o: ++ $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c ++ ++slide_hash_neon.lo: ++ $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_neon.c ++ ++slide_hash_armv6.o: ++ $(CC) 
$(CFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c + +-slide_neon.lo: +- $(CC) $(SFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c ++slide_hash_armv6.lo: ++ $(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c + + insert_string_acle.o: + $(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c +@@ -66,5 +81,5 @@ clean: + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +-distclean: ++distclean: clean + rm -f Makefile +diff --git a/arch/arm/acle_intrins.h b/arch/arm/acle_intrins.h +new file mode 100644 +index 0000000..531b8a7 +--- /dev/null ++++ b/arch/arm/acle_intrins.h +@@ -0,0 +1,35 @@ ++#ifndef ARM_ACLE_INTRINS_H ++#define ARM_ACLE_INTRINS_H ++ ++#include ++#ifdef _MSC_VER ++# include ++#elif defined(HAVE_ARM_ACLE_H) ++# include ++#endif ++ ++#ifdef ARM_ACLE ++#if defined(__aarch64__) ++# define Z_TARGET_CRC Z_TARGET("+crc") ++#else ++# define Z_TARGET_CRC ++#endif ++#endif ++ ++#ifdef ARM_SIMD ++#ifdef _MSC_VER ++typedef uint32_t uint16x2_t; ++ ++#define __uqsub16 _arm_uqsub16 ++#elif !defined(ARM_SIMD_INTRIN) ++typedef uint32_t uint16x2_t; ++ ++static inline uint16x2_t __uqsub16(uint16x2_t __a, uint16x2_t __b) { ++ uint16x2_t __c; ++ __asm__ __volatile__("uqsub16\t%0, %1, %2" : "=r" (__c) : "r"(__a), "r"(__b)); ++ return __c; ++} ++#endif ++#endif ++ ++#endif // include guard ARM_ACLE_INTRINS_H +diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c +index adda6f6..f1c43ff 100644 +--- a/arch/arm/adler32_neon.c ++++ b/arch/arm/adler32_neon.c +@@ -1,61 +1,131 @@ + /* Copyright (C) 1995-2011, 2016 Mark Adler + * Copyright (C) 2017 ARM Holdings Inc. 
+- * Author: Adenilson Cavalcanti +- * ++ * Authors: ++ * Adenilson Cavalcanti ++ * Adam Stylinski + * For conditions of distribution and use, see copyright notice in zlib.h + */ +-#ifdef ARM_NEON_ADLER32 +-#ifdef _M_ARM64 +-# include +-#else +-# include +-#endif +-#include "../../zutil.h" ++#ifdef ARM_NEON ++#include "neon_intrins.h" ++#include "../../zbuild.h" + #include "../../adler32_p.h" + +-static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) { +- static const uint8_t taps[32] = { ++static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) { ++ static const uint16_t ALIGNED_(16) taps[64] = { ++ 64, 63, 62, 61, 60, 59, 58, 57, ++ 56, 55, 54, 53, 52, 51, 50, 49, ++ 48, 47, 46, 45, 44, 43, 42, 41, ++ 40, 39, 38, 37, 36, 35, 34, 33, + 32, 31, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1 }; + +- uint32x2_t adacc2, s2acc2, as; +- uint8x16_t t0 = vld1q_u8(taps), t1 = vld1q_u8(taps + 16); ++ uint32x4_t adacc = vdupq_n_u32(0); ++ uint32x4_t s2acc = vdupq_n_u32(0); ++ uint32x4_t s2acc_0 = vdupq_n_u32(0); ++ uint32x4_t s2acc_1 = vdupq_n_u32(0); ++ uint32x4_t s2acc_2 = vdupq_n_u32(0); + +- uint32x4_t adacc = vdupq_n_u32(0), s2acc = vdupq_n_u32(0); + adacc = vsetq_lane_u32(s[0], adacc, 0); + s2acc = vsetq_lane_u32(s[1], s2acc, 0); + +- while (len >= 2) { +- uint8x16_t d0 = vld1q_u8(buf), d1 = vld1q_u8(buf + 16); +- uint16x8_t adler, sum2; +- s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 5)); +- adler = vpaddlq_u8( d0); +- adler = vpadalq_u8(adler, d1); +- sum2 = vmull_u8( vget_low_u8(t0), vget_low_u8(d0)); +- sum2 = vmlal_u8(sum2, vget_high_u8(t0), vget_high_u8(d0)); +- sum2 = vmlal_u8(sum2, vget_low_u8(t1), vget_low_u8(d1)); +- sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d1)); +- adacc = vpadalq_u16(adacc, adler); +- s2acc = vpadalq_u16(s2acc, sum2); +- len -= 2; +- buf += 32; ++ uint32x4_t s3acc = vdupq_n_u32(0); ++ uint32x4_t adacc_prev = adacc; ++ ++ 
uint16x8_t s2_0, s2_1, s2_2, s2_3; ++ s2_0 = s2_1 = s2_2 = s2_3 = vdupq_n_u16(0); ++ ++ uint16x8_t s2_4, s2_5, s2_6, s2_7; ++ s2_4 = s2_5 = s2_6 = s2_7 = vdupq_n_u16(0); ++ ++ size_t num_iter = len >> 2; ++ int rem = len & 3; ++ ++ for (size_t i = 0; i < num_iter; ++i) { ++ uint8x16x4_t d0_d3 = vld1q_u8_x4(buf); ++ ++ /* Unfortunately it doesn't look like there's a direct sum 8 bit to 32 ++ * bit instruction, we'll have to make due summing to 16 bits first */ ++ uint16x8x2_t hsum, hsum_fold; ++ hsum.val[0] = vpaddlq_u8(d0_d3.val[0]); ++ hsum.val[1] = vpaddlq_u8(d0_d3.val[1]); ++ ++ hsum_fold.val[0] = vpadalq_u8(hsum.val[0], d0_d3.val[2]); ++ hsum_fold.val[1] = vpadalq_u8(hsum.val[1], d0_d3.val[3]); ++ ++ adacc = vpadalq_u16(adacc, hsum_fold.val[0]); ++ s3acc = vaddq_u32(s3acc, adacc_prev); ++ adacc = vpadalq_u16(adacc, hsum_fold.val[1]); ++ ++ /* If we do straight widening additions to the 16 bit values, we don't incur ++ * the usual penalties of a pairwise add. We can defer the multiplications ++ * until the very end. These will not overflow because we are incurring at ++ * most 408 loop iterations (NMAX / 64), and a given lane is only going to be ++ * summed into once. 
This means for the maximum input size, the largest value ++ * we will see is 255 * 102 = 26010, safely under uint16 max */ ++ s2_0 = vaddw_u8(s2_0, vget_low_u8(d0_d3.val[0])); ++ s2_1 = vaddw_high_u8(s2_1, d0_d3.val[0]); ++ s2_2 = vaddw_u8(s2_2, vget_low_u8(d0_d3.val[1])); ++ s2_3 = vaddw_high_u8(s2_3, d0_d3.val[1]); ++ s2_4 = vaddw_u8(s2_4, vget_low_u8(d0_d3.val[2])); ++ s2_5 = vaddw_high_u8(s2_5, d0_d3.val[2]); ++ s2_6 = vaddw_u8(s2_6, vget_low_u8(d0_d3.val[3])); ++ s2_7 = vaddw_high_u8(s2_7, d0_d3.val[3]); ++ ++ adacc_prev = adacc; ++ buf += 64; + } + +- while (len > 0) { +- uint8x16_t d0 = vld1q_u8(buf); +- uint16x8_t adler, sum2; +- s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 4)); +- adler = vpaddlq_u8(d0); +- sum2 = vmull_u8( vget_low_u8(t1), vget_low_u8(d0)); +- sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d0)); +- adacc = vpadalq_u16(adacc, adler); +- s2acc = vpadalq_u16(s2acc, sum2); +- buf += 16; +- len--; ++ s3acc = vshlq_n_u32(s3acc, 6); ++ ++ if (rem) { ++ uint32x4_t s3acc_0 = vdupq_n_u32(0); ++ while (rem--) { ++ uint8x16_t d0 = vld1q_u8(buf); ++ uint16x8_t adler; ++ adler = vpaddlq_u8(d0); ++ s2_6 = vaddw_u8(s2_6, vget_low_u8(d0)); ++ s2_7 = vaddw_high_u8(s2_7, d0); ++ adacc = vpadalq_u16(adacc, adler); ++ s3acc_0 = vaddq_u32(s3acc_0, adacc_prev); ++ adacc_prev = adacc; ++ buf += 16; ++ } ++ ++ s3acc_0 = vshlq_n_u32(s3acc_0, 4); ++ s3acc = vaddq_u32(s3acc_0, s3acc); + } + ++ uint16x8x4_t t0_t3 = vld1q_u16_x4(taps); ++ uint16x8x4_t t4_t7 = vld1q_u16_x4(taps + 32); ++ ++ s2acc = vmlal_high_u16(s2acc, t0_t3.val[0], s2_0); ++ s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t0_t3.val[0]), vget_low_u16(s2_0)); ++ s2acc_1 = vmlal_high_u16(s2acc_1, t0_t3.val[1], s2_1); ++ s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t0_t3.val[1]), vget_low_u16(s2_1)); ++ ++ s2acc = vmlal_high_u16(s2acc, t0_t3.val[2], s2_2); ++ s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t0_t3.val[2]), vget_low_u16(s2_2)); ++ s2acc_1 = vmlal_high_u16(s2acc_1, t0_t3.val[3], s2_3); ++ s2acc_2 = 
vmlal_u16(s2acc_2, vget_low_u16(t0_t3.val[3]), vget_low_u16(s2_3)); ++ ++ s2acc = vmlal_high_u16(s2acc, t4_t7.val[0], s2_4); ++ s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t4_t7.val[0]), vget_low_u16(s2_4)); ++ s2acc_1 = vmlal_high_u16(s2acc_1, t4_t7.val[1], s2_5); ++ s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t4_t7.val[1]), vget_low_u16(s2_5)); ++ ++ s2acc = vmlal_high_u16(s2acc, t4_t7.val[2], s2_6); ++ s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t4_t7.val[2]), vget_low_u16(s2_6)); ++ s2acc_1 = vmlal_high_u16(s2acc_1, t4_t7.val[3], s2_7); ++ s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t4_t7.val[3]), vget_low_u16(s2_7)); ++ ++ s2acc = vaddq_u32(s2acc_0, s2acc); ++ s2acc_2 = vaddq_u32(s2acc_1, s2acc_2); ++ s2acc = vaddq_u32(s2acc, s2acc_2); ++ ++ uint32x2_t adacc2, s2acc2, as; ++ s2acc = vaddq_u32(s2acc, s3acc); + adacc2 = vpadd_u32(vget_low_u32(adacc), vget_high_u32(adacc)); + s2acc2 = vpadd_u32(vget_low_u32(s2acc), vget_high_u32(s2acc)); + as = vpadd_u32(adacc2, s2acc2); +@@ -63,7 +133,7 @@ static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) { + s[1] = vget_lane_u32(as, 1); + } + +-static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, size_t len) { ++static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, size_t len) { + unsigned int i; + for (i = 0; i < len; ++i) { + pair[0] += buf[i]; +@@ -71,7 +141,7 @@ static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf, size_t le + } + } + +-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { ++Z_INTERNAL uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len) { + /* split Adler-32 into component sums */ + uint32_t sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; +@@ -91,7 +161,6 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { + uint32_t pair[2]; + int n = NMAX; + unsigned int done = 0; +- unsigned int i; + + /* Split Adler-32 into component sums, it can be supplied by + * the caller sites 
(e.g. in a PNG file). +@@ -99,18 +168,37 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { + pair[0] = adler; + pair[1] = sum2; + +- for (i = 0; i < len; i += n) { +- if ((i + n) > len) +- n = (int)(len - i); ++ /* If memory is not SIMD aligned, do scalar sums to an aligned ++ * offset, provided that doing so doesn't completely eliminate ++ * SIMD operation. Aligned loads are still faster on ARM, even ++ * though there's no explicit aligned load instruction */ ++ unsigned int align_offset = ((uintptr_t)buf & 15); ++ unsigned int align_adj = (align_offset) ? 16 - align_offset : 0; ++ ++ if (align_offset && len >= (16 + align_adj)) { ++ NEON_handle_tail(pair, buf, align_adj); ++ n -= align_adj; ++ done += align_adj; ++ ++ } else { ++ /* If here, we failed the len criteria test, it wouldn't be ++ * worthwhile to do scalar aligning sums */ ++ align_adj = 0; ++ } ++ ++ while (done < len) { ++ int remaining = (int)(len - done); ++ n = MIN(remaining, (done == align_adj) ? n : NMAX); + + if (n < 16) + break; + +- NEON_accum32(pair, buf + i, n / 16); ++ NEON_accum32(pair, buf + done, n >> 4); + pair[0] %= BASE; + pair[1] %= BASE; + +- done += (n / 16) * 16; ++ int actual_nsums = (n >> 4) << 4; ++ done += actual_nsums; + } + + /* Handle the tail elements. */ +@@ -123,4 +211,5 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { + /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. 
*/ + return (pair[1] << 16) | pair[0]; + } ++ + #endif +diff --git a/arch/arm/arm_features.c b/arch/arm/arm_features.c +new file mode 100644 +index 0000000..a0e070b +--- /dev/null ++++ b/arch/arm/arm_features.c +@@ -0,0 +1,100 @@ ++#include "../../zbuild.h" ++#include "arm_features.h" ++ ++#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) ++# include ++# ifdef ARM_ASM_HWCAP ++# include ++# endif ++#elif defined(__FreeBSD__) && defined(__aarch64__) ++# include ++# ifndef ID_AA64ISAR0_CRC32_VAL ++# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32 ++# endif ++#elif defined(__APPLE__) ++# if !defined(_DARWIN_C_SOURCE) ++# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */ ++# endif ++# include ++#elif defined(_WIN32) ++# include ++#endif ++ ++static int arm_has_crc32() { ++#if defined(__linux__) && defined(ARM_AUXV_HAS_CRC32) ++# ifdef HWCAP_CRC32 ++ return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 1 : 0; ++# else ++ return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0; ++# endif ++#elif defined(__FreeBSD__) && defined(__aarch64__) ++ return getenv("QEMU_EMULATING") == NULL ++ && ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE; ++#elif defined(__APPLE__) ++ int hascrc32; ++ size_t size = sizeof(hascrc32); ++ return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0 ++ && hascrc32 == 1; ++#elif defined(_WIN32) ++ return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); ++#elif defined(ARM_NOCHECK_ACLE) ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++/* AArch64 has neon. */ ++#if !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) ++static inline int arm_has_neon() { ++#if defined(__linux__) && defined(ARM_AUXV_HAS_NEON) ++# ifdef HWCAP_ARM_NEON ++ return (getauxval(AT_HWCAP) & HWCAP_ARM_NEON) != 0 ? 1 : 0; ++# else ++ return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 
1 : 0; ++# endif ++#elif defined(__APPLE__) ++ int hasneon; ++ size_t size = sizeof(hasneon); ++ return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0 ++ && hasneon == 1; ++#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION) ++# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP) ++ return 1; /* Always supported */ ++# endif ++#endif ++ ++#if defined(ARM_NOCHECK_NEON) ++ return 1; ++#else ++ return 0; ++#endif ++} ++#endif ++ ++/* AArch64 does not have ARMv6 SIMD. */ ++#if !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) ++static inline int arm_has_simd() { ++#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) ++ const char *platform = (const char *)getauxval(AT_PLATFORM); /* 0 (NULL) when the entry is absent */ ++ return platform != NULL && (strncmp(platform, "v6l", 3) == 0 ++ || strncmp(platform, "v7l", 3) == 0 ++ || strncmp(platform, "v8l", 3) == 0); ++#elif defined(ARM_NOCHECK_SIMD) ++ return 1; ++#else ++ return 0; ++#endif ++} ++#endif ++ ++void Z_INTERNAL arm_check_features(struct arm_cpu_features *features) { ++#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) ++ features->has_simd = 0; /* never available */ ++ features->has_neon = 1; /* always available */ ++#else ++ features->has_simd = arm_has_simd(); ++ features->has_neon = arm_has_neon(); ++#endif ++ features->has_crc32 = arm_has_crc32(); ++} +diff --git a/arch/arm/arm_features.h b/arch/arm/arm_features.h +new file mode 100644 +index 0000000..eca078e +--- /dev/null ++++ b/arch/arm/arm_features.h +@@ -0,0 +1,16 @@ ++/* arm_features.h -- check for ARM features. 
++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef ARM_H_ ++#define ARM_H_ ++ ++struct arm_cpu_features { ++ int has_simd; ++ int has_neon; ++ int has_crc32; ++}; ++ ++void Z_INTERNAL arm_check_features(struct arm_cpu_features *features); ++ ++#endif /* ARM_H_ */ +diff --git a/arch/arm/chunkset_neon.c b/arch/arm/chunkset_neon.c +index e0ad3e0..f9a444b 100644 +--- a/arch/arm/chunkset_neon.c ++++ b/arch/arm/chunkset_neon.c +@@ -2,49 +2,56 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +-#ifdef ARM_NEON_CHUNKSET +-#ifdef _M_ARM64 +-# include +-#else +-# include +-#endif ++#ifdef ARM_NEON ++#include "neon_intrins.h" + #include "../../zbuild.h" +-#include "../../zutil.h" ++#include "../generic/chunk_permute_table.h" + + typedef uint8x16_t chunk_t; + + #define CHUNK_SIZE 16 + +-#define HAVE_CHUNKMEMSET_1 + #define HAVE_CHUNKMEMSET_2 + #define HAVE_CHUNKMEMSET_4 + #define HAVE_CHUNKMEMSET_8 ++#define HAVE_CHUNK_MAG + +-static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { +- *chunk = vld1q_dup_u8(from); +-} ++static const lut_rem_pair perm_idx_lut[13] = { ++ {0, 1}, /* 3 */ ++ {0, 0}, /* don't care */ ++ {1 * 32, 1}, /* 5 */ ++ {2 * 32, 4}, /* 6 */ ++ {3 * 32, 2}, /* 7 */ ++ {0 * 32, 0}, /* don't care */ ++ {4 * 32, 7}, /* 9 */ ++ {5 * 32, 6}, /* 10 */ ++ {6 * 32, 5}, /* 11 */ ++ {7 * 32, 4}, /* 12 */ ++ {8 * 32, 3}, /* 13 */ ++ {9 * 32, 2}, /* 14 */ ++ {10 * 32, 1},/* 15 */ ++}; + + static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { + uint16_t tmp; +- memcpy(&tmp, from, 2); ++ memcpy(&tmp, from, sizeof(tmp)); + *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp)); + } + + static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { + uint32_t tmp; +- memcpy(&tmp, from, 4); ++ memcpy(&tmp, from, sizeof(tmp)); + *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp)); + } + + static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { + uint64_t tmp; +- memcpy(&tmp, 
from, 8); ++ memcpy(&tmp, from, sizeof(tmp)); + *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp)); + } + + #define CHUNKSIZE chunksize_neon + #define CHUNKCOPY chunkcopy_neon +-#define CHUNKCOPY_SAFE chunkcopy_safe_neon + #define CHUNKUNROLL chunkunroll_neon + #define CHUNKMEMSET chunkmemset_neon + #define CHUNKMEMSET_SAFE chunkmemset_safe_neon +@@ -57,6 +64,36 @@ static inline void storechunk(uint8_t *out, chunk_t *chunk) { + vst1q_u8(out, *chunk); + } + ++static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) { ++ lut_rem_pair lut_rem = perm_idx_lut[dist - 3]; ++ *chunk_rem = lut_rem.remval; ++ ++ /* See note in chunkset_ssse3.c for why this is ok */ ++ __msan_unpoison(buf + dist, 16 - dist); ++ ++ /* This version of table is only available on aarch64 */ ++#if defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__aarch64__) ++ uint8x16_t ret_vec = vld1q_u8(buf); ++ ++ uint8x16_t perm_vec = vld1q_u8(permute_table + lut_rem.idx); ++ return vqtbl1q_u8(ret_vec, perm_vec); ++#else ++ uint8x8_t ret0, ret1, a, b, perm_vec0, perm_vec1; ++ perm_vec0 = vld1_u8(permute_table + lut_rem.idx); ++ perm_vec1 = vld1_u8(permute_table + lut_rem.idx + 8); ++ a = vld1_u8(buf); ++ b = vld1_u8(buf + 8); ++ ret0 = vtbl1_u8(a, perm_vec0); ++ uint8x8x2_t ab = {{a, b}}; ++ ret1 = vtbl2_u8(ab, perm_vec1); ++ return vcombine_u8(ret0, ret1); ++#endif ++} ++ + #include "chunkset_tpl.h" + ++#define INFLATE_FAST inflate_fast_neon ++ ++#include "inffast_tpl.h" ++ + #endif +diff --git a/arch/arm/compare256_neon.c b/arch/arm/compare256_neon.c +new file mode 100644 +index 0000000..7daeba4 +--- /dev/null ++++ b/arch/arm/compare256_neon.c +@@ -0,0 +1,59 @@ ++/* compare256_neon.c - NEON version of compare256 ++ * Copyright (C) 2022 Nathan Moinvaziri ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../../zbuild.h" ++ ++#include "fallback_builtins.h" ++ ++#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) ++#include 
"neon_intrins.h" ++ ++static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ uint8x16_t a, b, cmp; ++ uint64_t lane; ++ ++ a = vld1q_u8(src0); ++ b = vld1q_u8(src1); ++ ++ cmp = veorq_u8(a, b); ++ ++ lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 0); ++ if (lane) { ++ uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8; ++ return len + match_byte; ++ } ++ len += 8; ++ lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 1); ++ if (lane) { ++ uint32_t match_byte = (uint32_t)__builtin_ctzll(lane) / 8; ++ return len + match_byte; ++ } ++ len += 8; ++ ++ src0 += 16, src1 += 16; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_neon_static(src0, src1); ++} ++ ++#define LONGEST_MATCH longest_match_neon ++#define COMPARE256 compare256_neon_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_neon ++#define COMPARE256 compare256_neon_static ++ ++#include "match_tpl.h" ++ ++#endif +diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c +index 88ba6c3..ac7d6ff 100644 +--- a/arch/arm/crc32_acle.c ++++ b/arch/arm/crc32_acle.c +@@ -5,49 +5,48 @@ + * + */ + +-#ifdef ARM_ACLE_CRC_HASH +-#ifndef _MSC_VER +-# include +-#endif +-#include "../../zutil.h" ++#ifdef ARM_ACLE ++#include "acle_intrins.h" ++#include "../../zbuild.h" + +-uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { ++Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { + Z_REGISTER uint32_t c; + Z_REGISTER const uint16_t *buf2; + Z_REGISTER const uint32_t *buf4; ++ Z_REGISTER const uint64_t *buf8; + + c = ~crc; +- if (len && ((ptrdiff_t)buf & 1)) { +- c = __crc32b(c, *buf++); +- len--; +- } +- +- if ((len > sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { +- buf2 = (const uint16_t *) buf; +- c = __crc32h(c, 
*buf2++); +- len -= sizeof(uint16_t); +- buf4 = (const uint32_t *) buf2; +- } else { +- buf4 = (const uint32_t *) buf; +- } + +-#if defined(__aarch64__) +- if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { +- c = __crc32w(c, *buf4++); +- len -= sizeof(uint32_t); ++ if (UNLIKELY(len == 1)) { ++ c = __crc32b(c, *buf); ++ c = ~c; ++ return c; + } + +- const uint64_t *buf8 = (const uint64_t *) buf4; +- +-#ifdef UNROLL_MORE +- while (len >= 4 * sizeof(uint64_t)) { +- c = __crc32d(c, *buf8++); +- c = __crc32d(c, *buf8++); +- c = __crc32d(c, *buf8++); +- c = __crc32d(c, *buf8++); +- len -= 4 * sizeof(uint64_t); ++ if ((ptrdiff_t)buf & (sizeof(uint64_t) - 1)) { ++ if (len && ((ptrdiff_t)buf & 1)) { ++ c = __crc32b(c, *buf++); ++ len--; ++ } ++ ++ if ((len >= sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { ++ buf2 = (const uint16_t *) buf; ++ c = __crc32h(c, *buf2++); ++ len -= sizeof(uint16_t); ++ buf4 = (const uint32_t *) buf2; ++ } else { ++ buf4 = (const uint32_t *) buf; ++ } ++ ++ if ((len >= sizeof(uint32_t)) && ((ptrdiff_t)buf4 & sizeof(uint32_t))) { /* test the advanced cursor buf4, not the stale buf, so buf8 ends up 8-byte aligned */ ++ c = __crc32w(c, *buf4++); ++ len -= sizeof(uint32_t); ++ } ++ ++ buf8 = (const uint64_t *) buf4; ++ } else { ++ buf8 = (const uint64_t *) buf; + } +-#endif + + while (len >= sizeof(uint64_t)) { + c = __crc32d(c, *buf8++); +@@ -69,37 +68,6 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { + } + + buf = (const unsigned char *) buf2; +-#else /* __aarch64__ */ +- +-# ifdef UNROLL_MORE +- while (len >= 8 * sizeof(uint32_t)) { +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- c = __crc32w(c, *buf4++); +- len -= 8 * sizeof(uint32_t); +- } +-# endif +- +- while (len >= sizeof(uint32_t)) { +- c = 
__crc32w(c, *buf4++); +- len -= sizeof(uint32_t); +- } +- +- if (len >= sizeof(uint16_t)) { +- buf2 = (const uint16_t 
*) buf4; +- c = __crc32h(c, *buf2++); +- len -= sizeof(uint16_t); +- buf = (const unsigned char *) buf2; +- } else { +- buf = (const unsigned char *) buf4; +- } +-#endif /* __aarch64__ */ +- + if (len) { + c = __crc32b(c, *buf); + } +diff --git a/arch/arm/insert_string_acle.c b/arch/arm/insert_string_acle.c +index 2daf9ba..aa8385c 100644 +--- a/arch/arm/insert_string_acle.c ++++ b/arch/arm/insert_string_acle.c +@@ -1,22 +1,24 @@ +-/* insert_string_acle.c -- insert_string variant using ACLE's CRC instructions ++/* insert_string_acle.c -- insert_string integer hash variant using ACLE's CRC instructions + * + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + */ + +-#ifdef ARM_ACLE_CRC_HASH +-#ifndef _MSC_VER +-# include +-#endif ++#ifdef ARM_ACLE ++#include "acle_intrins.h" + #include "../../zbuild.h" + #include "../../deflate.h" + +-#define UPDATE_HASH(s, h, val) \ ++#define HASH_CALC(s, h, val) \ + h = __crc32w(0, val) + +-#define INSERT_STRING insert_string_acle +-#define QUICK_INSERT_STRING quick_insert_string_acle ++#define HASH_CALC_VAR h ++#define HASH_CALC_VAR_INIT uint32_t h = 0 ++ ++#define UPDATE_HASH Z_TARGET_CRC update_hash_acle ++#define INSERT_STRING Z_TARGET_CRC insert_string_acle ++#define QUICK_INSERT_STRING Z_TARGET_CRC quick_insert_string_acle + + #include "../../insert_string_tpl.h" + #endif +diff --git a/arch/arm/neon_intrins.h b/arch/arm/neon_intrins.h +new file mode 100644 +index 0000000..51df77d +--- /dev/null ++++ b/arch/arm/neon_intrins.h +@@ -0,0 +1,58 @@ ++#ifndef ARM_NEON_INTRINS_H ++#define ARM_NEON_INTRINS_H ++ ++#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC)) ++/* arm64_neon.h is MSVC specific */ ++# include ++#else ++# include ++#endif ++ ++#if defined(ARM_NEON) && !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) ++/* Compatibility shim for the _high family of functions */ ++#define vmull_high_u8(a, b) 
vmull_u8(vget_high_u8(a), vget_high_u8(b)) ++#define vmlal_high_u8(a, b, c) vmlal_u8(a, vget_high_u8(b), vget_high_u8(c)) ++#define vmlal_high_u16(a, b, c) vmlal_u16(a, vget_high_u16(b), vget_high_u16(c)) ++#define vaddw_high_u8(a, b) vaddw_u8(a, vget_high_u8(b)) ++#endif ++ ++#ifdef ARM_NEON ++ ++#define vqsubq_u16_x4_x1(out, a, b) do { \ ++ out.val[0] = vqsubq_u16(a.val[0], b); \ ++ out.val[1] = vqsubq_u16(a.val[1], b); \ ++ out.val[2] = vqsubq_u16(a.val[2], b); \ ++ out.val[3] = vqsubq_u16(a.val[3], b); \ ++} while (0) ++ ++ ++# ifndef ARM_NEON_HASLD4 ++ ++static inline uint16x8x4_t vld1q_u16_x4(uint16_t const *a) { ++ uint16x8x4_t ret = (uint16x8x4_t) {{ ++ vld1q_u16(a), ++ vld1q_u16(a+8), ++ vld1q_u16(a+16), ++ vld1q_u16(a+24)}}; ++ return ret; ++} ++ ++static inline uint8x16x4_t vld1q_u8_x4(uint8_t const *a) { ++ uint8x16x4_t ret = (uint8x16x4_t) {{ ++ vld1q_u8(a), ++ vld1q_u8(a+16), ++ vld1q_u8(a+32), ++ vld1q_u8(a+48)}}; ++ return ret; ++} ++ ++static inline void vst1q_u16_x4(uint16_t *p, uint16x8x4_t a) { ++ vst1q_u16(p, a.val[0]); ++ vst1q_u16(p + 8, a.val[1]); ++ vst1q_u16(p + 16, a.val[2]); ++ vst1q_u16(p + 24, a.val[3]); ++} ++# endif // HASLD4 check ++#endif ++ ++#endif // include guard ARM_NEON_INTRINS_H +diff --git a/arch/arm/slide_hash_armv6.c b/arch/arm/slide_hash_armv6.c +new file mode 100644 +index 0000000..0a2eecc +--- /dev/null ++++ b/arch/arm/slide_hash_armv6.c +@@ -0,0 +1,47 @@ ++/* slide_hash_armv6.c -- Optimized hash table shifting for ARMv6 with support for SIMD instructions ++ * Copyright (C) 2023 Cameron Cawley ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#if defined(ARM_SIMD) ++#include "acle_intrins.h" ++#include "../../zbuild.h" ++#include "../../deflate.h" ++ ++/* SIMD version of hash_chain rebase */ ++static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { ++ Z_REGISTER uint16x2_t v; ++ uint16x2_t p0, p1, p2, p3; ++ Z_REGISTER size_t n; ++ ++ size_t size = 
entries*sizeof(table[0]); ++ Assert((size % (sizeof(uint16x2_t) * 4) == 0), "hash table size err"); ++ ++ Assert(sizeof(Pos) == 2, "Wrong Pos size"); ++ v = wsize | (wsize << 16); ++ ++ n = size / (sizeof(uint16x2_t) * 4); ++ do { ++ p0 = *((const uint16x2_t *)(table)); ++ p1 = *((const uint16x2_t *)(table+2)); ++ p2 = *((const uint16x2_t *)(table+4)); ++ p3 = *((const uint16x2_t *)(table+6)); ++ p0 = __uqsub16(p0, v); ++ p1 = __uqsub16(p1, v); ++ p2 = __uqsub16(p2, v); ++ p3 = __uqsub16(p3, v); ++ *((uint16x2_t *)(table)) = p0; ++ *((uint16x2_t *)(table+2)) = p1; ++ *((uint16x2_t *)(table+4)) = p2; ++ *((uint16x2_t *)(table+6)) = p3; ++ table += 8; ++ } while (--n); ++} ++ ++Z_INTERNAL void slide_hash_armv6(deflate_state *s) { ++ unsigned int wsize = s->w_size; ++ ++ slide_hash_chain(s->head, HASH_SIZE, wsize); ++ slide_hash_chain(s->prev, wsize, wsize); ++} ++#endif +diff --git a/arch/arm/slide_hash_neon.c b/arch/arm/slide_hash_neon.c +new file mode 100644 +index 0000000..a96ca11 +--- /dev/null ++++ b/arch/arm/slide_hash_neon.c +@@ -0,0 +1,46 @@ ++/* slide_hash_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions ++ * Copyright (C) 2017-2020 Mika T. Lindqvist ++ * ++ * Authors: ++ * Mika T. 
Lindqvist ++ * Jun He ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef ARM_NEON ++#include "neon_intrins.h" ++#include "../../zbuild.h" ++#include "../../deflate.h" ++ ++/* SIMD version of hash_chain rebase */ ++static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { ++ Z_REGISTER uint16x8_t v; ++ uint16x8x4_t p0, p1; ++ Z_REGISTER size_t n; ++ ++ size_t size = entries*sizeof(table[0]); ++ Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err"); /* same 8-vector unit that n is computed from below */ ++ ++ Assert(sizeof(Pos) == 2, "Wrong Pos size"); ++ v = vdupq_n_u16(wsize); ++ ++ n = size / (sizeof(uint16x8_t) * 8); ++ do { ++ p0 = vld1q_u16_x4(table); ++ p1 = vld1q_u16_x4(table+32); ++ vqsubq_u16_x4_x1(p0, p0, v); ++ vqsubq_u16_x4_x1(p1, p1, v); ++ vst1q_u16_x4(table, p0); ++ vst1q_u16_x4(table+32, p1); ++ table += 64; ++ } while (--n); ++} ++ ++Z_INTERNAL void slide_hash_neon(deflate_state *s) { ++ unsigned int wsize = s->w_size; ++ ++ slide_hash_chain(s->head, HASH_SIZE, wsize); ++ slide_hash_chain(s->prev, wsize, wsize); ++} ++#endif +diff --git a/arch/generic/Makefile.in b/arch/generic/Makefile.in +index be8c185..c717026 100644 +--- a/arch/generic/Makefile.in ++++ b/arch/generic/Makefile.in +@@ -19,3 +19,6 @@ clean: + rm -f *.o *.lo *~ \ + rm -rf objs + rm -f *.gcda *.gcno *.gcov ++ ++distclean: clean ++ rm -f Makefile +diff --git a/arch/generic/chunk_permute_table.h b/arch/generic/chunk_permute_table.h +new file mode 100644 +index 0000000..bad66cc +--- /dev/null ++++ b/arch/generic/chunk_permute_table.h +@@ -0,0 +1,53 @@ ++/* chunk_permute_table.h - shared AVX/SSSE3 permutation table for use with chunkmemset family of functions. 
++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef CHUNK_PERMUTE_TABLE_H_ ++#define CHUNK_PERMUTE_TABLE_H_ ++ ++#include "zbuild.h" ++ ++/* Need entries for all numbers not an even modulus for 1, 2, 4, 8, 16 & 32 */ ++static const ALIGNED_(32) uint8_t permute_table[26*32] = { ++ 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, /* dist 3 */ ++ 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, /* dist 5 */ ++ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, /* dist 6 */ ++ 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, /* dist 7 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, /* dist 9 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, /* dist 10 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* dist 11 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, /* dist 12 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, 3, 4, 5, /* dist 13 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, 2, 3, /* dist 14 */ ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, /* dist 15 */ ++ ++ /* Beyond dists of 15 means we have to permute from a vector > len(m128i). Because AVX couldn't permute ++ * beyond 128 bit lanes until AVX512 for sub 4-byte sequences, we have to do some math here for an eventual ++ * blend with a comparison. That means we need to wrap the indices with yet another derived table. For simplicity, ++ * we'll use absolute indexing here to derive a blend vector. 
This is actually a lot simpler with ARM's TBL, but, ++ * this is what we're dealt. ++ */ ++ ++ 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* dist 17 */ ++ 16, 17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, /* dist 18 */ ++ 16, 17, 18, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, /* dist 19 */ ++ 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, /* dist 20 */ ++ 16, 17, 18, 19, 20, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, /* dist 21 */ ++ 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* dist 22 */ ++ 16, 17, 18, 19, 20, 21, 22, 0, 1, 2, 3, 4, 5, 6, 7, 8, /* dist 23 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, /* dist 24 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 1, 2, 3, 4, 5, 6, /* dist 25 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 1, 2, 3, 4, 5, /* dist 26 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 0, 1, 2, 3, 4, /* dist 27 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3, /* dist 28 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 0, 1, 2, /* dist 29 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 1, /* dist 30 */ ++ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, /* dist 31 */ ++}; ++ ++typedef struct lut_rem_pair_s { ++ uint16_t idx; ++ uint16_t remval; ++} lut_rem_pair; ++ ++#endif +diff --git a/arch/power/Makefile.in b/arch/power/Makefile.in +index cf5839b..e2bec5e 100644 +--- a/arch/power/Makefile.in ++++ b/arch/power/Makefile.in +@@ -1,5 +1,6 @@ + # Makefile for POWER-specific files + # Copyright (C) 2020 Matheus Castanho , IBM ++# Copyright (C) 2021 Mika T. Lindqvist + # For conditions of distribution and use, see copyright notice in zlib.h + + CC= +@@ -9,24 +10,36 @@ INCLUDES= + SUFFIX= + + P8FLAGS=-mcpu=power8 ++P9FLAGS=-mcpu=power9 ++PPCFLAGS=-maltivec + NOLTOFLAG= + + SRCDIR=. + SRCTOP=../.. 
+ TOPDIR=$(SRCTOP) + +-all: power.o \ +- power.lo \ ++all: power_features.o \ ++ power_features.lo \ + adler32_power8.o \ + adler32_power8.lo \ ++ adler32_vmx.o \ ++ adler32_vmx.lo \ ++ chunkset_power8.o \ ++ chunkset_power8.lo \ ++ compare256_power9.o \ ++ compare256_power9.lo \ ++ crc32_power8.o \ ++ crc32_power8.lo \ + slide_hash_power8.o \ +- slide_hash_power8.lo ++ slide_hash_power8.lo \ ++ slide_hash_vmx.o \ ++ slide_hash_vmx.lo + +-power.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c ++power_features.o: ++ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power_features.c + +-power.lo: +- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c ++power_features.lo: ++ $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power_features.c + + adler32_power8.o: + $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c +@@ -34,17 +47,47 @@ adler32_power8.o: + adler32_power8.lo: + $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c + ++adler32_vmx.o: ++ $(CC) $(CFLAGS) $(PPCFLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_vmx.c ++ ++adler32_vmx.lo: ++ $(CC) $(SFLAGS) $(PPCFLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_vmx.c ++ ++chunkset_power8.o: ++ $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_power8.c ++ ++chunkset_power8.lo: ++ $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_power8.c ++ ++compare256_power9.o: ++ $(CC) $(CFLAGS) $(P9FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_power9.c ++ ++compare256_power9.lo: ++ $(CC) $(SFLAGS) $(P9FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_power9.c ++ ++crc32_power8.o: ++ $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_power8.c ++ ++crc32_power8.lo: ++ $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_power8.c ++ + slide_hash_power8.o: + $(CC) $(CFLAGS) $(P8FLAGS) $(NOLTOFLAG) 
$(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c + + slide_hash_power8.lo: + $(CC) $(SFLAGS) $(P8FLAGS) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c + ++slide_hash_vmx.o: ++ $(CC) $(CFLAGS) ${PPCFLAGS} $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_vmx.c ++ ++slide_hash_vmx.lo: ++ $(CC) $(SFLAGS) ${PPCFLAGS} $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_vmx.c ++ + mostlyclean: clean + clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +-distclean: ++distclean: clean + rm -f Makefile +diff --git a/arch/power/adler32_power8.c b/arch/power/adler32_power8.c +index 3d00f2f..4aaea9f 100644 +--- a/arch/power/adler32_power8.c ++++ b/arch/power/adler32_power8.c +@@ -36,11 +36,10 @@ + * https://www.ietf.org/rfc/rfc1950.txt + */ + +-#ifdef POWER8_VSX_ADLER32 ++#ifdef POWER8_VSX + + #include + #include "zbuild.h" +-#include "zutil.h" + #include "adler32_p.h" + + /* Vector across sum unsigned int (saturate). */ +@@ -53,7 +52,7 @@ static inline vector unsigned int vec_sumsu(vector unsigned int __a, vector unsi + return __a; + } + +-uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) { ++Z_INTERNAL uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t s1 = adler & 0xffff; + uint32_t s2 = (adler >> 16) & 0xffff; + +@@ -147,8 +146,8 @@ uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) { + s1 = vs1[0] % BASE; + s2 = vs2[0] % BASE; + +- /* Process tail (len < 16).and return */ ++ /* Process tail (len < 16). */ + return adler32_len_16(s1, buf, len, s2); + } + +-#endif /* POWER8_VSX_ADLER32 */ ++#endif /* POWER8_VSX */ +diff --git a/arch/power/adler32_vmx.c b/arch/power/adler32_vmx.c +new file mode 100644 +index 0000000..3470c28 +--- /dev/null ++++ b/arch/power/adler32_vmx.c +@@ -0,0 +1,186 @@ ++/* adler32_vmx.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-2011 Mark Adler ++ * Copyright (C) 2017-2023 Mika T. 
Lindqvist ++ * Copyright (C) 2021 Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef PPC_VMX ++#include ++#include "zbuild.h" ++#include "zendian.h" ++#include "adler32_p.h" ++ ++#define vmx_zero() (vec_splat_u32(0)) ++ ++static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, size_t len) { ++ unsigned int i; ++ for (i = 0; i < len; ++i) { ++ pair[0] += buf[i]; ++ pair[1] += pair[0]; ++ } ++} ++ ++static void vmx_accum32(uint32_t *s, const uint8_t *buf, size_t len) { ++ /* Different taps for the separable components of sums */ ++ const vector unsigned char t0 = {64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49}; ++ const vector unsigned char t1 = {48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33}; ++ const vector unsigned char t2 = {32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17}; ++ const vector unsigned char t3 = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; ++ /* As silly and inefficient as it seems, creating 1 permutation vector to permute ++ * a 2 element vector from a single load + a subsequent shift is just barely faster ++ * than doing 2 indexed insertions into zero initialized vectors from unaligned memory. 
*/ ++ const vector unsigned char s0_perm = {0, 1, 2, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; ++ const vector unsigned char shift_vec = vec_sl(vec_splat_u8(8), vec_splat_u8(2)); ++ vector unsigned int adacc, s2acc; ++ vector unsigned int pair_vec = vec_ld(0, s); ++ adacc = vec_perm(pair_vec, pair_vec, s0_perm); ++#if BYTE_ORDER == LITTLE_ENDIAN ++ s2acc = vec_sro(pair_vec, shift_vec); ++#else ++ s2acc = vec_slo(pair_vec, shift_vec); ++#endif ++ ++ vector unsigned int zero = vmx_zero(); ++ vector unsigned int s3acc = zero; ++ vector unsigned int s3acc_0 = zero; ++ vector unsigned int adacc_prev = adacc; ++ vector unsigned int adacc_prev_0 = zero; ++ ++ vector unsigned int s2acc_0 = zero; ++ vector unsigned int s2acc_1 = zero; ++ vector unsigned int s2acc_2 = zero; ++ ++ /* Maintain a running sum of a second half, this might help use break yet another ++ * data dependency bubble in the sum */ ++ vector unsigned int adacc_0 = zero; ++ ++ int num_iter = len / 4; ++ int rem = len & 3; ++ ++ for (int i = 0; i < num_iter; ++i) { ++ vector unsigned char d0 = vec_ld(0, buf); ++ vector unsigned char d1 = vec_ld(16, buf); ++ vector unsigned char d2 = vec_ld(32, buf); ++ vector unsigned char d3 = vec_ld(48, buf); ++ ++ /* The core operation of the loop, basically ++ * what is being unrolled below */ ++ adacc = vec_sum4s(d0, adacc); ++ s3acc = vec_add(s3acc, adacc_prev); ++ s3acc_0 = vec_add(s3acc_0, adacc_prev_0); ++ s2acc = vec_msum(t0, d0, s2acc); ++ ++ /* interleave dependent sums in here */ ++ adacc_0 = vec_sum4s(d1, adacc_0); ++ s2acc_0 = vec_msum(t1, d1, s2acc_0); ++ adacc = vec_sum4s(d2, adacc); ++ s2acc_1 = vec_msum(t2, d2, s2acc_1); ++ s2acc_2 = vec_msum(t3, d3, s2acc_2); ++ adacc_0 = vec_sum4s(d3, adacc_0); ++ ++ adacc_prev = adacc; ++ adacc_prev_0 = adacc_0; ++ buf += 64; ++ } ++ ++ adacc = vec_add(adacc, adacc_0); ++ s3acc = vec_add(s3acc, s3acc_0); ++ s3acc = vec_sl(s3acc, vec_splat_u32(6)); ++ ++ if (rem) { ++ adacc_prev = vec_add(adacc_prev_0, adacc_prev); ++ 
adacc_prev = vec_sl(adacc_prev, vec_splat_u32(4)); ++ while (rem--) { ++ vector unsigned char d0 = vec_ld(0, buf); ++ adacc = vec_sum4s(d0, adacc); ++ s3acc = vec_add(s3acc, adacc_prev); ++ s2acc = vec_msum(t3, d0, s2acc); ++ adacc_prev = vec_sl(adacc, vec_splat_u32(4)); ++ buf += 16; ++ } ++ } ++ ++ ++ /* Sum up independent second sums */ ++ s2acc = vec_add(s2acc, s2acc_0); ++ s2acc_2 = vec_add(s2acc_1, s2acc_2); ++ s2acc = vec_add(s2acc, s2acc_2); ++ ++ s2acc = vec_add(s2acc, s3acc); ++ ++ adacc = vec_add(adacc, vec_sld(adacc, adacc, 8)); ++ s2acc = vec_add(s2acc, vec_sld(s2acc, s2acc, 8)); ++ adacc = vec_add(adacc, vec_sld(adacc, adacc, 4)); ++ s2acc = vec_add(s2acc, vec_sld(s2acc, s2acc, 4)); ++ ++ vec_ste(adacc, 0, s); ++ vec_ste(s2acc, 0, s+1); ++} ++ ++Z_INTERNAL uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len) { ++ uint32_t sum2; ++ uint32_t pair[16] ALIGNED_(16); ++ memset(&pair[2], 0, 14); ++ int n = NMAX; ++ unsigned int done = 0, i; ++ ++ /* Split Adler-32 into component sums, it can be supplied by ++ * the caller sites (e.g. in a PNG file). 
++ */ ++ sum2 = (adler >> 16) & 0xffff; ++ adler &= 0xffff; ++ pair[0] = adler; ++ pair[1] = sum2; ++ ++ /* in case user likes doing a byte at a time, keep it fast */ ++ if (UNLIKELY(len == 1)) ++ return adler32_len_1(adler, buf, sum2); ++ ++ /* initial Adler-32 value (deferred check for len == 1 speed) */ ++ if (UNLIKELY(buf == NULL)) ++ return 1L; ++ ++ /* in case short lengths are provided, keep it somewhat fast */ ++ if (UNLIKELY(len < 16)) ++ return adler32_len_16(adler, buf, len, sum2); ++ ++ // Align buffer ++ unsigned int al = 0; ++ if ((uintptr_t)buf & 0xf) { ++ al = 16-((uintptr_t)buf & 0xf); ++ if (al > len) { ++ al=len; ++ } ++ vmx_handle_head_or_tail(pair, buf, al); ++ ++ done += al; ++ /* Rather than rebasing, we can reduce the max sums for the ++ * first round only */ ++ n -= al; ++ } ++ for (i = al; i < len; i += n) { ++ int remaining = (int)(len-i); ++ n = MIN(remaining, (i == al) ? n : NMAX); ++ ++ if (n < 16) ++ break; ++ ++ vmx_accum32(pair, buf + i, n / 16); ++ pair[0] %= BASE; ++ pair[1] %= BASE; ++ ++ done += (n / 16) * 16; ++ } ++ ++ /* Handle the tail elements. */ ++ if (done < len) { ++ vmx_handle_head_or_tail(pair, (buf + done), len - done); ++ pair[0] %= BASE; ++ pair[1] %= BASE; ++ } ++ ++ /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ ++ return (pair[1] << 16) | pair[0]; ++} ++#endif +diff --git a/arch/power/chunkset_power8.c b/arch/power/chunkset_power8.c +new file mode 100644 +index 0000000..7cbb802 +--- /dev/null ++++ b/arch/power/chunkset_power8.c +@@ -0,0 +1,55 @@ ++/* chunkset_power8.c -- VSX inline functions to copy small data chunks. 
++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef POWER8_VSX ++#include ++#include "../../zbuild.h" ++ ++typedef vector unsigned char chunk_t; ++ ++#define CHUNK_SIZE 16 ++ ++#define HAVE_CHUNKMEMSET_2 ++#define HAVE_CHUNKMEMSET_4 ++#define HAVE_CHUNKMEMSET_8 ++ ++static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { ++ uint16_t tmp; ++ memcpy(&tmp, from, sizeof(tmp)); ++ *chunk = (vector unsigned char)vec_splats(tmp); ++} ++ ++static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { ++ uint32_t tmp; ++ memcpy(&tmp, from, sizeof(tmp)); ++ *chunk = (vector unsigned char)vec_splats(tmp); ++} ++ ++static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { ++ uint64_t tmp; ++ memcpy(&tmp, from, sizeof(tmp)); ++ *chunk = (vector unsigned char)vec_splats((unsigned long long)tmp); ++} ++ ++static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { ++ *chunk = vec_xl(0, s); ++} ++ ++static inline void storechunk(uint8_t *out, chunk_t *chunk) { ++ vec_xst(*chunk, 0, out); ++} ++ ++#define CHUNKSIZE chunksize_power8 ++#define CHUNKCOPY chunkcopy_power8 ++#define CHUNKUNROLL chunkunroll_power8 ++#define CHUNKMEMSET chunkmemset_power8 ++#define CHUNKMEMSET_SAFE chunkmemset_safe_power8 ++ ++#include "chunkset_tpl.h" ++ ++#define INFLATE_FAST inflate_fast_power8 ++ ++#include "inffast_tpl.h" ++ ++#endif +diff --git a/arch/power/compare256_power9.c b/arch/power/compare256_power9.c +new file mode 100644 +index 0000000..9b0ddaf +--- /dev/null ++++ b/arch/power/compare256_power9.c +@@ -0,0 +1,64 @@ ++/* compare256_power9.c - Power9 version of compare256 ++ * Copyright (C) 2019 Matheus Castanho , IBM ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef POWER9 ++#include ++#include "../../zbuild.h" ++#include "../../zendian.h" ++ ++/* Older versions of GCC misimplemented semantics for these bit counting builtins. 
++ * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac */ ++#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 12) ++#if BYTE_ORDER == LITTLE_ENDIAN ++# define zng_vec_vctzlsbb(vc, len) len = __builtin_vec_vctzlsbb(vc) ++#else ++# define zng_vec_vctzlsbb(vc, len) len = __builtin_vec_vclzlsbb(vc) ++#endif ++#else ++# define zng_vec_vctzlsbb(vc, len) len = vec_cntlz_lsbb(vc) ++#endif ++ ++static inline uint32_t compare256_power9_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0, cmplen; ++ ++ do { ++ vector unsigned char vsrc0, vsrc1, vc; ++ ++ vsrc0 = *((vector unsigned char *)src0); ++ vsrc1 = *((vector unsigned char *)src1); ++ ++ /* Compare 16 bytes at a time. Each byte of vc will be either ++ * all ones or all zeroes, depending on the result of the comparison. */ ++ vc = (vector unsigned char)vec_cmpne(vsrc0, vsrc1); ++ ++ /* Since the index of matching bytes will contain only zeroes ++ * on vc (since we used cmpne), counting the number of consecutive ++ * bytes where LSB == 0 is the same as counting the length of the match. 
*/ ++ zng_vec_vctzlsbb(vc, cmplen); ++ if (cmplen != 16) ++ return len + cmplen; ++ ++ src0 += 16, src1 += 16, len += 16; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_power9_static(src0, src1); ++} ++ ++#define LONGEST_MATCH longest_match_power9 ++#define COMPARE256 compare256_power9_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_power9 ++#define COMPARE256 compare256_power9_static ++ ++#include "match_tpl.h" ++ ++#endif +diff --git a/arch/power/crc32_constants.h b/arch/power/crc32_constants.h +new file mode 100644 +index 0000000..8c8f215 +--- /dev/null ++++ b/arch/power/crc32_constants.h +@@ -0,0 +1,1123 @@ ++/* Constants table used by crc32_power8.c ++ * Copyright (C) 2021 IBM Corporation ++ * ++ * This file was automatically generated, DO NOT EDIT IT MANUALLY. ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zendian.h" ++#include "zbuild.h" ++ ++/* Reduce 262144 kbits to 1024 bits */ ++static const __vector unsigned long long vcrc_const[255] ALIGNED_(16) = { ++#if BYTE_ORDER == LITTLE_ENDIAN ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x0000000099ea94a8, 0x00000001651797d2 }, ++ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x00000000945a8420, 0x0000000021e0d56c }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x0000000030762706, 0x000000000f95ecaa }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001a52fc582, 0x00000001ebd224ac }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x00000001a4a7167a, 0x000000000ccb97ca }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x000000000c18249a, 0x00000001006ec8a8 }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x00000000a924ae7c, 0x000000014f58f196 }, ++ /* x^253952 mod p(x)` << 
1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001e12ccc12, 0x00000001a7192ca6 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x00000000a0b9d4ac, 0x000000019a64bab2 }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000095e8ddfe, 0x0000000014f4ed2e }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x00000000233fddc4, 0x000000011092b6a2 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000001b4529b62, 0x00000000c8a1629c }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x00000001a7fa0e64, 0x000000017bf32e8e }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001b5334592, 0x00000001f8cc6582 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000011f8ee1b4, 0x000000008631ddf0 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ ++ { 0x000000006252e632, 0x000000007e5a76d0 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x00000000ab973e84, 0x000000002b09b31c }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x000000007734f5ec, 0x00000001b2df1f84 }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x000000007c547798, 0x00000001d6f56afc }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x000000007ec40210, 0x00000001b9b5e70c }, ++ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x00000001ab1695a8, 0x0000000034b626d2 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x0000000090494bba, 0x000000014c53479a }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001123fb816, 0x00000001a6d179a4 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x00000001e188c74c, 0x000000015abd16b4 }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000001c2d3451c, 0x00000000018f9852 }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x00000000f55cf1ca, 0x000000001fb3084a }, ++ /* x^234496 mod p(x)` << 1, 
x^234560 mod p(x)` << 1 */ ++ { 0x00000001a0531540, 0x00000000c53dfb04 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x0000000132cd7ebc, 0x00000000e10c9ad6 }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000073ab7f36, 0x0000000025aa994a }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x0000000041aed1c2, 0x00000000fa3a74c4 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000136c53800, 0x0000000033eb3f40 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x0000000126835a30, 0x000000017193f296 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x000000006241b502, 0x0000000043f6c86a }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x00000000d5196ad4, 0x000000016b513ec6 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ ++ { 0x000000009cfa769a, 0x00000000c8f25b4e }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000000920e5df4, 0x00000001a45048ec }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x0000000169dc310e, 0x000000000c441004 }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x0000000009fc331c, 0x000000000e17cad6 }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x000000010d94a81e, 0x00000001253ae964 }, ++ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x0000000027a20ab2, 0x00000001d7c88ebc }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x0000000114f87504, 0x00000001e7ca913a }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x000000004b076d96, 0x0000000033ed078a }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000da4d1e74, 0x00000000e1839c78 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x000000001b81f672, 0x00000001322b267e }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x000000009367c988, 0x00000000638231b6 }, ++ /* x^215040 mod p(x)` << 1, 
x^215104 mod p(x)` << 1 */ ++ { 0x00000001717214ca, 0x00000001ee7f16f4 }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x000000009f47d820, 0x0000000117d9924a }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x000000010d9a47d2, 0x00000000e1a9e0c4 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000000a696c58c, 0x00000001403731dc }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x000000002aa28ec6, 0x00000001a5ea9682 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x00000001fe18fd9a, 0x0000000101c5c578 }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x000000019d4fc1ae, 0x00000000dddf6494 }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000001ba0e3dea, 0x00000000f1c3db28 }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ ++ { 0x0000000074b59a5e, 0x000000013112fb9c }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000f2b5ea98, 0x00000000b680b906 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x0000000187132676, 0x000000001a282932 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x000000010a8c6ad4, 0x0000000089406e7e }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001e21dfe70, 0x00000001def6be8c }, ++ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x00000001da0050e4, 0x0000000075258728 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x00000000772172ae, 0x000000019536090a }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000e47724aa, 0x00000000f2455bfc }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000003cd63ac4, 0x000000018c40baf4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x00000001bf47d352, 0x000000004cd390d4 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x000000018dc1d708, 0x00000001e4ece95a }, ++ /* x^195584 mod p(x)` << 1, 
x^195648 mod p(x)` << 1 */ ++ { 0x000000002d4620a4, 0x000000001a3ee918 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x0000000058fd1740, 0x000000007c652fb8 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x00000000dadd9bfc, 0x000000011c67842c }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000001ea2140be, 0x00000000254f759c }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000009de128ba, 0x000000007ece94ca }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x000000013ac3aa8e, 0x0000000038f258c2 }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x0000000099980562, 0x00000001cdf17b00 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x00000001c1579c86, 0x000000011f882c16 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ ++ { 0x0000000068dbbf94, 0x0000000100093fc8 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x000000004509fb04, 0x00000001cd684f16 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x00000001202f6398, 0x000000004bc6a70a }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000013aea243e, 0x000000004fc7e8e4 }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x00000001b4052ae6, 0x0000000130103f1c }, ++ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x00000001cd2a0ae8, 0x0000000111b0024c }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x00000001fe4aa8b4, 0x000000010b3079da }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x00000001d1559a42, 0x000000010192bcc2 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x00000001f3e05ecc, 0x0000000074838d50 }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x0000000104ddd2cc, 0x000000001b20f520 }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x000000015393153c, 0x0000000050c3590a }, ++ /* x^176128 mod p(x)` << 1, 
x^176192 mod p(x)` << 1 */ ++ { 0x0000000057e942c6, 0x00000000b41cac8e }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000012c633850, 0x000000000c72cc78 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x00000000ebcaae4c, 0x0000000030cdb032 }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013ee532a6, 0x000000013e09fc32 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x00000001bf0cbc7e, 0x000000001ed624d2 }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000d50b7a5a, 0x00000000781aee1a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x0000000002fca6e8, 0x00000001c4d8348c }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x000000007af40044, 0x0000000057a40336 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ ++ { 0x0000000016178744, 0x0000000085544940 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000014c177458, 0x000000019cd21e80 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000011b6ddf04, 0x000000013eb95bc0 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001f3e29ccc, 0x00000001dfc9fdfc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x0000000135ae7562, 0x00000000cd028bc2 }, ++ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000190ef812c, 0x0000000090db8c44 }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x0000000067a2c786, 0x000000010010a4ce }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x0000000048b9496c, 0x00000001c8f4c72c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000015a422de6, 0x000000001c26170c }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000001ef0e3640, 0x00000000e3fccf68 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000001006d2d26, 0x00000000d513ed24 }, ++ /* x^156672 mod p(x)` << 1, 
x^156736 mod p(x)` << 1 */ ++ { 0x00000001170d56d6, 0x00000000141beada }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x00000000a5fb613c, 0x000000011071aea0 }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x0000000040bbf7fc, 0x000000012e19080a }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x000000016ac3a5b2, 0x0000000100ecf826 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x00000000abf16230, 0x0000000069b09412 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x00000001ebe23fac, 0x0000000122297bac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x000000008b6a0894, 0x00000000e9e4b068 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x00000001288ea478, 0x000000004b38651a }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ ++ { 0x000000016619c442, 0x00000001468360e2 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x0000000086230038, 0x00000000121c2408 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x000000017746a756, 0x00000000da7e7d08 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x0000000191b8f8f8, 0x00000001058d7652 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000008e167708, 0x000000014a098a90 }, ++ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000148b22d54, 0x0000000020dbe72e }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x0000000044ba2c3c, 0x000000011e7323e8 }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000b54d2b52, 0x00000000d5d4bf94 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000005a4fd8a, 0x0000000199d8746c }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x0000000139f9fc46, 0x00000000ce9ca8a0 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x000000015a1fa824, 0x00000000136edece }, ++ /* x^137216 mod p(x)` << 1, 
x^137280 mod p(x)` << 1 */ ++ { 0x000000000a61ae4c, 0x000000019b92a068 }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000145e9113e, 0x0000000071d62206 }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x000000006a348448, 0x00000000dfc50158 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x000000004d80a08c, 0x00000001517626bc }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x000000014b6837a0, 0x0000000148d1e4fa }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x000000016896a7fc, 0x0000000094d8266e }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x000000014f187140, 0x00000000606c5e34 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019581b9da, 0x000000019766beaa }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ ++ { 0x00000001091bc984, 0x00000001d80c506c }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001067223c, 0x000000001e73837c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x00000001ab16ea02, 0x0000000064d587de }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x000000013c4598a8, 0x00000000f4a507b0 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x00000000b3735430, 0x0000000040e342fc }, ++ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x00000001570ae19c, 0x0000000094a691a4 }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001ea910712, 0x00000001271ecdfa }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x0000000167127128, 0x000000009e54475a }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x0000000019e790a2, 0x00000000c9c099ee }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000003788f710, 0x000000009a2f736c }, ++ /* x^117760 mod p(x)` << 1, 
x^117824 mod p(x)` << 1 */ ++ { 0x00000001682a160e, 0x00000000bb9f4996 }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x000000007f0ebd2e, 0x00000001db688050 }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x000000002b032080, 0x00000000e9b10af4 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x00000000cfd1664a, 0x000000012d4545e4 }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x00000000aa1181c2, 0x000000000361139c }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000000ddd08002, 0x00000001a5a1a3a8 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x00000000e8dd0446, 0x000000006844e0b0 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000001bbd94a00, 0x00000000c3762f28 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ ++ { 0x00000000ab6cd180, 0x00000001d26287a2 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x0000000031803ce2, 0x00000001f6f0bba8 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x0000000024f40b0c, 0x000000002ffabd62 }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000001ba1d9834, 0x00000000fb4516b8 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x0000000104de61aa, 0x000000018cfa961c }, ++ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x0000000113e40d46, 0x000000019e588d52 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001415598a0, 0x00000001180f0bbc }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000bf6c8c90, 0x00000000e1d9177a }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x00000001788b0504, 0x0000000105abc27c }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x0000000038385d02, 0x00000000972e4a58 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x00000001b6c83844, 0x0000000183499a5e }, ++ /* x^98304 mod p(x)` << 1, x^98368 
mod p(x)` << 1 */ ++ { 0x0000000051061a8a, 0x00000001c96a8cca }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x000000017351388a, 0x00000001a1a5b60c }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x0000000132928f92, 0x00000000e4b6ac9c }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000000e6b4f48a, 0x00000001807e7f5a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x0000000039d15e90, 0x000000017a7e3bc8 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000312d6074, 0x00000000d73975da }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017bbb2cc4, 0x000000017375d038 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x000000016ded3e18, 0x00000000193680bc }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 0x00000000f1638b16, 0x00000000999b06f6 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001d38b9ecc, 0x00000001f685d2b8 }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x000000018b8d09dc, 0x00000001f4ecbed2 }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000e7bc27d2, 0x00000000ba16f1a0 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x00000000275e1e96, 0x0000000115aceac4 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ ++ { 0x00000000e2e3031e, 0x00000001aeff6292 }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x00000001041c84d8, 0x000000009640124c }, ++ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x00000000706ce672, 0x0000000114f41f02 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000015d5070da, 0x000000009c5f3586 }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x0000000038f9493a, 0x00000001878275fa }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000a3348a76, 0x00000000ddc42ce8 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x00000001ad0aab92, 
0x0000000181d2c73a }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x000000019e85f712, 0x0000000141c9320a }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000005a871e76, 0x000000015235719a }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x000000017249c662, 0x00000000be27d804 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000003a084712, 0x000000006242d45a }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x00000000ed438478, 0x000000009a53638e }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000000abac34cc, 0x00000001001ecfb6 }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000005f35ef3e, 0x000000016d7c2d64 }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x0000000047d6608c, 0x00000001d0ce46c0 }, ++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x000000002d01470e, 0x0000000124c907b4 }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000158bbc7b0, 0x0000000018a555ca }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x00000000c0a23e8e, 0x000000006b0980bc }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x00000001ebd85c88, 0x000000008bbba964 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x000000019ee20bb2, 0x00000001070a5a1e }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x00000001acabf2d6, 0x000000002204322a }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ ++ { 0x00000001b7963d56, 0x00000000a27524d0 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x000000017bffa1fe, 0x0000000020b1e4ba }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x000000001f15333e, 0x0000000032cc27fc }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x000000018593129e, 0x0000000044dd22b8 }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x000000019cb32602, 0x00000000dffc9e0a }, ++ /* x^58368 mod 
p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x0000000142b05cc8, 0x00000001b7a0ed14 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000001be49e7a4, 0x00000000c7842488 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x0000000108f69d6c, 0x00000001c02a4fee }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000006c0971f0, 0x000000003c273778 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x000000005b16467a, 0x00000001d63f8894 }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x00000001551a628e, 0x000000006be557d6 }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000019e42ea92, 0x000000006a7806ea }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000012fa83ff2, 0x000000016155aa0c }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ ++ { 0x000000011ca9cde0, 0x00000000908650ac }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000c8e5cd74, 0x00000000aa5a8084 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000096c27f0c, 0x0000000191bb500a }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x000000002baed926, 0x0000000064e9bed0 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000017c8de8d2, 0x000000009444f302 }, ++ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x00000000d43d6068, 0x000000019db07d3c }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000000cb2c4b26, 0x00000001359e3e6e }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x0000000145b8da26, 0x00000001e4f10dd2 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x000000018fff4b08, 0x0000000124f5735e }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000150b58ed0, 0x0000000124760a4c }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x00000001549f39bc, 0x000000000f1fc186 }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 
0x00000000ef4d2f42, 0x00000000150e4cc4 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x00000001b1468572, 0x000000002a6204e8 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x000000013d7403b2, 0x00000000beb1d432 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x00000001a4681842, 0x0000000135f3f1f0 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000167714492, 0x0000000074fe2232 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x00000001e599099a, 0x000000001ac6e2ba }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x00000000fe128194, 0x0000000013fca91e }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000077e8b990, 0x0000000183f4931e }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000001a267f63a, 0x00000000b6d9b4e4 }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000001945c245a, 0x00000000b5188656 }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000149002e76, 0x0000000027a81a84 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x00000001bb8310a4, 0x0000000125699258 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x000000019ec60bcc, 0x00000001b23de796 }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 0x000000012d8590ae, 0x00000000fe4365dc }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x0000000065b00684, 0x00000000c68f497a }, ++ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x000000015e5aeadc, 0x00000000fbf521ee }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x00000000b77ff2b0, 0x000000015eac3378 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000188da2ff6, 0x0000000134914b90 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000063da929a, 0x0000000016335cfe }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x00000001389caa80, 0x000000010372d10c }, 
++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000013db599d2, 0x000000015097b908 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x0000000122505a86, 0x00000001227a7572 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000016bd72746, 0x000000009a8f75c0 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000001c3faf1d4, 0x00000000682c77a2 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000001111c826c, 0x00000000231f091c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x00000000153e9fb2, 0x000000007d4439f2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000002b1f7b60, 0x000000017e221efc }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x00000000b1dba570, 0x0000000167457c38 }, ++ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000001f6397b76, 0x00000000bdf081c4 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x0000000156335214, 0x000000016286d6b0 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000001d70e3986, 0x00000000c84f001c }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x000000003701a774, 0x0000000064efe7c0 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x00000000ac81ef72, 0x000000000ac2d904 }, ++ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x0000000133212464, 0x00000000fd226d14 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 0x00000000e4e45610, 0x000000011cfd42e0 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000000c1bd370, 0x000000016e5a5678 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001a7b9e7a6, 0x00000001d888fe22 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x000000007d657a10, 0x00000001af77fcd4 } ++#else /* BYTE_ORDER == LITTLE_ENDIAN */ ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x00000001651797d2, 0x0000000099ea94a8 }, ++ /* x^260096 
mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x0000000021e0d56c, 0x00000000945a8420 }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x000000000f95ecaa, 0x0000000030762706 }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001ebd224ac, 0x00000001a52fc582 }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x000000000ccb97ca, 0x00000001a4a7167a }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x00000001006ec8a8, 0x000000000c18249a }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x000000014f58f196, 0x00000000a924ae7c }, ++ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001a7192ca6, 0x00000001e12ccc12 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x000000019a64bab2, 0x00000000a0b9d4ac }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000014f4ed2e, 0x0000000095e8ddfe }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x000000011092b6a2, 0x00000000233fddc4 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000000c8a1629c, 0x00000001b4529b62 }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x000000017bf32e8e, 0x00000001a7fa0e64 }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001f8cc6582, 0x00000001b5334592 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000008631ddf0, 0x000000011f8ee1b4 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ ++ { 0x000000007e5a76d0, 0x000000006252e632 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x000000002b09b31c, 0x00000000ab973e84 }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x00000001b2df1f84, 0x000000007734f5ec }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x00000001d6f56afc, 0x000000007c547798 }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x00000001b9b5e70c, 0x000000007ec40210 }, ++ /* x^240640 mod 
p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x0000000034b626d2, 0x00000001ab1695a8 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x000000014c53479a, 0x0000000090494bba }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001a6d179a4, 0x00000001123fb816 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x000000015abd16b4, 0x00000001e188c74c }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000000018f9852, 0x00000001c2d3451c }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x000000001fb3084a, 0x00000000f55cf1ca }, ++ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ ++ { 0x00000000c53dfb04, 0x00000001a0531540 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x00000000e10c9ad6, 0x0000000132cd7ebc }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000025aa994a, 0x0000000073ab7f36 }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x00000000fa3a74c4, 0x0000000041aed1c2 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000033eb3f40, 0x0000000136c53800 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x000000017193f296, 0x0000000126835a30 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x0000000043f6c86a, 0x000000006241b502 }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x000000016b513ec6, 0x00000000d5196ad4 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ ++ { 0x00000000c8f25b4e, 0x000000009cfa769a }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000001a45048ec, 0x00000000920e5df4 }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x000000000c441004, 0x0000000169dc310e }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x000000000e17cad6, 0x0000000009fc331c }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x00000001253ae964, 0x000000010d94a81e }, ++ /* x^221184 mod 
p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x00000001d7c88ebc, 0x0000000027a20ab2 }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x00000001e7ca913a, 0x0000000114f87504 }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x0000000033ed078a, 0x000000004b076d96 }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000e1839c78, 0x00000000da4d1e74 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x00000001322b267e, 0x000000001b81f672 }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x00000000638231b6, 0x000000009367c988 }, ++ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ ++ { 0x00000001ee7f16f4, 0x00000001717214ca }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x0000000117d9924a, 0x000000009f47d820 }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x00000000e1a9e0c4, 0x000000010d9a47d2 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000001403731dc, 0x00000000a696c58c }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x00000001a5ea9682, 0x000000002aa28ec6 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x0000000101c5c578, 0x00000001fe18fd9a }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x00000000dddf6494, 0x000000019d4fc1ae }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000000f1c3db28, 0x00000001ba0e3dea }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ ++ { 0x000000013112fb9c, 0x0000000074b59a5e }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000b680b906, 0x00000000f2b5ea98 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x000000001a282932, 0x0000000187132676 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x0000000089406e7e, 0x000000010a8c6ad4 }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001def6be8c, 0x00000001e21dfe70 }, ++ /* x^201728 mod 
p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x0000000075258728, 0x00000001da0050e4 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x000000019536090a, 0x00000000772172ae }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000f2455bfc, 0x00000000e47724aa }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000018c40baf4, 0x000000003cd63ac4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x000000004cd390d4, 0x00000001bf47d352 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x00000001e4ece95a, 0x000000018dc1d708 }, ++ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ ++ { 0x000000001a3ee918, 0x000000002d4620a4 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x000000007c652fb8, 0x0000000058fd1740 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x000000011c67842c, 0x00000000dadd9bfc }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000000254f759c, 0x00000001ea2140be }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000007ece94ca, 0x000000009de128ba }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x0000000038f258c2, 0x000000013ac3aa8e }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x00000001cdf17b00, 0x0000000099980562 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x000000011f882c16, 0x00000001c1579c86 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ ++ { 0x0000000100093fc8, 0x0000000068dbbf94 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x00000001cd684f16, 0x000000004509fb04 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x000000004bc6a70a, 0x00000001202f6398 }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000004fc7e8e4, 0x000000013aea243e }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x0000000130103f1c, 0x00000001b4052ae6 }, ++ /* x^182272 mod 
p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x0000000111b0024c, 0x00000001cd2a0ae8 }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x000000010b3079da, 0x00000001fe4aa8b4 }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x000000010192bcc2, 0x00000001d1559a42 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x0000000074838d50, 0x00000001f3e05ecc }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x000000001b20f520, 0x0000000104ddd2cc }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x0000000050c3590a, 0x000000015393153c }, ++ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ ++ { 0x00000000b41cac8e, 0x0000000057e942c6 }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000000c72cc78, 0x000000012c633850 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x0000000030cdb032, 0x00000000ebcaae4c }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013e09fc32, 0x000000013ee532a6 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x000000001ed624d2, 0x00000001bf0cbc7e }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000781aee1a, 0x00000000d50b7a5a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x00000001c4d8348c, 0x0000000002fca6e8 }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x0000000057a40336, 0x000000007af40044 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ ++ { 0x0000000085544940, 0x0000000016178744 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000019cd21e80, 0x000000014c177458 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000013eb95bc0, 0x000000011b6ddf04 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001dfc9fdfc, 0x00000001f3e29ccc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x00000000cd028bc2, 0x0000000135ae7562 }, ++ /* x^162816 mod 
p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000090db8c44, 0x0000000190ef812c }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x000000010010a4ce, 0x0000000067a2c786 }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x00000001c8f4c72c, 0x0000000048b9496c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000001c26170c, 0x000000015a422de6 }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000000e3fccf68, 0x00000001ef0e3640 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000000d513ed24, 0x00000001006d2d26 }, ++ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ ++ { 0x00000000141beada, 0x00000001170d56d6 }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x000000011071aea0, 0x00000000a5fb613c }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x000000012e19080a, 0x0000000040bbf7fc }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x0000000100ecf826, 0x000000016ac3a5b2 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x0000000069b09412, 0x00000000abf16230 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x0000000122297bac, 0x00000001ebe23fac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x00000000e9e4b068, 0x000000008b6a0894 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x000000004b38651a, 0x00000001288ea478 }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ ++ { 0x00000001468360e2, 0x000000016619c442 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x00000000121c2408, 0x0000000086230038 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x00000000da7e7d08, 0x000000017746a756 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x00000001058d7652, 0x0000000191b8f8f8 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000014a098a90, 0x000000008e167708 }, ++ /* x^143360 mod 
p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000020dbe72e, 0x0000000148b22d54 }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x000000011e7323e8, 0x0000000044ba2c3c }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000d5d4bf94, 0x00000000b54d2b52 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000199d8746c, 0x0000000005a4fd8a }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x00000000ce9ca8a0, 0x0000000139f9fc46 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x00000000136edece, 0x000000015a1fa824 }, ++ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ ++ { 0x000000019b92a068, 0x000000000a61ae4c }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000071d62206, 0x0000000145e9113e }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x00000000dfc50158, 0x000000006a348448 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x00000001517626bc, 0x000000004d80a08c }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x0000000148d1e4fa, 0x000000014b6837a0 }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x0000000094d8266e, 0x000000016896a7fc }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x00000000606c5e34, 0x000000014f187140 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019766beaa, 0x000000019581b9da }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ ++ { 0x00000001d80c506c, 0x00000001091bc984 }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001e73837c, 0x000000001067223c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x0000000064d587de, 0x00000001ab16ea02 }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x00000000f4a507b0, 0x000000013c4598a8 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x0000000040e342fc, 0x00000000b3735430 }, ++ /* x^123904 mod 
p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x0000000094a691a4, 0x00000001570ae19c }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001271ecdfa, 0x00000001ea910712 }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x000000009e54475a, 0x0000000167127128 }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x00000000c9c099ee, 0x0000000019e790a2 }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000009a2f736c, 0x000000003788f710 }, ++ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ ++ { 0x00000000bb9f4996, 0x00000001682a160e }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x00000001db688050, 0x000000007f0ebd2e }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x00000000e9b10af4, 0x000000002b032080 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x000000012d4545e4, 0x00000000cfd1664a }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x000000000361139c, 0x00000000aa1181c2 }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000001a5a1a3a8, 0x00000000ddd08002 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x000000006844e0b0, 0x00000000e8dd0446 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000000c3762f28, 0x00000001bbd94a00 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ ++ { 0x00000001d26287a2, 0x00000000ab6cd180 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x00000001f6f0bba8, 0x0000000031803ce2 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x000000002ffabd62, 0x0000000024f40b0c }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000000fb4516b8, 0x00000001ba1d9834 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x000000018cfa961c, 0x0000000104de61aa }, ++ /* x^104448 mod 
p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x000000019e588d52, 0x0000000113e40d46 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001180f0bbc, 0x00000001415598a0 }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000e1d9177a, 0x00000000bf6c8c90 }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x0000000105abc27c, 0x00000001788b0504 }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x00000000972e4a58, 0x0000000038385d02 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x0000000183499a5e, 0x00000001b6c83844 }, ++ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ ++ { 0x00000001c96a8cca, 0x0000000051061a8a }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x00000001a1a5b60c, 0x000000017351388a }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x00000000e4b6ac9c, 0x0000000132928f92 }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000001807e7f5a, 0x00000000e6b4f48a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x000000017a7e3bc8, 0x0000000039d15e90 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000d73975da, 0x00000000312d6074 }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017375d038, 0x000000017bbb2cc4 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x00000000193680bc, 0x000000016ded3e18 }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 0x00000000999b06f6, 0x00000000f1638b16 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001f685d2b8, 0x00000001d38b9ecc }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x00000001f4ecbed2, 0x000000018b8d09dc }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000ba16f1a0, 0x00000000e7bc27d2 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x0000000115aceac4, 0x00000000275e1e96 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 
1 */ ++ { 0x00000001aeff6292, 0x00000000e2e3031e }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x000000009640124c, 0x00000001041c84d8 }, ++ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x0000000114f41f02, 0x00000000706ce672 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000009c5f3586, 0x000000015d5070da }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x00000001878275fa, 0x0000000038f9493a }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000ddc42ce8, 0x00000000a3348a76 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x0000000181d2c73a, 0x00000001ad0aab92 }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x0000000141c9320a, 0x000000019e85f712 }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000015235719a, 0x000000005a871e76 }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x00000000be27d804, 0x000000017249c662 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000006242d45a, 0x000000003a084712 }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x000000009a53638e, 0x00000000ed438478 }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000001001ecfb6, 0x00000000abac34cc }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000016d7c2d64, 0x000000005f35ef3e }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x00000001d0ce46c0, 0x0000000047d6608c }, ++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x0000000124c907b4, 0x000000002d01470e }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000018a555ca, 0x0000000158bbc7b0 }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x000000006b0980bc, 0x00000000c0a23e8e }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x000000008bbba964, 0x00000001ebd85c88 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x00000001070a5a1e, 
0x000000019ee20bb2 }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x000000002204322a, 0x00000001acabf2d6 }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ ++ { 0x00000000a27524d0, 0x00000001b7963d56 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x0000000020b1e4ba, 0x000000017bffa1fe }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x0000000032cc27fc, 0x000000001f15333e }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x0000000044dd22b8, 0x000000018593129e }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x00000000dffc9e0a, 0x000000019cb32602 }, ++ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x00000001b7a0ed14, 0x0000000142b05cc8 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000000c7842488, 0x00000001be49e7a4 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x00000001c02a4fee, 0x0000000108f69d6c }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000003c273778, 0x000000006c0971f0 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x00000001d63f8894, 0x000000005b16467a }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x000000006be557d6, 0x00000001551a628e }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000006a7806ea, 0x000000019e42ea92 }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000016155aa0c, 0x000000012fa83ff2 }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ ++ { 0x00000000908650ac, 0x000000011ca9cde0 }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000aa5a8084, 0x00000000c8e5cd74 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000191bb500a, 0x0000000096c27f0c }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x0000000064e9bed0, 0x000000002baed926 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000009444f302, 0x000000017c8de8d2 }, ++ /* x^45056 mod 
p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x000000019db07d3c, 0x00000000d43d6068 }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000001359e3e6e, 0x00000000cb2c4b26 }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x00000001e4f10dd2, 0x0000000145b8da26 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x0000000124f5735e, 0x000000018fff4b08 }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000124760a4c, 0x0000000150b58ed0 }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x000000000f1fc186, 0x00000001549f39bc }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 0x00000000150e4cc4, 0x00000000ef4d2f42 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x000000002a6204e8, 0x00000001b1468572 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x00000000beb1d432, 0x000000013d7403b2 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x0000000135f3f1f0, 0x00000001a4681842 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000074fe2232, 0x0000000167714492 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x000000001ac6e2ba, 0x00000001e599099a }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x0000000013fca91e, 0x00000000fe128194 }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000183f4931e, 0x0000000077e8b990 }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000000b6d9b4e4, 0x00000001a267f63a }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000000b5188656, 0x00000001945c245a }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000027a81a84, 0x0000000149002e76 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x0000000125699258, 0x00000001bb8310a4 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x00000001b23de796, 0x000000019ec60bcc }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 
0x00000000fe4365dc, 0x000000012d8590ae }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x00000000c68f497a, 0x0000000065b00684 }, ++ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x00000000fbf521ee, 0x000000015e5aeadc }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x000000015eac3378, 0x00000000b77ff2b0 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000134914b90, 0x0000000188da2ff6 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000016335cfe, 0x0000000063da929a }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x000000010372d10c, 0x00000001389caa80 }, ++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000015097b908, 0x000000013db599d2 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x00000001227a7572, 0x0000000122505a86 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000009a8f75c0, 0x000000016bd72746 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000000682c77a2, 0x00000001c3faf1d4 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000000231f091c, 0x00000001111c826c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x000000007d4439f2, 0x00000000153e9fb2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000017e221efc, 0x000000002b1f7b60 }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x0000000167457c38, 0x00000000b1dba570 }, ++ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000000bdf081c4, 0x00000001f6397b76 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x000000016286d6b0, 0x0000000156335214 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000000c84f001c, 0x00000001d70e3986 }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x0000000064efe7c0, 0x000000003701a774 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x000000000ac2d904, 0x00000000ac81ef72 }, ++ /* 
x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x00000000fd226d14, 0x0000000133212464 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 0x000000011cfd42e0, 0x00000000e4e45610 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000016e5a5678, 0x000000000c1bd370 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001d888fe22, 0x00000001a7b9e7a6 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x00000001af77fcd4, 0x000000007d657a10 } ++#endif /* BYTE_ORDER == LITTLE_ENDIAN */ ++}; ++ ++/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ ++ ++static const __vector unsigned long long vcrc_short_const[16] ALIGNED_(16) = { ++#if BYTE_ORDER == LITTLE_ENDIAN ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0x99168a18ec447f11, 0xed837b2613e8221e }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xf38a3556291ea462, 0xc10ec5e033fbca3b }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ ++ { 0x855712b3784d2a56, 0x71aa1df0e172334d }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xa5abe9f80eaee722, 0xfee3053e3969324d }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ ++ { 
0xebe7e3566325605c, 0x6f8346e1d777606e }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0xba1aca0315141c31, 0x78ed02d5a700e96a }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0x6655004fa06a2517, 0xedb88320b1e6b092 } ++#else /* BYTE_ORDER == LITTLE_ENDIAN */ ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0xed837b2613e8221e, 0x99168a18ec447f11 }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ ++ { 0x71aa1df0e172334d, 0x855712b3784d2a56 }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xfee3053e3969324d, 0xa5abe9f80eaee722 }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , 
x^896 mod p(x) */ ++ { 0x6f8346e1d777606e, 0xebe7e3566325605c }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0xaa2215ea329ecc11, 0x5705a9ca4721589f }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0x78ed02d5a700e96a, 0xba1aca0315141c31 }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0xedb88320b1e6b092, 0x6655004fa06a2517 } ++#endif /* BYTE_ORDER == LITTLE_ENDIAN */ ++}; ++ ++/* Barrett constants */ ++/* 33 bit reflected Barrett constant m - (4^32)/n */ ++ ++static const __vector unsigned long long v_Barrett_const[2] ALIGNED_(16) = { ++ /* x^64 div p(x) */ ++#if BYTE_ORDER == LITTLE_ENDIAN ++ { 0x00000001f7011641, 0x0000000000000000 }, ++ { 0x00000001db710641, 0x0000000000000000 } ++#else /* BYTE_ORDER == LITTLE_ENDIAN */ ++ { 0x0000000000000000, 0x00000001f7011641 }, ++ { 0x0000000000000000, 0x00000001db710641 } ++#endif /* BYTE_ORDER == LITTLE_ENDIAN */ ++}; +diff --git a/arch/power/crc32_power8.c b/arch/power/crc32_power8.c +new file mode 100644 +index 0000000..1cb5f29 +--- /dev/null ++++ b/arch/power/crc32_power8.c +@@ -0,0 +1,589 @@ ++/* crc32 for POWER8 using VSX instructions ++ * Copyright (C) 2021 IBM Corporation ++ * ++ * Author: Rogerio Alves ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ * Calculate the checksum of data that is 16 byte aligned and a multiple of ++ * 16 bytes. ++ * ++ * The first step is to reduce it to 1024 bits. We do this in 8 parallel ++ * chunks in order to mask the latency of the vpmsum instructions. 
If we ++ * have more than 32 kB of data to checksum we repeat this step multiple ++ * times, passing in the previous 1024 bits. ++ * ++ * The next step is to reduce the 1024 bits to 64 bits. This step adds ++ * 32 bits of 0s to the end - this matches what a CRC does. We just ++ * calculate constants that land the data in these 32 bits. ++ * ++ * We then use fixed point Barrett reduction to compute a mod n over GF(2) ++ * for n = CRC using POWER8 instructions. We use x = 32. ++ * ++ * http://en.wikipedia.org/wiki/Barrett_reduction ++ * ++ * This code uses gcc vector builtins instead of using assembly directly. ++ */ ++ ++#include <altivec.h> ++#include "zendian.h" ++#include "zbuild.h" ++ ++#include "crc32_constants.h" ++#include "crc32_braid_tbl.h" ++ ++#if defined (__clang__) ++#include "fallback_builtins.h" ++#endif ++ ++#define MAX_SIZE 32768 ++#define VMX_ALIGN 16 ++#define VMX_ALIGN_MASK (VMX_ALIGN-1) ++ ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, unsigned long len) { ++ while (len--) ++ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); ++ return crc; ++} ++ ++static unsigned int ALIGNED_(32) __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); ++ ++Z_INTERNAL uint32_t crc32_power8(uint32_t crc, const unsigned char *p, size_t _len) { ++ unsigned int prealign; ++ unsigned int tail; ++ ++ unsigned long len = (unsigned long) _len; ++ ++ if (p == (const unsigned char *) 0x0) ++ return 0; ++ ++ crc ^= 0xffffffff; ++ ++ if (len < VMX_ALIGN + VMX_ALIGN_MASK) { ++ crc = crc32_align(crc, p, len); ++ goto out; ++ } ++ ++ if ((unsigned long)p & VMX_ALIGN_MASK) { ++ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); ++ crc = crc32_align(crc, p, prealign); ++ len -= prealign; ++ p += prealign; ++ } ++ ++ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); ++ ++ tail = len & VMX_ALIGN_MASK; ++ if (tail) { ++ p += len & ~VMX_ALIGN_MASK; ++ crc = crc32_align(crc, p, tail); ++ } ++ ++out: ++ crc ^= 0xffffffff; ++ ++ return crc; ++} ++ 
++/* When we have a load-store in a single-dispatch group and address overlap ++ * such that forward is not allowed (load-hit-store) the group must be flushed. ++ * A group ending NOP prevents the flush. ++ */ ++#define GROUP_ENDING_NOP __asm__("ori 2,2,0" ::: "memory") ++ ++#if BYTE_ORDER == BIG_ENDIAN ++#define BYTESWAP_DATA ++#endif ++ ++#ifdef BYTESWAP_DATA ++#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb, (__vector unsigned char) vc) ++#if BYTE_ORDER == LITTLE_ENDIAN ++/* Byte reverse permute constant LE. */ ++static const __vector unsigned long long vperm_const ALIGNED_(16) = { 0x08090A0B0C0D0E0FUL, 0x0001020304050607UL }; ++#else ++static const __vector unsigned long long vperm_const ALIGNED_(16) = { 0x0F0E0D0C0B0A0908UL, 0X0706050403020100UL }; ++#endif ++#else ++#define VEC_PERM(vr, va, vb, vc) ++#endif ++ ++static unsigned int ALIGNED_(32) __crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { ++ ++ const __vector unsigned long long vzero = {0,0}; ++ const __vector unsigned long long vones = {0xffffffffffffffffUL, 0xffffffffffffffffUL}; ++ ++ const __vector unsigned long long vmask_32bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, (__vector unsigned char)vones, 4); ++ ++ const __vector unsigned long long vmask_64bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, (__vector unsigned char)vones, 8); ++ ++ __vector unsigned long long vcrc; ++ ++ __vector unsigned long long vconst1, vconst2; ++ ++ /* vdata0-vdata7 will contain our data (p). 
*/ ++ __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4, vdata5, vdata6, vdata7; ++ ++ /* v0-v7 will contain our checksums */ ++ __vector unsigned long long v0 = {0,0}; ++ __vector unsigned long long v1 = {0,0}; ++ __vector unsigned long long v2 = {0,0}; ++ __vector unsigned long long v3 = {0,0}; ++ __vector unsigned long long v4 = {0,0}; ++ __vector unsigned long long v5 = {0,0}; ++ __vector unsigned long long v6 = {0,0}; ++ __vector unsigned long long v7 = {0,0}; ++ ++ ++ /* Vector auxiliary variables. */ ++ __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7; ++ ++ unsigned int offset; /* Constant table offset. */ ++ ++ unsigned long i; /* Counter. */ ++ unsigned long chunks; ++ ++ unsigned long block_size; ++ int next_block = 0; ++ ++ /* Align by 128 bits. The last 128 bit block will be processed at end. */ ++ unsigned long length = len & 0xFFFFFFFFFFFFFF80UL; ++ ++ vcrc = (__vector unsigned long long)__builtin_pack_vector_int128(0UL, crc); ++ ++ /* Short version. */ ++ if (len < 256) { ++ /* Calculate where in the constant table we need to start. */ ++ offset = 256 - len; ++ ++ vconst1 = vec_ld(offset, vcrc_short_const); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ ++ /* xor initial value */ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ ++ for (i = 16; i < len; i += 16) { ++ vconst1 = vec_ld(offset + i, vcrc_short_const); ++ vdata0 = vec_ld(i, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ } ++ } else { ++ ++ /* Load initial values. 
*/ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ /* xor in initial value */ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ p = (char *)p + 128; ++ ++ do { ++ /* Checksum in blocks of MAX_SIZE. */ ++ block_size = length; ++ if (block_size > MAX_SIZE) { ++ block_size = MAX_SIZE; ++ } ++ ++ length = length - block_size; ++ ++ /* ++ * Work out the offset into the constants table to start at. 
Each ++ * constant is 16 bytes, and it is used against 128 bytes of input ++ * data - 128 / 16 = 8 ++ */ ++ offset = (MAX_SIZE/8) - (block_size/8); ++ /* We reduce our final 128 bytes in a separate step */ ++ chunks = (block_size/128)-1; ++ ++ vconst1 = vec_ld(offset, vcrc_const); ++ ++ va0 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata0, ++ (__vector unsigned long long)vconst1); ++ va1 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata1, ++ (__vector unsigned long long)vconst1); ++ va2 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata2, ++ (__vector unsigned long long)vconst1); ++ va3 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata3, ++ (__vector unsigned long long)vconst1); ++ va4 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata4, ++ (__vector unsigned long long)vconst1); ++ va5 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata5, ++ (__vector unsigned long long)vconst1); ++ va6 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata6, ++ (__vector unsigned long long)vconst1); ++ va7 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata7, ++ (__vector unsigned long long)vconst1); ++ ++ if (chunks > 1) { ++ offset += 16; ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, (__vector 
unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ /* ++ * main loop. Each iteration calculates the CRC for a 128-byte ++ * block. ++ */ ++ for (i = 0; i < chunks-2; i++) { ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ GROUP_ENDING_NOP; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata0, ++ (__vector unsigned long long)vconst2); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata1, ++ (__vector unsigned long long)vconst2); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd((__vector unsigned long long) ++ vdata2, (__vector unsigned long long)vconst2); ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata3, ++ (__vector unsigned long long)vconst2); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata4, ++ (__vector unsigned long long)vconst1); ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata5, ++ (__vector unsigned long long)vconst1); ++ vdata5 = vec_ld(80, (__vector 
unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata6, ++ (__vector unsigned long long)vconst1); ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v7 = vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata7, ++ (__vector unsigned long long)vconst1); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ } ++ ++ /* First cool down */ ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata0, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata1, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata2, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata3, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata4, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata5, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata6, ++ (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v7 = vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd((__vector unsigned long long)vdata7, ++ (__vector unsigned 
long long)vconst1); ++ }/* else */ ++ ++ /* Second cool down. */ ++ v0 = vec_xor(v0, va0); ++ v1 = vec_xor(v1, va1); ++ v2 = vec_xor(v2, va2); ++ v3 = vec_xor(v3, va3); ++ v4 = vec_xor(v4, va4); ++ v5 = vec_xor(v5, va5); ++ v6 = vec_xor(v6, va6); ++ v7 = vec_xor(v7, va7); ++ ++ /* ++ * vpmsumd produces a 96 bit result in the least significant bits ++ * of the register. Since we are bit reflected we have to shift it ++ * left 32 bits so it occupies the least significant bits in the ++ * bit reflected domain. ++ */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1, ++ (__vector unsigned char)vzero, 4); ++ v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2, ++ (__vector unsigned char)vzero, 4); ++ v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3, ++ (__vector unsigned char)vzero, 4); ++ v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4, ++ (__vector unsigned char)vzero, 4); ++ v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5, ++ (__vector unsigned char)vzero, 4); ++ v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6, ++ (__vector unsigned char)vzero, 4); ++ v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7, ++ (__vector unsigned char)vzero, 4); ++ ++ /* xor with the last 1024 bits. 
*/ ++ va0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(va0, va0, va0, vperm_const); ++ ++ va1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(va1, va1, va1, vperm_const); ++ ++ va2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(va2, va2, va2, vperm_const); ++ ++ va3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(va3, va3, va3, vperm_const); ++ ++ va4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(va4, va4, va4, vperm_const); ++ ++ va5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(va5, va5, va5, vperm_const); ++ ++ va6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(va6, va6, va6, vperm_const); ++ ++ va7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(va7, va7, va7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ vdata0 = vec_xor(v0, va0); ++ vdata1 = vec_xor(v1, va1); ++ vdata2 = vec_xor(v2, va2); ++ vdata3 = vec_xor(v3, va3); ++ vdata4 = vec_xor(v4, va4); ++ vdata5 = vec_xor(v5, va5); ++ vdata6 = vec_xor(v6, va6); ++ vdata7 = vec_xor(v7, va7); ++ ++ /* Check if we have more blocks to process */ ++ next_block = 0; ++ if (length != 0) { ++ next_block = 1; ++ ++ /* zero v0-v7 */ ++ v0 = vec_xor(v0, v0); ++ v1 = vec_xor(v1, v1); ++ v2 = vec_xor(v2, v2); ++ v3 = vec_xor(v3, v3); ++ v4 = vec_xor(v4, v4); ++ v5 = vec_xor(v5, v5); ++ v6 = vec_xor(v6, v6); ++ v7 = vec_xor(v7, v7); ++ } ++ length = length + 128; ++ ++ } while (next_block); ++ ++ /* Calculate how many bytes we have left. */ ++ length = (len & 127); ++ ++ /* Calculate where in (short) constant table we need to start. 
*/ ++ offset = 128 - length; ++ ++ v0 = vec_ld(offset, vcrc_short_const); ++ v1 = vec_ld(offset + 16, vcrc_short_const); ++ v2 = vec_ld(offset + 32, vcrc_short_const); ++ v3 = vec_ld(offset + 48, vcrc_short_const); ++ v4 = vec_ld(offset + 64, vcrc_short_const); ++ v5 = vec_ld(offset + 80, vcrc_short_const); ++ v6 = vec_ld(offset + 96, vcrc_short_const); ++ v7 = vec_ld(offset + 112, vcrc_short_const); ++ ++ offset += 128; ++ ++ v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata0, (__vector unsigned int)v0); ++ v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata1, (__vector unsigned int)v1); ++ v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata2, (__vector unsigned int)v2); ++ v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata3, (__vector unsigned int)v3); ++ v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata4, (__vector unsigned int)v4); ++ v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata5, (__vector unsigned int)v5); ++ v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata6, (__vector unsigned int)v6); ++ v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata7, (__vector unsigned int)v7); ++ ++ /* Now reduce the tail (0-112 bytes). */ ++ for (i = 0; i < length; i+=16) { ++ vdata0 = vec_ld(i,(__vector unsigned long long*)p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ va0 = vec_ld(offset + i,vcrc_short_const); ++ va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw( ++ (__vector unsigned int)vdata0, (__vector unsigned int)va0); ++ v0 = vec_xor(v0, va0); ++ } ++ ++ /* xor all parallel chunks together. 
*/ ++ v0 = vec_xor(v0, v1); ++ v2 = vec_xor(v2, v3); ++ v4 = vec_xor(v4, v5); ++ v6 = vec_xor(v6, v7); ++ ++ v0 = vec_xor(v0, v2); ++ v4 = vec_xor(v4, v6); ++ ++ v0 = vec_xor(v0, v4); ++ } ++ ++ /* Barrett Reduction */ ++ vconst1 = vec_ld(0, v_Barrett_const); ++ vconst2 = vec_ld(16, v_Barrett_const); ++ ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)v0, 8); ++ v0 = vec_xor(v1,v0); ++ ++ /* shift left one bit */ ++ __vector unsigned char vsht_splat = vec_splat_u8 (1); ++ v0 = (__vector unsigned long long)vec_sll((__vector unsigned char)v0, vsht_splat); ++ ++ v0 = vec_and(v0, vmask_64bit); ++ ++ /* ++ * The reflected version of Barrett reduction. Instead of bit ++ * reflecting our data (which is expensive to do), we bit reflect our ++ * constants and our algorithm, which means the intermediate data in ++ * our vector registers goes from 0-63 instead of 63-0. We can reflect ++ * the algorithm because we don't carry in mod 2 arithmetic. ++ */ ++ ++ /* bottom 32 bits of a */ ++ v1 = vec_and(v0, vmask_32bit); ++ ++ /* ma */ ++ v1 = __builtin_crypto_vpmsumd((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst1); ++ ++ /* bottom 32bits of ma */ ++ v1 = vec_and(v1, vmask_32bit); ++ /* qn */ ++ v1 = __builtin_crypto_vpmsumd((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst2); ++ /* a - qn, subtraction is xor in GF(2) */ ++ v0 = vec_xor (v0, v1); ++ ++ /* ++ * Since we are bit reflected, the result (ie the low 32 bits) is in ++ * the high 32 bits. 
We just need to shift it left 4 bytes ++ * V0 [ 0 1 X 3 ] ++ * V0 [ 0 X 2 3 ] ++ */ ++ ++ /* shift result into top 64 bits of */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ ++#if BYTE_ORDER == BIG_ENDIAN ++ return v0[0]; ++#else ++ return v0[1]; ++#endif ++} +diff --git a/arch/power/fallback_builtins.h b/arch/power/fallback_builtins.h +new file mode 100644 +index 0000000..ed95846 +--- /dev/null ++++ b/arch/power/fallback_builtins.h +@@ -0,0 +1,31 @@ ++/* Helper functions to work around issues with clang builtins ++ * Copyright (C) 2021 IBM Corporation ++ * ++ * Authors: ++ * Daniel Black ++ * Rogerio Alves ++ * Tulio Magno Quites Machado Filho ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef POWER_BUILTINS_H ++#define POWER_BUILTINS_H ++ ++/* ++ * These stubs fix clang incompatibilities with GCC builtins. ++ */ ++ ++#ifndef __builtin_crypto_vpmsumw ++#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb ++#endif ++#ifndef __builtin_crypto_vpmsumd ++#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb ++#endif ++ ++static inline __vector unsigned long long __attribute__((overloadable)) ++vec_ld(int __a, const __vector unsigned long long* __b) { ++ return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); ++} ++ ++#endif +diff --git a/arch/power/power_features.c b/arch/power/power_features.c +new file mode 100644 +index 0000000..f735037 +--- /dev/null ++++ b/arch/power/power_features.c +@@ -0,0 +1,46 @@ ++/* power_features.c - POWER feature check ++ * Copyright (C) 2020 Matheus Castanho , IBM ++ * Copyright (C) 2021-2022 Mika T. 
Lindqvist ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef HAVE_SYS_AUXV_H ++# include ++#endif ++#ifdef __FreeBSD__ ++# include ++#endif ++#include "../../zbuild.h" ++#include "power_features.h" ++ ++void Z_INTERNAL power_check_features(struct power_cpu_features *features) { ++#ifdef PPC_FEATURES ++ unsigned long hwcap; ++#ifdef __FreeBSD__ ++ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); ++#else ++ hwcap = getauxval(AT_HWCAP); ++#endif ++ ++ if (hwcap & PPC_FEATURE_HAS_ALTIVEC) ++ features->has_altivec = 1; ++#endif ++ ++#ifdef POWER_FEATURES ++ unsigned long hwcap2; ++#ifdef __FreeBSD__ ++ elf_aux_info(AT_HWCAP2, &hwcap2, sizeof(hwcap2)); ++#else ++ hwcap2 = getauxval(AT_HWCAP2); ++#endif ++ ++#ifdef POWER8_VSX ++ if (hwcap2 & PPC_FEATURE2_ARCH_2_07) ++ features->has_arch_2_07 = 1; ++#endif ++#ifdef POWER9 ++ if (hwcap2 & PPC_FEATURE2_ARCH_3_00) ++ features->has_arch_3_00 = 1; ++#endif ++#endif ++} +diff --git a/arch/power/power_features.h b/arch/power/power_features.h +new file mode 100644 +index 0000000..9252364 +--- /dev/null ++++ b/arch/power/power_features.h +@@ -0,0 +1,18 @@ ++/* power_features.h -- check for POWER CPU features ++ * Copyright (C) 2020 Matheus Castanho , IBM ++ * Copyright (C) 2021 Mika T. 
Lindqvist ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef POWER_H_ ++#define POWER_H_ ++ ++struct power_cpu_features { ++ int has_altivec; ++ int has_arch_2_07; ++ int has_arch_3_00; ++}; ++ ++void Z_INTERNAL power_check_features(struct power_cpu_features *features); ++ ++#endif /* POWER_H_ */ +diff --git a/arch/power/slide_hash_power8.c b/arch/power/slide_hash_power8.c +index b1e30ce..d01e0ac 100644 +--- a/arch/power/slide_hash_power8.c ++++ b/arch/power/slide_hash_power8.c +@@ -4,57 +4,9 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +-#ifdef POWER8_VSX_SLIDEHASH ++#ifdef POWER8_VSX + +-#include +-#include "zbuild.h" +-#include "deflate.h" ++#define SLIDE_PPC slide_hash_power8 ++#include "slide_ppc_tpl.h" + +-static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) { +- vector unsigned short vw, vm, *vp; +- unsigned chunks; +- +- /* Each vector register (chunk) corresponds to 128 bits == 8 Posf, +- * so instead of processing each of the n_elems in the hash table +- * individually, we can do it in chunks of 8 with vector instructions. +- * +- * This function is only called from slide_hash_power8(), and both calls +- * pass n_elems as a power of 2 higher than 2^7, as defined by +- * deflateInit2_(), so n_elems will always be a multiple of 8. */ +- chunks = n_elems >> 3; +- Assert(n_elems % 8 == 0, "Weird hash table size!"); +- +- /* This type casting is safe since s->w_size is always <= 64KB +- * as defined by deflateInit2_() and Posf == unsigned short */ +- vw[0] = (Pos) s->w_size; +- vw = vec_splat(vw,0); +- +- vp = (vector unsigned short *) table_end; +- +- do { +- /* Processing 8 elements at a time */ +- vp--; +- vm = *vp; +- +- /* This is equivalent to: m >= w_size ? m - w_size : 0 +- * Since we are using a saturated unsigned subtraction, any +- * values that are > w_size will be set to 0, while the others +- * will be subtracted by w_size. 
*/ +- *vp = vec_subs(vm,vw); +- } while (--chunks); +-} +- +-void Z_INTERNAL slide_hash_power8(deflate_state *s) { +- unsigned int n; +- Pos *p; +- +- n = HASH_SIZE; +- p = &s->head[n]; +- slide_hash_power8_loop(s,n,p); +- +- n = s->w_size; +- p = &s->prev[n]; +- slide_hash_power8_loop(s,n,p); +-} +- +-#endif /* POWER8_VSX_SLIDEHASH */ ++#endif /* POWER8_VSX */ +diff --git a/arch/power/slide_hash_vmx.c b/arch/power/slide_hash_vmx.c +new file mode 100644 +index 0000000..5a87ef7 +--- /dev/null ++++ b/arch/power/slide_hash_vmx.c +@@ -0,0 +1,10 @@ ++/* Optimized slide_hash for PowerPC processors with VMX instructions ++ * Copyright (C) 2017-2021 Mika T. Lindqvist ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#ifdef PPC_VMX ++ ++#define SLIDE_PPC slide_hash_vmx ++#include "slide_ppc_tpl.h" ++ ++#endif /* PPC_VMX */ +diff --git a/arch/power/slide_ppc_tpl.h b/arch/power/slide_ppc_tpl.h +new file mode 100644 +index 0000000..5c17e38 +--- /dev/null ++++ b/arch/power/slide_ppc_tpl.h +@@ -0,0 +1,31 @@ ++/* Optimized slide_hash for PowerPC processors ++ * Copyright (C) 2017-2021 Mika T. 
Lindqvist ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include ++#include "zbuild.h" ++#include "deflate.h" ++ ++static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { ++ const vector unsigned short vmx_wsize = vec_splats(wsize); ++ Pos *p = table; ++ ++ do { ++ vector unsigned short value, result; ++ ++ value = vec_ld(0, p); ++ result = vec_subs(value, vmx_wsize); ++ vec_st(result, 0, p); ++ ++ p += 8; ++ entries -= 8; ++ } while (entries > 0); ++} ++ ++void Z_INTERNAL SLIDE_PPC(deflate_state *s) { ++ uint16_t wsize = s->w_size; ++ ++ slide_hash_chain(s->head, HASH_SIZE, wsize); ++ slide_hash_chain(s->prev, wsize, wsize); ++} +diff --git a/arch/riscv/README.md b/arch/riscv/README.md +new file mode 100644 +index 0000000..013095c +--- /dev/null ++++ b/arch/riscv/README.md +@@ -0,0 +1,45 @@ ++# Building RISC-V Target with CMake # ++ ++> **Warning** ++> Runtime RVV detection (using `hwcap`) requires Linux kernel 6.5 or newer. ++> ++> When running on older kernels, we fall back to compile-time detection. This can cause crashes if RVV is enabled at compile time but not supported by the target CPU. ++> Therefore, if older kernel support is needed, RVV should be disabled when the target CPU does not support it. ++## Prerequisite: Build RISC-V Clang Toolchain and QEMU ## ++ ++If you don't have prebuilt clang and riscv64 qemu, you can refer to the [script](https://github.com/sifive/prepare-riscv-toolchain-qemu/blob/main/prepare_riscv_toolchain_qemu.sh) to get the source. Copy the script to the zlib-ng root directory, and run it to download the source and build them. Modify the content according to your conditions (e.g., toolchain version). ++ ++```bash ++./prepare_riscv_toolchain_qemu.sh ++``` ++ ++After running the script, clang & qemu are built in `build-toolchain-qemu/riscv-clang/` & `build-toolchain-qemu/riscv-qemu/`. ++ ++`build-toolchain-qemu/riscv-clang/` is your `TOOLCHAIN_PATH`.
++`build-toolchain-qemu/riscv-qemu/bin/qemu-riscv64` is your `QEMU_PATH`. ++ ++You can also download the prebuilt toolchain & qemu from [the release page](https://github.com/sifive/prepare-riscv-toolchain-qemu/releases), and enjoy using them. ++ ++## Cross-Compile for RISC-V Target ## ++ ++```bash ++cmake -G Ninja -B ./build-riscv \ ++ -D CMAKE_TOOLCHAIN_FILE=./cmake/toolchain-riscv.cmake \ ++ -D CMAKE_INSTALL_PREFIX=./build-riscv/install \ ++ -D TOOLCHAIN_PATH={TOOLCHAIN_PATH} \ ++ -D QEMU_PATH={QEMU_PATH} \ ++ . ++ ++cmake --build ./build-riscv ++``` ++ ++Disable the option if there is no RVV support: ++``` ++-D WITH_RVV=OFF ++``` ++ ++## Run Unittests on User Mode QEMU ## ++ ++```bash ++cd ./build-riscv && ctest --verbose ++``` +diff --git a/arch/riscv/adler32_rvv.c b/arch/riscv/adler32_rvv.c +new file mode 100644 +index 0000000..92502b3 +--- /dev/null ++++ b/arch/riscv/adler32_rvv.c +@@ -0,0 +1,155 @@ ++/* adler32_rvv.c - RVV version of adler32 ++ * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
++ * Contributed by Alex Chiang ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#ifdef RISCV_RVV ++ ++#include ++#include ++ ++#include "../../zbuild.h" ++#include "../../adler32_p.h" ++ ++// #include ++// #define p(sign, wide, type, x, vl) \ ++// { \ ++// sign##nt##wide##_t show_##type_##x[vl]; \ ++// __riscv_vse##wide##_v_##type(show_##type_##x, x, vl); \ ++// for (int i = 0; i < vl; ++i) \ ++// { \ ++// fprintf(stderr, "%d ", show_##type_##x[i]); \ ++// } \ ++// fprintf(stderr, "\n"); \ ++// } ++ ++static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) { ++ // fprintf(stderr, "%ld ", len); ++ ++ /* split Adler-32 into component sums */ ++ uint32_t sum2 = (adler >> 16) & 0xffff; ++ adler &= 0xffff; ++ ++ /* in case user likes doing a byte at a time, keep it fast */ ++ if (len == 1) { ++ if (COPY) memcpy(dst, src, 1); ++ return adler32_len_1(adler, src, sum2); ++ } ++ ++ /* initial Adler-32 value (deferred check for len == 1 speed) */ ++ if (src == NULL) ++ return 1L; ++ ++ /* in case short lengths are provided, keep it somewhat fast */ ++ if (len < 16) { ++ if (COPY) memcpy(dst, src, len); ++ return adler32_len_16(adler, src, len, sum2); ++ } ++ ++ size_t left = len; ++ size_t vl = __riscv_vsetvlmax_e8m1(); ++ vl = vl > 256 ? 256 : vl; ++ vuint32m4_t v_buf32_accu = __riscv_vmv_v_x_u32m4(0, vl); /* A: sum of the individual elements */ ++ vuint32m4_t v_adler32_prev_accu = __riscv_vmv_v_x_u32m4(0, vl); /* B: sum of the running sums */ ++ vuint16m2_t v_buf16_accu; ++ ++ /* ++ * We accumulate 8-bit data, and to prevent overflow, we have to use a 32-bit accumulator. ++ * However, adding 8-bit data into a 32-bit accumulator isn't efficient. We use 16-bit & 32-bit ++ * accumulators to boost performance. ++ * ++ * The block_size is the largest multiple of vl that is <= 256, because overflow would occur when ++ * vl > 256 (255 * 256 <= UINT16_MAX).
++ * ++ * We accumulate 8-bit data into a 16-bit accumulator and then ++ * move the data into the 32-bit accumulator at the last iteration. ++ */ ++ size_t block_size = (256 / vl) * vl; /* 256 rounded down to N*vl, where N is an integer */ ++ size_t nmax_limit = (NMAX / block_size); ++ size_t cnt = 0; ++ ++ /* accumulate 8-bit -> 16-bit within a block, to avoid accumulating 8-bit -> 32-bit on every step */ ++ while (left >= block_size) { ++ v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl); ++ size_t subprob = block_size; ++ while (subprob > 0) { ++ vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl); ++ if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl); ++ v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl); ++ v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl); ++ src += vl; ++ if (COPY) dst += vl; ++ subprob -= vl; ++ } ++ v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, block_size / vl, v_buf32_accu, vl); // [overflow 1] when v_buf32_accu is accumulated without taking the remainder and cnt happens to be 0, v_adler32_prev_accu accumulates the large v_buf32_accu many (22) times: (2^21.5/32*255 * 8) * 22 = 2^32 ++ v_adler32_prev_accu = __riscv_vremu_vx_u32m4(v_adler32_prev_accu, BASE, vl); // [overflow guard 1] take the remainder frequently: (2^26/32*255 * 8) = 2^32 ++ v_buf32_accu = __riscv_vwaddu_wv_u32m4(v_buf32_accu, v_buf16_accu, vl); // [overflow 2 (not handled)] 2^29/32*255 = 2^32 ++ left -= block_size; ++ } ++ /* the left len <= 256 now, we can use 16-bit accum safely */ ++ v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl); ++ size_t res = left; ++ while (left >= vl) { ++ vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl); ++ if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl); ++ v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl); ++ v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl); ++ src += vl; ++ if (COPY) dst += vl; ++ left -= vl; ++ } ++ v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, res / vl, v_buf32_accu, vl); ++ v_adler32_prev_accu = __riscv_vremu_vx_u32m4(v_adler32_prev_accu, BASE, vl); ++ v_buf32_accu = __riscv_vwaddu_wv_u32m4(v_buf32_accu, v_buf16_accu, vl); ++ ++ /*
2^23 */ ++ // if (len > 1000000){ ++ // fprintf(stderr, "[%d]\n", len); ++ // p(ui, 32, u32m4, v_buf32_accu, vl); ++ // } ++ ++ /* B */ ++ vuint32m4_t v_seq = __riscv_vid_v_u32m4(vl); ++ vuint32m4_t v_rev_seq = __riscv_vrsub_vx_u32m4(v_seq, vl, vl); ++ ++ v_buf32_accu = __riscv_vremu_vx_u32m4(v_buf32_accu, BASE, vl); // [overflow guard 3/6] take the remainder; (3) (A x N) % b = [(A % b) x N] % b ++ vuint32m4_t v_sum32_accu = __riscv_vmul_vv_u32m4(v_buf32_accu, v_rev_seq, vl); // [overflow 3] each element was added only once, but lane n should in theory be counted vl-n times; (1) (2^24/32*255) *32 = 2^32 (2) (2^20/32*255) * (1+32)*32/2 (sum) = 2^32 ++ ++ v_sum32_accu = __riscv_vadd_vv_u32m4(v_sum32_accu, __riscv_vmul_vx_u32m4(v_adler32_prev_accu, vl, vl), vl); ++ ++ vuint32m1_t v_sum2_sum = __riscv_vmv_s_x_u32m1(0, vl); ++ ++ v_sum32_accu = __riscv_vremu_vx_u32m4(v_sum32_accu, BASE, vl); // [overflow guard 4] ++ v_sum2_sum = __riscv_vredsum_vs_u32m4_u32m1(v_sum32_accu, v_sum2_sum, vl); // [overflow 4] (2^15/32*255) * (1+32)*32/2 (sum) *32 = 2^32 ++ uint32_t sum2_sum = __riscv_vmv_x_s_u32m1_u32(v_sum2_sum); ++ ++ sum2 += (sum2_sum + (adler * (len - left)) % BASE); // [overflow guard 5] [overflow 5] add the original adler value that was not added earlier; adler * (len - left) may overflow, e.g. 58334*76032 ++ ++ /* reduce A to a single sum and add adler */ ++ vuint32m1_t v_adler_sum = __riscv_vmv_s_x_u32m1(0, vl); ++ v_adler_sum = __riscv_vredsum_vs_u32m4_u32m1(v_buf32_accu, v_adler_sum, vl); // [overflow 6] (2^24/32*255) *32 = 2^32 ++ uint32_t adler_sum = __riscv_vmv_x_s_u32m1_u32(v_adler_sum); ++ ++ adler += adler_sum; ++ ++ while (left--) { ++ if (COPY) *dst++ = *src; ++ adler += *src++; ++ sum2 += adler; ++ } ++ ++ sum2 %= BASE; ++ adler %= BASE; ++ ++ return adler | (sum2 << 16); ++} ++ ++Z_INTERNAL uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ return adler32_rvv_impl(adler, dst, src, len, 1); ++} ++ ++Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) { ++ return adler32_rvv_impl(adler, NULL, buf, len, 0); ++} ++ ++#endif // RISCV_RVV +diff --git a/arch/riscv/chunkset_rvv.c b/arch/riscv/chunkset_rvv.c +new
file mode 100644 +index 0000000..97bc79b +--- /dev/null ++++ b/arch/riscv/chunkset_rvv.c +@@ -0,0 +1,125 @@ ++/* chunkset_rvv.c - RVV version of chunkset ++ * Copyright (C) 2023 SiFive, Inc. All rights reserved. ++ * Contributed by Alex Chiang ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include ++#include "zbuild.h" ++ ++/* ++ * RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, ++ * so we prefer using large size chunk and copy memory as much as possible. ++ */ ++#define CHUNK_SIZE 32 ++ ++#define HAVE_CHUNKMEMSET_2 ++#define HAVE_CHUNKMEMSET_4 ++#define HAVE_CHUNKMEMSET_8 ++ ++#define CHUNK_MEMSET_RVV_IMPL(elen) \ ++do { \ ++ size_t vl, len = CHUNK_SIZE / sizeof(uint##elen##_t); \ ++ uint##elen##_t val = *(uint##elen##_t*)from; \ ++ uint##elen##_t* chunk_p = (uint##elen##_t*)chunk; \ ++ do { \ ++ vl = __riscv_vsetvl_e##elen##m4(len); \ ++ vuint##elen##m4_t v_val = __riscv_vmv_v_x_u##elen##m4(val, vl); \ ++ __riscv_vse##elen##_v_u##elen##m4(chunk_p, v_val, vl); \ ++ len -= vl; chunk_p += vl; \ ++ } while (len > 0); \ ++} while (0) ++ ++/* We don't have a 32-byte datatype for RISC-V arch. 
*/ ++typedef struct chunk_s { ++ uint64_t data[4]; ++} chunk_t; ++ ++static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { ++ CHUNK_MEMSET_RVV_IMPL(16); ++} ++ ++static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { ++ CHUNK_MEMSET_RVV_IMPL(32); ++} ++ ++static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { ++ CHUNK_MEMSET_RVV_IMPL(64); ++} ++ ++static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { ++ memcpy(chunk->data, (uint8_t *)s, CHUNK_SIZE); ++} ++ ++static inline void storechunk(uint8_t *out, chunk_t *chunk) { ++ memcpy(out, chunk->data, CHUNK_SIZE); ++} ++ ++#define CHUNKSIZE chunksize_rvv ++#define CHUNKCOPY chunkcopy_rvv ++#define CHUNKUNROLL chunkunroll_rvv ++#define CHUNKMEMSET chunkmemset_rvv ++#define CHUNKMEMSET_SAFE chunkmemset_safe_rvv ++ ++#define HAVE_CHUNKCOPY ++ ++/* ++ * Assuming that the length is non-zero, and that `from` lags `out` by at least ++ * sizeof chunk_t bytes, please see the comments in chunkset_tpl.h. ++ * ++ * We load/store a single chunk once in the `CHUNKCOPY`. ++ * However, RISC-V glibc would enable RVV optimized memcpy at runtime by IFUNC, ++ * so we prefer to copy a large amount of memory at once to make good use of the RVV advantage. ++ * ++ * To be aligned to the other platforms, we didn't modify `CHUNKCOPY` method a lot, ++ * but we still copy as much memory as possible for some conditions. ++ * ++ * case 1: out - from >= len (no overlap) ++ * We can use memcpy to copy `len` size once ++ * because the memory layout would be the same. ++ * ++ * case 2: overlap ++ * We copy N chunks using memcpy at once, aiming to achieve our goal: ++ * to copy as much memory as possible. ++ * ++ * After using a single memcpy to copy N chunks, we have to use a series of ++ * loadchunk and storechunk to ensure the result is correct.
++ */ ++ ++static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { ++ Assert(len > 0, "chunkcopy should never have a length 0"); ++ chunk_t chunk; ++ int32_t align = ((len - 1) % sizeof(chunk_t)) + 1; ++ memcpy(&chunk, (uint8_t *)from, sizeof(chunk_t)); ++ memcpy(out, &chunk, sizeof(chunk_t)); ++ out += align; ++ from += align; ++ len -= align; ++ ptrdiff_t dist = out - from; ++ if (dist >= len) { ++ memcpy(out, from, len); ++ out += len; ++ from += len; ++ return out; ++ } ++ if (dist >= sizeof(chunk_t)) { ++ dist = (dist / sizeof(chunk_t)) * sizeof(chunk_t); ++ memcpy(out, from, dist); ++ out += dist; ++ from += dist; ++ len -= dist; ++ } ++ while (len > 0) { ++ memcpy(&chunk, (uint8_t *)from, sizeof(chunk_t)); ++ memcpy(out, &chunk, sizeof(chunk_t)); ++ out += sizeof(chunk_t); ++ from += sizeof(chunk_t); ++ len -= sizeof(chunk_t); ++ } ++ return out; ++} ++ ++#include "chunkset_tpl.h" ++ ++#define INFLATE_FAST inflate_fast_rvv ++ ++#include "inffast_tpl.h" +diff --git a/arch/riscv/compare256_rvv.c b/arch/riscv/compare256_rvv.c +new file mode 100644 +index 0000000..0fd6082 +--- /dev/null ++++ b/arch/riscv/compare256_rvv.c +@@ -0,0 +1,47 @@ ++/* compare256_rvv.c - RVV version of compare256 ++ * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
++ * Contributed by Alex Chiang ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef RISCV_RVV ++ ++#include "../../zbuild.h" ++#include "fallback_builtins.h" ++ ++#include ++ ++static inline uint32_t compare256_rvv_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ size_t vl; ++ long found_diff; ++ do { ++ vl = __riscv_vsetvl_e8m4(256 - len); ++ vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl); ++ vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl); ++ vbool2_t v_mask = __riscv_vmsne_vv_u8m4_b2(v_src0, v_src1, vl); ++ found_diff = __riscv_vfirst_m_b2(v_mask, vl); ++ if (found_diff >= 0) ++ return len + (uint32_t)found_diff; ++ src0 += vl, src1 += vl, len += vl; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_rvv_static(src0, src1); ++} ++ ++#define LONGEST_MATCH longest_match_rvv ++#define COMPARE256 compare256_rvv_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_rvv ++#define COMPARE256 compare256_rvv_static ++ ++#include "match_tpl.h" ++ ++#endif // RISCV_RVV +diff --git a/arch/riscv/riscv_features.c b/arch/riscv/riscv_features.c +new file mode 100644 +index 0000000..b066f42 +--- /dev/null ++++ b/arch/riscv/riscv_features.c +@@ -0,0 +1,45 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include "../../zbuild.h" ++#include "riscv_features.h" ++ ++#define ISA_V_HWCAP (1 << ('v' - 'a')) ++ ++int Z_INTERNAL is_kernel_version_greater_or_equal_to_6_5() { ++ struct utsname buffer; ++ uname(&buffer); ++ ++ int major, minor; ++ if (sscanf(buffer.release, "%d.%d", &major, &minor) != 2) { ++ // Something bad with uname() ++ return 0; ++ } ++ ++ if (major > 6 || major == 6 && minor >= 5) ++ return 1; ++ return 0; ++} ++ ++void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *features) { ++#if defined(__riscv_v) 
&& defined(__linux__) ++ features->has_rvv = 1; ++#else ++ features->has_rvv = 0; ++#endif ++} ++ ++void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) { ++ unsigned long hw_cap = getauxval(AT_HWCAP); ++ features->has_rvv = hw_cap & ISA_V_HWCAP; ++} ++ ++void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features) { ++ if (is_kernel_version_greater_or_equal_to_6_5()) ++ riscv_check_features_runtime(features); ++ else ++ riscv_check_features_compile_time(features); ++} +diff --git a/arch/riscv/riscv_features.h b/arch/riscv/riscv_features.h +new file mode 100644 +index 0000000..c76e967 +--- /dev/null ++++ b/arch/riscv/riscv_features.h +@@ -0,0 +1,18 @@ ++/* riscv_features.h -- check for riscv features. ++ * ++ * Copyright (C) 2023 SiFive, Inc. All rights reserved. ++ * Contributed by Alex Chiang ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef RISCV_H_ ++#define RISCV_H_ ++ ++struct riscv_cpu_features { ++ int has_rvv; ++}; ++ ++void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features); ++ ++#endif /* RISCV_H_ */ +diff --git a/arch/riscv/slide_hash_rvv.c b/arch/riscv/slide_hash_rvv.c +new file mode 100644 +index 0000000..1164e89 +--- /dev/null ++++ b/arch/riscv/slide_hash_rvv.c +@@ -0,0 +1,34 @@ ++/* slide_hash_rvv.c - RVV version of slide_hash ++ * Copyright (C) 2023 SiFive, Inc. All rights reserved. 
++ * Contributed by Alex Chiang ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef RISCV_RVV ++ ++#include ++ ++#include "../../zbuild.h" ++#include "../../deflate.h" ++ ++static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { ++ size_t vl; ++ while (entries > 0) { ++ vl = __riscv_vsetvl_e16m4(entries); ++ vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); ++ vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl); ++ vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl); ++ v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl); ++ __riscv_vse16_v_u16m4(table, v_tab, vl); ++ table += vl, entries -= vl; ++ } ++} ++ ++Z_INTERNAL void slide_hash_rvv(deflate_state *s) { ++ uint16_t wsize = (uint16_t)s->w_size; ++ ++ slide_hash_chain(s->head, HASH_SIZE, wsize); ++ slide_hash_chain(s->prev, wsize, wsize); ++} ++ ++#endif // RISCV_RVV +diff --git a/arch/s390/Makefile.in b/arch/s390/Makefile.in +index 2652fe6..6b4fba7 100644 +--- a/arch/s390/Makefile.in ++++ b/arch/s390/Makefile.in +@@ -7,11 +7,19 @@ CFLAGS= + SFLAGS= + INCLUDES= + SUFFIX= ++VGFMAFLAG= ++NOLTOFLAG= + + SRCDIR=. + SRCTOP=../.. 
+ TOPDIR=$(SRCTOP) + ++s390_features.o: ++ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c ++ ++s390_features.lo: ++ $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c ++ + dfltcc_common.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_common.c + +@@ -30,11 +38,17 @@ dfltcc_inflate.o: + dfltcc_inflate.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c + ++crc32-vx.o: ++ $(CC) $(CFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c ++ ++crc32-vx.lo: ++ $(CC) $(SFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c ++ + mostlyclean: clean + clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +-distclean: ++distclean: clean + rm -f Makefile +diff --git a/arch/s390/README.md b/arch/s390/README.md +index 90066f0..2c31654 100644 +--- a/arch/s390/README.md ++++ b/arch/s390/README.md +@@ -61,16 +61,17 @@ integrated with the rest of zlib-ng using hook macros. + ## Hook macros + + DFLTCC takes as arguments a parameter block, an input buffer, an output +-buffer and a window. `ZALLOC_STATE()`, `ZFREE_STATE()`, `ZCOPY_STATE()`, +-`ZALLOC_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate allocation +-details for the parameter block (which is allocated alongside zlib-ng +-state) and the window (which must be page-aligned). +- +-While inflate software and hardware window formats match, this is not +-the case for deflate. Therefore, `deflateSetDictionary()` and +-`deflateGetDictionary()` need special handling, which is triggered using +-`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()` +-macros. ++buffer and a window. 
`ZALLOC_DEFLATE_STATE()`, `ZALLOC_INFLATE_STATE()`, ++`ZFREE_STATE()`, `ZCOPY_DEFLATE_STATE()`, `ZCOPY_INFLATE_STATE()`, ++`ZALLOC_WINDOW()`, `ZCOPY_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate ++allocation details for the parameter block (which is allocated alongside ++zlib-ng state) and the window (which must be page-aligned and large enough). ++ ++Software and hardware window formats do not match, therefore, ++`deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()` ++and `inflateGetDictionary()` need special handling, which is triggered using ++`DEFLATE_SET_DICTIONARY_HOOK()`, `DEFLATE_GET_DICTIONARY_HOOK()`, ++`INFLATE_SET_DICTIONARY_HOOK()` and `INFLATE_GET_DICTIONARY_HOOK()` macros. + + `deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC + parameter block using `DEFLATE_RESET_KEEP_HOOK()` and +diff --git a/arch/s390/crc32-vx.c b/arch/s390/crc32-vx.c +new file mode 100644 +index 0000000..acfa218 +--- /dev/null ++++ b/arch/s390/crc32-vx.c +@@ -0,0 +1,222 @@ ++/* ++ * Hardware-accelerated CRC-32 variants for Linux on z Systems ++ * ++ * Use the z/Architecture Vector Extension Facility to accelerate the ++ * computing of bitreflected CRC-32 checksums. ++ * ++ * This CRC-32 implementation algorithm is bitreflected and processes ++ * the least-significant bit first (Little-Endian). ++ * ++ * This code was originally written by Hendrik Brueckner ++ * for use in the Linux kernel and has been ++ * relicensed under the zlib license. 
++ */ ++ ++#include "../../zbuild.h" ++#include "crc32_braid_p.h" ++ ++#include <vecintrin.h> ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) { ++ /* ++ * The CRC-32 constant block contains reduction constants to fold and ++ * process particular chunks of the input data stream in parallel. ++ * ++ * For the CRC-32 variants, the constants are precomputed according to ++ * these definitions: ++ * ++ * R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1 ++ * R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1 ++ * R3 = [(x^(128+32) mod P'(x) << 32)]' << 1 ++ * R4 = [(x^(128-32) mod P'(x) << 32)]' << 1 ++ * R5 = [(x^64 mod P'(x) << 32)]' << 1 ++ * R6 = [(x^32 mod P'(x) << 32)]' << 1 ++ * ++ * The bitreflected Barrett reduction constant, u', is defined as ++ * the bit reversal of floor(x^64 / P(x)). ++ * ++ * where P(x) is the polynomial in the normal domain and the P'(x) is the ++ * polynomial in the reversed (bitreflected) domain. ++ * ++ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: ++ * ++ * P(x) = 0x04C11DB7 ++ * P'(x) = 0xEDB88320 ++ */ ++ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ ++ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ ++ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ ++ const uv2di r5 = {0, 0x163CD6124}; /* R5 */ ++ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ ++ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ ++ ++ /* ++ * Load the initial CRC value. ++ * ++ * The CRC value is loaded into the rightmost word of the ++ * vector register and is later XORed with the LSB portion ++ * of the loaded input data.
++ */ ++ uv2di v0 = {0, 0}; ++ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); ++ ++ /* Load a 64-byte data chunk and XOR with CRC */ ++ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); ++ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); ++ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); ++ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); ++ ++ v1 ^= v0; ++ buf += 64; ++ len -= 64; ++ ++ while (len >= 64) { ++ /* Load the next 64-byte data chunk */ ++ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); ++ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); ++ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); ++ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); ++ ++ /* ++ * Perform a GF(2) multiplication of the doublewords in V1 with ++ * the R1 and R2 reduction constants in V0. The intermediate result ++ * is then folded (accumulated) with the next data chunk in PART1 and ++ * stored in V1. Repeat this step for the register contents ++ * in V2, V3, and V4 respectively. ++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); ++ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); ++ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); ++ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); ++ ++ buf += 64; ++ len -= 64; ++ } ++ ++ /* ++ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 ++ * and R4 and accumulating the next 128-bit chunk until a single 128-bit ++ * value remains. 
++ */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); ++ ++ while (len >= 16) { ++ /* Load next data chunk */ ++ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); ++ ++ /* Fold next data chunk */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); ++ ++ buf += 16; ++ len -= 16; ++ } ++ ++ /* ++ * Set up a vector register for byte shifts. The shift value must ++ * be loaded in bits 1-4 in byte element 7 of a vector register. ++ * Shift by 8 bytes: 0x40 ++ * Shift by 4 bytes: 0x20 ++ */ ++ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ v9 = vec_insert((unsigned char)0x40, v9, 7); ++ ++ /* ++ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes ++ * to move R4 into the rightmost doubleword and set the leftmost ++ * doubleword to 0x1. ++ */ ++ v0 = vec_srb(r4r3, (uv2di)v9); ++ v0[0] = 1; ++ ++ /* ++ * Compute GF(2) product of V1 and V0. The rightmost doubleword ++ * of V1 is multiplied with R4. The leftmost doubleword of V1 is ++ * multiplied by 0x1 and is then XORed with rightmost product. ++ * Implicitly, the intermediate leftmost product becomes padded ++ */ ++ v1 = (uv2di)vec_gfmsum_128(v0, v1); ++ ++ /* ++ * Now do the final 32-bit fold by multiplying the rightmost word ++ * in V1 with R5 and XOR the result with the remaining bits in V1. ++ * ++ * To achieve this by a single VGFMAG, right shift V1 by a word ++ * and store the result in V2 which is then accumulated. Use the ++ * vector unpack instruction to load the rightmost half of the ++ * doubleword into the rightmost doubleword element of V1; the other ++ * half is loaded in the leftmost doubleword. ++ * The vector register with CONST_R5 contains the R5 constant in the ++ * rightmost doubleword and the leftmost doubleword is zero to ignore ++ * the leftmost product of V1. 
*/ ++ v9 = vec_insert((unsigned char)0x20, v9, 7); ++ v2 = vec_srb(v1, (uv2di)v9); ++ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ ++ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); ++ ++ /* ++ * Apply a Barrett reduction to compute the final 32-bit CRC value. ++ * ++ * The input values to the Barrett reduction are the degree-63 polynomial ++ * in V1 (R(x)), degree-32 generator polynomial, and the reduction ++ * constant u. The Barrett reduction result is the CRC value of R(x) mod ++ * P(x). ++ * ++ * The Barrett reduction algorithm is defined as: ++ * ++ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u ++ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) ++ * 3. C(x) = R(x) XOR T2(x) mod x^32 ++ * ++ * Note: The leftmost doubleword of vector register containing ++ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product ++ * is zero and does not contribute to the final result. ++ */ ++ ++ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ ++ v2 = vec_unpackl((uv4si)v1); ++ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); ++ ++ /* ++ * Compute the GF(2) product of the CRC polynomial with T1(x) in ++ * V2 and XOR the intermediate result, T2(x), with the value in V1. ++ * The final result is stored in word element 2 of V2.
++ */ ++ v2 = vec_unpackl((uv4si)v2); ++ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); ++ ++ return ((uv4si)v2)[2]; ++} ++ ++#define VX_MIN_LEN 64 ++#define VX_ALIGNMENT 16L ++#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) ++ ++uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) { ++ size_t prealign, aligned, remaining; ++ ++ if (len < VX_MIN_LEN + VX_ALIGN_MASK) ++ return PREFIX(crc32_braid)(crc, buf, len); ++ ++ if ((uintptr_t)buf & VX_ALIGN_MASK) { ++ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); ++ len -= prealign; ++ crc = PREFIX(crc32_braid)(crc, buf, prealign); ++ buf += prealign; ++ } ++ aligned = len & ~VX_ALIGN_MASK; ++ remaining = len & VX_ALIGN_MASK; ++ ++ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff; ++ ++ if (remaining) ++ crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining); ++ ++ return crc; ++} +diff --git a/arch/s390/dfltcc_common.c b/arch/s390/dfltcc_common.c +index f1ae904..78be718 100644 +--- a/arch/s390/dfltcc_common.c ++++ b/arch/s390/dfltcc_common.c +@@ -12,70 +12,17 @@ + `posix_memalign' is not an option. Thus, we overallocate and take the + aligned portion of the buffer. + */ +-static inline int is_dfltcc_enabled(void) { +- uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; +- Z_REGISTER uint8_t r0 __asm__("r0"); +- +- memset(facilities, 0, sizeof(facilities)); +- r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; +- /* STFLE is supported since z9-109 and only in z/Architecture mode. When +- * compiling with -m31, gcc defaults to ESA mode, however, since the kernel +- * is 64-bit, it's always z/Architecture mode at runtime. 
+- */ +- __asm__ volatile( +-#ifndef __clang__ +- ".machinemode push\n" +- ".machinemode zarch\n" +-#endif +- "stfle %[facilities]\n" +-#ifndef __clang__ +- ".machinemode pop\n" +-#endif +- : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc"); +- return is_bit_set((const char *)facilities, DFLTCC_FACILITY); +-} +- +-void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size) { +- struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + ALIGN_UP(size, 8)); +- struct dfltcc_qaf_param *param = (struct dfltcc_qaf_param *)&dfltcc_state->param; +- +- /* Initialize available functions */ +- if (is_dfltcc_enabled()) { +- dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); +- memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af)); +- } else +- memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); +- +- /* Initialize parameter block */ +- memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); +- dfltcc_state->param.nt = 1; +- +- /* Initialize tuning parameters */ +- dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; +- dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; +- dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; +- dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; +- dfltcc_state->param.ribm = DFLTCC_RIBM; +-} +- +-void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size) { +- return ZALLOC(strm, ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), sizeof(unsigned char)); +-} +- +-void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size) { +- memcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state)); +-} + + static const int PAGE_ALIGN = 0x1000; + +-void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size) { ++void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, uInt size) { + void *p; + void *w; + + /* To simplify freeing, we store the pointer to the allocated buffer right +- * before the window. 
++ * before the window. Note that DFLTCC always uses HB_SIZE bytes. + */ +- p = ZALLOC(strm, sizeof(void *) + items * size + PAGE_ALIGN, sizeof(unsigned char)); ++ p = ZALLOC(strm, sizeof(void *) + MAX(items * size, HB_SIZE) + PAGE_ALIGN, sizeof(unsigned char)); + if (p == NULL) + return NULL; + w = ALIGN_UP((char *)p + sizeof(void *), PAGE_ALIGN); +@@ -83,7 +30,11 @@ void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt siz + return w; + } + +-void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w) { ++void Z_INTERNAL PREFIX(dfltcc_copy_window)(void *dest, const void *src, size_t n) { ++ memcpy(dest, src, MAX(n, HB_SIZE)); ++} ++ ++void Z_INTERNAL PREFIX(dfltcc_free_window)(PREFIX3(streamp) strm, void *w) { + if (w) + ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *))); + } +diff --git a/arch/s390/dfltcc_common.h b/arch/s390/dfltcc_common.h +index 6de8bb1..b734374 100644 +--- a/arch/s390/dfltcc_common.h ++++ b/arch/s390/dfltcc_common.h +@@ -1,30 +1,21 @@ + #ifndef DFLTCC_COMMON_H + #define DFLTCC_COMMON_H + +-#ifdef ZLIB_COMPAT +-#include "zlib.h" +-#else +-#include "zlib-ng.h" +-#endif + #include "zutil.h" + +-void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size); +-void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size); +-void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size); +-void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size); +-void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w); +- +-#define ZALLOC_STATE dfltcc_alloc_state ++void Z_INTERNAL *PREFIX(dfltcc_alloc_window)(PREFIX3(streamp) strm, uInt items, uInt size); ++void Z_INTERNAL PREFIX(dfltcc_copy_window)(void *dest, const void *src, size_t n); ++void Z_INTERNAL PREFIX(dfltcc_free_window)(PREFIX3(streamp) strm, void *w); + + #define ZFREE_STATE ZFREE + +-#define ZCOPY_STATE dfltcc_copy_state ++#define ZALLOC_WINDOW PREFIX(dfltcc_alloc_window) + +-#define 
ZALLOC_WINDOW dfltcc_alloc_window ++#define ZCOPY_WINDOW PREFIX(dfltcc_copy_window) + +-#define ZFREE_WINDOW dfltcc_free_window ++#define ZFREE_WINDOW PREFIX(dfltcc_free_window) + +-#define TRY_FREE_WINDOW dfltcc_free_window ++#define TRY_FREE_WINDOW PREFIX(dfltcc_free_window) + + #define DFLTCC_BLOCK_HEADER_BITS 3 + #define DFLTCC_HLITS_COUNT_BITS 5 +diff --git a/arch/s390/dfltcc_deflate.c b/arch/s390/dfltcc_deflate.c +index e3b53ee..3ad988a 100644 +--- a/arch/s390/dfltcc_deflate.c ++++ b/arch/s390/dfltcc_deflate.c +@@ -14,16 +14,46 @@ + */ + + #include "zbuild.h" +-#include "zutil.h" + #include "deflate.h" + #include "trees_emit.h" + #include "dfltcc_deflate.h" + #include "dfltcc_detail.h" + ++struct dfltcc_deflate_state { ++ struct dfltcc_state common; ++ uint16_t level_mask; /* Levels on which to use DFLTCC */ ++ uint32_t block_size; /* New block each X bytes */ ++ size_t block_threshold; /* New block after total_in > X */ ++ uint32_t dht_threshold; /* New block only if avail_in >= X */ ++}; ++ ++#define GET_DFLTCC_DEFLATE_STATE(state) ((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state)) ++ ++void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp) strm) { ++ return dfltcc_alloc_state(strm, sizeof(deflate_state), sizeof(struct dfltcc_deflate_state)); ++} ++ ++void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) { ++ deflate_state *state = (deflate_state *)strm->state; ++ struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); ++ ++ dfltcc_reset_state(&dfltcc_state->common); ++ ++ /* Initialize tuning parameters */ ++ dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; ++ dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; ++ dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; ++ dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; ++} ++ ++void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src) { ++ dfltcc_copy_state(dst, src, sizeof(deflate_state), sizeof(struct 
dfltcc_deflate_state)); ++} ++ + static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy, + int reproducible) { + deflate_state *state = (deflate_state *)strm->state; +- struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + + /* Unsupported compression settings */ + if ((dfltcc_state->level_mask & (1 << level)) == 0) +@@ -36,15 +66,15 @@ static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int leve + return 0; + + /* Unsupported hardware */ +- if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) || +- !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) || +- !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0)) ++ if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) || ++ !is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) || ++ !is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0)) + return 0; + + return 1; + } + +-int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm) { ++int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + + return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible); +@@ -78,8 +108,8 @@ static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) { + static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) { + deflate_state *state = (deflate_state *)strm->state; + +- send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid); +- flush_pending(strm); ++ send_bits(state, PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid); ++ PREFIX(flush_pending)(strm); + if (state->pending != 0) { + /* The remaining data is located in pending_out[0:pending]. 
If someone + * calls put_byte() - this might happen in deflate() - the byte will be +@@ -95,17 +125,17 @@ static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 + #endif + } + +-int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result) { ++int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) { + deflate_state *state = (deflate_state *)strm->state; +- struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); +- struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->common.param; + uInt masked_avail_in; + dfltcc_cc cc; + int need_empty_block; + int soft_bcc; + int no_flush; + +- if (!dfltcc_can_deflate(strm)) { ++ if (!PREFIX(dfltcc_can_deflate)(strm)) { + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; +@@ -210,7 +240,10 @@ again: + *strm->next_out = (unsigned char)state->bi_buf; + /* Honor history and check value */ + param->nt = 0; +- param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler; ++ if (state->wrap == 1) ++ param->cv = strm->adler; ++ else if (state->wrap == 2) ++ param->cv = ZSWAP32(state->crc_fold.value); + + /* When opening a block, choose a Huffman-Table Type */ + if (!param->bcf) { +@@ -235,13 +268,16 @@ again: + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ +- strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); ++ strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc); + state->bi_valid = param->sbb; + if (state->bi_valid == 0) + state->bi_buf = 0; /* Avoid accessing next_out */ + else + state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); +- strm->adler = state->wrap == 2 ? 
ZSWAP32(param->cv) : param->cv; ++ if (state->wrap == 1) ++ strm->adler = param->cv; ++ else if (state->wrap == 2) ++ state->crc_fold.value = ZSWAP32(param->cv); + + /* Unmask the input data */ + strm->avail_in += masked_avail_in; +@@ -297,9 +333,9 @@ static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) { + return strm->total_in > 0 || param->nt == 0 || param->hl > 0; + } + +-int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush) { ++int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) { + deflate_state *state = (deflate_state *)strm->state; +- int could_deflate = dfltcc_can_deflate(strm); ++ int could_deflate = PREFIX(dfltcc_can_deflate)(strm); + int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible); + + if (can_deflate == could_deflate) +@@ -315,7 +351,7 @@ int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strat + return Z_OK; + } + +-int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush) { ++int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; +@@ -331,10 +367,10 @@ int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush) { + * buffered some data (Continuation Flag is set), or has not written EOBS + * yet (Block-Continuation Flag is set). 
+ */ +- return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf); ++ return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf); + } + +-int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible) { ++int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) { + deflate_state *state = (deflate_state *)strm->state; + + return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm); +@@ -343,37 +379,7 @@ int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducib + /* + Preloading history. + */ +-static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) { +- size_t offset; +- size_t n; +- +- /* Do not use more than 32K */ +- if (count > HB_SIZE) { +- buf += count - HB_SIZE; +- count = HB_SIZE; +- } +- offset = (param->ho + param->hl) % HB_SIZE; +- if (offset + count <= HB_SIZE) +- /* Circular history buffer does not wrap - copy one chunk */ +- memcpy(history + offset, buf, count); +- else { +- /* Circular history buffer wraps - copy two chunks */ +- n = HB_SIZE - offset; +- memcpy(history + offset, buf, n); +- memcpy(history, buf + n, count - n); +- } +- n = param->hl + count; +- if (n <= HB_SIZE) +- /* All history fits into buffer - no need to discard anything */ +- param->hl = n; +- else { +- /* History does not fit into buffer - discard extra bytes */ +- param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; +- param->hl = HB_SIZE; +- } +-} +- +-int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, ++int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); +@@ -385,21 +391,13 @@ int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, + return Z_OK; + } + +-int Z_INTERNAL 
dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { ++int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + +- if (dictionary) { +- if (param->ho + param->hl <= HB_SIZE) +- /* Circular history buffer does not wrap - copy one chunk */ +- memcpy(dictionary, state->window + param->ho, param->hl); +- else { +- /* Circular history buffer wraps - copy two chunks */ +- memcpy(dictionary, state->window + param->ho, HB_SIZE - param->ho); +- memcpy(dictionary + HB_SIZE - param->ho, state->window, param->ho + param->hl - HB_SIZE); +- } +- } ++ if (dictionary) ++ get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +diff --git a/arch/s390/dfltcc_deflate.h b/arch/s390/dfltcc_deflate.h +index 218e594..cb261b1 100644 +--- a/arch/s390/dfltcc_deflate.h ++++ b/arch/s390/dfltcc_deflate.h +@@ -3,53 +3,58 @@ + + #include "dfltcc_common.h" + +-int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm); +-int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result); +-int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush); +-int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush); +-int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible); +-int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm, ++void Z_INTERNAL *PREFIX(dfltcc_alloc_deflate_state)(PREFIX3(streamp)); ++void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp)); ++void Z_INTERNAL PREFIX(dfltcc_copy_deflate_state)(void *dst, const void *src); ++int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm); ++int Z_INTERNAL 
PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result); ++int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush); ++int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush); ++int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible); ++int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length); +-int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); ++int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); ++ ++#define ZALLOC_DEFLATE_STATE PREFIX(dfltcc_alloc_deflate_state) ++#define ZCOPY_DEFLATE_STATE PREFIX(dfltcc_copy_deflate_state) + + #define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ +- if (dfltcc_can_deflate((strm))) \ +- return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ ++ if (PREFIX(dfltcc_can_deflate)((strm))) \ ++ return PREFIX(dfltcc_deflate_set_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + + #define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ +- if (dfltcc_can_deflate((strm))) \ +- return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ ++ if (PREFIX(dfltcc_can_deflate)((strm))) \ ++ return PREFIX(dfltcc_deflate_get_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +-#define DEFLATE_RESET_KEEP_HOOK(strm) \ +- dfltcc_reset((strm), sizeof(deflate_state)) ++#define DEFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_deflate_state) + + #define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ + do { \ + int err; \ + \ +- err = dfltcc_deflate_params((strm), (level), (strategy), (hook_flush)); \ ++ err = PREFIX(dfltcc_deflate_params)((strm), (level), (strategy), (hook_flush)); \ + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) + +-#define 
DEFLATE_DONE dfltcc_deflate_done ++#define DEFLATE_DONE PREFIX(dfltcc_deflate_done) + + #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ +- if (dfltcc_can_deflate((strm))) \ ++ if (deflateStateCheck((strm)) || PREFIX(dfltcc_can_deflate)((strm))) \ + (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ + } while (0) + +-#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (PREFIX(dfltcc_can_deflate)((strm))) + +-#define DEFLATE_HOOK dfltcc_deflate ++#define DEFLATE_HOOK PREFIX(dfltcc_deflate) + +-#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) ++#define DEFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_deflate)((strm))) + +-#define DEFLATE_CAN_SET_REPRODUCIBLE dfltcc_can_set_reproducible ++#define DEFLATE_CAN_SET_REPRODUCIBLE PREFIX(dfltcc_can_set_reproducible) + + #endif +diff --git a/arch/s390/dfltcc_detail.h b/arch/s390/dfltcc_detail.h +index 4ec03f8..362d94c 100644 +--- a/arch/s390/dfltcc_detail.h ++++ b/arch/s390/dfltcc_detail.h +@@ -1,5 +1,4 @@ +-#include +-#include ++#include "../../zbuild.h" + #include + + #ifdef HAVE_SYS_SDT_H +@@ -25,74 +24,6 @@ + #define DFLTCC_RIBM 0 + #endif + +-/* +- C wrapper for the DEFLATE CONVERSION CALL instruction. +- */ +-typedef enum { +- DFLTCC_CC_OK = 0, +- DFLTCC_CC_OP1_TOO_SHORT = 1, +- DFLTCC_CC_OP2_TOO_SHORT = 2, +- DFLTCC_CC_OP2_CORRUPT = 2, +- DFLTCC_CC_AGAIN = 3, +-} dfltcc_cc; +- +-#define DFLTCC_QAF 0 +-#define DFLTCC_GDHT 1 +-#define DFLTCC_CMPR 2 +-#define DFLTCC_XPND 4 +-#define HBT_CIRCULAR (1 << 7) +-#define HB_BITS 15 +-#define HB_SIZE (1 << HB_BITS) +-#define DFLTCC_FACILITY 151 +- +-static inline dfltcc_cc dfltcc(int fn, void *param, +- unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) { +- unsigned char *t2 = op1 ? *op1 : NULL; +- size_t t3 = len1 ? *len1 : 0; +- z_const unsigned char *t4 = op2 ? *op2 : NULL; +- size_t t5 = len2 ? 
*len2 : 0; +- Z_REGISTER int r0 __asm__("r0") = fn; +- Z_REGISTER void *r1 __asm__("r1") = param; +- Z_REGISTER unsigned char *r2 __asm__("r2") = t2; +- Z_REGISTER size_t r3 __asm__("r3") = t3; +- Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4; +- Z_REGISTER size_t r5 __asm__("r5") = t5; +- int cc; +- +- __asm__ volatile( +-#ifdef HAVE_SYS_SDT_H +- STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) +-#endif +- ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +-#ifdef HAVE_SYS_SDT_H +- STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) +-#endif +- "ipm %[cc]\n" +- : [r2] "+r" (r2) +- , [r3] "+r" (r3) +- , [r4] "+r" (r4) +- , [r5] "+r" (r5) +- , [cc] "=r" (cc) +- : [r0] "r" (r0) +- , [r1] "r" (r1) +- , [hist] "r" (hist) +-#ifdef HAVE_SYS_SDT_H +- , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +-#endif +- : "cc", "memory"); +- t2 = r2; t3 = r3; t4 = r4; t5 = r5; +- +- if (op1) +- *op1 = t2; +- if (len1) +- *len1 = t3; +- if (op2) +- *op2 = t4; +- if (len2) +- *len2 = t5; +- return (cc >> 28) & 3; +-} +- + /* + Parameter Block for Query Available Functions. + */ +@@ -105,7 +36,8 @@ struct dfltcc_qaf_param { + char reserved2[6]; + }; + +-static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32); ++#define DFLTCC_SIZEOF_QAF 32 ++static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf); + + static inline int is_bit_set(const char *bits, int n) { + return bits[n / 8] & (1 << (7 - (n % 8))); +@@ -115,6 +47,31 @@ static inline void clear_bit(char *bits, int n) { + bits[n / 8] &= ~(1 << (7 - (n % 8))); + } + ++#define DFLTCC_FACILITY 151 ++ ++static inline int is_dfltcc_enabled(void) { ++ uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; ++ Z_REGISTER uint8_t r0 __asm__("r0"); ++ ++ memset(facilities, 0, sizeof(facilities)); ++ r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; ++ /* STFLE is supported since z9-109 and only in z/Architecture mode. 
When ++ * compiling with -m31, gcc defaults to ESA mode, however, since the kernel ++ * is 64-bit, it's always z/Architecture mode at runtime. ++ */ ++ __asm__ volatile( ++#ifndef __clang__ ++ ".machinemode push\n" ++ ".machinemode zarch\n" ++#endif ++ "stfle %[facilities]\n" ++#ifndef __clang__ ++ ".machinemode pop\n" ++#endif ++ : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc"); ++ return is_bit_set((const char *)facilities, DFLTCC_FACILITY); ++} ++ + #define DFLTCC_FMT0 0 + + /* +@@ -165,12 +122,16 @@ struct dfltcc_param_v0 { + uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + uint8_t reserved464[6]; +- uint8_t cdht[288]; +- uint8_t reserved[32]; +- uint8_t csb[1152]; ++ uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ ++ uint8_t reserved[24]; ++ uint8_t ribm2[8]; /* Reserved for IBM use */ ++ uint8_t csb[1152]; /* Continuation-State Buffer */ + }; + +-static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536); ++#define DFLTCC_SIZEOF_GDHT_V0 384 ++#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536 ++static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0); ++static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0); + + static inline z_const char *oesc_msg(char *buf, int oesc) { + if (oesc == 0x00) +@@ -181,19 +142,167 @@ static inline z_const char *oesc_msg(char *buf, int oesc) { + } + } + ++/* ++ C wrapper for the DEFLATE CONVERSION CALL instruction. 
++ */ ++typedef enum { ++ DFLTCC_CC_OK = 0, ++ DFLTCC_CC_OP1_TOO_SHORT = 1, ++ DFLTCC_CC_OP2_TOO_SHORT = 2, ++ DFLTCC_CC_OP2_CORRUPT = 2, ++ DFLTCC_CC_AGAIN = 3, ++} dfltcc_cc; ++ ++#define DFLTCC_QAF 0 ++#define DFLTCC_GDHT 1 ++#define DFLTCC_CMPR 2 ++#define DFLTCC_XPND 4 ++#define HBT_CIRCULAR (1 << 7) ++#define DFLTCC_FN_MASK ((1 << 7) - 1) ++#define HB_BITS 15 ++#define HB_SIZE (1 << HB_BITS) ++ ++static inline dfltcc_cc dfltcc(int fn, void *param, ++ unsigned char **op1, size_t *len1, ++ z_const unsigned char **op2, size_t *len2, void *hist) { ++ unsigned char *t2 = op1 ? *op1 : NULL; ++ unsigned char *orig_t2 = t2; ++ size_t t3 = len1 ? *len1 : 0; ++ z_const unsigned char *t4 = op2 ? *op2 : NULL; ++ size_t t5 = len2 ? *len2 : 0; ++ Z_REGISTER int r0 __asm__("r0") = fn; ++ Z_REGISTER void *r1 __asm__("r1") = param; ++ Z_REGISTER unsigned char *r2 __asm__("r2") = t2; ++ Z_REGISTER size_t r3 __asm__("r3") = t3; ++ Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4; ++ Z_REGISTER size_t r5 __asm__("r5") = t5; ++ int cc; ++ ++ __asm__ volatile( ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ "ipm %[cc]\n" ++ : [r2] "+r" (r2) ++ , [r3] "+r" (r3) ++ , [r4] "+r" (r4) ++ , [r5] "+r" (r5) ++ , [cc] "=r" (cc) ++ : [r0] "r" (r0) ++ , [r1] "r" (r1) ++ , [hist] "r" (hist) ++#ifdef HAVE_SYS_SDT_H ++ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) ++#endif ++ : "cc", "memory"); ++ t2 = r2; t3 = r3; t4 = r4; t5 = r5; ++ ++ switch (fn & DFLTCC_FN_MASK) { ++ case DFLTCC_QAF: ++ __msan_unpoison(param, DFLTCC_SIZEOF_QAF); ++ break; ++ case DFLTCC_GDHT: ++ __msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0); ++ break; ++ case DFLTCC_CMPR: ++ __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); ++ __msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 
? 0 : 1)); ++ break; ++ case DFLTCC_XPND: ++ __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); ++ __msan_unpoison(orig_t2, t2 - orig_t2); ++ break; ++ } ++ ++ if (op1) ++ *op1 = t2; ++ if (len1) ++ *len1 = t3; ++ if (op2) ++ *op2 = t4; ++ if (len2) ++ *len2 = t5; ++ return (cc >> 28) & 3; ++} ++ + /* + Extension of inflate_state and deflate_state. Must be doubleword-aligned. + */ + struct dfltcc_state { + struct dfltcc_param_v0 param; /* Parameter block. */ + struct dfltcc_qaf_param af; /* Available functions. */ +- uint16_t level_mask; /* Levels on which to use DFLTCC */ +- uint32_t block_size; /* New block each X bytes */ +- size_t block_threshold; /* New block after total_in > X */ +- uint32_t dht_threshold; /* New block only if avail_in >= X */ + char msg[64]; /* Buffer for strm->msg */ + }; + + #define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) + + #define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8))) ++ ++static inline void *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt size, uInt extension_size) { ++ return ZALLOC(strm, 1, ALIGN_UP(size, 8) + extension_size); ++} ++ ++static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { ++ /* Initialize available functions */ ++ if (is_dfltcc_enabled()) { ++ dfltcc(DFLTCC_QAF, &dfltcc_state->param, NULL, NULL, NULL, NULL, NULL); ++ memmove(&dfltcc_state->af, &dfltcc_state->param, sizeof(dfltcc_state->af)); ++ } else ++ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ ++ /* Initialize parameter block */ ++ memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); ++ dfltcc_state->param.nt = 1; ++ dfltcc_state->param.ribm = DFLTCC_RIBM; ++} ++ ++static inline void dfltcc_copy_state(void *dst, const void *src, uInt size, uInt extension_size) { ++ memcpy(dst, src, ALIGN_UP(size, 8) + extension_size); ++} ++ ++static inline void append_history(struct dfltcc_param_v0 *param, unsigned char *history, ++ 
const unsigned char *buf, uInt count) { ++ size_t offset; ++ size_t n; ++ ++ /* Do not use more than 32K */ ++ if (count > HB_SIZE) { ++ buf += count - HB_SIZE; ++ count = HB_SIZE; ++ } ++ offset = (param->ho + param->hl) % HB_SIZE; ++ if (offset + count <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ memcpy(history + offset, buf, count); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ n = HB_SIZE - offset; ++ memcpy(history + offset, buf, n); ++ memcpy(history, buf + n, count - n); ++ } ++ n = param->hl + count; ++ if (n <= HB_SIZE) ++ /* All history fits into buffer - no need to discard anything */ ++ param->hl = n; ++ else { ++ /* History does not fit into buffer - discard extra bytes */ ++ param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; ++ param->hl = HB_SIZE; ++ } ++} ++ ++static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history, ++ unsigned char *buf) { ++ if (param->ho + param->hl <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ memcpy(buf, history + param->ho, param->hl); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ memcpy(buf, history + param->ho, HB_SIZE - param->ho); ++ memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); ++ } ++} +diff --git a/arch/s390/dfltcc_inflate.c b/arch/s390/dfltcc_inflate.c +index 2535064..f0d3951 100644 +--- a/arch/s390/dfltcc_inflate.c ++++ b/arch/s390/dfltcc_inflate.c +@@ -20,13 +20,24 @@ + #include "dfltcc_inflate.h" + #include "dfltcc_detail.h" + +-int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm) { ++struct inflate_state Z_INTERNAL *PREFIX(dfltcc_alloc_inflate_state)(PREFIX3(streamp) strm) { ++ return (struct inflate_state *)dfltcc_alloc_state(strm, sizeof(struct inflate_state), sizeof(struct dfltcc_state)); ++} ++ ++void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state 
*)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + +- /* Unsupported compression settings */ +- if (state->wbits != HB_BITS) +- return 0; ++ dfltcc_reset_state(dfltcc_state); ++} ++ ++void Z_INTERNAL PREFIX(dfltcc_copy_inflate_state)(struct inflate_state *dst, const struct inflate_state *src) { ++ dfltcc_copy_state(dst, src, sizeof(struct inflate_state), sizeof(struct dfltcc_state)); ++} ++ ++int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + /* Unsupported hardware */ + return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); +@@ -47,7 +58,7 @@ static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) { + return cc; + } + +-dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret) { ++dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; +@@ -55,7 +66,7 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush + + if (flush == Z_BLOCK || flush == Z_TREES) { + /* DFLTCC does not support stopping on block boundaries */ +- if (dfltcc_inflate_disable(strm)) { ++ if (PREFIX(dfltcc_inflate_disable)(strm)) { + *ret = Z_STREAM_ERROR; + return DFLTCC_INFLATE_BREAK; + } else +@@ -75,19 +86,18 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush + if (strm->avail_in == 0 && !param->cf) + return DFLTCC_INFLATE_BREAK; + +- if (inflate_ensure_window(state)) { ++ if (PREFIX(inflate_ensure_window)(state)) { + state->mode = MEM; + return DFLTCC_INFLATE_CONTINUE; + } + + /* Translate stream to parameter block */ +- param->cvt = 
state->flags ? CVT_CRC32 : CVT_ADLER32; ++ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; + param->sbb = state->bits; +- param->hl = state->whave; /* Software and hardware history formats match */ +- param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); + if (param->hl) + param->nt = 0; /* Honor history for the first block */ +- param->cv = state->flags ? ZSWAP32(state->check) : state->check; ++ if (state->wrap & 4) ++ param->cv = state->flags ? ZSWAP32(state->check) : state->check; + + /* Inflate */ + do { +@@ -98,9 +108,8 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->last = cc == DFLTCC_CC_OK; + state->bits = param->sbb; +- state->whave = param->hl; +- state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); +- state->check = state->flags ? ZSWAP32(param->cv) : param->cv; ++ if (state->wrap & 4) ++ strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ + state->mode = BAD; +@@ -112,20 +121,44 @@ dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush + DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; + } + +-int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) { ++int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + + return !param->nt; + } + +-int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) { ++/* ++ Rotates a circular buffer. 
++ The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ ++ */ ++static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *end) { ++ unsigned char *p = pivot; ++ unsigned char tmp; ++ ++ while (p != start) { ++ tmp = *start; ++ *start = *p; ++ *p = tmp; ++ ++ start++; ++ p++; ++ ++ if (p == end) ++ p = pivot; ++ else if (start == pivot) ++ pivot = p; ++ } ++} ++ ++int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; + +- if (!dfltcc_can_inflate(strm)) ++ if (!PREFIX(dfltcc_can_inflate)(strm)) + return 0; +- if (dfltcc_was_inflate_used(strm)) ++ if (PREFIX(dfltcc_was_inflate_used)(strm)) + /* DFLTCC has already decompressed some data. Since there is not + * enough information to resume decompression in software, the call + * must fail. +@@ -133,5 +166,40 @@ int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) { + return 1; + /* DFLTCC was not used yet - decompress in software */ + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ /* Convert the window from the hardware to the software format */ ++ rotate(state->window, state->window + param->ho, state->window + HB_SIZE); ++ state->whave = state->wnext = MIN(param->hl, state->wsize); + return 0; + } ++ ++/* ++ Preloading history. 
++*/ ++int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, ++ const unsigned char *dictionary, uInt dict_length) { ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (PREFIX(inflate_ensure_window)(state)) { ++ state->mode = MEM; ++ return Z_MEM_ERROR; ++ } ++ ++ append_history(param, state->window, dictionary, dict_length); ++ state->havedict = 1; ++ return Z_OK; ++} ++ ++int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, ++ unsigned char *dictionary, uInt *dict_length) { ++ struct inflate_state *state = (struct inflate_state *)strm->state; ++ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 *param = &dfltcc_state->param; ++ ++ if (dictionary && state->window) ++ get_history(param, state->window, dictionary); ++ if (dict_length) ++ *dict_length = param->hl; ++ return Z_OK; ++} +diff --git a/arch/s390/dfltcc_inflate.h b/arch/s390/dfltcc_inflate.h +index fc8a000..632fada 100644 +--- a/arch/s390/dfltcc_inflate.h ++++ b/arch/s390/dfltcc_inflate.h +@@ -3,28 +3,37 @@ + + #include "dfltcc_common.h" + +-int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm); ++struct inflate_state Z_INTERNAL *PREFIX(dfltcc_alloc_inflate_state)(PREFIX3(streamp) strm); ++void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm); ++void Z_INTERNAL PREFIX(dfltcc_copy_inflate_state)(struct inflate_state *dst, const struct inflate_state *src); ++int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm); + typedef enum { + DFLTCC_INFLATE_CONTINUE, + DFLTCC_INFLATE_BREAK, + DFLTCC_INFLATE_SOFTWARE, + } dfltcc_inflate_action; +-dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret); +-int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm); +-int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) 
strm); ++dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret); ++int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm); ++int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm); ++int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, ++ const unsigned char *dictionary, uInt dict_length); ++int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, ++ unsigned char *dictionary, uInt* dict_length); + +-#define INFLATE_RESET_KEEP_HOOK(strm) \ +- dfltcc_reset((strm), sizeof(struct inflate_state)) ++#define ZALLOC_INFLATE_STATE PREFIX(dfltcc_alloc_inflate_state) ++#define ZCOPY_INFLATE_STATE PREFIX(dfltcc_copy_inflate_state) ++ ++#define INFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_inflate_state) + + #define INFLATE_PRIME_HOOK(strm, bits, value) \ +- do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0) ++ do { if (PREFIX(dfltcc_inflate_disable)((strm))) return Z_STREAM_ERROR; } while (0) + + #define INFLATE_TYPEDO_HOOK(strm, flush) \ +- if (dfltcc_can_inflate((strm))) { \ ++ if (PREFIX(dfltcc_can_inflate)((strm))) { \ + dfltcc_inflate_action action; \ + \ + RESTORE(); \ +- action = dfltcc_inflate((strm), (flush), &ret); \ ++ action = PREFIX(dfltcc_inflate)((strm), (flush), &ret); \ + LOAD(); \ + if (action == DFLTCC_INFLATE_CONTINUE) \ + break; \ +@@ -32,18 +41,30 @@ int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm); + goto inf_leave; \ + } + +-#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_inflate)((strm))) + +-#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_NEED_UPDATEWINDOW(strm) (!PREFIX(dfltcc_can_inflate)((strm))) + + #define INFLATE_MARK_HOOK(strm) \ + do { \ +- if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ ++ if (PREFIX(dfltcc_was_inflate_used)((strm))) return -(1L << 16); \ + } while (0) 
+ + #define INFLATE_SYNC_POINT_HOOK(strm) \ + do { \ +- if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ ++ if (PREFIX(dfltcc_was_inflate_used)((strm))) return Z_STREAM_ERROR; \ ++ } while (0) ++ ++#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (PREFIX(dfltcc_can_inflate)((strm))) \ ++ return PREFIX(dfltcc_inflate_set_dictionary)((strm), (dict), (dict_len)); \ ++ } while (0) ++ ++#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (PREFIX(dfltcc_can_inflate)((strm))) \ ++ return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + + #endif +diff --git a/arch/s390/s390_features.c b/arch/s390/s390_features.c +new file mode 100644 +index 0000000..8290106 +--- /dev/null ++++ b/arch/s390/s390_features.c +@@ -0,0 +1,14 @@ ++#include "../../zbuild.h" ++#include "s390_features.h" ++ ++#ifdef HAVE_SYS_AUXV_H ++# include ++#endif ++ ++#ifndef HWCAP_S390_VXRS ++#define HWCAP_S390_VXRS HWCAP_S390_VX ++#endif ++ ++void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) { ++ features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VXRS; ++} +diff --git a/arch/s390/s390_features.h b/arch/s390/s390_features.h +new file mode 100644 +index 0000000..b8ffef7 +--- /dev/null ++++ b/arch/s390/s390_features.h +@@ -0,0 +1,10 @@ ++#ifndef S390_FEATURES_H_ ++#define S390_FEATURES_H_ ++ ++struct s390_cpu_features { ++ int has_vx; ++}; ++ ++void Z_INTERNAL s390_check_features(struct s390_cpu_features *features); ++ ++#endif +diff --git a/arch/s390/self-hosted-builder/actions-runner.Dockerfile b/arch/s390/self-hosted-builder/actions-runner.Dockerfile +index a4bb774..136eec7 100644 +--- a/arch/s390/self-hosted-builder/actions-runner.Dockerfile ++++ b/arch/s390/self-hosted-builder/actions-runner.Dockerfile +@@ -11,14 +11,19 @@ FROM s390x/ubuntu:20.04 + # Packages for zlib-ng testing. 
+ ENV DEBIAN_FRONTEND=noninteractive + RUN apt-get update && apt-get -y install \ ++ clang-11 \ + cmake \ + curl \ + gcc \ + git \ + jq \ ++ libxml2-dev \ ++ libxslt-dev \ ++ llvm-11-tools \ + ninja-build \ + python-is-python3 \ + python3 \ ++ python3-dev \ + python3-pip + + # amd64 dependencies. +@@ -31,7 +36,7 @@ ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu + RUN useradd -m actions-runner + USER actions-runner + WORKDIR /home/actions-runner +-RUN curl -L https://github.com/actions/runner/releases/download/v2.283.2/actions-runner-linux-x64-2.283.2.tar.gz | tar -xz ++RUN curl -L https://github.com/actions/runner/releases/download/v2.287.1/actions-runner-linux-x64-2.287.1.tar.gz | tar -xz + VOLUME /home/actions-runner + + # Scripts. +diff --git a/arch/s390/self-hosted-builder/actions-runner.service b/arch/s390/self-hosted-builder/actions-runner.service +index 1d3129f..71053a7 100644 +--- a/arch/s390/self-hosted-builder/actions-runner.service ++++ b/arch/s390/self-hosted-builder/actions-runner.service +@@ -7,6 +7,7 @@ StartLimitIntervalSec=0 + [Service] + Type=simple + Restart=always ++ExecStartPre=-/usr/bin/docker rm --force actions-runner + ExecStart=/usr/bin/docker run \ + --env-file=/etc/actions-runner \ + --init \ +diff --git a/arch/x86/Makefile.in b/arch/x86/Makefile.in +index 13c736c..7c05246 100644 +--- a/arch/x86/Makefile.in ++++ b/arch/x86/Makefile.in +@@ -8,11 +8,15 @@ SFLAGS= + INCLUDES= + SUFFIX= + ++AVX512FLAG=-mavx512f -mavx512dq -mavx512vl -mavx512bw ++AVX512VNNIFLAG=-mavx512vnni + AVX2FLAG=-mavx2 + SSE2FLAG=-msse2 + SSSE3FLAG=-mssse3 +-SSE4FLAG=-msse4 ++SSE42FLAG=-msse4.2 + PCLMULFLAG=-mpclmul ++VPCLMULFLAG=-mvpclmulqdq ++XSAVEFLAG=-mxsave + NOLTOFLAG= + + SRCDIR=. +@@ -20,83 +24,118 @@ SRCTOP=../.. 
+ TOPDIR=$(SRCTOP) + + all: \ +- x86.o x86.lo \ +- adler32_avx.o adler32.lo \ ++ x86_features.o x86_features.lo \ ++ adler32_avx2.o adler32_avx2.lo \ ++ adler32_avx512.o adler32_avx512.lo \ ++ adler32_avx512_vnni.o adler32_avx512_vnni.lo \ ++ adler32_sse42.o adler32_sse42.lo \ + adler32_ssse3.o adler32_ssse3.lo \ +- chunkset_avx.o chunkset_avx.lo \ +- chunkset_sse.o chunkset_sse.lo \ +- compare258_avx.o compare258_avx.lo \ +- compare258_sse.o compare258_sse.lo \ +- insert_string_sse.o insert_string_sse.lo \ +- crc_folding.o crc_folding.lo \ +- slide_avx.o slide_avx.lo \ +- slide_sse.o slide_sse.lo ++ chunkset_avx2.o chunkset_avx2.lo \ ++ chunkset_sse2.o chunkset_sse2.lo \ ++ chunkset_ssse3.o chunkset_ssse3.lo \ ++ compare256_avx2.o compare256_avx2.lo \ ++ compare256_sse2.o compare256_sse2.lo \ ++ insert_string_sse42.o insert_string_sse42.lo \ ++ crc32_pclmulqdq.o crc32_pclmulqdq.lo \ ++ crc32_vpclmulqdq.o crc32_vpclmulqdq.lo \ ++ slide_hash_avx2.o slide_hash_avx2.lo \ ++ slide_hash_sse2.o slide_hash_sse2.lo + +-x86.o: +- $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c ++x86_features.o: ++ $(CC) $(CFLAGS) $(XSAVEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/x86_features.c + +-x86.lo: +- $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c ++x86_features.lo: ++ $(CC) $(SFLAGS) $(XSAVEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/x86_features.c + +-chunkset_avx.o: +- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c ++chunkset_avx2.o: ++ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx2.c + +-chunkset_avx.lo: +- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c ++chunkset_avx2.lo: ++ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx2.c + +-chunkset_sse.o: +- $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c ++chunkset_sse2.o: ++ $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ 
$(SRCDIR)/chunkset_sse2.c + +-chunkset_sse.lo: +- $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c ++chunkset_sse2.lo: ++ $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse2.c + +-compare258_avx.o: +- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c ++chunkset_ssse3.o: ++ $(CC) $(CFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_ssse3.c + +-compare258_avx.lo: +- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c ++chunkset_ssse3.lo: ++ $(CC) $(SFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_ssse3.c + +-compare258_sse.o: +- $(CC) $(CFLAGS) $(SSE4FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c ++compare256_avx2.o: ++ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_avx2.c + +-compare258_sse.lo: +- $(CC) $(SFLAGS) $(SSE4FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c ++compare256_avx2.lo: ++ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_avx2.c + +-insert_string_sse.o: +- $(CC) $(CFLAGS) $(SSE4FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c ++compare256_sse2.o: ++ $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c + +-insert_string_sse.lo: +- $(CC) $(SFLAGS) $(SSE4FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c ++compare256_sse2.lo: ++ $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c + +-crc_folding.o: +- $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE4FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c ++insert_string_sse42.o: ++ $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c + +-crc_folding.lo: +- $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) 
$(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c ++insert_string_sse42.lo: ++ $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c + +-slide_avx.o: +- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c ++crc32_pclmulqdq.o: ++ $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c + +-slide_avx.lo: +- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c ++crc32_pclmulqdq.lo: ++ $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c + +-slide_sse.o: +- $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c ++crc32_vpclmulqdq.o: ++ $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + +-slide_sse.lo: +- $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c ++crc32_vpclmulqdq.lo: ++ $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + +-adler32_avx.o: $(SRCDIR)/adler32_avx.c +- $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c ++slide_hash_avx2.o: ++ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c + +-adler32_avx.lo: $(SRCDIR)/adler32_avx.c +- $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c ++slide_hash_avx2.lo: ++ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c ++ ++slide_hash_sse2.o: ++ $(CC) $(CFLAGS) $(SSE2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_sse2.c ++ ++slide_hash_sse2.lo: ++ $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_sse2.c ++ ++adler32_avx2.o: 
$(SRCDIR)/adler32_avx2.c ++ $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx2.c ++ ++adler32_avx2.lo: $(SRCDIR)/adler32_avx2.c ++ $(CC) $(SFLAGS) $(AVX2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx2.c ++ ++adler32_avx512.o: $(SRCDIR)/adler32_avx512.c ++ $(CC) $(CFLAGS) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx512.c ++ ++adler32_avx512.lo: $(SRCDIR)/adler32_avx512.c ++ $(CC) $(SFLAGS) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx512.c ++ ++adler32_avx512_vnni.o: $(SRCDIR)/adler32_avx512_vnni.c ++ $(CC) $(CFLAGS) $(AVX512VNNIFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx512_vnni.c ++ ++adler32_avx512_vnni.lo: $(SRCDIR)/adler32_avx512_vnni.c ++ $(CC) $(SFLAGS) $(AVX512VNNIFLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx512_vnni.c + + adler32_ssse3.o: $(SRCDIR)/adler32_ssse3.c + $(CC) $(CFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c + + adler32_ssse3.lo: $(SRCDIR)/adler32_ssse3.c +- $(CC) $(SFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c ++ $(CC) $(SFLAGS) $(SSSE3FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c ++ ++adler32_sse42.o: $(SRCDIR)/adler32_sse42.c ++ $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_sse42.c ++ ++adler32_sse42.lo: $(SRCDIR)/adler32_sse42.c ++ $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_sse42.c + + mostlyclean: clean + clean: +@@ -104,5 +143,5 @@ clean: + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +-distclean: ++distclean: clean + rm -f Makefile +diff --git a/arch/x86/adler32_avx2.c b/arch/x86/adler32_avx2.c +new file mode 100644 +index 0000000..e3ac670 +--- /dev/null ++++ b/arch/x86/adler32_avx2.c +@@ -0,0 +1,154 @@ ++/* adler32_avx2.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-2011 Mark Adler ++ * 
Copyright (C) 2022 Adam Stylinski ++ * Authors: ++ * Brian Bockelman ++ * Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef X86_AVX2 ++ ++#include "../../zbuild.h" ++#include ++#include "../../adler32_fold.h" ++#include "../../adler32_p.h" ++#include "adler32_avx2_p.h" ++#include "x86_intrins.h" ++ ++#ifdef X86_SSE42 ++extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len); ++ ++#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d) ++#define sub32(a, b, c) adler32_ssse3(a, b, c) ++#else ++#define copy_sub32(a, b, c, d) adler32_copy_len_16(adler0, c, b, d, adler1) ++#define sub32(a, b, c) adler32_len_16(adler0, b, c, adler1) ++#endif ++ ++static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) { ++ if (src == NULL) return 1L; ++ if (len == 0) return adler; ++ ++ uint32_t adler0, adler1; ++ adler1 = (adler >> 16) & 0xffff; ++ adler0 = adler & 0xffff; ++ ++rem_peel: ++ if (len < 16) { ++ if (COPY) { ++ return adler32_copy_len_16(adler0, src, dst, len, adler1); ++ } else { ++ return adler32_len_16(adler0, src, len, adler1); ++ } ++ } else if (len < 32) { ++ if (COPY) { ++ return copy_sub32(adler, dst, src, len); ++ } else { ++ return sub32(adler, src, len); ++ } ++ } ++ ++ __m256i vs1, vs2; ++ ++ const __m256i dot2v = _mm256_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, ++ 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); ++ const __m256i dot3v = _mm256_set1_epi16(1); ++ const __m256i zero = _mm256_setzero_si256(); ++ ++ while (len >= 32) { ++ vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0)); ++ vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1)); ++ __m256i vs1_0 = vs1; ++ __m256i vs3 = _mm256_setzero_si256(); ++ ++ size_t k = MIN(len, NMAX); ++ k -= k % 32; ++ 
len -= k; ++ ++ while (k >= 32) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 32 vs1 + sum( (32-i+1) c[i] ) ++ */ ++ __m256i vbuf = _mm256_loadu_si256((__m256i*)src); ++ src += 32; ++ k -= 32; ++ ++ __m256i vs1_sad = _mm256_sad_epu8(vbuf, zero); // Sum of abs diff, resulting in 2 x int32's ++ ++ if (COPY) { ++ _mm256_storeu_si256((__m256i*)dst, vbuf); ++ dst += 32; ++ } ++ ++ vs1 = _mm256_add_epi32(vs1, vs1_sad); ++ vs3 = _mm256_add_epi32(vs3, vs1_0); ++ __m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v); // sum 32 uint8s to 16 shorts ++ __m256i vsum2 = _mm256_madd_epi16(v_short_sum2, dot3v); // sum 16 shorts to 8 uint32s ++ vs2 = _mm256_add_epi32(vsum2, vs2); ++ vs1_0 = vs1; ++ } ++ ++ /* Defer the multiplication with 32 to outside of the loop */ ++ vs3 = _mm256_slli_epi32(vs3, 5); ++ vs2 = _mm256_add_epi32(vs2, vs3); ++ ++ /* The compiler is generating the following sequence for this integer modulus ++ * when done the scalar way, in GPRs: ++ ++ adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE) + ++ (s1_unpack[4] % BASE) + (s1_unpack[5] % BASE) + (s1_unpack[6] % BASE) + (s1_unpack[7] % BASE); ++ ++ mov $0x80078071,%edi // move magic constant into 32 bit register %edi ++ ... ++ vmovd %xmm1,%esi // move vector lane 0 to 32 bit register %esi ++ mov %rsi,%rax // zero-extend this value to 64 bit precision in %rax ++ imul %rdi,%rsi // do a signed multiplication with magic constant and vector element ++ shr $0x2f,%rsi // shift right by 47 ++ imul $0xfff1,%esi,%esi // do a signed multiplication with value truncated to 32 bits with 0xfff1 ++ sub %esi,%eax // subtract lower 32 bits of original vector value from modified one above ++ ... ++ // repeats for each element with vpextract instructions ++ ++ This is tricky with AVX2 for a number of reasons: ++ 1.) There's no 64 bit multiplication instruction, but there is a sequence to get there ++ 2.) 
There's ways to extend vectors to 64 bit precision, but no simple way to truncate ++ back down to 32 bit precision later (there is in AVX512) ++ 3.) Full width integer multiplications aren't cheap ++ ++ We can, however, do a relatively cheap sequence for horizontal sums. ++ Then, we simply do the integer modulus on the resulting 64 bit GPR, on a scalar value. It was ++ previously thought that casting to 64 bit precision was needed prior to the horizontal sum, but ++ that is simply not the case, as NMAX is defined as the maximum number of scalar sums that can be ++ performed on the maximum possible inputs before overflow ++ */ ++ ++ ++ /* In AVX2-land, this trip through GPRs will probably be unavoidable, as there's no cheap and easy ++ * conversion from 64 bit integer to 32 bit (needed for the inexpensive modulus with a constant). ++ * This casting to 32 bit is cheap through GPRs (just register aliasing). See above for exactly ++ * what the compiler is doing to avoid integer divisions. */ ++ adler0 = partial_hsum256(vs1) % BASE; ++ adler1 = hsum256(vs2) % BASE; ++ } ++ ++ adler = adler0 | (adler1 << 16); ++ ++ if (len) { ++ goto rem_peel; ++ } ++ ++ return adler; ++} ++ ++Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, size_t len) { ++ return adler32_fold_copy_impl(adler, NULL, src, len, 0); ++} ++ ++Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ return adler32_fold_copy_impl(adler, dst, src, len, 1); ++} ++ ++#endif +diff --git a/arch/x86/adler32_avx2_p.h b/arch/x86/adler32_avx2_p.h +new file mode 100644 +index 0000000..f0f8a4a +--- /dev/null ++++ b/arch/x86/adler32_avx2_p.h +@@ -0,0 +1,32 @@ ++/* adler32_avx2_p.h -- adler32 avx2 utility functions ++ * Copyright (C) 2022 Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef ADLER32_AVX2_P_H_ ++#define ADLER32_AVX2_P_H_ ++ ++#if defined(X86_AVX2) || defined(X86_AVX512VNNI) ++ 
++/* 32 bit horizontal sum, adapted from Agner Fog's vector library. */ ++static inline uint32_t hsum256(__m256i x) { ++ __m128i sum1 = _mm_add_epi32(_mm256_extracti128_si256(x, 1), ++ _mm256_castsi256_si128(x)); ++ __m128i sum2 = _mm_add_epi32(sum1, _mm_unpackhi_epi64(sum1, sum1)); ++ __m128i sum3 = _mm_add_epi32(sum2, _mm_shuffle_epi32(sum2, 1)); ++ return (uint32_t)_mm_cvtsi128_si32(sum3); ++} ++ ++static inline uint32_t partial_hsum256(__m256i x) { ++ /* We need a permutation vector to extract every other integer. The ++ * rest are going to be zeros */ ++ const __m256i perm_vec = _mm256_setr_epi32(0, 2, 4, 6, 1, 1, 1, 1); ++ __m256i non_zero = _mm256_permutevar8x32_epi32(x, perm_vec); ++ __m128i non_zero_sse = _mm256_castsi256_si128(non_zero); ++ __m128i sum2 = _mm_add_epi32(non_zero_sse,_mm_unpackhi_epi64(non_zero_sse, non_zero_sse)); ++ __m128i sum3 = _mm_add_epi32(sum2, _mm_shuffle_epi32(sum2, 1)); ++ return (uint32_t)_mm_cvtsi128_si32(sum3); ++} ++#endif ++ ++#endif +diff --git a/arch/x86/adler32_avx512.c b/arch/x86/adler32_avx512.c +new file mode 100644 +index 0000000..aa6cc17 +--- /dev/null ++++ b/arch/x86/adler32_avx512.c +@@ -0,0 +1,115 @@ ++/* adler32_avx512.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-2011 Mark Adler ++ * Authors: ++ * Adam Stylinski ++ * Brian Bockelman ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef X86_AVX512 ++ ++#include "../../zbuild.h" ++#include "../../adler32_p.h" ++#include "../../adler32_fold.h" ++#include "../../cpu_features.h" ++#include ++#include "x86_intrins.h" ++#include "adler32_avx512_p.h" ++ ++static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) { ++ if (src == NULL) return 1L; ++ if (len == 0) return adler; ++ ++ uint32_t adler0, adler1; ++ adler1 = (adler >> 16) & 0xffff; ++ adler0 = adler & 0xffff; ++ ++rem_peel: ++ if (len < 64) { ++ /* This handles the remaining 
copies, just call normal adler checksum after this */ ++ if (COPY) { ++ __mmask64 storemask = (0xFFFFFFFFFFFFFFFFUL >> (64 - len)); ++ __m512i copy_vec = _mm512_maskz_loadu_epi8(storemask, src); ++ _mm512_mask_storeu_epi8(dst, storemask, copy_vec); ++ } ++ ++#ifdef X86_AVX2 ++ return adler32_avx2(adler, src, len); ++#elif defined(X86_SSSE3) ++ return adler32_ssse3(adler, src, len); ++#else ++ return adler32_len_16(adler0, src, len, adler1); ++#endif ++ } ++ ++ __m512i vbuf, vs1_0, vs3; ++ ++ const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ++ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, ++ 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, 64); ++ const __m512i dot3v = _mm512_set1_epi16(1); ++ const __m512i zero = _mm512_setzero_si512(); ++ size_t k; ++ ++ while (len >= 64) { ++ __m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0)); ++ __m512i vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1)); ++ vs1_0 = vs1; ++ vs3 = _mm512_setzero_si512(); ++ ++ k = MIN(len, NMAX); ++ k -= k % 64; ++ len -= k; ++ ++ while (k >= 64) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 64 vs1 + sum( (64-i+1) c[i] ) ++ */ ++ vbuf = _mm512_loadu_si512(src); ++ ++ if (COPY) { ++ _mm512_storeu_si512(dst, vbuf); ++ dst += 64; ++ } ++ ++ src += 64; ++ k -= 64; ++ ++ __m512i vs1_sad = _mm512_sad_epu8(vbuf, zero); ++ __m512i v_short_sum2 = _mm512_maddubs_epi16(vbuf, dot2v); ++ vs1 = _mm512_add_epi32(vs1_sad, vs1); ++ vs3 = _mm512_add_epi32(vs3, vs1_0); ++ __m512i vsum2 = _mm512_madd_epi16(v_short_sum2, dot3v); ++ vs2 = _mm512_add_epi32(vsum2, vs2); ++ vs1_0 = vs1; ++ } ++ ++ vs3 = _mm512_slli_epi32(vs3, 6); ++ vs2 = _mm512_add_epi32(vs2, vs3); ++ ++ adler0 = partial_hsum(vs1) % BASE; ++ adler1 = _mm512_reduce_add_epu32(vs2) % BASE; ++ } ++ ++ adler = adler0 | (adler1 << 16); ++ ++ /* Process tail (len < 64). 
*/ ++ if (len) { ++ goto rem_peel; ++ } ++ ++ return adler; ++} ++ ++Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ return adler32_fold_copy_impl(adler, dst, src, len, 1); ++} ++ ++Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) { ++ return adler32_fold_copy_impl(adler, NULL, src, len, 0); ++} ++ ++#endif ++ +diff --git a/arch/x86/adler32_avx512_p.h b/arch/x86/adler32_avx512_p.h +new file mode 100644 +index 0000000..5b79d2a +--- /dev/null ++++ b/arch/x86/adler32_avx512_p.h +@@ -0,0 +1,46 @@ ++#ifndef AVX512_FUNCS_H ++#define AVX512_FUNCS_H ++ ++#include ++#include ++/* Written because *_add_epi32(a) sets off ubsan */ ++static inline uint32_t _mm512_reduce_add_epu32(__m512i x) { ++ __m256i a = _mm512_extracti64x4_epi64(x, 1); ++ __m256i b = _mm512_extracti64x4_epi64(x, 0); ++ ++ __m256i a_plus_b = _mm256_add_epi32(a, b); ++ __m128i c = _mm256_extracti128_si256(a_plus_b, 1); ++ __m128i d = _mm256_extracti128_si256(a_plus_b, 0); ++ __m128i c_plus_d = _mm_add_epi32(c, d); ++ ++ __m128i sum1 = _mm_unpackhi_epi64(c_plus_d, c_plus_d); ++ __m128i sum2 = _mm_add_epi32(sum1, c_plus_d); ++ __m128i sum3 = _mm_shuffle_epi32(sum2, 0x01); ++ __m128i sum4 = _mm_add_epi32(sum2, sum3); ++ ++ return _mm_cvtsi128_si32(sum4); ++} ++ ++static inline uint32_t partial_hsum(__m512i x) { ++ /* We need a permutation vector to extract every other integer. The ++ * rest are going to be zeros. Marking this const so the compiler stands ++ * a better chance of keeping this resident in a register through entire ++ * loop execution. 
We certainly have enough zmm registers (32) */ ++ const __m512i perm_vec = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, ++ 1, 1, 1, 1, 1, 1, 1, 1); ++ ++ __m512i non_zero = _mm512_permutexvar_epi32(perm_vec, x); ++ ++ /* From here, it's a simple 256 bit wide reduction sum */ ++ __m256i non_zero_avx = _mm512_castsi512_si256(non_zero); ++ ++ /* See Agner Fog's vectorclass for a decent reference. Essentially, phadd is ++ * pretty slow, much slower than the longer instruction sequence below */ ++ __m128i sum1 = _mm_add_epi32(_mm256_extracti128_si256(non_zero_avx, 1), ++ _mm256_castsi256_si128(non_zero_avx)); ++ __m128i sum2 = _mm_add_epi32(sum1,_mm_unpackhi_epi64(sum1, sum1)); ++ __m128i sum3 = _mm_add_epi32(sum2,_mm_shuffle_epi32(sum2, 1)); ++ return (uint32_t)_mm_cvtsi128_si32(sum3); ++} ++ ++#endif +diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c +new file mode 100644 +index 0000000..771f7eb +--- /dev/null ++++ b/arch/x86/adler32_avx512_vnni.c +@@ -0,0 +1,225 @@ ++/* adler32_avx512_vnni.c -- compute the Adler-32 checksum of a data stream ++ * Based on Brian Bockelman's AVX2 version ++ * Copyright (C) 1995-2011 Mark Adler ++ * Authors: ++ * Adam Stylinski ++ * Brian Bockelman ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifdef X86_AVX512VNNI ++ ++#include "../../zbuild.h" ++#include "../../adler32_p.h" ++#include "../../cpu_features.h" ++#include ++#include "../../adler32_fold.h" ++#include "x86_intrins.h" ++#include "adler32_avx512_p.h" ++#include "adler32_avx2_p.h" ++ ++Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size_t len) { ++ if (src == NULL) return 1L; ++ if (len == 0) return adler; ++ ++ uint32_t adler0, adler1; ++ adler1 = (adler >> 16) & 0xffff; ++ adler0 = adler & 0xffff; ++ ++rem_peel: ++ if (len < 32) ++#if defined(X86_SSSE3) ++ return adler32_ssse3(adler, src, len); ++#else ++ return adler32_len_16(adler0, src, len, adler1); ++#endif ++ ++ if (len < 
64) ++#ifdef X86_AVX2 ++ return adler32_avx2(adler, src, len); ++#elif defined(X86_SSSE3) ++ return adler32_ssse3(adler, src, len); ++#else ++ return adler32_len_16(adler0, src, len, adler1); ++#endif ++ ++ const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ++ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, ++ 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, 64); ++ ++ const __m512i zero = _mm512_setzero_si512(); ++ __m512i vs1, vs2; ++ ++ while (len >= 64) { ++ vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0)); ++ vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1)); ++ size_t k = MIN(len, NMAX); ++ k -= k % 64; ++ len -= k; ++ __m512i vs1_0 = vs1; ++ __m512i vs3 = _mm512_setzero_si512(); ++ /* We might get a tad bit more ILP here if we sum to a second register in the loop */ ++ __m512i vs2_1 = _mm512_setzero_si512(); ++ __m512i vbuf0, vbuf1; ++ ++ /* Remainder peeling */ ++ if (k % 128) { ++ vbuf1 = _mm512_loadu_si512((__m512i*)src); ++ ++ src += 64; ++ k -= 64; ++ ++ __m512i vs1_sad = _mm512_sad_epu8(vbuf1, zero); ++ vs1 = _mm512_add_epi32(vs1, vs1_sad); ++ vs3 = _mm512_add_epi32(vs3, vs1_0); ++ vs2 = _mm512_dpbusd_epi32(vs2, vbuf1, dot2v); ++ vs1_0 = vs1; ++ } ++ ++ /* Manually unrolled this loop by 2 for a decent amount of ILP */ ++ while (k >= 128) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 64 vs1 + sum( (64-i+1) c[i] ) ++ */ ++ vbuf0 = _mm512_loadu_si512((__m512i*)src); ++ vbuf1 = _mm512_loadu_si512((__m512i*)(src + 64)); ++ src += 128; ++ k -= 128; ++ ++ __m512i vs1_sad = _mm512_sad_epu8(vbuf0, zero); ++ vs1 = _mm512_add_epi32(vs1, vs1_sad); ++ vs3 = _mm512_add_epi32(vs3, vs1_0); ++ /* multiply-add, resulting in 16 ints. 
Fuse with sum stage from prior versions, as we now have the dp ++ * instructions to eliminate them */ ++ vs2 = _mm512_dpbusd_epi32(vs2, vbuf0, dot2v); ++ ++ vs3 = _mm512_add_epi32(vs3, vs1); ++ vs1_sad = _mm512_sad_epu8(vbuf1, zero); ++ vs1 = _mm512_add_epi32(vs1, vs1_sad); ++ vs2_1 = _mm512_dpbusd_epi32(vs2_1, vbuf1, dot2v); ++ vs1_0 = vs1; ++ } ++ ++ vs3 = _mm512_slli_epi32(vs3, 6); ++ vs2 = _mm512_add_epi32(vs2, vs3); ++ vs2 = _mm512_add_epi32(vs2, vs2_1); ++ ++ adler0 = partial_hsum(vs1) % BASE; ++ adler1 = _mm512_reduce_add_epu32(vs2) % BASE; ++ } ++ ++ adler = adler0 | (adler1 << 16); ++ ++ /* Process tail (len < 64). */ ++ if (len) { ++ goto rem_peel; ++ } ++ ++ return adler; ++} ++ ++Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ if (src == NULL) return 1L; ++ if (len == 0) return adler; ++ ++ uint32_t adler0, adler1; ++ adler1 = (adler >> 16) & 0xffff; ++ adler0 = adler & 0xffff; ++ ++rem_peel_copy: ++ if (len < 32) { ++ /* This handles the remaining copies, just call normal adler checksum after this */ ++ __mmask32 storemask = (0xFFFFFFFFUL >> (32 - len)); ++ __m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src); ++ _mm256_mask_storeu_epi8(dst, storemask, copy_vec); ++ ++#if defined(X86_SSSE3) ++ return adler32_ssse3(adler, src, len); ++#else ++ return adler32_len_16(adler0, src, len, adler1); ++#endif ++ } ++ ++ const __m256i dot2v = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ++ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); ++ ++ const __m256i zero = _mm256_setzero_si256(); ++ __m256i vs1, vs2; ++ ++ while (len >= 32) { ++ vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0)); ++ vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1)); ++ size_t k = MIN(len, NMAX); ++ k -= k % 32; ++ len -= k; ++ __m256i vs1_0 = vs1; ++ __m256i vs3 = _mm256_setzero_si256(); ++ /* We might get a tad bit more ILP here if we sum to a second register in 
the loop */ ++ __m256i vs2_1 = _mm256_setzero_si256(); ++ __m256i vbuf0, vbuf1; ++ ++ /* Remainder peeling */ ++ if (k % 64) { ++ vbuf1 = _mm256_loadu_si256((__m256i*)src); ++ _mm256_storeu_si256((__m256i*)dst, vbuf1); ++ dst += 32; ++ ++ src += 32; ++ k -= 32; ++ ++ __m256i vs1_sad = _mm256_sad_epu8(vbuf1, zero); ++ vs1 = _mm256_add_epi32(vs1, vs1_sad); ++ vs3 = _mm256_add_epi32(vs3, vs1_0); ++ vs2 = _mm256_dpbusd_epi32(vs2, vbuf1, dot2v); ++ vs1_0 = vs1; ++ } ++ ++ /* Manually unrolled this loop by 2 for a decent amount of ILP */ ++ while (k >= 64) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 32 vs1 + sum( (32-i+1) c[i] ) ++ */ ++ vbuf0 = _mm256_loadu_si256((__m256i*)src); ++ vbuf1 = _mm256_loadu_si256((__m256i*)(src + 32)); ++ _mm256_storeu_si256((__m256i*)dst, vbuf0); ++ _mm256_storeu_si256((__m256i*)(dst + 32), vbuf1); ++ dst += 64; ++ src += 64; ++ k -= 64; ++ ++ __m256i vs1_sad = _mm256_sad_epu8(vbuf0, zero); ++ vs1 = _mm256_add_epi32(vs1, vs1_sad); ++ vs3 = _mm256_add_epi32(vs3, vs1_0); ++ /* multiply-add, resulting in 8 ints. Fuse with sum stage from prior versions, as we now have the dp ++ * instructions to eliminate them */ ++ vs2 = _mm256_dpbusd_epi32(vs2, vbuf0, dot2v); ++ ++ vs3 = _mm256_add_epi32(vs3, vs1); ++ vs1_sad = _mm256_sad_epu8(vbuf1, zero); ++ vs1 = _mm256_add_epi32(vs1, vs1_sad); ++ vs2_1 = _mm256_dpbusd_epi32(vs2_1, vbuf1, dot2v); ++ vs1_0 = vs1; ++ } ++ ++ vs3 = _mm256_slli_epi32(vs3, 5); ++ vs2 = _mm256_add_epi32(vs2, vs3); ++ vs2 = _mm256_add_epi32(vs2, vs2_1); ++ ++ adler0 = partial_hsum256(vs1) % BASE; ++ adler1 = hsum256(vs2) % BASE; ++ } ++ ++ adler = adler0 | (adler1 << 16); ++ ++ /* Process tail (len < 32). 
*/ ++ if (len) { ++ goto rem_peel_copy; ++ } ++ ++ return adler; ++} ++ ++#endif +diff --git a/arch/x86/adler32_sse42.c b/arch/x86/adler32_sse42.c +new file mode 100644 +index 0000000..257a360 +--- /dev/null ++++ b/arch/x86/adler32_sse42.c +@@ -0,0 +1,121 @@ ++/* adler32_sse42.c -- compute the Adler-32 checksum of a data stream ++ * Copyright (C) 1995-2011 Mark Adler ++ * Authors: ++ * Adam Stylinski ++ * Brian Bockelman ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../../zbuild.h" ++#include "../../adler32_p.h" ++#include "../../adler32_fold.h" ++#include "adler32_ssse3_p.h" ++#include ++ ++#ifdef X86_SSE42 ++ ++Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { ++ uint32_t adler0, adler1; ++ adler1 = (adler >> 16) & 0xffff; ++ adler0 = adler & 0xffff; ++ ++rem_peel: ++ if (len < 16) { ++ return adler32_copy_len_16(adler0, src, dst, len, adler1); ++ } ++ ++ __m128i vbuf, vbuf_0; ++ __m128i vs1_0, vs3, vs1, vs2, vs2_0, v_sad_sum1, v_short_sum2, v_short_sum2_0, ++ v_sad_sum2, vsum2, vsum2_0; ++ __m128i zero = _mm_setzero_si128(); ++ const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); ++ const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); ++ const __m128i dot3v = _mm_set1_epi16(1); ++ size_t k; ++ ++ while (len >= 16) { ++ ++ k = MIN(len, NMAX); ++ k -= k % 16; ++ len -= k; ++ ++ vs1 = _mm_cvtsi32_si128(adler0); ++ vs2 = _mm_cvtsi32_si128(adler1); ++ ++ vs3 = _mm_setzero_si128(); ++ vs2_0 = _mm_setzero_si128(); ++ vs1_0 = vs1; ++ ++ while (k >= 32) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] ) ++ */ ++ vbuf = _mm_loadu_si128((__m128i*)src); ++ vbuf_0 = _mm_loadu_si128((__m128i*)(src + 16)); ++ src += 32; ++ k -= 32; ++ ++ v_sad_sum1 = _mm_sad_epu8(vbuf, zero); ++ v_sad_sum2 = _mm_sad_epu8(vbuf_0, zero); ++ _mm_storeu_si128((__m128i*)dst, 
vbuf); ++ _mm_storeu_si128((__m128i*)(dst + 16), vbuf_0); ++ dst += 32; ++ ++ v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v); ++ v_short_sum2_0 = _mm_maddubs_epi16(vbuf_0, dot2v_0); ++ ++ vs1 = _mm_add_epi32(v_sad_sum1, vs1); ++ vs3 = _mm_add_epi32(vs1_0, vs3); ++ ++ vsum2 = _mm_madd_epi16(v_short_sum2, dot3v); ++ vsum2_0 = _mm_madd_epi16(v_short_sum2_0, dot3v); ++ vs1 = _mm_add_epi32(v_sad_sum2, vs1); ++ vs2 = _mm_add_epi32(vsum2, vs2); ++ vs2_0 = _mm_add_epi32(vsum2_0, vs2_0); ++ vs1_0 = vs1; ++ } ++ ++ vs2 = _mm_add_epi32(vs2_0, vs2); ++ vs3 = _mm_slli_epi32(vs3, 5); ++ vs2 = _mm_add_epi32(vs3, vs2); ++ vs3 = _mm_setzero_si128(); ++ ++ while (k >= 16) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] ) ++ */ ++ vbuf = _mm_loadu_si128((__m128i*)src); ++ src += 16; ++ k -= 16; ++ ++ v_sad_sum1 = _mm_sad_epu8(vbuf, zero); ++ v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v_0); ++ ++ vs1 = _mm_add_epi32(v_sad_sum1, vs1); ++ vs3 = _mm_add_epi32(vs1_0, vs3); ++ vsum2 = _mm_madd_epi16(v_short_sum2, dot3v); ++ vs2 = _mm_add_epi32(vsum2, vs2); ++ vs1_0 = vs1; ++ ++ _mm_storeu_si128((__m128i*)dst, vbuf); ++ dst += 16; ++ } ++ ++ vs3 = _mm_slli_epi32(vs3, 4); ++ vs2 = _mm_add_epi32(vs2, vs3); ++ ++ adler0 = partial_hsum(vs1) % BASE; ++ adler1 = hsum(vs2) % BASE; ++ } ++ ++ /* If this is true, there's fewer than 16 elements remaining */ ++ if (len) { ++ goto rem_peel; ++ } ++ ++ return adler0 | (adler1 << 16); ++} ++ ++#endif +diff --git a/arch/x86/adler32_ssse3.c b/arch/x86/adler32_ssse3.c +index 101df4f..ae819d6 100644 +--- a/arch/x86/adler32_ssse3.c ++++ b/arch/x86/adler32_ssse3.c +@@ -1,20 +1,20 @@ +-/* adler32.c -- compute the Adler-32 checksum of a data stream ++/* adler32_ssse3.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011 Mark Adler + * Authors: ++ * Adam Stylinski + * Brian Bockelman + * For conditions of distribution and use, see copyright notice in zlib.h + */ + + #include "../../zbuild.h" +-#include 
"../../zutil.h" +- + #include "../../adler32_p.h" ++#include "adler32_ssse3_p.h" + +-#ifdef X86_SSSE3_ADLER32 ++#ifdef X86_SSSE3 + + #include + +-Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len) { ++Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t sum2; + + /* split Adler-32 into component sums */ +@@ -33,86 +33,124 @@ Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size + if (UNLIKELY(len < 16)) + return adler32_len_16(adler, buf, len, sum2); + +- uint32_t ALIGNED_(16) s1[4], s2[4]; +- +- s1[0] = s1[1] = s1[2] = 0; s1[3] = adler; +- s2[0] = s2[1] = s2[2] = 0; s2[3] = sum2; +- +- char ALIGNED_(16) dot1[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +- __m128i dot1v = _mm_load_si128((__m128i*)dot1); +- char ALIGNED_(16) dot2[16] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; +- __m128i dot2v = _mm_load_si128((__m128i*)dot2); +- short ALIGNED_(16) dot3[8] = {1, 1, 1, 1, 1, 1, 1, 1}; +- __m128i dot3v = _mm_load_si128((__m128i*)dot3); +- +- // We will need to multiply by +- //char ALIGNED_(16) shift[4] = {0, 0, 0, 4}; //{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4}; ++ const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); ++ const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); ++ const __m128i dot3v = _mm_set1_epi16(1); ++ const __m128i zero = _mm_setzero_si128(); ++ ++ __m128i vbuf, vs1_0, vs3, vs1, vs2, vs2_0, v_sad_sum1, v_short_sum2, v_short_sum2_0, ++ vbuf_0, v_sad_sum2, vsum2, vsum2_0; ++ ++ /* If our buffer is unaligned (likely), make the determination whether ++ * or not there's enough of a buffer to consume to make the scalar, aligning ++ * additions worthwhile or if it's worth it to just eat the cost of an unaligned ++ * load. 
This is a pretty simple test, just test if 16 - the remainder + len is ++ * < 16 */ ++ size_t max_iters = NMAX; ++ size_t rem = (uintptr_t)buf & 15; ++ size_t align_offset = 16 - rem; ++ size_t k = 0; ++ if (rem) { ++ if (len < 16 + align_offset) { ++ /* Let's eat the cost of this one unaligned load so that ++ * we don't completely skip over the vectorization. Doing ++ * 16 bytes at a time unaligned is better than 16 + <= 15 ++ * sums */ ++ vbuf = _mm_loadu_si128((__m128i*)buf); ++ len -= 16; ++ buf += 16; ++ vs1 = _mm_cvtsi32_si128(adler); ++ vs2 = _mm_cvtsi32_si128(sum2); ++ vs3 = _mm_setzero_si128(); ++ vs1_0 = vs1; ++ goto unaligned_jmp; ++ } ++ ++ for (size_t i = 0; i < align_offset; ++i) { ++ adler += *(buf++); ++ sum2 += adler; ++ } ++ ++ /* lop off the max number of sums based on the scalar sums done ++ * above */ ++ len -= align_offset; ++ max_iters -= align_offset; ++ } + +- char ALIGNED_(16) shift[16] = {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +- __m128i shiftv = _mm_load_si128((__m128i*)shift); + + while (len >= 16) { +- __m128i vs1 = _mm_load_si128((__m128i*)s1); +- __m128i vs2 = _mm_load_si128((__m128i*)s2); +- __m128i vs1_0 = vs1; +- +- int k = (len < NMAX ? (int)len : NMAX); +- k -= k % 16; +- len -= k; +- +- while (k >= 16) { +- /* +- vs1 = adler + sum(c[i]) +- vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] ) +- +- NOTE: 256-bit equivalents are: +- _mm256_maddubs_epi16 <- operates on 32 bytes to 16 shorts +- _mm256_madd_epi16 <- Sums 16 shorts to 8 int32_t. +- We could rewrite the below to use 256-bit instructions instead of 128-bit. +- */ +- __m128i vbuf = _mm_loadu_si128((__m128i*)buf); +- buf += 16; +- k -= 16; +- +- __m128i v_short_sum1 = _mm_maddubs_epi16(vbuf, dot1v); // multiply-add, resulting in 8 shorts. 
+- __m128i vsum1 = _mm_madd_epi16(v_short_sum1, dot3v); // sum 8 shorts to 4 int32_t; +- __m128i v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v); +- vs1 = _mm_add_epi32(vsum1, vs1); +- __m128i vsum2 = _mm_madd_epi16(v_short_sum2, dot3v); +- vs1_0 = _mm_sll_epi32(vs1_0, shiftv); +- vsum2 = _mm_add_epi32(vsum2, vs2); +- vs2 = _mm_add_epi32(vsum2, vs1_0); +- vs1_0 = vs1; +- } +- +- // At this point, we have partial sums stored in vs1 and vs2. There are AVX512 instructions that +- // would allow us to sum these quickly (VP4DPWSSD). For now, just unpack and move on. +- +- uint32_t ALIGNED_(16) s1_unpack[4]; +- uint32_t ALIGNED_(16) s2_unpack[4]; +- +- _mm_store_si128((__m128i*)s1_unpack, vs1); +- _mm_store_si128((__m128i*)s2_unpack, vs2); +- +- adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE); +- adler %= BASE; +- s1[3] = adler; +- +- sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE); +- sum2 %= BASE; +- s2[3] = sum2; +- } +- +- while (len) { +- len--; +- adler += *buf++; +- sum2 += adler; ++ vs1 = _mm_cvtsi32_si128(adler); ++ vs2 = _mm_cvtsi32_si128(sum2); ++ vs3 = _mm_setzero_si128(); ++ vs2_0 = _mm_setzero_si128(); ++ vs1_0 = vs1; ++ ++ k = (len < max_iters ? 
len : max_iters); ++ k -= k % 16; ++ len -= k; ++ ++ while (k >= 32) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] ) ++ */ ++ vbuf = _mm_load_si128((__m128i*)buf); ++ vbuf_0 = _mm_load_si128((__m128i*)(buf + 16)); ++ buf += 32; ++ k -= 32; ++ ++ v_sad_sum1 = _mm_sad_epu8(vbuf, zero); ++ v_sad_sum2 = _mm_sad_epu8(vbuf_0, zero); ++ vs1 = _mm_add_epi32(v_sad_sum1, vs1); ++ vs3 = _mm_add_epi32(vs1_0, vs3); ++ ++ vs1 = _mm_add_epi32(v_sad_sum2, vs1); ++ v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v); ++ vsum2 = _mm_madd_epi16(v_short_sum2, dot3v); ++ v_short_sum2_0 = _mm_maddubs_epi16(vbuf_0, dot2v_0); ++ vs2 = _mm_add_epi32(vsum2, vs2); ++ vsum2_0 = _mm_madd_epi16(v_short_sum2_0, dot3v); ++ vs2_0 = _mm_add_epi32(vsum2_0, vs2_0); ++ vs1_0 = vs1; ++ } ++ ++ vs2 = _mm_add_epi32(vs2_0, vs2); ++ vs3 = _mm_slli_epi32(vs3, 5); ++ vs2 = _mm_add_epi32(vs3, vs2); ++ vs3 = _mm_setzero_si128(); ++ ++ while (k >= 16) { ++ /* ++ vs1 = adler + sum(c[i]) ++ vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] ) ++ */ ++ vbuf = _mm_load_si128((__m128i*)buf); ++ buf += 16; ++ k -= 16; ++ ++unaligned_jmp: ++ v_sad_sum1 = _mm_sad_epu8(vbuf, zero); ++ vs1 = _mm_add_epi32(v_sad_sum1, vs1); ++ vs3 = _mm_add_epi32(vs1_0, vs3); ++ v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v_0); ++ vsum2 = _mm_madd_epi16(v_short_sum2, dot3v); ++ vs2 = _mm_add_epi32(vsum2, vs2); ++ vs1_0 = vs1; ++ } ++ ++ vs3 = _mm_slli_epi32(vs3, 4); ++ vs2 = _mm_add_epi32(vs2, vs3); ++ ++ /* We don't actually need to do a full horizontal sum, since psadbw is actually doing ++ * a partial reduction sum implicitly and only summing to integers in vector positions ++ * 0 and 2. This saves us some contention on the shuffle port(s) */ ++ adler = partial_hsum(vs1) % BASE; ++ sum2 = hsum(vs2) % BASE; ++ max_iters = NMAX; + } +- adler %= BASE; +- sum2 %= BASE; + +- /* return recombined sums */ +- return adler | (sum2 << 16); ++ /* Process tail (len < 16). 
*/ ++ return adler32_len_16(adler, buf, len, sum2); + } + + #endif +diff --git a/arch/x86/adler32_ssse3_p.h b/arch/x86/adler32_ssse3_p.h +new file mode 100644 +index 0000000..d7ec3fe +--- /dev/null ++++ b/arch/x86/adler32_ssse3_p.h +@@ -0,0 +1,29 @@ ++/* adler32_ssse3_p.h -- adler32 ssse3 utility functions ++ * Copyright (C) 2022 Adam Stylinski ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef ADLER32_SSSE3_P_H_ ++#define ADLER32_SSSE3_P_H_ ++ ++#ifdef X86_SSSE3 ++ ++#include ++#include ++ ++static inline uint32_t partial_hsum(__m128i x) { ++ __m128i second_int = _mm_srli_si128(x, 8); ++ __m128i sum = _mm_add_epi32(x, second_int); ++ return _mm_cvtsi128_si32(sum); ++} ++ ++static inline uint32_t hsum(__m128i x) { ++ __m128i sum1 = _mm_unpackhi_epi64(x, x); ++ __m128i sum2 = _mm_add_epi32(x, sum1); ++ __m128i sum3 = _mm_shuffle_epi32(sum2, 0x01); ++ __m128i sum4 = _mm_add_epi32(sum2, sum3); ++ return _mm_cvtsi128_si32(sum4); ++} ++#endif ++ ++#endif +diff --git a/arch/x86/chunkset_avx2.c b/arch/x86/chunkset_avx2.c +new file mode 100644 +index 0000000..70620b9 +--- /dev/null ++++ b/arch/x86/chunkset_avx2.c +@@ -0,0 +1,133 @@ ++/* chunkset_avx2.c -- AVX2 inline functions to copy small data chunks. 
/* One chunk is a full 256-bit AVX2 register. */
typedef __m256i chunk_t;

#define CHUNK_SIZE 32

#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
#define HAVE_CHUNK_MAG

/* Populate don't cares so that this is a direct lookup (with some indirection into the permute table), because dist can
 * never be 0 - 2, we'll start with an offset, subtracting 3 from the input */
/* Each entry is read by GET_CHUNK_MAG as (.idx, .remval): .idx is added to permute_table to find the shuffle
 * indices and .remval is handed back through *chunk_rem. The struct is declared in chunk_permute_table.h, so the
 * positional field order used here is presumably {idx, remval} -- TODO confirm against that header.
 * NOTE(review): for most distances below 16 the second value equals 32 % dist, but the entries for 10, 11 and 12
 * hold 32 - dist (22, 21, 20) instead -- confirm this is intended. */
static const lut_rem_pair perm_idx_lut[29] = {
    { 0, 2},                /* 3 */
    { 0, 0},                /* don't care */
    { 1 * 32, 2},           /* 5 */
    { 2 * 32, 2},           /* 6 */
    { 3 * 32, 4},           /* 7 */
    { 0 * 32, 0},           /* don't care */
    { 4 * 32, 5},           /* 9 */
    { 5 * 32, 22},          /* 10 */
    { 6 * 32, 21},          /* 11 */
    { 7 * 32, 20},          /* 12 */
    { 8 * 32, 6},           /* 13 */
    { 9 * 32, 4},           /* 14 */
    {10 * 32, 2},           /* 15 */
    { 0 * 32, 0},           /* don't care */
    {11 * 32, 15},          /* 17 */
    {11 * 32 + 16, 14},     /* 18 */
    {11 * 32 + 16 * 2, 13}, /* 19 */
    {11 * 32 + 16 * 3, 12}, /* 20 */
    {11 * 32 + 16 * 4, 11}, /* 21 */
    {11 * 32 + 16 * 5, 10}, /* 22 */
    {11 * 32 + 16 * 6, 9},  /* 23 */
    {11 * 32 + 16 * 7, 8},  /* 24 */
    {11 * 32 + 16 * 8, 7},  /* 25 */
    {11 * 32 + 16 * 9, 6},  /* 26 */
    {11 * 32 + 16 * 10, 5}, /* 27 */
    {11 * 32 + 16 * 11, 4}, /* 28 */
    {11 * 32 + 16 * 12, 3}, /* 29 */
    {11 * 32 + 16 * 13, 2}, /* 30 */
    {11 * 32 + 16 * 14, 1}  /* 31 */
};

/* Fill *chunk with the 2-byte value at `from`, repeated across all 16-bit lanes.
 * The value is fetched with memcpy, so `from` does not need to be aligned. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm256_set1_epi16(tmp);
}

/* Fill *chunk with the 4-byte value at `from`, repeated across all 32-bit lanes. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm256_set1_epi32(tmp);
}

/* Fill *chunk with the 8-byte value at `from`, repeated across all 64-bit lanes. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    int64_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm256_set1_epi64x(tmp);
}

/* Load 32 bytes from `s` into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    *chunk = _mm256_loadu_si256((__m256i *)s);
}

/* Store the 32 bytes of *chunk to `out` using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    _mm256_storeu_si256((__m256i *)out, *chunk);
}

/* Build a 32-byte chunk from the bytes at `buf` for a match distance `dist`, and write the table remainder for that
 * distance to *chunk_rem.
 *
 * `dist` indexes perm_idx_lut as dist - 3, so it must lie in 3..31; the table marks 4, 8 and 16 as "don't care".
 * For dist == 16 the first 16 bytes are simply duplicated into both halves. The other cases shuffle using indices
 * from permute_table, which is defined elsewhere -- presumably producing the dist-byte pattern repeated across the
 * chunk; verify against chunk_permute_table.h.
 *
 * Up to 32 bytes are read from `buf` regardless of dist (16 when dist <= 16). */
static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
    lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
    __m256i ret_vec;
    /* While technically we only need to read 4 or 8 bytes into this vector register for a lot of cases, GCC is
     * compiling this to a shared load for all branches, preferring the simpler code.  Given that the buf value isn't in
     * GPRs to begin with the 256 bit load is _probably_ just as inexpensive */
    *chunk_rem = lut_rem.remval;

    /* See note in chunkset_ssse3.c for why this is ok */
    __msan_unpoison(buf + dist, 32 - dist);

    if (dist < 16) {
        /* This simpler case still requires us to shuffle in 128 bit lanes, so we must apply a static offset after
         * broadcasting the first vector register to both halves. This is _marginally_ faster than doing two separate
         * shuffles and combining the halves later */
        const __m256i permute_xform =
            _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16);
        /* Aligned load: permute_table + idx must be 32-byte aligned */
        __m256i perm_vec = _mm256_load_si256((__m256i*)(permute_table+lut_rem.idx));
        __m128i ret_vec0 = _mm_loadu_si128((__m128i*)buf);
        perm_vec = _mm256_add_epi8(perm_vec, permute_xform);
        ret_vec = _mm256_inserti128_si256(_mm256_castsi128_si256(ret_vec0), ret_vec0, 1);
        ret_vec = _mm256_shuffle_epi8(ret_vec, perm_vec);
    } else if (dist == 16) {
        /* Early return: no shuffle needed, both halves are the same 16 bytes */
        __m128i ret_vec0 = _mm_loadu_si128((__m128i*)buf);
        return _mm256_inserti128_si256(_mm256_castsi128_si256(ret_vec0), ret_vec0, 1);
    } else {
        __m128i ret_vec0 = _mm_loadu_si128((__m128i*)buf);
        __m128i ret_vec1 = _mm_loadu_si128((__m128i*)(buf + 16));
        /* Take advantage of the fact that only the latter half of the 256 bit vector will actually differ */
        __m128i perm_vec1 = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx));
        /* Mask is all-ones for every byte whose permute index is below 16 */
        __m128i xlane_permutes = _mm_cmpgt_epi8(_mm_set1_epi8(16), perm_vec1);
        __m128i xlane_res  = _mm_shuffle_epi8(ret_vec0, perm_vec1);
        /* Since we can't wrap twice, we can simply keep the later half exactly how it is instead of having to _also_
         * shuffle those values */
        __m128i latter_half = _mm_blendv_epi8(ret_vec1, xlane_res, xlane_permutes);
        ret_vec = _mm256_inserti128_si256(_mm256_castsi128_si256(ret_vec0), latter_half, 1);
    }

    return ret_vec;
}
/* One chunk is a full 128-bit SSE2 register. */
typedef __m128i chunk_t;

#define CHUNK_SIZE 16

#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8

/* Broadcast the 2 bytes at `from` into every 16-bit lane of *chunk.
 * memcpy is used for the fetch so `from` may be unaligned. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t pattern;
    memcpy(&pattern, from, sizeof pattern);
    *chunk = _mm_set1_epi16(pattern);
}

/* Broadcast the 4 bytes at `from` into every 32-bit lane of *chunk. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t pattern;
    memcpy(&pattern, from, sizeof pattern);
    *chunk = _mm_set1_epi32(pattern);
}

/* Broadcast the 8 bytes at `from` into both 64-bit lanes of *chunk. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    int64_t pattern;
    memcpy(&pattern, from, sizeof pattern);
    *chunk = _mm_set1_epi64x(pattern);
}

/* Read 16 bytes from `s` (no alignment requirement) into *chunk. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const __m128i *vec_src = (const __m128i *)s;
    *chunk = _mm_loadu_si128(vec_src);
}

/* Write the 16 bytes of *chunk to `out` (no alignment requirement). */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    __m128i *vec_dst = (__m128i *)out;
    _mm_storeu_si128(vec_dst, *chunk);
}
/* One chunk is a full 128-bit register. */
typedef __m128i chunk_t;

#define CHUNK_SIZE 16

#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
#define HAVE_CHUNK_MAG
#define HAVE_CHUNKCOPY
#define HAVE_CHUNKUNROLL

/* Lookup indexed by dist - 3 (see GET_CHUNK_MAG), covering dist 3..15. Each entry is read as (.idx, .remval):
 * .idx is added to permute_table to find the shuffle indices and .remval is returned through *chunk_rem.
 * For every populated entry the second value equals 16 % dist; 4 and 8 are marked "don't care".
 * The struct is declared in chunk_permute_table.h -- field order presumably {idx, remval}; TODO confirm. */
static const lut_rem_pair perm_idx_lut[13] = {
    {0, 1},      /* 3 */
    {0, 0},      /* don't care */
    {1 * 32, 1}, /* 5 */
    {2 * 32, 4}, /* 6 */
    {3 * 32, 2}, /* 7 */
    {0 * 32, 0}, /* don't care */
    {4 * 32, 7}, /* 9 */
    {5 * 32, 6}, /* 10 */
    {6 * 32, 5}, /* 11 */
    {7 * 32, 4}, /* 12 */
    {8 * 32, 3}, /* 13 */
    {9 * 32, 2}, /* 14 */
    {10 * 32, 1},/* 15 */
};


/* Fill *chunk with the 2-byte value at `from`, repeated across all 16-bit lanes.
 * The value is fetched with memcpy, so `from` does not need to be aligned. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm_set1_epi16(tmp);
}

/* Fill *chunk with the 4-byte value at `from`, repeated across all 32-bit lanes. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm_set1_epi32(tmp);
}

/* Fill *chunk with the 8-byte value at `from`, repeated across both 64-bit lanes. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    int64_t tmp;
    memcpy(&tmp, from, sizeof(tmp));
    *chunk = _mm_set1_epi64x(tmp);
}

/* Load 16 bytes from `s` into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    *chunk = _mm_loadu_si128((__m128i *)s);
}

/* Store the 16 bytes of *chunk to `out` using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    _mm_storeu_si128((__m128i *)out, *chunk);
}

/* Build a 16-byte chunk from the bytes at `buf` for a match distance `dist`, and write the table remainder for
 * that distance to *chunk_rem.
 *
 * `dist` indexes perm_idx_lut as dist - 3, so it must lie in 3..15. A full 16 bytes are always read from `buf`,
 * then rearranged with a byte shuffle whose indices come from permute_table (defined elsewhere) -- presumably
 * yielding the dist-byte pattern repeated across the chunk; verify against chunk_permute_table.h. */
static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) {
    lut_rem_pair lut_rem = perm_idx_lut[dist - 3];
    __m128i perm_vec, ret_vec;
    /* Important to note:
     * This is _not_ to subvert the memory sanitizer but to instead unpoison some
     * bytes we willingly and purposefully load uninitialized that we swizzle over
     * in a vector register, anyway.  If what we assume is wrong about what is used,
     * the memory sanitizer will still usefully flag it */
    __msan_unpoison(buf + dist, 16 - dist);
    ret_vec = _mm_loadu_si128((__m128i*)buf);
    *chunk_rem = lut_rem.remval;

    /* Aligned load: permute_table + idx must be 16-byte aligned */
    perm_vec = _mm_load_si128((__m128i*)(permute_table + lut_rem.idx));
    ret_vec = _mm_shuffle_epi8(ret_vec, perm_vec);

    return ret_vec;
}
/* Return the length of the common prefix of src0 and src1, capped at 256.
 *
 * The buffers are compared 32 bytes at a time with unaligned loads; the loop body is unrolled twice, so each
 * iteration covers 64 bytes. On the first differing 32-byte group the index of the first mismatching byte is
 * returned; if all 256 bytes match, 256 is returned.
 *
 * Each load reads a full 32 bytes, so both buffers must be readable up to the end of the 32-byte group that
 * contains the first mismatch (or all 256 bytes when there is none). */
static inline uint32_t compare256_avx2_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        __m256i ymm_src0, ymm_src1, ymm_cmp;
        ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
        ymm_src1 = _mm256_loadu_si256((__m256i*)src1);
        ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1); /* non-identical bytes = 00, identical bytes = FF */
        /* One mask bit per byte: bit i is set when byte i matched */
        unsigned mask = (unsigned)_mm256_movemask_epi8(ymm_cmp);
        if (mask != 0xFFFFFFFF) {
            uint32_t match_byte = (uint32_t)__builtin_ctz(~mask); /* Invert bits so identical = 0 */
            return len + match_byte;
        }

        src0 += 32, src1 += 32, len += 32;

        /* Second half of the unrolled iteration: same check on the next 32 bytes */
        ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
        ymm_src1 = _mm256_loadu_si256((__m256i*)src1);
        ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1);
        mask = (unsigned)_mm256_movemask_epi8(ymm_cmp);
        if (mask != 0xFFFFFFFF) {
            uint32_t match_byte = (uint32_t)__builtin_ctz(~mask);
            return len + match_byte;
        }

        src0 += 32, src1 += 32, len += 32;
    } while (len < 256);

    return 256;
}

/* Externally visible wrapper around compare256_avx2_static. */
Z_INTERNAL uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1) {
    return compare256_avx2_static(src0, src1);
}
/* Return the length of the common prefix of src0 and src1, capped at 256.
 *
 * Strategy:
 *   1. Compare the first 16 bytes with unaligned loads.
 *   2. Advance both pointers so that src0 becomes 16-byte aligned (a full 16 bytes if it already was), then
 *      compare 16 bytes per iteration using an aligned load for src0 and an unaligned load for src1.
 *   3. If src0 was not aligned to begin with, the loop stops short of byte 256, so the final 16 bytes
 *      (offsets 240..255) are compared with one more, overlapping, unaligned load.
 *
 * Every load reads a full 16 bytes from each buffer. */
static inline uint32_t compare256_sse2_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    int align_offset = ((uintptr_t)src0) & 15;
    const uint8_t *end0 = src0 + 256;
    const uint8_t *end1 = src1 + 256;
    __m128i xmm_src0, xmm_src1, xmm_cmp;

    /* Do the first load unaligned, then all subsequent ones we have at least
     * one aligned load. Sadly aligning both loads is probably unrealistic */
    xmm_src0 = _mm_loadu_si128((__m128i*)src0);
    xmm_src1 = _mm_loadu_si128((__m128i*)src1);
    xmm_cmp = _mm_cmpeq_epi8(xmm_src0, xmm_src1);

    /* One mask bit per byte: bit i is set when byte i matched */
    unsigned mask = (unsigned)_mm_movemask_epi8(xmm_cmp);

    /* Compiler _may_ turn this branch into a ptest + movemask,
     * since a lot of those uops are shared and fused */
    if (mask != 0xFFFF) {
        uint32_t match_byte = (uint32_t)__builtin_ctz(~mask);
        return len + match_byte;
    }

    /* The bytes skipped here were all covered by the first (matching) load */
    int align_adv = 16 - align_offset;
    len += align_adv;
    src0 += align_adv;
    src1 += align_adv;

    /* Do a flooring division (should just be a shift right) */
    int num_iter = (256 - len) / 16;

    for (int i = 0; i < num_iter; ++i) {
        xmm_src0 = _mm_load_si128((__m128i*)src0);
        xmm_src1 = _mm_loadu_si128((__m128i*)src1);
        xmm_cmp = _mm_cmpeq_epi8(xmm_src0, xmm_src1);

        mask = (unsigned)_mm_movemask_epi8(xmm_cmp);

        /* Compiler _may_ turn this branch into a ptest + movemask,
         * since a lot of those uops are shared and fused */
        if (mask != 0xFFFF) {
            uint32_t match_byte = (uint32_t)__builtin_ctz(~mask);
            return len + match_byte;
        }

        len += 16, src0 += 16, src1 += 16;
    }

    if (align_offset) {
        /* Fewer than 16 bytes are left unchecked; re-check the last 16 bytes of the
         * 256-byte window with a load that overlaps bytes already known to match */
        src0 = end0 - 16;
        src1 = end1 - 16;
        len = 256 - 16;

        xmm_src0 = _mm_loadu_si128((__m128i*)src0);
        xmm_src1 = _mm_loadu_si128((__m128i*)src1);
        xmm_cmp = _mm_cmpeq_epi8(xmm_src0, xmm_src1);

        mask = (unsigned)_mm_movemask_epi8(xmm_cmp);

        if (mask != 0xFFFF) {
            uint32_t match_byte = (uint32_t)__builtin_ctz(~mask);
            return len + match_byte;
        }
    }

    return 256;
}

/* Externally visible wrapper around compare256_sse2_static. */
Z_INTERNAL uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1) {
    return compare256_sse2_static(src0, src1);
}
/* Template body, compiled twice by the including file: once without COPY (CRC32_FOLD) and once with COPY defined
 * (CRC32_FOLD_COPY).
 *
 * Folds `len` bytes at `src` into the four-register state held in crc->fold and writes the updated state back.
 * The COPY variant additionally copies the input bytes to `dst` as it goes. The non-COPY variant takes
 * `init_crc`; when it is non-zero it is XORed into the first 16 bytes of data that get folded (the
 * XOR_INITIAL128 / `first` mechanism).
 *
 * Processing order: an unaligned head that brings `src` up to 16-byte alignment, an optional 256-byte-stride
 * VPCLMULQDQ loop, a 64-byte-stride loop, one step for the remaining whole 16-byte blocks, and finally a partial
 * block of fewer than 16 bytes. */
#ifdef COPY
Z_INTERNAL void CRC32_FOLD_COPY(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
#else
Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) {
#endif
    unsigned long algn_diff;
    __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
    __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3;
    __m128i xmm_crc_part = _mm_setzero_si128();
#ifdef COPY
    char ALIGNED_(16) partial_buf[16] = { 0 };
#else
    __m128i xmm_initial = _mm_cvtsi32_si128(init_crc);
    /* Consumed (and cleared) by the XOR_INITIAL* macros the first time data is folded */
    int32_t first = init_crc != 0;

    /* Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31
     * bytes of input is needed for the aligning load that occurs.  If there's an initial CRC, to
     * carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available, which
     * by definition can be up to 15 bytes + one full vector load. */
    assert(len >= 31 || first == 0);
#endif
    crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);

    if (len < 16) {
#ifdef COPY
        if (len == 0)
            return;

        /* Stage the short input in an aligned scratch buffer so it can be loaded as one vector */
        memcpy(partial_buf, src, len);
        xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf);
        memcpy(dst, partial_buf, len);
#endif
        goto partial;
    }

    /* Number of bytes (0..15) needed to bring src up to the next 16-byte boundary */
    algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
    if (algn_diff) {
        xmm_crc_part = _mm_loadu_si128((__m128i *)src);
#ifdef COPY
        _mm_storeu_si128((__m128i *)dst, xmm_crc_part);
        dst += algn_diff;
#else
        XOR_INITIAL128(xmm_crc_part);

        /* With fewer than 4 head bytes, fold the full first vector and take the head from the next one */
        if (algn_diff < 4 && init_crc != 0) {
            xmm_t0 = xmm_crc_part;
            xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1);
            fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
            xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
            src += 16;
            len -= 16;
        }
#endif

        partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);

        src += algn_diff;
        len -= algn_diff;
    }

#ifdef X86_VPCLMULQDQ
    if (len >= 256) {
#ifdef COPY
        size_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
        dst += n;
#else
        size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len,
            xmm_initial, first);
        first = 0;
#endif
        len -= n;
        src += n;
    }
#endif

    /* Main loop: 64 bytes per iteration, using aligned loads */
    while (len >= 64) {
        len -= 64;
        xmm_t0 = _mm_load_si128((__m128i *)src);
        xmm_t1 = _mm_load_si128((__m128i *)src + 1);
        xmm_t2 = _mm_load_si128((__m128i *)src + 2);
        xmm_t3 = _mm_load_si128((__m128i *)src + 3);
        src += 64;

        fold_4(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
#ifdef COPY
        _mm_storeu_si128((__m128i *)dst, xmm_t0);
        _mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
        _mm_storeu_si128((__m128i *)dst + 2, xmm_t2);
        _mm_storeu_si128((__m128i *)dst + 3, xmm_t3);
        dst += 64;
#else
        XOR_INITIAL128(xmm_t0);
#endif

        xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0);
        xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1);
        xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2);
        xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3);
    }

    /*
     * len = num bytes left (fewer than 64); fold the remaining whole 16-byte blocks in one step
     */
    if (len >= 48) {
        len -= 48;

        xmm_t0 = _mm_load_si128((__m128i *)src);
        xmm_t1 = _mm_load_si128((__m128i *)src + 1);
        xmm_t2 = _mm_load_si128((__m128i *)src + 2);
        src += 48;
#ifdef COPY
        _mm_storeu_si128((__m128i *)dst, xmm_t0);
        _mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
        _mm_storeu_si128((__m128i *)dst + 2, xmm_t2);
        dst += 48;
#else
        XOR_INITIAL128(xmm_t0);
#endif
        fold_3(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);

        xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0);
        xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1);
        xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2);
    } else if (len >= 32) {
        len -= 32;

        xmm_t0 = _mm_load_si128((__m128i *)src);
        xmm_t1 = _mm_load_si128((__m128i *)src + 1);
        src += 32;
#ifdef COPY
        _mm_storeu_si128((__m128i *)dst, xmm_t0);
        _mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
        dst += 32;
#else
        XOR_INITIAL128(xmm_t0);
#endif
        fold_2(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);

        xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0);
        xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1);
    } else if (len >= 16) {
        len -= 16;
        xmm_t0 = _mm_load_si128((__m128i *)src);
        src += 16;
#ifdef COPY
        _mm_storeu_si128((__m128i *)dst, xmm_t0);
        dst += 16;
#else
        XOR_INITIAL128(xmm_t0);
#endif
        fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);

        xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
    }

partial:
    /* Trailing 1..15 bytes: copy them into a vector register and fold them as a partial block */
    if (len) {
        memcpy(&xmm_crc_part, src, len);
#ifdef COPY
        _mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
        memcpy(dst, partial_buf, len);
#endif
        partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
    }

    crc32_fold_save((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
}
/* crc32_fold_vpclmulqdq_tpl.h -- VPCLMULQDQ-based CRC32 folding template.
 * Copyright Wangyang Guo (wangyang.guo@intel.com)
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* Template body, compiled twice by the including file: fold_16_vpclmulqdq (no COPY) and
 * fold_16_vpclmulqdq_copy (COPY defined).
 *
 * Folds input in 256-byte strides using four 512-bit accumulators. The incoming four 128-bit fold registers are
 * merged into the first accumulator on entry, and the four accumulators are reduced back into
 * *xmm_crc0..*xmm_crc3 on exit. The COPY variant also writes every loaded block to `dst`.
 *
 * The first 256 bytes are loaded unconditionally, so the caller must guarantee len >= 256.
 *
 * In the non-COPY variant `init_crc` is XORed into the first 64 bytes when `first` is non-zero
 * (see XOR_INITIAL512); `first` is a by-value copy, so the caller must clear its own flag afterwards.
 *
 * Returns the number of bytes consumed, which is a multiple of 256. */
#ifdef COPY
static size_t fold_16_vpclmulqdq_copy(__m128i *xmm_crc0, __m128i *xmm_crc1,
    __m128i *xmm_crc2, __m128i *xmm_crc3, uint8_t *dst, const uint8_t *src, size_t len) {
#else
static size_t fold_16_vpclmulqdq(__m128i *xmm_crc0, __m128i *xmm_crc1,
    __m128i *xmm_crc2, __m128i *xmm_crc3, const uint8_t *src, size_t len,
    __m128i init_crc, int32_t first) {
    __m512i zmm_initial = _mm512_zextsi128_si512(init_crc);
#endif
    __m512i zmm_t0, zmm_t1, zmm_t2, zmm_t3;
    __m512i zmm_crc0, zmm_crc1, zmm_crc2, zmm_crc3;
    __m512i z0, z1, z2, z3;
    size_t len_tmp = len;
    /* Same constant pair as xmm_fold4 in the 128-bit fold_N helpers, replicated to every 128-bit lane */
    const __m512i zmm_fold4 = _mm512_set4_epi32(
        0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
    /* Constant pair used by the 256-byte-stride loop below */
    const __m512i zmm_fold16 = _mm512_set4_epi32(
        0x00000001, 0x1542778a, 0x00000001, 0x322d1430);

    // zmm register init
    zmm_crc0 = _mm512_setzero_si512();
    zmm_t0 = _mm512_loadu_si512((__m512i *)src);
#ifndef COPY
    XOR_INITIAL512(zmm_t0);
#endif
    zmm_crc1 = _mm512_loadu_si512((__m512i *)src + 1);
    zmm_crc2 = _mm512_loadu_si512((__m512i *)src + 2);
    zmm_crc3 = _mm512_loadu_si512((__m512i *)src + 3);

    /* already have intermediate CRC in xmm registers
     * fold4 with 4 xmm_crc to get zmm_crc0
     */
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc0, 0);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc1, 1);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc2, 2);
    zmm_crc0 = _mm512_inserti32x4(zmm_crc0, *xmm_crc3, 3);
    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    /* imm8 0x96 is the truth table of a three-input XOR: crc0 ^ z0 ^ t0 */
    zmm_crc0 = _mm512_ternarylogic_epi32(zmm_crc0, z0, zmm_t0, 0x96);

#ifdef COPY
    _mm512_storeu_si512((__m512i *)dst, zmm_t0);
    _mm512_storeu_si512((__m512i *)dst + 1, zmm_crc1);
    _mm512_storeu_si512((__m512i *)dst + 2, zmm_crc2);
    _mm512_storeu_si512((__m512i *)dst + 3, zmm_crc3);
    dst += 256;
#endif
    len -= 256;
    src += 256;

    // fold-16 loops
    while (len >= 256) {
        zmm_t0 = _mm512_loadu_si512((__m512i *)src);
        zmm_t1 = _mm512_loadu_si512((__m512i *)src + 1);
        zmm_t2 = _mm512_loadu_si512((__m512i *)src + 2);
        zmm_t3 = _mm512_loadu_si512((__m512i *)src + 3);

        z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold16, 0x01);
        z1 = _mm512_clmulepi64_epi128(zmm_crc1, zmm_fold16, 0x01);
        z2 = _mm512_clmulepi64_epi128(zmm_crc2, zmm_fold16, 0x01);
        z3 = _mm512_clmulepi64_epi128(zmm_crc3, zmm_fold16, 0x01);

        zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold16, 0x10);
        zmm_crc1 = _mm512_clmulepi64_epi128(zmm_crc1, zmm_fold16, 0x10);
        zmm_crc2 = _mm512_clmulepi64_epi128(zmm_crc2, zmm_fold16, 0x10);
        zmm_crc3 = _mm512_clmulepi64_epi128(zmm_crc3, zmm_fold16, 0x10);

        zmm_crc0 = _mm512_ternarylogic_epi32(zmm_crc0, z0, zmm_t0, 0x96);
        zmm_crc1 = _mm512_ternarylogic_epi32(zmm_crc1, z1, zmm_t1, 0x96);
        zmm_crc2 = _mm512_ternarylogic_epi32(zmm_crc2, z2, zmm_t2, 0x96);
        zmm_crc3 = _mm512_ternarylogic_epi32(zmm_crc3, z3, zmm_t3, 0x96);

#ifdef COPY
        _mm512_storeu_si512((__m512i *)dst, zmm_t0);
        _mm512_storeu_si512((__m512i *)dst + 1, zmm_t1);
        _mm512_storeu_si512((__m512i *)dst + 2, zmm_t2);
        _mm512_storeu_si512((__m512i *)dst + 3, zmm_t3);
        dst += 256;
#endif
        len -= 256;
        src += 256;
    }
    // zmm_crc[0,1,2,3] -> zmm_crc0
    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_ternarylogic_epi32(zmm_crc0, z0, zmm_crc1, 0x96);

    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_ternarylogic_epi32(zmm_crc0, z0, zmm_crc2, 0x96);

    z0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x01);
    zmm_crc0 = _mm512_clmulepi64_epi128(zmm_crc0, zmm_fold4, 0x10);
    zmm_crc0 = _mm512_ternarylogic_epi32(zmm_crc0, z0, zmm_crc3, 0x96);

    // zmm_crc0 -> xmm_crc[0, 1, 2, 3]
    *xmm_crc0 = _mm512_extracti32x4_epi32(zmm_crc0, 0);
    *xmm_crc1 = _mm512_extracti32x4_epi32(zmm_crc0, 1);
    *xmm_crc2 = _mm512_extracti32x4_epi32(zmm_crc0, 2);
    *xmm_crc3 = _mm512_extracti32x4_epi32(zmm_crc0, 3);

    return (len_tmp - len);  // return n bytes processed
}
/* Advance the four-register fold state by one 16-byte block.
 * On return: crc0 = old crc1, crc1 = old crc2, crc2 = old crc3, and crc3 = old crc0 multiplied by the fold
 * constants (the two carry-less products XORed together). The caller then XORs the new data block into crc3. */
static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
    const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
                                             0x00000001, 0xc6e41596);
    __m128i x_tmp3;
    __m128 ps_crc0, ps_crc3, ps_res;

    x_tmp3 = *xmm_crc3;

    *xmm_crc3 = *xmm_crc0;
    *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
    *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
    ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
    ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
    ps_res = _mm_xor_ps(ps_crc0, ps_crc3);

    *xmm_crc0 = *xmm_crc1;
    *xmm_crc1 = *xmm_crc2;
    *xmm_crc2 = x_tmp3;
    *xmm_crc3 = _mm_castps_si128(ps_res);
}

/* Advance the fold state by two 16-byte blocks.
 * On return: crc0 = old crc2, crc1 = old crc3, crc2 = folded old crc0, crc3 = folded old crc1.
 * The caller then XORs the two new data blocks into crc2 and crc3. */
static void fold_2(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
    const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
                                             0x00000001, 0xc6e41596);
    __m128i x_tmp3, x_tmp2;
    __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20;

    x_tmp3 = *xmm_crc3;
    x_tmp2 = *xmm_crc2;

    *xmm_crc3 = *xmm_crc1;
    *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
    *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
    ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
    ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
    ps_res31 = _mm_xor_ps(ps_crc3, ps_crc1);

    *xmm_crc2 = *xmm_crc0;
    *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
    *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10);
    ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
    ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
    ps_res20 = _mm_xor_ps(ps_crc0, ps_crc2);

    *xmm_crc0 = x_tmp2;
    *xmm_crc1 = x_tmp3;
    *xmm_crc2 = _mm_castps_si128(ps_res20);
    *xmm_crc3 = _mm_castps_si128(ps_res31);
}

/* Advance the fold state by three 16-byte blocks.
 * On return: crc0 = old crc3, crc1 = folded old crc0, crc2 = folded old crc1, crc3 = folded old crc2.
 * The caller then XORs the three new data blocks into crc1, crc2 and crc3. */
static void fold_3(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
    const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
                                             0x00000001, 0xc6e41596);
    __m128i x_tmp3;
    __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10;

    x_tmp3 = *xmm_crc3;

    *xmm_crc3 = *xmm_crc2;
    *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01);
    *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
    ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
    ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
    ps_res32 = _mm_xor_ps(ps_crc2, ps_crc3);

    *xmm_crc2 = *xmm_crc1;
    *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
    *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10);
    ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
    ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
    ps_res21 = _mm_xor_ps(ps_crc1, ps_crc2);

    *xmm_crc1 = *xmm_crc0;
    *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
    *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x10);
    ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
    ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
    ps_res10 = _mm_xor_ps(ps_crc0, ps_crc1);

    *xmm_crc0 = x_tmp3;
    *xmm_crc1 = _mm_castps_si128(ps_res10);
    *xmm_crc2 = _mm_castps_si128(ps_res21);
    *xmm_crc3 = _mm_castps_si128(ps_res32);
}

/* Advance the fold state by four 16-byte blocks: every register is folded in place (no rotation).
 * The caller then XORs the four new data blocks into crc0..crc3. */
static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
    const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
                                             0x00000001, 0xc6e41596);
    __m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3;
    __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3;
    __m128 ps_t0, ps_t1, ps_t2, ps_t3;
    __m128 ps_res0, ps_res1, ps_res2, ps_res3;

    x_tmp0 = *xmm_crc0;
    x_tmp1 = *xmm_crc1;
    x_tmp2 = *xmm_crc2;
    x_tmp3 = *xmm_crc3;

    *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
    x_tmp0 = _mm_clmulepi64_si128(x_tmp0, xmm_fold4, 0x10);
    ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
    ps_t0 = _mm_castsi128_ps(x_tmp0);
    ps_res0 = _mm_xor_ps(ps_crc0, ps_t0);

    *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
    x_tmp1 = _mm_clmulepi64_si128(x_tmp1, xmm_fold4, 0x10);
    ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
    ps_t1 = _mm_castsi128_ps(x_tmp1);
    ps_res1 = _mm_xor_ps(ps_crc1, ps_t1);

    *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01);
    x_tmp2 = _mm_clmulepi64_si128(x_tmp2, xmm_fold4, 0x10);
    ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
    ps_t2 = _mm_castsi128_ps(x_tmp2);
    ps_res2 = _mm_xor_ps(ps_crc2, ps_t2);

    *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x01);
    x_tmp3 = _mm_clmulepi64_si128(x_tmp3, xmm_fold4, 0x10);
    ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
    ps_t3 = _mm_castsi128_ps(x_tmp3);
    ps_res3 = _mm_xor_ps(ps_crc3, ps_t3);

    *xmm_crc0 = _mm_castps_si128(ps_res0);
    *xmm_crc1 = _mm_castps_si128(ps_res1);
    *xmm_crc2 = _mm_castps_si128(ps_res2);
    *xmm_crc3 = _mm_castps_si128(ps_res3);
}

/* pshufb control masks for byte-shifting a 128-bit register, one 16-byte row per shift amount.
 * Row (n - 1) shifts left by (16 - n) bytes; XORing that row with 0x80 in every byte gives the mask that shifts
 * right by n bytes (pshufb zeroes any output byte whose index has the top bit set).
 * There are 15 rows (n = 1..15), read by partial_fold as pshufb_shf_table + 4 * (len - 1). */
static const unsigned ALIGNED_(32) pshufb_shf_table[60] = {
    0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */
    0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e, /* shl 14 (16 - 2)/shr2 */
    0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f, /* shl 13 (16 - 3)/shr3 */
    0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100, /* shl 12 (16 - 4)/shr4 */
    0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201, /* shl 11 (16 - 5)/shr5 */
    0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302, /* shl 10 (16 - 6)/shr6 */
    0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403, /* shl  9 (16 - 7)/shr7 */
    0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504, /* shl  8 (16 - 8)/shr8 */
    0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605, /* shl  7 (16 - 9)/shr9 */
    0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706, /* shl  6 (16 -10)/shr10*/
    0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807, /* shl  5 (16 -11)/shr11*/
    0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908, /* shl  4 (16 -12)/shr12*/
    0x008f8e8d, 0x04030201, 0x08070605, 0x0c0b0a09, /* shl  3 (16 -13)/shr13*/
    0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a, /* shl  2 (16 -14)/shr14*/
    0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b  /* shl  1 (16 -15)/shr15*/
};

/* Fold a partial block of `len` bytes into the state.
 *
 * The 64-byte state crc0..crc3 is shifted down by `len` bytes: each register takes its own upper bytes plus the
 * low `len` bytes of the next register, and the low `len` bytes of *xmm_crc_part enter at the top of crc3. The
 * `len` bytes that fall off the bottom of crc0 are multiplied by the fold constants and XORed into crc3.
 * *xmm_crc_part is overwritten with its shifted value.
 *
 * `len` must be in 1..15: it selects row (len - 1) of pshufb_shf_table with no bounds check, so 0 or any value
 * above 15 reads outside the table. */
static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
        __m128i *xmm_crc3, __m128i *xmm_crc_part) {
    const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
                                             0x00000001, 0xc6e41596);
    const __m128i xmm_mask3 = _mm_set1_epi32((int32_t)0x80808080);

    __m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3;
    __m128i xmm_a0_0, xmm_a0_1;
    __m128 ps_crc3, psa0_0, psa0_1, ps_res;

    xmm_shl = _mm_load_si128((__m128i *)(pshufb_shf_table + (4 * (len - 1))));
    /* Toggling the top bit of every index turns the shift-left mask into the matching shift-right mask */
    xmm_shr = xmm_shl;
    xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3);

    /* The bytes leaving the bottom of crc0; folded into crc3 at the end */
    xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl);

    *xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr);
    xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl);
    *xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1);

    *xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr);
    xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl);
    *xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2);

    *xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr);
    xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl);
    *xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3);

    *xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr);
    *xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl);
    *xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part);

    xmm_a0_1 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x10);
    xmm_a0_0 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x01);

    ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
    psa0_0 = _mm_castsi128_ps(xmm_a0_0);
    psa0_1 = _mm_castsi128_ps(xmm_a0_1);

    ps_res = _mm_xor_ps(ps_crc3, psa0_0);
    ps_res = _mm_xor_ps(ps_res, psa0_1);

    *xmm_crc3 = _mm_castps_si128(ps_res);
}

/* Read the four fold registers from fold[0..3].
 * Uses aligned loads, so `fold` must be 16-byte aligned. */
static inline void crc32_fold_load(__m128i *fold, __m128i *fold0, __m128i *fold1, __m128i *fold2, __m128i *fold3) {
    *fold0 = _mm_load_si128(fold + 0);
    *fold1 = _mm_load_si128(fold + 1);
    *fold2 = _mm_load_si128(fold + 2);
    *fold3 = _mm_load_si128(fold + 3);
}

/* Write the four fold registers back to fold[0..3] (unaligned stores). */
static inline void crc32_fold_save(__m128i *fold, const __m128i *fold0, const __m128i *fold1,
                                   const __m128i *fold2, const __m128i *fold3) {
    _mm_storeu_si128(fold + 0, *fold0);
    _mm_storeu_si128(fold + 1, *fold1);
    _mm_storeu_si128(fold + 2, *fold2);
    _mm_storeu_si128(fold + 3, *fold3);
}

/* Initialise crc->fold for a new computation: the first register holds the constant 0x9db42487 in its low
 * 32 bits and the other three registers are zero. Always returns 0. */
Z_INTERNAL uint32_t CRC32_FOLD_RESET(crc32_fold *crc) {
    __m128i xmm_crc0 = _mm_cvtsi32_si128(0x9db42487);
    __m128i xmm_zero = _mm_setzero_si128();
    crc32_fold_save((__m128i *)crc->fold, &xmm_crc0, &xmm_zero, &xmm_zero, &xmm_zero);
    return 0;
}

/* Run `op` at most once per function invocation. Requires a local integer named `first` in scope, which is
 * cleared on first use. NOTE: this expands to a bare `if` statement (no do { } while (0) wrapper), so it must
 * not be used as the body of an if that has an else. */
#define ONCE(op)            if (first) { first = 0; op; }
/* XOR the initial CRC into `where` the first time data is folded; expects locals `first` and `xmm_initial`. */
#define XOR_INITIAL128(where) ONCE(where = _mm_xor_si128(where, xmm_initial))
#ifdef X86_VPCLMULQDQ
/* 512-bit counterpart; expects locals `first` and `zmm_initial`. */
#  define XOR_INITIAL512(where) ONCE(where = _mm512_xor_si512(where, zmm_initial))
#endif
++#include "crc32_fold_pclmulqdq_tpl.h" ++ ++static const unsigned ALIGNED_(16) crc_k[] = { ++ 0xccaa009e, 0x00000000, /* rk1 */ ++ 0x751997d0, 0x00000001, /* rk2 */ ++ 0xccaa009e, 0x00000000, /* rk5 */ ++ 0x63cd6124, 0x00000001, /* rk6 */ ++ 0xf7011640, 0x00000001, /* rk7 */ ++ 0xdb710640, 0x00000001 /* rk8 */ ++}; ++ ++static const unsigned ALIGNED_(16) crc_mask[4] = { ++ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 ++}; ++ ++static const unsigned ALIGNED_(16) crc_mask2[4] = { ++ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF ++}; ++ ++Z_INTERNAL uint32_t CRC32_FOLD_FINAL(crc32_fold *crc) { ++ const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask); ++ const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2); ++ __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3; ++ __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; ++ ++ crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); ++ ++ /* ++ * k1 ++ */ ++ crc_fold = _mm_load_si128((__m128i *)crc_k); ++ ++ x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); ++ xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); ++ xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0); ++ xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0); ++ ++ x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); ++ xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); ++ xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1); ++ xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1); ++ ++ x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); ++ xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); ++ ++ /* ++ * k5 ++ */ ++ crc_fold = _mm_load_si128((__m128i *)(crc_k + 4)); ++ ++ xmm_crc0 = xmm_crc3; ++ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); ++ xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); ++ ++ xmm_crc0 = xmm_crc3; ++ xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); ++ xmm_crc3 = 
_mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); ++ xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask2); ++ ++ /* ++ * k7 ++ */ ++ xmm_crc1 = xmm_crc3; ++ xmm_crc2 = xmm_crc3; ++ crc_fold = _mm_load_si128((__m128i *)(crc_k + 8)); ++ ++ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); ++ xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask); ++ ++ xmm_crc2 = xmm_crc3; ++ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); ++ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); ++ ++ crc->value = ~((uint32_t)_mm_extract_epi32(xmm_crc3, 2)); ++ ++ return crc->value; ++} ++ ++Z_INTERNAL uint32_t CRC32(uint32_t crc32, const uint8_t *buf, size_t len) { ++ /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for ++ * these short lengths might also prove to be effective */ ++ if (len < 64) ++ return PREFIX(crc32_braid)(crc32, buf, len); ++ ++ crc32_fold ALIGNED_(16) crc_state; ++ CRC32_FOLD_RESET(&crc_state); ++ CRC32_FOLD(&crc_state, buf, len, crc32); ++ return CRC32_FOLD_FINAL(&crc_state); ++} +diff --git a/arch/x86/crc32_vpclmulqdq.c b/arch/x86/crc32_vpclmulqdq.c +new file mode 100644 +index 0000000..ec641b4 +--- /dev/null ++++ b/arch/x86/crc32_vpclmulqdq.c +@@ -0,0 +1,17 @@ ++/* crc32_vpclmulqdq.c -- VPCMULQDQ-based CRC32 folding implementation. 
++ * Copyright Wangyang Guo (wangyang.guo@intel.com) ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) ++ ++#define X86_VPCLMULQDQ ++#define CRC32_FOLD_COPY crc32_fold_vpclmulqdq_copy ++#define CRC32_FOLD crc32_fold_vpclmulqdq ++#define CRC32_FOLD_RESET crc32_fold_vpclmulqdq_reset ++#define CRC32_FOLD_FINAL crc32_fold_vpclmulqdq_final ++#define CRC32 crc32_vpclmulqdq ++ ++#include "crc32_pclmulqdq_tpl.h" ++ ++#endif +diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c +new file mode 100644 +index 0000000..ae092a7 +--- /dev/null ++++ b/arch/x86/insert_string_sse42.c +@@ -0,0 +1,24 @@ ++/* insert_string_sse42.c -- insert_string integer hash variant using SSE4.2's CRC instructions ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ */ ++ ++#ifdef X86_SSE42 ++#include "../../zbuild.h" ++#include ++#include "../../deflate.h" ++ ++#define HASH_CALC(s, h, val)\ ++ h = _mm_crc32_u32(h, val) ++ ++#define HASH_CALC_VAR h ++#define HASH_CALC_VAR_INIT uint32_t h = 0 ++ ++#define UPDATE_HASH update_hash_sse42 ++#define INSERT_STRING insert_string_sse42 ++#define QUICK_INSERT_STRING quick_insert_string_sse42 ++ ++#include "../../insert_string_tpl.h" ++#endif +diff --git a/arch/x86/slide_hash_avx2.c b/arch/x86/slide_hash_avx2.c +new file mode 100644 +index 0000000..94fe10c +--- /dev/null ++++ b/arch/x86/slide_hash_avx2.c +@@ -0,0 +1,39 @@ ++/* ++ * AVX2 optimized hash slide, based on Intel's slide_sse implementation ++ * ++ * Copyright (C) 2017 Intel Corporation ++ * Authors: ++ * Arjan van de Ven ++ * Jim Kukunas ++ * Mika T. 
Lindqvist ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include "../../zbuild.h" ++#include "../../deflate.h" ++ ++#include ++ ++static inline void slide_hash_chain(Pos *table, uint32_t entries, const __m256i wsize) { ++ table += entries; ++ table -= 16; ++ ++ do { ++ __m256i value, result; ++ ++ value = _mm256_loadu_si256((__m256i *)table); ++ result = _mm256_subs_epu16(value, wsize); ++ _mm256_storeu_si256((__m256i *)table, result); ++ ++ table -= 16; ++ entries -= 16; ++ } while (entries > 0); ++} ++ ++Z_INTERNAL void slide_hash_avx2(deflate_state *s) { ++ uint16_t wsize = (uint16_t)s->w_size; ++ const __m256i ymm_wsize = _mm256_set1_epi16((short)wsize); ++ ++ slide_hash_chain(s->head, HASH_SIZE, ymm_wsize); ++ slide_hash_chain(s->prev, wsize, ymm_wsize); ++} +diff --git a/arch/x86/slide_hash_sse2.c b/arch/x86/slide_hash_sse2.c +new file mode 100644 +index 0000000..5daac4a +--- /dev/null ++++ b/arch/x86/slide_hash_sse2.c +@@ -0,0 +1,62 @@ ++/* ++ * SSE optimized hash slide ++ * ++ * Copyright (C) 2017 Intel Corporation ++ * Authors: ++ * Arjan van de Ven ++ * Jim Kukunas ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include "../../zbuild.h" ++#include "../../deflate.h" ++ ++#include ++#include ++ ++static inline void slide_hash_chain(Pos *table0, Pos *table1, uint32_t entries0, ++ uint32_t entries1, const __m128i wsize) { ++ uint32_t entries; ++ Pos *table; ++ __m128i value0, value1, result0, result1; ++ ++ int on_chain = 0; ++ ++next_chain: ++ table = (on_chain) ? table1 : table0; ++ entries = (on_chain) ? entries1 : entries0; ++ ++ table += entries; ++ table -= 16; ++ ++ /* ZALLOC allocates this pointer unless the user chose a custom allocator. 
++ * Our alloc function is aligned to 64 byte boundaries */ ++ do { ++ value0 = _mm_load_si128((__m128i *)table); ++ value1 = _mm_load_si128((__m128i *)(table + 8)); ++ result0 = _mm_subs_epu16(value0, wsize); ++ result1 = _mm_subs_epu16(value1, wsize); ++ _mm_store_si128((__m128i *)table, result0); ++ _mm_store_si128((__m128i *)(table + 8), result1); ++ ++ table -= 16; ++ entries -= 16; ++ } while (entries > 0); ++ ++ ++on_chain; ++ if (on_chain > 1) { ++ return; ++ } else { ++ goto next_chain; ++ } ++} ++ ++Z_INTERNAL void slide_hash_sse2(deflate_state *s) { ++ uint16_t wsize = (uint16_t)s->w_size; ++ const __m128i xmm_wsize = _mm_set1_epi16((short)wsize); ++ ++ assert(((uintptr_t)s->head & 15) == 0); ++ assert(((uintptr_t)s->prev & 15) == 0); ++ ++ slide_hash_chain(s->head, s->prev, HASH_SIZE, wsize, xmm_wsize); ++} +diff --git a/arch/x86/x86_features.c b/arch/x86/x86_features.c +new file mode 100644 +index 0000000..8d11564 +--- /dev/null ++++ b/arch/x86/x86_features.c +@@ -0,0 +1,97 @@ ++/* x86_features.c - x86 feature check ++ * ++ * Copyright (C) 2013 Intel Corporation. All rights reserved. 
++ * Author: ++ * Jim Kukunas ++ * ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "../../zbuild.h" ++#include "x86_features.h" ++ ++#ifdef _MSC_VER ++# include ++#else ++// Newer versions of GCC and clang come with cpuid.h ++# include ++#endif ++ ++#include ++ ++static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { ++#ifdef _MSC_VER ++ unsigned int registers[4]; ++ __cpuid((int *)registers, info); ++ ++ *eax = registers[0]; ++ *ebx = registers[1]; ++ *ecx = registers[2]; ++ *edx = registers[3]; ++#else ++ __cpuid(info, *eax, *ebx, *ecx, *edx); ++#endif ++} ++ ++static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) { ++#ifdef _MSC_VER ++ unsigned int registers[4]; ++ __cpuidex((int *)registers, info, subinfo); ++ ++ *eax = registers[0]; ++ *ebx = registers[1]; ++ *ecx = registers[2]; ++ *edx = registers[3]; ++#else ++ __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx); ++#endif ++} ++ ++static inline uint64_t xgetbv(unsigned int xcr) { ++#ifdef _MSC_VER ++ return _xgetbv(xcr); ++#else ++ uint32_t eax, edx; ++ __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr)); ++ return (uint64_t)(edx) << 32 | eax; ++#endif ++} ++ ++void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { ++ unsigned eax, ebx, ecx, edx; ++ unsigned maxbasic; ++ ++ cpuid(0, &maxbasic, &ebx, &ecx, &edx); ++ cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx); ++ ++ features->has_sse2 = edx & 0x4000000; ++ features->has_ssse3 = ecx & 0x200; ++ features->has_sse42 = ecx & 0x100000; ++ features->has_pclmulqdq = ecx & 0x2; ++ ++ if (ecx & 0x08000000) { ++ uint64_t xfeature = xgetbv(0); ++ ++ features->has_os_save_ymm = ((xfeature & 0x06) == 0x06); ++ features->has_os_save_zmm = ((xfeature & 0xe6) == 0xe6); ++ } ++ ++ if (maxbasic >= 7) { ++ cpuidex(7, 0, &eax, &ebx, &ecx, &edx); ++ ++ // check BMI1 bit ++ // 
Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf ++ features->has_vpclmulqdq = ecx & 0x400; ++ ++ // check AVX2 bit if the OS supports saving YMM registers ++ if (features->has_os_save_ymm) { ++ features->has_avx2 = ebx & 0x20; ++ } ++ ++ // check AVX512 bits if the OS supports saving ZMM registers ++ if (features->has_os_save_zmm) { ++ features->has_avx512 = ebx & 0x00010000; ++ features->has_avx512vnni = ecx & 0x800; ++ } ++ } ++} +diff --git a/arch/x86/x86_features.h b/arch/x86/x86_features.h +new file mode 100644 +index 0000000..4a36bde +--- /dev/null ++++ b/arch/x86/x86_features.h +@@ -0,0 +1,24 @@ ++/* x86_features.h -- check for CPU features ++* Copyright (C) 2013 Intel Corporation Jim Kukunas ++* For conditions of distribution and use, see copyright notice in zlib.h ++*/ ++ ++#ifndef X86_FEATURES_H_ ++#define X86_FEATURES_H_ ++ ++struct x86_cpu_features { ++ int has_avx2; ++ int has_avx512; ++ int has_avx512vnni; ++ int has_sse2; ++ int has_ssse3; ++ int has_sse42; ++ int has_pclmulqdq; ++ int has_vpclmulqdq; ++ int has_os_save_ymm; ++ int has_os_save_zmm; ++}; ++ ++void Z_INTERNAL x86_check_features(struct x86_cpu_features *features); ++ ++#endif /* CPU_H_ */ +diff --git a/arch/x86/x86_intrins.h b/arch/x86/x86_intrins.h +new file mode 100644 +index 0000000..52e1085 +--- /dev/null ++++ b/arch/x86/x86_intrins.h +@@ -0,0 +1,87 @@ ++#ifndef X86_INTRINS_H ++#define X86_INTRINS_H ++ ++/* Unfortunately GCC didn't support these things until version 10. ++ * Similarly, AppleClang didn't support them in Xcode 9.2 but did in 9.3. 
++ */ ++#ifdef __AVX2__ ++#include ++ ++#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \ ++ || (defined(__apple_build_version__) && __apple_build_version__ < 9020039) ++static inline __m256i _mm256_zextsi128_si256(__m128i a) { ++ __m128i r; ++ __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); ++ return _mm256_castsi128_si256(r); ++} ++ ++#ifdef __AVX512F__ ++static inline __m512i _mm512_zextsi128_si512(__m128i a) { ++ __m128i r; ++ __asm__ volatile ("vmovdqa %1,%0" : "=x" (r) : "x" (a)); ++ return _mm512_castsi128_si512(r); ++} ++#endif // __AVX512F__ ++#endif // gcc/AppleClang version test ++ ++#endif // __AVX2__ ++ ++/* GCC <9 is missing some AVX512 intrinsics. ++ */ ++#ifdef __AVX512F__ ++#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9) ++#include ++ ++#define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \ ++ ((int)(unsigned char)(c2) << 8) | ((int)(unsigned char)(c3))) ++ ++static inline __m512i _mm512_set_epi8(char __q63, char __q62, char __q61, char __q60, ++ char __q59, char __q58, char __q57, char __q56, ++ char __q55, char __q54, char __q53, char __q52, ++ char __q51, char __q50, char __q49, char __q48, ++ char __q47, char __q46, char __q45, char __q44, ++ char __q43, char __q42, char __q41, char __q40, ++ char __q39, char __q38, char __q37, char __q36, ++ char __q35, char __q34, char __q33, char __q32, ++ char __q31, char __q30, char __q29, char __q28, ++ char __q27, char __q26, char __q25, char __q24, ++ char __q23, char __q22, char __q21, char __q20, ++ char __q19, char __q18, char __q17, char __q16, ++ char __q15, char __q14, char __q13, char __q12, ++ char __q11, char __q10, char __q09, char __q08, ++ char __q07, char __q06, char __q05, char __q04, ++ char __q03, char __q02, char __q01, char __q00) { ++ return _mm512_set_epi32(PACK(__q63, __q62, __q61, __q60), PACK(__q59, __q58, __q57, __q56), ++ PACK(__q55, __q54, __q53, __q52), PACK(__q51, __q50, __q49, __q48), ++ 
PACK(__q47, __q46, __q45, __q44), PACK(__q43, __q42, __q41, __q40), ++ PACK(__q39, __q38, __q37, __q36), PACK(__q35, __q34, __q33, __q32), ++ PACK(__q31, __q30, __q29, __q28), PACK(__q27, __q26, __q25, __q24), ++ PACK(__q23, __q22, __q21, __q20), PACK(__q19, __q18, __q17, __q16), ++ PACK(__q15, __q14, __q13, __q12), PACK(__q11, __q10, __q09, __q08), ++ PACK(__q07, __q06, __q05, __q04), PACK(__q03, __q02, __q01, __q00)); ++} ++ ++#undef PACK ++ ++#endif // gcc version test ++#endif // __AVX512F__ ++ ++/* Missing zero-extension AVX and AVX512 intrinsics. ++ * Fixed in Microsoft Visual Studio 2017 version 15.7 ++ * https://developercommunity.visualstudio.com/t/missing-zero-extension-avx-and-avx512-intrinsics/175737 ++ */ ++#if defined(_MSC_VER) && _MSC_VER < 1914 ++#ifdef __AVX2__ ++static inline __m256i _mm256_zextsi128_si256(__m128i a) { ++ return _mm256_inserti128_si256(_mm256_setzero_si256(), a, 0); ++} ++#endif // __AVX2__ ++ ++#ifdef __AVX512F__ ++static inline __m512i _mm512_zextsi128_si512(__m128i a) { ++ return _mm512_inserti32x4(_mm512_setzero_si512(), a, 0); ++} ++#endif // __AVX512F__ ++#endif // defined(_MSC_VER) && _MSC_VER < 1914 ++ ++#endif // include guard X86_INTRINS_H +diff --git a/chunkset.c b/chunkset.c +index b07e6f4..7b2bb7b 100644 +--- a/chunkset.c ++++ b/chunkset.c +@@ -3,81 +3,40 @@ + */ + + #include "zbuild.h" +-#include "zutil.h" + +-// We need sizeof(chunk_t) to be 8, no matter what. 
+-#if defined(UNALIGNED64_OK) + typedef uint64_t chunk_t; +-#elif defined(UNALIGNED_OK) +-typedef struct chunk_t { uint32_t u32[2]; } chunk_t; +-#else +-typedef struct chunk_t { uint8_t u8[8]; } chunk_t; +-#endif + + #define CHUNK_SIZE 8 + +-#define HAVE_CHUNKMEMSET_1 + #define HAVE_CHUNKMEMSET_4 + #define HAVE_CHUNKMEMSET_8 + +-static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { +-#if defined(UNALIGNED64_OK) +- *chunk = 0x0101010101010101 * (uint8_t)*from; +-#elif defined(UNALIGNED_OK) +- chunk->u32[0] = 0x01010101 * (uint8_t)*from; +- chunk->u32[1] = chunk->u32[0]; +-#else +- memset(chunk, *from, sizeof(chunk_t)); +-#endif +-} +- + static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { +-#if defined(UNALIGNED64_OK) +- uint32_t half_chunk; +- half_chunk = *(uint32_t *)from; +- *chunk = 0x0000000100000001 * (uint64_t)half_chunk; +-#elif defined(UNALIGNED_OK) +- chunk->u32[0] = *(uint32_t *)from; +- chunk->u32[1] = chunk->u32[0]; +-#else +- uint8_t *chunkptr = (uint8_t *)chunk; +- memcpy(chunkptr, from, 4); +- memcpy(chunkptr+4, from, 4); +-#endif ++ uint8_t *dest = (uint8_t *)chunk; ++ memcpy(dest, from, sizeof(uint32_t)); ++ memcpy(dest+4, from, sizeof(uint32_t)); + } + + static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { +-#if defined(UNALIGNED64_OK) +- *chunk = *(uint64_t *)from; +-#elif defined(UNALIGNED_OK) +- uint32_t* p = (uint32_t *)from; +- chunk->u32[0] = p[0]; +- chunk->u32[1] = p[1]; +-#else +- memcpy(chunk, from, sizeof(chunk_t)); +-#endif ++ memcpy(chunk, from, sizeof(uint64_t)); + } + + static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { +- chunkmemset_8((uint8_t *)s, chunk); ++ memcpy(chunk, (uint8_t *)s, sizeof(uint64_t)); + } + + static inline void storechunk(uint8_t *out, chunk_t *chunk) { +-#if defined(UNALIGNED64_OK) +- *(uint64_t *)out = *chunk; +-#elif defined(UNALIGNED_OK) +- ((uint32_t *)out)[0] = chunk->u32[0]; +- ((uint32_t *)out)[1] = chunk->u32[1]; +-#else +- memcpy(out, chunk, 
sizeof(chunk_t)); +-#endif ++ memcpy(out, chunk, sizeof(uint64_t)); + } + + #define CHUNKSIZE chunksize_c + #define CHUNKCOPY chunkcopy_c +-#define CHUNKCOPY_SAFE chunkcopy_safe_c + #define CHUNKUNROLL chunkunroll_c + #define CHUNKMEMSET chunkmemset_c + #define CHUNKMEMSET_SAFE chunkmemset_safe_c + + #include "chunkset_tpl.h" ++ ++#define INFLATE_FAST inflate_fast_c ++ ++#include "inffast_tpl.h" +diff --git a/chunkset_tpl.h b/chunkset_tpl.h +index be52ee9..f909a12 100644 +--- a/chunkset_tpl.h ++++ b/chunkset_tpl.h +@@ -2,6 +2,13 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + ++#include "zbuild.h" ++#include ++ ++#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) ++extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len); ++#endif ++ + /* Returns the chunk size */ + Z_INTERNAL uint32_t CHUNKSIZE(void) { + return sizeof(chunk_t); +@@ -17,70 +24,26 @@ Z_INTERNAL uint32_t CHUNKSIZE(void) { + (chunk_t bytes or fewer) will fall straight through the loop + without iteration, which will hopefully make the branch prediction more + reliable. */ ++#ifndef HAVE_CHUNKCOPY + Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { + Assert(len > 0, "chunkcopy should never have a length 0"); + chunk_t chunk; +- int32_t align = (--len % sizeof(chunk_t)) + 1; ++ int32_t align = ((len - 1) % sizeof(chunk_t)) + 1; + loadchunk(from, &chunk); + storechunk(out, &chunk); + out += align; + from += align; +- len /= sizeof(chunk_t); ++ len -= align; + while (len > 0) { + loadchunk(from, &chunk); + storechunk(out, &chunk); + out += sizeof(chunk_t); + from += sizeof(chunk_t); +- --len; ++ len -= sizeof(chunk_t); + } + return out; + } +- +-/* Behave like chunkcopy, but avoid writing beyond of legal output. 
*/ +-Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) { +- unsigned safelen = (unsigned)((safe - out) + 1); +- len = MIN(len, safelen); +-#if CHUNK_SIZE >= 32 +- while (len >= 32) { +- memcpy(out, from, 32); +- out += 32; +- from += 32; +- len -= 32; +- } + #endif +-#if CHUNK_SIZE >= 16 +- while (len >= 16) { +- memcpy(out, from, 16); +- out += 16; +- from += 16; +- len -= 16; +- } +-#endif +-#if CHUNK_SIZE >= 8 +- while (len >= 8) { +- memcpy(out, from, 8); +- out += 8; +- from += 8; +- len -= 8; +- } +-#endif +- if (len >= 4) { +- memcpy(out, from, 4); +- out += 4; +- from += 4; +- len -= 4; +- } +- if (len >= 2) { +- memcpy(out, from, 2); +- out += 2; +- from += 2; +- len -= 2; +- } +- if (len == 1) { +- *out++ = *from++; +- } +- return out; +-} + + /* Perform short copies until distance can be rewritten as being at least + sizeof chunk_t. +@@ -90,6 +53,7 @@ Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned l + This assumption holds because inflate_fast() starts every iteration with at + least 258 bytes of output space available (258 being the maximum length + output from a single token; see inflate_fast()'s assumptions below). 
*/ ++#ifndef HAVE_CHUNKUNROLL + Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { + unsigned char const *from = out - *dist; + chunk_t chunk; +@@ -102,6 +66,30 @@ Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { + } + return out; + } ++#endif ++ ++#ifndef HAVE_CHUNK_MAG ++/* Loads a magazine to feed into memory of the pattern */ ++static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t dist) { ++ /* This code takes string of length dist from "from" and repeats ++ * it for as many times as can fit in a chunk_t (vector register) */ ++ uint32_t cpy_dist; ++ uint32_t bytes_remaining = sizeof(chunk_t); ++ chunk_t chunk_load; ++ uint8_t *cur_chunk = (uint8_t *)&chunk_load; ++ while (bytes_remaining) { ++ cpy_dist = MIN(dist, bytes_remaining); ++ memcpy(cur_chunk, buf, cpy_dist); ++ bytes_remaining -= cpy_dist; ++ cur_chunk += cpy_dist; ++ /* This allows us to bypass an expensive integer division since we're effectively ++ * counting in this loop, anyway */ ++ *chunk_rem = cpy_dist; ++ } ++ ++ return chunk_load; ++} ++#endif + + /* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST. + Return OUT + LEN. 
*/ +@@ -109,67 +97,72 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { + /* Debug performance related issues when len < sizeof(uint64_t): + Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ + Assert(dist > 0, "chunkmemset cannot have a distance 0"); +- +- unsigned char *from = out - dist; +- chunk_t chunk; +- unsigned sz = sizeof(chunk); +- if (len < sz) { +- while (len != 0) { +- *out++ = *from++; +- --len; +- } +- return out; ++ /* Only AVX2 */ ++#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) ++ if (len <= 16) { ++ return chunkmemset_ssse3(out, dist, len); + } ++#endif ++ ++ uint8_t *from = out - dist; + +-#ifdef HAVE_CHUNKMEMSET_1 + if (dist == 1) { +- chunkmemset_1(from, &chunk); +- } else +-#endif ++ memset(out, *from, len); ++ return out + len; ++ } else if (dist > sizeof(chunk_t)) { ++ return CHUNKCOPY(out, out - dist, len); ++ } ++ ++ chunk_t chunk_load; ++ uint32_t chunk_mod = 0; ++ ++ /* TODO: possibly build up a permutation table for this if not an even modulus */ + #ifdef HAVE_CHUNKMEMSET_2 + if (dist == 2) { +- chunkmemset_2(from, &chunk); ++ chunkmemset_2(from, &chunk_load); + } else + #endif + #ifdef HAVE_CHUNKMEMSET_4 + if (dist == 4) { +- chunkmemset_4(from, &chunk); ++ chunkmemset_4(from, &chunk_load); + } else + #endif + #ifdef HAVE_CHUNKMEMSET_8 + if (dist == 8) { +- chunkmemset_8(from, &chunk); ++ chunkmemset_8(from, &chunk_load); ++ } else if (dist == sizeof(chunk_t)) { ++ loadchunk(from, &chunk_load); + } else + #endif +- if (dist == sz) { +- loadchunk(from, &chunk); +- } else if (dist < sz) { +- unsigned char *end = out + len - 1; +- while (len > dist) { +- out = CHUNKCOPY_SAFE(out, from, dist, end); +- len -= dist; +- } +- if (len > 0) { +- out = CHUNKCOPY_SAFE(out, from, len, end); ++ { ++ chunk_load = GET_CHUNK_MAG(from, &chunk_mod, dist); ++ } ++ ++ /* If we're lucky enough and dist happens to be an even modulus of our vector length, ++ * we can do two 
stores per loop iteration, which for most ISAs, especially x86, is beneficial */ ++ if (chunk_mod == 0) { ++ while (len >= (2 * sizeof(chunk_t))) { ++ storechunk(out, &chunk_load); ++ storechunk(out + sizeof(chunk_t), &chunk_load); ++ out += 2 * sizeof(chunk_t); ++ len -= 2 * sizeof(chunk_t); + } +- return out; +- } else { +- out = CHUNKUNROLL(out, &dist, &len); +- return CHUNKCOPY(out, out - dist, len); + } + +- unsigned rem = len % sz; +- len -= rem; +- while (len) { +- storechunk(out, &chunk); +- out += sz; +- len -= sz; ++ /* If we don't have a "dist" length that divides evenly into a vector ++ * register, we can write the whole vector register but we need only ++ * advance by the amount of the whole string that fits in our chunk_t. ++ * If we do divide evenly into the vector length, adv_amount = chunk_t size*/ ++ uint32_t adv_amount = sizeof(chunk_t) - chunk_mod; ++ while (len >= sizeof(chunk_t)) { ++ storechunk(out, &chunk_load); ++ len -= adv_amount; ++ out += adv_amount; + } + +- /* Last, deal with the case when LEN is not a multiple of SZ. 
*/ +- if (rem) { +- memcpy(out, from, rem); +- out += rem; ++ if (len) { ++ memcpy(out, &chunk_load, len); ++ out += len; + } + + return out; +@@ -200,5 +193,8 @@ Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, + } + return out; + } +- return CHUNKMEMSET(out, dist, len); ++ if (len) ++ return CHUNKMEMSET(out, dist, len); ++ ++ return out; + } +diff --git a/cmake/detect-arch.c b/cmake/detect-arch.c +index 84e6a84..9259018 100644 +--- a/cmake/detect-arch.c ++++ b/cmake/detect-arch.c +@@ -12,7 +12,7 @@ + #error archfound i686 + + // ARM +-#elif defined(__aarch64__) || defined(_M_ARM64) ++#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #error archfound aarch64 + #elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM) + #if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__) +@@ -101,6 +101,14 @@ + #error archfound riscv32 + #endif + ++// LOONGARCH ++#elif defined(__loongarch_lp64) ++ #error archfound loongarch64 ++ ++// Emscripten (WebAssembly) ++#elif defined(__EMSCRIPTEN__) ++ #error archfound wasm32 ++ + // return 'unrecognized' if we do not know what architecture this is + #else + #error archfound unrecognized +diff --git a/cmake/detect-arch.cmake b/cmake/detect-arch.cmake +index f0547ba..dfdc601 100644 +--- a/cmake/detect-arch.cmake ++++ b/cmake/detect-arch.cmake +@@ -13,9 +13,11 @@ elseif(MSVC) + set(ARCH "x86_64") + elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7") + set(ARCH "arm") +- elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64") ++ elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64EC") + set(ARCH "aarch64") + endif() ++elseif(EMSCRIPTEN) ++ set(ARCH "wasm32") + elseif(CMAKE_CROSSCOMPILING) + set(ARCH ${CMAKE_C_COMPILER_TARGET}) + else() +@@ -49,7 +51,7 @@ endif() + if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)") + 
set(BASEARCH "x86") + set(BASEARCH_X86_FOUND TRUE) +-elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)") ++elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64|cortex)") + set(BASEARCH "arm") + set(BASEARCH_ARM_FOUND TRUE) + elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?") +@@ -88,6 +90,12 @@ elseif("${ARCH}" MATCHES "rs6000") + elseif("${ARCH}" MATCHES "riscv(32|64)") + set(BASEARCH "riscv") + set(BASEARCH_RISCV_FOUND TRUE) ++elseif("${ARCH}" MATCHES "loongarch64") ++ set(BASEARCH "loongarch") ++ set(BASEARCH_LOONGARCH_FOUND TRUE) ++elseif("${ARCH}" MATCHES "wasm32") ++ set(BASEARCH "wasm32") ++ set(BASEARCH_WASM32_FOUND TRUE) + else() + set(BASEARCH "x86") + set(BASEARCH_X86_FOUND TRUE) +diff --git a/cmake/detect-coverage.cmake b/cmake/detect-coverage.cmake +index 96478f6..8e67a08 100644 +--- a/cmake/detect-coverage.cmake ++++ b/cmake/detect-coverage.cmake +@@ -13,9 +13,9 @@ macro(add_code_coverage) + set(CMAKE_REQUIRED_LINK_OPTIONS) + + if(HAVE_COVERAGE) +- set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS} -coverage") +- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -coverage") +- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -coverage") ++ add_compile_options(-coverage) ++ add_link_options(-coverage) ++ message(STATUS "Code coverage enabled using: -coverage") + else() + # Some versions of GCC don't support -coverage shorthand + if(CMAKE_VERSION VERSION_LESS 3.14) +@@ -28,11 +28,19 @@ macro(add_code_coverage) + set(CMAKE_REQUIRED_LINK_OPTIONS) + + if(HAVE_TEST_COVERAGE) +- set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS} -ftest-coverage -fprofile-arcs -fprofile-values") +- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov -fprofile-arcs") +- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -lgcov -fprofile-arcs") ++ add_compile_options(-ftest-coverage -fprofile-arcs -fprofile-values) ++ add_link_options(-lgcov -fprofile-arcs) ++ message(STATUS "Code coverage enabled using: -ftest-coverage") + else() + message(WARNING "Compiler does 
not support code coverage") ++ set(WITH_CODE_COVERAGE OFF) + endif() + endif() ++ ++ # Set optimization level to zero for code coverage builds ++ if (WITH_CODE_COVERAGE) ++ # Use CMake compiler flag variables due to add_compile_options failure on Windows GCC ++ set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS}") ++ set(CMAKE_CXX_FLAGS "-O0 ${CMAKE_CXX_FLAGS}") ++ endif() + endmacro() +diff --git a/cmake/detect-install-dirs.cmake b/cmake/detect-install-dirs.cmake +index ddf1adb..a7c774f 100644 +--- a/cmake/detect-install-dirs.cmake ++++ b/cmake/detect-install-dirs.cmake +@@ -4,52 +4,40 @@ + + # Determine installation directory for executables + if (DEFINED BIN_INSTALL_DIR) +- set(BIN_INSTALL_DIR "${BIN_INSTALL_DIR}" CACHE PATH "Installation directory for executables" FORCE) ++ set(BIN_INSTALL_DIR "${BIN_INSTALL_DIR}" CACHE PATH "Installation directory for executables (Deprecated)" FORCE) ++ set(CMAKE_INSTALL_BINDIR "${BIN_INSTALL_DIR}") + elseif (DEFINED INSTALL_BIN_DIR) +- set(BIN_INSTALL_DIR "${INSTALL_BIN_DIR}" CACHE PATH "Installation directory for executables" FORCE) +-elseif (DEFINED CMAKE_INSTALL_FULL_BINDIR) +- set(BIN_INSTALL_DIR "${CMAKE_INSTALL_FULL_BINDIR}" CACHE PATH "Installation directory for executables" FORCE) +-elseif (DEFINED CMAKE_INSTALL_BINDIR) +- set(BIN_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}" CACHE PATH "Installation directory for executables" FORCE) +-else() +- set(BIN_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables") ++ set(CMAKE_INSTALL_BINDIR "${INSTALL_BIN_DIR}") + endif() + + # Determine installation directory for libraries + if (DEFINED LIB_INSTALL_DIR) +- set(LIB_INSTALL_DIR "${LIB_INSTALL_DIR}" CACHE PATH "Installation directory for libraries" FORCE) ++ set(LIB_INSTALL_DIR "${LIB_INSTALL_DIR}" CACHE PATH "Installation directory for libraries (Deprecated)" FORCE) ++ set(CMAKE_INSTALL_LIBDIR "${LIB_INSTALL_DIR}") + elseif (DEFINED INSTALL_LIB_DIR) +- set(LIB_INSTALL_DIR 
"${INSTALL_LIB_DIR}" CACHE PATH "Installation directory for libraries" FORCE) +-elseif (DEFINED CMAKE_INSTALL_FULL_LIBDIR) +- set(LIB_INSTALL_DIR "${CMAKE_INSTALL_FULL_LIBDIR}" CACHE PATH "Installation directory for libraries" FORCE) +-elseif (DEFINED CMAKE_INSTALL_LIBDIR) +- set(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" CACHE PATH "Installation directory for libraries" FORCE) +-else() +- set(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries") ++ set(CMAKE_INSTALL_LIBDIR "${INSTALL_LIB_DIR}") + endif() + + # Determine installation directory for include files + if (DEFINED INC_INSTALL_DIR) +- set(INC_INSTALL_DIR "${INC_INSTALL_DIR}" CACHE PATH "Installation directory for headers" FORCE) ++ set(INC_INSTALL_DIR "${INC_INSTALL_DIR}" CACHE PATH "Installation directory for headers (Deprecated)" FORCE) ++ set(CMAKE_INSTALL_INCLUDEDIR "${INC_INSTALL_DIR}") + elseif (DEFINED INSTALL_INC_DIR) +- set(INC_INSTALL_DIR "${INSTALL_INC_DIR}" CACHE PATH "Installation directory for headers" FORCE) +-elseif (DEFINED CMAKE_INSTALL_FULL_INCLUDEDIR) +- set(INC_INSTALL_DIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}" CACHE PATH "Installation directory for headers" FORCE) +-elseif (DEFINED CMAKE_INSTALL_INCLUDEDIR) +- set(INC_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}" CACHE PATH "Installation directory for headers" FORCE) +-else() +- set(INC_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers") ++ set(CMAKE_INSTALL_INCLUDEDIR "${INSTALL_INC_DIR}") + endif() + ++# Define GNU standard installation directories ++include(GNUInstallDirs) ++ + # Determine installation directory for pkgconfig files + if (DEFINED PKGCONFIG_INSTALL_DIR) + set(PKGCONFIG_INSTALL_DIR "${PKGCONFIG_INSTALL_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) + elseif (DEFINED INSTALL_PKGCONFIG_DIR) + set(PKGCONFIG_INSTALL_DIR "${INSTALL_PKGCONFIG_DIR}" CACHE PATH "Installation 
directory for pkgconfig (.pc) files" FORCE) ++elseif (DEFINED CMAKE_INSTALL_PKGCONFIGDIR) ++ set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) + elseif (DEFINED CMAKE_INSTALL_FULL_PKGCONFIGDIR) + set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_FULL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) +-elseif (DEFINED CMAKE_INSTALL_PKGCONFIGDIR) +- set(PKGCONFIG_INSTALL_DIR "${LIB_INSTALL_DIR}/${CMAKE_INSTALL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) + else() +- set(PKGCONFIG_INSTALL_DIR "${LIB_INSTALL_DIR}/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files") ++ set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files") + endif() +diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake +new file mode 100644 +index 0000000..74ac391 +--- /dev/null ++++ b/cmake/detect-intrinsics.cmake +@@ -0,0 +1,543 @@ ++# detect-intrinsics.cmake -- Detect compiler intrinsics support ++# Licensed under the Zlib license, see LICENSE.md for details ++ ++macro(check_acle_compiler_flag) ++ if(MSVC) ++ # Both ARM and ARM64-targeting msvc support intrinsics, but ++ # ARM msvc is missing some intrinsics introduced with ARMv8, e.g. 
crc32 ++ if(MSVC_C_ARCHITECTURE_ID STREQUAL "ARM64") ++ set(HAVE_ACLE_FLAG TRUE) ++ endif() ++ else() ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support") ++ endif() ++ endif() ++ # Check whether compiler supports ACLE flag ++ set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "int main() { return 0; }" ++ HAVE_ACLE_FLAG FAIL_REGEX "not supported") ++ if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG) ++ set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE) ++ # Check whether compiler supports ACLE flag ++ set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}") ++ check_c_source_compiles( ++ "int main() { return 0; }" ++ HAVE_ACLE_FLAG2 FAIL_REGEX "not supported") ++ set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE) ++ unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable ++ endif() ++ set(CMAKE_REQUIRED_FLAGS) ++ endif() ++endmacro() ++ ++macro(check_armv6_compiler_flag) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6) ++ if(HAVE_MARCH_ARMV6) ++ set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support") ++ endif() ++ endif() ++ endif() ++ # Check whether compiler supports ARMv6 inline asm ++ set(CMAKE_REQUIRED_FLAGS "${ARMV6FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "unsigned int f(unsigned int a, unsigned int b) { ++ unsigned int c; ++ __asm__ __volatile__ ( \"uqsub16 %0, %1, %2\" : \"=r\" (c) : \"r\" (a), \"r\" (b) ); ++ return (int)c; ++ } ++ int main(void) { return f(1,2); }" ++ HAVE_ARMV6_INLINE_ASM ++ ) ++ # Check whether compiler supports ARMv6 intrinsics ++ check_c_source_compiles( ++ "#if defined(_MSC_VER) ++ 
#include <intrin.h> ++ #else ++ #include <arm_acle.h> ++ #endif ++ unsigned int f(unsigned int a, unsigned int b) { ++ #if defined(_MSC_VER) ++ return _arm_uqsub16(a, b); ++ #else ++ return __uqsub16(a, b); ++ #endif ++ } ++ int main(void) { return 0; }" ++ HAVE_ARMV6_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_avx512_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") ++ else() ++ set(AVX512FLAG "/arch:AVX512") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal ++ # instruction scheduling unless you specify a reasonable -mtune= target ++ set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") ++ if(NOT MSVC) ++ check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE) ++ if(HAVE_CASCADE_LAKE) ++ set(AVX512FLAG "${AVX512FLAG} -mtune=cascadelake") ++ else() ++ set(AVX512FLAG "${AVX512FLAG} -mtune=skylake-avx512") ++ endif() ++ unset(HAVE_CASCADE_LAKE) ++ endif() ++ endif() ++ elseif(MSVC) ++ set(AVX512FLAG "/arch:AVX512") ++ endif() ++ # Check whether compiler supports AVX512 intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __m512i f(__m512i y) { ++ __m512i x = _mm512_set1_epi8(2); ++ return _mm512_sub_epi8(x, y); ++ } ++ int main(void) { return 0; }" ++ HAVE_AVX512_INTRIN ++ ) ++ ++ # Evidently both GCC and clang were late to implementing these ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __mmask16 f(__mmask16 x) { return _knot_mask16(x); } ++ int main(void) { return 0; }" ++ HAVE_MASK_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_avx512vnni_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") ++ 
else() ++ set(AVX512VNNIFLAG "/arch:AVX512") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni") ++ if(NOT MSVC) ++ check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE) ++ if(HAVE_CASCADE_LAKE) ++ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=cascadelake") ++ else() ++ set(AVX512VNNIFLAG "${AVX512VNNIFLAG} -mtune=skylake-avx512") ++ endif() ++ unset(HAVE_CASCADE_LAKE) ++ endif() ++ endif() ++ elseif(MSVC) ++ set(AVX512VNNIFLAG "/arch:AVX512") ++ endif() ++ ++ # Check whether compiler supports AVX512vnni intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __m512i f(__m512i x, __m512i y) { ++ __m512i z = _mm512_setzero_epi32(); ++ return _mm512_dpbusd_epi32(z, x, y); ++ } ++ int main(void) { return 0; }" ++ HAVE_AVX512VNNI_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_avx2_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(AVX2FLAG "-mavx2") ++ else() ++ set(AVX2FLAG "/arch:AVX2") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(AVX2FLAG "-mavx2") ++ endif() ++ elseif(MSVC) ++ set(AVX2FLAG "/arch:AVX2") ++ endif() ++ # Check whether compiler supports AVX2 intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __m256i f(__m256i x) { ++ const __m256i y = _mm256_set1_epi16(1); ++ return _mm256_subs_epu16(x, y); ++ } ++ int main(void) { return 0; }" ++ HAVE_AVX2_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_neon_compiler_flag) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ if("${ARCH}" MATCHES "aarch64") ++ set(NEONFLAG "-march=armv8-a+simd") ++ 
else() ++ set(NEONFLAG "-mfpu=neon") ++ endif() ++ endif() ++ endif() ++ # Check whether compiler supports NEON flag ++ set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#if defined(_M_ARM64) || defined(_M_ARM64EC) ++ # include <arm64_neon.h> ++ #else ++ # include <arm_neon.h> ++ #endif ++ int main() { return 0; }" ++ NEON_AVAILABLE FAIL_REGEX "not supported") ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_neon_ld4_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ if("${ARCH}" MATCHES "aarch64") ++ set(NEONFLAG "-march=armv8-a+simd") ++ else() ++ set(NEONFLAG "-mfpu=neon") ++ endif() ++ endif() ++ endif() ++ # Check whether compiler supports loading 4 neon vecs into a register range ++ set(CMAKE_REQUIRED_FLAGS "${NEONFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC)) ++ # include <arm64_neon.h> ++ #else ++ # include <arm_neon.h> ++ #endif ++ int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); } ++ int main(void) { return 0; }" ++ NEON_HAS_LD4) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_pclmulqdq_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(PCLMULFLAG "-mpclmul") ++ endif() ++ endif() ++ # Check whether compiler supports PCLMULQDQ intrinsics ++ if(NOT (APPLE AND "${ARCH}" MATCHES "i386")) ++ # The pclmul code currently crashes on Mac in 32bit mode. Avoid for now. 
++ set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ #include <wmmintrin.h> ++ __m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); } ++ int main(void) { return 0; }" ++ HAVE_PCLMULQDQ_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++ else() ++ set(HAVE_PCLMULQDQ_INTRIN OFF) ++ endif() ++endmacro() ++ ++macro(check_vpclmulqdq_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(VPCLMULFLAG "-mvpclmulqdq -mavx512f") ++ endif() ++ endif() ++ # Check whether compiler supports VPCLMULQDQ intrinsics ++ if(NOT (APPLE AND "${ARCH}" MATCHES "i386")) ++ set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ #include <wmmintrin.h> ++ __m512i f(__m512i a) { ++ __m512i b = _mm512_setzero_si512(); ++ return _mm512_clmulepi64_epi128(a, b, 0x10); ++ } ++ int main(void) { return 0; }" ++ HAVE_VPCLMULQDQ_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++ else() ++ set(HAVE_VPCLMULQDQ_INTRIN OFF) ++ endif() ++endmacro() ++ ++macro(check_ppc_intrinsics) ++ # Check if compiler supports AltiVec ++ set(CMAKE_REQUIRED_FLAGS "-maltivec ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <altivec.h> ++ int main(void) ++ { ++ vector int a = vec_splats(0); ++ vector int b = vec_splats(0); ++ a = vec_add(a, b); ++ return 0; ++ }" ++ HAVE_ALTIVEC ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++ ++ if(HAVE_ALTIVEC) ++ set(PPCFLAGS "-maltivec") ++ endif() ++ ++ set(CMAKE_REQUIRED_FLAGS "-maltivec -mno-vsx ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <altivec.h> ++ int main(void) ++ { ++ vector int a = vec_splats(0); ++ vector int b = vec_splats(0); ++ a = vec_add(a, b); ++ return 0; ++ }" ++ HAVE_NOVSX ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++ ++ if(HAVE_NOVSX) ++ set(PPCFLAGS "${PPCFLAGS} -mno-vsx") ++ endif() ++ ++ # Check if we have what we need for AltiVec optimizations ++ set(CMAKE_REQUIRED_FLAGS "${PPCFLAGS} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ 
check_c_source_compiles( ++ "#include <sys/auxv.h> ++ #ifdef __FreeBSD__ ++ #include <machine/cpu.h> ++ #endif ++ int main() { ++ #ifdef __FreeBSD__ ++ unsigned long hwcap; ++ elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); ++ return (hwcap & PPC_FEATURE_HAS_ALTIVEC); ++ #else ++ return (getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC); ++ #endif ++ }" ++ HAVE_VMX ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_power8_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(POWER8FLAG "-mcpu=power8") ++ endif() ++ endif() ++ # Check if we have what we need for POWER8 optimizations ++ set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <sys/auxv.h> ++ #ifdef __FreeBSD__ ++ #include <machine/cpu.h> ++ #endif ++ int main() { ++ #ifdef __FreeBSD__ ++ unsigned long hwcap; ++ elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap)); ++ return (hwcap & PPC_FEATURE2_ARCH_2_07); ++ #else ++ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); ++ #endif ++ }" ++ HAVE_POWER8_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_rvv_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(RISCVFLAG "-march=rv64gcv") ++ endif() ++ endif() ++ # Check whether compiler supports RVV ++ set(CMAKE_REQUIRED_FLAGS "${RISCVFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <riscv_vector.h> ++ int main() { ++ return 0; ++ }" ++ HAVE_RVV_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_s390_intrinsics) ++ check_c_source_compiles( ++ "#include <sys/auxv.h> ++ #ifndef HWCAP_S390_VXRS ++ #define HWCAP_S390_VXRS HWCAP_S390_VX ++ #endif ++ int main() { ++ return (getauxval(AT_HWCAP) & HWCAP_S390_VXRS); ++ }" ++ HAVE_S390_INTRIN ++ ) ++endmacro() ++ ++macro(check_power9_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(POWER9FLAG "-mcpu=power9") ++ endif() 
++ endif() ++ # Check if we have what we need for POWER9 optimizations ++ set(CMAKE_REQUIRED_FLAGS "${POWER9FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <sys/auxv.h> ++ #ifdef __FreeBSD__ ++ #include <machine/cpu.h> ++ #endif ++ int main() { ++ #ifdef __FreeBSD__ ++ unsigned long hwcap; ++ elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap)); ++ return (hwcap & PPC_FEATURE2_ARCH_3_00); ++ #else ++ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00); ++ #endif ++ }" ++ HAVE_POWER9_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_sse2_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(SSE2FLAG "-msse2") ++ else() ++ set(SSE2FLAG "/arch:SSE2") ++ endif() ++ elseif(MSVC) ++ if(NOT "${ARCH}" MATCHES "x86_64") ++ set(SSE2FLAG "/arch:SSE2") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(SSE2FLAG "-msse2") ++ endif() ++ endif() ++ # Check whether compiler supports SSE2 intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); } ++ int main(void) { return 0; }" ++ HAVE_SSE2_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_ssse3_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(SSSE3FLAG "-mssse3") ++ else() ++ set(SSSE3FLAG "/arch:SSSE3") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(SSSE3FLAG "-mssse3") ++ endif() ++ endif() ++ # Check whether compiler supports SSSE3 intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <immintrin.h> ++ __m128i f(__m128i u) { ++ __m128i v = _mm_set1_epi32(1); ++ return _mm_hadd_epi32(u, v); ++ } ++ int main(void) { return 0; }" ++ HAVE_SSSE3_INTRIN ++ ) ++endmacro() ++ 
++macro(check_sse42_intrinsics) ++ if(CMAKE_C_COMPILER_ID MATCHES "Intel") ++ if(CMAKE_HOST_UNIX OR APPLE) ++ set(SSE42FLAG "-msse4.2") ++ else() ++ set(SSE42FLAG "/arch:SSE4.2") ++ endif() ++ elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ if(NOT NATIVEFLAG) ++ set(SSE42FLAG "-msse4.2") ++ endif() ++ endif() ++ # Check whether compiler supports SSE4.2 intrinsics ++ set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <nmmintrin.h> ++ unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); } ++ int main(void) { return 0; }" ++ HAVE_SSE42_INTRIN ++ ) ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_vgfma_intrinsics) ++ if(NOT NATIVEFLAG) ++ set(VGFMAFLAG "-march=z13") ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU") ++ set(VGFMAFLAG "${VGFMAFLAG} -mzarch") ++ endif() ++ if(CMAKE_C_COMPILER_ID MATCHES "Clang") ++ set(VGFMAFLAG "${VGFMAFLAG} -fzvector") ++ endif() ++ endif() ++ # Check whether compiler supports "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic ++ set(CMAKE_REQUIRED_FLAGS "${VGFMAFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#include <vecintrin.h> ++ int main(void) { ++ unsigned long long a __attribute__((vector_size(16))) = { 0 }; ++ unsigned long long b __attribute__((vector_size(16))) = { 0 }; ++ unsigned char c __attribute__((vector_size(16))) = { 0 }; ++ c = vec_gfmsum_accum_128(a, b, c); ++ return c[0]; ++ }" ++ HAVE_VGFMA_INTRIN FAIL_REGEX "not supported") ++ set(CMAKE_REQUIRED_FLAGS) ++endmacro() ++ ++macro(check_xsave_intrinsics) ++ if(NOT NATIVEFLAG AND NOT MSVC) ++ set(XSAVEFLAG "-mxsave") ++ endif() ++ set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") ++ check_c_source_compiles( ++ "#ifdef _MSC_VER ++ # include <intrin.h> ++ #else ++ # include <x86gprintrin.h> ++ #endif ++ unsigned int f(unsigned int a) { return (int) _xgetbv(a); } ++ int main(void) { return 0; }" ++ HAVE_XSAVE_INTRIN FAIL_REGEX "not supported") ++ 
set(CMAKE_REQUIRED_FLAGS) ++endmacro() +diff --git a/cmake/detect-sanitizer.cmake b/cmake/detect-sanitizer.cmake +index b0a0236..f9521ec 100644 +--- a/cmake/detect-sanitizer.cmake ++++ b/cmake/detect-sanitizer.cmake +@@ -1,6 +1,22 @@ + # detect-sanitizer.cmake -- Detect supported compiler sanitizer flags + # Licensed under the Zlib license, see LICENSE.md for details + ++macro(add_common_sanitizer_flags) ++ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") ++ add_compile_options(-g3) ++ endif() ++ check_c_compiler_flag(-fno-omit-frame-pointer HAVE_NO_OMIT_FRAME_POINTER) ++ if(HAVE_NO_OMIT_FRAME_POINTER) ++ add_compile_options(-fno-omit-frame-pointer) ++ add_link_options(-fno-omit-frame-pointer) ++ endif() ++ check_c_compiler_flag(-fno-optimize-sibling-calls HAVE_NO_OPTIMIZE_SIBLING_CALLS) ++ if(HAVE_NO_OPTIMIZE_SIBLING_CALLS) ++ add_compile_options(-fno-optimize-sibling-calls) ++ add_link_options(-fno-optimize-sibling-calls) ++ endif() ++endmacro() ++ + macro(check_sanitizer_support known_checks supported_checks) + set(available_checks "") + +@@ -14,14 +30,14 @@ macro(check_sanitizer_support known_checks supported_checks) + set(compile_checks "${available_checks},${check}") + endif() + +- set(CMAKE_REQUIRED_FLAGS "-fsanitize=${compile_checks}") ++ set(CMAKE_REQUIRED_FLAGS -fsanitize=${compile_checks}) + +- check_c_source_compiles("int main() { return 0; }" HAS_SANITIZER_${check} ++ check_c_source_compiles("int main() { return 0; }" HAVE_SANITIZER_${check} + FAIL_REGEX "not supported|unrecognized command|unknown option") + + set(CMAKE_REQUIRED_FLAGS) + +- if(HAS_SANITIZER_${check}) ++ if(HAVE_SANITIZER_${check}) + set(available_checks ${compile_checks}) + endif() + endforeach() +@@ -39,7 +55,9 @@ macro(add_address_sanitizer) + check_sanitizer_support("${known_checks}" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Address sanitizer is enabled: ${supported_checks}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} 
-fsanitize=${supported_checks}") ++ add_compile_options(-fsanitize=${supported_checks}) ++ add_link_options(-fsanitize=${supported_checks}) ++ add_common_sanitizer_flags() + else() + message(STATUS "Address sanitizer is not supported") + endif() +@@ -52,7 +70,9 @@ macro(add_address_sanitizer) + check_sanitizer_support("leak" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Leak sanitizer is enabled: ${supported_checks}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}") ++ add_compile_options(-fsanitize=${supported_checks}) ++ add_link_options(-fsanitize=${supported_checks}) ++ add_common_sanitizer_flags() + else() + message(STATUS "Leak sanitizer is not supported") + endif() +@@ -63,7 +83,15 @@ macro(add_memory_sanitizer) + check_sanitizer_support("memory" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Memory sanitizer is enabled: ${supported_checks}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}") ++ add_compile_options(-fsanitize=${supported_checks}) ++ add_link_options(-fsanitize=${supported_checks}) ++ add_common_sanitizer_flags() ++ ++ check_c_compiler_flag(-fsanitize-memory-track-origins HAVE_MEMORY_TRACK_ORIGINS) ++ if(HAVE_MEMORY_TRACK_ORIGINS) ++ add_compile_options(-fsanitize-memory-track-origins) ++ add_link_options(-fsanitize-memory-track-origins) ++ endif() + else() + message(STATUS "Memory sanitizer is not supported") + endif() +@@ -73,7 +101,9 @@ macro(add_thread_sanitizer) + check_sanitizer_support("thread" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Thread sanitizer is enabled: ${supported_checks}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}") ++ add_compile_options(-fsanitize=${supported_checks}) ++ add_link_options(-fsanitize=${supported_checks}) ++ add_common_sanitizer_flags() + else() + message(STATUS "Thread sanitizer is not supported") + endif() +@@ -108,7 +138,7 @@ 
macro(add_undefined_sanitizer) + ) + + # Only check for alignment sanitizer flag if unaligned access is not supported +- if(NOT UNALIGNED_OK) ++ if(NOT WITH_UNALIGNED) + list(APPEND known_checks alignment) + endif() + # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled +@@ -120,14 +150,17 @@ macro(add_undefined_sanitizer) + + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}") +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}") ++ add_compile_options(-fsanitize=${supported_checks}) ++ add_link_options(-fsanitize=${supported_checks}) + + # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if + # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing. +- if(UNALIGNED_OK) +- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-sanitize=alignment") ++ if(WITH_UNALIGNED) ++ add_compile_options(-fno-sanitize=alignment) + endif() ++ ++ add_common_sanitizer_flags() + else() +- message(STATUS "UNdefined behavior sanitizer is not supported") ++ message(STATUS "Undefined behavior sanitizer is not supported") + endif() + endmacro() +diff --git a/cmake/fallback-macros.cmake b/cmake/fallback-macros.cmake +new file mode 100644 +index 0000000..8bc6cf2 +--- /dev/null ++++ b/cmake/fallback-macros.cmake +@@ -0,0 +1,19 @@ ++# fallback-macros.cmake -- CMake fallback macros ++# Copyright (C) 2022 Nathan Moinvaziri ++# Licensed under the Zlib license, see LICENSE.md for details ++ ++# CMake less than version 3.5.2 ++if(NOT COMMAND add_compile_options) ++ macro(add_compile_options options) ++ string(APPEND CMAKE_C_FLAGS ${options}) ++ string(APPEND CMAKE_CXX_FLAGS ${options}) ++ endmacro() ++endif() ++ ++# CMake less than version 3.14 ++if(NOT COMMAND add_link_options) ++ macro(add_link_options options) ++ string(APPEND CMAKE_EXE_LINKER_FLAGS ${options}) ++ string(APPEND CMAKE_SHARED_LINKER_FLAGS ${options}) ++ 
endmacro() ++endif() +diff --git a/cmake/toolchain-aarch64.cmake b/cmake/toolchain-aarch64.cmake +index 31894fd..1e24731 100644 +--- a/cmake/toolchain-aarch64.cmake ++++ b/cmake/toolchain-aarch64.cmake +@@ -2,8 +2,6 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR aarch64) + set(CMAKE_SYSTEM_VERSION 1) + +-message(STATUS "Using cross-compile toolchain: ${CROSS_COMPILE_TOOLCHAIN}") +- + set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu") + set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu") + +@@ -14,13 +12,13 @@ SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/cmake/toolchain-arm.cmake b/cmake/toolchain-arm.cmake +index 0e3c5c3..1bdd8d2 100644 +--- a/cmake/toolchain-arm.cmake ++++ b/cmake/toolchain-arm.cmake +@@ -2,7 +2,12 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_SYSTEM_VERSION 1) + +-message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}") ++if(NOT DEFINED CMAKE_C_COMPILER_TARGET) ++ set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabi) ++endif() ++if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET) ++ set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabi) ++endif() + + set(CMAKE_CROSSCOMPILING TRUE) + set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L 
/usr/${CMAKE_C_COMPILER_TARGET}/) +@@ -12,13 +17,13 @@ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/cmake/toolchain-armhf.cmake b/cmake/toolchain-armhf.cmake +new file mode 100644 +index 0000000..007859c +--- /dev/null ++++ b/cmake/toolchain-armhf.cmake +@@ -0,0 +1,25 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR arm) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++set(CMAKE_C_COMPILER_TARGET arm-linux-gnueabihf) ++set(CMAKE_CXX_COMPILER_TARGET arm-linux-gnueabihf) ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) 
++endif() +diff --git a/cmake/toolchain-llvm-mingw-aarch64.cmake b/cmake/toolchain-llvm-mingw-aarch64.cmake +new file mode 100644 +index 0000000..4da1e2c +--- /dev/null ++++ b/cmake/toolchain-llvm-mingw-aarch64.cmake +@@ -0,0 +1,41 @@ ++set(CMAKE_SYSTEM_NAME Windows) ++set(CMAKE_C_COMPILER_FRONTEND_VARIANT GNU) ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR wine) ++ ++set(CMAKE_C_COMPILER_TARGET aarch64-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET aarch64-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET aarch64-w64-mingw32) ++set(CMAKE_SYSTEM_PROCESSOR aarch64) ++ ++# Required to propagate 'LLVM_MINGW_ROOT' variables to C compiler feature test. ++set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES LLVM_MINGW_ROOT) ++ ++if(NOT LLVM_MINGW_ROOT) ++ set(LLVM_MINGW_ROOT $ENV{LLVM_MINGW_ROOT}) ++endif() ++cmake_path(CONVERT "${LLVM_MINGW_ROOT}" TO_CMAKE_PATH_LIST LLVM_MINGW_ROOT) ++ ++set(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT} ${LLVM_MINGW_ROOT}/aarch64-w64-mingw32) ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ALWAYS) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-clang HINTS ${LLVM_MINGW_ROOT}/bin) ++if(NOT C_COMPILER_FULL_PATH) ++message(FATAL_ERROR "Compiler for ${CMAKE_C_COMPILER_TARGET} not found. 
Try setting llvm-mingw root path to LLVM_MINGW_ROOT variable!") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-clang++ HINTS ${LLVM_MINGW_ROOT}/bin) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH ${CMAKE_RC_COMPILER_TARGET}-windres HINTS ${LLVM_MINGW_ROOT}/bin) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() ++ ++add_compile_options($<$:-gcodeview>) ++add_link_options(-Wl,-pdb=) +diff --git a/cmake/toolchain-llvm-mingw-armv7.cmake b/cmake/toolchain-llvm-mingw-armv7.cmake +new file mode 100644 +index 0000000..d077309 +--- /dev/null ++++ b/cmake/toolchain-llvm-mingw-armv7.cmake +@@ -0,0 +1,41 @@ ++set(CMAKE_SYSTEM_NAME Windows) ++set(CMAKE_C_COMPILER_FRONTEND_VARIANT GNU) ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR wine) ++ ++set(CMAKE_C_COMPILER_TARGET armv7-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET armv7-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET armv7-w64-mingw32) ++set(CMAKE_SYSTEM_PROCESSOR armv7) ++ ++# Required to propagate 'LLVM_MINGW_ROOT' variables to C compiler feature test. ++set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES LLVM_MINGW_ROOT) ++ ++if(NOT LLVM_MINGW_ROOT) ++ set(LLVM_MINGW_ROOT $ENV{LLVM_MINGW_ROOT}) ++endif() ++cmake_path(CONVERT "${LLVM_MINGW_ROOT}" TO_CMAKE_PATH_LIST LLVM_MINGW_ROOT) ++ ++set(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT} ${LLVM_MINGW_ROOT}/armv7-w64-mingw32) ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ALWAYS) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-clang HINTS ${LLVM_MINGW_ROOT}/bin) ++if(NOT C_COMPILER_FULL_PATH) ++message(FATAL_ERROR "Compiler for ${CMAKE_C_COMPILER_TARGET} not found. 
Try setting llvm-mingw root path to LLVM_MINGW_ROOT variable!") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-clang++ HINTS ${LLVM_MINGW_ROOT}/bin) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH ${CMAKE_RC_COMPILER_TARGET}-windres HINTS ${LLVM_MINGW_ROOT}/bin) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() ++ ++add_compile_options($<$:-gcodeview>) ++add_link_options(-Wl,-pdb=) +diff --git a/cmake/toolchain-llvm-mingw-i686.cmake b/cmake/toolchain-llvm-mingw-i686.cmake +new file mode 100644 +index 0000000..17a0aa7 +--- /dev/null ++++ b/cmake/toolchain-llvm-mingw-i686.cmake +@@ -0,0 +1,41 @@ ++set(CMAKE_SYSTEM_NAME Windows) ++set(CMAKE_C_COMPILER_FRONTEND_VARIANT GNU) ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR wine) ++ ++set(CMAKE_C_COMPILER_TARGET i686-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET i686-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET i686-w64-mingw32) ++set(CMAKE_SYSTEM_PROCESSOR i686) ++ ++# Required to propagate 'LLVM_MINGW_ROOT' variables to C compiler feature test. ++set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES LLVM_MINGW_ROOT) ++ ++if(NOT LLVM_MINGW_ROOT) ++ set(LLVM_MINGW_ROOT $ENV{LLVM_MINGW_ROOT}) ++endif() ++cmake_path(CONVERT "${LLVM_MINGW_ROOT}" TO_CMAKE_PATH_LIST LLVM_MINGW_ROOT) ++ ++set(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT} ${LLVM_MINGW_ROOT}/i686-w64-mingw32) ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ALWAYS) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-clang HINTS ${LLVM_MINGW_ROOT}/bin) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Compiler for ${CMAKE_C_COMPILER_TARGET} not found. 
Try setting llvm-mingw root path to LLVM_MINGW_ROOT variable!") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-clang++ HINTS ${LLVM_MINGW_ROOT}/bin) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH ${CMAKE_RC_COMPILER_TARGET}-windres HINTS ${LLVM_MINGW_ROOT}/bin) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() ++ ++add_compile_options($<$:-gcodeview>) ++add_link_options(-Wl,-pdb=) +diff --git a/cmake/toolchain-llvm-mingw-x86_64.cmake b/cmake/toolchain-llvm-mingw-x86_64.cmake +new file mode 100644 +index 0000000..e519562 +--- /dev/null ++++ b/cmake/toolchain-llvm-mingw-x86_64.cmake +@@ -0,0 +1,41 @@ ++set(CMAKE_SYSTEM_NAME Windows) ++set(CMAKE_C_COMPILER_FRONTEND_VARIANT GNU) ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR wine) ++ ++set(CMAKE_C_COMPILER_TARGET x86_64-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET x86_64-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET x86_64-w64-mingw32) ++set(CMAKE_SYSTEM_PROCESSOR x86_64) ++ ++# Required to propagate 'LLVM_MINGW_ROOT' variables to C compiler feature test. ++set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES LLVM_MINGW_ROOT) ++ ++if(NOT LLVM_MINGW_ROOT) ++ set(LLVM_MINGW_ROOT $ENV{LLVM_MINGW_ROOT}) ++endif() ++cmake_path(CONVERT "${LLVM_MINGW_ROOT}" TO_CMAKE_PATH_LIST LLVM_MINGW_ROOT) ++ ++set(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT} ${LLVM_MINGW_ROOT}/x86_64-w64-mingw32) ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ALWAYS) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-clang HINTS ${LLVM_MINGW_ROOT}/bin) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Compiler for ${CMAKE_C_COMPILER_TARGET} not found. 
Try setting llvm-mingw root path to LLVM_MINGW_ROOT variable!") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-clang++ HINTS ${LLVM_MINGW_ROOT}/bin) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH ${CMAKE_RC_COMPILER_TARGET}-windres HINTS ${LLVM_MINGW_ROOT}/bin) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() ++ ++add_compile_options($<$:-gcodeview>) ++add_link_options(-Wl,-pdb=) +diff --git a/cmake/toolchain-mingw-i686.cmake b/cmake/toolchain-mingw-i686.cmake +index 588ec0e..b95e63f 100644 +--- a/cmake/toolchain-mingw-i686.cmake ++++ b/cmake/toolchain-mingw-i686.cmake +@@ -1,11 +1,8 @@ + set(CMAKE_SYSTEM_NAME Windows) + +-set(CMAKE_C_COMPILER_TARGET i686) +-set(CMAKE_CXX_COMPILER_TARGET i686) +- +-set(CMAKE_C_COMPILER i686-w64-mingw32-gcc) +-set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++) +-set(CMAKE_RC_COMPILER i686-w64-mingw32-windres) ++set(CMAKE_C_COMPILER_TARGET i686-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET i686-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET i686-w64-mingw32) + + set(CMAKE_CROSSCOMPILING TRUE) + set(CMAKE_CROSSCOMPILING_EMULATOR wine) +@@ -14,3 +11,25 @@ set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++# Prefer posix gcc variant for gtest pthread support ++find_program(C_COMPILER_FULL_PATH NAMES ++ ${CMAKE_C_COMPILER_TARGET}-gcc-posix ++ ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES ++ ${CMAKE_CXX_COMPILER_TARGET}-g++-posix ++ ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ 
set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH NAMES ++ ${CMAKE_RC_COMPILER_TARGET}-windres) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() +diff --git a/cmake/toolchain-mingw-x86_64.cmake b/cmake/toolchain-mingw-x86_64.cmake +index c778b72..8c660b0 100644 +--- a/cmake/toolchain-mingw-x86_64.cmake ++++ b/cmake/toolchain-mingw-x86_64.cmake +@@ -1,11 +1,8 @@ + set(CMAKE_SYSTEM_NAME Windows) + +-set(CMAKE_C_COMPILER_TARGET x86_64) +-set(CMAKE_CXX_COMPILER_TARGET x86_64) +- +-set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) +-set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) +-set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) ++set(CMAKE_C_COMPILER_TARGET x86_64-w64-mingw32) ++set(CMAKE_CXX_COMPILER_TARGET x86_64-w64-mingw32) ++set(CMAKE_RC_COMPILER_TARGET x86_64-w64-mingw32) + + set(CMAKE_CROSSCOMPILING TRUE) + set(CMAKE_CROSSCOMPILING_EMULATOR wine) +@@ -14,3 +11,24 @@ set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++ ++# Prefer posix gcc variant for gtest pthread support ++find_program(C_COMPILER_FULL_PATH NAMES ++ ${CMAKE_C_COMPILER_TARGET}-gcc-posix ++ ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES ++ ${CMAKE_CXX_COMPILER_TARGET}-g++-posix ++ ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() ++ ++find_program(RC_COMPILER_FULL_PATH NAMES ${CMAKE_RC_COMPILER_TARGET}-windres) ++if(RC_COMPILER_FULL_PATH) ++ set(CMAKE_RC_COMPILER ${RC_COMPILER_FULL_PATH}) ++endif() +diff --git a/cmake/toolchain-mips.cmake b/cmake/toolchain-mips.cmake +new file mode 100644 +index 
0000000..69a1025 +--- /dev/null ++++ b/cmake/toolchain-mips.cmake +@@ -0,0 +1,29 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR mips) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++if(NOT DEFINED CMAKE_C_COMPILER_TARGET) ++ set(CMAKE_C_COMPILER_TARGET mips-linux-gnu) ++endif() ++if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET) ++ set(CMAKE_CXX_COMPILER_TARGET mips-linux-gnu) ++endif() ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-mips -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() +diff --git a/cmake/toolchain-mips64.cmake b/cmake/toolchain-mips64.cmake +new file mode 100644 +index 0000000..8ef3b6b +--- /dev/null ++++ b/cmake/toolchain-mips64.cmake +@@ -0,0 +1,29 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR mips64) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++if(NOT DEFINED CMAKE_C_COMPILER_TARGET) ++ set(CMAKE_C_COMPILER_TARGET mips64-linux-gnuabi64) ++endif() ++if(NOT DEFINED CMAKE_CXX_COMPILER_TARGET) ++ set(CMAKE_CXX_COMPILER_TARGET mips64-linux-gnuabi64) ++endif() ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-mips64 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH 
NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() +diff --git a/cmake/toolchain-powerpc.cmake b/cmake/toolchain-powerpc.cmake +index 4f7f8e9..f097133 100644 +--- a/cmake/toolchain-powerpc.cmake ++++ b/cmake/toolchain-powerpc.cmake +@@ -2,24 +2,24 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR powerpc) + set(CMAKE_SYSTEM_VERSION 1) + +-set(CMAKE_C_COMPILER_TARGET "powerpc-linux-gnu") +-set(CMAKE_CXX_COMPILER_TARGET "powerpc-linux-gnu") ++set(CMAKE_C_COMPILER_TARGET powerpc-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET powerpc-linux-gnu) + + set(CMAKE_CROSSCOMPILING TRUE) +-set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc -cpu 7400 -L /usr/${CMAKE_C_COMPILER_TARGET}/) + + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git 
a/cmake/toolchain-powerpc64-clang.cmake b/cmake/toolchain-powerpc64-clang.cmake +new file mode 100644 +index 0000000..f986796 +--- /dev/null ++++ b/cmake/toolchain-powerpc64-clang.cmake +@@ -0,0 +1,16 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR ppc64) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++set(CMAKE_C_COMPILER clang) ++set(CMAKE_C_COMPILER_TARGET powerpc64-linux-gnu) ++set(CMAKE_CXX_COMPILER clang++) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64-linux-gnu) ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -cpu power9 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +diff --git a/cmake/toolchain-powerpc64-power9.cmake b/cmake/toolchain-powerpc64-power9.cmake +new file mode 100644 +index 0000000..2ea190a +--- /dev/null ++++ b/cmake/toolchain-powerpc64-power9.cmake +@@ -0,0 +1,25 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR ppc64) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++set(CMAKE_C_COMPILER_TARGET powerpc64-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64-linux-gnu) ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -cpu power9 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() +diff 
--git a/cmake/toolchain-powerpc64.cmake b/cmake/toolchain-powerpc64.cmake +index 4be3bbd..80d8b90 100644 +--- a/cmake/toolchain-powerpc64.cmake ++++ b/cmake/toolchain-powerpc64.cmake +@@ -2,24 +2,24 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR ppc64) + set(CMAKE_SYSTEM_VERSION 1) + +-set(CMAKE_C_COMPILER_TARGET "powerpc64-linux-gnu") +-set(CMAKE_CXX_COMPILER_TARGET "powerpc64-linux-gnu") ++set(CMAKE_C_COMPILER_TARGET powerpc64-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64-linux-gnu) + + set(CMAKE_CROSSCOMPILING TRUE) +-set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -cpu power8 -L /usr/${CMAKE_C_COMPILER_TARGET}/) + + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/cmake/toolchain-powerpc64le-clang.cmake b/cmake/toolchain-powerpc64le-clang.cmake +new file mode 100644 +index 0000000..b3423c5 +--- /dev/null ++++ b/cmake/toolchain-powerpc64le-clang.cmake +@@ -0,0 +1,16 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR ppc64le) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++set(CMAKE_C_COMPILER clang) ++set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu) 
++set(CMAKE_CXX_COMPILER clang++) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64le-linux-gnu) ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -cpu power9 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) +diff --git a/cmake/toolchain-powerpc64le-power9.cmake b/cmake/toolchain-powerpc64le-power9.cmake +new file mode 100644 +index 0000000..5ac8c7a +--- /dev/null ++++ b/cmake/toolchain-powerpc64le-power9.cmake +@@ -0,0 +1,25 @@ ++set(CMAKE_SYSTEM_NAME Linux) ++set(CMAKE_SYSTEM_PROCESSOR ppc64le) ++set(CMAKE_SYSTEM_VERSION 1) ++ ++set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64le-linux-gnu) ++ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -cpu power9 -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++ ++set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) ++set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) ++set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) ++ ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) ++if(NOT C_COMPILER_FULL_PATH) ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") ++endif() ++set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) ++ ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) ++if(CXX_COMPILER_FULL_PATH) ++ set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) ++endif() +diff --git a/cmake/toolchain-powerpc64le.cmake b/cmake/toolchain-powerpc64le.cmake +index 5535f61..68381de 100644 +--- a/cmake/toolchain-powerpc64le.cmake ++++ b/cmake/toolchain-powerpc64le.cmake +@@ -2,24 +2,24 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR ppc64le) + set(CMAKE_SYSTEM_VERSION 1) + +-set(CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu") 
+-set(CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu") ++set(CMAKE_C_COMPILER_TARGET powerpc64le-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET powerpc64le-linux-gnu) + + set(CMAKE_CROSSCOMPILING TRUE) +-set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -L /usr/${CMAKE_C_COMPILER_TARGET}/) ++set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -cpu power8 -L /usr/${CMAKE_C_COMPILER_TARGET}/) + + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/cmake/toolchain-riscv.cmake b/cmake/toolchain-riscv.cmake +new file mode 100644 +index 0000000..9cf8fdb +--- /dev/null ++++ b/cmake/toolchain-riscv.cmake +@@ -0,0 +1,28 @@ ++set(CMAKE_CROSSCOMPILING TRUE) ++set(CMAKE_SYSTEM_NAME "Linux") ++set(CMAKE_SYSTEM_PROCESSOR "riscv64") ++ ++# Avoid to use system path for cross-compile ++set(CMAKE_FIND_USE_CMAKE_SYSTEM_PATH FALSE) ++ ++set(TOOLCHAIN_PATH "" CACHE STRING "The toolchain path.") ++if(NOT TOOLCHAIN_PATH) ++ set(TOOLCHAIN_PATH ${CMAKE_SOURCE_DIR}/prebuilt-riscv-toolchain-qemu/riscv-clang) ++endif() ++ ++set(TOOLCHAIN_PREFIX "riscv64-unknown-linux-gnu-" CACHE STRING "The toolchain prefix.") ++set(QEMU_PATH "" CACHE STRING "The qemu path.") ++if(NOT QEMU_PATH) ++ set(QEMU_PATH 
${CMAKE_SOURCE_DIR}/prebuilt-riscv-toolchain-qemu/riscv-qemu/bin/qemu-riscv64) ++endif() ++ ++# toolchain setting ++set(CMAKE_C_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang") ++set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PATH}/bin/${TOOLCHAIN_PREFIX}clang++") ++ ++# disable auto-vectorizer ++add_compile_options(-fno-vectorize -fno-slp-vectorize) ++ ++# emulator setting ++set(QEMU_CPU_OPTION "rv64,zba=true,zbb=true,zbc=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0") ++set(CMAKE_CROSSCOMPILING_EMULATOR ${QEMU_PATH} -cpu ${QEMU_CPU_OPTION} -L ${TOOLCHAIN_PATH}/sysroot/) +diff --git a/cmake/toolchain-s390x.cmake b/cmake/toolchain-s390x.cmake +index 41bc0d1..9455a2b 100644 +--- a/cmake/toolchain-s390x.cmake ++++ b/cmake/toolchain-s390x.cmake +@@ -2,8 +2,8 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR s390x) + set(CMAKE_SYSTEM_VERSION 1) + +-set(CMAKE_C_COMPILER_TARGET "s390x-linux-gnu") +-set(CMAKE_CXX_COMPILER_TARGET "s390x-linux-gnu") ++set(CMAKE_C_COMPILER_TARGET s390x-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET s390x-linux-gnu) + + set(CMAKE_CROSSCOMPILING TRUE) + set(CMAKE_CROSSCOMPILING_EMULATOR qemu-s390x -L /usr/${CMAKE_C_COMPILER_TARGET}/) +@@ -13,13 +13,13 @@ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER 
${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/cmake/toolchain-sparc64.cmake b/cmake/toolchain-sparc64.cmake +index f0cd995..16161a7 100644 +--- a/cmake/toolchain-sparc64.cmake ++++ b/cmake/toolchain-sparc64.cmake +@@ -2,8 +2,8 @@ set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR sparc64) + set(CMAKE_SYSTEM_VERSION 1) + +-set(CMAKE_C_COMPILER_TARGET "sparc64-linux-gnu") +-set(CMAKE_CXX_COMPILER_TARGET "sparc64-linux-gnu") ++set(CMAKE_C_COMPILER_TARGET sparc64-linux-gnu) ++set(CMAKE_CXX_COMPILER_TARGET sparc64-linux-gnu) + + set(CMAKE_CROSSCOMPILING TRUE) + set(CMAKE_CROSSCOMPILING_EMULATOR qemu-sparc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/) +@@ -13,13 +13,13 @@ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +-find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc) ++find_program(C_COMPILER_FULL_PATH NAMES ${CMAKE_C_COMPILER_TARGET}-gcc) + if(NOT C_COMPILER_FULL_PATH) +- message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found") ++ message(FATAL_ERROR "Cross-compiler for ${CMAKE_C_COMPILER_TARGET} not found") + endif() + set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH}) + +-find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++) ++find_program(CXX_COMPILER_FULL_PATH NAMES g++-${CMAKE_CXX_COMPILER_TARGET} ${CMAKE_CXX_COMPILER_TARGET}-g++) + if(CXX_COMPILER_FULL_PATH) + set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH}) + endif() +diff --git a/compare256.c b/compare256.c +new file mode 100644 +index 0000000..82551cd +--- /dev/null ++++ b/compare256.c +@@ -0,0 +1,180 @@ ++/* compare256.c -- 256 byte memory comparison with match length return ++ * Copyright (C) 2020 Nathan Moinvaziri ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "zutil_p.h" ++#include "fallback_builtins.h" ++ ++/* ALIGNED, byte comparison */ ++static inline uint32_t compare256_c_static(const 
uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src0 += 1, src1 += 1, len += 1; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_c_static(src0, src1); ++} ++ ++#define LONGEST_MATCH longest_match_c ++#define COMPARE256 compare256_c_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_c ++#define COMPARE256 compare256_c_static ++ ++#include "match_tpl.h" ++ ++#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN ++/* 16-bit unaligned integer comparison */ ++static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ if (zng_memcmp_2(src0, src1) != 0) ++ return len + (*src0 == *src1); ++ src0 += 2, src1 += 2, len += 2; ++ ++ if (zng_memcmp_2(src0, src1) != 0) ++ return len + (*src0 == *src1); ++ src0 += 2, src1 += 2, len += 2; ++ ++ if (zng_memcmp_2(src0, src1) != 0) ++ return len + (*src0 == *src1); ++ src0 += 2, src1 += 2, len += 2; ++ ++ if (zng_memcmp_2(src0, src1) != 0) ++ return len + (*src0 == *src1); ++ src0 += 2, src1 += 2, len += 2; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_unaligned_16_static(src0, src1); ++} ++ ++#define LONGEST_MATCH 
longest_match_unaligned_16 ++#define COMPARE256 compare256_unaligned_16_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_unaligned_16 ++#define COMPARE256 compare256_unaligned_16_static ++ ++#include "match_tpl.h" ++ ++#ifdef HAVE_BUILTIN_CTZ ++/* 32-bit unaligned integer comparison */ ++static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ uint32_t sv, mv, diff; ++ ++ memcpy(&sv, src0, sizeof(sv)); ++ memcpy(&mv, src1, sizeof(mv)); ++ ++ diff = sv ^ mv; ++ if (diff) { ++ uint32_t match_byte = __builtin_ctz(diff) / 8; ++ return len + match_byte; ++ } ++ ++ src0 += 4, src1 += 4, len += 4; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_unaligned_32_static(src0, src1); ++} ++ ++#define LONGEST_MATCH longest_match_unaligned_32 ++#define COMPARE256 compare256_unaligned_32_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_unaligned_32 ++#define COMPARE256 compare256_unaligned_32_static ++ ++#include "match_tpl.h" ++ ++#endif ++ ++#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) ++/* UNALIGNED64_OK, 64-bit integer comparison */ ++static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ uint64_t sv, mv, diff; ++ ++ memcpy(&sv, src0, sizeof(sv)); ++ memcpy(&mv, src1, sizeof(mv)); ++ ++ diff = sv ^ mv; ++ if (diff) { ++ uint64_t match_byte = __builtin_ctzll(diff) / 8; ++ return len + (uint32_t)match_byte; ++ } ++ ++ src0 += 8, src1 += 8, len += 8; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++Z_INTERNAL uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1) { ++ return compare256_unaligned_64_static(src0, src1); ++} ++ ++#define LONGEST_MATCH 
longest_match_unaligned_64 ++#define COMPARE256 compare256_unaligned_64_static ++ ++#include "match_tpl.h" ++ ++#define LONGEST_MATCH_SLOW ++#define LONGEST_MATCH longest_match_slow_unaligned_64 ++#define COMPARE256 compare256_unaligned_64_static ++ ++#include "match_tpl.h" ++ ++#endif ++ ++#endif +diff --git a/compare256_rle.h b/compare256_rle.h +new file mode 100644 +index 0000000..0f3998d +--- /dev/null ++++ b/compare256_rle.h +@@ -0,0 +1,134 @@ ++/* compare256_rle.h -- 256 byte run-length encoding comparison ++ * Copyright (C) 2022 Nathan Moinvaziri ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "fallback_builtins.h" ++ ++typedef uint32_t (*compare256_rle_func)(const uint8_t* src0, const uint8_t* src1); ++ ++/* ALIGNED, byte comparison */ ++static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ ++ do { ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ if (*src0 != *src1) ++ return len; ++ src1 += 1, len += 1; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++#ifdef UNALIGNED_OK ++/* 16-bit unaligned integer comparison */ ++static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t len = 0; ++ uint16_t src0_cmp, src1_cmp; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ ++ do { ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ if (src0_cmp != src1_cmp) ++ return len + (*src0 == *src1); ++ src1 += 2, len += 2; ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ if (src0_cmp != src1_cmp) ++ return len + (*src0 == 
*src1); ++ src1 += 2, len += 2; ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ if (src0_cmp != src1_cmp) ++ return len + (*src0 == *src1); ++ src1 += 2, len += 2; ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ if (src0_cmp != src1_cmp) ++ return len + (*src0 == *src1); ++ src1 += 2, len += 2; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++#ifdef HAVE_BUILTIN_CTZ ++/* 32-bit unaligned integer comparison */ ++static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t sv, len = 0; ++ uint16_t src0_cmp; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ sv = ((uint32_t)src0_cmp << 16) | src0_cmp; ++ ++ do { ++ uint32_t mv, diff; ++ ++ memcpy(&mv, src1, sizeof(mv)); ++ ++ diff = sv ^ mv; ++ if (diff) { ++ uint32_t match_byte = __builtin_ctz(diff) / 8; ++ return len + match_byte; ++ } ++ ++ src1 += 4, len += 4; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++#endif ++ ++#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) ++/* 64-bit unaligned integer comparison */ ++static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) { ++ uint32_t src0_cmp32, len = 0; ++ uint16_t src0_cmp; ++ uint64_t sv; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ src0_cmp32 = ((uint32_t)src0_cmp << 16) | src0_cmp; ++ sv = ((uint64_t)src0_cmp32 << 32) | src0_cmp32; ++ ++ do { ++ uint64_t mv, diff; ++ ++ memcpy(&mv, src1, sizeof(mv)); ++ ++ diff = sv ^ mv; ++ if (diff) { ++ uint64_t match_byte = __builtin_ctzll(diff) / 8; ++ return len + (uint32_t)match_byte; ++ } ++ ++ src1 += 8, len += 8; ++ } while (len < 256); ++ ++ return 256; ++} ++ ++#endif ++ ++#endif ++ +diff --git a/compress.c b/compress.c +index fded2a4..66118e4 100644 +--- a/compress.c ++++ b/compress.c +@@ -5,11 +5,6 @@ + + #include "zbuild.h" + #include "zutil.h" +-#if defined(ZLIB_COMPAT) +-# include "zlib.h" +-#else +-# include "zlib-ng.h" +-#endif + + /* 
=========================================================================== + * Architecture-specific hooks. +@@ -33,8 +28,8 @@ + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. + */ +-int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, +- z_size_t sourceLen, int level) { ++int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_uintmax_t *destLen, const unsigned char *source, ++ z_uintmax_t sourceLen, int level) { + PREFIX3(stream) stream; + int err; + const unsigned int max = (unsigned int)-1; +@@ -68,14 +63,14 @@ int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const uns + err = PREFIX(deflate)(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + +- *destLen = (z_size_t)stream.total_out; ++ *destLen = stream.total_out; + PREFIX(deflateEnd)(&stream); + return err == Z_STREAM_END ? Z_OK : err; + } + + /* =========================================================================== + */ +-int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) { ++int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_uintmax_t *destLen, const unsigned char *source, z_uintmax_t sourceLen) { + return PREFIX(compress2)(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); + } + +@@ -83,8 +78,8 @@ int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsi + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +-z_size_t Z_EXPORT PREFIX(compressBound)(z_size_t sourceLen) { +- z_size_t complen = DEFLATE_BOUND_COMPLEN(sourceLen); ++z_uintmax_t Z_EXPORT PREFIX(compressBound)(z_uintmax_t sourceLen) { ++ z_uintmax_t complen = DEFLATE_BOUND_COMPLEN(sourceLen); + + if (complen > 0) + /* Architecture-specific code provided an upper bound. 
*/ +@@ -92,6 +87,8 @@ z_size_t Z_EXPORT PREFIX(compressBound)(z_size_t sourceLen) { + + #ifndef NO_QUICK_STRATEGY + return sourceLen /* The source size itself */ ++ + (sourceLen == 0 ? 1 : 0) /* Always at least one byte for any input */ ++ + (sourceLen < 9 ? 1 : 0) /* One extra byte for lengths less than 9 */ + + DEFLATE_QUICK_OVERHEAD(sourceLen) /* Source encoding overhead, padded to next full byte */ + + DEFLATE_BLOCK_OVERHEAD /* Deflate block overhead bytes */ + + ZLIB_WRAPLEN; /* zlib wrapper */ +diff --git a/cpu_features.c b/cpu_features.c +new file mode 100644 +index 0000000..3585172 +--- /dev/null ++++ b/cpu_features.c +@@ -0,0 +1,23 @@ ++/* cpu_features.c -- CPU architecture feature check ++ * Copyright (C) 2017 Hans Kristian Rosbach ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "cpu_features.h" ++#include <string.h> ++ ++Z_INTERNAL void cpu_check_features(struct cpu_features *features) { ++ memset(features, 0, sizeof(struct cpu_features)); ++#if defined(X86_FEATURES) ++ x86_check_features(&features->x86); ++#elif defined(ARM_FEATURES) ++ arm_check_features(&features->arm); ++#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) ++ power_check_features(&features->power); ++#elif defined(S390_FEATURES) ++ s390_check_features(&features->s390); ++#elif defined(RISCV_FEATURES) ++ riscv_check_features(&features->riscv); ++#endif ++} +diff --git a/cpu_features.h b/cpu_features.h +new file mode 100644 +index 0000000..00fa6c7 +--- /dev/null ++++ b/cpu_features.h +@@ -0,0 +1,303 @@ ++/* cpu_features.h -- CPU architecture feature check ++ * Copyright (C) 2017 Hans Kristian Rosbach ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#ifndef CPU_FEATURES_H_ ++#define CPU_FEATURES_H_ ++ ++#include "adler32_fold.h" ++#include "crc32_fold.h" ++ ++#if defined(X86_FEATURES) ++# include "arch/x86/x86_features.h" ++# include "fallback_builtins.h" ++#elif defined(ARM_FEATURES) ++# 
include "arch/arm/arm_features.h" ++#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) ++# include "arch/power/power_features.h" ++#elif defined(S390_FEATURES) ++# include "arch/s390/s390_features.h" ++#elif defined(RISCV_FEATURES) ++# include "arch/riscv/riscv_features.h" ++#endif ++ ++struct cpu_features { ++#if defined(X86_FEATURES) ++ struct x86_cpu_features x86; ++#elif defined(ARM_FEATURES) ++ struct arm_cpu_features arm; ++#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) ++ struct power_cpu_features power; ++#elif defined(S390_FEATURES) ++ struct s390_cpu_features s390; ++#elif defined(RISCV_FEATURES) ++ struct riscv_cpu_features riscv; ++#else ++ char empty; ++#endif ++}; ++ ++extern void cpu_check_features(struct cpu_features *features); ++ ++/* adler32 */ ++typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); ++ ++extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); ++#ifdef ARM_NEON ++extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef PPC_VMX ++extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef RISCV_RVV ++extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef X86_SSSE3 ++extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef X86_AVX2 ++extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef X86_AVX512 ++extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef X86_AVX512VNNI ++extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++#ifdef POWER8_VSX ++extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); ++#endif ++ ++/* adler32 folding */ ++#ifdef RISCV_RVV ++extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++#endif ++#ifdef 
X86_SSE42 ++extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++#endif ++#ifdef X86_AVX2 ++extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++#endif ++#ifdef X86_AVX512 ++extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++#endif ++#ifdef X86_AVX512VNNI ++extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); ++#endif ++ ++/* CRC32 folding */ ++#ifdef X86_PCLMULQDQ_CRC ++extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); ++extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); ++extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); ++extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); ++extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); ++#endif ++#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) ++extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); ++extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); ++extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); ++extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); ++extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); ++#endif ++ ++/* memory chunking */ ++extern uint32_t chunksize_c(void); ++extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#ifdef X86_SSE2 ++extern uint32_t chunksize_sse2(void); ++extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++#ifdef X86_SSSE3 ++extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++#ifdef X86_AVX2 ++extern uint32_t 
chunksize_avx2(void); ++extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++#ifdef ARM_NEON ++extern uint32_t chunksize_neon(void); ++extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++#ifdef POWER8_VSX ++extern uint32_t chunksize_power8(void); ++extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++#ifdef RISCV_RVV ++extern uint32_t chunksize_rvv(void); ++extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#endif ++ ++#ifdef ZLIB_COMPAT ++typedef struct z_stream_s z_stream; ++#else ++typedef struct zng_stream_s zng_stream; ++#endif ++ ++/* inflate fast loop */ ++extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); ++#ifdef X86_SSE2 ++extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start); ++#endif ++#ifdef X86_SSSE3 ++extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); ++#endif ++#ifdef X86_AVX2 ++extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start); ++#endif ++#ifdef ARM_NEON ++extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); ++#endif ++#ifdef POWER8_VSX ++extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); ++#endif ++#ifdef RISCV_RVV ++extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); ++#endif ++ ++/* CRC32 */ ++typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); ++ ++extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); ++#ifdef ARM_ACLE ++extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); ++#elif defined(POWER8_VSX) ++extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); ++#elif defined(S390_CRC32_VX) ++extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); ++#endif ++ ++/* compare256 */ ++typedef uint32_t 
(*compare256_func)(const uint8_t *src0, const uint8_t *src1); ++ ++extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); ++#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN ++extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); ++#ifdef HAVE_BUILTIN_CTZ ++extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); ++#endif ++#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) ++extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); ++#endif ++#endif ++#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); ++#endif ++#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); ++#endif ++#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) ++extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); ++#endif ++#ifdef POWER9 ++extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); ++#endif ++#ifdef RISCV_RVV ++extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); ++#endif ++ ++#ifdef DEFLATE_H_ ++/* insert_string */ ++extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); ++#ifdef X86_SSE42 ++extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count); ++#elif defined(ARM_ACLE) ++extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); ++#endif ++ ++/* longest_match */ ++extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match); ++#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN ++extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); ++#ifdef HAVE_BUILTIN_CTZ ++extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); ++#endif ++#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) 
++extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); ++#endif ++#endif ++#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); ++#endif ++#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); ++#endif ++#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) ++extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); ++#endif ++#ifdef POWER9 ++extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); ++#endif ++#ifdef RISCV_RVV ++extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); ++#endif ++ ++/* longest_match_slow */ ++extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); ++#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN ++extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); ++extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); ++#ifdef UNALIGNED64_OK ++extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); ++#endif ++#endif ++#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); ++#endif ++#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) ++extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); ++#endif ++#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) ++extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); ++#endif ++#ifdef POWER9 ++extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); ++#endif ++#ifdef RISCV_RVV ++extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); ++#endif ++ ++/* quick_insert_string */ ++extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); ++#ifdef X86_SSE42 
++extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str); ++#elif defined(ARM_ACLE) ++extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); ++#endif ++ ++/* slide_hash */ ++typedef void (*slide_hash_func)(deflate_state *s); ++ ++#ifdef X86_SSE2 ++extern void slide_hash_sse2(deflate_state *s); ++#endif ++#if defined(ARM_SIMD) ++extern void slide_hash_armv6(deflate_state *s); ++#endif ++#if defined(ARM_NEON) ++extern void slide_hash_neon(deflate_state *s); ++#endif ++#if defined(PPC_VMX) ++extern void slide_hash_vmx(deflate_state *s); ++#endif ++#if defined(POWER8_VSX) ++extern void slide_hash_power8(deflate_state *s); ++#endif ++#if defined(RISCV_RVV) ++extern void slide_hash_rvv(deflate_state *s); ++#endif ++#ifdef X86_AVX2 ++extern void slide_hash_avx2(deflate_state *s); ++#endif ++ ++/* update_hash */ ++extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val); ++#ifdef X86_SSE42 ++extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val); ++#elif defined(ARM_ACLE) ++extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val); ++#endif ++#endif ++ ++#endif +diff --git a/crc32_braid.c b/crc32_braid.c +new file mode 100644 +index 0000000..96754b5 +--- /dev/null ++++ b/crc32_braid.c +@@ -0,0 +1,267 @@ ++/* crc32_braid.c -- compute the CRC-32 of a data stream ++ * Copyright (C) 1995-2022 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ * This interleaved implementation of a CRC makes use of pipelined multiple ++ * arithmetic-logic units, commonly found in modern CPU cores. It is due to ++ * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 
++ */ ++ ++#include "zbuild.h" ++#include "zutil.h" ++#include "functable.h" ++#include "crc32_braid_p.h" ++#include "crc32_braid_tbl.h" ++ ++/* ========================================================================= */ ++ ++const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { ++ return (const uint32_t *)crc_table; ++} ++ ++#ifdef ZLIB_COMPAT ++unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { ++ if (buf == NULL) return 0; ++ ++ return (unsigned long)functable.crc32((uint32_t)crc, buf, len); ++} ++#else ++uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { ++ if (buf == NULL) return 0; ++ ++ return functable.crc32(crc, buf, len); ++} ++#endif ++ ++#ifdef ZLIB_COMPAT ++unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { ++ return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); ++} ++#else ++uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { ++ return PREFIX(crc32_z)(crc, buf, len); ++} ++#endif ++ ++/* ========================================================================= */ ++ ++/* ++ A CRC of a message is computed on N braids of words in the message, where ++ each word consists of W bytes (4 or 8). If N is 3, for example, then three ++ running sparse CRCs are calculated respectively on each braid, at these ++ indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... ++ This is done starting at a word boundary, and continues until as many blocks ++ of N * W bytes as are available have been processed. The results are combined ++ into a single CRC at the end. For this code, N must be in the range 1..6 and ++ W must be 4 or 8. The upper limit on N can be increased if desired by adding ++ more #if blocks, extending the patterns apparent in the code. In addition, ++ crc32 tables would need to be regenerated, if the maximum N value is increased. 
++ ++ N and W are chosen empirically by benchmarking the execution time on a given ++ processor. The choices for N and W below were based on testing on Intel Kaby ++ Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 ++ Octeon II processors. The Intel, AMD, and ARM processors were all fastest ++ with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. ++ They were all tested with either gcc or clang, all using the -O3 optimization ++ level. Your mileage may vary. ++*/ ++ ++/* ========================================================================= */ ++ ++#if BYTE_ORDER == LITTLE_ENDIAN ++# define ZSWAPWORD(word) (word) ++# define BRAID_TABLE crc_braid_table ++#elif BYTE_ORDER == BIG_ENDIAN ++# if W == 8 ++# define ZSWAPWORD(word) ZSWAP64(word) ++# elif W == 4 ++# define ZSWAPWORD(word) ZSWAP32(word) ++# endif ++# define BRAID_TABLE crc_braid_big_table ++#else ++# error "No endian defined" ++#endif ++#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) ++#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 ++ ++/* ========================================================================= */ ++#ifdef W ++/* ++ Return the CRC of the W bytes in the word_t data, taking the ++ least-significant byte of the word as the first byte of data, without any pre ++ or post conditioning. This is used to combine the CRCs of each braid. 
++ */ ++#if BYTE_ORDER == LITTLE_ENDIAN ++static uint32_t crc_word(z_word_t data) { ++ int k; ++ for (k = 0; k < W; k++) ++ data = (data >> 8) ^ crc_table[data & 0xff]; ++ return (uint32_t)data; ++} ++#elif BYTE_ORDER == BIG_ENDIAN ++static z_word_t crc_word(z_word_t data) { ++ int k; ++ for (k = 0; k < W; k++) ++ data = (data << 8) ^ ++ crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; ++ return data; ++} ++#endif /* BYTE_ORDER */ ++ ++#endif /* W */ ++ ++/* ========================================================================= */ ++Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { ++ Z_REGISTER uint32_t c; ++ ++ /* Pre-condition the CRC */ ++ c = (~crc) & 0xffffffff; ++ ++#ifdef W ++ /* If provided enough bytes, do a braided CRC calculation. */ ++ if (len >= N * W + W - 1) { ++ size_t blks; ++ z_word_t const *words; ++ int k; ++ ++ /* Compute the CRC up to a z_word_t boundary. */ ++ while (len && ((uintptr_t)buf & (W - 1)) != 0) { ++ len--; ++ DO1; ++ } ++ ++ /* Compute the CRC on as many N z_word_t blocks as are available. */ ++ blks = len / (N * W); ++ len -= blks * N * W; ++ words = (z_word_t const *)buf; ++ ++ z_word_t crc0, word0, comb; ++#if N > 1 ++ z_word_t crc1, word1; ++#if N > 2 ++ z_word_t crc2, word2; ++#if N > 3 ++ z_word_t crc3, word3; ++#if N > 4 ++ z_word_t crc4, word4; ++#if N > 5 ++ z_word_t crc5, word5; ++#endif ++#endif ++#endif ++#endif ++#endif ++ /* Initialize the CRC for each braid. */ ++ crc0 = ZSWAPWORD(c); ++#if N > 1 ++ crc1 = 0; ++#if N > 2 ++ crc2 = 0; ++#if N > 3 ++ crc3 = 0; ++#if N > 4 ++ crc4 = 0; ++#if N > 5 ++ crc5 = 0; ++#endif ++#endif ++#endif ++#endif ++#endif ++ /* Process the first blks-1 blocks, computing the CRCs on each braid independently. */ ++ while (--blks) { ++ /* Load the word for each braid into registers. 
*/ ++ word0 = crc0 ^ words[0]; ++#if N > 1 ++ word1 = crc1 ^ words[1]; ++#if N > 2 ++ word2 = crc2 ^ words[2]; ++#if N > 3 ++ word3 = crc3 ^ words[3]; ++#if N > 4 ++ word4 = crc4 ^ words[4]; ++#if N > 5 ++ word5 = crc5 ^ words[5]; ++#endif ++#endif ++#endif ++#endif ++#endif ++ words += N; ++ ++ /* Compute and update the CRC for each word. The loop should get unrolled. */ ++ crc0 = BRAID_TABLE[0][word0 & 0xff]; ++#if N > 1 ++ crc1 = BRAID_TABLE[0][word1 & 0xff]; ++#if N > 2 ++ crc2 = BRAID_TABLE[0][word2 & 0xff]; ++#if N > 3 ++ crc3 = BRAID_TABLE[0][word3 & 0xff]; ++#if N > 4 ++ crc4 = BRAID_TABLE[0][word4 & 0xff]; ++#if N > 5 ++ crc5 = BRAID_TABLE[0][word5 & 0xff]; ++#endif ++#endif ++#endif ++#endif ++#endif ++ for (k = 1; k < W; k++) { ++ crc0 ^= BRAID_TABLE[k][(word0 >> (k << 3)) & 0xff]; ++#if N > 1 ++ crc1 ^= BRAID_TABLE[k][(word1 >> (k << 3)) & 0xff]; ++#if N > 2 ++ crc2 ^= BRAID_TABLE[k][(word2 >> (k << 3)) & 0xff]; ++#if N > 3 ++ crc3 ^= BRAID_TABLE[k][(word3 >> (k << 3)) & 0xff]; ++#if N > 4 ++ crc4 ^= BRAID_TABLE[k][(word4 >> (k << 3)) & 0xff]; ++#if N > 5 ++ crc5 ^= BRAID_TABLE[k][(word5 >> (k << 3)) & 0xff]; ++#endif ++#endif ++#endif ++#endif ++#endif ++ } ++ } ++ ++ /* Process the last block, combining the CRCs of the N braids at the same time. */ ++ comb = crc_word(crc0 ^ words[0]); ++#if N > 1 ++ comb = crc_word(crc1 ^ words[1] ^ comb); ++#if N > 2 ++ comb = crc_word(crc2 ^ words[2] ^ comb); ++#if N > 3 ++ comb = crc_word(crc3 ^ words[3] ^ comb); ++#if N > 4 ++ comb = crc_word(crc4 ^ words[4] ^ comb); ++#if N > 5 ++ comb = crc_word(crc5 ^ words[5] ^ comb); ++#endif ++#endif ++#endif ++#endif ++#endif ++ words += N; ++ c = ZSWAPWORD(comb); ++ ++ /* Update the pointer to the remaining bytes to process. */ ++ buf = (const unsigned char *)words; ++ } ++ ++#endif /* W */ ++ ++ /* Complete the computation of the CRC on any remaining bytes. 
*/ ++ while (len >= 8) { ++ len -= 8; ++ DO8; ++ } ++ while (len) { ++ len--; ++ DO1; ++ } ++ ++ /* Return the CRC, post-conditioned. */ ++ return c ^ 0xffffffff; ++} +diff --git a/crc32_braid_comb.c b/crc32_braid_comb.c +new file mode 100644 +index 0000000..75fb474 +--- /dev/null ++++ b/crc32_braid_comb.c +@@ -0,0 +1,57 @@ ++/* crc32_braid_comb.c -- compute the CRC-32 of a data stream ++ * Copyright (C) 1995-2022 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ * This interleaved implementation of a CRC makes use of pipelined multiple ++ * arithmetic-logic units, commonly found in modern CPU cores. It is due to ++ * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. ++ */ ++ ++#include "zbuild.h" ++#include "zutil.h" ++#include "crc32_braid_p.h" ++#include "crc32_braid_tbl.h" ++#include "crc32_braid_comb_p.h" ++ ++/* ========================================================================= */ ++static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) { ++ return multmodp(x2nmodp(len2, 3), crc1) ^ crc2; ++} ++static uint32_t crc32_combine_gen_(z_off64_t len2) { ++ return x2nmodp(len2, 3); ++} ++static uint32_t crc32_combine_op_(uint32_t crc1, uint32_t crc2, const uint32_t op) { ++ return multmodp(op, crc1) ^ crc2; ++} ++ ++/* ========================================================================= */ ++ ++#ifdef ZLIB_COMPAT ++unsigned long Z_EXPORT PREFIX(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off_t len2) { ++ return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2); ++} ++unsigned long Z_EXPORT PREFIX4(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off64_t len2) { ++ return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2); ++} ++unsigned long Z_EXPORT PREFIX(crc32_combine_gen)(z_off_t len2) { ++ return crc32_combine_gen_(len2); ++} ++unsigned long Z_EXPORT PREFIX4(crc32_combine_gen)(z_off64_t len2) { ++ return 
crc32_combine_gen_(len2); ++} ++unsigned long Z_EXPORT PREFIX(crc32_combine_op)(unsigned long crc1, unsigned long crc2, const unsigned long op) { ++ return (unsigned long)crc32_combine_op_((uint32_t)crc1, (uint32_t)crc2, (uint32_t)op); ++} ++#else ++uint32_t Z_EXPORT PREFIX4(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off64_t len2) { ++ return crc32_combine_(crc1, crc2, len2); ++} ++uint32_t Z_EXPORT PREFIX(crc32_combine_gen)(z_off64_t len2) { ++ return crc32_combine_gen_(len2); ++} ++uint32_t Z_EXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t op) { ++ return crc32_combine_op_(crc1, crc2, op); ++} ++#endif ++ ++/* ========================================================================= */ +diff --git a/crc32_braid_comb_p.h b/crc32_braid_comb_p.h +new file mode 100644 +index 0000000..a269e7f +--- /dev/null ++++ b/crc32_braid_comb_p.h +@@ -0,0 +1,42 @@ ++#ifndef CRC32_BRAID_COMB_P_H_ ++#define CRC32_BRAID_COMB_P_H_ ++ ++/* ++ Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, ++ reflected. For speed, this requires that a not be zero. ++ */ ++static uint32_t multmodp(uint32_t a, uint32_t b) { ++ uint32_t m, p; ++ ++ m = (uint32_t)1 << 31; ++ p = 0; ++ for (;;) { ++ if (a & m) { ++ p ^= b; ++ if ((a & (m - 1)) == 0) ++ break; ++ } ++ m >>= 1; ++ b = b & 1 ? (b >> 1) ^ POLY : b >> 1; ++ } ++ return p; ++} ++ ++/* ++ Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been ++ initialized. 
++ */ ++static uint32_t x2nmodp(z_off64_t n, unsigned k) { ++ uint32_t p; ++ ++ p = (uint32_t)1 << 31; /* x^0 == 1 */ ++ while (n) { ++ if (n & 1) ++ p = multmodp(x2n_table[k & 31], p); ++ n >>= 1; ++ k++; ++ } ++ return p; ++} ++ ++#endif /* CRC32_BRAID_COMB_P_H_ */ +diff --git a/crc32_braid_p.h b/crc32_braid_p.h +new file mode 100644 +index 0000000..1d8a070 +--- /dev/null ++++ b/crc32_braid_p.h +@@ -0,0 +1,50 @@ ++#ifndef CRC32_BRAID_P_H_ ++#define CRC32_BRAID_P_H_ ++ ++#include "zbuild.h" ++#include "zendian.h" ++ ++/* Define N */ ++#ifdef Z_TESTN ++# define N Z_TESTN ++#else ++# define N 5 ++#endif ++#if N < 1 || N > 6 ++# error N must be in 1..6 ++#endif ++ ++/* ++ Define W and the associated z_word_t type. If W is not defined, then a ++ braided calculation is not used, and the associated tables and code are not ++ compiled. ++ */ ++#ifdef Z_TESTW ++# if Z_TESTW-1 != -1 ++# define W Z_TESTW ++# endif ++#else ++# ifndef W ++# if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) ++# define W 8 ++# else ++# define W 4 ++# endif ++# endif ++#endif ++#ifdef W ++# if W == 8 ++ typedef uint64_t z_word_t; ++# else ++# undef W ++# define W 4 ++ typedef uint32_t z_word_t; ++# endif ++#endif ++ ++/* CRC polynomial. 
*/ ++#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ ++ ++extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); ++ ++#endif /* CRC32_BRAID_P_H_ */ +diff --git a/crc32_braid_tbl.h b/crc32_braid_tbl.h +new file mode 100644 +index 0000000..84d79a6 +--- /dev/null ++++ b/crc32_braid_tbl.h +@@ -0,0 +1,9446 @@ ++#ifndef CRC32_BRAID_TBL_H_ ++#define CRC32_BRAID_TBL_H_ ++ ++/* crc32_braid_tbl.h -- tables for braided CRC calculation ++ * Generated automatically by makecrct.c ++ */ ++ ++static const uint32_t crc_table[] = { ++ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, ++ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, ++ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, ++ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, ++ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, ++ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, ++ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, ++ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, ++ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, ++ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, ++ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, ++ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, ++ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, ++ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, ++ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, ++ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, ++ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, ++ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, ++ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, ++ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, ++ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, ++ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, ++ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 
0x270241aa, 0xbe0b1010, ++ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, ++ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, ++ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, ++ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, ++ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, ++ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, ++ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, ++ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, ++ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, ++ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, ++ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, ++ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, ++ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, ++ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, ++ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, ++ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, ++ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, ++ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, ++ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, ++ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, ++ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, ++ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, ++ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, ++ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, ++ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, ++ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, ++ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, ++ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, ++ 0x2d02ef8d}; ++ ++#ifdef W ++ ++#if W == 8 ++ ++static const z_word_t crc_big_table[] = { ++ 0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, ++ 0xba51099900000000, 0x19c46d0700000000, 
0x8ff46a7000000000, ++ 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, ++ 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, ++ 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, ++ 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, ++ 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, ++ 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, ++ 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, ++ 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, ++ 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, ++ 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, ++ 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, ++ 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, ++ 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, ++ 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, ++ 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, ++ 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, ++ 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, ++ 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, ++ 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, ++ 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, ++ 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, ++ 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, ++ 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, ++ 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, ++ 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, ++ 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, ++ 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, ++ 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, ++ 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, ++ 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, ++ 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, ++ 0xe230bbd400000000, 
0x41a5df4a00000000, 0xd795d83d00000000, ++ 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, ++ 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, ++ 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, ++ 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, ++ 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, ++ 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, ++ 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, ++ 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, ++ 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, ++ 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, ++ 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, ++ 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, ++ 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, ++ 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, ++ 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, ++ 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, ++ 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, ++ 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, ++ 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, ++ 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, ++ 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, ++ 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, ++ 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, ++ 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, ++ 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, ++ 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, ++ 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, ++ 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, ++ 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, ++ 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, ++ 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, ++ 
0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, ++ 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, ++ 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, ++ 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, ++ 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, ++ 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, ++ 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, ++ 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, ++ 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, ++ 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, ++ 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, ++ 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, ++ 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, ++ 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, ++ 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, ++ 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, ++ 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, ++ 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, ++ 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, ++ 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, ++ 0x8def022d00000000}; ++ ++#else /* W == 4 */ ++ ++static const z_word_t crc_big_table[] = { ++ 0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, ++ 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, ++ 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, ++ 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, ++ 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, ++ 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, ++ 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, ++ 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, ++ 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, ++ 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, ++ 0x8051d7c8, 
0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, ++ 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, ++ 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, ++ 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, ++ 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, ++ 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, ++ 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, ++ 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, ++ 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, ++ 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, ++ 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, ++ 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, ++ 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, ++ 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, ++ 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, ++ 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, ++ 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, ++ 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, ++ 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, ++ 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, ++ 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, ++ 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, ++ 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, ++ 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, ++ 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, ++ 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, ++ 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, ++ 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, ++ 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, ++ 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, ++ 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, ++ 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 
0x42e2d4f1, ++ 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, ++ 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, ++ 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, ++ 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, ++ 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, ++ 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, ++ 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, ++ 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, ++ 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, ++ 0x8def022d}; ++ ++#endif ++ ++#endif /* W */ ++ ++#if N == 1 ++ ++#if W == 8 ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, ++ 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, ++ 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, ++ 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, ++ 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, ++ 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, ++ 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, ++ 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, ++ 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, ++ 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, ++ 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, ++ 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, ++ 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, ++ 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, ++ 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, ++ 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, ++ 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, ++ 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, ++ 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, ++ 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, ++ 0x54a1ae41, 
0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, ++ 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, ++ 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, ++ 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, ++ 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, ++ 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, ++ 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, ++ 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, ++ 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, ++ 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, ++ 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, ++ 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, ++ 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, ++ 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, ++ 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, ++ 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, ++ 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, ++ 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, ++ 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, ++ 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, ++ 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, ++ 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, ++ 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, ++ 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, ++ 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, ++ 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, ++ 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, ++ 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, ++ 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, ++ 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, ++ 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, ++ 0x264b06e6}, ++ {0x00000000, 0xa6770bb4, 0x979f1129, 
0x31e81a9d, 0xf44f2413, ++ 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, ++ 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, ++ 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, ++ 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, ++ 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, ++ 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, ++ 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, ++ 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, ++ 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, ++ 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, ++ 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, ++ 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, ++ 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, ++ 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, ++ 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, ++ 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, ++ 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, ++ 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, ++ 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, ++ 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, ++ 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, ++ 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, ++ 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, ++ 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, ++ 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, ++ 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, ++ 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, ++ 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, ++ 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, ++ 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, ++ 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, ++ 0x47abd36e, 
0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, ++ 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, ++ 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, ++ 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, ++ 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, ++ 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, ++ 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, ++ 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, ++ 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, ++ 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, ++ 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, ++ 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, ++ 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, ++ 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, ++ 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, ++ 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, ++ 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, ++ 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, ++ 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, ++ 0x92364a30}, ++ {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, ++ 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, ++ 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, ++ 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, ++ 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, ++ 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, ++ 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, ++ 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, ++ 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, ++ 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, ++ 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, ++ 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, ++ 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 
0xf52cb578, 0x0f580a6c, ++ 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, ++ 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, ++ 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, ++ 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, ++ 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, ++ 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, ++ 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, ++ 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, ++ 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, ++ 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, ++ 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, ++ 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, ++ 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, ++ 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, ++ 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, ++ 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, ++ 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, ++ 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, ++ 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, ++ 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, ++ 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, ++ 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, ++ 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, ++ 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, ++ 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, ++ 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, ++ 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, ++ 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, ++ 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, ++ 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, ++ 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, ++ 0x65fcdc54, 
0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, ++ 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, ++ 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, ++ 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, ++ 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, ++ 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, ++ 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, ++ 0xe4c4abcc}, ++ {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, ++ 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, ++ 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, ++ 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, ++ 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, ++ 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, ++ 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, ++ 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, ++ 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, ++ 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, ++ 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, ++ 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, ++ 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, ++ 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, ++ 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, ++ 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, ++ 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, ++ 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, ++ 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, ++ 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, ++ 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, ++ 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, ++ 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, ++ 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, ++ 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 
0x8e02d1de, 0x3c220dce, ++ 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, ++ 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, ++ 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, ++ 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, ++ 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, ++ 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, ++ 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, ++ 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, ++ 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, ++ 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, ++ 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, ++ 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, ++ 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, ++ 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, ++ 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, ++ 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, ++ 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, ++ 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, ++ 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, ++ 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, ++ 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, ++ 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, ++ 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, ++ 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, ++ 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, ++ 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, ++ 0xca64c78c}, ++ {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, ++ 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, ++ 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, ++ 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, ++ 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, ++ 
0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, ++ 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, ++ 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, ++ 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, ++ 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, ++ 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, ++ 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, ++ 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, ++ 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, ++ 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, ++ 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, ++ 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, ++ 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, ++ 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, ++ 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, ++ 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, ++ 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, ++ 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, ++ 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, ++ 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, ++ 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, ++ 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, ++ 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, ++ 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, ++ 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, ++ 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, ++ 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, ++ 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, ++ 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, ++ 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, ++ 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, ++ 0x9210d9cd, 0x2aacbea8, 0x38191146, 
0x80a57623, 0xd8c66675, ++ 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, ++ 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, ++ 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, ++ 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, ++ 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, ++ 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, ++ 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, ++ 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, ++ 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, ++ 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, ++ 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, ++ 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, ++ 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, ++ 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, ++ 0xde0506f1}, ++ {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, ++ 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, ++ 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, ++ 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, ++ 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, ++ 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, ++ 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, ++ 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, ++ 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, ++ 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, ++ 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, ++ 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, ++ 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, ++ 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, ++ 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, ++ 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, ++ 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, ++ 
0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, ++ 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, ++ 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, ++ 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, ++ 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, ++ 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, ++ 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, ++ 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, ++ 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, ++ 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, ++ 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, ++ 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, ++ 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, ++ 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, ++ 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, ++ 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, ++ 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, ++ 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, ++ 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, ++ 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, ++ 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, ++ 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, ++ 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, ++ 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, ++ 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, ++ 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, ++ 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, ++ 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, ++ 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, ++ 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, ++ 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, ++ 0xb5c473d0, 0xb40619e7, 0xb640a7be, 
0xb782cd89, 0xb2cddb0c, ++ 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, ++ 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, ++ 0xbe9834ed}, ++ {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, ++ 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, ++ 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, ++ 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, ++ 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, ++ 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, ++ 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, ++ 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, ++ 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, ++ 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, ++ 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, ++ 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, ++ 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, ++ 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, ++ 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, ++ 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, ++ 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, ++ 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, ++ 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, ++ 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, ++ 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, ++ 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, ++ 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, ++ 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, ++ 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, ++ 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, ++ 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, ++ 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, ++ 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, ++ 
0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, ++ 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, ++ 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, ++ 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, ++ 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, ++ 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, ++ 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, ++ 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, ++ 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, ++ 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, ++ 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, ++ 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, ++ 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, ++ 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, ++ 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, ++ 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, ++ 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, ++ 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, ++ 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, ++ 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, ++ 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, ++ 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, ++ 0x9324fd72}, ++ {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, ++ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, ++ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, ++ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, ++ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, ++ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, ++ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, ++ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, ++ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, ++ 0x45df5c75, 0xdcd60dcf, 
0xabd13d59, 0x26d930ac, 0x51de003a, ++ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, ++ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, ++ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, ++ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, ++ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, ++ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, ++ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, ++ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, ++ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, ++ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, ++ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, ++ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, ++ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, ++ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, ++ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, ++ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, ++ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, ++ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, ++ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, ++ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, ++ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, ++ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, ++ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, ++ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, ++ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, ++ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, ++ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, ++ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, ++ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, ++ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, ++ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, ++ 
0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, ++ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, ++ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, ++ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, ++ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, ++ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, ++ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, ++ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, ++ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, ++ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, ++ 0x2d02ef8d}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, ++ 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, ++ 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, ++ 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, ++ 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, ++ 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, ++ 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, ++ 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, ++ 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, ++ 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, ++ 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, ++ 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, ++ 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, ++ 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, ++ 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, ++ 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, ++ 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, ++ 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, ++ 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, ++ 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, ++ 0x877c6f2f00000000, 
0x114c685800000000, 0xab1d61c100000000, ++ 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, ++ 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, ++ 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, ++ 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, ++ 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, ++ 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, ++ 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, ++ 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, ++ 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, ++ 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, ++ 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, ++ 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, ++ 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, ++ 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, ++ 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, ++ 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, ++ 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, ++ 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, ++ 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, ++ 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, ++ 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, ++ 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, ++ 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, ++ 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, ++ 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, ++ 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, ++ 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, ++ 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, ++ 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, ++ 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, ++ 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, ++ 
0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, ++ 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, ++ 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, ++ 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, ++ 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, ++ 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, ++ 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, ++ 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, ++ 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, ++ 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, ++ 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, ++ 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, ++ 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, ++ 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, ++ 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, ++ 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, ++ 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, ++ 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, ++ 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, ++ 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, ++ 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, ++ 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, ++ 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, ++ 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, ++ 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, ++ 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, ++ 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, ++ 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, ++ 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, ++ 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, ++ 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, ++ 0xb84a61c400000000, 0x021b685d00000000, 
0x942b6f2a00000000, ++ 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, ++ 0x8def022d00000000}, ++ {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000, ++ 0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000, ++ 0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000, ++ 0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000, ++ 0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000, ++ 0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000, ++ 0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000, ++ 0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000, ++ 0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000, ++ 0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000, ++ 0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000, ++ 0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000, ++ 0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000, ++ 0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000, ++ 0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000, ++ 0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000, ++ 0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000, ++ 0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000, ++ 0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000, ++ 0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000, ++ 0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000, ++ 0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000, ++ 0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000, ++ 0x40bb0e8d00000000, 0x83e823a600000000, 0xc2d938bf00000000, ++ 0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000, ++ 0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000, ++ 0x8b62fa6e00000000, 0xca53e17700000000, 0x545dbbba00000000, ++ 0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000, ++ 0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000, ++ 0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000, 
++ 0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000, ++ 0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000, ++ 0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000, ++ 0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000, ++ 0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000, ++ 0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000, ++ 0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000, ++ 0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000, ++ 0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000, ++ 0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000, ++ 0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000, ++ 0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000, ++ 0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000, ++ 0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000, ++ 0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000, ++ 0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000, ++ 0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000, ++ 0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000, ++ 0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000, ++ 0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000, ++ 0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000, ++ 0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000, ++ 0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000, ++ 0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000, ++ 0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000, ++ 0xac4870d300000000, 0x6f1b5df800000000, 0x2e2a46e100000000, ++ 0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000, ++ 0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000, ++ 0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000, ++ 0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000, ++ 0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000, ++ 0x7f3884ab00000000, 0xb0241c2c00000000, 
0xf115073500000000, ++ 0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000, ++ 0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000, ++ 0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000, ++ 0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000, ++ 0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000, ++ 0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000, ++ 0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000, ++ 0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000, ++ 0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000, ++ 0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000, ++ 0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000, ++ 0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000, ++ 0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000, ++ 0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000, ++ 0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000, ++ 0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000, ++ 0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000, ++ 0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000, ++ 0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000, ++ 0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000, ++ 0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000, ++ 0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000, ++ 0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000, ++ 0x72fd249300000000}, ++ {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000, ++ 0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000, ++ 0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000, ++ 0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000, ++ 0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000, ++ 0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000, ++ 0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000, ++ 0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000, 
++ 0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000, ++ 0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000, ++ 0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000, ++ 0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000, ++ 0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000, ++ 0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000, ++ 0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000, ++ 0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000, ++ 0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000, ++ 0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000, ++ 0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000, ++ 0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000, ++ 0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000, ++ 0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000, ++ 0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000, ++ 0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000, ++ 0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000, ++ 0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000, ++ 0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000, ++ 0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000, ++ 0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000, ++ 0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000, ++ 0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000, ++ 0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000, ++ 0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000, ++ 0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000, ++ 0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000, ++ 0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000, ++ 0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000, ++ 0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000, ++ 0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000, ++ 0xbbaa3a5200000000, 0xe2147c5000000000, 
0xd57ebe5100000000, ++ 0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000, ++ 0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000, ++ 0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000, ++ 0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000, ++ 0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000, ++ 0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000, ++ 0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000, ++ 0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000, ++ 0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000, ++ 0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000, ++ 0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000, ++ 0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000, ++ 0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000, ++ 0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000, ++ 0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000, ++ 0x8b9fb3df00000000, 0xd221f5dd00000000, 0xe54b37dc00000000, ++ 0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000, ++ 0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000, ++ 0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000, ++ 0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000, ++ 0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000, ++ 0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000, ++ 0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000, ++ 0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000, ++ 0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000, ++ 0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000, ++ 0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000, ++ 0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000, ++ 0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000, ++ 0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000, ++ 0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000, ++ 0xdbf7428b00000000, 
0x8249048900000000, 0xb523c68800000000, ++ 0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000, ++ 0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000, ++ 0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000, ++ 0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000, ++ 0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000, ++ 0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000, ++ 0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000, ++ 0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000, ++ 0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000, ++ 0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000, ++ 0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000, ++ 0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000, ++ 0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000, ++ 0xed3498be00000000}, ++ {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000, ++ 0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000, ++ 0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000, ++ 0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000, ++ 0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000, ++ 0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000, ++ 0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000, ++ 0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000, ++ 0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000, ++ 0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000, ++ 0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000, ++ 0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000, ++ 0x6938502f00000000, 0x0c5fec9700000000, 0xe2f0598500000000, ++ 0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000, ++ 0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000, ++ 0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000, ++ 0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000, ++ 0x4f579ee200000000, 0xf66f497f00000000, 
0x9308f5c700000000, ++ 0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000, ++ 0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000, ++ 0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000, ++ 0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000, ++ 0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000, ++ 0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000, ++ 0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000, ++ 0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000, ++ 0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000, ++ 0xc768b17300000000, 0x29c7046100000000, 0x4ca0b8d900000000, ++ 0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000, ++ 0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000, ++ 0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000, ++ 0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000, ++ 0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000, ++ 0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000, ++ 0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000, ++ 0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000, ++ 0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000, ++ 0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000, ++ 0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000, ++ 0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000, ++ 0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000, ++ 0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000, ++ 0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000, ++ 0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000, ++ 0x6c213b6200000000, 0x094687da00000000, 0xe7e932c800000000, ++ 0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000, ++ 0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000, ++ 0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000, ++ 0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000, ++ 0x4a4ef5af00000000, 
0xf376223200000000, 0x96119e8a00000000, ++ 0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000, ++ 0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000, ++ 0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000, ++ 0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000, ++ 0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000, ++ 0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000, ++ 0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000, ++ 0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000, ++ 0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000, ++ 0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000, ++ 0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000, ++ 0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000, ++ 0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000, ++ 0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000, ++ 0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000, ++ 0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000, ++ 0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000, ++ 0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000, ++ 0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000, ++ 0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000, ++ 0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000, ++ 0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000, ++ 0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000, ++ 0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000, ++ 0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000, ++ 0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000, ++ 0x6fd61d5900000000, 0x0ab1a1e100000000, 0xe41e14f300000000, ++ 0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000, ++ 0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000, ++ 0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000, ++ 0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000, ++ 
0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000, ++ 0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000, ++ 0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000, ++ 0x1fa9b0cc00000000, 0x7ace0c7400000000, 0x9461b96600000000, ++ 0xf10605de00000000}, ++ {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000, ++ 0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000, ++ 0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000, ++ 0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000, ++ 0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000, ++ 0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000, ++ 0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000, ++ 0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000, ++ 0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000, ++ 0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000, ++ 0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000, ++ 0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000, ++ 0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000, ++ 0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000, ++ 0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000, ++ 0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000, ++ 0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000, ++ 0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000, ++ 0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000, ++ 0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000, ++ 0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000, ++ 0x572f712300000000, 0x4958f35800000000, 0xf971936500000000, ++ 0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000, ++ 0x39d7139000000000, 0xe9adb3d700000000, 0x5984d3ea00000000, ++ 0x8813836800000000, 0x383ae35500000000, 0xe840431200000000, ++ 0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000, ++ 0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000, ++ 0x7be6730500000000, 
0xab9cd34200000000, 0x1bb5b37f00000000, ++ 0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000, ++ 0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000, ++ 0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000, ++ 0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000, ++ 0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000, ++ 0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000, ++ 0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000, ++ 0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000, ++ 0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000, ++ 0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000, ++ 0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000, ++ 0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000, ++ 0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000, ++ 0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000, ++ 0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000, ++ 0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000, ++ 0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000, ++ 0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000, ++ 0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000, ++ 0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000, ++ 0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000, ++ 0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000, ++ 0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000, ++ 0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000, ++ 0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000, ++ 0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000, ++ 0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000, ++ 0xe610c7b800000000, 0x366a67ff00000000, 0x864307c200000000, ++ 0x57d4574000000000, 0xe7fd377d00000000, 0x3787973a00000000, ++ 0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000, ++ 0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000, ++ 
0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000, ++ 0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000, ++ 0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000, ++ 0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000, ++ 0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000, ++ 0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000, ++ 0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000, ++ 0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000, ++ 0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000, ++ 0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000, ++ 0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000, ++ 0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000, ++ 0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000, ++ 0x983485b900000000, 0x281de58400000000, 0xf86745c300000000, ++ 0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000, ++ 0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000, ++ 0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000, ++ 0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000, ++ 0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000, ++ 0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000, ++ 0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000, ++ 0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000, ++ 0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000, ++ 0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000, ++ 0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000, ++ 0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000, ++ 0x8cc764ca00000000}, ++ {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000, ++ 0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000, ++ 0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000, ++ 0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000, ++ 0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000, ++ 0xd5b256f100000000, 
0x9b02d60300000000, 0x3ed18ac800000000, ++ 0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000, ++ 0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000, ++ 0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000, ++ 0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000, ++ 0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000, ++ 0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000, ++ 0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000, ++ 0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000, ++ 0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000, ++ 0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000, ++ 0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000, ++ 0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000, ++ 0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000, ++ 0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000, ++ 0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000, ++ 0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000, ++ 0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000, ++ 0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000, ++ 0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000, ++ 0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000, ++ 0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000, ++ 0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000, ++ 0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000, ++ 0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000, ++ 0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000, ++ 0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000, ++ 0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000, ++ 0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000, ++ 0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000, ++ 0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000, ++ 0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000, ++ 
0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000, ++ 0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000, ++ 0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000, ++ 0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000, ++ 0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000, ++ 0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000, ++ 0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000, ++ 0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000, ++ 0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000, ++ 0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000, ++ 0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000, ++ 0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000, ++ 0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000, ++ 0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000, ++ 0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000, ++ 0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000, ++ 0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000, ++ 0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000, ++ 0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000, ++ 0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000, ++ 0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000, ++ 0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000, ++ 0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000, ++ 0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000, ++ 0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000, ++ 0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000, ++ 0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000, ++ 0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000, ++ 0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000, ++ 0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000, ++ 0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000, ++ 0xcfde2a6600000000, 0x6a0d76ad00000000, 
0xc47fe22b00000000, ++ 0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000, ++ 0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000, ++ 0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000, ++ 0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000, ++ 0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000, ++ 0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000, ++ 0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000, ++ 0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000, ++ 0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000, ++ 0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000, ++ 0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000, ++ 0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000, ++ 0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000, ++ 0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000, ++ 0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000, ++ 0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000, ++ 0xccabc4e400000000}, ++ {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000, ++ 0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000, ++ 0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000, ++ 0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000, ++ 0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000, ++ 0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000, ++ 0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000, ++ 0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000, ++ 0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000, ++ 0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000, ++ 0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000, ++ 0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000, ++ 0x8f1df23b00000000, 0x3b16859d00000000, 0xa60c6dac00000000, ++ 0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000, ++ 0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000, 
++ 0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000, ++ 0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000, ++ 0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000, ++ 0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000, ++ 0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000, ++ 0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000, ++ 0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000, ++ 0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000, ++ 0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000, ++ 0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000, ++ 0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000, ++ 0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000, ++ 0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000, ++ 0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000, ++ 0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000, ++ 0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000, ++ 0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000, ++ 0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000, ++ 0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000, ++ 0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000, ++ 0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000, ++ 0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000, ++ 0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000, ++ 0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000, ++ 0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000, ++ 0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000, ++ 0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000, ++ 0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000, ++ 0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000, ++ 0xe1ce597c00000000, 0x55c52eda00000000, 0xc8dfc6eb00000000, ++ 0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000, ++ 0xbcb5662c00000000, 0x08be118a00000000, 
0x8680b64f00000000, ++ 0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000, ++ 0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000, ++ 0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000, ++ 0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000, ++ 0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000, ++ 0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000, ++ 0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000, ++ 0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000, ++ 0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000, ++ 0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000, ++ 0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000, ++ 0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000, ++ 0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000, ++ 0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000, ++ 0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000, ++ 0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000, ++ 0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000, ++ 0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000, ++ 0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000, ++ 0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000, ++ 0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000, ++ 0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000, ++ 0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000, ++ 0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000, ++ 0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000, ++ 0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000, ++ 0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000, ++ 0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000, ++ 0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000, ++ 0x0482eff700000000, 0xb089985100000000, 0x2d93706000000000, ++ 0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000, ++ 0x59f9d0a700000000, 
0xedf2a70100000000, 0x63cc00c400000000, ++ 0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000, ++ 0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000, ++ 0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000, ++ 0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000, ++ 0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000, ++ 0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000, ++ 0x304a369200000000}, ++ {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000, ++ 0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000, ++ 0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000, ++ 0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000, ++ 0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000, ++ 0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000, ++ 0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000, ++ 0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000, ++ 0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000, ++ 0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000, ++ 0x1923316900000000, 0x87239ba500000000, 0x566276f900000000, ++ 0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000, ++ 0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000, ++ 0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000, ++ 0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000, ++ 0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000, ++ 0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000, ++ 0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000, ++ 0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000, ++ 0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000, ++ 0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000, ++ 0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000, ++ 0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000, ++ 0x89cc7d6100000000, 0x6acbf2ef00000000, 0xf4cb582300000000, ++ 0x58d978fa00000000, 0xc6d9d23600000000, 
0x25de5db800000000, ++ 0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000, ++ 0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000, ++ 0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000, ++ 0x3cfd6cd100000000, 0xa2fdc61d00000000, 0x41fa499300000000, ++ 0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000, ++ 0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000, ++ 0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000, ++ 0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000, ++ 0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000, ++ 0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000, ++ 0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000, ++ 0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000, ++ 0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000, ++ 0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000, ++ 0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000, ++ 0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000, ++ 0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000, ++ 0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000, ++ 0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000, ++ 0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000, ++ 0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000, ++ 0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000, ++ 0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000, ++ 0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000, ++ 0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000, ++ 0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000, ++ 0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000, ++ 0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000, ++ 0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000, ++ 0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000, ++ 0xe8e9ade200000000, 0x0bee226c00000000, 0x95ee88a000000000, ++ 0x39fca87900000000, 
0xa7fc02b500000000, 0x44fb8d3b00000000, ++ 0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000, ++ 0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000, ++ 0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000, ++ 0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000, ++ 0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000, ++ 0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000, ++ 0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000, ++ 0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000, ++ 0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000, ++ 0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000, ++ 0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000, ++ 0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000, ++ 0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000, ++ 0x6171384400000000, 0xff71928800000000, 0xe678578200000000, ++ 0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000, ++ 0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000, ++ 0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000, ++ 0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000, ++ 0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000, ++ 0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000, ++ 0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000, ++ 0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000, ++ 0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000, ++ 0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000, ++ 0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000, ++ 0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000, ++ 0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000, ++ 0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000, ++ 0xe6064b2600000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, ++ 0x37def032, 0x256b5fdc, 
0x9dd738b9, 0xc5b428ef, 0x7d084f8a, ++ 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, ++ 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, ++ 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, ++ 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, ++ 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, ++ 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, ++ 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, ++ 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, ++ 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, ++ 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, ++ 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, ++ 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, ++ 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, ++ 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, ++ 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, ++ 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, ++ 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, ++ 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, ++ 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, ++ 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, ++ 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, ++ 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, ++ 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, ++ 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, ++ 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, ++ 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, ++ 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, ++ 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, ++ 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, ++ 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, ++ 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, ++ 
0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, ++ 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, ++ 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, ++ 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, ++ 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, ++ 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, ++ 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, ++ 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, ++ 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, ++ 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, ++ 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, ++ 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, ++ 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, ++ 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, ++ 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, ++ 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, ++ 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, ++ 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, ++ 0xde0506f1}, ++ {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, ++ 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, ++ 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, ++ 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, ++ 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, ++ 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, ++ 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, ++ 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, ++ 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, ++ 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, ++ 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, ++ 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, ++ 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, ++ 0x7158e7f7, 0x731e59ae, 
0x72dc3399, 0x7793251c, 0x76514f2b, ++ 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, ++ 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, ++ 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, ++ 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, ++ 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, ++ 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, ++ 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, ++ 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, ++ 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, ++ 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, ++ 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, ++ 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, ++ 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, ++ 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, ++ 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, ++ 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, ++ 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, ++ 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, ++ 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, ++ 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, ++ 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, ++ 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, ++ 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, ++ 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, ++ 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, ++ 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, ++ 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, ++ 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, ++ 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, ++ 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, ++ 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, ++ 
0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, ++ 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, ++ 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, ++ 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, ++ 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, ++ 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, ++ 0xbe9834ed}, ++ {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, ++ 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, ++ 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, ++ 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, ++ 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, ++ 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, ++ 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, ++ 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, ++ 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, ++ 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, ++ 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, ++ 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, ++ 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, ++ 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, ++ 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, ++ 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, ++ 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, ++ 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, ++ 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, ++ 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, ++ 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, ++ 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, ++ 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, ++ 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, ++ 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, ++ 0x9a9107bb, 0xb1bc5478, 
0xa8a76539, 0x3b83984b, 0x2298a90a, ++ 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, ++ 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, ++ 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, ++ 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, ++ 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, ++ 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, ++ 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, ++ 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, ++ 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, ++ 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, ++ 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, ++ 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, ++ 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, ++ 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, ++ 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, ++ 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, ++ 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, ++ 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, ++ 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, ++ 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, ++ 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, ++ 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, ++ 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, ++ 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, ++ 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, ++ 0x9324fd72}, ++ {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, ++ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, ++ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, ++ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, ++ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, ++ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 
0x63066cd9, ++ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, ++ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, ++ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, ++ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, ++ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, ++ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, ++ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, ++ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, ++ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, ++ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, ++ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, ++ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, ++ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, ++ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, ++ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, ++ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, ++ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, ++ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, ++ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, ++ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, ++ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, ++ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, ++ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, ++ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, ++ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, ++ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, ++ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, ++ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, ++ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, ++ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, ++ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, ++ 0xb2bd0b28, 0x2bb45a92, 
0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, ++ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, ++ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, ++ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, ++ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, ++ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, ++ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, ++ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, ++ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, ++ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, ++ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, ++ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, ++ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, ++ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, ++ 0x2d02ef8d}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, ++ 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, ++ 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, ++ 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, ++ 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, ++ 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, ++ 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, ++ 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, ++ 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, ++ 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, ++ 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, ++ 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, ++ 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, ++ 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, ++ 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, ++ 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, ++ 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 
0xed95066c, ++ 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, ++ 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, ++ 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, ++ 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, ++ 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, ++ 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, ++ 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, ++ 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, ++ 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, ++ 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, ++ 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, ++ 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, ++ 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, ++ 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, ++ 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, ++ 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, ++ 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, ++ 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, ++ 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, ++ 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, ++ 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, ++ 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, ++ 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, ++ 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, ++ 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, ++ 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, ++ 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, ++ 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, ++ 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, ++ 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, ++ 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, ++ 0x1cf2bdbd, 0x8ac2baca, 
0x3093b353, 0xa6a3b424, 0x0536d0ba, ++ 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, ++ 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, ++ 0x8def022d}, ++ {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, ++ 0x45f4777d, 0x86a75a56, 0xc796414f, 0x088ad9c8, 0x49bbc2d1, ++ 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, ++ 0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, ++ 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82, ++ 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, ++ 0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7, ++ 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da, ++ 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, ++ 0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6, ++ 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, ++ 0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, ++ 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0, ++ 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, ++ 0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, ++ 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177, ++ 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, ++ 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b, ++ 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, ++ 0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, ++ 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad, ++ 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, ++ 0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d, ++ 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60, ++ 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, ++ 0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822, ++ 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, ++ 0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, ++ 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc, 
0x1a8a4171, ++ 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, ++ 0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b, ++ 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6, ++ 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, ++ 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 0xe136de66, 0xa007c57f, ++ 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, ++ 0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, ++ 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c, ++ 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, ++ 0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, ++ 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84, ++ 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, ++ 0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398, ++ 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, ++ 0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, ++ 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e, ++ 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, ++ 0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4, ++ 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9, ++ 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, ++ 0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5, ++ 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, ++ 0x72fd2493}, ++ {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, ++ 0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f, ++ 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, ++ 0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, ++ 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512, ++ 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, ++ 0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b, ++ 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d, ++ 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, ++ 0xb3d59530, 
0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925, ++ 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, ++ 0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, ++ 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70, ++ 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, ++ 0x72f11774, 0x459bd575, 0x78dc897e, 0x4fb64b7f, 0x16080d7d, ++ 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b, ++ 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, ++ 0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63, ++ 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, ++ 0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, ++ 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446, ++ 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, ++ 0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557, ++ 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51, ++ 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, ++ 0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0, ++ 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, ++ 0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, ++ 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd, ++ 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, ++ 0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0, ++ 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6, ++ 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, ++ 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6, ++ 0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, ++ 0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, ++ 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb, ++ 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, ++ 0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92, ++ 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094, ++ 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 
0x246fb598, ++ 0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c, ++ 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, ++ 0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, ++ 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9, ++ 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, ++ 0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 0x765575a4, ++ 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2, ++ 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, ++ 0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba, ++ 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, ++ 0xed3498be}, ++ {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, ++ 0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d, ++ 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, ++ 0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, ++ 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95, ++ 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, ++ 0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, ++ 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d, ++ 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, ++ 0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748, ++ 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, ++ 0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, ++ 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b, ++ 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, ++ 0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, ++ 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3, ++ 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, ++ 0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6, ++ 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, ++ 0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, ++ 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe, ++ 0x89b82e46, 
0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, ++ 0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1, ++ 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6, ++ 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, ++ 0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555, ++ 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, ++ 0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a, ++ 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd, ++ 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, ++ 0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, ++ 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5, ++ 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, ++ 0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330, ++ 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, ++ 0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, ++ 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8, ++ 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, ++ 0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, ++ 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb, ++ 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c, ++ 0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e, ++ 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, ++ 0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, ++ 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6, ++ 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, ++ 0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, ++ 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e, ++ 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, ++ 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb, ++ 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, ++ 0xf10605de}}; ++ ++#endif /* W */ ++ ++#endif /* N == 1 */ ++#if N == 2 ++ ++#if W == 8 ++ ++static const uint32_t 
crc_braid_table[][256] = { ++ {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, ++ 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, ++ 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, ++ 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, ++ 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, ++ 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, ++ 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, ++ 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, ++ 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, ++ 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, ++ 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, ++ 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, ++ 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, ++ 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, ++ 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, ++ 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, ++ 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, ++ 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, ++ 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, ++ 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, ++ 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, ++ 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, ++ 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, ++ 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, ++ 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, ++ 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, ++ 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, ++ 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, ++ 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, ++ 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, ++ 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, ++ 0xe001f8d7, 
0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, ++ 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, ++ 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, ++ 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, ++ 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, ++ 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, ++ 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, ++ 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, ++ 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, ++ 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, ++ 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, ++ 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, ++ 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, ++ 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, ++ 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, ++ 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, ++ 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, ++ 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, ++ 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, ++ 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, ++ 0x0d7139d7}, ++ {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, ++ 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, ++ 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, ++ 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, ++ 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, ++ 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, ++ 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, ++ 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, ++ 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, ++ 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, ++ 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, ++ 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 
0xaba8dcc7, 0x1253072f, ++ 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, ++ 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, ++ 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, ++ 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, ++ 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, ++ 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, ++ 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, ++ 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, ++ 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, ++ 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, ++ 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, ++ 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, ++ 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, ++ 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, ++ 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, ++ 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, ++ 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, ++ 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, ++ 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, ++ 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, ++ 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, ++ 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, ++ 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, ++ 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, ++ 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, ++ 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, ++ 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, ++ 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, ++ 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, ++ 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, ++ 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, ++ 0x5b1f1b93, 
0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, ++ 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, ++ 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, ++ 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, ++ 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, ++ 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, ++ 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, ++ 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, ++ 0x1c53e98a}, ++ {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, ++ 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, ++ 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, ++ 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, ++ 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, ++ 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, ++ 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, ++ 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, ++ 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, ++ 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, ++ 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, ++ 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, ++ 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, ++ 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, ++ 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, ++ 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, ++ 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, ++ 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, ++ 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, ++ 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, ++ 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, ++ 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, ++ 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, ++ 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 
0x12adf8f3, 0x8fa21985, ++ 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, ++ 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, ++ 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, ++ 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, ++ 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, ++ 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, ++ 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, ++ 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, ++ 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, ++ 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, ++ 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, ++ 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, ++ 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, ++ 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, ++ 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, ++ 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, ++ 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, ++ 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, ++ 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, ++ 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, ++ 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, ++ 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, ++ 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, ++ 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, ++ 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, ++ 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, ++ 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, ++ 0x3f88e851}, ++ {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, ++ 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, ++ 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, ++ 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, ++ 
0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, ++ 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, ++ 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, ++ 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, ++ 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, ++ 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, ++ 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, ++ 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, ++ 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, ++ 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, ++ 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, ++ 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, ++ 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, ++ 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, ++ 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, ++ 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, ++ 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, ++ 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, ++ 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, ++ 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, ++ 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, ++ 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, ++ 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, ++ 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, ++ 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, ++ 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, ++ 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, ++ 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, ++ 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, ++ 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, ++ 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, ++ 0x3464bbd7, 0xa713c838, 0x7a8511bd, 
0xc74f7d73, 0x1ad9a4f6, ++ 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, ++ 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, ++ 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, ++ 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, ++ 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, ++ 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, ++ 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, ++ 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, ++ 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, ++ 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, ++ 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, ++ 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, ++ 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, ++ 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, ++ 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, ++ 0x3dee8ca6}, ++ {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, ++ 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, ++ 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, ++ 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, ++ 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, ++ 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, ++ 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, ++ 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, ++ 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, ++ 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, ++ 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, ++ 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, ++ 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, ++ 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, ++ 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, ++ 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, ++ 
0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, ++ 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, ++ 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, ++ 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, ++ 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, ++ 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, ++ 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, ++ 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, ++ 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, ++ 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, ++ 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, ++ 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, ++ 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, ++ 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, ++ 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, ++ 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, ++ 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, ++ 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, ++ 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, ++ 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, ++ 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, ++ 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, ++ 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, ++ 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, ++ 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, ++ 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, ++ 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, ++ 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, ++ 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, ++ 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, ++ 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, ++ 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 
0xa1a4d0f6, 0x3a019c99, ++ 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, ++ 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, ++ 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, ++ 0x36197165}, ++ {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, ++ 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, ++ 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, ++ 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, ++ 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, ++ 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, ++ 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, ++ 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, ++ 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, ++ 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, ++ 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, ++ 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, ++ 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, ++ 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, ++ 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, ++ 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, ++ 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, ++ 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, ++ 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, ++ 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, ++ 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, ++ 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, ++ 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, ++ 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, ++ 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, ++ 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, ++ 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, ++ 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, ++ 
0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, ++ 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, ++ 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, ++ 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, ++ 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, ++ 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, ++ 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, ++ 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, ++ 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, ++ 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, ++ 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, ++ 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, ++ 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, ++ 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, ++ 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, ++ 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, ++ 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, ++ 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, ++ 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, ++ 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, ++ 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, ++ 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, ++ 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, ++ 0x1a3b93aa}, ++ {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, ++ 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, ++ 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, ++ 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, ++ 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, ++ 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, ++ 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, ++ 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, ++ 0x5c8a4324, 0xb348281a, 
0x587f9319, 0xb7bdf827, 0x5561e35e, ++ 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, ++ 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, ++ 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, ++ 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, ++ 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, ++ 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, ++ 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, ++ 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, ++ 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, ++ 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, ++ 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, ++ 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, ++ 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, ++ 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, ++ 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, ++ 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, ++ 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, ++ 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, ++ 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, ++ 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, ++ 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, ++ 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, ++ 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, ++ 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, ++ 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, ++ 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, ++ 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, ++ 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, ++ 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, ++ 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, ++ 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, ++ 
0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, ++ 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, ++ 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, ++ 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, ++ 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, ++ 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, ++ 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, ++ 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, ++ 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, ++ 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, ++ 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, ++ 0xe147d714}, ++ {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, ++ 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, ++ 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, ++ 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, ++ 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, ++ 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, ++ 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, ++ 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, ++ 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, ++ 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, ++ 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, ++ 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, ++ 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, ++ 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, ++ 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, ++ 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, ++ 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, ++ 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, ++ 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, ++ 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, ++ 0x018fd0a8, 0x16f4c4eb, 
0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, ++ 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, ++ 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, ++ 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, ++ 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, ++ 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, ++ 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, ++ 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, ++ 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, ++ 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, ++ 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, ++ 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, ++ 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, ++ 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, ++ 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, ++ 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, ++ 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, ++ 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, ++ 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, ++ 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, ++ 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, ++ 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, ++ 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, ++ 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, ++ 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, ++ 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, ++ 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, ++ 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, ++ 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, ++ 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, ++ 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, ++ 0x494f0c4b}}; ++ ++static const z_word_t crc_braid_big_table[][256] = 
{ ++ {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000, ++ 0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000, ++ 0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000, ++ 0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000, ++ 0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000, ++ 0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000, ++ 0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000, ++ 0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000, ++ 0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000, ++ 0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000, ++ 0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000, ++ 0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000, ++ 0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000, ++ 0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000, ++ 0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000, ++ 0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000, ++ 0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000, ++ 0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000, ++ 0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000, ++ 0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000, ++ 0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000, ++ 0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000, ++ 0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000, ++ 0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000, ++ 0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000, ++ 0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000, ++ 0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000, ++ 0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000, ++ 0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000, ++ 0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000, ++ 0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000, ++ 0x21a61d8300000000, 0xe49a90ba00000000, 
0xa78eebad00000000, ++ 0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000, ++ 0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000, ++ 0x2ef8792f00000000, 0x6dec023800000000, 0xbc23bbe700000000, ++ 0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000, ++ 0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000, ++ 0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000, ++ 0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000, ++ 0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000, ++ 0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000, ++ 0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000, ++ 0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000, ++ 0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000, ++ 0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000, ++ 0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000, ++ 0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000, ++ 0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000, ++ 0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000, ++ 0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000, ++ 0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000, ++ 0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000, ++ 0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000, ++ 0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000, ++ 0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000, ++ 0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000, ++ 0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000, ++ 0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000, ++ 0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000, ++ 0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000, ++ 0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000, ++ 0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000, ++ 0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000, ++ 0xca272bb900000000, 
0x0f1ba68000000000, 0x4c0fdd9700000000, ++ 0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000, ++ 0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000, ++ 0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000, ++ 0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000, ++ 0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000, ++ 0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000, ++ 0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000, ++ 0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000, ++ 0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000, ++ 0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000, ++ 0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000, ++ 0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000, ++ 0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000, ++ 0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000, ++ 0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000, ++ 0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000, ++ 0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000, ++ 0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000, ++ 0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000, ++ 0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000, ++ 0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000, ++ 0x4b0c4f4900000000}, ++ {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000, ++ 0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000, ++ 0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000, ++ 0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000, ++ 0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000, ++ 0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000, ++ 0xd5515b2300000000, 0xeb3a99cc00000000, 0x9221452e00000000, ++ 0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000, ++ 0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000, ++ 0x1f7a4edf00000000, 0x6661923d00000000, 
0x580a50d200000000, ++ 0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000, ++ 0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000, ++ 0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000, ++ 0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000, ++ 0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000, ++ 0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000, ++ 0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000, ++ 0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000, ++ 0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000, ++ 0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000, ++ 0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000, ++ 0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000, ++ 0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000, ++ 0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000, ++ 0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000, ++ 0x57fc5a6600000000, 0x2ee7868400000000, 0x108c446b00000000, ++ 0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000, ++ 0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000, ++ 0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000, ++ 0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000, ++ 0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000, ++ 0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000, ++ 0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000, ++ 0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000, ++ 0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000, ++ 0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000, ++ 0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000, ++ 0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000, ++ 0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000, ++ 0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000, ++ 0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000, ++ 0x6f7ea90e00000000, 
0x166575ec00000000, 0x280eb70300000000, ++ 0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000, ++ 0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000, ++ 0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000, ++ 0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000, ++ 0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000, ++ 0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000, ++ 0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000, ++ 0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000, ++ 0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000, ++ 0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000, ++ 0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000, ++ 0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000, ++ 0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000, ++ 0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000, ++ 0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000, ++ 0x26f1b85100000000, 0x5fea64b300000000, 0x6181a65c00000000, ++ 0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000, ++ 0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000, ++ 0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000, ++ 0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000, ++ 0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000, ++ 0x8900087b00000000, 0x8abb3f9000000000, 0xb4d0fd7f00000000, ++ 0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000, ++ 0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000, ++ 0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000, ++ 0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000, ++ 0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000, ++ 0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000, ++ 0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000, ++ 0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000, ++ 0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000, ++ 
0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000, ++ 0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000, ++ 0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000, ++ 0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000, ++ 0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000, ++ 0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000, ++ 0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000, ++ 0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000, ++ 0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000, ++ 0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000, ++ 0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000, ++ 0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000, ++ 0x14d747e100000000}, ++ {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000, ++ 0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000, ++ 0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000, ++ 0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000, ++ 0xc7921a0a00000000, 0x074d94cb00000000, 0x062b765200000000, ++ 0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000, ++ 0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000, ++ 0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000, ++ 0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000, ++ 0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000, ++ 0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000, ++ 0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000, ++ 0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000, ++ 0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000, ++ 0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000, ++ 0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000, ++ 0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000, ++ 0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000, ++ 0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000, ++ 0x9975275300000000, 
0x9813c5ca00000000, 0x58cc4b0b00000000, ++ 0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000, ++ 0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000, ++ 0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000, ++ 0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000, ++ 0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000, ++ 0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000, ++ 0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000, ++ 0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000, ++ 0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000, ++ 0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000, ++ 0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000, ++ 0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000, ++ 0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000, ++ 0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000, ++ 0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000, ++ 0xbda8992b00000000, 0xbcce7bb200000000, 0x7c11f57300000000, ++ 0xff04ce5a00000000, 0x3fdb409b00000000, 0x3ebda20200000000, ++ 0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000, ++ 0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000, ++ 0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000, ++ 0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000, ++ 0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000, ++ 0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000, ++ 0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000, ++ 0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000, ++ 0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000, ++ 0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000, ++ 0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000, ++ 0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000, ++ 0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000, ++ 0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000, ++ 
0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000, ++ 0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000, ++ 0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000, ++ 0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000, ++ 0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000, ++ 0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000, ++ 0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000, ++ 0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000, ++ 0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000, ++ 0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000, ++ 0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000, ++ 0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000, ++ 0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000, ++ 0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000, ++ 0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000, ++ 0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000, ++ 0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000, ++ 0xb7beb3ab00000000, 0x77613d6a00000000, 0x7607dff300000000, ++ 0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000, ++ 0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000, ++ 0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000, ++ 0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000, ++ 0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000, ++ 0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000, ++ 0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000, ++ 0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000, ++ 0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000, ++ 0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000, ++ 0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000, ++ 0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000, ++ 0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000, ++ 0x2fad766100000000, 0xef72f8a000000000, 
0x2986003300000000, ++ 0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000, ++ 0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000, ++ 0xaa933b1a00000000}, ++ {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000, ++ 0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000, ++ 0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000, ++ 0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000, ++ 0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000, ++ 0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000, ++ 0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000, ++ 0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000, ++ 0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000, ++ 0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000, ++ 0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000, ++ 0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000, ++ 0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000, ++ 0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000, ++ 0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000, ++ 0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000, ++ 0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000, ++ 0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000, ++ 0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000, ++ 0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000, ++ 0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000, ++ 0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000, ++ 0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000, ++ 0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000, ++ 0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000, ++ 0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000, ++ 0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000, ++ 0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000, ++ 0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000, 
++ 0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000, ++ 0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000, ++ 0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000, ++ 0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000, ++ 0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000, ++ 0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000, ++ 0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000, ++ 0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000, ++ 0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000, ++ 0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000, ++ 0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000, ++ 0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000, ++ 0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000, ++ 0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000, ++ 0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000, ++ 0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000, ++ 0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000, ++ 0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000, ++ 0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000, ++ 0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000, ++ 0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000, ++ 0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000, ++ 0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000, ++ 0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000, ++ 0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000, ++ 0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000, ++ 0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000, ++ 0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000, ++ 0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000, ++ 0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000, ++ 0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000, ++ 0x9b62e87700000000, 0xf42e4dec00000000, 
0x04fcd39b00000000, ++ 0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000, ++ 0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000, ++ 0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000, ++ 0x10d8a45000000000, 0x7f9401cb00000000, 0x8f469fbc00000000, ++ 0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000, ++ 0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000, ++ 0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000, ++ 0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000, ++ 0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000, ++ 0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000, ++ 0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000, ++ 0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000, ++ 0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000, ++ 0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000, ++ 0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000, ++ 0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000, ++ 0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000, ++ 0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000, ++ 0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000, ++ 0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000, ++ 0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000, ++ 0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000, ++ 0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000, ++ 0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000, ++ 0x6571193600000000}, ++ {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000, ++ 0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000, ++ 0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000, ++ 0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000, ++ 0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000, ++ 0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000, ++ 0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000, 
++ 0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000, ++ 0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000, ++ 0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000, ++ 0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000, ++ 0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000, ++ 0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000, ++ 0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000, ++ 0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000, ++ 0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000, ++ 0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000, ++ 0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000, ++ 0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000, ++ 0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000, ++ 0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000, ++ 0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000, ++ 0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000, ++ 0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000, ++ 0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000, ++ 0x082e445a00000000, 0x5028372700000000, 0xd5f1a1fa00000000, ++ 0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000, ++ 0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000, ++ 0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000, ++ 0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000, ++ 0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000, ++ 0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000, ++ 0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000, ++ 0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000, ++ 0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000, ++ 0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000, ++ 0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000, ++ 0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000, ++ 0xcfc9a8da00000000, 0x4a103e0700000000, 
0x12164d7a00000000, ++ 0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000, ++ 0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000, ++ 0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000, ++ 0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000, ++ 0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000, ++ 0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000, ++ 0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000, ++ 0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000, ++ 0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000, ++ 0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000, ++ 0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000, ++ 0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000, ++ 0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000, ++ 0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000, ++ 0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000, ++ 0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000, ++ 0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000, ++ 0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000, ++ 0x41d1ddf400000000, 0x19d7ae8900000000, 0x9c0e385400000000, ++ 0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000, ++ 0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000, ++ 0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000, ++ 0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000, ++ 0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000, ++ 0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000, ++ 0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000, ++ 0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000, ++ 0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000, ++ 0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000, ++ 0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000, ++ 0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000, ++ 0x2da7bcc900000000, 
0xa87e2a1400000000, 0xf078596900000000, ++ 0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000, ++ 0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000, ++ 0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000, ++ 0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000, ++ 0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000, ++ 0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000, ++ 0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000, ++ 0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000, ++ 0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000, ++ 0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000, ++ 0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000, ++ 0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000, ++ 0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000, ++ 0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000, ++ 0xa68cee3d00000000}, ++ {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000, ++ 0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000, ++ 0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000, ++ 0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000, ++ 0x2d91f52a00000000, 0x5b70fab700000000, 0x80559bcb00000000, ++ 0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000, ++ 0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000, ++ 0x015211e200000000, 0xda77709e00000000, 0xac967f0300000000, ++ 0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000, ++ 0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000, ++ 0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000, ++ 0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000, ++ 0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000, ++ 0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000, ++ 0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000, ++ 0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000, ++ 0xb444d6ab00000000, 0xc2a5d93600000000, 
0x1980b84a00000000, ++ 0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000, ++ 0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000, ++ 0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000, ++ 0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000, ++ 0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000, ++ 0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000, ++ 0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000, ++ 0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000, ++ 0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000, ++ 0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000, ++ 0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000, ++ 0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000, ++ 0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000, ++ 0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000, ++ 0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000, ++ 0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000, ++ 0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000, ++ 0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000, ++ 0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000, ++ 0x041e28a600000000, 0x72ff273b00000000, 0xa9da464700000000, ++ 0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000, ++ 0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000, ++ 0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000, ++ 0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000, ++ 0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000, ++ 0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000, ++ 0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000, ++ 0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000, ++ 0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000, ++ 0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000, ++ 0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000, ++ 0x8ed3c1e500000000, 
0xf832ce7800000000, 0x2317af0400000000, ++ 0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000, ++ 0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000, ++ 0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000, ++ 0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000, ++ 0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000, ++ 0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000, ++ 0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000, ++ 0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000, ++ 0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000, ++ 0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000, ++ 0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000, ++ 0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000, ++ 0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000, ++ 0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000, ++ 0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000, ++ 0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000, ++ 0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000, ++ 0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000, ++ 0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000, ++ 0x3e893fe800000000, 0x4868307500000000, 0x934d510900000000, ++ 0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000, ++ 0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000, ++ 0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000, ++ 0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000, ++ 0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000, ++ 0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000, ++ 0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000, ++ 0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000, ++ 0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000, ++ 0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000, ++ 0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000, ++ 
0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000, ++ 0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000, ++ 0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000, ++ 0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000, ++ 0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000, ++ 0x51e8883f00000000}, ++ {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000, ++ 0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000, ++ 0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000, ++ 0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000, ++ 0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000, ++ 0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000, ++ 0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000, ++ 0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000, ++ 0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000, ++ 0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000, ++ 0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000, ++ 0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000, ++ 0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000, ++ 0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000, ++ 0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000, ++ 0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000, ++ 0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000, ++ 0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000, ++ 0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000, ++ 0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000, ++ 0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000, ++ 0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000, ++ 0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000, ++ 0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000, ++ 0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000, ++ 0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000, ++ 0x0863882700000000, 
0xe0b8739e00000000, 0x32e4998e00000000, ++ 0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000, ++ 0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000, ++ 0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000, ++ 0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000, ++ 0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000, ++ 0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000, ++ 0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000, ++ 0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000, ++ 0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000, ++ 0x07eccb0700000000, 0xef3730be00000000, 0x965d4daf00000000, ++ 0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000, ++ 0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000, ++ 0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000, ++ 0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000, ++ 0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000, ++ 0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000, ++ 0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000, ++ 0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000, ++ 0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000, ++ 0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000, ++ 0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000, ++ 0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000, ++ 0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000, ++ 0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000, ++ 0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000, ++ 0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000, ++ 0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000, ++ 0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000, ++ 0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000, ++ 0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000, ++ 0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000, ++ 
0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000, ++ 0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000, ++ 0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000, ++ 0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000, ++ 0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000, ++ 0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000, ++ 0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000, ++ 0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000, ++ 0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000, ++ 0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000, ++ 0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000, ++ 0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000, ++ 0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000, ++ 0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000, ++ 0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000, ++ 0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000, ++ 0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000, ++ 0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000, ++ 0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000, ++ 0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000, ++ 0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000, ++ 0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000, ++ 0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000, ++ 0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000, ++ 0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000, ++ 0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000, ++ 0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000, ++ 0x8ae9531c00000000}, ++ {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000, ++ 0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000, ++ 0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000, ++ 0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000, ++ 0xc8d322a700000000, 
0x59424a0900000000, 0xabf6822000000000, ++ 0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000, ++ 0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000, ++ 0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000, ++ 0xd1a1349500000000, 0x40305c3b00000000, 0xb284941200000000, ++ 0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000, ++ 0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000, ++ 0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000, ++ 0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000, ++ 0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000, ++ 0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000, ++ 0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000, ++ 0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000, ++ 0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000, ++ 0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000, ++ 0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000, ++ 0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000, ++ 0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000, ++ 0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000, ++ 0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000, ++ 0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000, ++ 0x47dda57400000000, 0x32255c8900000000, 0xa3b4342700000000, ++ 0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000, ++ 0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000, ++ 0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000, ++ 0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000, ++ 0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000, ++ 0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000, ++ 0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000, ++ 0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000, ++ 0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000, ++ 0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000, ++ 
0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000, ++ 0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000, ++ 0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000, ++ 0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000, ++ 0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000, ++ 0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000, ++ 0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000, ++ 0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000, ++ 0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000, ++ 0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000, ++ 0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000, ++ 0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000, ++ 0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000, ++ 0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000, ++ 0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000, ++ 0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000, ++ 0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000, ++ 0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000, ++ 0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000, ++ 0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000, ++ 0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000, ++ 0x34bd191100000000, 0x4145e0ec00000000, 0xd0d4884200000000, ++ 0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000, ++ 0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000, ++ 0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000, ++ 0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000, ++ 0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000, ++ 0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000, ++ 0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000, ++ 0xfcaf4b5b00000000, 0x8957b2a600000000, 0x18c6da0800000000, ++ 0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000, ++ 0xd015f8af00000000, 0x22a1308600000000, 
0xb330582800000000, ++ 0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000, ++ 0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000, ++ 0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000, ++ 0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000, ++ 0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000, ++ 0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000, ++ 0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000, ++ 0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000, ++ 0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000, ++ 0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000, ++ 0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000, ++ 0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000, ++ 0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000, ++ 0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000, ++ 0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000, ++ 0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000, ++ 0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000, ++ 0xd739710d00000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, ++ 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, ++ 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, ++ 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, ++ 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, ++ 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, ++ 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, ++ 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, ++ 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, ++ 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, ++ 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, ++ 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, ++ 0xd2624632, 0x1ec846ac, 0x9047414f, 
0x5ced41d1, 0x299dc2ed, ++ 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, ++ 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, ++ 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, ++ 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, ++ 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, ++ 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, ++ 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, ++ 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, ++ 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, ++ 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, ++ 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, ++ 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, ++ 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, ++ 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, ++ 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, ++ 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, ++ 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, ++ 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, ++ 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, ++ 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, ++ 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, ++ 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, ++ 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, ++ 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, ++ 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, ++ 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, ++ 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, ++ 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, ++ 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, ++ 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, ++ 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, ++ 0x51b26353, 
0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, ++ 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, ++ 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, ++ 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, ++ 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, ++ 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, ++ 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, ++ 0x264b06e6}, ++ {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, ++ 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, ++ 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, ++ 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, ++ 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, ++ 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, ++ 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, ++ 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, ++ 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, ++ 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, ++ 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, ++ 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, ++ 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, ++ 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, ++ 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, ++ 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, ++ 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, ++ 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, ++ 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, ++ 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, ++ 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, ++ 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, ++ 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, ++ 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, ++ 0xdf879e4c, 0x79f095f8, 0x48188f65, 
0xee6f84d1, 0x2bc8ba5f, ++ 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, ++ 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, ++ 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, ++ 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, ++ 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, ++ 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, ++ 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, ++ 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, ++ 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, ++ 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, ++ 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, ++ 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, ++ 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, ++ 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, ++ 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, ++ 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, ++ 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, ++ 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, ++ 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, ++ 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, ++ 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, ++ 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, ++ 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, ++ 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, ++ 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, ++ 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, ++ 0x92364a30}, ++ {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, ++ 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, ++ 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, ++ 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, ++ 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, ++ 
0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, ++ 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, ++ 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, ++ 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, ++ 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, ++ 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, ++ 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, ++ 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, ++ 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, ++ 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, ++ 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, ++ 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, ++ 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, ++ 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, ++ 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, ++ 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, ++ 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, ++ 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, ++ 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, ++ 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, ++ 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, ++ 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, ++ 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, ++ 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, ++ 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, ++ 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, ++ 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, ++ 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, ++ 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, ++ 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, ++ 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, ++ 0x815b5163, 0x4a0782c6, 0xcc93f068, 
0x07cf23cd, 0xf6999118, ++ 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, ++ 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, ++ 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, ++ 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, ++ 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, ++ 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, ++ 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, ++ 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, ++ 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, ++ 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, ++ 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, ++ 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, ++ 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, ++ 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, ++ 0xe4c4abcc}, ++ {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, ++ 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, ++ 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, ++ 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, ++ 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, ++ 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, ++ 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, ++ 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, ++ 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, ++ 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, ++ 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, ++ 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, ++ 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, ++ 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, ++ 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, ++ 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, ++ 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, ++ 
0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, ++ 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, ++ 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, ++ 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, ++ 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, ++ 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, ++ 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, ++ 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, ++ 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, ++ 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, ++ 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, ++ 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, ++ 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, ++ 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, ++ 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, ++ 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, ++ 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, ++ 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, ++ 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, ++ 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, ++ 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, ++ 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, ++ 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, ++ 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, ++ 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, ++ 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, ++ 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, ++ 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, ++ 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, ++ 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, ++ 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, ++ 0x4834505d, 0x755479ed, 0x32f4033d, 
0x0f942a8d, 0xbdb4f69d, ++ 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, ++ 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, ++ 0xca64c78c}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, ++ 0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d, ++ 0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, ++ 0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, ++ 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050, ++ 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, ++ 0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb, ++ 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173, ++ 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, ++ 0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c, ++ 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, ++ 0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, ++ 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358, ++ 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, ++ 0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312, ++ 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da, ++ 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, ++ 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335, ++ 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, ++ 0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, ++ 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9, ++ 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, ++ 0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, ++ 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b, ++ 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, ++ 0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c, ++ 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, ++ 0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, ++ 
0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1, ++ 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, ++ 0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b, ++ 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653, ++ 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, ++ 0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d, ++ 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, ++ 0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, ++ 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720, ++ 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, ++ 0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, ++ 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b, ++ 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, ++ 0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4, ++ 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, ++ 0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, ++ 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428, ++ 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, ++ 0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462, ++ 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa, ++ 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, ++ 0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445, ++ 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, ++ 0x8cc764ca}, ++ {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, ++ 0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27, ++ 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, ++ 0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285, ++ 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef, ++ 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, ++ 0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a, ++ 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a, ++ 0x5b87ffeb, 0xfe54a320, 
0x502637a6, 0xf5f56b6d, 0x4dc56e70, ++ 0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf, ++ 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, ++ 0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, ++ 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f, ++ 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, ++ 0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae, ++ 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe, ++ 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, ++ 0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b, ++ 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, ++ 0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, ++ 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4, ++ 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, ++ 0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46, ++ 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716, ++ 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, ++ 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5, ++ 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, ++ 0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, ++ 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d, ++ 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, ++ 0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc, ++ 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec, ++ 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, ++ 0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e, ++ 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, ++ 0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, ++ 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6, ++ 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, ++ 0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c, ++ 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c, ++ 
0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, ++ 0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9, ++ 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, ++ 0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, ++ 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416, ++ 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, ++ 0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, ++ 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7, ++ 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, ++ 0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32, ++ 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, ++ 0xccabc4e4}, ++ {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, ++ 0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895, ++ 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, ++ 0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, ++ 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154, ++ 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, ++ 0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258, ++ 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a, ++ 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, ++ 0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e, ++ 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, ++ 0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, ++ 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44, ++ 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, ++ 0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0, ++ 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2, ++ 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, ++ 0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6, ++ 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, ++ 0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, ++ 0xf668f97f, 0x42638ed9, 
0xdf7966e8, 0x6b72114e, 0x820259b8, ++ 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, ++ 0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, ++ 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29, ++ 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, ++ 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e, ++ 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, ++ 0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, ++ 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef, ++ 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, ++ 0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b, ++ 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019, ++ 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, ++ 0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2, ++ 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, ++ 0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, ++ 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13, ++ 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, ++ 0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, ++ 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09, ++ 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, ++ 0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d, ++ 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, ++ 0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, ++ 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003, ++ 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, ++ 0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7, ++ 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5, ++ 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, ++ 0x7e154636, 0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1, ++ 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, ++ 0x304a3692}, ++ {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 
0xfa0e4a84, ++ 0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f, ++ 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, ++ 0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, ++ 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf, ++ 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, ++ 0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb, ++ 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3, ++ 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, ++ 0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749, ++ 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, ++ 0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, ++ 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29, ++ 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, ++ 0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8, ++ 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0, ++ 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, ++ 0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a, ++ 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, ++ 0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, ++ 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03, ++ 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, ++ 0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee, ++ 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6, ++ 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, ++ 0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f, ++ 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, ++ 0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, ++ 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f, ++ 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, ++ 0x76bdefe9, 0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be, ++ 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6, ++ 0x8ce74daa, 0x12e7e766, 
0xf1e068e8, 0x6fe0c224, 0x76e9072e, ++ 0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5, ++ 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, ++ 0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, ++ 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305, ++ 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, ++ 0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338, ++ 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370, ++ 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, ++ 0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca, ++ 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, ++ 0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, ++ 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083, ++ 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, ++ 0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012, ++ 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a, ++ 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, ++ 0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0, ++ 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea, ++ 0xe6064b26}}; ++ ++#endif /* W */ ++ ++#endif /* N == 2 */ ++#if N == 3 ++ ++#if W == 8 ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, ++ 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, ++ 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, ++ 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, ++ 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, ++ 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, ++ 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, ++ 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, ++ 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, ++ 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, ++ 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 
0x3180d317, ++ 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, ++ 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, ++ 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, ++ 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, ++ 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, ++ 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, ++ 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, ++ 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, ++ 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, ++ 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, ++ 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, ++ 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, ++ 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, ++ 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, ++ 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, ++ 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, ++ 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, ++ 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, ++ 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, ++ 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, ++ 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, ++ 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, ++ 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, ++ 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, ++ 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, ++ 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, ++ 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, ++ 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, ++ 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, ++ 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, ++ 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, ++ 0xcd35bced, 0x4c10d9ca, 
0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, ++ 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, ++ 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, ++ 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, ++ 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, ++ 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, ++ 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, ++ 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, ++ 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, ++ 0x09cd8551}, ++ {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, ++ 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, ++ 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, ++ 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, ++ 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, ++ 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, ++ 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, ++ 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, ++ 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, ++ 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, ++ 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, ++ 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, ++ 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, ++ 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, ++ 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, ++ 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, ++ 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, ++ 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, ++ 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, ++ 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, ++ 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, ++ 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, ++ 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 
0xebcb400d, ++ 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, ++ 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, ++ 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, ++ 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, ++ 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, ++ 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, ++ 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, ++ 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, ++ 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, ++ 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, ++ 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, ++ 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, ++ 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, ++ 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, ++ 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, ++ 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, ++ 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, ++ 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, ++ 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, ++ 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, ++ 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, ++ 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, ++ 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, ++ 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, ++ 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, ++ 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, ++ 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, ++ 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, ++ 0x7bc97a0c}, ++ {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, ++ 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, ++ 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, ++ 0xdc9a1840, 
0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, ++ 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, ++ 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, ++ 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, ++ 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, ++ 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, ++ 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, ++ 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, ++ 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, ++ 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, ++ 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, ++ 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, ++ 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, ++ 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, ++ 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, ++ 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, ++ 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, ++ 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, ++ 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, ++ 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, ++ 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, ++ 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, ++ 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, ++ 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, ++ 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, ++ 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, ++ 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, ++ 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, ++ 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, ++ 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, ++ 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, ++ 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 
0x0cc0af8c, ++ 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, ++ 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, ++ 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, ++ 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, ++ 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, ++ 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, ++ 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, ++ 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, ++ 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, ++ 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, ++ 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, ++ 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, ++ 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, ++ 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, ++ 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, ++ 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, ++ 0x7851a2ca}, ++ {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, ++ 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, ++ 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, ++ 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, ++ 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, ++ 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, ++ 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, ++ 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, ++ 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, ++ 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, ++ 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, ++ 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, ++ 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, ++ 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, ++ 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, ++ 0xaa0dc664, 
0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, ++ 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, ++ 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, ++ 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, ++ 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, ++ 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, ++ 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, ++ 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, ++ 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, ++ 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, ++ 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, ++ 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, ++ 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, ++ 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, ++ 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, ++ 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, ++ 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, ++ 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, ++ 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, ++ 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, ++ 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, ++ 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, ++ 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, ++ 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, ++ 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, ++ 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, ++ 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, ++ 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, ++ 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, ++ 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, ++ 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, ++ 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 
0x90acfd81, ++ 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, ++ 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, ++ 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, ++ 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, ++ 0x566b6848}, ++ {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, ++ 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, ++ 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, ++ 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, ++ 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, ++ 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, ++ 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, ++ 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, ++ 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, ++ 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, ++ 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, ++ 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, ++ 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, ++ 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, ++ 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, ++ 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, ++ 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, ++ 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, ++ 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, ++ 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, ++ 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, ++ 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, ++ 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, ++ 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, ++ 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, ++ 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, ++ 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, ++ 0x8ab5e6bf, 
0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, ++ 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, ++ 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, ++ 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, ++ 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, ++ 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, ++ 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, ++ 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, ++ 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, ++ 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, ++ 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, ++ 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, ++ 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, ++ 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, ++ 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, ++ 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, ++ 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, ++ 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, ++ 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, ++ 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, ++ 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, ++ 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, ++ 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, ++ 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, ++ 0xd8ac6b35}, ++ {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, ++ 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, ++ 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, ++ 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, ++ 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, ++ 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, ++ 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, ++ 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 
0xbb587d07, 0x6cbafd5f, ++ 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, ++ 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, ++ 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, ++ 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, ++ 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, ++ 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, ++ 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, ++ 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, ++ 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, ++ 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, ++ 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, ++ 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, ++ 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, ++ 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, ++ 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, ++ 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, ++ 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, ++ 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, ++ 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, ++ 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, ++ 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, ++ 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, ++ 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, ++ 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, ++ 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, ++ 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, ++ 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, ++ 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, ++ 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, ++ 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, ++ 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, ++ 0x744fb2d1, 
0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, ++ 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, ++ 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, ++ 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, ++ 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, ++ 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, ++ 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, ++ 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, ++ 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, ++ 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, ++ 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, ++ 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, ++ 0xa140efa8}, ++ {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, ++ 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, ++ 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, ++ 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, ++ 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, ++ 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, ++ 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, ++ 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, ++ 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, ++ 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, ++ 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, ++ 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, ++ 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, ++ 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, ++ 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, ++ 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, ++ 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, ++ 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, ++ 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, ++ 0x62518810, 0xe7cbc850, 0x08997eb1, 
0xe21fa3d3, 0x0d4d1532, ++ 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, ++ 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, ++ 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, ++ 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, ++ 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, ++ 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, ++ 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, ++ 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, ++ 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, ++ 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, ++ 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, ++ 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, ++ 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, ++ 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, ++ 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, ++ 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, ++ 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, ++ 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, ++ 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, ++ 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, ++ 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, ++ 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, ++ 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, ++ 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, ++ 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, ++ 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, ++ 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, ++ 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, ++ 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, ++ 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, ++ 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, ++ 0x917cd6a1}, ++ 
{0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, ++ 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, ++ 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, ++ 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, ++ 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, ++ 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, ++ 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, ++ 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, ++ 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, ++ 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, ++ 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, ++ 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, ++ 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, ++ 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, ++ 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, ++ 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, ++ 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, ++ 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, ++ 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, ++ 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, ++ 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, ++ 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, ++ 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, ++ 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, ++ 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, ++ 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, ++ 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, ++ 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, ++ 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, ++ 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, ++ 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, ++ 0x1857468f, 0x38c53ac4, 0xbf63f187, 
0xecf9aa03, 0x6b5f6140, ++ 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, ++ 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, ++ 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, ++ 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, ++ 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, ++ 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, ++ 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, ++ 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, ++ 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, ++ 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, ++ 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, ++ 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, ++ 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, ++ 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, ++ 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, ++ 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, ++ 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, ++ 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, ++ 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, ++ 0x18ba364e}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000, ++ 0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000, ++ 0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000, ++ 0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000, ++ 0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000, ++ 0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000, ++ 0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000, ++ 0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000, ++ 0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000, ++ 0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000, ++ 0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000, ++ 
0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000, ++ 0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000, ++ 0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000, ++ 0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000, ++ 0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000, ++ 0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000, ++ 0x03f0686a00000000, 0x488cfa4a00000000, 0x0b475ccd00000000, ++ 0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000, ++ 0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000, ++ 0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000, ++ 0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000, ++ 0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000, ++ 0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000, ++ 0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000, ++ 0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000, ++ 0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000, ++ 0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000, ++ 0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000, ++ 0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000, ++ 0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000, ++ 0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000, ++ 0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000, ++ 0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000, ++ 0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000, ++ 0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000, ++ 0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000, ++ 0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000, ++ 0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000, ++ 0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000, ++ 0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000, ++ 0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000, ++ 0xe536902500000000, 0xa6fd36a200000000, 
0xe8cb8cba00000000, ++ 0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000, ++ 0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000, ++ 0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000, ++ 0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000, ++ 0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000, ++ 0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000, ++ 0x110947fe00000000, 0x5a75d5de00000000, 0x19be735900000000, ++ 0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000, ++ 0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000, ++ 0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000, ++ 0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000, ++ 0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000, ++ 0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000, ++ 0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000, ++ 0x08eda52100000000, 0x4391370100000000, 0x005a918600000000, ++ 0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000, ++ 0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000, ++ 0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000, ++ 0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000, ++ 0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000, ++ 0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000, ++ 0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000, ++ 0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000, ++ 0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000, ++ 0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000, ++ 0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000, ++ 0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000, ++ 0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000, ++ 0xeddb350400000000, 0x6980af5700000000, 0x2a4b09d000000000, ++ 0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000, ++ 0x7b23114500000000, 0x305f836500000000, 0x739425e200000000, ++ 0xf7cfbfb100000000, 
0xb404193600000000, 0xe69c69c900000000, ++ 0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000, ++ 0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000, ++ 0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000, ++ 0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000, ++ 0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000, ++ 0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000, ++ 0x1f5ea28d00000000, 0x542230ad00000000, 0x17e9962a00000000, ++ 0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000, ++ 0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000, ++ 0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000, ++ 0x4e36ba1800000000}, ++ {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000, ++ 0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000, ++ 0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000, ++ 0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000, ++ 0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000, ++ 0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000, ++ 0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000, ++ 0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000, ++ 0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000, ++ 0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000, ++ 0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000, ++ 0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000, ++ 0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000, ++ 0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000, ++ 0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000, ++ 0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000, ++ 0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000, ++ 0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000, ++ 0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000, ++ 0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000, ++ 0x229d1c6f00000000, 0xc32b4e8000000000, 
0xa1f6c86a00000000, ++ 0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000, ++ 0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000, ++ 0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000, ++ 0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000, ++ 0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000, ++ 0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000, ++ 0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000, ++ 0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000, ++ 0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000, ++ 0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000, ++ 0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000, ++ 0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000, ++ 0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000, ++ 0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000, ++ 0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000, ++ 0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000, ++ 0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000, ++ 0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000, ++ 0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000, ++ 0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000, ++ 0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000, ++ 0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000, ++ 0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000, ++ 0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000, ++ 0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000, ++ 0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000, ++ 0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000, ++ 0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000, ++ 0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000, ++ 0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000, ++ 0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000, ++ 0x93c3319c00000000, 
0x7275637300000000, 0x10a8e59900000000, ++ 0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000, ++ 0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000, ++ 0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000, ++ 0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000, ++ 0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000, ++ 0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000, ++ 0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000, ++ 0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000, ++ 0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000, ++ 0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000, ++ 0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000, ++ 0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000, ++ 0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000, ++ 0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000, ++ 0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000, ++ 0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000, ++ 0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000, ++ 0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000, ++ 0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000, ++ 0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000, ++ 0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000, ++ 0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000, ++ 0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000, ++ 0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000, ++ 0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000, ++ 0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000, ++ 0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000, ++ 0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000, ++ 0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000, ++ 0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000, ++ 0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000, ++ 
0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000, ++ 0xa1d67c9100000000}, ++ {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000, ++ 0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000, ++ 0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000, ++ 0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000, ++ 0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000, ++ 0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000, ++ 0xfb3df66700000000, 0xa3bd14b000000000, 0xe8362afa00000000, ++ 0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000, ++ 0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000, ++ 0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000, ++ 0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000, ++ 0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000, ++ 0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000, ++ 0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000, ++ 0x606d915b00000000, 0x38ed738c00000000, 0x73664dc600000000, ++ 0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000, ++ 0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000, ++ 0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000, ++ 0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000, ++ 0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000, ++ 0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000, ++ 0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000, ++ 0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000, ++ 0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000, ++ 0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000, ++ 0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000, ++ 0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000, ++ 0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000, ++ 0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000, ++ 0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000, ++ 0x56cc5f2300000000, 
0x0e4cbdf400000000, 0x45c783be00000000, ++ 0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000, ++ 0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000, ++ 0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000, ++ 0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000, ++ 0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000, ++ 0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000, ++ 0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000, ++ 0xc7a77a0c00000000, 0x9f2798db00000000, 0xd4aca69100000000, ++ 0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000, ++ 0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000, ++ 0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000, ++ 0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000, ++ 0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000, ++ 0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000, ++ 0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000, ++ 0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000, ++ 0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000, ++ 0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000, ++ 0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000, ++ 0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000, ++ 0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000, ++ 0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000, ++ 0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000, ++ 0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000, ++ 0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000, ++ 0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000, ++ 0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000, ++ 0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000, ++ 0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000, ++ 0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000, ++ 0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000, ++ 
0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000, ++ 0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000, ++ 0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000, ++ 0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000, ++ 0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000, ++ 0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000, ++ 0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000, ++ 0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000, ++ 0x8309efb000000000, 0xdb890d6700000000, 0x9002332d00000000, ++ 0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000, ++ 0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000, ++ 0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000, ++ 0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000, ++ 0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000, ++ 0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000, ++ 0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000, ++ 0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000, ++ 0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000, ++ 0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000, ++ 0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000, ++ 0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000, ++ 0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000, ++ 0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000, ++ 0xa8ef40a100000000}, ++ {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000, ++ 0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000, ++ 0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000, ++ 0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000, ++ 0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000, ++ 0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000, ++ 0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000, ++ 0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000, ++ 0x977d1d7d00000000, 
0xd1467a1800000000, 0x1b0bd3b700000000, ++ 0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000, ++ 0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000, ++ 0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000, ++ 0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000, ++ 0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000, ++ 0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000, ++ 0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000, ++ 0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000, ++ 0xe4b6935500000000, 0x7710d7b400000000, 0x312bb0d100000000, ++ 0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000, ++ 0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000, ++ 0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000, ++ 0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000, ++ 0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000, ++ 0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000, ++ 0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000, ++ 0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000, ++ 0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000, ++ 0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000, ++ 0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000, ++ 0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000, ++ 0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000, ++ 0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000, ++ 0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000, ++ 0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000, ++ 0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000, ++ 0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000, ++ 0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000, ++ 0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000, ++ 0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000, ++ 0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000, ++ 
0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000, ++ 0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000, ++ 0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000, ++ 0x6a7b960e00000000, 0xa0363fa100000000, 0xe60d58c400000000, ++ 0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000, ++ 0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000, ++ 0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000, ++ 0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000, ++ 0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000, ++ 0xc3a69e2400000000, 0x5000dac500000000, 0x163bbda000000000, ++ 0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000, ++ 0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000, ++ 0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000, ++ 0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000, ++ 0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000, ++ 0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000, ++ 0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000, ++ 0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000, ++ 0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000, ++ 0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000, ++ 0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000, ++ 0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000, ++ 0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000, ++ 0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000, ++ 0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000, ++ 0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000, ++ 0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000, ++ 0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000, ++ 0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000, ++ 0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000, ++ 0x933d017400000000, 0xd506661100000000, 0x46a022f000000000, ++ 0x009b459500000000, 0xcad6ec3a00000000, 
0x8ced8b5f00000000, ++ 0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000, ++ 0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000, ++ 0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000, ++ 0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000, ++ 0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000, ++ 0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000, ++ 0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000, ++ 0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000, ++ 0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000, ++ 0xde569a0b00000000, 0x4df0deea00000000, 0x0bcbb98f00000000, ++ 0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000, ++ 0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000, ++ 0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000, ++ 0x356bacd800000000}, ++ {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000, ++ 0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000, ++ 0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000, ++ 0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000, ++ 0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000, ++ 0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000, ++ 0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000, ++ 0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000, ++ 0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000, ++ 0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000, ++ 0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000, ++ 0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000, ++ 0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000, ++ 0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000, ++ 0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000, ++ 0x8bab193f00000000, 0x6829074400000000, 0xf6aadddb00000000, ++ 0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000, ++ 0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000, 
++ 0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000, ++ 0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000, ++ 0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000, ++ 0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000, ++ 0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000, ++ 0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000, ++ 0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000, ++ 0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000, ++ 0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000, ++ 0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000, ++ 0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000, ++ 0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000, ++ 0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000, ++ 0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000, ++ 0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000, ++ 0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000, ++ 0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000, ++ 0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000, ++ 0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000, ++ 0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000, ++ 0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000, ++ 0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000, ++ 0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000, ++ 0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000, ++ 0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000, ++ 0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000, ++ 0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000, ++ 0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000, ++ 0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000, ++ 0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000, ++ 0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000, ++ 0xac0d490200000000, 0xf48bae6b00000000, 
0x6a0874f400000000, ++ 0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000, ++ 0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000, ++ 0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000, ++ 0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000, ++ 0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000, ++ 0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000, ++ 0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000, ++ 0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000, ++ 0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000, ++ 0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000, ++ 0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000, ++ 0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000, ++ 0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000, ++ 0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000, ++ 0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000, ++ 0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000, ++ 0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000, ++ 0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000, ++ 0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000, ++ 0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000, ++ 0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000, ++ 0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000, ++ 0xc8cb44a800000000, 0x56489e3700000000, 0xb5ca804c00000000, ++ 0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000, ++ 0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000, ++ 0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000, ++ 0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000, ++ 0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000, ++ 0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000, ++ 0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000, ++ 0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000, ++ 0x8565606100000000, 
0xdde3870800000000, 0x43605d9700000000, ++ 0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000, ++ 0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000, ++ 0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000, ++ 0x48686b5600000000}, ++ {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000, ++ 0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000, ++ 0x805af17200000000, 0x403ed96500000000, 0x002643b900000000, ++ 0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000, ++ 0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000, ++ 0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000, ++ 0xc183a78700000000, 0x01e78f9000000000, 0x41d956f500000000, ++ 0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000, ++ 0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000, ++ 0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000, ++ 0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000, ++ 0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000, ++ 0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000, ++ 0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000, ++ 0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000, ++ 0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000, ++ 0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000, ++ 0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000, ++ 0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000, ++ 0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000, ++ 0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000, ++ 0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000, ++ 0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000, ++ 0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000, ++ 0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000, ++ 0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000, ++ 0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000, ++ 0x460d917400000000, 0x06a0e94d00000000, 
0xc6c4c15a00000000, ++ 0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000, ++ 0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000, ++ 0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000, ++ 0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000, ++ 0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000, ++ 0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000, ++ 0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000, ++ 0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000, ++ 0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000, ++ 0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000, ++ 0xc53276c500000000, 0x05565ed200000000, 0x456887b700000000, ++ 0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000, ++ 0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000, ++ 0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000, ++ 0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000, ++ 0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000, ++ 0xcfd24c1300000000, 0x0fb6640400000000, 0x4f1b1c3d00000000, ++ 0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000, ++ 0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000, ++ 0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000, ++ 0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000, ++ 0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000, ++ 0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000, ++ 0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000, ++ 0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000, ++ 0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000, ++ 0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000, ++ 0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000, ++ 0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000, ++ 0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000, ++ 0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000, ++ 0x8dfdad7900000000, 
0xcd50d54000000000, 0x0d34fd5700000000, ++ 0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000, ++ 0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000, ++ 0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000, ++ 0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000, ++ 0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000, ++ 0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000, ++ 0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000, ++ 0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000, ++ 0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000, ++ 0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000, ++ 0xc9e1040200000000, 0x09852c1500000000, 0x49bbf57000000000, ++ 0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000, ++ 0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000, ++ 0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000, ++ 0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000, ++ 0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000, ++ 0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000, ++ 0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000, ++ 0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000, ++ 0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000, ++ 0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000, ++ 0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000, ++ 0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000, ++ 0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000, ++ 0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000, ++ 0xcaa2517800000000}, ++ {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000, ++ 0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000, ++ 0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000, ++ 0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000, ++ 0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000, ++ 0x88db007600000000, 0x4c86720100000000, 
0x986c019a00000000, ++ 0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000, ++ 0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000, ++ 0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000, ++ 0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000, ++ 0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000, ++ 0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000, ++ 0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000, ++ 0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000, ++ 0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000, ++ 0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000, ++ 0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000, ++ 0xe9b3727500000000, 0x472bcb0300000000, 0x93c1b89800000000, ++ 0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000, ++ 0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000, ++ 0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000, ++ 0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000, ++ 0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000, ++ 0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000, ++ 0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000, ++ 0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000, ++ 0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000, ++ 0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000, ++ 0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000, ++ 0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000, ++ 0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000, ++ 0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000, ++ 0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000, ++ 0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000, ++ 0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000, ++ 0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000, ++ 0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000, ++ 0x20ce2f7100000000, 
0xe4935d0600000000, 0x30792e9d00000000, ++ 0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000, ++ 0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000, ++ 0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000, ++ 0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000, ++ 0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000, ++ 0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000, ++ 0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000, ++ 0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000, ++ 0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000, ++ 0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000, ++ 0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000, ++ 0x118d037c00000000, 0xbf15ba0a00000000, 0x6bffc99100000000, ++ 0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000, ++ 0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000, ++ 0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000, ++ 0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000, ++ 0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000, ++ 0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000, ++ 0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000, ++ 0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000, ++ 0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000, ++ 0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000, ++ 0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000, ++ 0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000, ++ 0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000, ++ 0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000, ++ 0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000, ++ 0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000, ++ 0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000, ++ 0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000, ++ 0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000, ++ 
0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000, ++ 0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000, ++ 0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000, ++ 0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000, ++ 0x07d7707900000000, 0xa94fc90f00000000, 0x7da5ba9400000000, ++ 0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000, ++ 0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000, ++ 0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000, ++ 0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000, ++ 0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000, ++ 0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000, ++ 0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000, ++ 0xb9982c7b00000000, 0x1700950d00000000, 0xc3eae69600000000, ++ 0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000, ++ 0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000, ++ 0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000, ++ 0x0c7ac97b00000000}, ++ {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000, ++ 0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000, ++ 0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000, ++ 0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000, ++ 0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000, ++ 0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000, ++ 0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000, ++ 0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000, ++ 0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000, ++ 0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000, ++ 0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000, ++ 0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000, ++ 0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000, ++ 0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000, ++ 0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000, ++ 0xbc39f8c400000000, 
0x9490e69c00000000, 0xb3f5c31d00000000, ++ 0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000, ++ 0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000, ++ 0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000, ++ 0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000, ++ 0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000, ++ 0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000, ++ 0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000, ++ 0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000, ++ 0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000, ++ 0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000, ++ 0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000, ++ 0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000, ++ 0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000, ++ 0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000, ++ 0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000, ++ 0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000, ++ 0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000, ++ 0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000, ++ 0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000, ++ 0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000, ++ 0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000, ++ 0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000, ++ 0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000, ++ 0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000, ++ 0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000, ++ 0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000, ++ 0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000, ++ 0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000, ++ 0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000, ++ 0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000, ++ 0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000, ++ 
0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000, ++ 0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000, ++ 0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000, ++ 0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000, ++ 0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000, ++ 0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000, ++ 0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000, ++ 0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000, ++ 0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000, ++ 0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000, ++ 0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000, ++ 0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000, ++ 0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000, ++ 0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000, ++ 0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000, ++ 0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000, ++ 0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000, ++ 0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000, ++ 0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000, ++ 0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000, ++ 0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000, ++ 0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000, ++ 0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000, ++ 0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000, ++ 0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000, ++ 0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000, ++ 0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000, ++ 0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000, ++ 0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000, ++ 0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000, ++ 0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000, ++ 0x1401859e00000000, 0x3364a01f00000000, 
0x4453b82e00000000, ++ 0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000, ++ 0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000, ++ 0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000, ++ 0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000, ++ 0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000, ++ 0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000, ++ 0x5185cd0900000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, ++ 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, ++ 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, ++ 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, ++ 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, ++ 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, ++ 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, ++ 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, ++ 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, ++ 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, ++ 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, ++ 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, ++ 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, ++ 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, ++ 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, ++ 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, ++ 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, ++ 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, ++ 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, ++ 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, ++ 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, ++ 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, ++ 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, ++ 0x53d45304, 0x274cba8b, 0xbce9f6e4, 
0xcb772414, 0x50d2687b, ++ 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, ++ 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, ++ 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, ++ 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, ++ 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, ++ 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, ++ 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, ++ 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, ++ 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, ++ 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, ++ 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, ++ 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, ++ 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, ++ 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, ++ 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, ++ 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, ++ 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, ++ 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, ++ 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, ++ 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, ++ 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, ++ 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, ++ 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, ++ 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, ++ 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, ++ 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, ++ 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, ++ 0x36197165}, ++ {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, ++ 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, ++ 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, ++ 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, ++ 
0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, ++ 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, ++ 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, ++ 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, ++ 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, ++ 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, ++ 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, ++ 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, ++ 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, ++ 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, ++ 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, ++ 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, ++ 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, ++ 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, ++ 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, ++ 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, ++ 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, ++ 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, ++ 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, ++ 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, ++ 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, ++ 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, ++ 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, ++ 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, ++ 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, ++ 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, ++ 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, ++ 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, ++ 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, ++ 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, ++ 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, ++ 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 
0x07e7c742, 0xc6691882, ++ 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, ++ 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, ++ 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, ++ 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, ++ 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, ++ 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, ++ 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, ++ 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, ++ 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, ++ 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, ++ 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, ++ 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, ++ 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, ++ 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, ++ 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, ++ 0x1a3b93aa}, ++ {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, ++ 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, ++ 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, ++ 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, ++ 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, ++ 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, ++ 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, ++ 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, ++ 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, ++ 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, ++ 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, ++ 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, ++ 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, ++ 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, ++ 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, ++ 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, ++ 
0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, ++ 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, ++ 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, ++ 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, ++ 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, ++ 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, ++ 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, ++ 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, ++ 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, ++ 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, ++ 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, ++ 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, ++ 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, ++ 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, ++ 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, ++ 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, ++ 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, ++ 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, ++ 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, ++ 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, ++ 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, ++ 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, ++ 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, ++ 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, ++ 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, ++ 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, ++ 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, ++ 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, ++ 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, ++ 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, ++ 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, ++ 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 
0x292b3dc2, 0xc6e956fc, ++ 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, ++ 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, ++ 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, ++ 0xe147d714}, ++ {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, ++ 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, ++ 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, ++ 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, ++ 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, ++ 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, ++ 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, ++ 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, ++ 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, ++ 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, ++ 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, ++ 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, ++ 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, ++ 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, ++ 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, ++ 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, ++ 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, ++ 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, ++ 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, ++ 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, ++ 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, ++ 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, ++ 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, ++ 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, ++ 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, ++ 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, ++ 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, ++ 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, ++ 
0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, ++ 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, ++ 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, ++ 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, ++ 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, ++ 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, ++ 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, ++ 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, ++ 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, ++ 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, ++ 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, ++ 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, ++ 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, ++ 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, ++ 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, ++ 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, ++ 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, ++ 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, ++ 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, ++ 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, ++ 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, ++ 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, ++ 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, ++ 0x494f0c4b}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d, ++ 0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac, ++ 0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8, ++ 0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95, ++ 0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817, ++ 0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d, ++ 0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac, ++ 0x66be7cbb, 0xafd31ddf, 
0xecc766c8, 0x29fbebf1, 0x6aef90e6, ++ 0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564, ++ 0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39, ++ 0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d, ++ 0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac, ++ 0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de, ++ 0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594, ++ 0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b, ++ 0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01, ++ 0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f, ++ 0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de, ++ 0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba, ++ 0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65, ++ 0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7, ++ 0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad, ++ 0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de, ++ 0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294, ++ 0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716, ++ 0x8224ec01, 0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71, ++ 0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15, ++ 0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4, ++ 0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca, ++ 0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280, ++ 0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f, ++ 0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15, ++ 0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9, ++ 0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748, ++ 0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c, ++ 0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971, ++ 0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3, ++ 0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9, ++ 0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 0x0b17bcaf, 0xce2b3196, ++ 
0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc, ++ 0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e, ++ 0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03, ++ 0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67, ++ 0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296, ++ 0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a, ++ 0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170, ++ 0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af, ++ 0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5, ++ 0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb, ++ 0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a, ++ 0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e, ++ 0x4b0c4f49}, ++ {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09, ++ 0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc, ++ 0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e, ++ 0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc, ++ 0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934, ++ 0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2, ++ 0x5bb16739, 0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b, ++ 0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad, ++ 0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155, ++ 0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187, ++ 0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65, ++ 0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390, ++ 0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e, ++ 0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378, ++ 0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889, ++ 0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f, ++ 0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0, ++ 0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145, ++ 0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7, ++ 0xc5dd1f48, 0x7004e7d1, 
0x4e6f253e, 0x4dd412d5, 0x73bfd03a, ++ 0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2, ++ 0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924, ++ 0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2, ++ 0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514, ++ 0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec, ++ 0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709, ++ 0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb, ++ 0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e, ++ 0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1, ++ 0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227, ++ 0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6, ++ 0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030, ++ 0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0, ++ 0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55, ++ 0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7, ++ 0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165, ++ 0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d, ++ 0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b, ++ 0x8abb3f90, 0xb4d0fd7f, 0xa10ebf78, 0x9f657d97, 0x9cde4a7c, ++ 0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a, ++ 0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362, ++ 0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0, ++ 0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52, ++ 0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7, ++ 0xc76f2d45, 0xf904efaa, 0xfabfd841, 0xc4d41aae, 0x710de237, ++ 0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1, ++ 0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020, ++ 0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6, ++ 0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719, ++ 0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec, ++ 0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e, ++ 
0x14d747e1}, ++ {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0, ++ 0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b, ++ 0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652, ++ 0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437, ++ 0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514, ++ 0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265, ++ 0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de, ++ 0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af, ++ 0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c, ++ 0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9, ++ 0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0, ++ 0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b, ++ 0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6, ++ 0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7, ++ 0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734, ++ 0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045, ++ 0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8, ++ 0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303, ++ 0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a, ++ 0xe22946eb, 0x3896d450, 0xf8495a91, 0xf92fb808, 0x39f036c9, ++ 0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea, ++ 0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b, ++ 0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6, ++ 0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7, ++ 0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4, ++ 0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6, ++ 0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f, ++ 0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054, ++ 0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9, ++ 0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8, ++ 0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b, ++ 0x107636fa, 0x93630dd3, 
0x53bc8312, 0x52da618b, 0x9205ef4a, ++ 0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441, ++ 0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a, ++ 0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3, ++ 0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6, ++ 0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5, ++ 0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94, ++ 0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9, ++ 0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288, ++ 0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab, ++ 0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce, ++ 0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7, ++ 0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c, ++ 0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527, ++ 0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256, ++ 0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5, ++ 0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4, ++ 0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39, ++ 0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2, ++ 0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 0x6b2a5742, 0x6a4cb5db, ++ 0xaa933b1a}, ++ {0x00000000, 0x6f4ca59b, 0x9f9e3bec, 0xf0d29e77, 0x7f3b0603, ++ 0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d, ++ 0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9, ++ 0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b, ++ 0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a, ++ 0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792, ++ 0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4, ++ 0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c, ++ 0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d, ++ 0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f, ++ 0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb, ++ 0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 
0x0a92bb65, ++ 0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330, ++ 0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8, ++ 0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da, ++ 0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742, ++ 0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f, ++ 0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1, ++ 0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5, ++ 0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f, ++ 0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e, ++ 0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6, ++ 0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8, ++ 0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250, ++ 0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021, ++ 0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb, ++ 0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f, ++ 0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511, ++ 0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c, ++ 0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4, ++ 0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886, ++ 0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e, ++ 0x18b4f678, 0x77f853e3, 0x872acd94, 0xe866680f, 0x678ff07b, ++ 0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5, ++ 0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791, ++ 0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003, ++ 0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272, ++ 0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea, ++ 0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc, ++ 0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24, ++ 0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55, ++ 0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7, ++ 0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3, ++ 0x63dc2428, 0x1243b05a, 
0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d, ++ 0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548, ++ 0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0, ++ 0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2, ++ 0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a, ++ 0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47, ++ 0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9, ++ 0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad, ++ 0x65711936}}; ++ ++#endif /* W */ ++ ++#endif /* N == 3 */ ++#if N == 4 ++ ++#if W == 8 ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a, ++ 0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe, ++ 0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b, ++ 0x5b8126c1, 0x1d596ee9, 0xec836b43, 0x259c63fc, 0xd4466656, ++ 0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd, ++ 0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d, ++ 0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7, ++ 0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47, ++ 0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac, ++ 0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691, ++ 0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404, ++ 0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 0x0de08fd0, ++ 0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4, ++ 0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424, ++ 0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5, ++ 0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65, ++ 0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67, ++ 0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3, ++ 0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626, ++ 0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9, ++ 0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222, ++ 0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 
0x2c934da2, ++ 0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a, ++ 0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a, ++ 0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1, ++ 0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2, ++ 0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077, ++ 0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3, ++ 0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1, ++ 0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621, ++ 0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0, ++ 0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60, ++ 0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0, ++ 0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64, ++ 0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1, ++ 0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc, ++ 0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027, ++ 0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7, ++ 0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9, ++ 0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79, ++ 0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292, ++ 0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af, ++ 0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a, ++ 0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee, ++ 0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e, ++ 0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe, ++ 0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f, ++ 0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff, ++ 0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd, ++ 0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29, ++ 0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc, ++ 0xe3c45916}, ++ {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344, ++ 0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59, ++ 0x6af0972a, 
0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e, ++ 0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463, ++ 0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98, ++ 0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d, ++ 0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3, ++ 0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656, ++ 0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad, ++ 0x45cb117c, 0x56e3280f, 0x5804c0de, 0xe9f29171, 0xe71579a0, ++ 0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397, ++ 0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a, ++ 0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2, ++ 0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357, ++ 0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8, ++ 0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d, ++ 0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696, ++ 0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b, ++ 0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc, ++ 0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0, ++ 0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b, ++ 0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be, ++ 0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811, ++ 0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384, ++ 0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f, ++ 0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955, ++ 0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362, ++ 0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f, ++ 0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94, ++ 0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701, ++ 0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe, ++ 0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b, ++ 0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1, ++ 0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 
0x6064c3bc, ++ 0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b, ++ 0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986, ++ 0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d, ++ 0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8, ++ 0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4, ++ 0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371, ++ 0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a, ++ 0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87, ++ 0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0, ++ 0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad, ++ 0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527, ++ 0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2, ++ 0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d, ++ 0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998, ++ 0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73, ++ 0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e, ++ 0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59, ++ 0xa7520488}, ++ {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20, ++ 0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09, ++ 0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431, ++ 0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a, ++ 0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203, ++ 0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b, ++ 0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14, ++ 0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c, ++ 0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25, ++ 0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e, ++ 0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36, ++ 0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f, ++ 0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649, ++ 0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961, ++ 0x9183bc79, 
0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58, ++ 0x6416cd50, 0xd1346468, 0xed548760, 0xa9f5a278, 0x95954170, ++ 0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b, ++ 0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742, ++ 0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a, ++ 0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55, ++ 0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c, ++ 0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64, ++ 0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f, ++ 0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77, ++ 0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e, ++ 0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a, ++ 0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2, ++ 0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b, ++ 0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090, ++ 0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8, ++ 0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881, ++ 0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9, ++ 0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6, ++ 0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f, ++ 0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7, ++ 0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c, ++ 0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695, ++ 0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd, ++ 0x1183fca5, 0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb, ++ 0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3, ++ 0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa, ++ 0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1, ++ 0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9, ++ 0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0, ++ 0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df, ++ 0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 
0x05790df7, ++ 0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace, ++ 0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6, ++ 0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd, ++ 0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4, ++ 0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec, ++ 0x3522e9e4}, ++ {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1, ++ 0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86, ++ 0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b, ++ 0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669, ++ 0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7, ++ 0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352, ++ 0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03, ++ 0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6, ++ 0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38, ++ 0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a, ++ 0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7, ++ 0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80, ++ 0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7, ++ 0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522, ++ 0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d, ++ 0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8, ++ 0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103, ++ 0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54, ++ 0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9, ++ 0xacc37fcd, 0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0, ++ 0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e, ++ 0x6d8a3e1a, 0xc8825576, 0xab858c52, 0x59e385ef, 0x3ae45ccb, ++ 0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1, ++ 0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624, ++ 0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea, ++ 0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a, ++ 0x2b6dcfe6, 
0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37, ++ 0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360, ++ 0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab, ++ 0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e, ++ 0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741, ++ 0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4, ++ 0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334, ++ 0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63, ++ 0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de, ++ 0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c, ++ 0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942, ++ 0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7, ++ 0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131, ++ 0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4, ++ 0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a, ++ 0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758, ++ 0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5, ++ 0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2, ++ 0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32, ++ 0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7, ++ 0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8, ++ 0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d, ++ 0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6, ++ 0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1, ++ 0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c, ++ 0x97411e28}, ++ {0x00000000, 0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474, ++ 0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5, ++ 0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6, ++ 0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7, ++ 0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938, ++ 0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051, ++ 0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 
0x370a5ebd, 0x35d4599a, ++ 0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3, ++ 0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c, ++ 0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d, ++ 0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e, ++ 0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf, ++ 0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740, ++ 0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29, ++ 0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592, ++ 0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb, ++ 0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4, ++ 0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365, ++ 0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036, ++ 0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7, ++ 0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08, ++ 0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561, ++ 0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a, ++ 0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663, ++ 0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac, ++ 0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d, ++ 0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce, ++ 0xdef77dd3, 0xd7516668, 0xd6e49b75, 0xd43a9c52, 0xd58f614f, ++ 0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50, ++ 0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639, ++ 0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82, ++ 0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb, ++ 0xec412d20, 0xedf4d03d, 0xef2ad71a, 0xee9f2a07, 0xea96d954, ++ 0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5, ++ 0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86, ++ 0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7, ++ 0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418, ++ 0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71, ++ 0xff0d1a56, 
0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa, ++ 0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93, ++ 0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c, ++ 0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d, ++ 0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e, ++ 0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df, ++ 0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60, ++ 0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309, ++ 0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2, ++ 0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db, ++ 0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4, ++ 0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45, ++ 0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16, ++ 0x93c7a00b}, ++ {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45, ++ 0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb, ++ 0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d, ++ 0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696, ++ 0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf, ++ 0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb, ++ 0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028, ++ 0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c, ++ 0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65, ++ 0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be, ++ 0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038, ++ 0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6, ++ 0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15, ++ 0xcc2fb054, 0x7fbb9d97, 0x113786d6, 0xc3e2c050, 0xad6edb11, ++ 0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d, ++ 0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19, ++ 0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05, ++ 0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b, ++ 0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 
0x3198a6ce, 0x820c8b0d, ++ 0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c, ++ 0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35, ++ 0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31, ++ 0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068, ++ 0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c, ++ 0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25, ++ 0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a, ++ 0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac, ++ 0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22, ++ 0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e, ++ 0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a, ++ 0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36, ++ 0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32, ++ 0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84, ++ 0x2d1276c5, 0x9e865b06, 0xf00a4047, 0xe05dd04b, 0x8ed1cb0a, ++ 0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c, ++ 0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057, ++ 0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e, ++ 0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a, ++ 0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc, ++ 0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8, ++ 0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1, ++ 0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a, ++ 0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec, ++ 0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62, ++ 0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4, ++ 0xeef0b695, 0x5d649b56, 0x33e88017, 0xe13dc691, 0x8fb1ddd0, ++ 0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc, ++ 0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8, ++ 0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4, ++ 0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a, ++ 0xc192e689, 
0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc, ++ 0xce5f968d}, ++ {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de, ++ 0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b, ++ 0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d, ++ 0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680, ++ 0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4, ++ 0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d, ++ 0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde, ++ 0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97, ++ 0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3, ++ 0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e, ++ 0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678, ++ 0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d, ++ 0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723, ++ 0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a, ++ 0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0, ++ 0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9, ++ 0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85, ++ 0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770, ++ 0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56, ++ 0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a, ++ 0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e, ++ 0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67, ++ 0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785, ++ 0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc, ++ 0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788, ++ 0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90, ++ 0x478f8368, 0xa09225ff, 0x78515ed9, 0x9f4cf84e, 0x6d1b15b6, ++ 0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843, ++ 0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f, ++ 0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336, ++ 0xc7494ece, 0x2054e859, 0xadbebec3, 
0x4aa31854, 0xb8f4f5ac, ++ 0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5, ++ 0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68, ++ 0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d, ++ 0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb, ++ 0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36, ++ 0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72, ++ 0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b, ++ 0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b, ++ 0x644842dc, 0xbc8b39fa, 0x5b969f6d, 0xa9c17295, 0x4edcd402, ++ 0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446, ++ 0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb, ++ 0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed, ++ 0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418, ++ 0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95, ++ 0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc, ++ 0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946, ++ 0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f, ++ 0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233, ++ 0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6, ++ 0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0, ++ 0x3e721277}, ++ {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb, ++ 0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9, ++ 0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11, ++ 0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d, ++ 0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9, ++ 0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c, ++ 0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881, ++ 0x85c3f88f, 0x4b930427, 0xee189429, 0xdbf5227a, 0x7e7eb274, ++ 0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790, ++ 0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc, ++ 0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514, ++ 
0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56, ++ 0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9, ++ 0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c, ++ 0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13, ++ 0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6, ++ 0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c, ++ 0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e, ++ 0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386, ++ 0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376, ++ 0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692, ++ 0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67, ++ 0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416, ++ 0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3, ++ 0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07, ++ 0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd, ++ 0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15, ++ 0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457, ++ 0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd, ++ 0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28, ++ 0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337, ++ 0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2, ++ 0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594, ++ 0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6, ++ 0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e, ++ 0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52, ++ 0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6, ++ 0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143, ++ 0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17, ++ 0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 0xd85756e2, ++ 0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306, ++ 0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a, ++ 0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 
0x102657d1, 0x25cbe182, ++ 0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0, ++ 0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496, ++ 0x03a27498, 0x364fc2cb, 0x93c452c5, 0x5d94ae6d, 0xf81f3e63, ++ 0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c, ++ 0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89, ++ 0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903, ++ 0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041, ++ 0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9, ++ 0x1c65ace7}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000, ++ 0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000, ++ 0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000, ++ 0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000, ++ 0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000, ++ 0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000, ++ 0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000, ++ 0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000, ++ 0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000, ++ 0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000, ++ 0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000, ++ 0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000, ++ 0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000, ++ 0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000, ++ 0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000, ++ 0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000, ++ 0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000, ++ 0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000, ++ 0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000, ++ 0x0b6ab96100000000, 0x58dc545400000000, 0x564cdff100000000, ++ 0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000, ++ 0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000, ++ 
0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000, ++ 0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000, ++ 0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000, ++ 0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000, ++ 0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000, ++ 0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000, ++ 0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000, ++ 0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000, ++ 0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000, ++ 0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000, ++ 0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000, ++ 0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000, ++ 0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000, ++ 0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000, ++ 0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000, ++ 0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000, ++ 0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000, ++ 0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000, ++ 0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000, ++ 0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000, ++ 0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000, ++ 0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000, ++ 0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000, ++ 0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000, ++ 0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000, ++ 0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000, ++ 0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000, ++ 0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000, ++ 0x7b7ae69e00000000, 0x75ea6d3b00000000, 0x6a8536d900000000, ++ 0x6415bd7c00000000, 0x37a3504900000000, 0x3933dbec00000000, ++ 0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000, ++ 0xc279661700000000, 0x6f7f041d00000000, 
0x61ef8fb800000000, ++ 0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000, ++ 0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000, ++ 0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000, ++ 0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000, ++ 0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000, ++ 0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000, ++ 0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000, ++ 0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000, ++ 0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000, ++ 0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000, ++ 0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000, ++ 0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000, ++ 0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000, ++ 0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000, ++ 0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000, ++ 0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000, ++ 0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000, ++ 0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000, ++ 0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000, ++ 0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000, ++ 0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000, ++ 0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000, ++ 0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000, ++ 0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000, ++ 0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000, ++ 0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000, ++ 0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000, ++ 0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000, ++ 0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000, ++ 0x41c0be7700000000, 0x1276534200000000, 0x1ce6d8e700000000, ++ 0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000, ++ 0xe7ac651c00000000}, ++ 
{0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000, ++ 0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000, ++ 0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000, ++ 0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000, ++ 0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000, ++ 0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000, ++ 0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000, ++ 0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000, ++ 0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000, ++ 0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000, ++ 0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000, ++ 0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000, ++ 0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000, ++ 0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000, ++ 0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000, ++ 0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000, ++ 0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000, ++ 0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000, ++ 0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000, ++ 0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000, ++ 0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000, ++ 0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000, ++ 0x4c2c90d100000000, 0xdb8a8d3600000000, 0xfdf14eee00000000, ++ 0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000, ++ 0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000, ++ 0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000, ++ 0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000, ++ 0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000, ++ 0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000, ++ 0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000, ++ 0x885aeb2e00000000, 0x1ffcf6c900000000, 0x3987351100000000, ++ 0xae2128f600000000, 0x56cc7f0400000000, 
0xc16a62e300000000, ++ 0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000, ++ 0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000, ++ 0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000, ++ 0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000, ++ 0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000, ++ 0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000, ++ 0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000, ++ 0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000, ++ 0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000, ++ 0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000, ++ 0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000, ++ 0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000, ++ 0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000, ++ 0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000, ++ 0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000, ++ 0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000, ++ 0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000, ++ 0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000, ++ 0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000, ++ 0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000, ++ 0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000, ++ 0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000, ++ 0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000, ++ 0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000, ++ 0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000, ++ 0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000, ++ 0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000, ++ 0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000, ++ 0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000, ++ 0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000, ++ 0x1d45213700000000, 0x8ae33cd000000000, 0xac98ff0800000000, ++ 0x3b3ee2ef00000000, 
0xc3d3b51d00000000, 0x5475a8fa00000000, ++ 0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000, ++ 0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000, ++ 0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000, ++ 0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000, ++ 0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000, ++ 0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000, ++ 0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000, ++ 0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000, ++ 0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000, ++ 0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000, ++ 0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000, ++ 0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000, ++ 0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000, ++ 0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000, ++ 0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000, ++ 0x60026a8100000000, 0x98ef3d7300000000, 0x0f49209400000000, ++ 0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000, ++ 0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000, ++ 0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000, ++ 0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000, ++ 0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000, ++ 0x7712723e00000000}, ++ {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000, ++ 0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000, ++ 0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000, ++ 0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000, ++ 0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000, ++ 0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000, ++ 0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000, ++ 0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000, ++ 0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000, ++ 0x1c50622f00000000, 0x9a16b7fd00000000, 
0xdb0d3b9300000000, ++ 0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000, ++ 0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000, ++ 0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000, ++ 0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000, ++ 0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000, ++ 0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000, ++ 0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000, ++ 0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000, ++ 0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000, ++ 0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000, ++ 0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000, ++ 0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000, ++ 0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000, ++ 0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000, ++ 0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000, ++ 0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000, ++ 0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000, ++ 0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000, ++ 0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000, ++ 0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000, ++ 0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000, ++ 0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000, ++ 0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000, ++ 0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000, ++ 0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000, ++ 0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000, ++ 0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000, ++ 0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000, ++ 0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000, ++ 0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000, ++ 0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000, ++ 0xa3ad283100000000, 
0x25ebfde300000000, 0x64f0718d00000000, ++ 0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000, ++ 0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000, ++ 0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000, ++ 0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000, ++ 0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000, ++ 0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000, ++ 0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000, ++ 0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000, ++ 0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000, ++ 0xf5364c6c00000000, 0x361bd8df00000000, 0x770054b100000000, ++ 0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000, ++ 0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000, ++ 0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000, ++ 0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000, ++ 0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000, ++ 0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000, ++ 0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000, ++ 0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000, ++ 0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000, ++ 0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000, ++ 0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000, ++ 0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000, ++ 0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000, ++ 0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000, ++ 0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000, ++ 0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000, ++ 0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000, ++ 0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000, ++ 0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000, ++ 0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000, ++ 0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000, ++ 
0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000, ++ 0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000, ++ 0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000, ++ 0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000, ++ 0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000, ++ 0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000, ++ 0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000, ++ 0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000, ++ 0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000, ++ 0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000, ++ 0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000, ++ 0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000, ++ 0x8d965fce00000000}, ++ {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000, ++ 0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000, ++ 0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000, ++ 0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000, ++ 0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000, ++ 0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000, ++ 0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000, ++ 0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000, ++ 0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000, ++ 0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000, ++ 0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000, ++ 0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000, ++ 0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000, ++ 0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000, ++ 0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000, ++ 0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000, ++ 0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000, ++ 0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000, ++ 0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000, ++ 0x8567fa2100000000, 
0xa260242300000000, 0xbf9d912200000000, ++ 0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000, ++ 0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000, ++ 0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000, ++ 0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000, ++ 0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000, ++ 0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000, ++ 0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000, ++ 0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000, ++ 0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000, ++ 0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000, ++ 0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000, ++ 0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000, ++ 0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000, ++ 0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000, ++ 0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000, ++ 0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000, ++ 0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000, ++ 0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000, ++ 0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000, ++ 0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000, ++ 0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000, ++ 0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000, ++ 0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000, ++ 0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000, ++ 0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000, ++ 0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000, ++ 0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000, ++ 0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000, ++ 0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000, ++ 0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000, ++ 0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000, ++ 
0xa54abbcd00000000, 0x824d65cf00000000, 0x9fb0d0ce00000000, ++ 0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000, ++ 0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000, ++ 0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000, ++ 0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000, ++ 0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000, ++ 0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000, ++ 0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000, ++ 0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000, ++ 0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000, ++ 0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000, ++ 0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000, ++ 0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000, ++ 0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000, ++ 0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000, ++ 0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000, ++ 0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000, ++ 0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000, ++ 0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000, ++ 0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000, ++ 0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000, ++ 0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000, ++ 0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000, ++ 0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000, ++ 0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000, ++ 0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000, ++ 0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000, ++ 0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000, ++ 0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000, ++ 0xb0bb619a00000000, 0xad46d49b00000000, 0x8a410a9900000000, ++ 0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000, ++ 0xfeb5dd9f00000000, 0xe348689e00000000, 
0x5853ce9700000000, ++ 0x45ae7b9600000000, 0x62a9a59400000000, 0x7f54109500000000, ++ 0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000, ++ 0x0ba0c79300000000}, ++ {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000, ++ 0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000, ++ 0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000, ++ 0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000, ++ 0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000, ++ 0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000, ++ 0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000, ++ 0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000, ++ 0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000, ++ 0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000, ++ 0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000, ++ 0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000, ++ 0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000, ++ 0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000, ++ 0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000, ++ 0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000, ++ 0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000, ++ 0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000, ++ 0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000, ++ 0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000, ++ 0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000, ++ 0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000, ++ 0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000, ++ 0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000, ++ 0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000, ++ 0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000, ++ 0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000, ++ 0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000, ++ 0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000, 
++ 0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000, ++ 0x38c4aa9800000000, 0x1c1dadfb00000000, 0xa114cb0900000000, ++ 0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000, ++ 0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000, ++ 0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000, ++ 0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000, ++ 0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000, ++ 0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000, ++ 0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000, ++ 0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000, ++ 0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000, ++ 0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000, ++ 0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000, ++ 0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000, ++ 0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000, ++ 0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000, ++ 0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000, ++ 0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000, ++ 0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000, ++ 0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000, ++ 0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000, ++ 0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000, ++ 0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000, ++ 0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000, ++ 0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000, ++ 0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000, ++ 0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000, ++ 0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000, ++ 0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000, ++ 0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000, ++ 0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000, ++ 0x318e24ea00000000, 0x1557238900000000, 
0x793c2b2c00000000, ++ 0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000, ++ 0x0a9b99d500000000, 0x2e429eb600000000, 0x934bf84400000000, ++ 0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000, ++ 0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000, ++ 0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000, ++ 0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000, ++ 0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000, ++ 0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000, ++ 0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000, ++ 0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000, ++ 0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000, ++ 0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000, ++ 0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000, ++ 0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000, ++ 0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000, ++ 0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000, ++ 0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000, ++ 0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000, ++ 0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000, ++ 0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000, ++ 0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000, ++ 0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000, ++ 0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000, ++ 0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000, ++ 0x281e419700000000}, ++ {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000, ++ 0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000, ++ 0x304a428900000000, 0x38a922b500000000, 0x011e763800000000, ++ 0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000, ++ 0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000, ++ 0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000, ++ 0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000, 
++ 0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000, ++ 0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000, ++ 0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000, ++ 0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000, ++ 0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000, ++ 0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000, ++ 0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000, ++ 0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000, ++ 0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000, ++ 0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000, ++ 0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000, ++ 0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000, ++ 0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000, ++ 0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000, ++ 0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000, ++ 0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000, ++ 0x619922d500000000, 0x79bc839100000000, 0x715fe3ad00000000, ++ 0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000, ++ 0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000, ++ 0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000, ++ 0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000, ++ 0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000, ++ 0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000, ++ 0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000, ++ 0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000, ++ 0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000, ++ 0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000, ++ 0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000, ++ 0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000, ++ 0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000, ++ 0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000, ++ 0x5f7434f100000000, 0x579754cd00000000, 
0x6f3e767800000000, ++ 0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000, ++ 0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000, ++ 0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000, ++ 0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000, ++ 0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000, ++ 0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000, ++ 0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000, ++ 0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000, ++ 0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000, ++ 0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000, ++ 0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000, ++ 0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000, ++ 0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000, ++ 0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000, ++ 0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000, ++ 0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000, ++ 0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000, ++ 0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000, ++ 0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000, ++ 0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000, ++ 0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000, ++ 0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000, ++ 0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000, ++ 0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000, ++ 0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000, ++ 0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000, ++ 0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000, ++ 0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000, ++ 0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000, ++ 0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000, ++ 0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000, ++ 0xc9e06f2100000000, 
0xc1030f1d00000000, 0xf9aa2da800000000, ++ 0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000, ++ 0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000, ++ 0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000, ++ 0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000, ++ 0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000, ++ 0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000, ++ 0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000, ++ 0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000, ++ 0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000, ++ 0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000, ++ 0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000, ++ 0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000, ++ 0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000, ++ 0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000, ++ 0xe4e9223500000000}, ++ {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000, ++ 0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000, ++ 0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000, ++ 0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000, ++ 0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000, ++ 0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000, ++ 0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000, ++ 0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000, ++ 0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000, ++ 0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000, ++ 0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000, ++ 0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000, ++ 0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000, ++ 0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000, ++ 0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000, ++ 0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000, ++ 0x7191f2e900000000, 0xa07915e700000000, 
0xd3403df400000000, ++ 0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000, ++ 0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000, ++ 0x283f2a9000000000, 0x5b06028300000000, 0x8aeee58d00000000, ++ 0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000, ++ 0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000, ++ 0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000, ++ 0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000, ++ 0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000, ++ 0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000, ++ 0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000, ++ 0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000, ++ 0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000, ++ 0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000, ++ 0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000, ++ 0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000, ++ 0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000, ++ 0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000, ++ 0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000, ++ 0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000, ++ 0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000, ++ 0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000, ++ 0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000, ++ 0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000, ++ 0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000, ++ 0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000, ++ 0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000, ++ 0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000, ++ 0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000, ++ 0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000, ++ 0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000, ++ 0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000, ++ 0x94fc4ef000000000, 
0x4514a9fe00000000, 0x362d81ed00000000, ++ 0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000, ++ 0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000, ++ 0xcd52968900000000, 0xbe6bbe9a00000000, 0x6f83599400000000, ++ 0x5819eebc00000000, 0x89f109b200000000, 0xfac821a100000000, ++ 0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000, ++ 0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000, ++ 0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000, ++ 0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000, ++ 0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000, ++ 0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000, ++ 0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000, ++ 0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000, ++ 0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000, ++ 0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000, ++ 0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000, ++ 0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000, ++ 0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000, ++ 0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000, ++ 0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000, ++ 0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000, ++ 0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000, ++ 0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000, ++ 0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000, ++ 0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000, ++ 0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000, ++ 0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000, ++ 0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000, ++ 0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000, ++ 0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000, ++ 0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000, ++ 0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000, ++ 
0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000, ++ 0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000, ++ 0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000, ++ 0x6e76028100000000, 0x1d4f2a9200000000, 0xcca7cd9c00000000, ++ 0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000, ++ 0x880452a700000000}, ++ {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000, ++ 0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000, ++ 0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000, ++ 0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000, ++ 0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000, ++ 0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000, ++ 0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000, ++ 0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000, ++ 0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000, ++ 0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000, ++ 0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000, ++ 0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000, ++ 0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000, ++ 0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000, ++ 0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000, ++ 0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000, ++ 0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000, ++ 0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000, ++ 0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000, ++ 0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000, ++ 0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000, ++ 0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000, ++ 0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000, ++ 0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000, ++ 0xf08f719600000000, 0x5a8aab6700000000, 0xe582b4ae00000000, ++ 0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000, ++ 0xcf983edf00000000, 
0x659de42e00000000, 0x4dd53c6800000000, ++ 0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000, ++ 0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000, ++ 0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000, ++ 0x0cecedb300000000, 0xa6e9374200000000, 0x33fba2fa00000000, ++ 0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000, ++ 0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000, ++ 0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000, ++ 0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000, ++ 0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000, ++ 0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000, ++ 0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000, ++ 0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000, ++ 0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000, ++ 0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000, ++ 0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000, ++ 0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000, ++ 0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000, ++ 0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000, ++ 0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000, ++ 0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000, ++ 0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000, ++ 0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000, ++ 0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000, ++ 0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000, ++ 0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000, ++ 0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000, ++ 0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000, ++ 0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000, ++ 0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000, ++ 0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000, ++ 0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000, ++ 
0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000, ++ 0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000, ++ 0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000, ++ 0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000, ++ 0x32fdf11600000000, 0x98f82be700000000, 0x0deabe5f00000000, ++ 0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000, ++ 0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000, ++ 0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000, ++ 0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000, ++ 0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000, ++ 0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000, ++ 0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000, ++ 0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000, ++ 0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000, ++ 0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000, ++ 0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000, ++ 0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000, ++ 0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000, ++ 0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000, ++ 0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000, ++ 0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000, ++ 0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000, ++ 0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000, ++ 0x68775a7100000000, 0xfd65cfc900000000, 0x5760153800000000, ++ 0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000, ++ 0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000, ++ 0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000, ++ 0x1659c4e300000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, ++ 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, ++ 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, ++ 
0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, ++ 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, ++ 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, ++ 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, ++ 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, ++ 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, ++ 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, ++ 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, ++ 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, ++ 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, ++ 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, ++ 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, ++ 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, ++ 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, ++ 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, ++ 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, ++ 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, ++ 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, ++ 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, ++ 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, ++ 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, ++ 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, ++ 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, ++ 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, ++ 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, ++ 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, ++ 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, ++ 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, ++ 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, ++ 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, ++ 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, ++ 0xbf712ca5, 0x1119bd34, 0xece04541, 
0x4288d4d0, 0x6b406022, ++ 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, ++ 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, ++ 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, ++ 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, ++ 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, ++ 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, ++ 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, ++ 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, ++ 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, ++ 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, ++ 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, ++ 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, ++ 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, ++ 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, ++ 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, ++ 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, ++ 0x0d7139d7}, ++ {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, ++ 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, ++ 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, ++ 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, ++ 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, ++ 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, ++ 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, ++ 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, ++ 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, ++ 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, ++ 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, ++ 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, ++ 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, ++ 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, ++ 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, ++ 
0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, ++ 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, ++ 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, ++ 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, ++ 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, ++ 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, ++ 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, ++ 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, ++ 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, ++ 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, ++ 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, ++ 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, ++ 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, ++ 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, ++ 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, ++ 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, ++ 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, ++ 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, ++ 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, ++ 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, ++ 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, ++ 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, ++ 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, ++ 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, ++ 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, ++ 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, ++ 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, ++ 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, ++ 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, ++ 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, ++ 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, ++ 0x2e4361aa, 0x97b8ba42, 0xc33079df, 
0x7acba237, 0x6bb6c84e, ++ 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, ++ 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, ++ 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, ++ 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, ++ 0x1c53e98a}, ++ {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, ++ 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, ++ 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, ++ 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, ++ 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, ++ 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, ++ 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, ++ 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, ++ 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, ++ 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, ++ 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, ++ 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, ++ 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, ++ 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, ++ 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, ++ 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, ++ 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, ++ 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, ++ 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, ++ 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, ++ 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, ++ 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, ++ 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, ++ 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, ++ 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, ++ 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, ++ 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, ++ 
0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, ++ 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, ++ 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, ++ 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, ++ 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, ++ 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, ++ 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, ++ 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, ++ 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, ++ 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, ++ 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, ++ 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, ++ 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, ++ 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, ++ 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, ++ 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, ++ 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, ++ 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, ++ 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, ++ 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, ++ 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, ++ 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, ++ 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, ++ 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, ++ 0x3f88e851}, ++ {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, ++ 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, ++ 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, ++ 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, ++ 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, ++ 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, ++ 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, ++ 0x0eb4273b, 0x73c72163, 
0xae51f8e6, 0x139b9428, 0xce0d4dad, ++ 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, ++ 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, ++ 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, ++ 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, ++ 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, ++ 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, ++ 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, ++ 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, ++ 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, ++ 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, ++ 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, ++ 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, ++ 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, ++ 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, ++ 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, ++ 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, ++ 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, ++ 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, ++ 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, ++ 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, ++ 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, ++ 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, ++ 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, ++ 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, ++ 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, ++ 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, ++ 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, ++ 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, ++ 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, ++ 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, ++ 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, ++ 
0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, ++ 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, ++ 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, ++ 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, ++ 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, ++ 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, ++ 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, ++ 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, ++ 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, ++ 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, ++ 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, ++ 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, ++ 0x3dee8ca6}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0, ++ 0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587, ++ 0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa, ++ 0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09, ++ 0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee, ++ 0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3, ++ 0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3, ++ 0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce, ++ 0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429, ++ 0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda, ++ 0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7, ++ 0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0, ++ 0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd, ++ 0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0, ++ 0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287, ++ 0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a, ++ 0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9, ++ 0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e, ++ 0x5751d533, 0xd28843ee, 
0x8a8e3093, 0x0f57a64e, 0xc13b6cf3, ++ 0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3, ++ 0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054, ++ 0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49, ++ 0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da, ++ 0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7, ++ 0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20, ++ 0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d, ++ 0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00, ++ 0x4f2319dd, 0x7af669fa, 0xff2fff27, 0x3143359a, 0xb49aa347, ++ 0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14, ++ 0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209, ++ 0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e, ++ 0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33, ++ 0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3, ++ 0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194, ++ 0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9, ++ 0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a, ++ 0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd, ++ 0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0, ++ 0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d, ++ 0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460, ++ 0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87, ++ 0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674, ++ 0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509, ++ 0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e, ++ 0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae, ++ 0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3, ++ 0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694, ++ 0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989, ++ 0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da, ++ 0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d, ++ 
0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0, ++ 0xa68cee3d}, ++ {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19, ++ 0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae, ++ 0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb, ++ 0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a, ++ 0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55, ++ 0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1, ++ 0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c, ++ 0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8, ++ 0xee663dfe, 0x98873263, 0x43a2531f, 0x35435c82, 0xf5e991e7, ++ 0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936, ++ 0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453, ++ 0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4, ++ 0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941, ++ 0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5, ++ 0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93, ++ 0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17, ++ 0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e, ++ 0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89, ++ 0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec, ++ 0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0, ++ 0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf, ++ 0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b, ++ 0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b, ++ 0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f, ++ 0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0, ++ 0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e, ++ 0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b, ++ 0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc, ++ 0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5, ++ 0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261, ++ 0x3898031d, 0x4e790c80, 
0xb8cd98d6, 0xce2c974b, 0x1509f637, ++ 0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3, ++ 0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57, ++ 0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0, ++ 0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85, ++ 0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454, ++ 0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b, ++ 0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f, ++ 0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423, ++ 0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7, ++ 0x250693f1, 0x53e79c6c, 0x88c2fd10, 0xfe23f28d, 0x3e893fe8, ++ 0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739, ++ 0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c, ++ 0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb, ++ 0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f, ++ 0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b, ++ 0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd, ++ 0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59, ++ 0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070, ++ 0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7, ++ 0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2, ++ 0x51e8883f}, ++ {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a, ++ 0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276, ++ 0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed, ++ 0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55, ++ 0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b, ++ 0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8, ++ 0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320, ++ 0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413, ++ 0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd, ++ 0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75, ++ 0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 
0x23755dee, ++ 0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312, ++ 0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca, ++ 0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9, ++ 0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad, ++ 0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e, ++ 0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504, ++ 0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8, ++ 0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63, ++ 0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353, ++ 0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d, ++ 0x8c524c34, 0xf5383125, 0x1de3ca9c, 0x07eccb07, 0xef3730be, ++ 0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae, ++ 0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d, ++ 0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943, ++ 0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7, ++ 0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c, ++ 0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390, ++ 0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a, ++ 0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239, ++ 0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d, ++ 0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e, ++ 0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c, ++ 0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0, ++ 0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b, ++ 0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93, ++ 0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d, ++ 0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e, ++ 0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c, ++ 0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f, ++ 0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1, ++ 0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579, ++ 0x18a59868, 0xf07e63d1, 
0xea71624a, 0x02aa99f3, 0x7bc0e4e2, ++ 0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e, ++ 0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c, ++ 0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f, ++ 0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b, ++ 0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158, ++ 0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2, ++ 0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e, ++ 0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5, ++ 0x8ae9531c}, ++ {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4, ++ 0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd, ++ 0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 0x59424a09, 0xabf68220, ++ 0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf, ++ 0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495, ++ 0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def, ++ 0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90, ++ 0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea, ++ 0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0, ++ 0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f, ++ 0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2, ++ 0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab, ++ 0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e, ++ 0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754, ++ 0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda, ++ 0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0, ++ 0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c, ++ 0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215, ++ 0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8, ++ 0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910, ++ 0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a, ++ 0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30, ++ 0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 
0x7a96c658, ++ 0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22, ++ 0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478, ++ 0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2, ++ 0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f, ++ 0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606, ++ 0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba, ++ 0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0, ++ 0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e, ++ 0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034, ++ 0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f, ++ 0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996, ++ 0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 0x2260406b, ++ 0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84, ++ 0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de, ++ 0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4, ++ 0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5, ++ 0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f, ++ 0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5, ++ 0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a, ++ 0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7, ++ 0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce, ++ 0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65, ++ 0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f, ++ 0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91, ++ 0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb, ++ 0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57, ++ 0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e, ++ 0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3, ++ 0xd739710d}}; ++ ++#endif /* W */ ++ ++#endif /* N == 4 */ ++#if N == 5 ++ ++#if W == 8 ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df, ++ 0x7fc5d198, 
0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8, ++ 0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef, ++ 0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376, ++ 0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201, ++ 0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399, ++ 0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372, ++ 0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea, ++ 0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d, ++ 0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004, ++ 0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353, ++ 0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334, ++ 0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a, ++ 0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2, ++ 0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a, ++ 0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2, ++ 0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b, ++ 0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c, ++ 0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b, ++ 0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f, ++ 0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338, ++ 0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0, ++ 0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6, ++ 0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e, ++ 0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319, ++ 0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3, ++ 0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4, ++ 0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783, ++ 0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a, ++ 0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492, ++ 0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a, ++ 0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2, ++ 0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 
0x28c24496, ++ 0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1, ++ 0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6, ++ 0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f, ++ 0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548, ++ 0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0, ++ 0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741, ++ 0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9, ++ 0xd5f18471, 0x7ab51636, 0x5009a6be, 0xff4d34f9, 0x0570c7ae, ++ 0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437, ++ 0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760, ++ 0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707, ++ 0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433, ++ 0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab, ++ 0xc8446523, 0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703, ++ 0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b, ++ 0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412, ++ 0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475, ++ 0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722, ++ 0xe9947565}, ++ {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5, ++ 0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22, ++ 0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c, ++ 0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed, ++ 0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d, ++ 0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1, ++ 0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e, ++ 0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32, ++ 0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142, ++ 0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93, ++ 0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d, ++ 0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a, ++ 0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58, ++ 0x8053b7f1, 
0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14, ++ 0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81, ++ 0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd, ++ 0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab, ++ 0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c, ++ 0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72, ++ 0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f, ++ 0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff, ++ 0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3, ++ 0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30, ++ 0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c, ++ 0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c, ++ 0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558, ++ 0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146, ++ 0x740a4aef, 0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581, ++ 0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7, ++ 0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab, ++ 0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e, ++ 0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272, ++ 0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838, ++ 0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff, ++ 0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1, ++ 0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330, ++ 0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840, ++ 0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c, ++ 0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb, ++ 0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7, ++ 0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7, ++ 0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616, ++ 0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208, ++ 0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf, ++ 0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 
0xef739c85, ++ 0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9, ++ 0x9334a332, 0xddbda89b, 0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c, ++ 0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10, ++ 0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76, ++ 0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1, ++ 0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf, ++ 0xf7d05006}, ++ {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b, ++ 0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774, ++ 0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58, ++ 0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a, ++ 0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb, ++ 0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952, ++ 0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e, ++ 0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7, ++ 0x07751a0d, 0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746, ++ 0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14, ++ 0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338, ++ 0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907, ++ 0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777, ++ 0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de, ++ 0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064, ++ 0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd, ++ 0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951, ++ 0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e, ++ 0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42, ++ 0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b, ++ 0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a, ++ 0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3, ++ 0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904, ++ 0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad, ++ 0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c, ++ 0xdfa72ebe, 
0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d, ++ 0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861, ++ 0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e, ++ 0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2, ++ 0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b, ++ 0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1, ++ 0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78, ++ 0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f, ++ 0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40, ++ 0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c, ++ 0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e, ++ 0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf, ++ 0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166, ++ 0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d, ++ 0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4, ++ 0xa1f9b54e, 0x2c7148ac, 0x619948cb, 0xec11b529, 0xfa494805, ++ 0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157, ++ 0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b, ++ 0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644, ++ 0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43, ++ 0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea, ++ 0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850, ++ 0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9, ++ 0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165, ++ 0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a, ++ 0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676, ++ 0xb2075b94}, ++ {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf, ++ 0xeea341a0, 0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61, ++ 0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be, ++ 0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd, ++ 0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3, ++ 0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 
0xd0c9a77c, 0x5039b063, ++ 0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105, ++ 0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5, ++ 0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb, ++ 0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8, ++ 0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07, ++ 0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9, ++ 0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5, ++ 0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515, ++ 0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4, ++ 0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014, ++ 0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7, ++ 0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269, ++ 0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6, ++ 0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af, ++ 0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1, ++ 0xcbae77ae, 0x91cf48ce, 0x113f5fd1, 0x250d360e, 0xa5fd2111, ++ 0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d, ++ 0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad, ++ 0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3, ++ 0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75, ++ 0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa, ++ 0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74, ++ 0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7, ++ 0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477, ++ 0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6, ++ 0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176, ++ 0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af, ++ 0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71, ++ 0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae, ++ 0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd, ++ 0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3, ++ 0x5d12b6cc, 
0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073, ++ 0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0, ++ 0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400, ++ 0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e, ++ 0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d, ++ 0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2, ++ 0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c, ++ 0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5, ++ 0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505, ++ 0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4, ++ 0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004, ++ 0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7, ++ 0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279, ++ 0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6, ++ 0xba50bcb9}, ++ {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897, ++ 0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb, ++ 0x81301e45, 0x1440cad0, 0x48f0e9b9, 0xdd803d2c, 0xb96046d2, ++ 0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2, ++ 0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372, ++ 0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70, ++ 0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92, ++ 0x7d13b907, 0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190, ++ 0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40, ++ 0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430, ++ 0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759, ++ 0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75, ++ 0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2, ++ 0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0, ++ 0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7, ++ 0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5, ++ 0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39, ++ 0x7f866bac, 0x1b661052, 0x8e16c4c7, 
0xa2065680, 0x37768215, ++ 0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c, ++ 0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5, ++ 0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625, ++ 0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27, ++ 0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c, ++ 0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e, ++ 0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee, ++ 0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71, ++ 0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18, ++ 0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134, ++ 0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8, ++ 0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba, ++ 0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd, ++ 0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff, ++ 0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a, ++ 0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6, ++ 0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 0xa3bcf431, 0xc75c8fcf, ++ 0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf, ++ 0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f, ++ 0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d, ++ 0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d, ++ 0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f, ++ 0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af, ++ 0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df, ++ 0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6, ++ 0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a, ++ 0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef, ++ 0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed, ++ 0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa, ++ 0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8, ++ 0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624, ++ 0x01baa2b1, 
0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08, ++ 0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861, ++ 0x808abcf4}, ++ {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2, ++ 0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd, ++ 0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76, ++ 0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52, ++ 0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e, ++ 0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124, ++ 0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147, ++ 0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d, ++ 0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31, ++ 0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15, ++ 0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae, ++ 0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1, ++ 0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d, ++ 0x2b0600a5, 0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307, ++ 0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9, ++ 0xa50e8871, 0xa647e5ca, 0x69d9f202, 0xe20acc1b, 0x2d94dbd3, ++ 0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084, ++ 0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb, ++ 0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850, ++ 0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2, ++ 0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe, ++ 0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94, ++ 0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261, ++ 0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b, ++ 0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917, ++ 0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53, ++ 0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8, ++ 0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787, ++ 0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0, ++ 0x92257b18, 0x19f64501, 0xd66852c9, 
0xd5213f72, 0x1abf28ba, ++ 0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404, ++ 0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e, ++ 0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af, ++ 0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0, ++ 0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b, ++ 0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f, ++ 0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543, ++ 0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129, ++ 0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627, ++ 0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d, ++ 0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51, ++ 0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75, ++ 0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce, ++ 0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1, ++ 0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760, ++ 0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a, ++ 0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4, ++ 0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 0xa0217bde, ++ 0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089, ++ 0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6, ++ 0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d, ++ 0xefdb3f95}, ++ {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8, ++ 0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7, ++ 0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945, ++ 0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9, ++ 0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652, ++ 0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc, ++ 0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a, ++ 0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4, ++ 0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f, ++ 0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3, ++ 
0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51, ++ 0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e, ++ 0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c, ++ 0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362, ++ 0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11, ++ 0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff, ++ 0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7, ++ 0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8, ++ 0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a, ++ 0xb382475c, 0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690, ++ 0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b, ++ 0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5, ++ 0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05, ++ 0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb, ++ 0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740, ++ 0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f, ++ 0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded, ++ 0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2, ++ 0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 0xd377d2fa, ++ 0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714, ++ 0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67, ++ 0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89, ++ 0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7, ++ 0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8, ++ 0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a, ++ 0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6, ++ 0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d, ++ 0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3, ++ 0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9, ++ 0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57, ++ 0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc, ++ 0x29f137ca, 0x4477c490, 0x60f595a6, 
0x45afb476, 0x612de540, ++ 0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2, ++ 0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd, ++ 0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93, ++ 0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d, ++ 0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e, ++ 0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0, ++ 0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8, ++ 0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7, ++ 0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75, ++ 0x0e2fbf43}, ++ {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc, ++ 0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a, ++ 0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3, ++ 0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7, ++ 0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b, ++ 0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154, ++ 0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3, ++ 0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc, ++ 0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330, ++ 0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 0x87de9264, ++ 0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd, ++ 0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b, ++ 0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a, ++ 0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175, ++ 0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275, ++ 0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a, ++ 0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234, ++ 0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2, ++ 0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b, ++ 0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a, ++ 0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6, ++ 0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189, ++ 
0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b, ++ 0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204, ++ 0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8, ++ 0xe13f523b, 0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226, ++ 0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff, ++ 0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219, ++ 0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167, ++ 0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258, ++ 0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158, ++ 0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267, ++ 0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c, ++ 0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da, ++ 0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003, ++ 0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157, ++ 0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b, ++ 0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4, ++ 0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179, ++ 0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246, ++ 0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a, ++ 0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de, ++ 0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107, ++ 0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1, ++ 0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba, ++ 0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285, ++ 0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185, ++ 0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba, ++ 0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4, ++ 0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322, ++ 0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb, ++ 0xf4377108}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000, ++ 0x15b1175b00000000, 
0xcc43cadb00000000, 0x3fd338ed00000000, ++ 0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000, ++ 0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000, ++ 0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000, ++ 0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000, ++ 0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000, ++ 0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000, ++ 0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000, ++ 0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000, ++ 0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000, ++ 0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000, ++ 0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000, ++ 0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000, ++ 0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000, ++ 0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000, ++ 0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000, ++ 0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000, ++ 0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000, ++ 0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000, ++ 0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000, ++ 0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000, ++ 0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000, ++ 0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000, ++ 0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000, ++ 0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000, ++ 0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000, ++ 0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000, ++ 0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000, ++ 0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000, ++ 0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000, ++ 0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000, ++ 0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000, ++ 
0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000, ++ 0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000, ++ 0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000, ++ 0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000, ++ 0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000, ++ 0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000, ++ 0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000, ++ 0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000, ++ 0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000, ++ 0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000, ++ 0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000, ++ 0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000, ++ 0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000, ++ 0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000, ++ 0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000, ++ 0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000, ++ 0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000, ++ 0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000, ++ 0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000, ++ 0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000, ++ 0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000, ++ 0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000, ++ 0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000, ++ 0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000, ++ 0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000, ++ 0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000, ++ 0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000, ++ 0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000, ++ 0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000, ++ 0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000, ++ 0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000, ++ 0x9f0023a900000000, 0x6c90d19f00000000, 
0x7921c6c400000000, ++ 0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000, ++ 0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000, ++ 0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000, ++ 0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000, ++ 0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000, ++ 0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000, ++ 0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000, ++ 0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000, ++ 0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000, ++ 0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000, ++ 0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000, ++ 0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000, ++ 0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000, ++ 0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000, ++ 0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000, ++ 0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000, ++ 0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000, ++ 0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000, ++ 0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000, ++ 0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000, ++ 0x087137f400000000}, ++ {0x0000000000000000, 0x3651822400000000, 0x6ca2044900000000, ++ 0x5af3866d00000000, 0xd844099200000000, 0xee158bb600000000, ++ 0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000, ++ 0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000, ++ 0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000, ++ 0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000, ++ 0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000, ++ 0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000, ++ 0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000, ++ 0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000, ++ 0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000, 
++ 0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000, ++ 0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000, ++ 0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000, ++ 0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000, ++ 0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000, ++ 0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000, ++ 0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000, ++ 0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000, ++ 0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000, ++ 0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000, ++ 0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000, ++ 0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000, ++ 0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000, ++ 0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000, ++ 0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000, ++ 0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000, ++ 0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000, ++ 0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000, ++ 0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000, ++ 0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000, ++ 0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000, ++ 0xca55b4dd00000000, 0xfc0436f900000000, 0xa6f7b09400000000, ++ 0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000, ++ 0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000, ++ 0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000, ++ 0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000, ++ 0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000, ++ 0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000, ++ 0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000, ++ 0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000, ++ 0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000, ++ 0x2c256cdc00000000, 0x1a74eef800000000, 
0x59cbc1f600000000, ++ 0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000, ++ 0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000, ++ 0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000, ++ 0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000, ++ 0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000, ++ 0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000, ++ 0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000, ++ 0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000, ++ 0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000, ++ 0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000, ++ 0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000, ++ 0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000, ++ 0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000, ++ 0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000, ++ 0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000, ++ 0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000, ++ 0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000, ++ 0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000, ++ 0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000, ++ 0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000, ++ 0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000, ++ 0xd5ad196000000000, 0xe3fc9b4400000000, 0xb90f1d2900000000, ++ 0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000, ++ 0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000, ++ 0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000, ++ 0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000, ++ 0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000, ++ 0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000, ++ 0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000, ++ 0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000, ++ 0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000, ++ 0x33ddc16100000000, 
0x058c434500000000, 0x939e752b00000000, ++ 0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000, ++ 0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000, ++ 0x1129fad400000000, 0x621116d400000000, 0x544094f000000000, ++ 0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000, ++ 0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000, ++ 0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000, ++ 0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000, ++ 0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000, ++ 0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000, ++ 0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000, ++ 0x43bf2f0e00000000}, ++ {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000, ++ 0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000, ++ 0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000, ++ 0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000, ++ 0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000, ++ 0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000, ++ 0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000, ++ 0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000, ++ 0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000, ++ 0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000, ++ 0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000, ++ 0x5e9f6a5000000000, 0x47a1b9db00000000, 0x8fb6271400000000, ++ 0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000, ++ 0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000, ++ 0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000, ++ 0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000, ++ 0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000, ++ 0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000, ++ 0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000, ++ 0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000, ++ 0x7a3ed19200000000, 0xb2294f5d00000000, 
0xab179cd600000000, ++ 0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000, ++ 0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000, ++ 0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000, ++ 0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000, ++ 0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000, ++ 0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000, ++ 0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000, ++ 0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000, ++ 0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000, ++ 0xf2db6a2500000000, 0x3accf4ea00000000, 0x81a1bde900000000, ++ 0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000, ++ 0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000, ++ 0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000, ++ 0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000, ++ 0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000, ++ 0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000, ++ 0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000, ++ 0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000, ++ 0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000, ++ 0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000, ++ 0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000, ++ 0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000, ++ 0x533fdfdd00000000, 0x4a010c5600000000, 0x8216929900000000, ++ 0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000, ++ 0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000, ++ 0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000, ++ 0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000, ++ 0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000, ++ 0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000, ++ 0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000, ++ 0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000, ++ 0x779e641f00000000, 
0xbf89fad000000000, 0xa6b7295b00000000, ++ 0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000, ++ 0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000, ++ 0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000, ++ 0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000, ++ 0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000, ++ 0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000, ++ 0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000, ++ 0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000, ++ 0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000, ++ 0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000, ++ 0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000, ++ 0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000, ++ 0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000, ++ 0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000, ++ 0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000, ++ 0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000, ++ 0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000, ++ 0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000, ++ 0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000, ++ 0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000, ++ 0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000, ++ 0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000, ++ 0xa8a0b3a600000000, 0xb19e602d00000000, 0x7989fee200000000, ++ 0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000, ++ 0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000, ++ 0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000, ++ 0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000, ++ 0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000, ++ 0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000, ++ 0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000, ++ 0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000, ++ 
0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000, ++ 0x953fdbef00000000}, ++ {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000, ++ 0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000, ++ 0xfcf7c0c900000000, 0x6923b05c00000000, 0x2eb1a07000000000, ++ 0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000, ++ 0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000, ++ 0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000, ++ 0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000, ++ 0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000, ++ 0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000, ++ 0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000, ++ 0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000, ++ 0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000, ++ 0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000, ++ 0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000, ++ 0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000, ++ 0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000, ++ 0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000, ++ 0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000, ++ 0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000, ++ 0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000, ++ 0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000, ++ 0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000, ++ 0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000, ++ 0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000, ++ 0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000, ++ 0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000, ++ 0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000, ++ 0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000, ++ 0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000, ++ 0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000, ++ 0xebf9965300000000, 
0x7e2de6c600000000, 0x170e569a00000000, ++ 0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000, ++ 0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000, ++ 0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000, ++ 0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000, ++ 0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000, ++ 0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000, ++ 0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000, ++ 0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000, ++ 0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000, ++ 0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000, ++ 0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000, ++ 0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000, ++ 0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000, ++ 0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000, ++ 0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000, ++ 0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000, ++ 0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000, ++ 0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000, ++ 0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000, ++ 0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000, ++ 0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000, ++ 0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000, ++ 0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000, ++ 0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000, ++ 0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000, ++ 0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000, ++ 0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000, ++ 0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000, ++ 0xd47f0d0a00000000, 0x2a04ed6e00000000, 0xbfd09dfb00000000, ++ 0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000, ++ 0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000, ++ 
0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000, ++ 0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000, ++ 0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000, ++ 0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000, ++ 0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000, ++ 0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000, ++ 0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000, ++ 0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000, ++ 0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000, ++ 0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000, ++ 0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000, ++ 0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000, ++ 0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000, ++ 0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000, ++ 0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000, ++ 0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000, ++ 0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000, ++ 0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000, ++ 0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000, ++ 0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000, ++ 0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000, ++ 0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000, ++ 0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000, ++ 0xf4bc8a8000000000}, ++ {0x0000000000000000, 0x1f17f08000000000, 0x7f2891da00000000, ++ 0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000, ++ 0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000, ++ 0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000, ++ 0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000, ++ 0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000, ++ 0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000, ++ 0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000, ++ 0xc3f19abe00000000, 
0xdce66a3e00000000, 0xbcd90b6400000000, ++ 0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000, ++ 0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000, ++ 0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000, ++ 0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000, ++ 0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000, ++ 0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000, ++ 0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000, ++ 0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000, ++ 0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000, ++ 0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000, ++ 0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000, ++ 0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000, ++ 0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000, ++ 0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000, ++ 0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000, ++ 0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000, ++ 0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000, ++ 0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000, ++ 0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000, ++ 0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000, ++ 0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000, ++ 0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000, ++ 0xd6c4b90300000000, 0xb6fbd85900000000, 0xa9ec28d900000000, ++ 0xcfcdf89700000000, 0xd0da081700000000, 0xb0e5694d00000000, ++ 0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000, ++ 0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000, ++ 0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000, ++ 0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000, ++ 0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000, ++ 0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000, ++ 0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000, ++ 
0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000, ++ 0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000, ++ 0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000, ++ 0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000, ++ 0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000, ++ 0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000, ++ 0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000, ++ 0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000, ++ 0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000, ++ 0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000, ++ 0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000, ++ 0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000, ++ 0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000, ++ 0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000, ++ 0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000, ++ 0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000, ++ 0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000, ++ 0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000, ++ 0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000, ++ 0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000, ++ 0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000, ++ 0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000, ++ 0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000, ++ 0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000, ++ 0xdf9d80f400000000, 0xc08a707400000000, 0xa0b5112e00000000, ++ 0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000, ++ 0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000, ++ 0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000, ++ 0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000, ++ 0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000, ++ 0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000, ++ 0xc2801f7800000000, 0xa2bf7e2200000000, 
0xbda88ea200000000, ++ 0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000, ++ 0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000, ++ 0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000, ++ 0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000, ++ 0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000, ++ 0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000, ++ 0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000, ++ 0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000, ++ 0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000, ++ 0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000, ++ 0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000, ++ 0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000, ++ 0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000, ++ 0xb9bc50ba00000000}, ++ {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000, ++ 0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000, ++ 0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000, ++ 0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000, ++ 0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000, ++ 0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000, ++ 0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000, ++ 0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000, ++ 0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000, ++ 0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000, ++ 0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000, ++ 0x791d9c3d00000000, 0x1e1d747000000000, 0xfce0fcfd00000000, ++ 0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000, ++ 0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000, ++ 0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000, ++ 0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000, ++ 0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000, ++ 0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000, 
++ 0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000, ++ 0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000, ++ 0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000, ++ 0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000, ++ 0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000, ++ 0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000, ++ 0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000, ++ 0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000, ++ 0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000, ++ 0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000, ++ 0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000, ++ 0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000, ++ 0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000, ++ 0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000, ++ 0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000, ++ 0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000, ++ 0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000, ++ 0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000, ++ 0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000, ++ 0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000, ++ 0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000, ++ 0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000, ++ 0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000, ++ 0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000, ++ 0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000, ++ 0x4d75482000000000, 0x2a75a06d00000000, 0xc88828e000000000, ++ 0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000, ++ 0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000, ++ 0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000, ++ 0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000, ++ 0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000, ++ 0xa57b9a5500000000, 0x8986c24300000000, 
0x6b7b4ace00000000, ++ 0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000, ++ 0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000, ++ 0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000, ++ 0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000, ++ 0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000, ++ 0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000, ++ 0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000, ++ 0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000, ++ 0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000, ++ 0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000, ++ 0x1266d6f300000000, 0xf09b5e7e00000000, 0x979bb63300000000, ++ 0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000, ++ 0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000, ++ 0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000, ++ 0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000, ++ 0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000, ++ 0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000, ++ 0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000, ++ 0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000, ++ 0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000, ++ 0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000, ++ 0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000, ++ 0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000, ++ 0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000, ++ 0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000, ++ 0xa152042b00000000, 0xc652ec6600000000, 0x24af64eb00000000, ++ 0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000, ++ 0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000, ++ 0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000, ++ 0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000, ++ 0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000, ++ 0x495cd65e00000000, 
0x65a18e4800000000, 0x875c06c500000000, ++ 0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000, ++ 0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000, ++ 0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000, ++ 0x945b07b200000000}, ++ {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000, ++ 0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000, ++ 0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000, ++ 0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000, ++ 0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000, ++ 0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000, ++ 0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000, ++ 0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000, ++ 0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000, ++ 0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000, ++ 0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000, ++ 0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000, ++ 0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000, ++ 0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000, ++ 0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000, ++ 0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000, ++ 0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000, ++ 0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000, ++ 0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000, ++ 0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000, ++ 0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000, ++ 0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000, ++ 0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000, ++ 0x149f066100000000, 0xef839db200000000, 0x468814fc00000000, ++ 0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000, ++ 0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000, ++ 0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000, ++ 0xe718e5b300000000, 0x1c047e6000000000, 
0xb50ff72e00000000, ++ 0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000, ++ 0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000, ++ 0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000, ++ 0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000, ++ 0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000, ++ 0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000, ++ 0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000, ++ 0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000, ++ 0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000, ++ 0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000, ++ 0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000, ++ 0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000, ++ 0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000, ++ 0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000, ++ 0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000, ++ 0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000, ++ 0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000, ++ 0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000, ++ 0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000, ++ 0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000, ++ 0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000, ++ 0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000, ++ 0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000, ++ 0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000, ++ 0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000, ++ 0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000, ++ 0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000, ++ 0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000, ++ 0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000, ++ 0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000, ++ 0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000, ++ 0x6284965c00000000, 
0x99980d8f00000000, 0x309384c100000000, ++ 0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000, ++ 0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000, ++ 0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000, ++ 0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000, ++ 0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000, ++ 0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000, ++ 0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000, ++ 0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000, ++ 0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000, ++ 0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000, ++ 0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000, ++ 0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000, ++ 0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000, ++ 0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000, ++ 0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000, ++ 0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000, ++ 0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000, ++ 0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000, ++ 0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000, ++ 0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000, ++ 0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000, ++ 0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000, ++ 0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000, ++ 0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000, ++ 0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000, ++ 0x0650d0f700000000}, ++ {0x0000000000000000, 0x479244af00000000, 0xcf22f88500000000, ++ 0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000, ++ 0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000, ++ 0xb81337d500000000, 0x30a38bff00000000, 0x7731cf5000000000, ++ 0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000, ++ 0xa8724e8000000000, 0xfe03e7f400000000, 
0xb991a35b00000000, ++ 0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000, ++ 0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000, ++ 0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000, ++ 0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000, ++ 0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000, ++ 0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000, ++ 0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000, ++ 0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000, ++ 0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000, ++ 0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000, ++ 0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000, ++ 0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000, ++ 0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000, ++ 0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000, ++ 0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000, ++ 0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000, ++ 0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000, ++ 0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000, ++ 0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000, ++ 0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000, ++ 0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000, ++ 0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000, ++ 0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000, ++ 0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000, ++ 0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000, ++ 0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000, ++ 0xc702c15700000000, 0x809085f800000000, 0x082039d200000000, ++ 0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000, ++ 0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000, ++ 0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000, ++ 0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000, ++ 0x6f708fd700000000, 
0x390126a300000000, 0x7e93620c00000000, ++ 0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000, ++ 0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000, ++ 0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000, ++ 0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000, ++ 0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000, ++ 0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000, ++ 0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000, ++ 0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000, ++ 0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000, ++ 0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000, ++ 0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000, ++ 0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000, ++ 0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000, ++ 0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000, ++ 0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000, ++ 0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000, ++ 0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000, ++ 0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000, ++ 0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000, ++ 0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000, ++ 0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000, ++ 0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000, ++ 0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000, ++ 0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000, ++ 0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000, ++ 0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000, ++ 0x8e0582af00000000, 0xc997c60000000000, 0x41277a2a00000000, ++ 0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000, ++ 0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000, ++ 0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000, ++ 0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000, ++ 
0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000, ++ 0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000, ++ 0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000, ++ 0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000, ++ 0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000, ++ 0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000, ++ 0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000, ++ 0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000, ++ 0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000, ++ 0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000, ++ 0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000, ++ 0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000, ++ 0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000, ++ 0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000, ++ 0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000, ++ 0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000, ++ 0x657594e900000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, ++ 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, ++ 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, ++ 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, ++ 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, ++ 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, ++ 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, ++ 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, ++ 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, ++ 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, ++ 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, ++ 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, ++ 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, ++ 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, ++ 
0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, ++ 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, ++ 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, ++ 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, ++ 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, ++ 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, ++ 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, ++ 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, ++ 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, ++ 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, ++ 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, ++ 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, ++ 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, ++ 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, ++ 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, ++ 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, ++ 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, ++ 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, ++ 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, ++ 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, ++ 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, ++ 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, ++ 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, ++ 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, ++ 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, ++ 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, ++ 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, ++ 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, ++ 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, ++ 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, ++ 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, ++ 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 
0x0a185b68, 0x6f7f602e, ++ 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, ++ 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, ++ 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, ++ 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, ++ 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, ++ 0xd8ac6b35}, ++ {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, ++ 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, ++ 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, ++ 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, ++ 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, ++ 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, ++ 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, ++ 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, ++ 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, ++ 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, ++ 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, ++ 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, ++ 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, ++ 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, ++ 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, ++ 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, ++ 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, ++ 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, ++ 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, ++ 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, ++ 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, ++ 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, ++ 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, ++ 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, ++ 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, ++ 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, ++ 
0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, ++ 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, ++ 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, ++ 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, ++ 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, ++ 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, ++ 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, ++ 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, ++ 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, ++ 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, ++ 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, ++ 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, ++ 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, ++ 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, ++ 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, ++ 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, ++ 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, ++ 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, ++ 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, ++ 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, ++ 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, ++ 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, ++ 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, ++ 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, ++ 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, ++ 0xa140efa8}, ++ {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, ++ 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, ++ 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, ++ 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, ++ 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, ++ 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, ++ 0x378e4e91, 0xd8dcf870, 
0x5d46b830, 0xb2140ed1, 0x5892d3b3, ++ 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, ++ 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, ++ 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, ++ 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, ++ 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, ++ 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, ++ 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, ++ 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, ++ 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, ++ 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, ++ 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, ++ 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, ++ 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, ++ 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, ++ 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, ++ 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, ++ 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, ++ 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, ++ 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, ++ 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, ++ 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, ++ 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, ++ 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, ++ 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, ++ 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, ++ 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, ++ 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, ++ 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, ++ 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, ++ 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, ++ 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, ++ 
0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, ++ 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, ++ 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, ++ 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, ++ 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, ++ 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, ++ 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, ++ 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, ++ 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, ++ 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, ++ 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, ++ 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, ++ 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, ++ 0x917cd6a1}, ++ {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, ++ 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, ++ 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, ++ 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, ++ 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, ++ 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, ++ 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, ++ 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, ++ 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, ++ 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, ++ 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, ++ 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, ++ 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, ++ 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, ++ 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, ++ 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, ++ 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, ++ 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, ++ 0x783b23d0, 0xff9de893, 
0xdf0f94d8, 0x58a95f9b, 0x0b33041f, ++ 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, ++ 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, ++ 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, ++ 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, ++ 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, ++ 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, ++ 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, ++ 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, ++ 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, ++ 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, ++ 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, ++ 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, ++ 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, ++ 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, ++ 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, ++ 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, ++ 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, ++ 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, ++ 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, ++ 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, ++ 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, ++ 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, ++ 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, ++ 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, ++ 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, ++ 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, ++ 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, ++ 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, ++ 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, ++ 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, ++ 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, ++ 
0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, ++ 0x18ba364e}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873, ++ 0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661, ++ 0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441, ++ 0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44, ++ 0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1, ++ 0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05, ++ 0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa, ++ 0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e, ++ 0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb, ++ 0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be, ++ 0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e, ++ 0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c, ++ 0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d, ++ 0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9, ++ 0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f, ++ 0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b, ++ 0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39, ++ 0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b, ++ 0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b, ++ 0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20, ++ 0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595, ++ 0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61, ++ 0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0, ++ 0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644, ++ 0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1, ++ 0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d, ++ 0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d, ++ 0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f, ++ 0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad, ++ 0xd6997b2a, 0x52c2e179, 
0x110947fe, 0x5a75d5de, 0x19be7359, ++ 0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f, ++ 0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b, ++ 0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7, ++ 0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5, ++ 0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5, ++ 0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0, ++ 0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65, ++ 0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091, ++ 0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633, ++ 0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7, ++ 0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272, ++ 0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77, ++ 0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57, ++ 0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145, ++ 0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9, ++ 0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d, ++ 0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb, ++ 0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f, ++ 0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad, ++ 0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf, ++ 0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f, ++ 0x4e36ba18}, ++ {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b, ++ 0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8, ++ 0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19, ++ 0x68a47ff6, 0x185ca32e, 0xf9eaf1c1, 0x9b37772b, 0x7a8125c4, ++ 0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239, ++ 0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd, ++ 0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258, ++ 0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc, ++ 0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41, ++ 0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 
0xc952b79c, ++ 0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 0xad58997d, ++ 0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e, ++ 0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba, ++ 0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e, ++ 0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8, ++ 0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c, ++ 0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f, ++ 0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c, ++ 0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d, ++ 0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d, ++ 0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0, ++ 0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014, ++ 0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc, ++ 0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628, ++ 0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5, ++ 0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941, ++ 0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0, ++ 0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53, ++ 0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880, ++ 0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264, ++ 0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92, ++ 0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776, ++ 0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8, ++ 0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b, ++ 0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea, ++ 0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837, ++ 0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca, ++ 0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e, ++ 0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211, ++ 0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5, ++ 0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08, ++ 0x0a594de7, 0x6884cb0d, 
0x893299e2, 0xf9ca453a, 0x187c17d5, ++ 0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934, ++ 0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7, ++ 0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049, ++ 0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad, ++ 0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b, ++ 0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf, ++ 0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c, ++ 0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f, ++ 0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e, ++ 0xa1d67c91}, ++ {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9, ++ 0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de, ++ 0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94, ++ 0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0, ++ 0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a, ++ 0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924, ++ 0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052, ++ 0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c, ++ 0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6, ++ 0x2be6af11, 0x8260f9b2, 0xdae01b65, 0x1e4dc635, 0x46cd24e2, ++ 0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8, ++ 0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f, ++ 0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d, ++ 0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273, ++ 0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30, ++ 0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e, ++ 0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7, ++ 0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980, ++ 0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca, ++ 0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8, ++ 0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62, ++ 0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 
0x030aa75c, ++ 0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c, ++ 0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032, ++ 0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798, ++ 0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d, ++ 0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07, ++ 0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630, ++ 0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389, ++ 0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7, ++ 0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4, ++ 0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca, ++ 0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55, ++ 0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662, ++ 0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828, ++ 0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c, ++ 0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6, ++ 0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98, ++ 0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3, ++ 0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d, ++ 0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037, ++ 0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913, ++ 0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759, ++ 0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e, ++ 0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1, ++ 0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf, ++ 0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c, ++ 0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2, ++ 0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b, ++ 0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c, ++ 0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276, ++ 0xa8ef40a1}, ++ {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e, ++ 0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8, ++ 0x3ea01557, 
0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819, ++ 0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f, ++ 0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d, ++ 0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756, ++ 0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0, ++ 0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb, ++ 0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9, ++ 0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f, ++ 0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e, ++ 0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8, ++ 0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835, ++ 0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e, ++ 0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62, ++ 0x6e3b8a07, 0xfd9dcee6, 0xbba6a983, 0x71eb002c, 0x37d06749, ++ 0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b, ++ 0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d, ++ 0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc, ++ 0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80, ++ 0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2, ++ 0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599, ++ 0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05, ++ 0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e, ++ 0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c, ++ 0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e, ++ 0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef, ++ 0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359, ++ 0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b, ++ 0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0, ++ 0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc, ++ 0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7, ++ 0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f, ++ 0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 
0xd3fdb189, ++ 0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568, ++ 0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e, ++ 0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c, ++ 0xf6567769, 0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27, ++ 0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794, ++ 0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf, ++ 0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d, ++ 0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db, ++ 0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a, ++ 0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c, ++ 0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544, ++ 0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f, ++ 0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013, ++ 0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38, ++ 0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea, ++ 0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c, ++ 0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd, ++ 0x356bacd8}}; ++ ++#endif /* W */ ++ ++#endif /* N == 5 */ ++#if N == 6 ++ ++#if W == 8 ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370, ++ 0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d, ++ 0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69, ++ 0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426, ++ 0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3, ++ 0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f, ++ 0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c, ++ 0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490, ++ 0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155, ++ 0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a, ++ 0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e, ++ 0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603, ++ 0x763c9017, 
0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349, ++ 0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5, ++ 0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50, ++ 0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc, ++ 0x017ec20b, 0x3ccf2ed7, 0x7a1d1bb3, 0x47acf76f, 0xf7b9717b, ++ 0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76, ++ 0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862, ++ 0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 0xf1a9b4a9, ++ 0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c, ++ 0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0, ++ 0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937, ++ 0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b, ++ 0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e, ++ 0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e, ++ 0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a, ++ 0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357, ++ 0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0, ++ 0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c, ++ 0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9, ++ 0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165, ++ 0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766, ++ 0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b, ++ 0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f, ++ 0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030, ++ 0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5, ++ 0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59, ++ 0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63, ++ 0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf, ++ 0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a, ++ 0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845, ++ 0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51, ++ 0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 
0xa856915c, ++ 0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f, ++ 0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3, ++ 0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46, ++ 0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea, ++ 0x0383461d, 0x3e32aac1, 0x78e09fa5, 0x45517379, 0xf544f56d, ++ 0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60, ++ 0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74, ++ 0x8568a0a8}, ++ {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5, ++ 0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf, ++ 0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5, ++ 0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba, ++ 0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf, ++ 0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f, ++ 0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0, ++ 0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450, ++ 0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55, ++ 0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a, ++ 0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620, ++ 0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a, ++ 0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454, ++ 0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4, ++ 0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534, ++ 0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584, ++ 0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694, ++ 0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e, ++ 0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4, ++ 0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1, ++ 0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4, ++ 0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164, ++ 0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1, ++ 0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911, ++ 0x1ad208c1, 
0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314, ++ 0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 0xbb25458c, ++ 0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6, ++ 0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec, ++ 0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc, ++ 0x9a00ec99, 0x218bf136, 0x48f2fa53, 0x8deccc29, 0xe495c74c, ++ 0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c, ++ 0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c, ++ 0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716, ++ 0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c, ++ 0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676, ++ 0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879, ++ 0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c, ++ 0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc, ++ 0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77, ++ 0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7, ++ 0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2, ++ 0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd, ++ 0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7, ++ 0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad, ++ 0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897, ++ 0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827, ++ 0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7, ++ 0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947, ++ 0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57, ++ 0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d, ++ 0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37, ++ 0x0d907052}, ++ {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d, ++ 0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89, ++ 0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31, ++ 0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81, ++ 0xb50ac839, 0xcac353aa, 0x4a99ff1f, 
0x3550648c, 0xd9f3702e, ++ 0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0, ++ 0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f, ++ 0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291, ++ 0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e, ++ 0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e, ++ 0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 0x33091583, 0xb353b936, ++ 0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2, ++ 0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13, ++ 0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d, ++ 0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f, ++ 0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1, ++ 0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a, ++ 0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae, ++ 0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516, ++ 0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f, ++ 0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20, ++ 0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe, ++ 0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28, ++ 0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6, ++ 0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419, ++ 0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5, ++ 0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d, ++ 0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889, ++ 0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412, ++ 0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c, ++ 0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e, ++ 0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 0x2c35a0b0, ++ 0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02, ++ 0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986, ++ 0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e, ++ 0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e, ++ 0x1d0b9a36, 
0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221, ++ 0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf, ++ 0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913, ++ 0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d, ++ 0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622, ++ 0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592, ++ 0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 0x33b7612a, ++ 0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae, ++ 0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c, ++ 0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82, ++ 0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20, ++ 0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe, ++ 0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025, ++ 0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1, ++ 0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719, ++ 0xfd1a6c8a}, ++ {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3, ++ 0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb, ++ 0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d, ++ 0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb, ++ 0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9, ++ 0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156, ++ 0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045, ++ 0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa, ++ 0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8, ++ 0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e, ++ 0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8, ++ 0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0, ++ 0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38, ++ 0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87, ++ 0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46, ++ 0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9, ++ 0xdefe2e76, 0x7d68063a, 0x42a378af, 
0xe13550e3, 0x3d358585, ++ 0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d, ++ 0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb, ++ 0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531, ++ 0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03, ++ 0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc, ++ 0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33, ++ 0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 0x7167bd8c, ++ 0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be, ++ 0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d, ++ 0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b, ++ 0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303, ++ 0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f, ++ 0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0, ++ 0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801, ++ 0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe, ++ 0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e, ++ 0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346, ++ 0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620, ++ 0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776, ++ 0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844, ++ 0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 0x03fa2bfb, ++ 0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0, ++ 0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f, ++ 0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d, ++ 0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b, ++ 0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d, ++ 0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75, ++ 0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795, ++ 0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a, ++ 0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb, ++ 0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354, ++ 0xb87374db, 
0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28, ++ 0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30, ++ 0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856, ++ 0x7895f01a}, ++ {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188, ++ 0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33, ++ 0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d, ++ 0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445, ++ 0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 0x452049b2, ++ 0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058, ++ 0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43, ++ 0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9, ++ 0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e, ++ 0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06, ++ 0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228, ++ 0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93, ++ 0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e, ++ 0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4, ++ 0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b, ++ 0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371, ++ 0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265, ++ 0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede, ++ 0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0, ++ 0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f, ++ 0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8, ++ 0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32, ++ 0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae, ++ 0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544, ++ 0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3, ++ 0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f, ++ 0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911, ++ 0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa, ++ 0xeca15c84, 0xc68964e6, 0xb8f12c40, 
0x92d91422, 0x0121f4be, ++ 0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54, ++ 0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b, ++ 0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1, ++ 0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652, ++ 0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9, ++ 0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7, ++ 0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f, ++ 0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68, ++ 0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782, ++ 0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797, ++ 0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d, ++ 0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a, ++ 0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2, ++ 0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc, ++ 0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 0x7fc94647, ++ 0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4, ++ 0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e, ++ 0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41, ++ 0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab, ++ 0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf, ++ 0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904, ++ 0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a, ++ 0x9239b848}, ++ {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad, ++ 0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0, ++ 0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40, ++ 0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b, ++ 0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d, ++ 0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b, ++ 0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb, ++ 0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d, ++ 0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b, ++ 
0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0, ++ 0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840, ++ 0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d, ++ 0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b, ++ 0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d, ++ 0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6, ++ 0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0, ++ 0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580, ++ 0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd, ++ 0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d, ++ 0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b, ++ 0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d, ++ 0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b, ++ 0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6, ++ 0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0, ++ 0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6, ++ 0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c, ++ 0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c, ++ 0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461, ++ 0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841, ++ 0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317, ++ 0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac, ++ 0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa, ++ 0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7, ++ 0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba, ++ 0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a, ++ 0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161, ++ 0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777, ++ 0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21, ++ 0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a, ++ 0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc, ++ 0xc0669d77, 0x87756b8c, 0x4e417081, 
0x0952867a, 0x075840da, ++ 0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1, ++ 0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01, ++ 0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c, ++ 0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241, ++ 0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917, ++ 0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac, ++ 0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa, ++ 0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da, ++ 0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 0xa22fe397, ++ 0x6b1bf89a, 0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537, ++ 0xeb36d3cc}, ++ {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b, ++ 0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059, ++ 0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251, ++ 0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d, ++ 0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9, ++ 0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c, ++ 0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41, ++ 0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4, ++ 0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10, ++ 0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c, ++ 0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54, ++ 0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476, ++ 0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8, ++ 0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d, ++ 0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92, ++ 0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307, ++ 0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad, ++ 0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f, ++ 0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87, ++ 0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17, ++ 0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3, ++ 
0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46, ++ 0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197, ++ 0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02, ++ 0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6, ++ 0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e, ++ 0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96, ++ 0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4, ++ 0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e, ++ 0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b, ++ 0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934, ++ 0xbb259dda, 0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1, ++ 0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7, ++ 0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5, ++ 0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd, ++ 0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1, ++ 0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475, ++ 0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0, ++ 0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155, ++ 0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0, ++ 0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304, ++ 0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348, ++ 0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140, ++ 0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862, ++ 0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14, ++ 0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181, ++ 0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e, ++ 0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab, ++ 0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01, ++ 0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523, ++ 0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b, ++ 0x38e5f3c5}, ++ {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06, ++ 0xfeabdea7, 0x1de26905, 
0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad, ++ 0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509, ++ 0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba, ++ 0x4c4d761e, 0xa45ae6bf, 0x4713511d, 0xaf04c1bc, 0x7789a414, ++ 0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3, ++ 0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733, ++ 0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994, ++ 0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a, ++ 0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889, ++ 0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d, ++ 0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386, ++ 0xd4d79a22, 0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621, ++ 0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886, ++ 0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e, ++ 0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389, ++ 0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f, ++ 0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294, ++ 0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30, ++ 0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3, ++ 0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d, ++ 0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba, ++ 0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a, ++ 0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad, ++ 0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03, ++ 0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2, ++ 0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306, ++ 0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad, ++ 0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b, ++ 0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc, ++ 0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914, ++ 0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3, ++ 0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435, ++ 
0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e, ++ 0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a, ++ 0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589, ++ 0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27, ++ 0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080, ++ 0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21, ++ 0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586, ++ 0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28, ++ 0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b, ++ 0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f, ++ 0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94, ++ 0x6b9ac630, 0x838d5691, 0x60c4e133, 0x88d37192, 0xbf4d5c12, ++ 0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5, ++ 0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d, ++ 0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba, ++ 0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c, ++ 0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7, ++ 0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103, ++ 0x3d3101a2}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000, ++ 0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000, ++ 0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000, ++ 0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000, ++ 0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000, ++ 0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000, ++ 0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000, ++ 0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000, ++ 0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000, ++ 0xb613c09400000000, 0x12ea356100000000, 0xb37a228900000000, ++ 0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000, ++ 0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000, ++ 0x363e5ea300000000, 
0x97ae494b00000000, 0x351900a800000000, ++ 0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000, ++ 0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000, ++ 0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000, ++ 0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000, ++ 0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000, ++ 0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000, ++ 0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000, ++ 0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000, ++ 0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000, ++ 0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000, ++ 0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000, ++ 0x2d7acd9d00000000, 0x8ceada7500000000, 0x2e5d939600000000, ++ 0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000, ++ 0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000, ++ 0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000, ++ 0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000, ++ 0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000, ++ 0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000, ++ 0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000, ++ 0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000, ++ 0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000, ++ 0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000, ++ 0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000, ++ 0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000, ++ 0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000, ++ 0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000, ++ 0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000, ++ 0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000, ++ 0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000, ++ 0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000, ++ 0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000, ++ 
0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000, ++ 0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000, ++ 0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000, ++ 0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000, ++ 0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000, ++ 0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000, ++ 0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000, ++ 0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000, ++ 0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000, ++ 0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000, ++ 0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000, ++ 0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000, ++ 0x3f26802200000000, 0x9eb697ca00000000, 0x3c01de2900000000, ++ 0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000, ++ 0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000, ++ 0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000, ++ 0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000, ++ 0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000, ++ 0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000, ++ 0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000, ++ 0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000, ++ 0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000, ++ 0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000, ++ 0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000, ++ 0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000, ++ 0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000, ++ 0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000, ++ 0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000, ++ 0x3688267d00000000, 0x9718319500000000, 0x35af787600000000, ++ 0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000, ++ 0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000, ++ 0xb3cc5a5700000000, 0x117b13b400000000, 
0xb0eb045c00000000, ++ 0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000, ++ 0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000, ++ 0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000, ++ 0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000, ++ 0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000, ++ 0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000, ++ 0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000, ++ 0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000, ++ 0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000, ++ 0xa201313d00000000}, ++ {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000, ++ 0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000, ++ 0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000, ++ 0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000, ++ 0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000, ++ 0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000, ++ 0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000, ++ 0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000, ++ 0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000, ++ 0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000, ++ 0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000, ++ 0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000, ++ 0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000, ++ 0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000, ++ 0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000, ++ 0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000, ++ 0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000, ++ 0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000, ++ 0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000, ++ 0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000, ++ 0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000, ++ 0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000, 
++ 0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000, ++ 0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000, ++ 0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000, ++ 0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000, ++ 0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000, ++ 0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000, ++ 0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000, ++ 0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000, ++ 0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000, ++ 0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000, ++ 0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000, ++ 0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000, ++ 0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000, ++ 0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000, ++ 0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000, ++ 0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000, ++ 0x971181e000000000, 0x7995b84100000000, 0x7107f69000000000, ++ 0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000, ++ 0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000, ++ 0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000, ++ 0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000, ++ 0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000, ++ 0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000, ++ 0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000, ++ 0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000, ++ 0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000, ++ 0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000, ++ 0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000, ++ 0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000, ++ 0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000, ++ 0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000, ++ 0xa184505200000000, 0xac99dfbf00000000, 
0x421de61e00000000, ++ 0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000, ++ 0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000, ++ 0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000, ++ 0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000, ++ 0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000, ++ 0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000, ++ 0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000, ++ 0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000, ++ 0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000, ++ 0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000, ++ 0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000, ++ 0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000, ++ 0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000, ++ 0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000, ++ 0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000, ++ 0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000, ++ 0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000, ++ 0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000, ++ 0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000, ++ 0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000, ++ 0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000, ++ 0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000, ++ 0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000, ++ 0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000, ++ 0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000, ++ 0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000, ++ 0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000, ++ 0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000, ++ 0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000, ++ 0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000, ++ 0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000, ++ 0xc5f3e53800000000}, ++ 
{0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000, ++ 0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000, ++ 0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000, ++ 0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000, ++ 0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000, ++ 0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000, ++ 0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000, ++ 0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000, ++ 0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000, ++ 0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000, ++ 0x76f70cb600000000, 0x8d011ff100000000, 0x2df2438f00000000, ++ 0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000, ++ 0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000, ++ 0x8d34498100000000, 0x364f4fda00000000, 0xcdb95c9d00000000, ++ 0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000, ++ 0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000, ++ 0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000, ++ 0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000, ++ 0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000, ++ 0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000, ++ 0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000, ++ 0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000, ++ 0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000, ++ 0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000, ++ 0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000, ++ 0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000, ++ 0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000, ++ 0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000, ++ 0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000, ++ 0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000, ++ 0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000, ++ 0x600eceba00000000, 0x6d15fa7300000000, 
0x96e3e93400000000, ++ 0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000, ++ 0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000, ++ 0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000, ++ 0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000, ++ 0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000, ++ 0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000, ++ 0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000, ++ 0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000, ++ 0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000, ++ 0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000, ++ 0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000, ++ 0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000, ++ 0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000, ++ 0xd704965e00000000, 0x6c7f900500000000, 0x9789834200000000, ++ 0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000, ++ 0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000, ++ 0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000, ++ 0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000, ++ 0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000, ++ 0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000, ++ 0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000, ++ 0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000, ++ 0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000, ++ 0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000, ++ 0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000, ++ 0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000, ++ 0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000, ++ 0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000, ++ 0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000, ++ 0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000, ++ 0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000, ++ 0x21dce7a000000000, 
0x2cc7d36900000000, 0xd731c02e00000000, ++ 0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000, ++ 0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000, ++ 0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000, ++ 0x8c6b758700000000, 0x8170414e00000000, 0x7a86520900000000, ++ 0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000, ++ 0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000, ++ 0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000, ++ 0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000, ++ 0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000, ++ 0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000, ++ 0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000, ++ 0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000, ++ 0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000, ++ 0xe114231400000000, 0x5a6f254f00000000, 0xa199360800000000, ++ 0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000, ++ 0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000, ++ 0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000, ++ 0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000, ++ 0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000, ++ 0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000, ++ 0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000, ++ 0xccd336eb00000000}, ++ {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000, ++ 0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000, ++ 0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000, ++ 0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000, ++ 0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000, ++ 0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000, ++ 0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000, ++ 0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000, ++ 0xb249204500000000, 0xd071086f00000000, 0x7639701100000000, ++ 0x1401583b00000000, 0x3aa880ed00000000, 
0x5890a8c700000000, ++ 0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000, ++ 0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000, ++ 0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000, ++ 0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000, ++ 0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000, ++ 0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000, ++ 0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000, ++ 0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000, ++ 0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000, ++ 0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000, ++ 0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000, ++ 0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000, ++ 0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000, ++ 0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000, ++ 0x5ffa900000000000, 0x3dc2b82a00000000, 0x9b8ac05400000000, ++ 0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000, ++ 0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000, ++ 0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000, ++ 0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000, ++ 0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000, ++ 0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000, ++ 0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000, ++ 0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000, ++ 0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000, ++ 0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000, ++ 0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000, ++ 0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000, ++ 0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000, ++ 0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000, ++ 0x8075688200000000, 0x263d10fc00000000, 0x440538d600000000, ++ 0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000, ++ 0x9d21a8f400000000, 
0xb388702200000000, 0xd1b0580800000000, ++ 0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000, ++ 0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000, ++ 0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000, ++ 0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000, ++ 0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000, ++ 0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000, ++ 0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000, ++ 0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000, ++ 0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000, ++ 0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000, ++ 0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000, ++ 0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000, ++ 0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000, ++ 0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000, ++ 0x8ba2510100000000, 0xe99a792b00000000, 0x4fd2015500000000, ++ 0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000, ++ 0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000, ++ 0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000, ++ 0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000, ++ 0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000, ++ 0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000, ++ 0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000, ++ 0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000, ++ 0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000, ++ 0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000, ++ 0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000, ++ 0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000, ++ 0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000, ++ 0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000, ++ 0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000, ++ 0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000, ++ 
0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000, ++ 0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000, ++ 0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000, ++ 0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000, ++ 0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000, ++ 0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000, ++ 0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000, ++ 0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000, ++ 0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000, ++ 0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000, ++ 0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000, ++ 0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000, ++ 0x48b8399200000000}, ++ {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000, ++ 0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000, ++ 0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000, ++ 0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000, ++ 0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000, ++ 0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000, ++ 0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000, ++ 0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000, ++ 0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000, ++ 0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000, ++ 0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000, ++ 0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000, ++ 0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000, ++ 0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000, ++ 0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000, ++ 0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000, ++ 0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000, ++ 0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000, ++ 0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000, ++ 0x399c25f500000000, 
0xace2eeca00000000, 0xe0ca786900000000, ++ 0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000, ++ 0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000, ++ 0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000, ++ 0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000, ++ 0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000, ++ 0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000, ++ 0xb521428400000000, 0xf909d42700000000, 0x762efede00000000, ++ 0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000, ++ 0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000, ++ 0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000, ++ 0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000, ++ 0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000, ++ 0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000, ++ 0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000, ++ 0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000, ++ 0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000, ++ 0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000, ++ 0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000, ++ 0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000, ++ 0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000, ++ 0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000, ++ 0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000, ++ 0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000, ++ 0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000, ++ 0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000, ++ 0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000, ++ 0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000, ++ 0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000, ++ 0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000, ++ 0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000, ++ 0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000, ++ 
0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000, ++ 0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000, ++ 0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000, ++ 0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000, ++ 0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000, ++ 0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000, ++ 0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000, ++ 0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000, ++ 0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000, ++ 0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000, ++ 0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000, ++ 0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000, ++ 0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000, ++ 0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000, ++ 0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000, ++ 0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000, ++ 0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000, ++ 0x5d6b0b0d00000000, 0x11439dae00000000, 0x843d569100000000, ++ 0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000, ++ 0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000, ++ 0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000, ++ 0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000, ++ 0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000, ++ 0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000, ++ 0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000, ++ 0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000, ++ 0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000, ++ 0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000, ++ 0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000, ++ 0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000, ++ 0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000, ++ 0xf189e5c700000000, 0xbda1736400000000, 
0x7c2595a400000000, ++ 0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000, ++ 0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000, ++ 0x1af0957800000000}, ++ {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000, ++ 0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000, ++ 0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000, ++ 0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000, ++ 0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000, ++ 0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000, ++ 0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000, ++ 0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000, ++ 0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000, ++ 0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000, ++ 0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000, ++ 0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000, ++ 0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000, ++ 0x9182c75d00000000, 0x339664b100000000, 0xa00dadce00000000, ++ 0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000, ++ 0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000, ++ 0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000, ++ 0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000, ++ 0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000, ++ 0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000, ++ 0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000, ++ 0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000, ++ 0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000, ++ 0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000, ++ 0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000, ++ 0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000, ++ 0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000, ++ 0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000, ++ 0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000, 
++ 0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000, ++ 0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000, ++ 0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000, ++ 0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000, ++ 0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000, ++ 0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000, ++ 0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000, ++ 0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000, ++ 0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000, ++ 0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000, ++ 0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000, ++ 0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000, ++ 0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000, ++ 0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000, ++ 0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000, ++ 0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000, ++ 0x9ed0c6f500000000, 0x3cc4651900000000, 0xaf5fac6600000000, ++ 0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000, ++ 0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000, ++ 0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000, ++ 0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000, ++ 0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000, ++ 0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000, ++ 0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000, ++ 0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000, ++ 0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000, ++ 0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000, ++ 0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000, ++ 0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000, ++ 0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000, ++ 0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000, ++ 0x369a0b1d00000000, 0xa501c26200000000, 
0x10ad98e200000000, ++ 0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000, ++ 0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000, ++ 0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000, ++ 0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000, ++ 0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000, ++ 0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000, ++ 0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000, ++ 0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000, ++ 0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000, ++ 0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000, ++ 0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000, ++ 0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000, ++ 0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000, ++ 0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000, ++ 0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000, ++ 0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000, ++ 0xa41ce92400000000, 0x06084ac800000000, 0x959383b700000000, ++ 0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000, ++ 0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000, ++ 0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000, ++ 0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000, ++ 0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000, ++ 0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000, ++ 0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000, ++ 0x8a6c1afd00000000}, ++ {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000, ++ 0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000, ++ 0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000, ++ 0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000, ++ 0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000, ++ 0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000, ++ 0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000, 
++ 0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000, ++ 0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000, ++ 0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000, ++ 0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000, ++ 0x4f59322b00000000, 0xe044b99000000000, 0x854fc0f900000000, ++ 0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000, ++ 0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000, ++ 0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000, ++ 0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000, ++ 0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000, ++ 0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000, ++ 0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000, ++ 0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000, ++ 0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000, ++ 0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000, ++ 0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000, ++ 0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000, ++ 0xfef3bc7900000000, 0x9bf8c51000000000, 0x34e54eab00000000, ++ 0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000, ++ 0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000, ++ 0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000, ++ 0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000, ++ 0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000, ++ 0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000, ++ 0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000, ++ 0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000, ++ 0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000, ++ 0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000, ++ 0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000, ++ 0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000, ++ 0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000, ++ 0xa1490a3500000000, 0xc442735c00000000, 
0xbe746d9900000000, ++ 0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000, ++ 0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000, ++ 0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000, ++ 0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000, ++ 0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000, ++ 0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000, ++ 0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000, ++ 0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000, ++ 0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000, ++ 0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000, ++ 0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000, ++ 0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000, ++ 0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000, ++ 0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000, ++ 0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000, ++ 0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000, ++ 0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000, ++ 0x694b3d6d00000000, 0x0c40440400000000, 0xa35dcfbf00000000, ++ 0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000, ++ 0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000, ++ 0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000, ++ 0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000, ++ 0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000, ++ 0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000, ++ 0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000, ++ 0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000, ++ 0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000, ++ 0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000, ++ 0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000, ++ 0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000, ++ 0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000, ++ 0x62551da500000000, 
0x075e64cc00000000, 0x7d687a0900000000, ++ 0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000, ++ 0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000, ++ 0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000, ++ 0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000, ++ 0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000, ++ 0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000, ++ 0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000, ++ 0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000, ++ 0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000, ++ 0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000, ++ 0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000, ++ 0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000, ++ 0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000, ++ 0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000, ++ 0x5270900d00000000}, ++ {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000, ++ 0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000, ++ 0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000, ++ 0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000, ++ 0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000, ++ 0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000, ++ 0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000, ++ 0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000, ++ 0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000, ++ 0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000, ++ 0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000, ++ 0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000, ++ 0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000, ++ 0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000, ++ 0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000, ++ 0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000, ++ 0xc64305b600000000, 0x1aafb48b00000000, 
0x7e9a66cd00000000, ++ 0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000, ++ 0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000, ++ 0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000, ++ 0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000, ++ 0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000, ++ 0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000, ++ 0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000, ++ 0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000, ++ 0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000, ++ 0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000, ++ 0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000, ++ 0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000, ++ 0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000, ++ 0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000, ++ 0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000, ++ 0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000, ++ 0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000, ++ 0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000, ++ 0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000, ++ 0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000, ++ 0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000, ++ 0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000, ++ 0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000, ++ 0x2e2079ec00000000, 0xf2ccc8d100000000, 0x96f91a9700000000, ++ 0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000, ++ 0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000, ++ 0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000, ++ 0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000, ++ 0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000, ++ 0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000, ++ 0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000, ++ 0xd0c7f8b400000000, 
0x0c2b498900000000, 0x681e9bcf00000000, ++ 0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000, ++ 0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000, ++ 0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000, ++ 0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000, ++ 0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000, ++ 0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000, ++ 0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000, ++ 0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000, ++ 0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000, ++ 0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000, ++ 0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000, ++ 0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000, ++ 0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000, ++ 0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000, ++ 0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000, ++ 0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000, ++ 0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000, ++ 0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000, ++ 0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000, ++ 0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000, ++ 0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000, ++ 0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000, ++ 0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000, ++ 0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000, ++ 0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000, ++ 0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000, ++ 0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000, ++ 0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000, ++ 0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000, ++ 0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000, ++ 0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000, ++ 
0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000, ++ 0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000, ++ 0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000, ++ 0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000, ++ 0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000, ++ 0xa8a0688500000000}}; ++ ++#else /* W == 4 */ ++ ++static const uint32_t crc_braid_table[][256] = { ++ {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, ++ 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, ++ 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, ++ 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, ++ 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, ++ 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, ++ 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, ++ 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, ++ 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, ++ 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, ++ 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, ++ 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, ++ 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, ++ 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, ++ 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, ++ 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, ++ 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, ++ 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, ++ 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, ++ 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, ++ 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, ++ 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, ++ 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, ++ 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, ++ 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, ++ 
0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, ++ 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, ++ 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, ++ 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, ++ 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, ++ 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, ++ 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, ++ 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, ++ 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, ++ 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, ++ 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, ++ 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, ++ 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, ++ 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, ++ 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, ++ 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, ++ 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, ++ 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, ++ 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, ++ 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, ++ 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, ++ 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, ++ 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, ++ 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, ++ 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, ++ 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, ++ 0x09cd8551}, ++ {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, ++ 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, ++ 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, ++ 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, ++ 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, ++ 0x9ab82fbe, 0xec5d1683, 
0x772efc57, 0x019764f9, 0x9ae48e2d, ++ 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, ++ 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, ++ 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, ++ 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, ++ 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, ++ 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, ++ 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, ++ 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, ++ 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, ++ 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, ++ 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, ++ 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, ++ 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, ++ 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, ++ 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, ++ 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, ++ 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, ++ 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, ++ 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, ++ 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, ++ 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, ++ 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, ++ 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, ++ 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, ++ 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, ++ 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, ++ 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, ++ 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, ++ 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, ++ 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, ++ 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, ++ 
0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, ++ 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, ++ 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, ++ 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, ++ 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, ++ 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, ++ 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, ++ 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, ++ 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, ++ 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, ++ 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, ++ 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, ++ 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, ++ 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, ++ 0x7bc97a0c}, ++ {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, ++ 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, ++ 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, ++ 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, ++ 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, ++ 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, ++ 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, ++ 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, ++ 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, ++ 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, ++ 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, ++ 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, ++ 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, ++ 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, ++ 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, ++ 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, ++ 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, ++ 0x28309e46, 0x11483306, 
0x066057c6, 0xdafa4f86, 0xcdd22b46, ++ 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, ++ 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, ++ 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, ++ 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, ++ 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, ++ 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, ++ 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, ++ 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, ++ 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, ++ 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, ++ 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, ++ 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, ++ 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, ++ 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, ++ 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, ++ 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, ++ 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, ++ 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, ++ 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, ++ 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, ++ 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, ++ 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, ++ 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, ++ 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, ++ 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, ++ 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, ++ 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, ++ 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, ++ 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, ++ 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, ++ 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, ++ 
0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, ++ 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, ++ 0x7851a2ca}, ++ {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, ++ 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, ++ 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, ++ 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, ++ 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, ++ 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, ++ 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, ++ 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, ++ 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, ++ 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, ++ 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, ++ 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, ++ 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, ++ 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, ++ 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, ++ 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, ++ 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, ++ 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, ++ 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, ++ 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, ++ 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, ++ 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, ++ 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, ++ 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, ++ 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, ++ 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, ++ 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, ++ 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, ++ 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, ++ 0xe68d0cd1, 0x9d938e32, 
0x02490dac, 0x6bae8bf4, 0xf474086a, ++ 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, ++ 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, ++ 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, ++ 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, ++ 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, ++ 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, ++ 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, ++ 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, ++ 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, ++ 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, ++ 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, ++ 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, ++ 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, ++ 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, ++ 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, ++ 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, ++ 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, ++ 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, ++ 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, ++ 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, ++ 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, ++ 0x566b6848}}; ++ ++static const z_word_t crc_braid_big_table[][256] = { ++ {0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912, ++ 0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba, ++ 0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3, ++ 0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30, ++ 0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e, ++ 0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3, ++ 0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73, ++ 0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe, ++ 0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 
0x1528c3a0, ++ 0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643, ++ 0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a, ++ 0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082, ++ 0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4, ++ 0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279, ++ 0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735, ++ 0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8, ++ 0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad, ++ 0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05, ++ 0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c, ++ 0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718, ++ 0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46, ++ 0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb, ++ 0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc, ++ 0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41, ++ 0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f, ++ 0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad, ++ 0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4, ++ 0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c, ++ 0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779, ++ 0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4, ++ 0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8, ++ 0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235, ++ 0x7bb87ba5, 0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7, ++ 0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f, ++ 0x70b04d64, 0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476, ++ 0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195, ++ 0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb, ++ 0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46, ++ 0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622, ++ 0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af, ++ 0x24d9a0e3, 0xba5a7a7c, 
0x59d86407, 0xc75bbe98, 0x9fdd59f1, ++ 0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12, ++ 0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b, ++ 0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3, ++ 0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51, ++ 0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc, ++ 0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90, ++ 0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d, ++ 0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708, ++ 0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0, ++ 0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9, ++ 0x48686b56}, ++ {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c, ++ 0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae, ++ 0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb, ++ 0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90, ++ 0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410, ++ 0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b, ++ 0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6, ++ 0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed, ++ 0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d, ++ 0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036, ++ 0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953, ++ 0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1, ++ 0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca, ++ 0x074766dd, 0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781, ++ 0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d, ++ 0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416, ++ 0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f, ++ 0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd, ++ 0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8, ++ 0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b, ++ 0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 
0x049792fb, ++ 0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0, ++ 0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5, ++ 0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e, ++ 0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e, ++ 0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558, ++ 0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d, ++ 0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf, ++ 0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6, ++ 0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad, ++ 0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971, ++ 0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a, ++ 0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b, ++ 0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969, ++ 0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c, ++ 0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57, ++ 0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7, ++ 0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c, ++ 0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab, ++ 0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0, ++ 0x0844e03c, 0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160, ++ 0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b, ++ 0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e, ++ 0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac, ++ 0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d, ++ 0x0b94141a, 0x4b396c23, 0x8b5d4434, 0xcb639d51, 0x0b07b546, ++ 0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a, ++ 0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1, ++ 0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8, ++ 0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a, ++ 0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f, ++ 0xcaa25178}, ++ {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00, ++ 0x474b2f9b, 
0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b, ++ 0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed, ++ 0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777, ++ 0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01, ++ 0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a, ++ 0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef, ++ 0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74, ++ 0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002, ++ 0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498, ++ 0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee, ++ 0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75, ++ 0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05, ++ 0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e, ++ 0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8, ++ 0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73, ++ 0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404, ++ 0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f, ++ 0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9, ++ 0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71, ++ 0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607, ++ 0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c, ++ 0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb, ++ 0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470, ++ 0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806, ++ 0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790, ++ 0x89e102e6, 0x5d0b717d, 0xf393c80b, 0x2779bb90, 0x1a405ee6, ++ 0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d, ++ 0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a, ++ 0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991, ++ 0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7, ++ 0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c, ++ 0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 
0x6b9f2d09, ++ 0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92, ++ 0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4, ++ 0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e, ++ 0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08, ++ 0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593, ++ 0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3, ++ 0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778, ++ 0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e, ++ 0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94, ++ 0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2, ++ 0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079, ++ 0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c, ++ 0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497, ++ 0xb25571e1, 0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1, ++ 0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a, ++ 0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d, ++ 0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396, ++ 0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0, ++ 0x0c7ac97b}, ++ {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669, ++ 0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853, ++ 0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062, ++ 0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527, ++ 0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad, ++ 0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545, ++ 0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27, ++ 0x5257c8a6, 0x2560d097, 0x0205f516, 0x2aaceb4e, 0x0dc9cecf, ++ 0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45, ++ 0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800, ++ 0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031, ++ 0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b, ++ 0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26, ++ 0x929ff9a7, 
0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce, ++ 0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d, ++ 0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5, ++ 0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130, ++ 0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a, ++ 0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b, ++ 0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480, ++ 0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a, ++ 0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2, ++ 0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e, ++ 0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996, ++ 0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c, ++ 0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc, ++ 0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd, ++ 0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7, ++ 0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232, ++ 0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da, ++ 0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439, ++ 0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1, ++ 0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da, ++ 0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0, ++ 0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1, ++ 0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94, ++ 0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e, ++ 0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6, ++ 0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2, ++ 0xf7a67b33, 0x80916302, 0xa7f44683, 0x8f5d58db, 0xa8387d5a, ++ 0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0, ++ 0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95, ++ 0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4, ++ 0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e, ++ 0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 
0xa5f1b395, ++ 0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d, ++ 0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e, ++ 0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676, ++ 0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83, ++ 0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9, ++ 0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888, ++ 0x5185cd09}}; ++ ++#endif /* W */ ++ ++#endif /* N == 6 */ ++ ++static const uint32_t x2n_table[] = { ++ 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, ++ 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, ++ 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, ++ 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, ++ 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, ++ 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, ++ 0xc40ba6d0, 0xc4e22c3c}; ++ ++#endif /* CRC32_BRAID_TBL_H_ */ +diff --git a/crc32_fold.c b/crc32_fold.c +new file mode 100644 +index 0000000..5b3c7c4 +--- /dev/null ++++ b/crc32_fold.c +@@ -0,0 +1,33 @@ ++/* crc32_fold.c -- crc32 folding interface ++ * Copyright (C) 2021 Nathan Moinvaziri ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#include "zbuild.h" ++#include "functable.h" ++ ++#include "crc32_fold.h" ++ ++#include ++ ++Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) { ++ crc->value = CRC32_INITIAL_VALUE; ++ return crc->value; ++} ++ ++Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { ++ crc->value = functable.crc32(crc->value, src, len); ++ memcpy(dst, src, len); ++} ++ ++Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc) { ++ /* Note: while this is basically the same thing as the vanilla CRC function, we still need ++ * a functable entry for it so that we can generically dispatch to this function with the ++ * same arguments for the versions that _do_ do a 
folding CRC but we don't want a copy. The ++ * init_crc is an unused argument in this context */ ++ Z_UNUSED(init_crc); ++ crc->value = functable.crc32(crc->value, src, len); ++} ++ ++Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc) { ++ return crc->value; ++} +diff --git a/crc32_fold.h b/crc32_fold.h +new file mode 100644 +index 0000000..0d2ff66 +--- /dev/null ++++ b/crc32_fold.h +@@ -0,0 +1,21 @@ ++/* crc32_fold.h -- crc32 folding interface ++ * Copyright (C) 2021 Nathan Moinvaziri ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++#ifndef CRC32_FOLD_H_ ++#define CRC32_FOLD_H_ ++ ++#define CRC32_FOLD_BUFFER_SIZE (16 * 4) ++/* sizeof(__m128i) * (4 folds) */ ++ ++typedef struct crc32_fold_s { ++ uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; ++ uint32_t value; ++} crc32_fold; ++ ++Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); ++Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); ++Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); ++Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); ++ ++#endif +diff --git a/deflate.c b/deflate.c +index 031a1bb..2a0a20e 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -1,5 +1,5 @@ + /* deflate.c -- compress data using the deflation algorithm +- * Copyright (C) 1995-2016 Jean-loup Gailly and Mark Adler ++ * Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -52,7 +52,13 @@ + #include "deflate_p.h" + #include "functable.h" + +-const char PREFIX(deflate_copyright)[] = " deflate 1.2.11.f Copyright 1995-2016 Jean-loup Gailly and Mark Adler "; ++/* Avoid conflicts with zlib.h macros */ ++#ifdef ZLIB_COMPAT ++# undef deflateInit ++# undef deflateInit2 ++#endif ++ ++const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jean-loup Gailly and Mark Adler "; + /* + If you use the zlib library in a 
product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot +@@ -67,9 +73,9 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.2.11.f Copyright 1995-2016 + # include "arch/s390/dfltcc_deflate.h" + #else + /* Memory management for the deflate state. Useful for allocating arch-specific extension blocks. */ +-# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) ++# define ZALLOC_DEFLATE_STATE(strm) ((deflate_state *)ZALLOC(strm, 1, sizeof(deflate_state))) + # define ZFREE_STATE(strm, addr) ZFREE(strm, addr) +-# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) ++# define ZCOPY_DEFLATE_STATE(dst, src) memcpy(dst, src, sizeof(deflate_state)) + /* Memory management for the window. Useful for allocation the aligned window. */ + # define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) + # define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr) +@@ -82,7 +88,7 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.2.11.f Copyright 1995-2016 + /* Invoked at the beginning of deflateParams(). Useful for updating arch-specific compression parameters. */ + # define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) + /* Returns whether the last deflate(flush) operation did everything it's supposed to do. */ +-# define DEFLATE_DONE(strm, flush) 1 ++# define DEFLATE_DONE(strm, flush) 1 + /* Adjusts the upper bound on compressed data length based on compression parameters and uncompressed data length. + * Useful when arch-specific deflation code behaves differently than regular zlib-ng algorithms. */ + # define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) +@@ -100,27 +106,23 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.2.11.f Copyright 1995-2016 + /* =========================================================================== + * Function prototypes. 
+ */ +-typedef block_state (*compress_func) (deflate_state *s, int flush); +-/* Compression function. Returns the block state after the call. */ +- + static int deflateStateCheck (PREFIX3(stream) *strm); +-static block_state deflate_stored (deflate_state *s, int flush); +-Z_INTERNAL block_state deflate_fast (deflate_state *s, int flush); +-Z_INTERNAL block_state deflate_quick (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_fast (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_quick (deflate_state *s, int flush); + #ifndef NO_MEDIUM_STRATEGY +-Z_INTERNAL block_state deflate_medium (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush); + #endif +-Z_INTERNAL block_state deflate_slow (deflate_state *s, int flush); +-static block_state deflate_rle (deflate_state *s, int flush); +-static block_state deflate_huff (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_slow (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_rle (deflate_state *s, int flush); ++Z_INTERNAL block_state deflate_huff (deflate_state *s, int flush); ++static void lm_set_level (deflate_state *s, int level); + static void lm_init (deflate_state *s); + Z_INTERNAL unsigned read_buf (PREFIX3(stream) *strm, unsigned char *buf, unsigned size); + +-extern void crc_reset(deflate_state *const s); +-#ifdef X86_PCLMULQDQ_CRC +-extern void crc_finalize(deflate_state *const s); +-#endif +-extern void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size); ++extern uint32_t update_hash_roll (deflate_state *const s, uint32_t h, uint32_t val); ++extern void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); ++extern Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); + + /* =========================================================================== + * Local data +@@ -143,22 +145,22 @@ 
static const config configuration_table[10] = { + /* good lazy nice chain */ + /* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ + +-#ifndef NO_QUICK_STRATEGY +-/* 1 */ {4, 4, 8, 4, deflate_quick}, +-/* 2 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +-#else ++#ifdef NO_QUICK_STRATEGY + /* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ + /* 2 */ {4, 5, 16, 8, deflate_fast}, ++#else ++/* 1 */ {0, 0, 0, 0, deflate_quick}, ++/* 2 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ + #endif + +-/* 3 */ {4, 6, 32, 32, deflate_fast}, +- + #ifdef NO_MEDIUM_STRATEGY ++/* 3 */ {4, 6, 32, 32, deflate_fast}, + /* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ + /* 5 */ {8, 16, 32, 32, deflate_slow}, + /* 6 */ {8, 16, 128, 128, deflate_slow}, + #else +-/* 4 */ {4, 4, 16, 16, deflate_medium}, /* lazy matches */ ++/* 3 */ {4, 6, 16, 6, deflate_medium}, ++/* 4 */ {4, 12, 32, 24, deflate_medium}, /* lazy matches */ + /* 5 */ {8, 16, 32, 32, deflate_medium}, + /* 6 */ {8, 16, 128, 128, deflate_medium}, + #endif +@@ -167,7 +169,7 @@ static const config configuration_table[10] = { + /* 8 */ {32, 128, 258, 1024, deflate_slow}, + /* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ + +-/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 ++/* Note: the deflate() code requires max_lazy >= STD_MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ +@@ -179,132 +181,56 @@ static const config configuration_table[10] = { + /* =========================================================================== + * Initialize the hash table. prev[] will be initialized on the fly. 
+ */ +-#define CLEAR_HASH(s) do { \ ++#define CLEAR_HASH(s) do { \ + memset((unsigned char *)s->head, 0, HASH_SIZE * sizeof(*s->head)); \ + } while (0) + +-/* =========================================================================== +- * Slide the hash table when sliding the window down (could be avoided with 32 +- * bit values at the expense of memory usage). We slide even when level == 0 to +- * keep the hash table consistent if we switch back to level > 0 later. +- */ +-Z_INTERNAL void slide_hash_c(deflate_state *s) { +- Pos *p; +- unsigned n; +- unsigned int wsize = s->w_size; +- +- n = HASH_SIZE; +- p = &s->head[n]; +-#ifdef NOT_TWEAK_COMPILER +- do { +- unsigned m; +- m = *--p; +- *p = (Pos)(m >= wsize ? m-wsize : 0); +- } while (--n); +-#else +- /* As of I make this change, gcc (4.8.*) isn't able to vectorize +- * this hot loop using saturated-subtraction on x86-64 architecture. +- * To avoid this defect, we can change the loop such that +- * o. the pointer advance forward, and +- * o. demote the variable 'm' to be local to the loop, and +- * choose type "Pos" (instead of 'unsigned int') for the +- * variable to avoid unnecessary zero-extension. +- */ +- { +- unsigned int i; +- Pos *q = p - n; +- for (i = 0; i < n; i++) { +- Pos m = *q; +- Pos t = (Pos)wsize; +- *q++ = (Pos)(m >= t ? m-t: 0); +- } +- } +-#endif /* NOT_TWEAK_COMPILER */ +- +- n = wsize; +- p = &s->prev[n]; +-#ifdef NOT_TWEAK_COMPILER +- do { +- unsigned m; +- m = *--p; +- *p = (Pos)(m >= wsize ? m-wsize : 0); +- /* If n is not on any hash chain, prev[n] is garbage but +- * its value will never be used. +- */ +- } while (--n); +-#else +- { +- unsigned int i; +- Pos *q = p - n; +- for (i = 0; i < n; i++) { +- Pos m = *q; +- Pos t = (Pos)wsize; +- *q++ = (Pos)(m >= t ? 
m-t: 0); +- } +- } +-#endif /* NOT_TWEAK_COMPILER */ +-} +- + /* ========================================================================= */ +-int32_t Z_EXPORT PREFIX(deflateInit_)(PREFIX3(stream) *strm, int32_t level, const char *version, int32_t stream_size) { +- return PREFIX(deflateInit2_)(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, version, stream_size); ++/* This function is hidden in ZLIB_COMPAT builds. */ ++int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits, ++ int32_t memLevel, int32_t strategy) { + /* Todo: ignore strm->next_in if we use it as window */ +-} +- +-/* ========================================================================= */ +-int32_t Z_EXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits, +- int32_t memLevel, int32_t strategy, const char *version, int32_t stream_size) { + uint32_t window_padding = 0; + deflate_state *s; + int wrap = 1; +- static const char my_version[] = PREFIX2(VERSION); + +-#if defined(X86_FEATURES) +- x86_check_features(); +-#elif defined(ARM_FEATURES) +- arm_check_features(); +-#endif ++ /* Force initialization functable, because deflate captures function pointers from functable. 
*/ ++ functable.force_init(); + +- if (version == NULL || version[0] != my_version[0] || stream_size != sizeof(PREFIX3(stream))) { +- return Z_VERSION_ERROR; +- } + if (strm == NULL) + return Z_STREAM_ERROR; + + strm->msg = NULL; + if (strm->zalloc == NULL) { +- strm->zalloc = zng_calloc; ++ strm->zalloc = PREFIX(zcalloc); + strm->opaque = NULL; + } + if (strm->zfree == NULL) +- strm->zfree = zng_cfree; ++ strm->zfree = PREFIX(zcfree); + + if (level == Z_DEFAULT_COMPRESSION) + level = 6; + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; ++ if (windowBits < -MAX_WBITS) ++ return Z_STREAM_ERROR; + windowBits = -windowBits; + #ifdef GZIP +- } else if (windowBits > 15) { ++ } else if (windowBits > MAX_WBITS) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + #endif + } +- if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || windowBits < 8 || +- windowBits > 15 || level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED || ++ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || windowBits < MIN_WBITS || ++ windowBits > MAX_WBITS || level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED || + (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) + windowBits = 9; /* until 256-byte window bug fixed */ + +-#if !defined(NO_QUICK_STRATEGY) && !defined(S390_DFLTCC_DEFLATE) +- if (level == 1) +- windowBits = 13; +-#endif +- +- s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state)); ++ s = ZALLOC_DEFLATE_STATE(strm); + if (s == NULL) + return Z_MEM_ERROR; + strm->state = (struct internal_state *)s; +@@ -393,24 +319,36 @@ int32_t Z_EXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int32_t level, int + return PREFIX(deflateReset)(strm); + } + ++#ifndef ZLIB_COMPAT ++int32_t Z_EXPORT PREFIX(deflateInit)(PREFIX3(stream) *strm, int32_t level) { ++ return PREFIX(deflateInit2)(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); ++} ++#endif 
++ ++/* Function used by zlib.h and zlib-ng version 2.0 macros */ ++int32_t Z_EXPORT PREFIX(deflateInit_)(PREFIX3(stream) *strm, int32_t level, const char *version, int32_t stream_size) { ++ if (CHECK_VER_STSIZE(version, stream_size)) ++ return Z_VERSION_ERROR; ++ return PREFIX(deflateInit2)(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); ++} ++ ++/* Function used by zlib.h and zlib-ng version 2.0 macros */ ++int32_t Z_EXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits, ++ int32_t memLevel, int32_t strategy, const char *version, int32_t stream_size) { ++ if (CHECK_VER_STSIZE(version, stream_size)) ++ return Z_VERSION_ERROR; ++ return PREFIX(deflateInit2)(strm, level, method, windowBits, memLevel, strategy); ++} ++ + /* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +-static int deflateStateCheck (PREFIX3(stream) *strm) { ++static int deflateStateCheck(PREFIX3(stream) *strm) { + deflate_state *s; + if (strm == NULL || strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; +- if (s == NULL || s->strm != strm || (s->status != INIT_STATE && +-#ifdef GZIP +- s->status != GZIP_STATE && +-#endif +- s->status != EXTRA_STATE && +- s->status != NAME_STATE && +- s->status != COMMENT_STATE && +- s->status != HCRC_STATE && +- s->status != BUSY_STATE && +- s->status != FINISH_STATE)) ++ if (s == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE)) + return 1; + return 0; + } +@@ -453,20 +391,20 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8 + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const unsigned char *)dictionary; +- fill_window(s); +- while (s->lookahead >= MIN_MATCH) { ++ PREFIX(fill_window)(s); ++ while (s->lookahead >= STD_MIN_MATCH) { + str = s->strstart; +- n = 
s->lookahead - (MIN_MATCH-1); +- functable.insert_string(s, str, n); ++ n = s->lookahead - (STD_MIN_MATCH - 1); ++ s->insert_string(s, str, n); + s->strstart = str + n; +- s->lookahead = MIN_MATCH-1; +- fill_window(s); ++ s->lookahead = STD_MIN_MATCH - 1; ++ PREFIX(fill_window)(s); + } + s->strstart += s->lookahead; + s->block_start = (int)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; +- s->prev_length = MIN_MATCH-1; ++ s->prev_length = 0; + s->match_available = 0; + strm->next_in = (z_const unsigned char *)next; + strm->avail_in = avail; +@@ -518,9 +456,9 @@ int32_t Z_EXPORT PREFIX(deflateResetKeep)(PREFIX3(stream) *strm) { + INIT_STATE; + + #ifdef GZIP +- if (s->wrap == 2) +- crc_reset(s); +- else ++ if (s->wrap == 2) { ++ strm->adler = functable.crc32_fold_reset(&s->crc_fold); ++ } else + #endif + strm->adler = ADLER32_INITIAL_VALUE; + s->last_flush = -2; +@@ -534,9 +472,7 @@ int32_t Z_EXPORT PREFIX(deflateResetKeep)(PREFIX3(stream) *strm) { + + /* ========================================================================= */ + int32_t Z_EXPORT PREFIX(deflateReset)(PREFIX3(stream) *strm) { +- int ret; +- +- ret = PREFIX(deflateResetKeep)(strm); ++ int ret = PREFIX(deflateResetKeep)(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +@@ -575,8 +511,8 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 + return Z_BUF_ERROR; + do { + put = BIT_BUF_SIZE - s->bi_valid; +- if (put > bits) +- put = bits; ++ put = MIN(put, bits); ++ + if (s->bi_valid == 0) + s->bi_buf = value64; + else +@@ -625,11 +561,8 @@ int32_t Z_EXPORT PREFIX(deflateParams)(PREFIX3(stream) *strm, int32_t level, int + } + s->matches = 0; + } +- s->level = level; +- s->max_lazy_match = configuration_table[level].max_lazy; +- s->good_match = configuration_table[level].good_length; +- s->nice_match = configuration_table[level].nice_length; +- s->max_chain_length = configuration_table[level].max_chain; ++ ++ lm_set_level(s, level); + } + 
s->strategy = strategy; + return Z_OK; +@@ -718,11 +651,20 @@ unsigned long Z_EXPORT PREFIX(deflateBound)(PREFIX3(stream) *strm, unsigned long + + /* if not default parameters, return conservative bound */ + if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) || /* hook for IBM Z DFLTCC */ +- s->w_bits != 15 || HASH_BITS < 15) ++ s->w_bits != MAX_WBITS || HASH_BITS < 15) { ++ if (s->level == 0) { ++ /* upper bound for stored blocks with length 127 (memLevel == 1) -- ++ ~4% overhead plus a small constant */ ++ complen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) + (sourceLen >> 11) + 7; ++ } ++ + return complen + wraplen; ++ } + + #ifndef NO_QUICK_STRATEGY + return sourceLen /* The source size itself */ ++ + (sourceLen == 0 ? 1 : 0) /* Always at least one byte for any input */ ++ + (sourceLen < 9 ? 1 : 0) /* One extra byte for lengths less than 9 */ + + DEFLATE_QUICK_OVERHEAD(sourceLen) /* Source encoding overhead, padded to next full byte */ + + DEFLATE_BLOCK_OVERHEAD /* Deflate block overhead bytes */ + + wraplen; /* none, zlib or gzip wrapper */ +@@ -737,14 +679,12 @@ unsigned long Z_EXPORT PREFIX(deflateBound)(PREFIX3(stream) *strm, unsigned long + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). 
+ */ +-Z_INTERNAL void flush_pending(PREFIX3(stream) *strm) { ++Z_INTERNAL void PREFIX(flush_pending)(PREFIX3(stream) *strm) { + uint32_t len; + deflate_state *s = strm->state; + + zng_tr_flush_bits(s); +- len = s->pending; +- if (len > strm->avail_out) +- len = strm->avail_out; ++ len = MIN(s->pending, strm->avail_out); + if (len == 0) + return; + +@@ -790,7 +730,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + + /* Flush as much pending output as possible */ + if (s->pending != 0) { +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and +@@ -845,7 +785,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -854,7 +794,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + #ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ +- crc_reset(s); ++ functable.crc32_fold_reset(&s->crc_fold); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); +@@ -867,7 +807,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -901,7 +841,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -924,7 +864,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + do { + 
if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -947,7 +887,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -964,19 +904,19 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_short(s, (uint16_t)strm->adler); +- crc_reset(s); ++ functable.crc32_fold_reset(&s->crc_fold); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; +@@ -1028,7 +968,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + } + } + } +- flush_pending(strm); ++ PREFIX(flush_pending)(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; +@@ -1042,16 +982,17 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { + /* Write the trailer */ + #ifdef GZIP + if (s->wrap == 2) { +-# ifdef X86_PCLMULQDQ_CRC +- crc_finalize(s); +-# endif ++ strm->adler = functable.crc32_fold_final(&s->crc_fold); ++ + put_uint32(s, strm->adler); + put_uint32(s, (uint32_t)strm->total_in); + } else + #endif +- if (s->wrap == 1) +- put_uint32_msb(s, strm->adler); +- flush_pending(strm); ++ { ++ if (s->wrap == 1) ++ put_uint32_msb(s, strm->adler); ++ } ++ PREFIX(flush_pending)(strm); + /* If avail_out is zero, the application will call deflate again + * to 
flush the rest. + */ +@@ -1100,11 +1041,11 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou + + memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); + +- ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state)); ++ ds = ZALLOC_DEFLATE_STATE(dest); + if (ds == NULL) + return Z_MEM_ERROR; + dest->state = (struct internal_state *) ds; +- ZCOPY_STATE((void *)ds, (void *)ss, sizeof(deflate_state)); ++ ZCOPY_DEFLATE_STATE(ds, ss); + ds->strm = dest; + + #ifdef X86_PCLMULQDQ_CRC +@@ -1143,11 +1084,8 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +-Z_INTERNAL unsigned read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size) { +- uint32_t len = strm->avail_in; +- +- if (len > size) +- len = size; ++Z_INTERNAL unsigned PREFIX(read_buf)(PREFIX3(stream) *strm, unsigned char *buf, unsigned size) { ++ uint32_t len = MIN(strm->avail_in, size); + if (len == 0) + return 0; + +@@ -1157,12 +1095,12 @@ Z_INTERNAL unsigned read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned + memcpy(buf, strm->next_in, len); + #ifdef GZIP + } else if (strm->state->wrap == 2) { +- copy_with_crc(strm, buf, len); ++ functable.crc32_fold_copy(&strm->state->crc_fold, buf, strm->next_in, len); + #endif ++ } else if (strm->state->wrap == 1) { ++ strm->adler = functable.adler32_fold_copy(strm->adler, buf, strm->next_in, len); + } else { + memcpy(buf, strm->next_in, len); +- if (strm->state->wrap == 1) +- strm->adler = functable.adler32(strm->adler, buf, len); + } + strm->next_in += len; + strm->total_in += len; +@@ -1170,6 +1108,31 @@ Z_INTERNAL unsigned read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned + return len; + } + ++/* =========================================================================== ++ * Set longest match variables based on level configuration ++ */ ++static void 
lm_set_level(deflate_state *s, int level) { ++ s->max_lazy_match = configuration_table[level].max_lazy; ++ s->good_match = configuration_table[level].good_length; ++ s->nice_match = configuration_table[level].nice_length; ++ s->max_chain_length = configuration_table[level].max_chain; ++ ++ /* Use rolling hash for deflate_slow algorithm with level 9. It allows us to ++ * properly lookup different hash chains to speed up longest_match search. Since hashing ++ * method changes depending on the level we cannot put this into functable. */ ++ if (s->max_chain_length > 1024) { ++ s->update_hash = &update_hash_roll; ++ s->insert_string = &insert_string_roll; ++ s->quick_insert_string = &quick_insert_string_roll; ++ } else { ++ s->update_hash = functable.update_hash; ++ s->insert_string = functable.insert_string; ++ s->quick_insert_string = functable.quick_insert_string; ++ } ++ ++ s->level = level; ++} ++ + /* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +@@ -1180,58 +1143,18 @@ static void lm_init(deflate_state *s) { + + /* Set the default configuration parameters: + */ +- s->max_lazy_match = configuration_table[s->level].max_lazy; +- s->good_match = configuration_table[s->level].good_length; +- s->nice_match = configuration_table[s->level].nice_length; +- s->max_chain_length = configuration_table[s->level].max_chain; ++ lm_set_level(s, s->level); + + s->strstart = 0; + s->block_start = 0; + s->lookahead = 0; + s->insert = 0; +- s->prev_length = MIN_MATCH-1; ++ s->prev_length = 0; + s->match_available = 0; + s->match_start = 0; ++ s->ins_h = 0; + } + +-#ifdef ZLIB_DEBUG +-#define EQUAL 0 +-/* result of memcmp for equal strings */ +- +-/* =========================================================================== +- * Check that the match at match_start is indeed a match. 
+- */ +-void check_match(deflate_state *s, Pos start, Pos match, int length) { +- /* check that the match length is valid*/ +- if (length < MIN_MATCH || length > MAX_MATCH) { +- fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); +- z_error("invalid match length"); +- } +- /* check that the match isn't at the same position as the start string */ +- if (match == start) { +- fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); +- z_error("invalid match position"); +- } +- /* check that the match is indeed a match */ +- if (memcmp(s->window + match, s->window + start, length) != EQUAL) { +- int32_t i = 0; +- fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); +- do { +- fprintf(stderr, " %03d: match [%02x] start [%02x]\n", i++, s->window[match++], s->window[start++]); +- } while (--length != 0); +- z_error("invalid match"); +- } +- if (z_verbose > 1) { +- fprintf(stderr, "\\[%u,%d]", start-match, length); +- do { +- putc(s->window[start++], stderr); +- } while (--length != 0); +- } +-} +-#else +-# define check_match(s, start, match, length) +-#endif /* ZLIB_DEBUG */ +- + /* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. +@@ -1243,7 +1166,7 @@ void check_match(deflate_state *s, Pos start, Pos match, int length) { + * option -- not supported here). + */ + +-void Z_INTERNAL fill_window(deflate_state *s) { ++void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { + unsigned n; + unsigned int more; /* Amount of free space at the end of the window. 
*/ + unsigned int wsize = s->w_size; +@@ -1287,37 +1210,27 @@ void Z_INTERNAL fill_window(deflate_state *s) { + */ + Assert(more >= 2, "more < 2"); + +- n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); ++ n = PREFIX(read_buf)(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ +- if (s->lookahead + s->insert >= MIN_MATCH) { ++ if (s->lookahead + s->insert >= STD_MIN_MATCH) { + unsigned int str = s->strstart - s->insert; +- if (str >= 1) +- functable.quick_insert_string(s, str + 2 - MIN_MATCH); +-#if MIN_MATCH != 3 +-#error Call insert_string() MIN_MATCH-3 more times +- while (s->insert) { +- functable.quick_insert_string(s, str); +- str++; +- s->insert--; +- if (s->lookahead + s->insert < MIN_MATCH) +- break; ++ if (UNLIKELY(s->max_chain_length > 1024)) { ++ s->ins_h = s->update_hash(s, s->window[str], s->window[str+1]); ++ } else if (str >= 1) { ++ s->quick_insert_string(s, str + 2 - STD_MIN_MATCH); + } +-#else +- unsigned int count; ++ unsigned int count = s->insert; + if (UNLIKELY(s->lookahead == 1)) { +- count = s->insert - 1; +- } else { +- count = s->insert; ++ count -= 1; + } + if (count > 0) { +- functable.insert_string(s, str, count); ++ s->insert_string(s, str, count); + s->insert -= count; + } +-#endif + } +- /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, ++ /* If the whole input has less than STD_MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +@@ -1326,8 +1239,8 @@ void Z_INTERNAL fill_window(deflate_state *s) { + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next +- * time through here. 
WIN_INIT is set to MAX_MATCH since the longest match +- * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. ++ * time through here. WIN_INIT is set to STD_MAX_MATCH since the longest match ++ * routines allow scanning to strstart + STD_MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + unsigned int curr = s->strstart + s->lookahead; +@@ -1359,295 +1272,6 @@ void Z_INTERNAL fill_window(deflate_state *s) { + "not enough room for search"); + } + +-/* =========================================================================== +- * Copy without compression as much as possible from the input stream, return +- * the current block state. +- * +- * In case deflateParams() is used to later switch to a non-zero compression +- * level, s->matches (otherwise unused when storing) keeps track of the number +- * of hash table slides to perform. If s->matches is 1, then one hash table +- * slide will be done when switching. If s->matches is 2, the maximum value +- * allowed here, then the hash table will be cleared, since two or more slides +- * is the same as a clear. +- * +- * deflate_stored() is written to minimize the number of times an input byte is +- * copied. It is most efficient with large input and output buffers, which +- * maximizes the opportunites to have a single copy from next_in to next_out. +- */ +-static block_state deflate_stored(deflate_state *s, int flush) { +- /* Smallest worthy block size when not flushing or finishing. By default +- * this is 32K. This can be as small as 507 bytes for memLevel == 1. For +- * large input and output buffers, the stored block size will be larger. +- */ +- unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); +- +- /* Copy as many min_block or larger stored blocks directly to next_out as +- * possible. If flushing, copy the remaining available input to next_out as +- * stored blocks, if there is enough space. 
+- */ +- unsigned len, left, have, last = 0; +- unsigned used = s->strm->avail_in; +- do { +- /* Set len to the maximum size block that we can copy directly with the +- * available input data and output space. Set left to how much of that +- * would be copied from what's left in the window. +- */ +- len = MAX_STORED; /* maximum deflate stored block length */ +- have = (s->bi_valid + 42) >> 3; /* number of header bytes */ +- if (s->strm->avail_out < have) /* need room for header */ +- break; +- /* maximum stored block length that will fit in avail_out: */ +- have = s->strm->avail_out - have; +- left = (int)s->strstart - s->block_start; /* bytes left in window */ +- if (len > (unsigned long)left + s->strm->avail_in) +- len = left + s->strm->avail_in; /* limit len to the input */ +- if (len > have) +- len = have; /* limit len to the output */ +- +- /* If the stored block would be less than min_block in length, or if +- * unable to copy all of the available input when flushing, then try +- * copying to the window and the pending buffer instead. Also don't +- * write an empty block when flushing -- deflate() does that. +- */ +- if (len < min_block && ((len == 0 && flush != Z_FINISH) || flush == Z_NO_FLUSH || len != left + s->strm->avail_in)) +- break; +- +- /* Make a dummy stored block in pending to get the header bytes, +- * including any pending bits. This also updates the debugging counts. +- */ +- last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; +- zng_tr_stored_block(s, (char *)0, 0L, last); +- +- /* Replace the lengths in the dummy stored block with len. */ +- s->pending -= 4; +- put_short(s, (uint16_t)len); +- put_short(s, (uint16_t)~len); +- +- /* Write the stored block header bytes. */ +- flush_pending(s->strm); +- +- /* Update debugging counts for the data about to be copied. */ +- cmpr_bits_add(s, len << 3); +- sent_bits_add(s, len << 3); +- +- /* Copy uncompressed bytes from the window to next_out. 
*/ +- if (left) { +- if (left > len) +- left = len; +- memcpy(s->strm->next_out, s->window + s->block_start, left); +- s->strm->next_out += left; +- s->strm->avail_out -= left; +- s->strm->total_out += left; +- s->block_start += (int)left; +- len -= left; +- } +- +- /* Copy uncompressed bytes directly from next_in to next_out, updating +- * the check value. +- */ +- if (len) { +- read_buf(s->strm, s->strm->next_out, len); +- s->strm->next_out += len; +- s->strm->avail_out -= len; +- s->strm->total_out += len; +- } +- } while (last == 0); +- +- /* Update the sliding window with the last s->w_size bytes of the copied +- * data, or append all of the copied data to the existing window if less +- * than s->w_size bytes were copied. Also update the number of bytes to +- * insert in the hash tables, in the event that deflateParams() switches to +- * a non-zero compression level. +- */ +- used -= s->strm->avail_in; /* number of input bytes directly copied */ +- if (used) { +- /* If any input was used, then no unused input remains in the window, +- * therefore s->block_start == s->strstart. +- */ +- if (used >= s->w_size) { /* supplant the previous history */ +- s->matches = 2; /* clear hash */ +- memcpy(s->window, s->strm->next_in - s->w_size, s->w_size); +- s->strstart = s->w_size; +- s->insert = s->strstart; +- } else { +- if (s->window_size - s->strstart <= used) { +- /* Slide the window down. */ +- s->strstart -= s->w_size; +- memcpy(s->window, s->window + s->w_size, s->strstart); +- if (s->matches < 2) +- s->matches++; /* add a pending slide_hash() */ +- if (s->insert > s->strstart) +- s->insert = s->strstart; +- } +- memcpy(s->window + s->strstart, s->strm->next_in - used, used); +- s->strstart += used; +- s->insert += MIN(used, s->w_size - s->insert); +- } +- s->block_start = (int)s->strstart; +- } +- if (s->high_water < s->strstart) +- s->high_water = s->strstart; +- +- /* If the last block was written to next_out, then done. 
*/ +- if (last) +- return finish_done; +- +- /* If flushing and all input has been consumed, then done. */ +- if (flush != Z_NO_FLUSH && flush != Z_FINISH && s->strm->avail_in == 0 && (int)s->strstart == s->block_start) +- return block_done; +- +- /* Fill the window with any remaining input. */ +- have = s->window_size - s->strstart; +- if (s->strm->avail_in > have && s->block_start >= (int)s->w_size) { +- /* Slide the window down. */ +- s->block_start -= (int)s->w_size; +- s->strstart -= s->w_size; +- memcpy(s->window, s->window + s->w_size, s->strstart); +- if (s->matches < 2) +- s->matches++; /* add a pending slide_hash() */ +- have += s->w_size; /* more space now */ +- if (s->insert > s->strstart) +- s->insert = s->strstart; +- } +- if (have > s->strm->avail_in) +- have = s->strm->avail_in; +- if (have) { +- read_buf(s->strm, s->window + s->strstart, have); +- s->strstart += have; +- s->insert += MIN(have, s->w_size - s->insert); +- } +- if (s->high_water < s->strstart) +- s->high_water = s->strstart; +- +- /* There was not enough avail_out to write a complete worthy or flushed +- * stored block to next_out. Write a stored block to pending instead, if we +- * have enough input for a worthy block, or if flushing and there is enough +- * room for the remaining input as a stored block in the pending buffer. +- */ +- have = (s->bi_valid + 42) >> 3; /* number of header bytes */ +- /* maximum stored block length that will fit in pending: */ +- have = MIN(s->pending_buf_size - have, MAX_STORED); +- min_block = MIN(have, s->w_size); +- left = (int)s->strstart - s->block_start; +- if (left >= min_block || ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && s->strm->avail_in == 0 && left <= have)) { +- len = MIN(left, have); +- last = flush == Z_FINISH && s->strm->avail_in == 0 && len == left ? 
1 : 0; +- zng_tr_stored_block(s, (char *)s->window + s->block_start, len, last); +- s->block_start += (int)len; +- flush_pending(s->strm); +- } +- +- /* We've done all we can with the available input and output. */ +- return last ? finish_started : need_more; +-} +- +- +-/* =========================================================================== +- * For Z_RLE, simply look for runs of bytes, generate matches only of distance +- * one. Do not maintain a hash table. (It will be regenerated if this run of +- * deflate switches away from Z_RLE.) +- */ +-static block_state deflate_rle(deflate_state *s, int flush) { +- int bflush = 0; /* set if current block must be flushed */ +- unsigned int prev; /* byte at distance one to match */ +- unsigned char *scan, *strend; /* scan goes up to strend for length of run */ +- uint32_t match_len = 0; +- +- for (;;) { +- /* Make sure that we always have enough lookahead, except +- * at the end of the input file. We need MAX_MATCH bytes +- * for the longest run, plus one for the unrolled loop. 
+- */ +- if (s->lookahead <= MAX_MATCH) { +- fill_window(s); +- if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) +- return need_more; +- if (s->lookahead == 0) +- break; /* flush the current block */ +- } +- +- /* See how many times the previous byte repeats */ +- if (s->lookahead >= MIN_MATCH && s->strstart > 0) { +- scan = s->window + s->strstart - 1; +- prev = *scan; +- if (prev == *++scan && prev == *++scan && prev == *++scan) { +- strend = s->window + s->strstart + MAX_MATCH; +- do { +- } while (prev == *++scan && prev == *++scan && +- prev == *++scan && prev == *++scan && +- prev == *++scan && prev == *++scan && +- prev == *++scan && prev == *++scan && +- scan < strend); +- match_len = MAX_MATCH - (unsigned int)(strend - scan); +- if (match_len > s->lookahead) +- match_len = s->lookahead; +- } +- Assert(scan <= s->window + s->window_size - 1, "wild scan"); +- } +- +- /* Emit match if have run of MIN_MATCH or longer, else emit literal */ +- if (match_len >= MIN_MATCH) { +- check_match(s, s->strstart, s->strstart - 1, match_len); +- +- bflush = zng_tr_tally_dist(s, 1, match_len - MIN_MATCH); +- +- s->lookahead -= match_len; +- s->strstart += match_len; +- match_len = 0; +- } else { +- /* No match, output a literal byte */ +- bflush = zng_tr_tally_lit(s, s->window[s->strstart]); +- s->lookahead--; +- s->strstart++; +- } +- if (bflush) +- FLUSH_BLOCK(s, 0); +- } +- s->insert = 0; +- if (flush == Z_FINISH) { +- FLUSH_BLOCK(s, 1); +- return finish_done; +- } +- if (s->sym_next) +- FLUSH_BLOCK(s, 0); +- return block_done; +-} +- +-/* =========================================================================== +- * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. +- * (It will be regenerated if this run of deflate switches away from Huffman.) +- */ +-static block_state deflate_huff(deflate_state *s, int flush) { +- int bflush = 0; /* set if current block must be flushed */ +- +- for (;;) { +- /* Make sure that we have a literal to write. 
*/ +- if (s->lookahead == 0) { +- fill_window(s); +- if (s->lookahead == 0) { +- if (flush == Z_NO_FLUSH) +- return need_more; +- break; /* flush the current block */ +- } +- } +- +- /* Output a literal byte */ +- bflush = zng_tr_tally_lit(s, s->window[s->strstart]); +- s->lookahead--; +- s->strstart++; +- if (bflush) +- FLUSH_BLOCK(s, 0); +- } +- s->insert = 0; +- if (flush == Z_FINISH) { +- FLUSH_BLOCK(s, 1); +- return finish_done; +- } +- if (s->sym_next) +- FLUSH_BLOCK(s, 0); +- return block_done; +-} +- + #ifndef ZLIB_COMPAT + /* ========================================================================= + * Checks whether buffer size is sufficient and whether this parameter is a duplicate. +@@ -1674,8 +1298,6 @@ int32_t Z_EXPORT zng_deflateSetParams(zng_stream *strm, zng_deflate_param_value + int version_error = 0; + int buf_error = 0; + int stream_error = 0; +- int ret; +- int val; + + /* Initialize the statuses. */ + for (i = 0; i < count; i++) +@@ -1715,8 +1337,8 @@ int32_t Z_EXPORT zng_deflateSetParams(zng_stream *strm, zng_deflate_param_value + + /* Apply changes, remember if there were errors. */ + if (new_level != NULL || new_strategy != NULL) { +- ret = PREFIX(deflateParams)(strm, new_level == NULL ? s->level : *(int *)new_level->buf, +- new_strategy == NULL ? s->strategy : *(int *)new_strategy->buf); ++ int ret = PREFIX(deflateParams)(strm, new_level == NULL ? s->level : *(int *)new_level->buf, ++ new_strategy == NULL ? 
s->strategy : *(int *)new_strategy->buf); + if (ret != Z_OK) { + if (new_level != NULL) + new_level->status = Z_STREAM_ERROR; +@@ -1726,7 +1348,7 @@ int32_t Z_EXPORT zng_deflateSetParams(zng_stream *strm, zng_deflate_param_value + } + } + if (new_reproducible != NULL) { +- val = *(int *)new_reproducible->buf; ++ int val = *(int *)new_reproducible->buf; + if (DEFLATE_CAN_SET_REPRODUCIBLE(strm, val)) { + s->reproducible = val; + } else { +diff --git a/deflate.h b/deflate.h +index 1a1f4d1..8001b47 100644 +--- a/deflate.h ++++ b/deflate.h +@@ -12,6 +12,8 @@ + + #include "zutil.h" + #include "zendian.h" ++#include "adler32_fold.h" ++#include "crc32_fold.h" + + /* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in +@@ -43,25 +45,27 @@ + #define HEAP_SIZE (2*L_CODES+1) + /* maximum heap size */ + +-#define MAX_BITS 15 +-/* All codes must not exceed MAX_BITS bits */ +- + #define BIT_BUF_SIZE 64 + /* size of bit buffer in bi_buf */ + + #define END_BLOCK 256 + /* end of block literal code */ + +-#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ ++#define INIT_STATE 1 /* zlib header -> BUSY_STATE */ ++#ifdef GZIP ++# define GZIP_STATE 4 /* gzip header -> BUSY_STATE | EXTRA_STATE */ ++# define EXTRA_STATE 5 /* gzip extra block -> NAME_STATE */ ++# define NAME_STATE 6 /* gzip file name -> COMMENT_STATE */ ++# define COMMENT_STATE 7 /* gzip comment -> HCRC_STATE */ ++# define HCRC_STATE 8 /* gzip header CRC -> BUSY_STATE */ ++#endif ++#define BUSY_STATE 2 /* deflate -> FINISH_STATE */ ++#define FINISH_STATE 3 /* stream complete */ + #ifdef GZIP +-# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ ++# define MAX_STATE HCRC_STATE ++#else ++# define MAX_STATE FINISH_STATE + #endif +-#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +-#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +-#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ 
+-#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +-#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +-#define FINISH_STATE 666 /* stream complete */ + /* Stream status */ + + #define HASH_BITS 16u /* log2(HASH_SIZE) */ +@@ -101,8 +105,14 @@ typedef uint16_t Pos; + /* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. + */ ++/* Type definitions for hash callbacks */ ++typedef struct internal_state deflate_state; ++ ++typedef uint32_t (* update_hash_cb) (deflate_state *const s, uint32_t h, uint32_t val); ++typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count); ++typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); + +-typedef struct internal_state { ++struct internal_state { + PREFIX3(stream) *strm; /* pointer back to this zlib stream */ + unsigned char *pending_buf; /* output still pending */ + unsigned char *pending_out; /* next pending byte to output to the stream */ +@@ -143,7 +153,7 @@ typedef struct internal_state { + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of +- * wSize-MAX_MATCH bytes, but this ensures that IO is always ++ * wSize-STD_MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. +@@ -157,6 +167,8 @@ typedef struct internal_state { + + Pos *head; /* Heads of the hash chains or 0. */ + ++ uint32_t ins_h; /* hash index of string to be inserted */ ++ + int block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. 
+@@ -188,6 +200,12 @@ typedef struct internal_state { + * max_insert_length is used only for compression levels <= 3. + */ + ++ update_hash_cb update_hash; ++ insert_string_cb insert_string; ++ quick_insert_string_cb quick_insert_string; ++ /* Hash function callbacks that can be configured depending on the deflate ++ * algorithm being used */ ++ + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + +@@ -196,10 +214,7 @@ typedef struct internal_state { + + int nice_match; /* Stop searching when current match exceeds this */ + +-#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +- /* Only used if X86_PCLMULQDQ_CRC is defined */ +- unsigned crc0[4 * 5]; +-#endif ++ struct crc32_fold_s ALIGNED_(16) crc_fold; + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ +@@ -269,7 +284,7 @@ typedef struct internal_state { + + /* Reserved for future use and alignment purposes */ + int32_t reserved[11]; +-} ALIGNED_(8) deflate_state; ++} ALIGNED_(8); + + typedef enum { + need_more, /* block not completed, need more input or more output */ +@@ -290,13 +305,11 @@ typedef enum { + * IN assertion: there is enough room in pending_buf. + */ + static inline void put_short(deflate_state *s, uint16_t w) { +-#if defined(UNALIGNED_OK) +- *(uint16_t *)(&s->pending_buf[s->pending]) = w; +- s->pending += 2; +-#else +- put_byte(s, (w & 0xff)); +- put_byte(s, ((w >> 8) & 0xff)); ++#if BYTE_ORDER == BIG_ENDIAN ++ w = ZSWAP16(w); + #endif ++ memcpy(&s->pending_buf[s->pending], &w, sizeof(w)); ++ s->pending += 2; + } + + /* =========================================================================== +@@ -304,8 +317,11 @@ static inline void put_short(deflate_state *s, uint16_t w) { + * IN assertion: there is enough room in pending_buf. 
+ */ + static inline void put_short_msb(deflate_state *s, uint16_t w) { +- put_byte(s, ((w >> 8) & 0xff)); +- put_byte(s, (w & 0xff)); ++#if BYTE_ORDER == LITTLE_ENDIAN ++ w = ZSWAP16(w); ++#endif ++ memcpy(&s->pending_buf[s->pending], &w, sizeof(w)); ++ s->pending += 2; + } + + /* =========================================================================== +@@ -313,15 +329,11 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) { + * IN assertion: there is enough room in pending_buf. + */ + static inline void put_uint32(deflate_state *s, uint32_t dw) { +-#if defined(UNALIGNED_OK) +- *(uint32_t *)(&s->pending_buf[s->pending]) = dw; +- s->pending += 4; +-#else +- put_byte(s, (dw & 0xff)); +- put_byte(s, ((dw >> 8) & 0xff)); +- put_byte(s, ((dw >> 16) & 0xff)); +- put_byte(s, ((dw >> 24) & 0xff)); ++#if BYTE_ORDER == BIG_ENDIAN ++ dw = ZSWAP32(dw); + #endif ++ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw)); ++ s->pending += 4; + } + + /* =========================================================================== +@@ -329,15 +341,11 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) { + * IN assertion: there is enough room in pending_buf. + */ + static inline void put_uint32_msb(deflate_state *s, uint32_t dw) { +-#if defined(UNALIGNED_OK) +- *(uint32_t *)(&s->pending_buf[s->pending]) = ZSWAP32(dw); +- s->pending += 4; +-#else +- put_byte(s, ((dw >> 24) & 0xff)); +- put_byte(s, ((dw >> 16) & 0xff)); +- put_byte(s, ((dw >> 8) & 0xff)); +- put_byte(s, (dw & 0xff)); ++#if BYTE_ORDER == LITTLE_ENDIAN ++ dw = ZSWAP32(dw); + #endif ++ memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw)); ++ s->pending += 4; + } + + /* =========================================================================== +@@ -345,42 +353,29 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) { + * IN assertion: there is enough room in pending_buf. 
+ */ + static inline void put_uint64(deflate_state *s, uint64_t lld) { +-#if defined(UNALIGNED64_OK) +- *(uint64_t *)(&s->pending_buf[s->pending]) = lld; +- s->pending += 8; +-#elif defined(UNALIGNED_OK) +- *(uint32_t *)(&s->pending_buf[s->pending]) = lld & 0xffffffff; +- s->pending += 4; +- *(uint32_t *)(&s->pending_buf[s->pending]) = (lld >> 32) & 0xffffffff; +- s->pending += 4; +-#else +- put_byte(s, (lld & 0xff)); +- put_byte(s, ((lld >> 8) & 0xff)); +- put_byte(s, ((lld >> 16) & 0xff)); +- put_byte(s, ((lld >> 24) & 0xff)); +- put_byte(s, ((lld >> 32) & 0xff)); +- put_byte(s, ((lld >> 40) & 0xff)); +- put_byte(s, ((lld >> 48) & 0xff)); +- put_byte(s, ((lld >> 56) & 0xff)); ++#if BYTE_ORDER == BIG_ENDIAN ++ lld = ZSWAP64(lld); + #endif ++ memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld)); ++ s->pending += 8; + } + +-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) ++#define MIN_LOOKAHEAD (STD_MAX_MATCH + STD_MIN_MATCH + 1) + /* Minimum amount of lookahead, except at the end of the input file. +- * See deflate.c for comments about the MIN_MATCH+1. ++ * See deflate.c for comments about the STD_MIN_MATCH+1. + */ + +-#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) ++#define MAX_DIST(s) ((s)->w_size - MIN_LOOKAHEAD) + /* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */ + +-#define WIN_INIT MAX_MATCH ++#define WIN_INIT STD_MAX_MATCH + /* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + +-void Z_INTERNAL fill_window(deflate_state *s); ++void Z_INTERNAL PREFIX(fill_window)(deflate_state *s); + void Z_INTERNAL slide_hash_c(deflate_state *s); + + /* in trees.c */ +@@ -389,8 +384,8 @@ void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_ + void Z_INTERNAL zng_tr_flush_bits(deflate_state *s); + void Z_INTERNAL zng_tr_align(deflate_state *s); + void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored_len, int last); +-unsigned Z_INTERNAL bi_reverse(unsigned code, int len); +-void Z_INTERNAL flush_pending(PREFIX3(streamp) strm); ++uint16_t Z_INTERNAL PREFIX(bi_reverse)(unsigned code, int len); ++void Z_INTERNAL PREFIX(flush_pending)(PREFIX3(streamp) strm); + #define d_code(dist) ((dist) < 256 ? zng_dist_code[dist] : zng_dist_code[256+((dist)>>7)]) + /* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. zng_dist_code[256] and zng_dist_code[257] are never +diff --git a/deflate_fast.c b/deflate_fast.c +index 1e11235..3184aa7 100644 +--- a/deflate_fast.c ++++ b/deflate_fast.c +@@ -24,12 +24,12 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { + + for (;;) { + /* Make sure that we always have enough lookahead, except +- * at the end of the input file. We need MAX_MATCH bytes +- * for the next match, plus MIN_MATCH bytes to insert the ++ * at the end of the input file. We need STD_MAX_MATCH bytes ++ * for the next match, plus WANT_MIN_MATCH bytes to insert the + * string following the next match. 
+ */ + if (s->lookahead < MIN_LOOKAHEAD) { +- fill_window(s); ++ PREFIX(fill_window)(s); + if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) { + return need_more; + } +@@ -40,12 +40,12 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ +- if (s->lookahead >= MIN_MATCH) { ++ if (s->lookahead >= WANT_MIN_MATCH) { + hash_head = functable.quick_insert_string(s, s->strstart); + dist = (int64_t)s->strstart - hash_head; + + /* Find the longest match, discarding those <= prev_length. +- * At this point we have always match length < MIN_MATCH ++ * At this point we have always match length < WANT_MIN_MATCH + */ + if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) { + /* To simplify the code, we prevent matches with the string +@@ -57,17 +57,17 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { + } + } + +- if (match_len >= MIN_MATCH) { ++ if (match_len >= WANT_MIN_MATCH) { + check_match(s, s->strstart, s->match_start, match_len); + +- bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - MIN_MATCH); ++ bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - STD_MIN_MATCH); + + s->lookahead -= match_len; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. 
+ */ +- if (match_len <= s->max_insert_length && s->lookahead >= MIN_MATCH) { ++ if (match_len <= s->max_insert_length && s->lookahead >= WANT_MIN_MATCH) { + match_len--; /* string at strstart already in table */ + s->strstart++; + +@@ -75,12 +75,9 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { + s->strstart += match_len; + } else { + s->strstart += match_len; +-#if MIN_MATCH != 3 +- functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2); +-#else +- functable.quick_insert_string(s, s->strstart + 2 - MIN_MATCH); +-#endif +- /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not ++ functable.quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH); ++ ++ /* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } +@@ -94,7 +91,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { + if (UNLIKELY(bflush)) + FLUSH_BLOCK(s, 0); + } +- s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; ++ s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1); + if (UNLIKELY(flush == Z_FINISH)) { + FLUSH_BLOCK(s, 1); + return finish_done; +diff --git a/deflate_huff.c b/deflate_huff.c +new file mode 100644 +index 0000000..b197e24 +--- /dev/null ++++ b/deflate_huff.c +@@ -0,0 +1,45 @@ ++/* deflate_huff.c -- compress data using huffman encoding only strategy ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "deflate.h" ++#include "deflate_p.h" ++#include "functable.h" ++ ++/* =========================================================================== ++ * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. ++ * (It will be regenerated if this run of deflate switches away from Huffman.) 
++ */ ++Z_INTERNAL block_state deflate_huff(deflate_state *s, int flush) { ++ int bflush = 0; /* set if current block must be flushed */ ++ ++ for (;;) { ++ /* Make sure that we have a literal to write. */ ++ if (s->lookahead == 0) { ++ PREFIX(fill_window)(s); ++ if (s->lookahead == 0) { ++ if (flush == Z_NO_FLUSH) ++ return need_more; ++ break; /* flush the current block */ ++ } ++ } ++ ++ /* Output a literal byte */ ++ bflush = zng_tr_tally_lit(s, s->window[s->strstart]); ++ s->lookahead--; ++ s->strstart++; ++ if (bflush) ++ FLUSH_BLOCK(s, 0); ++ } ++ s->insert = 0; ++ if (flush == Z_FINISH) { ++ FLUSH_BLOCK(s, 1); ++ return finish_done; ++ } ++ if (s->sym_next) ++ FLUSH_BLOCK(s, 0); ++ return block_done; ++} +diff --git a/deflate_medium.c b/deflate_medium.c +index 59ccfa8..47796e3 100644 +--- a/deflate_medium.c ++++ b/deflate_medium.c +@@ -7,7 +7,6 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + #ifndef NO_MEDIUM_STRATEGY +-#include + #include "zbuild.h" + #include "deflate.h" + #include "deflate_p.h" +@@ -24,7 +23,7 @@ static int emit_match(deflate_state *s, struct match match) { + int bflush = 0; + + /* matches that are not long enough we need to emit as literals */ +- if (match.match_length < MIN_MATCH) { ++ if (match.match_length < WANT_MIN_MATCH) { + while (match.match_length) { + bflush += zng_tr_tally_lit(s, s->window[match.strstart]); + s->lookahead--; +@@ -36,18 +35,18 @@ static int emit_match(deflate_state *s, struct match match) { + + check_match(s, match.strstart, match.match_start, match.match_length); + +- bflush += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH); ++ bflush += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - STD_MIN_MATCH); + + s->lookahead -= match.match_length; + return bflush; + } + + static void insert_match(deflate_state *s, struct match match) { +- if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + 
MIN_MATCH))) ++ if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH))) + return; + + /* matches that are not long enough we need to emit as literals */ +- if (LIKELY(match.match_length < MIN_MATCH)) { ++ if (LIKELY(match.match_length < WANT_MIN_MATCH)) { + match.strstart++; + match.match_length--; + if (UNLIKELY(match.match_length > 0)) { +@@ -67,7 +66,7 @@ static void insert_match(deflate_state *s, struct match match) { + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +- if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) { ++ if (match.match_length <= 16 * s->max_insert_length && s->lookahead >= WANT_MIN_MATCH) { + match.match_length--; /* string at strstart already in table */ + match.strstart++; + +@@ -85,13 +84,11 @@ static void insert_match(deflate_state *s, struct match match) { + } else { + match.strstart += match.match_length; + match.match_length = 0; +- if (match.strstart >= (MIN_MATCH - 2)) +-#if MIN_MATCH != 3 +- functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2); +-#else +- functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH); +-#endif +- /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not ++ ++ if (match.strstart >= (STD_MIN_MATCH - 2)) ++ functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH); ++ ++ /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. 
+ */ + } +@@ -165,6 +162,9 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + ALIGNED_(16) struct match current_match; + struct match next_match; + ++ /* For levels below 5, don't check the next position for a better match */ ++ int early_exit = s->level < 5; ++ + memset(&current_match, 0, sizeof(struct match)); + memset(&next_match, 0, sizeof(struct match)); + +@@ -174,12 +174,12 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + int64_t dist; + + /* Make sure that we always have enough lookahead, except +- * at the end of the input file. We need MAX_MATCH bytes +- * for the next match, plus MIN_MATCH bytes to insert the ++ * at the end of the input file. We need STD_MAX_MATCH bytes ++ * for the next match, plus WANT_MIN_MATCH bytes to insert the + * string following the next current_match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { +- fill_window(s); ++ PREFIX(fill_window)(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } +@@ -193,12 +193,12 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + */ + + /* If we already have a future match from a previous round, just use that */ +- if (next_match.match_length > 0) { ++ if (!early_exit && next_match.match_length > 0) { + current_match = next_match; + next_match.match_length = 0; + } else { + hash_head = 0; +- if (s->lookahead >= MIN_MATCH) { ++ if (s->lookahead >= WANT_MIN_MATCH) { + hash_head = functable.quick_insert_string(s, s->strstart); + } + +@@ -206,7 +206,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + current_match.orgstart = current_match.strstart; + + /* Find the longest match, discarding those <= prev_length.
+- * At this point we have always match_length < MIN_MATCH ++ * At this point we have always match_length < WANT_MIN_MATCH + */ + + dist = (int64_t)s->strstart - hash_head; +@@ -217,7 +217,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + */ + current_match.match_length = (uint16_t)functable.longest_match(s, hash_head); + current_match.match_start = (uint16_t)s->match_start; +- if (UNLIKELY(current_match.match_length < MIN_MATCH)) ++ if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH)) + current_match.match_length = 1; + if (UNLIKELY(current_match.match_start >= current_match.strstart)) { + /* this can happen due to some restarts */ +@@ -233,7 +233,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + insert_match(s, current_match); + + /* now, look ahead one */ +- if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) { ++ if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) { + s->strstart = current_match.strstart + current_match.match_length; + hash_head = functable.quick_insert_string(s, s->strstart); + +@@ -241,7 +241,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + next_match.orgstart = next_match.strstart; + + /* Find the longest match, discarding those <= prev_length. 
+- * At this point we have always match_length < MIN_MATCH ++ * At this point we have always match_length < WANT_MIN_MATCH + */ + + dist = (int64_t)s->strstart - hash_head; +@@ -256,7 +256,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + /* this can happen due to some restarts */ + next_match.match_length = 1; + } +- if (next_match.match_length < MIN_MATCH) ++ if (next_match.match_length < WANT_MIN_MATCH) + next_match.match_length = 1; + else + fizzle_matches(s, &current_match, &next_match); +@@ -280,7 +280,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { + if (UNLIKELY(bflush)) + FLUSH_BLOCK(s, 0); + } +- s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; ++ s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1); + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; +diff --git a/deflate_p.h b/deflate_p.h +index 7cad8ab..dd2021a 100644 +--- a/deflate_p.h ++++ b/deflate_p.h +@@ -12,11 +12,43 @@ + /* Forward declare common non-inlined functions declared in deflate.c */ + + #ifdef ZLIB_DEBUG +-void check_match(deflate_state *s, Pos start, Pos match, int length); ++/* =========================================================================== ++ * Check that the match at match_start is indeed a match.
++ */ ++static inline void check_match(deflate_state *s, Pos start, Pos match, int length) { ++ /* check that the match length is valid*/ ++ if (length < STD_MIN_MATCH || length > STD_MAX_MATCH) { ++ fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); ++ z_error("invalid match length"); ++ } ++ /* check that the match isn't at the same position as the start string */ ++ if (match == start) { ++ fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); ++ z_error("invalid match position"); ++ } ++ /* check that the match is indeed a match */ ++ if (memcmp(s->window + match, s->window + start, length) != 0) { ++ int32_t i = 0; ++ fprintf(stderr, " start %u, match %u, length %d\n", start, match, length); ++ do { ++ fprintf(stderr, " %03d: match [%02x] start [%02x]\n", i++, ++ s->window[match++], s->window[start++]); ++ } while (--length != 0); ++ z_error("invalid match"); ++ } ++ if (z_verbose > 1) { ++ fprintf(stderr, "\\[%u,%d]", start-match, length); ++ do { ++ putc(s->window[start++], stderr); ++ } while (--length != 0); ++ } ++} + #else + #define check_match(s, start, match, length) + #endif +-void flush_pending(PREFIX3(stream) *strm); ++ ++Z_INTERNAL void PREFIX(flush_pending)(PREFIX3(stream) *strm); ++Z_INTERNAL unsigned PREFIX(read_buf)(PREFIX3(stream) *strm, unsigned char *buf, unsigned size); + + /* =========================================================================== + * Save the match info and tally the frequency counts. 
Return true if +@@ -33,19 +65,19 @@ static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) { + s->sym_buf[s->sym_next++] = c; + s->dyn_ltree[c].Freq++; + Tracevv((stderr, "%c", c)); +- Assert(c <= (MAX_MATCH-MIN_MATCH), "zng_tr_tally: bad literal"); ++ Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal"); + return (s->sym_next == s->sym_end); + } + + static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) { + /* dist: distance of matched string */ +- /* len: match length-MIN_MATCH */ ++ /* len: match length-STD_MIN_MATCH */ + s->sym_buf[s->sym_next++] = (uint8_t)(dist); + s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8); + s->sym_buf[s->sym_next++] = (uint8_t)len; + s->matches++; + dist--; +- Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, ++ Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, + "zng_tr_tally: bad match"); + + s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++; +@@ -64,7 +96,7 @@ static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t le + (uint32_t)((int)s->strstart - s->block_start), \ + (last)); \ + s->block_start = (int)s->strstart; \ +- flush_pending(s->strm); \ ++ PREFIX(flush_pending)(s->strm); \ + } + + /* Same but force premature exit if necessary. */ +@@ -76,4 +108,9 @@ static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t le + /* Maximum stored block length in deflate format (not including header). */ + #define MAX_STORED 65535 + ++/* Compression function. Returns the block state after the call. */ ++typedef block_state (*compress_func) (deflate_state *s, int flush); ++/* Match function. Returns the longest match. 
*/ ++typedef uint32_t (*match_func) (deflate_state *const s, Pos cur_match); ++ + #endif +diff --git a/deflate_quick.c b/deflate_quick.c +index b439743..df5a17b 100644 +--- a/deflate_quick.c ++++ b/deflate_quick.c +@@ -18,6 +18,7 @@ + */ + + #include "zbuild.h" ++#include "zutil_p.h" + #include "deflate.h" + #include "deflate_p.h" + #include "functable.h" +@@ -37,7 +38,7 @@ extern const ct_data static_dtree[D_CODES]; + zng_tr_emit_end_block(s, static_ltree, last); \ + s->block_open = 0; \ + s->block_start = (int)s->strstart; \ +- flush_pending(s->strm); \ ++ PREFIX(flush_pending)(s->strm); \ + if (s->strm->avail_out == 0) \ + return (last) ? finish_started : need_more; \ + } \ +@@ -63,14 +64,14 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { + + for (;;) { + if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) { +- flush_pending(s->strm); ++ PREFIX(flush_pending)(s->strm); + if (s->strm->avail_out == 0) { + return (last && s->strm->avail_in == 0 && s->bi_valid == 0 && s->block_open == 0) ? 
finish_started : need_more; + } + } + + if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD)) { +- fill_window(s); ++ PREFIX(fill_window)(s); + if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) { + return need_more; + } +@@ -84,23 +85,30 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { + } + } + +- if (LIKELY(s->lookahead >= MIN_MATCH)) { ++ if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { + hash_head = functable.quick_insert_string(s, s->strstart); + dist = (int64_t)s->strstart - hash_head; + + if (dist <= MAX_DIST(s) && dist > 0) { +- match_len = functable.compare258(s->window + s->strstart, s->window + hash_head); ++ const uint8_t *str_start = s->window + s->strstart; ++ const uint8_t *match_start = s->window + hash_head; + +- if (match_len >= MIN_MATCH) { +- if (UNLIKELY(match_len > s->lookahead)) +- match_len = s->lookahead; ++ if (zng_memcmp_2(str_start, match_start) == 0) { ++ match_len = functable.compare256(str_start+2, match_start+2) + 2; + +- check_match(s, s->strstart, hash_head, match_len); ++ if (match_len >= WANT_MIN_MATCH) { ++ if (UNLIKELY(match_len > s->lookahead)) ++ match_len = s->lookahead; ++ if (UNLIKELY(match_len > STD_MAX_MATCH)) ++ match_len = STD_MAX_MATCH; + +- zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - MIN_MATCH, (uint32_t)dist); +- s->lookahead -= match_len; +- s->strstart += match_len; +- continue; ++ check_match(s, s->strstart, hash_head, match_len); ++ ++ zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - STD_MIN_MATCH, (uint32_t)dist); ++ s->lookahead -= match_len; ++ s->strstart += match_len; ++ continue; ++ } + } + } + } +@@ -110,7 +118,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { + s->lookahead--; + } + +- s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; ++ s->insert = s->strstart < (STD_MIN_MATCH - 1) ? 
s->strstart : (STD_MIN_MATCH - 1); + if (UNLIKELY(last)) { + QUICK_END_BLOCK(s, 1); + return finish_done; +diff --git a/deflate_rle.c b/deflate_rle.c +new file mode 100644 +index 0000000..cd08509 +--- /dev/null ++++ b/deflate_rle.c +@@ -0,0 +1,85 @@ ++/* deflate_rle.c -- compress data using RLE strategy of deflation algorithm ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "compare256_rle.h" ++#include "deflate.h" ++#include "deflate_p.h" ++#include "functable.h" ++ ++#ifdef UNALIGNED_OK ++# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) ++# define compare256_rle compare256_rle_unaligned_64 ++# elif defined(HAVE_BUILTIN_CTZ) ++# define compare256_rle compare256_rle_unaligned_32 ++# else ++# define compare256_rle compare256_rle_unaligned_16 ++# endif ++#else ++# define compare256_rle compare256_rle_c ++#endif ++ ++/* =========================================================================== ++ * For Z_RLE, simply look for runs of bytes, generate matches only of distance ++ * one. Do not maintain a hash table. (It will be regenerated if this run of ++ * deflate switches away from Z_RLE.) ++ */ ++Z_INTERNAL block_state deflate_rle(deflate_state *s, int flush) { ++ int bflush = 0; /* set if current block must be flushed */ ++ unsigned char *scan; /* scan goes up to strend for length of run */ ++ uint32_t match_len = 0; ++ ++ for (;;) { ++ /* Make sure that we always have enough lookahead, except ++ * at the end of the input file. We need STD_MAX_MATCH bytes ++ * for the longest run, plus one for the unrolled loop. 
++ */ ++ if (s->lookahead <= STD_MAX_MATCH) { ++ PREFIX(fill_window)(s); ++ if (s->lookahead <= STD_MAX_MATCH && flush == Z_NO_FLUSH) ++ return need_more; ++ if (s->lookahead == 0) ++ break; /* flush the current block */ ++ } ++ ++ /* See how many times the previous byte repeats */ ++ if (s->lookahead >= STD_MIN_MATCH && s->strstart > 0) { ++ scan = s->window + s->strstart - 1; ++ if (scan[0] == scan[1] && scan[1] == scan[2]) { ++ match_len = compare256_rle(scan, scan+3)+2; ++ match_len = MIN(match_len, s->lookahead); ++ match_len = MIN(match_len, STD_MAX_MATCH); ++ } ++ Assert(scan+match_len <= s->window + s->window_size - 1, "wild scan"); ++ } ++ ++ /* Emit match if have run of STD_MIN_MATCH or longer, else emit literal */ ++ if (match_len >= STD_MIN_MATCH) { ++ check_match(s, s->strstart, s->strstart - 1, match_len); ++ ++ bflush = zng_tr_tally_dist(s, 1, match_len - STD_MIN_MATCH); ++ ++ s->lookahead -= match_len; ++ s->strstart += match_len; ++ match_len = 0; ++ } else { ++ /* No match, output a literal byte */ ++ bflush = zng_tr_tally_lit(s, s->window[s->strstart]); ++ s->lookahead--; ++ s->strstart++; ++ } ++ if (bflush) ++ FLUSH_BLOCK(s, 0); ++ } ++ s->insert = 0; ++ if (flush == Z_FINISH) { ++ FLUSH_BLOCK(s, 1); ++ return finish_done; ++ } ++ if (s->sym_next) ++ FLUSH_BLOCK(s, 0); ++ return block_done; ++} +diff --git a/deflate_slow.c b/deflate_slow.c +index dc1c072..9f1c913 100644 +--- a/deflate_slow.c ++++ b/deflate_slow.c +@@ -19,16 +19,22 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { + int bflush; /* set if current block must be flushed */ + int64_t dist; + uint32_t match_len; ++ match_func *longest_match; ++ ++ if (s->max_chain_length <= 1024) ++ longest_match = &functable.longest_match; ++ else ++ longest_match = &functable.longest_match_slow; + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except +- * at the end of the input file. 
We need MAX_MATCH bytes +- * for the next match, plus MIN_MATCH bytes to insert the ++ * at the end of the input file. We need STD_MAX_MATCH bytes ++ * for the next match, plus WANT_MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { +- fill_window(s); ++ PREFIX(fill_window)(s); + if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) { + return need_more; + } +@@ -40,14 +46,14 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = 0; +- if (LIKELY(s->lookahead >= MIN_MATCH)) { +- hash_head = functable.quick_insert_string(s, s->strstart); ++ if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { ++ hash_head = s->quick_insert_string(s, s->strstart); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_match = (Pos)s->match_start; +- match_len = MIN_MATCH-1; ++ match_len = STD_MIN_MATCH - 1; + dist = (int64_t)s->strstart - hash_head; + + if (dist <= MAX_DIST(s) && dist > 0 && s->prev_length < s->max_lazy_match && hash_head != 0) { +@@ -55,41 +61,41 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ +- match_len = functable.longest_match(s, hash_head); ++ match_len = (*longest_match)(s, hash_head); + /* longest_match() sets match_start */ + + if (match_len <= 5 && (s->strategy == Z_FILTERED)) { +- /* If prev_match is also MIN_MATCH, match_start is garbage ++ /* If prev_match is also WANT_MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. 
+ */ +- match_len = MIN_MATCH-1; ++ match_len = STD_MIN_MATCH - 1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ +- if (s->prev_length >= MIN_MATCH && match_len <= s->prev_length) { +- unsigned int max_insert = s->strstart + s->lookahead - MIN_MATCH; ++ if (s->prev_length >= STD_MIN_MATCH && match_len <= s->prev_length) { ++ unsigned int max_insert = s->strstart + s->lookahead - STD_MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + +- bflush = zng_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH); ++ bflush = zng_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - STD_MIN_MATCH); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ +- s->lookahead -= s->prev_length-1; ++ s->prev_length -= 1; ++ s->lookahead -= s->prev_length; + +- unsigned int mov_fwd = s->prev_length - 2; ++ unsigned int mov_fwd = s->prev_length - 1; + if (max_insert > s->strstart) { + unsigned int insert_cnt = mov_fwd; + if (UNLIKELY(insert_cnt > max_insert - s->strstart)) + insert_cnt = max_insert - s->strstart; +- +- functable.insert_string(s, s->strstart + 1, insert_cnt); ++ s->insert_string(s, s->strstart + 1, insert_cnt); + } + s->prev_length = 0; + s->match_available = 0; +@@ -126,7 +132,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { + (void) zng_tr_tally_lit(s, s->window[s->strstart-1]); + s->match_available = 0; + } +- s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; ++ s->insert = s->strstart < (STD_MIN_MATCH - 1) ? 
s->strstart : (STD_MIN_MATCH - 1); + if (UNLIKELY(flush == Z_FINISH)) { + FLUSH_BLOCK(s, 1); + return finish_done; +diff --git a/deflate_stored.c b/deflate_stored.c +new file mode 100644 +index 0000000..6160896 +--- /dev/null ++++ b/deflate_stored.c +@@ -0,0 +1,186 @@ ++/* deflate_stored.c -- store data without compression using deflation algorithm ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "deflate.h" ++#include "deflate_p.h" ++#include "functable.h" ++ ++/* =========================================================================== ++ * Copy without compression as much as possible from the input stream, return ++ * the current block state. ++ * ++ * In case deflateParams() is used to later switch to a non-zero compression ++ * level, s->matches (otherwise unused when storing) keeps track of the number ++ * of hash table slides to perform. If s->matches is 1, then one hash table ++ * slide will be done when switching. If s->matches is 2, the maximum value ++ * allowed here, then the hash table will be cleared, since two or more slides ++ * is the same as a clear. ++ * ++ * deflate_stored() is written to minimize the number of times an input byte is ++ * copied. It is most efficient with large input and output buffers, which ++ * maximizes the opportunites to have a single copy from next_in to next_out. ++ */ ++Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) { ++ /* Smallest worthy block size when not flushing or finishing. By default ++ * this is 32K. This can be as small as 507 bytes for memLevel == 1. For ++ * large input and output buffers, the stored block size will be larger. ++ */ ++ unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); ++ ++ /* Copy as many min_block or larger stored blocks directly to next_out as ++ * possible. 
If flushing, copy the remaining available input to next_out as ++ * stored blocks, if there is enough space. ++ */ ++ unsigned len, left, have, last = 0; ++ unsigned used = s->strm->avail_in; ++ do { ++ /* Set len to the maximum size block that we can copy directly with the ++ * available input data and output space. Set left to how much of that ++ * would be copied from what's left in the window. ++ */ ++ len = MAX_STORED; /* maximum deflate stored block length */ ++ have = (s->bi_valid + 42) >> 3; /* number of header bytes */ ++ if (s->strm->avail_out < have) /* need room for header */ ++ break; ++ /* maximum stored block length that will fit in avail_out: */ ++ have = s->strm->avail_out - have; ++ left = (int)s->strstart - s->block_start; /* bytes left in window */ ++ if (len > (unsigned long)left + s->strm->avail_in) ++ len = left + s->strm->avail_in; /* limit len to the input */ ++ len = MIN(len, have); /* limit len to the output */ ++ ++ /* If the stored block would be less than min_block in length, or if ++ * unable to copy all of the available input when flushing, then try ++ * copying to the window and the pending buffer instead. Also don't ++ * write an empty block when flushing -- deflate() does that. ++ */ ++ if (len < min_block && ((len == 0 && flush != Z_FINISH) || flush == Z_NO_FLUSH || len != left + s->strm->avail_in)) ++ break; ++ ++ /* Make a dummy stored block in pending to get the header bytes, ++ * including any pending bits. This also updates the debugging counts. ++ */ ++ last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; ++ zng_tr_stored_block(s, (char *)0, 0L, last); ++ ++ /* Replace the lengths in the dummy stored block with len. */ ++ s->pending -= 4; ++ put_short(s, (uint16_t)len); ++ put_short(s, (uint16_t)~len); ++ ++ /* Write the stored block header bytes. */ ++ PREFIX(flush_pending)(s->strm); ++ ++ /* Update debugging counts for the data about to be copied. 
*/ ++ cmpr_bits_add(s, len << 3); ++ sent_bits_add(s, len << 3); ++ ++ /* Copy uncompressed bytes from the window to next_out. */ ++ if (left) { ++ left = MIN(left, len); ++ memcpy(s->strm->next_out, s->window + s->block_start, left); ++ s->strm->next_out += left; ++ s->strm->avail_out -= left; ++ s->strm->total_out += left; ++ s->block_start += (int)left; ++ len -= left; ++ } ++ ++ /* Copy uncompressed bytes directly from next_in to next_out, updating ++ * the check value. ++ */ ++ if (len) { ++ PREFIX(read_buf)(s->strm, s->strm->next_out, len); ++ s->strm->next_out += len; ++ s->strm->avail_out -= len; ++ s->strm->total_out += len; ++ } ++ } while (last == 0); ++ ++ /* Update the sliding window with the last s->w_size bytes of the copied ++ * data, or append all of the copied data to the existing window if less ++ * than s->w_size bytes were copied. Also update the number of bytes to ++ * insert in the hash tables, in the event that deflateParams() switches to ++ * a non-zero compression level. ++ */ ++ used -= s->strm->avail_in; /* number of input bytes directly copied */ ++ if (used) { ++ /* If any input was used, then no unused input remains in the window, ++ * therefore s->block_start == s->strstart. ++ */ ++ if (used >= s->w_size) { /* supplant the previous history */ ++ s->matches = 2; /* clear hash */ ++ memcpy(s->window, s->strm->next_in - s->w_size, s->w_size); ++ s->strstart = s->w_size; ++ s->insert = s->strstart; ++ } else { ++ if (s->window_size - s->strstart <= used) { ++ /* Slide the window down. 
*/ ++ s->strstart -= s->w_size; ++ memcpy(s->window, s->window + s->w_size, s->strstart); ++ if (s->matches < 2) ++ s->matches++; /* add a pending slide_hash() */ ++ s->insert = MIN(s->insert, s->strstart); ++ } ++ memcpy(s->window + s->strstart, s->strm->next_in - used, used); ++ s->strstart += used; ++ s->insert += MIN(used, s->w_size - s->insert); ++ } ++ s->block_start = (int)s->strstart; ++ } ++ s->high_water = MAX(s->high_water, s->strstart); ++ ++ /* If the last block was written to next_out, then done. */ ++ if (last) ++ return finish_done; ++ ++ /* If flushing and all input has been consumed, then done. */ ++ if (flush != Z_NO_FLUSH && flush != Z_FINISH && s->strm->avail_in == 0 && (int)s->strstart == s->block_start) ++ return block_done; ++ ++ /* Fill the window with any remaining input. */ ++ have = s->window_size - s->strstart; ++ if (s->strm->avail_in > have && s->block_start >= (int)s->w_size) { ++ /* Slide the window down. */ ++ s->block_start -= (int)s->w_size; ++ s->strstart -= s->w_size; ++ memcpy(s->window, s->window + s->w_size, s->strstart); ++ if (s->matches < 2) ++ s->matches++; /* add a pending slide_hash() */ ++ have += s->w_size; /* more space now */ ++ s->insert = MIN(s->insert, s->strstart); ++ } ++ ++ have = MIN(have, s->strm->avail_in); ++ if (have) { ++ PREFIX(read_buf)(s->strm, s->window + s->strstart, have); ++ s->strstart += have; ++ s->insert += MIN(have, s->w_size - s->insert); ++ } ++ s->high_water = MAX(s->high_water, s->strstart); ++ ++ /* There was not enough avail_out to write a complete worthy or flushed ++ * stored block to next_out. Write a stored block to pending instead, if we ++ * have enough input for a worthy block, or if flushing and there is enough ++ * room for the remaining input as a stored block in the pending buffer. 
++ */ ++ have = (s->bi_valid + 42) >> 3; /* number of header bytes */ ++ /* maximum stored block length that will fit in pending: */ ++ have = MIN(s->pending_buf_size - have, MAX_STORED); ++ min_block = MIN(have, s->w_size); ++ left = (int)s->strstart - s->block_start; ++ if (left >= min_block || ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && s->strm->avail_in == 0 && left <= have)) { ++ len = MIN(left, have); ++ last = flush == Z_FINISH && s->strm->avail_in == 0 && len == left ? 1 : 0; ++ zng_tr_stored_block(s, (char *)s->window + s->block_start, len, last); ++ s->block_start += (int)len; ++ PREFIX(flush_pending)(s->strm); ++ } ++ ++ /* We've done all we can with the available input and output. */ ++ return last ? finish_started : need_more; ++} +diff --git a/fallback_builtins.h b/fallback_builtins.h +index 314ad32..79072a1 100644 +--- a/fallback_builtins.h ++++ b/fallback_builtins.h +@@ -1,44 +1,50 @@ +-#ifndef X86_BUILTIN_CTZ_H +-#define X86_BUILTIN_CTZ_H ++#ifndef FALLBACK_BUILTINS_H ++#define FALLBACK_BUILTINS_H + + #if defined(_MSC_VER) && !defined(__clang__) +-#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) ++#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + + #include <intrin.h> + #ifdef X86_FEATURES +-# include "arch/x86/x86.h" ++# include "arch/x86/x86_features.h" + #endif + +-/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0 +- * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked ++/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0. ++ * Because of that assumption trailing_zero is not initialized and the return value is not checked. ++ * Tzcnt and bsf give identical results except when input value is 0, therefore this can not be allowed. 
++ * If tzcnt instruction is not supported, the cpu will itself execute bsf instead. ++ * Performance tzcnt/bsf is identical on Intel cpu, tzcnt is faster than bsf on AMD cpu. + */ +-static __forceinline unsigned long __builtin_ctz(uint32_t value) { +-#ifdef X86_FEATURES +- if (x86_cpu_has_tzcnt) +- return _tzcnt_u32(value); +-#endif ++static __forceinline int __builtin_ctz(unsigned int value) { ++ Assert(value != 0, "Invalid input value: 0"); ++# if defined(X86_FEATURES) && !(_MSC_VER < 1700) ++ return (int)_tzcnt_u32(value); ++# else + unsigned long trailing_zero; + _BitScanForward(&trailing_zero, value); +- return trailing_zero; ++ return (int)trailing_zero; ++# endif + } + #define HAVE_BUILTIN_CTZ + + #ifdef _M_AMD64 +-/* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0 +- * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward64 is not checked ++/* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0. ++ * Because of that assumption trailing_zero is not initialized and the return value is not checked. 
+ */ +-static __forceinline unsigned long long __builtin_ctzll(uint64_t value) { +-#ifdef X86_FEATURES +- if (x86_cpu_has_tzcnt) +- return _tzcnt_u64(value); +-#endif ++static __forceinline int __builtin_ctzll(unsigned long long value) { ++ Assert(value != 0, "Invalid input value: 0"); ++# if defined(X86_FEATURES) && !(_MSC_VER < 1700) ++ return (int)_tzcnt_u64(value); ++# else + unsigned long trailing_zero; + _BitScanForward64(&trailing_zero, value); +- return trailing_zero; ++ return (int)trailing_zero; ++# endif + } + #define HAVE_BUILTIN_CTZLL +-#endif ++#endif // Microsoft AMD64 + +-#endif +-#endif +-#endif ++#endif // Microsoft AMD64/IA64/x86/ARM/ARM64 test ++#endif // _MSC_VER & !clang ++ ++#endif // include guard FALLBACK_BUILTINS_H +diff --git a/functable.c b/functable.c +index 807f7ae..37c4aee 100644 +--- a/functable.c ++++ b/functable.c +@@ -5,462 +5,399 @@ + + #include "zbuild.h" + #include "zendian.h" ++#include "crc32_braid_p.h" + #include "deflate.h" + #include "deflate_p.h" +- + #include "functable.h" +- +-#ifdef X86_FEATURES +-# include "fallback_builtins.h" ++#include "cpu_features.h" ++ ++#if defined(_MSC_VER) ++# include <intrin.h> ++#endif ++ ++/* Platform has pointer size atomic store */ ++#if defined(__GNUC__) || defined(__clang__) ++# define FUNCTABLE_ASSIGN(VAR, FUNC_NAME) \ ++ __atomic_store(&(functable.FUNC_NAME), &(VAR.FUNC_NAME), __ATOMIC_SEQ_CST) ++# define FUNCTABLE_BARRIER() __atomic_thread_fence(__ATOMIC_SEQ_CST) ++#elif defined(_MSC_VER) ++# define FUNCTABLE_ASSIGN(VAR, FUNC_NAME) \ ++ _InterlockedExchangePointer((void * volatile *)&(functable.FUNC_NAME), (void *)(VAR.FUNC_NAME)) ++# if defined(_M_ARM) || defined(_M_ARM64) ++# define FUNCTABLE_BARRIER() do { \ ++ _ReadWriteBarrier(); \ ++ __dmb(0xB); /* _ARM_BARRIER_ISH */ \ ++ _ReadWriteBarrier(); \ ++} while (0) ++# else ++# define FUNCTABLE_BARRIER() _ReadWriteBarrier() ++# endif ++#else ++# warning Unable to detect atomic intrinsic support. 
++# define FUNCTABLE_ASSIGN(VAR, FUNC_NAME) \ ++ *((void * volatile *)&(functable.FUNC_NAME)) = (void *)(VAR.FUNC_NAME) ++# define FUNCTABLE_BARRIER() do { /* Empty */ } while (0) + #endif + +-/* insert_string */ +-extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); +-#ifdef X86_SSE42_CRC_HASH +-extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count); +-#elif defined(ARM_ACLE_CRC_HASH) +-extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); +-#endif ++static void force_init_empty(void) { ++ // empty ++} + +-/* quick_insert_string */ +-extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); +-#ifdef X86_SSE42_CRC_HASH +-extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str); +-#elif defined(ARM_ACLE_CRC_HASH) +-extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); ++static void init_functable(void) { ++ struct functable_s ft; ++ struct cpu_features cf; ++ ++ cpu_check_features(&cf); ++ ++ // Generic code ++ ft.force_init = &force_init_empty; ++ ft.adler32 = &adler32_c; ++ ft.adler32_fold_copy = &adler32_fold_copy_c; ++ ft.chunkmemset_safe = &chunkmemset_safe_c; ++ ft.chunksize = &chunksize_c; ++ ft.crc32 = &PREFIX(crc32_braid); ++ ft.crc32_fold = &crc32_fold_c; ++ ft.crc32_fold_copy = &crc32_fold_copy_c; ++ ft.crc32_fold_final = &crc32_fold_final_c; ++ ft.crc32_fold_reset = &crc32_fold_reset_c; ++ ft.inflate_fast = &inflate_fast_c; ++ ft.insert_string = &insert_string_c; ++ ft.quick_insert_string = &quick_insert_string_c; ++ ft.slide_hash = &slide_hash_c; ++ ft.update_hash = &update_hash_c; ++ ++#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN ++# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) ++ ft.longest_match = &longest_match_unaligned_64; ++ ft.longest_match_slow = &longest_match_slow_unaligned_64; ++ ft.compare256 = &compare256_unaligned_64; ++# elif 
defined(HAVE_BUILTIN_CTZ) ++ ft.longest_match = &longest_match_unaligned_32; ++ ft.longest_match_slow = &longest_match_slow_unaligned_32; ++ ft.compare256 = &compare256_unaligned_32; ++# else ++ ft.longest_match = &longest_match_unaligned_16; ++ ft.longest_match_slow = &longest_match_slow_unaligned_16; ++ ft.compare256 = &compare256_unaligned_16; ++# endif ++#else ++ ft.longest_match = &longest_match_c; ++ ft.longest_match_slow = &longest_match_slow_c; ++ ft.compare256 = &compare256_c; + #endif + +-/* slide_hash */ +-#ifdef X86_SSE2 +-void slide_hash_sse2(deflate_state *s); +-#elif defined(ARM_NEON_SLIDEHASH) +-void slide_hash_neon(deflate_state *s); +-#elif defined(POWER8_VSX_SLIDEHASH) +-void slide_hash_power8(deflate_state *s); +-#endif +-#ifdef X86_AVX2 +-void slide_hash_avx2(deflate_state *s); +-#endif + +-/* adler32 */ +-extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len); +-#ifdef ARM_NEON_ADLER32 +-extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len); ++ // Select arch-optimized functions ++ ++ // X86 - SSE2 ++#ifdef X86_SSE2 ++# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) ++ if (cf.x86.has_sse2) ++# endif ++ { ++ ft.chunkmemset_safe = &chunkmemset_safe_sse2; ++ ft.chunksize = &chunksize_sse2; ++ ft.inflate_fast = &inflate_fast_sse2; ++ ft.slide_hash = &slide_hash_sse2; ++# ifdef HAVE_BUILTIN_CTZ ++ ft.compare256 = &compare256_sse2; ++ ft.longest_match = &longest_match_sse2; ++ ft.longest_match_slow = &longest_match_slow_sse2; ++# endif ++ } + #endif +-#ifdef X86_SSSE3_ADLER32 +-extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len); ++ // X86 - SSSE3 ++#ifdef X86_SSSE3 ++ if (cf.x86.has_ssse3) { ++ ft.adler32 = &adler32_ssse3; ++# ifdef X86_SSE2 ++ ft.chunkmemset_safe = &chunkmemset_safe_ssse3; ++ ft.inflate_fast = &inflate_fast_ssse3; ++# endif ++ } + #endif +-#ifdef X86_AVX2_ADLER32 +-extern uint32_t adler32_avx2(uint32_t adler, const 
unsigned char *buf, size_t len); ++ // X86 - SSE4.2 ++#ifdef X86_SSE42 ++ if (cf.x86.has_sse42) { ++ ft.adler32_fold_copy = &adler32_fold_copy_sse42; ++ ft.insert_string = &insert_string_sse42; ++ ft.quick_insert_string = &quick_insert_string_sse42; ++ ft.update_hash = &update_hash_sse42; ++ } + #endif +-#ifdef POWER8_VSX_ADLER32 +-extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len); ++ // X86 - PCLMUL ++#ifdef X86_PCLMULQDQ_CRC ++ if (cf.x86.has_pclmulqdq) { ++ ft.crc32 = &crc32_pclmulqdq; ++ ft.crc32_fold = &crc32_fold_pclmulqdq; ++ ft.crc32_fold_copy = &crc32_fold_pclmulqdq_copy; ++ ft.crc32_fold_final = &crc32_fold_pclmulqdq_final; ++ ft.crc32_fold_reset = &crc32_fold_pclmulqdq_reset; ++ } + #endif +- +-/* memory chunking */ +-extern uint32_t chunksize_c(void); +-extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len); +-extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); +-extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len); +-extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len); +-extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); +-#ifdef X86_SSE2_CHUNKSET +-extern uint32_t chunksize_sse2(void); +-extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len); +-extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); +-extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len); +-extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len); +-extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++ // X86 - AVX ++#ifdef X86_AVX2 ++ if (cf.x86.has_avx2) { ++ ft.adler32 = &adler32_avx2; ++ ft.adler32_fold_copy = &adler32_fold_copy_avx2; ++ ft.chunkmemset_safe = &chunkmemset_safe_avx2; ++ ft.chunksize = &chunksize_avx2; ++ ft.inflate_fast = 
&inflate_fast_avx2; ++ ft.slide_hash = &slide_hash_avx2; ++# ifdef HAVE_BUILTIN_CTZ ++ ft.compare256 = &compare256_avx2; ++ ft.longest_match = &longest_match_avx2; ++ ft.longest_match_slow = &longest_match_slow_avx2; ++# endif ++ } + #endif +-#ifdef X86_AVX_CHUNKSET +-extern uint32_t chunksize_avx(void); +-extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len); +-extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); +-extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len); +-extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len); +-extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#ifdef X86_AVX512 ++ if (cf.x86.has_avx512) { ++ ft.adler32 = &adler32_avx512; ++ ft.adler32_fold_copy = &adler32_fold_copy_avx512; ++ } + #endif +-#ifdef ARM_NEON_CHUNKSET +-extern uint32_t chunksize_neon(void); +-extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len); +-extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); +-extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len); +-extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len); +-extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); ++#ifdef X86_AVX512VNNI ++ if (cf.x86.has_avx512vnni) { ++ ft.adler32 = &adler32_avx512_vnni; ++ ft.adler32_fold_copy = &adler32_fold_copy_avx512_vnni; ++ } + #endif +- +-/* CRC32 */ +-Z_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t); +- +-#ifdef ARM_ACLE_CRC_HASH +-extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t); ++ // X86 - VPCLMULQDQ ++#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) ++ if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) { ++ ft.crc32 = &crc32_vpclmulqdq; ++ ft.crc32_fold = 
&crc32_fold_vpclmulqdq; ++ ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy; ++ ft.crc32_fold_final = &crc32_fold_vpclmulqdq_final; ++ ft.crc32_fold_reset = &crc32_fold_vpclmulqdq_reset; ++ } + #endif + +-#if BYTE_ORDER == LITTLE_ENDIAN +-extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t); +-#elif BYTE_ORDER == BIG_ENDIAN +-extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t); +-#endif + +-/* compare258 */ +-extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1); +-#ifdef UNALIGNED_OK +-extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1); +-extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1); +-#ifdef UNALIGNED64_OK +-extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1); +-#endif +-#ifdef X86_SSE42_CMP_STR +-extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1); ++ // ARM - SIMD ++#ifdef ARM_SIMD ++# ifndef ARM_NOCHECK_SIMD ++ if (cf.arm.has_simd) ++# endif ++ { ++ ft.slide_hash = &slide_hash_armv6; ++ } + #endif +-#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) +-extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1); ++ // ARM - NEON ++#ifdef ARM_NEON ++# ifndef ARM_NOCHECK_NEON ++ if (cf.arm.has_neon) ++# endif ++ { ++ ft.adler32 = &adler32_neon; ++ ft.chunkmemset_safe = &chunkmemset_safe_neon; ++ ft.chunksize = &chunksize_neon; ++ ft.inflate_fast = &inflate_fast_neon; ++ ft.slide_hash = &slide_hash_neon; ++# ifdef HAVE_BUILTIN_CTZLL ++ ft.compare256 = &compare256_neon; ++ ft.longest_match = &longest_match_neon; ++ ft.longest_match_slow = &longest_match_slow_neon; ++# endif ++ } + #endif ++ // ARM - ACLE ++#ifdef ARM_ACLE ++ if (cf.arm.has_crc32) { ++ ft.crc32 = &crc32_acle; ++ ft.insert_string = &insert_string_acle; ++ ft.quick_insert_string = &quick_insert_string_acle; ++ ft.update_hash = 
&update_hash_acle; ++ } + #endif + +-/* longest_match */ +-extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match); +-#ifdef UNALIGNED_OK +-extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); +-extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); +-#ifdef UNALIGNED64_OK +-extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); ++ ++ // Power - VMX ++#ifdef PPC_VMX ++ if (cf.power.has_altivec) { ++ ft.adler32 = &adler32_vmx; ++ ft.slide_hash = &slide_hash_vmx; ++ } + #endif +-#ifdef X86_SSE42_CMP_STR +-extern uint32_t longest_match_unaligned_sse4(deflate_state *const s, Pos cur_match); ++ // Power8 - VSX ++#ifdef POWER8_VSX ++ if (cf.power.has_arch_2_07) { ++ ft.adler32 = &adler32_power8; ++ ft.chunkmemset_safe = &chunkmemset_safe_power8; ++ ft.chunksize = &chunksize_power8; ++ ft.inflate_fast = &inflate_fast_power8; ++ ft.slide_hash = &slide_hash_power8; ++ } + #endif +-#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) +-extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match); ++#ifdef POWER8_VSX_CRC32 ++ if (cf.power.has_arch_2_07) ++ ft.crc32 = &crc32_power8; + #endif ++ // Power9 ++#ifdef POWER9 ++ if (cf.power.has_arch_3_00) { ++ ft.compare256 = &compare256_power9; ++ ft.longest_match = &longest_match_power9; ++ ft.longest_match_slow = &longest_match_slow_power9; ++ } + #endif + +-Z_INTERNAL Z_TLS struct functable_s functable; +- +-Z_INTERNAL void cpu_check_features(void) +-{ +- static int features_checked = 0; +- if (features_checked) +- return; +-#if defined(X86_FEATURES) +- x86_check_features(); +-#elif defined(ARM_FEATURES) +- arm_check_features(); +-#elif defined(POWER_FEATURES) +- power_check_features(); +-#endif +- features_checked = 1; +-} + +-/* stub functions */ +-Z_INTERNAL void insert_string_stub(deflate_state *const s, const uint32_t str, uint32_t count) { +- // Initialize default +- +- functable.insert_string = 
&insert_string_c; +- cpu_check_features(); +- +-#ifdef X86_SSE42_CRC_HASH +- if (x86_cpu_has_sse42) +- functable.insert_string = &insert_string_sse4; +-#elif defined(ARM_ACLE_CRC_HASH) +- if (arm_cpu_has_crc32) +- functable.insert_string = &insert_string_acle; ++ // RISCV - RVV ++#ifdef RISCV_RVV ++ if (cf.riscv.has_rvv) { ++ ft.adler32 = &adler32_rvv; ++ ft.adler32_fold_copy = &adler32_fold_copy_rvv; ++ ft.chunkmemset_safe = &chunkmemset_safe_rvv; ++ ft.chunksize = &chunksize_rvv; ++ ft.compare256 = &compare256_rvv; ++ ft.inflate_fast = &inflate_fast_rvv; ++ ft.longest_match = &longest_match_rvv; ++ ft.longest_match_slow = &longest_match_slow_rvv; ++ ft.slide_hash = &slide_hash_rvv; ++ } + #endif + +- functable.insert_string(s, str, count); +-} +- +-Z_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const uint32_t str) { +- functable.quick_insert_string = &quick_insert_string_c; +- +-#ifdef X86_SSE42_CRC_HASH +- if (x86_cpu_has_sse42) +- functable.quick_insert_string = &quick_insert_string_sse4; +-#elif defined(ARM_ACLE_CRC_HASH) +- if (arm_cpu_has_crc32) +- functable.quick_insert_string = &quick_insert_string_acle; +-#endif + +- return functable.quick_insert_string(s, str); ++ // S390 ++#ifdef S390_CRC32_VX ++ if (cf.s390.has_vx) ++ ft.crc32 = crc32_s390_vx; ++#endif ++ ++ // Assign function pointers individually for atomic operation ++ FUNCTABLE_ASSIGN(ft, force_init); ++ FUNCTABLE_ASSIGN(ft, adler32); ++ FUNCTABLE_ASSIGN(ft, adler32_fold_copy); ++ FUNCTABLE_ASSIGN(ft, chunkmemset_safe); ++ FUNCTABLE_ASSIGN(ft, chunksize); ++ FUNCTABLE_ASSIGN(ft, compare256); ++ FUNCTABLE_ASSIGN(ft, crc32); ++ FUNCTABLE_ASSIGN(ft, crc32_fold); ++ FUNCTABLE_ASSIGN(ft, crc32_fold_copy); ++ FUNCTABLE_ASSIGN(ft, crc32_fold_final); ++ FUNCTABLE_ASSIGN(ft, crc32_fold_reset); ++ FUNCTABLE_ASSIGN(ft, inflate_fast); ++ FUNCTABLE_ASSIGN(ft, insert_string); ++ FUNCTABLE_ASSIGN(ft, longest_match); ++ FUNCTABLE_ASSIGN(ft, longest_match_slow); ++ FUNCTABLE_ASSIGN(ft, 
quick_insert_string); ++ FUNCTABLE_ASSIGN(ft, slide_hash); ++ FUNCTABLE_ASSIGN(ft, update_hash); ++ ++ // Memory barrier for weak memory order CPUs ++ FUNCTABLE_BARRIER(); + } + +-Z_INTERNAL void slide_hash_stub(deflate_state *s) { +- +- functable.slide_hash = &slide_hash_c; +- cpu_check_features(); +- +-#ifdef X86_SSE2 +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.slide_hash = &slide_hash_sse2; +-#elif defined(ARM_NEON_SLIDEHASH) +-# ifndef ARM_NOCHECK_NEON +- if (arm_cpu_has_neon) +-# endif +- functable.slide_hash = &slide_hash_neon; +-#endif +-#ifdef X86_AVX2 +- if (x86_cpu_has_avx2) +- functable.slide_hash = &slide_hash_avx2; +-#endif +-#ifdef POWER8_VSX_SLIDEHASH +- if (power_cpu_has_arch_2_07) +- functable.slide_hash = &slide_hash_power8; +-#endif +- +- functable.slide_hash(s); ++/* stub functions */ ++static void force_init_stub(void) { ++ init_functable(); + } + +-Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) { +- // Initialize default +- functable.adler32 = &adler32_c; +- cpu_check_features(); +- +-#ifdef ARM_NEON_ADLER32 +-# ifndef ARM_NOCHECK_NEON +- if (arm_cpu_has_neon) +-# endif +- functable.adler32 = &adler32_neon; +-#endif +-#ifdef X86_SSSE3_ADLER32 +- if (x86_cpu_has_ssse3) +- functable.adler32 = &adler32_ssse3; +-#endif +-#ifdef X86_AVX2_ADLER32 +- if (x86_cpu_has_avx2) +- functable.adler32 = &adler32_avx2; +-#endif +-#ifdef POWER8_VSX_ADLER32 +- if (power_cpu_has_arch_2_07) +- functable.adler32 = &adler32_power8; +-#endif +- ++static uint32_t adler32_stub(uint32_t adler, const uint8_t* buf, size_t len) { ++ init_functable(); + return functable.adler32(adler, buf, len); + } + +-Z_INTERNAL uint32_t chunksize_stub(void) { +- // Initialize default +- functable.chunksize = &chunksize_c; +- cpu_check_features(); ++static uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t* dst, const uint8_t* src, size_t len) { ++ 
init_functable(); ++ return functable.adler32_fold_copy(adler, dst, src, len); ++} + +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunksize = &chunksize_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunksize = &chunksize_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunksize = &chunksize_neon; +-#endif ++static uint8_t* chunkmemset_safe_stub(uint8_t* out, unsigned dist, unsigned len, unsigned left) { ++ init_functable(); ++ return functable.chunkmemset_safe(out, dist, len, left); ++} + ++static uint32_t chunksize_stub(void) { ++ init_functable(); + return functable.chunksize(); + } + +-Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned len) { +- // Initialize default +- functable.chunkcopy = &chunkcopy_c; +- +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunkcopy = &chunkcopy_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunkcopy = &chunkcopy_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunkcopy = &chunkcopy_neon; +-#endif +- +- return functable.chunkcopy(out, from, len); ++static uint32_t compare256_stub(const uint8_t* src0, const uint8_t* src1) { ++ init_functable(); ++ return functable.compare256(src0, src1); + } + +-Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) { +- // Initialize default +- functable.chunkcopy_safe = &chunkcopy_safe_c; +- +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunkcopy_safe = &chunkcopy_safe_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunkcopy_safe = 
&chunkcopy_safe_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunkcopy_safe = &chunkcopy_safe_neon; +-#endif +- +- return functable.chunkcopy_safe(out, from, len, safe); ++static uint32_t crc32_stub(uint32_t crc, const uint8_t* buf, size_t len) { ++ init_functable(); ++ return functable.crc32(crc, buf, len); + } + +-Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len) { +- // Initialize default +- functable.chunkunroll = &chunkunroll_c; +- +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunkunroll = &chunkunroll_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunkunroll = &chunkunroll_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunkunroll = &chunkunroll_neon; +-#endif +- +- return functable.chunkunroll(out, dist, len); ++static void crc32_fold_stub(crc32_fold* crc, const uint8_t* src, size_t len, uint32_t init_crc) { ++ init_functable(); ++ functable.crc32_fold(crc, src, len, init_crc); + } + +-Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len) { +- // Initialize default +- functable.chunkmemset = &chunkmemset_c; +- +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunkmemset = &chunkmemset_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunkmemset = &chunkmemset_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunkmemset = &chunkmemset_neon; +-#endif +- +- return functable.chunkmemset(out, dist, len); ++static void crc32_fold_copy_stub(crc32_fold* crc, uint8_t* dst, const uint8_t* src, size_t len) { ++ init_functable(); ++ functable.crc32_fold_copy(crc, dst, src, len); + } + +-Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, 
unsigned dist, unsigned len, unsigned left) { +- // Initialize default +- functable.chunkmemset_safe = &chunkmemset_safe_c; +- +-#ifdef X86_SSE2_CHUNKSET +-# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) +- if (x86_cpu_has_sse2) +-# endif +- functable.chunkmemset_safe = &chunkmemset_safe_sse2; +-#endif +-#ifdef X86_AVX_CHUNKSET +- if (x86_cpu_has_avx2) +- functable.chunkmemset_safe = &chunkmemset_safe_avx; +-#endif +-#ifdef ARM_NEON_CHUNKSET +- if (arm_cpu_has_neon) +- functable.chunkmemset_safe = &chunkmemset_safe_neon; +-#endif +- +- return functable.chunkmemset_safe(out, dist, len, left); ++static uint32_t crc32_fold_final_stub(crc32_fold* crc) { ++ init_functable(); ++ return functable.crc32_fold_final(crc); + } + +-Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) { +- int32_t use_byfour = sizeof(void *) == sizeof(ptrdiff_t); +- +- Assert(sizeof(uint64_t) >= sizeof(size_t), +- "crc32_z takes size_t but internally we have a uint64_t len"); +- /* return a function pointer for optimized arches here after a capability test */ +- +- functable.crc32 = &crc32_generic; +- cpu_check_features(); +- +- if (use_byfour) { +-#if BYTE_ORDER == LITTLE_ENDIAN +- functable.crc32 = crc32_little; +-# if defined(ARM_ACLE_CRC_HASH) +- if (arm_cpu_has_crc32) +- functable.crc32 = crc32_acle; +-# endif +-#elif BYTE_ORDER == BIG_ENDIAN +- functable.crc32 = crc32_big; +-#else +-# error No endian defined +-#endif +- } +- +- return functable.crc32(crc, buf, len); ++static uint32_t crc32_fold_reset_stub(crc32_fold* crc) { ++ init_functable(); ++ return functable.crc32_fold_reset(crc); + } + +-Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) { +- +- functable.compare258 = &compare258_c; ++static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) { ++ init_functable(); ++ functable.inflate_fast(strm, start); ++} + +-#ifdef UNALIGNED_OK +-# if defined(UNALIGNED64_OK) && 
defined(HAVE_BUILTIN_CTZLL) +- functable.compare258 = &compare258_unaligned_64; +-# elif defined(HAVE_BUILTIN_CTZ) +- functable.compare258 = &compare258_unaligned_32; +-# else +- functable.compare258 = &compare258_unaligned_16; +-# endif +-# ifdef X86_SSE42_CMP_STR +- if (x86_cpu_has_sse42) +- functable.compare258 = &compare258_unaligned_sse4; +-# endif +-# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) +- if (x86_cpu_has_avx2) +- functable.compare258 = &compare258_unaligned_avx2; +-# endif +-#endif ++static void insert_string_stub(deflate_state* const s, uint32_t str, uint32_t count) { ++ init_functable(); ++ functable.insert_string(s, str, count); ++} + +- return functable.compare258(src0, src1); ++static uint32_t longest_match_stub(deflate_state* const s, Pos cur_match) { ++ init_functable(); ++ return functable.longest_match(s, cur_match); + } + +-Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) { ++static uint32_t longest_match_slow_stub(deflate_state* const s, Pos cur_match) { ++ init_functable(); ++ return functable.longest_match_slow(s, cur_match); ++} + +- functable.longest_match = &longest_match_c; ++static Pos quick_insert_string_stub(deflate_state* const s, const uint32_t str) { ++ init_functable(); ++ return functable.quick_insert_string(s, str); ++} + +-#ifdef UNALIGNED_OK +-# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) +- functable.longest_match = &longest_match_unaligned_64; +-# elif defined(HAVE_BUILTIN_CTZ) +- functable.longest_match = &longest_match_unaligned_32; +-# else +- functable.longest_match = &longest_match_unaligned_16; +-# endif +-# ifdef X86_SSE42_CMP_STR +- if (x86_cpu_has_sse42) +- functable.longest_match = &longest_match_unaligned_sse4; +-# endif +-# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) +- if (x86_cpu_has_avx2) +- functable.longest_match = &longest_match_unaligned_avx2; +-# endif +-#endif ++static void slide_hash_stub(deflate_state* s) { ++ init_functable(); ++ 
functable.slide_hash(s); ++} + +- return functable.longest_match(s, cur_match); ++static uint32_t update_hash_stub(deflate_state* const s, uint32_t h, uint32_t val) { ++ init_functable(); ++ return functable.update_hash(s, h, val); + } + + /* functable init */ +-Z_INTERNAL Z_TLS struct functable_s functable = { +- insert_string_stub, +- quick_insert_string_stub, ++Z_INTERNAL struct functable_s functable = { ++ force_init_stub, + adler32_stub, ++ adler32_fold_copy_stub, ++ chunkmemset_safe_stub, ++ chunksize_stub, ++ compare256_stub, + crc32_stub, +- slide_hash_stub, +- compare258_stub, ++ crc32_fold_stub, ++ crc32_fold_copy_stub, ++ crc32_fold_final_stub, ++ crc32_fold_reset_stub, ++ inflate_fast_stub, ++ insert_string_stub, + longest_match_stub, +- chunksize_stub, +- chunkcopy_stub, +- chunkcopy_safe_stub, +- chunkunroll_stub, +- chunkmemset_stub, +- chunkmemset_safe_stub ++ longest_match_slow_stub, ++ quick_insert_string_stub, ++ slide_hash_stub, ++ update_hash_stub + }; +diff --git a/functable.h b/functable.h +index 276c284..9f78188 100644 +--- a/functable.h ++++ b/functable.h +@@ -7,23 +7,36 @@ + #define FUNCTABLE_H_ + + #include "deflate.h" ++#include "crc32_fold.h" ++#include "adler32_fold.h" ++ ++#ifdef ZLIB_COMPAT ++typedef struct z_stream_s z_stream; ++#else ++typedef struct zng_stream_s zng_stream; ++#endif + + struct functable_s { +- void (* insert_string) (deflate_state *const s, const uint32_t str, uint32_t count); +- Pos (* quick_insert_string)(deflate_state *const s, const uint32_t str); +- uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len); +- uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len); +- void (* slide_hash) (deflate_state *s); +- uint32_t (* compare258) (const unsigned char *src0, const unsigned char *src1); +- uint32_t (* longest_match) (deflate_state *const s, Pos cur_match); +- uint32_t (* chunksize) (void); +- uint8_t* (* chunkcopy) (uint8_t *out, uint8_t const *from, unsigned len); +- 
uint8_t* (* chunkcopy_safe) (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); +- uint8_t* (* chunkunroll) (uint8_t *out, unsigned *dist, unsigned *len); +- uint8_t* (* chunkmemset) (uint8_t *out, unsigned dist, unsigned len); ++ void (* force_init) (void); ++ uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len); ++ uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); + uint8_t* (* chunkmemset_safe) (uint8_t *out, unsigned dist, unsigned len, unsigned left); ++ uint32_t (* chunksize) (void); ++ uint32_t (* compare256) (const uint8_t *src0, const uint8_t *src1); ++ uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len); ++ void (* crc32_fold) (struct crc32_fold_s *crc, const uint8_t *src, size_t len, uint32_t init_crc); ++ void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, size_t len); ++ uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc); ++ uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc); ++ void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start); ++ void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count); ++ uint32_t (* longest_match) (deflate_state *const s, Pos cur_match); ++ uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match); ++ Pos (* quick_insert_string)(deflate_state *const s, uint32_t str); ++ void (* slide_hash) (deflate_state *s); ++ uint32_t (* update_hash) (deflate_state *const s, uint32_t h, uint32_t val); + }; + +-Z_INTERNAL extern Z_TLS struct functable_s functable; ++Z_INTERNAL extern struct functable_s functable; + + #endif +diff --git a/gzguts.h b/gzguts.h +index 1602960..a663844 100644 +--- a/gzguts.h ++++ b/gzguts.h +@@ -1,7 +1,7 @@ + #ifndef GZGUTS_H_ + #define GZGUTS_H_ + /* gzguts.h -- zlib internal header definitions for gz* operations +- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler ++ * Copyright (C) 2004-2019 Mark Adler + * For 
conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -9,9 +9,8 @@ + # ifndef _LARGEFILE_SOURCE + # define _LARGEFILE_SOURCE 1 + # endif +-# ifdef _FILE_OFFSET_BITS +-# undef _FILE_OFFSET_BITS +-# endif ++# undef _FILE_OFFSET_BITS ++# undef _TIME_BITS + #endif + + #if defined(HAVE_VISIBILITY_INTERNAL) +@@ -38,10 +37,6 @@ + # include + #endif + +-#if !defined(_MSC_VER) || defined(__MINGW__) +-# include /* for lseek(), read(), close(), write(), unlink() */ +-#endif +- + #if defined(_WIN32) + # include + # define WIDECHAR +@@ -88,7 +83,7 @@ + /* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ + #ifndef GZBUFSIZE +-# define GZBUFSIZE 8192 ++# define GZBUFSIZE 131072 + #endif + + /* gzip modes, also provide a little integrity check on the passed structure */ +@@ -144,11 +139,6 @@ void Z_INTERNAL gz_error(gz_state *, int, const char *); + /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +-#ifdef INT_MAX +-# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +-#else +-unsigned Z_INTERNAL gz_intmax(void); +-# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +-#endif ++#define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) + + #endif /* GZGUTS_H_ */ +diff --git a/gzlib.c b/gzlib.c +index 4905516..e613837 100644 +--- a/gzlib.c ++++ b/gzlib.c +@@ -1,5 +1,5 @@ + /* gzlib.c -- zlib functions common to reading and writing gzip files +- * Copyright (C) 2004-2017 Mark Adler ++ * Copyright (C) 2004-2019 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -274,8 +274,8 @@ int Z_EXPORT PREFIX(gzbuffer)(gzFile file, unsigned size) { + /* check and set requested size */ + if ((size << 1) < size) + return -1; /* 
need to be able to double it */ +- if (size < 2) +- size = 2; /* need two bytes to check magic header */ ++ if (size < 8) ++ size = 8; /* needed to behave well with flushing */ + state->want = size; + return 0; + } +@@ -523,21 +523,3 @@ void Z_INTERNAL gz_error(gz_state *state, int err, const char *msg) { + } + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, "%s%s%s", state->path, ": ", msg); + } +- +-#ifndef INT_MAX +-/* portably return maximum value for an int (when limits.h presumed not +- available) -- we need to do this to cover cases where 2's complement not +- used, since C standard permits 1's complement and sign-bit representations, +- otherwise we could just use ((unsigned)-1) >> 1 */ +-unsigned Z_INTERNAL gz_intmax() { +- unsigned p, q; +- +- p = 1; +- do { +- q = p; +- p <<= 1; +- p++; +- } while (p > q); +- return q >> 1; +-} +-#endif +diff --git a/gzread.c.in b/gzread.c.in +new file mode 100644 +index 0000000..1fc7b37 +--- /dev/null ++++ b/gzread.c.in +@@ -0,0 +1,606 @@ ++/* gzread.c -- zlib functions for reading gzip files ++ * Copyright (C) 2004-2017 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "zutil_p.h" ++#include "gzguts.h" ++ ++/* Local functions */ ++static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *); ++static int gz_avail(gz_state *); ++static int gz_look(gz_state *); ++static int gz_decomp(gz_state *); ++static int gz_fetch(gz_state *); ++static int gz_skip(gz_state *, z_off64_t); ++static size_t gz_read(gz_state *, void *, size_t); ++ ++/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from ++ state->fd, and update state->eof, state->err, and state->msg as appropriate. ++ This function needs to loop on read(), since read() is not guaranteed to ++ read the number of bytes requested, depending on the type of descriptor. 
*/ ++static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) { ++ ssize_t ret; ++ ++ *have = 0; ++ do { ++ ret = read(state->fd, buf + *have, len - *have); ++ if (ret <= 0) ++ break; ++ *have += (unsigned)ret; ++ } while (*have < len); ++ if (ret < 0) { ++ gz_error(state, Z_ERRNO, zstrerror()); ++ return -1; ++ } ++ if (ret == 0) ++ state->eof = 1; ++ return 0; ++} ++ ++/* Load up input buffer and set eof flag if last data loaded -- return -1 on ++ error, 0 otherwise. Note that the eof flag is set when the end of the input ++ file is reached, even though there may be unused data in the buffer. Once ++ that data has been used, no more attempts will be made to read the file. ++ If strm->avail_in != 0, then the current data is moved to the beginning of ++ the input buffer, and then the remainder of the buffer is loaded with the ++ available data from the input file. */ ++static int gz_avail(gz_state *state) { ++ unsigned got; ++ PREFIX3(stream) *strm = &(state->strm); ++ ++ if (state->err != Z_OK && state->err != Z_BUF_ERROR) ++ return -1; ++ if (state->eof == 0) { ++ if (strm->avail_in) { /* copy what's there to the start */ ++ unsigned char *p = state->in; ++ unsigned const char *q = strm->next_in; ++ unsigned n = strm->avail_in; ++ do { ++ *p++ = *q++; ++ } while (--n); ++ } ++ if (gz_load(state, state->in + strm->avail_in, state->size - strm->avail_in, &got) == -1) ++ return -1; ++ strm->avail_in += got; ++ strm->next_in = state->in; ++ } ++ return 0; ++} ++ ++/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. ++ If this is the first time in, allocate required memory. state->how will be ++ left unchanged if there is no more input data available, will be set to COPY ++ if there is no gzip header and direct copying will be performed, or it will ++ be set to GZIP for decompression. If direct copying, then leftover input ++ data from the input buffer will be copied to the output buffer. 
In that ++ case, all further file reads will be directly to either the output buffer or ++ a user buffer. If decompressing, the inflate state will be initialized. ++ gz_look() will return 0 on success or -1 on failure. */ ++static int gz_look(gz_state *state) { ++ PREFIX3(stream) *strm = &(state->strm); ++ ++ /* allocate read buffers and inflate memory */ ++ if (state->size == 0) { ++ /* allocate buffers */ ++ state->in = (unsigned char *)zng_alloc(state->want); ++ state->out = (unsigned char *)zng_alloc(state->want << 1); ++ if (state->in == NULL || state->out == NULL) { ++ zng_free(state->out); ++ zng_free(state->in); ++ gz_error(state, Z_MEM_ERROR, "out of memory"); ++ return -1; ++ } ++ state->size = state->want; ++ ++ /* allocate inflate memory */ ++ state->strm.zalloc = NULL; ++ state->strm.zfree = NULL; ++ state->strm.opaque = NULL; ++ state->strm.avail_in = 0; ++ state->strm.next_in = NULL; ++ if (PREFIX(inflateInit2)(&(state->strm), MAX_WBITS + 16) != Z_OK) { /* gunzip */ ++ zng_free(state->out); ++ zng_free(state->in); ++ state->size = 0; ++ gz_error(state, Z_MEM_ERROR, "out of memory"); ++ return -1; ++ } ++ } ++ ++ /* get at least the magic bytes in the input buffer */ ++ if (strm->avail_in < 2) { ++ if (gz_avail(state) == -1) ++ return -1; ++ if (strm->avail_in == 0) ++ return 0; ++ } ++ ++ /* look for gzip magic bytes -- if there, do gzip decoding (note: there is ++ a logical dilemma here when considering the case of a partially written ++ gzip file, to wit, if a single 31 byte is written, then we cannot tell ++ whether this is a single-byte file, or just a partially written gzip ++ file -- for here we assume that if a gzip file is being written, then ++ the header will be written in a single operation, so that reading a ++ single byte is sufficient indication that it is not a gzip file) */ ++ if (strm->avail_in > 1 && ++ strm->next_in[0] == 31 && strm->next_in[1] == 139) { ++ PREFIX(inflateReset)(strm); ++ state->how = GZIP; ++ state->direct = 0; ++ 
return 0; ++ } ++ ++ /* no gzip header -- if we were decoding gzip before, then this is trailing ++ garbage. Ignore the trailing garbage and finish. */ ++ if (state->direct == 0) { ++ strm->avail_in = 0; ++ state->eof = 1; ++ state->x.have = 0; ++ return 0; ++ } ++ ++ /* doing raw i/o, copy any leftover input to output -- this assumes that ++ the output buffer is larger than the input buffer, which also assures ++ space for gzungetc() */ ++ state->x.next = state->out; ++ memcpy(state->x.next, strm->next_in, strm->avail_in); ++ state->x.have = strm->avail_in; ++ strm->avail_in = 0; ++ state->how = COPY; ++ state->direct = 1; ++ return 0; ++} ++ ++/* Decompress from input to the provided next_out and avail_out in the state. ++ On return, state->x.have and state->x.next point to the just decompressed ++ data. If the gzip stream completes, state->how is reset to LOOK to look for ++ the next gzip stream or raw data, once state->x.have is depleted. Returns 0 ++ on success, -1 on failure. */ ++static int gz_decomp(gz_state *state) { ++ int ret = Z_OK; ++ unsigned had; ++ PREFIX3(stream) *strm = &(state->strm); ++ ++ /* fill output buffer up to end of deflate stream */ ++ had = strm->avail_out; ++ do { ++ /* get more input for inflate() */ ++ if (strm->avail_in == 0 && gz_avail(state) == -1) ++ return -1; ++ if (strm->avail_in == 0) { ++ gz_error(state, Z_BUF_ERROR, "unexpected end of file"); ++ break; ++ } ++ ++ /* decompress and handle errors */ ++ ret = PREFIX(inflate)(strm, Z_NO_FLUSH); ++ if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { ++ gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt"); ++ return -1; ++ } ++ if (ret == Z_MEM_ERROR) { ++ gz_error(state, Z_MEM_ERROR, "out of memory"); ++ return -1; ++ } ++ if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ ++ gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? 
"compressed data error" : strm->msg); ++ return -1; ++ } ++ } while (strm->avail_out && ret != Z_STREAM_END); ++ ++ /* update available output */ ++ state->x.have = had - strm->avail_out; ++ state->x.next = strm->next_out - state->x.have; ++ ++ /* if the gzip stream completed successfully, look for another */ ++ if (ret == Z_STREAM_END) ++ state->how = LOOK; ++ ++ /* good decompression */ ++ return 0; ++} ++ ++/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. ++ Data is either copied from the input file or decompressed from the input ++ file depending on state->how. If state->how is LOOK, then a gzip header is ++ looked for to determine whether to copy or decompress. Returns -1 on error, ++ otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the ++ end of the input file has been reached and all data has been processed. */ ++static int gz_fetch(gz_state *state) { ++ PREFIX3(stream) *strm = &(state->strm); ++ ++ do { ++ switch (state->how) { ++ case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ ++ if (gz_look(state) == -1) ++ return -1; ++ if (state->how == LOOK) ++ return 0; ++ break; ++ case COPY: /* -> COPY */ ++ if (gz_load(state, state->out, state->size << 1, &(state->x.have)) ++ == -1) ++ return -1; ++ state->x.next = state->out; ++ return 0; ++ case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ ++ strm->avail_out = state->size << 1; ++ strm->next_out = state->out; ++ if (gz_decomp(state) == -1) ++ return -1; ++ } ++ } while (state->x.have == 0 && (!state->eof || strm->avail_in)); ++ return 0; ++} ++ ++/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ ++static int gz_skip(gz_state *state, z_off64_t len) { ++ unsigned n; ++ ++ /* skip over len bytes or reach end-of-file, whichever comes first */ ++ while (len) ++ /* skip over whatever is in output buffer */ ++ if (state->x.have) { ++ n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? 
++ (unsigned)len : state->x.have; ++ state->x.have -= n; ++ state->x.next += n; ++ state->x.pos += n; ++ len -= n; ++ } else if (state->eof && state->strm.avail_in == 0) { ++ /* output buffer empty -- return if we're at the end of the input */ ++ break; ++ } else { ++ /* need more data to skip -- load up output buffer */ ++ /* get more output, looking for header if required */ ++ if (gz_fetch(state) == -1) ++ return -1; ++ } ++ return 0; ++} ++ ++/* Read len bytes into buf from file, or less than len up to the end of the ++ input. Return the number of bytes read. If zero is returned, either the ++ end of file was reached, or there was an error. state->err must be ++ consulted in that case to determine which. */ ++static size_t gz_read(gz_state *state, void *buf, size_t len) { ++ size_t got; ++ unsigned n; ++ ++ /* if len is zero, avoid unnecessary operations */ ++ if (len == 0) ++ return 0; ++ ++ /* process a skip request */ ++ if (state->seek) { ++ state->seek = 0; ++ if (gz_skip(state, state->skip) == -1) ++ return 0; ++ } ++ ++ /* get len bytes to buf, or less than len if at the end */ ++ got = 0; ++ do { ++ /* set n to the maximum amount of len that fits in an unsigned int */ ++ n = (unsigned)-1; ++ if (n > len) ++ n = (unsigned)len; ++ ++ /* first just try copying data from the output buffer */ ++ if (state->x.have) { ++ if (state->x.have < n) ++ n = state->x.have; ++ memcpy(buf, state->x.next, n); ++ state->x.next += n; ++ state->x.have -= n; ++ } ++ ++ /* output buffer empty -- return if we're at the end of the input */ ++ else if (state->eof && state->strm.avail_in == 0) { ++ state->past = 1; /* tried to read past end */ ++ break; ++ } ++ ++ /* need output data -- for small len or new stream load up our output ++ buffer */ ++ else if (state->how == LOOK || n < (state->size << 1)) { ++ /* get more output, looking for header if required */ ++ if (gz_fetch(state) == -1) ++ return 0; ++ continue; /* no progress yet -- go back to copy above */ ++ /* the copy 
above assures that we will leave with space in the ++ output buffer, allowing at least one gzungetc() to succeed */ ++ } ++ ++ /* large len -- read directly into user buffer */ ++ else if (state->how == COPY) { /* read directly */ ++ if (gz_load(state, (unsigned char *)buf, n, &n) == -1) ++ return 0; ++ } ++ ++ /* large len -- decompress directly into user buffer */ ++ else { /* state->how == GZIP */ ++ state->strm.avail_out = n; ++ state->strm.next_out = (unsigned char *)buf; ++ if (gz_decomp(state) == -1) ++ return 0; ++ n = state->x.have; ++ state->x.have = 0; ++ } ++ ++ /* update progress */ ++ len -= n; ++ buf = (char *)buf + n; ++ got += n; ++ state->x.pos += n; ++ } while (len); ++ ++ /* return number of bytes read into user buffer */ ++ return got; ++} ++ ++/* -- see zlib.h -- */ ++int Z_EXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) { ++ gz_state *state; ++ ++ /* get internal structure */ ++ if (file == NULL) ++ return -1; ++ state = (gz_state *)file; ++ ++ /* check that we're reading and that there's no (serious) error */ ++ if (state->mode != GZ_READ || ++ (state->err != Z_OK && state->err != Z_BUF_ERROR)) ++ return -1; ++ ++ /* since an int is returned, make sure len fits in one, otherwise return ++ with an error (this avoids a flaw in the interface) */ ++ if ((int)len < 0) { ++ gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); ++ return -1; ++ } ++ ++ /* read len or fewer bytes to buf */ ++ len = (unsigned)gz_read(state, buf, len); ++ ++ /* check for an error */ ++ if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) ++ return -1; ++ ++ /* return the number of bytes read (this is assured to fit in an int) */ ++ return (int)len; ++} ++ ++/* -- see zlib.h -- */ ++size_t Z_EXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) { ++ size_t len; ++ gz_state *state; ++ ++ /* Exit early if size is zero, also prevents potential division by zero */ ++ if (size == 0) ++ return 0; ++ ++ /* get internal 
structure */ ++ if (file == NULL) ++ return 0; ++ state = (gz_state *)file; ++ ++ /* check that we're reading and that there's no (serious) error */ ++ if (state->mode != GZ_READ || ++ (state->err != Z_OK && state->err != Z_BUF_ERROR)) ++ return 0; ++ ++ /* compute bytes to read -- error on overflow */ ++ if (size && SIZE_MAX / size < nitems) { ++ gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); ++ return 0; ++ } ++ len = nitems * size; ++ ++ /* read len or fewer bytes to buf, return the number of full items read */ ++ return len ? gz_read(state, buf, len) / size : 0; ++} ++ ++/* -- see zlib.h -- */ ++#undef @ZLIB_SYMBOL_PREFIX@gzgetc ++#undef @ZLIB_SYMBOL_PREFIX@zng_gzgetc ++int Z_EXPORT PREFIX(gzgetc)(gzFile file) { ++ unsigned char buf[1]; ++ gz_state *state; ++ ++ /* get internal structure */ ++ if (file == NULL) ++ return -1; ++ state = (gz_state *)file; ++ ++ /* check that we're reading and that there's no (serious) error */ ++ if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) ++ return -1; ++ ++ /* try output buffer (no need to check for skip request) */ ++ if (state->x.have) { ++ state->x.have--; ++ state->x.pos++; ++ return *(state->x.next)++; ++ } ++ ++ /* nothing there -- try gz_read() */ ++ return gz_read(state, buf, 1) < 1 ? 
-1 : buf[0]; ++} ++ ++#ifdef ZLIB_COMPAT ++int Z_EXPORT PREFIX(gzgetc_)(gzFile file) { ++ return PREFIX(gzgetc)(file); ++} ++#endif ++ ++/* -- see zlib.h -- */ ++int Z_EXPORT PREFIX(gzungetc)(int c, gzFile file) { ++ gz_state *state; ++ ++ /* get internal structure */ ++ if (file == NULL) ++ return -1; ++ state = (gz_state *)file; ++ ++ /* in case this was just opened, set up the input buffer */ ++ if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) ++ (void)gz_look(state); ++ ++ /* check that we're reading and that there's no (serious) error */ ++ if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) ++ return -1; ++ ++ /* process a skip request */ ++ if (state->seek) { ++ state->seek = 0; ++ if (gz_skip(state, state->skip) == -1) ++ return -1; ++ } ++ ++ /* can't push EOF */ ++ if (c < 0) ++ return -1; ++ ++ /* if output buffer empty, put byte at end (allows more pushing) */ ++ if (state->x.have == 0) { ++ state->x.have = 1; ++ state->x.next = state->out + (state->size << 1) - 1; ++ state->x.next[0] = (unsigned char)c; ++ state->x.pos--; ++ state->past = 0; ++ return c; ++ } ++ ++ /* if no room, give up (must have already done a gzungetc()) */ ++ if (state->x.have == (state->size << 1)) { ++ gz_error(state, Z_DATA_ERROR, "out of room to push characters"); ++ return -1; ++ } ++ ++ /* slide output data if needed and insert byte before existing data */ ++ if (state->x.next == state->out) { ++ unsigned char *src = state->out + state->x.have; ++ unsigned char *dest = state->out + (state->size << 1); ++ while (src > state->out) ++ *--dest = *--src; ++ state->x.next = dest; ++ } ++ state->x.have++; ++ state->x.next--; ++ state->x.next[0] = (unsigned char)c; ++ state->x.pos--; ++ state->past = 0; ++ return c; ++} ++ ++/* -- see zlib.h -- */ ++char * Z_EXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) { ++ unsigned left, n; ++ char *str; ++ unsigned char *eol; ++ gz_state *state; ++ ++ /* check parameters and get 
internal structure */ ++ if (file == NULL || buf == NULL || len < 1) ++ return NULL; ++ state = (gz_state *)file; ++ ++ /* check that we're reading and that there's no (serious) error */ ++ if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) ++ return NULL; ++ ++ /* process a skip request */ ++ if (state->seek) { ++ state->seek = 0; ++ if (gz_skip(state, state->skip) == -1) ++ return NULL; ++ } ++ ++ /* copy output bytes up to new line or len - 1, whichever comes first -- ++ append a terminating zero to the string (we don't check for a zero in ++ the contents, let the user worry about that) */ ++ str = buf; ++ left = (unsigned)len - 1; ++ if (left) { ++ do { ++ /* assure that something is in the output buffer */ ++ if (state->x.have == 0 && gz_fetch(state) == -1) ++ return NULL; /* error */ ++ if (state->x.have == 0) { /* end of file */ ++ state->past = 1; /* read past end */ ++ break; /* return what we have */ ++ } ++ ++ /* look for end-of-line in current output buffer */ ++ n = state->x.have > left ? 
left : state->x.have; ++ eol = (unsigned char *)memchr(state->x.next, '\n', n); ++ if (eol != NULL) ++ n = (unsigned)(eol - state->x.next) + 1; ++ ++ /* copy through end-of-line, or remainder if not found */ ++ memcpy(buf, state->x.next, n); ++ state->x.have -= n; ++ state->x.next += n; ++ state->x.pos += n; ++ left -= n; ++ buf += n; ++ } while (left && eol == NULL); ++ } ++ ++ /* return terminated string, or if nothing, end of file */ ++ if (buf == str) ++ return NULL; ++ buf[0] = 0; ++ return str; ++} ++ ++/* -- see zlib.h -- */ ++int Z_EXPORT PREFIX(gzdirect)(gzFile file) { ++ gz_state *state; ++ ++ /* get internal structure */ ++ if (file == NULL) ++ return 0; ++ ++ state = (gz_state *)file; ++ ++ /* if the state is not known, but we can find out, then do so (this is ++ mainly for right after a gzopen() or gzdopen()) */ ++ if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) ++ (void)gz_look(state); ++ ++ /* return 1 if transparent, 0 if processing a gzip stream */ ++ return state->direct; ++} ++ ++/* -- see zlib.h -- */ ++int Z_EXPORT PREFIX(gzclose_r)(gzFile file) { ++ int ret, err; ++ gz_state *state; ++ ++ /* get internal structure */ ++ if (file == NULL) ++ return Z_STREAM_ERROR; ++ ++ state = (gz_state *)file; ++ ++ /* check that we're reading */ ++ if (state->mode != GZ_READ) ++ return Z_STREAM_ERROR; ++ ++ /* free memory and close file */ ++ if (state->size) { ++ PREFIX(inflateEnd)(&(state->strm)); ++ zng_free(state->out); ++ zng_free(state->in); ++ } ++ err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; ++ gz_error(state, Z_OK, NULL); ++ free(state->path); ++ ret = close(state->fd); ++ zng_free(state); ++ return ret ? 
Z_ERRNO : err; ++} +diff --git a/gzwrite.c b/gzwrite.c +index c4e178f..08e0ce9 100644 +--- a/gzwrite.c ++++ b/gzwrite.c +@@ -1,5 +1,5 @@ + /* gzwrite.c -- zlib functions for writing gzip files +- * Copyright (C) 2004-2017 Mark Adler ++ * Copyright (C) 2004-2019 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -274,7 +274,7 @@ size_t Z_EXPORT PREFIX(gzfwrite)(void const *buf, size_t size, size_t nitems, gz + + /* compute bytes to read -- error on overflow */ + len = nitems * size; +- if (size && len / size != nitems) { ++ if (len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } +@@ -460,7 +460,7 @@ int Z_EXPORT PREFIX(gzsetparams)(gzFile file, int level, int strategy) { + strm = &(state->strm); + + /* check that we're writing and that there's no error */ +- if (state->mode != GZ_WRITE || state->err != Z_OK) ++ if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ +diff --git a/infback.c b/infback.c +index 6236939..9f5042b 100644 +--- a/infback.c ++++ b/infback.c +@@ -1,5 +1,5 @@ + /* infback.c -- inflate using a call-back interface +- * Copyright (C) 1995-2016 Mark Adler ++ * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -14,33 +14,36 @@ + #include "zutil.h" + #include "inftrees.h" + #include "inflate.h" +-#include "inffast.h" + #include "inflate_p.h" + #include "functable.h" + ++/* Avoid conflicts with zlib.h macros */ ++#ifdef ZLIB_COMPAT ++# undef inflateBackInit ++#endif ++ + /* + strm provides memory allocation functions in zalloc and zfree, or + NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. ++ ++ This function is hidden in ZLIB_COMPAT builds. 
+ */ +-int32_t Z_EXPORT PREFIX(inflateBackInit_)(PREFIX3(stream) *strm, int32_t windowBits, uint8_t *window, +- const char *version, int32_t stream_size) { ++int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t windowBits, uint8_t *window) { + struct inflate_state *state; + +- if (version == NULL || version[0] != PREFIX2(VERSION)[0] || stream_size != (int)(sizeof(PREFIX3(stream)))) +- return Z_VERSION_ERROR; +- if (strm == NULL || window == NULL || windowBits < 8 || windowBits > 15) ++ if (strm == NULL || window == NULL || windowBits < MIN_WBITS || windowBits > MAX_WBITS) + return Z_STREAM_ERROR; + strm->msg = NULL; /* in case we return an error */ + if (strm->zalloc == NULL) { +- strm->zalloc = zng_calloc; ++ strm->zalloc = PREFIX(zcalloc); + strm->opaque = NULL; + } + if (strm->zfree == NULL) +- strm->zfree = zng_cfree; +- state = (struct inflate_state *) ZALLOC(strm, 1, sizeof(struct inflate_state)); ++ strm->zfree = PREFIX(zcfree); ++ state = ZALLOC_INFLATE_STATE(strm); + if (state == NULL) + return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); +@@ -51,10 +54,19 @@ int32_t Z_EXPORT PREFIX(inflateBackInit_)(PREFIX3(stream) *strm, int32_t windowB + state->window = window; + state->wnext = 0; + state->whave = 0; ++ state->sane = 1; + state->chunksize = functable.chunksize(); + return Z_OK; + } + ++/* Function used by zlib.h and zlib-ng version 2.0 macros */ ++int32_t Z_EXPORT PREFIX(inflateBackInit_)(PREFIX3(stream) *strm, int32_t windowBits, uint8_t *window, ++ const char *version, int32_t stream_size) { ++ if (CHECK_VER_STSIZE(version, stream_size)) ++ return Z_VERSION_ERROR; ++ return PREFIX(inflateBackInit)(strm, windowBits, window); ++} ++ + /* + Private macros for inflateBack() + Look in inflate_p.h for macros shared with inflate() +@@ -179,7 +191,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + state->mode = STORED; + break; + case 1: /* fixed block */ +- fixedtables(state); ++ 
PREFIX(fixedtables)(state); + Tracev((stderr, "inflate: fixed codes block%s\n", state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; +@@ -210,8 +222,8 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + copy = state->length; + PULL(); + ROOM(); +- if (copy > have) copy = have; +- if (copy > left) copy = left; ++ copy = MIN(copy, have); ++ copy = MIN(copy, left); + memcpy(put, next, copy); + have -= copy; + next += copy; +@@ -316,18 +328,18 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + } + + /* build code tables -- note: do not change the lenbits or distbits +- values here (9 and 6) without reading the comments in inftrees.h ++ values here (10 and 9) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code *)(state->next); +- state->lenbits = 9; ++ state->lenbits = 10; + ret = zng_inflate_table(LENS, state->lens, state->nlen, &(state->next), &(state->lenbits), state->work); + if (ret) { + SET_BAD("invalid literal/lengths set"); + break; + } + state->distcode = (const code *)(state->next); +- state->distbits = 6; ++ state->distbits = 9; + ret = zng_inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { +@@ -336,6 +348,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; ++ Z_FALLTHROUGH; + + case LEN: + /* use inflate_fast() if we have enough input and output */ +@@ -344,7 +357,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; +- zng_inflate_fast(strm, state->wsize); ++ functable.inflate_fast(strm, state->wsize); + LOAD(); + break; + } +@@ -395,7 +408,7 @@ int32_t 
Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + } + + /* length code -- get extra bits, if any */ +- state->extra = (here.op & 15); ++ state->extra = (here.op & MAX_BITS); + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); +@@ -426,7 +439,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + break; + } + state->offset = here.val; +- state->extra = (here.op & 15); ++ state->extra = (here.op & MAX_BITS); + + /* get distance extra bits, if any */ + if (state->extra) { +@@ -453,8 +466,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + from = put - state->offset; + copy = left; + } +- if (copy > state->length) +- copy = state->length; ++ copy = MIN(copy, state->length); + state->length -= copy; + left -= copy; + do { +@@ -464,12 +476,8 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + break; + + case DONE: +- /* inflate stream terminated properly -- write leftover output */ ++ /* inflate stream terminated properly */ + ret = Z_STREAM_END; +- if (left < state->wsize) { +- if (out(out_desc, state->window, state->wsize - left)) +- ret = Z_BUF_ERROR; +- } + goto inf_leave; + + case BAD: +@@ -481,8 +489,13 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + goto inf_leave; + } + +- /* Return unused input */ ++ /* Write leftover output and return unused input */ + inf_leave: ++ if (left < state->wsize) { ++ if (out(out_desc, state->window, state->wsize - left) && (ret == Z_STREAM_END)) { ++ ret = Z_BUF_ERROR; ++ } ++ } + strm->next_in = next; + strm->avail_in = have; + return ret; +@@ -491,7 +504,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in + int32_t Z_EXPORT PREFIX(inflateBackEnd)(PREFIX3(stream) *strm) { + if (strm == NULL || strm->state == NULL || strm->zfree == NULL) + return Z_STREAM_ERROR; +- ZFREE(strm, strm->state); ++ 
ZFREE_STATE(strm, strm->state); + strm->state = NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +diff --git a/inffast_tpl.h b/inffast_tpl.h +new file mode 100644 +index 0000000..9ddd187 +--- /dev/null ++++ b/inffast_tpl.h +@@ -0,0 +1,326 @@ ++/* inffast.c -- fast decoding ++ * Copyright (C) 1995-2017 Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "zendian.h" ++#include "zutil.h" ++#include "inftrees.h" ++#include "inflate.h" ++#include "inflate_p.h" ++#include "functable.h" ++ ++/* ++ Decode literal, length, and distance codes and write out the resulting ++ literal and match bytes until either not enough input or output is ++ available, an end-of-block is encountered, or a data error is encountered. ++ When large enough input and output buffers are supplied to inflate(), for ++ example, a 16K input buffer and a 64K output buffer, more than 95% of the ++ inflate execution time is spent in this routine. ++ ++ Entry assumptions: ++ ++ state->mode == LEN ++ strm->avail_in >= INFLATE_FAST_MIN_HAVE ++ strm->avail_out >= INFLATE_FAST_MIN_LEFT ++ start >= strm->avail_out ++ state->bits < 8 ++ ++ On return, state->mode is one of: ++ ++ LEN -- ran out of enough output space or enough available input ++ TYPE -- reached end of block code, inflate() to interpret next block ++ BAD -- error in block data ++ ++ Notes: ++ ++ - The maximum input bits used by a length/distance pair is 15 bits for the ++ length code, 5 bits for the length extra, 15 bits for the distance code, ++ and 13 bits for the distance extra. This totals 48 bits, or six bytes. ++ Therefore if strm->avail_in >= 6, then there is enough input to avoid ++ checking for available input while decoding. ++ ++ - On some architectures, it can be significantly faster (e.g. up to 1.2x ++ faster on x86_64) to load from strm->next_in 64 bits, or 8 bytes, at a ++ time, so INFLATE_FAST_MIN_HAVE == 8. 
++ ++ - The maximum bytes that a single length/distance pair can output is 258 ++ bytes, which is the maximum length that can be coded. inflate_fast() ++ requires strm->avail_out >= 258 for each loop to avoid checking for ++ output space. ++ */ ++void Z_INTERNAL INFLATE_FAST(PREFIX3(stream) *strm, uint32_t start) { ++ /* start: inflate()'s starting value for strm->avail_out */ ++ struct inflate_state *state; ++ z_const unsigned char *in; /* local strm->next_in */ ++ const unsigned char *last; /* have enough input while in < last */ ++ unsigned char *out; /* local strm->next_out */ ++ unsigned char *beg; /* inflate()'s initial strm->next_out */ ++ unsigned char *end; /* while out < end, enough space available */ ++ unsigned char *safe; /* can use chunkcopy provided out < safe */ ++#ifdef INFLATE_STRICT ++ unsigned dmax; /* maximum distance from zlib header */ ++#endif ++ unsigned wsize; /* window size or zero if not using window */ ++ unsigned whave; /* valid bytes in the window */ ++ unsigned wnext; /* window write index */ ++ unsigned char *window; /* allocated sliding window, if wsize != 0 */ ++ ++ /* hold is a local copy of strm->hold. By default, hold satisfies the same ++ invariants that strm->hold does, namely that (hold >> bits) == 0. This ++ invariant is kept by loading bits into hold one byte at a time, like: ++ ++ hold |= next_byte_of_input << bits; in++; bits += 8; ++ ++ If we need to ensure that bits >= 15 then this code snippet is simply ++ repeated. Over one iteration of the outermost do/while loop, this ++ happens up to six times (48 bits of input), as described in the NOTES ++ above. 
++ ++ However, on some little endian architectures, it can be significantly ++ faster to load 64 bits once instead of 8 bits six times: ++ ++ if (bits <= 16) { ++ hold |= next_8_bytes_of_input << bits; in += 6; bits += 48; ++ } ++ ++ Unlike the simpler one byte load, shifting the next_8_bytes_of_input ++ by bits will overflow and lose those high bits, up to 2 bytes' worth. ++ The conservative estimate is therefore that we have read only 6 bytes ++ (48 bits). Again, as per the NOTES above, 48 bits is sufficient for the ++ rest of the iteration, and we will not need to load another 8 bytes. ++ ++ Inside this function, we no longer satisfy (hold >> bits) == 0, but ++ this is not problematic, even if that overflow does not land on an 8 bit ++ byte boundary. Those excess bits will eventually shift down lower as the ++ Huffman decoder consumes input, and when new input bits need to be loaded ++ into the bits variable, the same input bits will be or'ed over those ++ existing bits. A bitwise or is idempotent: (a | b | b) equals (a | b). ++ Note that we therefore write that load operation as "hold |= etc" and not ++ "hold += etc". ++ ++ Outside that loop, at the end of the function, hold is bitwise and'ed ++ with (1<<bits)-1 to drop those excess bits so that, on function exit, we ++ keep the invariant that (state->hold >> state->bits) == 0. 
++ */ ++ uint64_t hold; /* local strm->hold */ ++ unsigned bits; /* local strm->bits */ ++ code const *lcode; /* local strm->lencode */ ++ code const *dcode; /* local strm->distcode */ ++ unsigned lmask; /* mask for first level of length codes */ ++ unsigned dmask; /* mask for first level of distance codes */ ++ const code *here; /* retrieved table entry */ ++ unsigned op; /* code bits, operation, extra bits, or */ ++ /* window position, window bytes to copy */ ++ unsigned len; /* match length, unused bytes */ ++ unsigned dist; /* match distance */ ++ unsigned char *from; /* where to copy match from */ ++ unsigned extra_safe; /* copy chunks safely in all cases */ ++ ++ /* copy state to local variables */ ++ state = (struct inflate_state *)strm->state; ++ in = strm->next_in; ++ last = in + (strm->avail_in - (INFLATE_FAST_MIN_HAVE - 1)); ++ out = strm->next_out; ++ beg = out - (start - strm->avail_out); ++ end = out + (strm->avail_out - (INFLATE_FAST_MIN_LEFT - 1)); ++ safe = out + strm->avail_out; ++#ifdef INFLATE_STRICT ++ dmax = state->dmax; ++#endif ++ wsize = state->wsize; ++ whave = state->whave; ++ wnext = state->wnext; ++ window = state->window; ++ hold = state->hold; ++ bits = state->bits; ++ lcode = state->lencode; ++ dcode = state->distcode; ++ lmask = (1U << state->lenbits) - 1; ++ dmask = (1U << state->distbits) - 1; ++ ++ /* Detect if out and window point to the same memory allocation. In this instance it is ++ necessary to use safe chunk copy functions to prevent overwriting the window. If the ++ window is overwritten then future matches with far distances will fail to copy correctly. 
*/ ++ extra_safe = (wsize != 0 && out >= window && out + INFLATE_FAST_MIN_LEFT <= window + wsize); ++ ++#define REFILL() do { \ ++ hold |= load_64_bits(in, bits); \ ++ in += 7; \ ++ in -= ((bits >> 3) & 7); \ ++ bits |= 56; \ ++ } while (0) ++ ++ /* decode literals and length/distances until end-of-block or not enough ++ input data or output space */ ++ do { ++ REFILL(); ++ here = lcode + (hold & lmask); ++ if (here->op == 0) { ++ *out++ = (unsigned char)(here->val); ++ DROPBITS(here->bits); ++ here = lcode + (hold & lmask); ++ if (here->op == 0) { ++ *out++ = (unsigned char)(here->val); ++ DROPBITS(here->bits); ++ here = lcode + (hold & lmask); ++ } ++ } ++ dolen: ++ DROPBITS(here->bits); ++ op = here->op; ++ if (op == 0) { /* literal */ ++ Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? ++ "inflate: literal '%c'\n" : ++ "inflate: literal 0x%02x\n", here->val)); ++ *out++ = (unsigned char)(here->val); ++ } else if (op & 16) { /* length base */ ++ len = here->val; ++ op &= MAX_BITS; /* number of extra bits */ ++ len += BITS(op); ++ DROPBITS(op); ++ Tracevv((stderr, "inflate: length %u\n", len)); ++ here = dcode + (hold & dmask); ++ if (bits < MAX_BITS + MAX_DIST_EXTRA_BITS) { ++ REFILL(); ++ } ++ dodist: ++ DROPBITS(here->bits); ++ op = here->op; ++ if (op & 16) { /* distance base */ ++ dist = here->val; ++ op &= MAX_BITS; /* number of extra bits */ ++ dist += BITS(op); ++#ifdef INFLATE_STRICT ++ if (dist > dmax) { ++ SET_BAD("invalid distance too far back"); ++ break; ++ } ++#endif ++ DROPBITS(op); ++ Tracevv((stderr, "inflate: distance %u\n", dist)); ++ op = (unsigned)(out - beg); /* max distance in output */ ++ if (dist > op) { /* see if copy from window */ ++ op = dist - op; /* distance back in window */ ++ if (op > whave) { ++ if (state->sane) { ++ SET_BAD("invalid distance too far back"); ++ break; ++ } ++#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR ++ if (len <= op - whave) { ++ do { ++ *out++ = 0; ++ } while (--len); ++ continue; ++ } ++ len 
-= op - whave; ++ do { ++ *out++ = 0; ++ } while (--op > whave); ++ if (op == 0) { ++ from = out - dist; ++ do { ++ *out++ = *from++; ++ } while (--len); ++ continue; ++ } ++#endif ++ } ++ from = window; ++ if (wnext == 0) { /* very common case */ ++ from += wsize - op; ++ } else if (wnext >= op) { /* contiguous in window */ ++ from += wnext - op; ++ } else { /* wrap around window */ ++ op -= wnext; ++ from += wsize - op; ++ if (op < len) { /* some from end of window */ ++ len -= op; ++ out = chunkcopy_safe(out, from, op, safe); ++ from = window; /* more from start of window */ ++ op = wnext; ++ /* This (rare) case can create a situation where ++ the first chunkcopy below must be checked. ++ */ ++ } ++ } ++ if (op < len) { /* still need some from output */ ++ len -= op; ++ out = chunkcopy_safe(out, from, op, safe); ++ out = CHUNKUNROLL(out, &dist, &len); ++ out = chunkcopy_safe(out, out - dist, len, safe); ++ } else { ++ out = chunkcopy_safe(out, from, len, safe); ++ } ++ } else if (extra_safe) { ++ /* Whole reference is in range of current output. */ ++ if (dist >= len || dist >= state->chunksize) ++ out = chunkcopy_safe(out, out - dist, len, safe); ++ else ++ out = CHUNKMEMSET_SAFE(out, dist, len, (unsigned)((safe - out) + 1)); ++ } else { ++ /* Whole reference is in range of current output. No range checks are ++ necessary because we start with room for at least 258 bytes of output, ++ so unroll and roundoff operations can write beyond `out+len` so long ++ as they stay within 258 bytes of `out`. 
++ */ ++ if (dist >= len || dist >= state->chunksize) ++ out = CHUNKCOPY(out, out - dist, len); ++ else ++ out = CHUNKMEMSET(out, dist, len); ++ } ++ } else if ((op & 64) == 0) { /* 2nd level distance code */ ++ here = dcode + here->val + BITS(op); ++ goto dodist; ++ } else { ++ SET_BAD("invalid distance code"); ++ break; ++ } ++ } else if ((op & 64) == 0) { /* 2nd level length code */ ++ here = lcode + here->val + BITS(op); ++ goto dolen; ++ } else if (op & 32) { /* end-of-block */ ++ Tracevv((stderr, "inflate: end of block\n")); ++ state->mode = TYPE; ++ break; ++ } else { ++ SET_BAD("invalid literal/length code"); ++ break; ++ } ++ } while (in < last && out < end); ++ ++ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ ++ len = bits >> 3; ++ in -= len; ++ bits -= len << 3; ++ hold &= (UINT64_C(1) << bits) - 1; ++ ++ /* update state and return */ ++ strm->next_in = in; ++ strm->next_out = out; ++ strm->avail_in = (unsigned)(in < last ? (INFLATE_FAST_MIN_HAVE - 1) + (last - in) ++ : (INFLATE_FAST_MIN_HAVE - 1) - (in - last)); ++ strm->avail_out = (unsigned)(out < end ? 
(INFLATE_FAST_MIN_LEFT - 1) + (end - out) ++ : (INFLATE_FAST_MIN_LEFT - 1) - (out - end)); ++ ++ Assert(bits <= 32, "Remaining bits greater than 32"); ++ state->hold = (uint32_t)hold; ++ state->bits = bits; ++ return; ++} ++ ++/* ++ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): ++ - Using bit fields for code structure ++ - Different op definition to avoid & for extra bits (do & for table bits) ++ - Three separate decoding do-loops for direct, window, and wnext == 0 ++ - Special case for distance > 1 copies to do overlapped load and store copy ++ - Explicit branch predictions (based on measured branch probabilities) ++ - Deferring match copy and interspersed it with decoding subsequent codes ++ - Swapping literal/length else ++ - Swapping window/direct else ++ - Larger unrolled copy loops (three is about right) ++ - Moving len -= 3 statement into middle of loop ++ */ +diff --git a/inflate.c b/inflate.c +index a59cd44..af5abf1 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -1,5 +1,5 @@ + /* inflate.c -- zlib decompression +- * Copyright (C) 1995-2016 Mark Adler ++ * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -7,43 +7,47 @@ + #include "zutil.h" + #include "inftrees.h" + #include "inflate.h" +-#include "inffast.h" + #include "inflate_p.h" + #include "inffixed_tbl.h" + #include "functable.h" + +-/* Architecture-specific hooks. */ +-#ifdef S390_DFLTCC_INFLATE +-# include "arch/s390/dfltcc_inflate.h" +-#else +-/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ +-# define ZALLOC_STATE(strm, items, size) ZALLOC(strm, items, size) +-# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) +-# define ZCOPY_STATE(dst, src, size) memcpy(dst, src, size) +-/* Memory management for the window. Useful for allocation the aligned window. 
*/ +-# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) +-# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) +-/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ +-# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +-/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ +-# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) +-/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */ +-# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) +-/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ +-# define INFLATE_NEED_CHECKSUM(strm) 1 +-/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ +-# define INFLATE_NEED_UPDATEWINDOW(strm) 1 +-/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ +-# define INFLATE_MARK_HOOK(strm) do {} while (0) +-/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. 
*/ +-#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) ++/* Avoid conflicts with zlib.h macros */ ++#ifdef ZLIB_COMPAT ++# undef inflateInit ++# undef inflateInit2 + #endif + + /* function prototypes */ + static int inflateStateCheck(PREFIX3(stream) *strm); +-static int updatewindow(PREFIX3(stream) *strm, const unsigned char *end, uint32_t copy); ++static int updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum); + static uint32_t syncsearch(uint32_t *have, const unsigned char *buf, uint32_t len); + ++static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst, ++ const uint8_t *src, uint32_t copy) { ++ if (!copy) return; ++ struct inflate_state *state = (struct inflate_state*)strm->state; ++#ifdef GUNZIP ++ if (state->flags) { ++ functable.crc32_fold_copy(&state->crc_fold, dst, src, copy); ++ } else ++#endif ++ { ++ strm->adler = state->check = functable.adler32_fold_copy(state->check, dst, src, copy); ++ } ++} ++ ++static inline void inf_chksum(PREFIX3(stream) *strm, const uint8_t *src, uint32_t len) { ++ struct inflate_state *state = (struct inflate_state*)strm->state; ++#ifdef GUNZIP ++ if (state->flags) { ++ functable.crc32_fold(&state->crc_fold, src, len, 0); ++ } else ++#endif ++ { ++ strm->adler = state->check = functable.adler32(state->check, src, len); ++ } ++} ++ + static int inflateStateCheck(PREFIX3(stream) *strm) { + struct inflate_state *state; + if (strm == NULL || strm->zalloc == NULL || strm->zfree == NULL) +@@ -105,17 +109,19 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; ++ if (windowBits < -MAX_WBITS) ++ return Z_STREAM_ERROR; + windowBits = -windowBits; + } else { + wrap = (windowBits >> 4) + 5; + #ifdef GUNZIP + if (windowBits < 48) +- windowBits &= 15; ++ windowBits &= MAX_WBITS; + #endif + } + + /* set number of window bits, free window if different */ +- if (windowBits 
&& (windowBits < 8 || windowBits > 15)) ++ if (windowBits && (windowBits < MIN_WBITS || windowBits > MAX_WBITS)) + return Z_STREAM_ERROR; + if (state->window != NULL && state->wbits != (unsigned)windowBits) { + ZFREE_WINDOW(strm, state->window); +@@ -128,28 +134,24 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits + return PREFIX(inflateReset)(strm); + } + +-int32_t Z_EXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int32_t windowBits, const char *version, int32_t stream_size) { ++/* This function is hidden in ZLIB_COMPAT builds. */ ++int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windowBits) { + int32_t ret; + struct inflate_state *state; + +-#if defined(X86_FEATURES) +- x86_check_features(); +-#elif defined(ARM_FEATURES) +- arm_check_features(); +-#endif ++ /* Initialize functable earlier. */ ++ functable.force_init(); + +- if (version == NULL || version[0] != PREFIX2(VERSION)[0] || stream_size != (int)(sizeof(PREFIX3(stream)))) +- return Z_VERSION_ERROR; + if (strm == NULL) + return Z_STREAM_ERROR; + strm->msg = NULL; /* in case we return an error */ + if (strm->zalloc == NULL) { +- strm->zalloc = zng_calloc; ++ strm->zalloc = PREFIX(zcalloc); + strm->opaque = NULL; + } + if (strm->zfree == NULL) +- strm->zfree = zng_cfree; +- state = (struct inflate_state *) ZALLOC_STATE(strm, 1, sizeof(struct inflate_state)); ++ strm->zfree = PREFIX(zcfree); ++ state = ZALLOC_INFLATE_STATE(strm); + if (state == NULL) + return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); +@@ -166,8 +168,24 @@ int32_t Z_EXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int32_t windowBits + return ret; + } + ++#ifndef ZLIB_COMPAT ++int32_t Z_EXPORT PREFIX(inflateInit)(PREFIX3(stream) *strm) { ++ return PREFIX(inflateInit2)(strm, DEF_WBITS); ++} ++#endif ++ ++/* Function used by zlib.h and zlib-ng version 2.0 macros */ + int32_t Z_EXPORT PREFIX(inflateInit_)(PREFIX3(stream) *strm, const char *version, int32_t 
stream_size) { +- return PREFIX(inflateInit2_)(strm, DEF_WBITS, version, stream_size); ++ if (CHECK_VER_STSIZE(version, stream_size)) ++ return Z_VERSION_ERROR; ++ return PREFIX(inflateInit2)(strm, DEF_WBITS); ++} ++ ++/* Function used by zlib.h and zlib-ng version 2.0 macros */ ++int32_t Z_EXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int32_t windowBits, const char *version, int32_t stream_size) { ++ if (CHECK_VER_STSIZE(version, stream_size)) ++ return Z_VERSION_ERROR; ++ return PREFIX(inflateInit2)(strm, windowBits); + } + + int32_t Z_EXPORT PREFIX(inflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32_t value) { +@@ -175,6 +193,8 @@ int32_t Z_EXPORT PREFIX(inflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 + + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; ++ if (bits == 0) ++ return Z_OK; + INFLATE_PRIME_HOOK(strm, bits, value); /* hook for IBM Z DFLTCC */ + state = (struct inflate_state *)strm->state; + if (bits < 0) { +@@ -195,21 +215,26 @@ int32_t Z_EXPORT PREFIX(inflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 + fixed code decoding. This returns fixed tables from inffixed_tbl.h. 
+ */ + +-void Z_INTERNAL fixedtables(struct inflate_state *state) { ++void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state) { + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; + } + +-int Z_INTERNAL inflate_ensure_window(struct inflate_state *state) { ++int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state) { + /* if it hasn't been done already, allocate space for the window */ + if (state->window == NULL) { + unsigned wsize = 1U << state->wbits; +- state->window = (unsigned char *) ZALLOC_WINDOW(state->strm, wsize + state->chunksize, sizeof(unsigned char)); +- if (state->window == Z_NULL) +- return 1; +- memset(state->window + wsize, 0, state->chunksize); ++ state->window = (unsigned char *)ZALLOC_WINDOW(state->strm, wsize + state->chunksize, sizeof(unsigned char)); ++ if (state->window == NULL) ++ return Z_MEM_ERROR; ++#ifdef Z_MEMORY_SANITIZER ++ /* This is _not_ to subvert the memory sanitizer but to instead unposion some ++ data we willingly and purposefully load uninitialized into vector registers ++ in order to safely read the last < chunksize bytes of the window. */ ++ __msan_unpoison(state->window + wsize, state->chunksize); ++#endif + } + + /* if window not in use yet, initialize */ +@@ -219,7 +244,7 @@ int Z_INTERNAL inflate_ensure_window(struct inflate_state *state) { + state->whave = 0; + } + +- return 0; ++ return Z_OK; + } + + /* +@@ -236,28 +261,50 @@ int Z_INTERNAL inflate_ensure_window(struct inflate_state *state) { + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. 
+ */ +-static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t copy) { ++static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum) { + struct inflate_state *state; + uint32_t dist; + + state = (struct inflate_state *)strm->state; + +- if (inflate_ensure_window(state)) return 1; ++ if (PREFIX(inflate_ensure_window)(state)) return 1; ++ ++ /* len state->wsize or less output bytes into the circular window */ ++ if (len >= state->wsize) { ++ /* Only do this if the caller specifies to checksum bytes AND the platform requires ++ * it (s/390 being the primary exception to this. Also, for now, do the adler checksums ++ * if not a gzip based header. The inline adler checksums will come in the near future, ++ * possibly the next commit */ ++ if (INFLATE_NEED_CHECKSUM(strm) && cksum) { ++ /* We have to split the checksum over non-copied and copied bytes */ ++ if (len > state->wsize) ++ inf_chksum(strm, end - len, len - state->wsize); ++ inf_chksum_cpy(strm, state->window, end - state->wsize, state->wsize); ++ } else { ++ memcpy(state->window, end - state->wsize, state->wsize); ++ } + +- /* copy state->wsize or less output bytes into the circular window */ +- if (copy >= state->wsize) { +- memcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } else { + dist = state->wsize - state->wnext; +- if (dist > copy) +- dist = copy; +- memcpy(state->window + state->wnext, end - copy, dist); +- copy -= dist; +- if (copy) { +- memcpy(state->window, end - copy, copy); +- state->wnext = copy; ++ /* Only do this if the caller specifies to checksum bytes AND the platform requires ++ * We need to maintain the correct order here for the checksum */ ++ dist = MIN(dist, len); ++ if (INFLATE_NEED_CHECKSUM(strm) && cksum) { ++ inf_chksum_cpy(strm, state->window + state->wnext, end - len, dist); ++ } else { ++ memcpy(state->window + state->wnext, end - len, dist); ++ } ++ len -= 
dist; ++ if (len) { ++ if (INFLATE_NEED_CHECKSUM(strm) && cksum) { ++ inf_chksum_cpy(strm, state->window, end - len, len); ++ } else { ++ memcpy(state->window, end - len, len); ++ } ++ ++ state->wnext = len; + state->whave = state->wsize; + } else { + state->wnext += dist; +@@ -270,7 +317,6 @@ static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t + return 0; + } + +- + /* + Private macros for inflate() + Look in inflate_p.h for macros shared with inflateBack() +@@ -409,8 +455,8 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + #ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) +- state->wbits = 15; +- state->check = PREFIX(crc32)(0L, NULL, 0); ++ state->wbits = MAX_WBITS; ++ state->check = CRC32_INITIAL_VALUE; + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; +@@ -434,7 +480,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; +- if (len > 15 || len > state->wbits) { ++ if (len > MAX_WBITS || len > state->wbits) { + SET_BAD("invalid window size"); + break; + } +@@ -464,6 +510,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; ++ Z_FALLTHROUGH; + + case TIME: + NEEDBITS(32); +@@ -473,6 +520,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; ++ Z_FALLTHROUGH; + + case OS: + NEEDBITS(16); +@@ -484,6 +532,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; ++ Z_FALLTHROUGH; + + case EXLEN: + if (state->flags & 0x0400) { +@@ -498,6 +547,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->head->extra = NULL; + } + state->mode = EXTRA; ++ Z_FALLTHROUGH; + + case 
EXTRA: + if (state->flags & 0x0400) { +@@ -507,12 +557,15 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + if (copy) { + if (state->head != NULL && state->head->extra != NULL) { + len = state->head->extra_len - state->length; +- memcpy(state->head->extra + len, next, +- len + copy > state->head->extra_max ? +- state->head->extra_max - len : copy); ++ if (len < state->head->extra_max) { ++ memcpy(state->head->extra + len, next, ++ len + copy > state->head->extra_max ? ++ state->head->extra_max - len : copy); ++ } + } +- if ((state->flags & 0x0200) && (state->wrap & 4)) ++ if ((state->flags & 0x0200) && (state->wrap & 4)) { + state->check = PREFIX(crc32)(state->check, next, copy); ++ } + have -= copy; + next += copy; + state->length -= copy; +@@ -522,6 +575,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + state->length = 0; + state->mode = NAME; ++ Z_FALLTHROUGH; + + case NAME: + if (state->flags & 0x0800) { +@@ -543,6 +597,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + state->length = 0; + state->mode = COMMENT; ++ Z_FALLTHROUGH; + + case COMMENT: + if (state->flags & 0x1000) { +@@ -564,6 +619,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->head->comment = NULL; + } + state->mode = HCRC; ++ Z_FALLTHROUGH; + + case HCRC: + if (state->flags & 0x0200) { +@@ -578,7 +634,9 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } +- strm->adler = state->check = PREFIX(crc32)(0L, NULL, 0); ++ /* compute crc32 checksum if not in raw mode */ ++ if ((state->wrap & 4) && state->flags) ++ strm->adler = state->check = functable.crc32_fold_reset(&state->crc_fold); + state->mode = TYPE; + break; + #endif +@@ -587,6 +645,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + strm->adler = state->check = ZSWAP32(hold); + 
INITBITS(); + state->mode = DICT; ++ Z_FALLTHROUGH; + + case DICT: + if (state->havedict == 0) { +@@ -595,10 +654,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + strm->adler = state->check = ADLER32_INITIAL_VALUE; + state->mode = TYPE; ++ Z_FALLTHROUGH; + + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) + goto inf_leave; ++ Z_FALLTHROUGH; + + case TYPEDO: + /* determine and dispatch block type */ +@@ -617,7 +678,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->mode = STORED; + break; + case 1: /* fixed block */ +- fixedtables(state); ++ PREFIX(fixedtables)(state); + Tracev((stderr, "inflate: fixed codes block%s\n", state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { +@@ -649,17 +710,20 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->mode = COPY_; + if (flush == Z_TREES) + goto inf_leave; ++ Z_FALLTHROUGH; + + case COPY_: + state->mode = COPY; ++ Z_FALLTHROUGH; + + case COPY: + /* copy stored block from input to output */ + copy = state->length; + if (copy) { +- if (copy > have) copy = have; +- if (copy > left) copy = left; +- if (copy == 0) goto inf_leave; ++ copy = MIN(copy, have); ++ copy = MIN(copy, left); ++ if (copy == 0) ++ goto inf_leave; + memcpy(put, next, copy); + have -= copy; + next += copy; +@@ -690,6 +754,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; ++ Z_FALLTHROUGH; + + case LENLENS: + /* get code length code lengths (not a typo) */ +@@ -711,6 +776,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; ++ Z_FALLTHROUGH; + + case CODELENS: + /* get length and distance code code lengths */ +@@ -769,18 +835,18 @@ int32_t Z_EXPORT 
PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + + /* build code tables -- note: do not change the lenbits or distbits +- values here (9 and 6) without reading the comments in inftrees.h ++ values here (10 and 9) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code *)(state->next); +- state->lenbits = 9; ++ state->lenbits = 10; + ret = zng_inflate_table(LENS, state->lens, state->nlen, &(state->next), &(state->lenbits), state->work); + if (ret) { + SET_BAD("invalid literal/lengths set"); + break; + } + state->distcode = (const code *)(state->next); +- state->distbits = 6; ++ state->distbits = 9; + ret = zng_inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { +@@ -791,15 +857,17 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + state->mode = LEN_; + if (flush == Z_TREES) + goto inf_leave; ++ Z_FALLTHROUGH; + + case LEN_: + state->mode = LEN; ++ Z_FALLTHROUGH; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) { + RESTORE(); +- zng_inflate_fast(strm, out); ++ functable.inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; +@@ -853,8 +921,9 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + + /* length code */ +- state->extra = (here.op & 15); ++ state->extra = (here.op & MAX_BITS); + state->mode = LENEXT; ++ Z_FALLTHROUGH; + + case LENEXT: + /* get extra bits, if any */ +@@ -867,6 +936,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; ++ Z_FALLTHROUGH; + + case DIST: + /* get distance code */ +@@ -894,8 +964,9 @@ int32_t Z_EXPORT 
PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + break; + } + state->offset = here.val; +- state->extra = (here.op & 15); ++ state->extra = (here.op & MAX_BITS); + state->mode = DISTEXT; ++ Z_FALLTHROUGH; + + case DISTEXT: + /* get distance extra bits, if any */ +@@ -913,10 +984,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + #endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; ++ Z_FALLTHROUGH; + + case MATCH: + /* copy match from window to output */ +- if (left == 0) goto inf_leave; ++ if (left == 0) ++ goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; +@@ -928,10 +1001,8 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; +- if (copy > state->length) +- copy = state->length; +- if (copy > left) +- copy = left; ++ copy = MIN(copy, state->length); ++ copy = MIN(copy, left); + left -= copy; + state->length -= copy; + do { +@@ -948,16 +1019,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } else { + from = state->window + (state->wnext - copy); + } +- if (copy > state->length) +- copy = state->length; +- if (copy > left) +- copy = left; ++ copy = MIN(copy, state->length); ++ copy = MIN(copy, left); + +- put = functable.chunkcopy_safe(put, from, copy, put + left); +- } else { /* copy from output */ +- copy = state->length; +- if (copy > left) +- copy = left; ++ put = chunkcopy_safe(put, from, copy, put + left); ++ } else { ++ copy = MIN(state->length, left); + + put = functable.chunkmemset_safe(put, state->offset, copy, left); + } +@@ -981,8 +1048,17 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + out -= left; + strm->total_out += out; + state->total += out; +- if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && 
out) +- strm->adler = state->check = UPDATE(state->check, put - out, out); ++ ++ /* compute crc32 checksum if not in raw mode */ ++ if (INFLATE_NEED_CHECKSUM(strm) && state->wrap & 4) { ++ if (out) { ++ inf_chksum(strm, put - out, out); ++ } ++#ifdef GUNZIP ++ if (state->flags) ++ strm->adler = state->check = functable.crc32_fold_final(&state->crc_fold); ++#endif ++ } + out = left; + if ((state->wrap & 4) && ( + #ifdef GUNZIP +@@ -997,6 +1073,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + #ifdef GUNZIP + state->mode = LENGTH; ++ Z_FALLTHROUGH; + + case LENGTH: + if (state->wrap && state->flags) { +@@ -1010,6 +1087,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + } + #endif + state->mode = DONE; ++ Z_FALLTHROUGH; + + case DONE: + /* inflate stream terminated properly */ +@@ -1037,10 +1115,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + */ + inf_leave: + RESTORE(); ++ uint32_t check_bytes = out - strm->avail_out; + if (INFLATE_NEED_UPDATEWINDOW(strm) && + (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH)))) { +- if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { ++ /* update sliding window with respective checksum if not in "raw" mode */ ++ if (updatewindow(strm, strm->next_out, check_bytes, state->wrap & 4)) { + state->mode = MEM; + return Z_MEM_ERROR; + } +@@ -1050,12 +1130,16 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { + strm->total_in += in; + strm->total_out += out; + state->total += out; +- if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) +- strm->adler = state->check = UPDATE(state->check, strm->next_out - out, out); ++ + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 
256 : 0); +- if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) ++ if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) { ++ /* when no sliding window is used, hash the output bytes if no CHECK state */ ++ if (INFLATE_NEED_CHECKSUM(strm) && !state->wsize && flush == Z_FINISH) { ++ inf_chksum(strm, put - check_bytes, check_bytes); ++ } + ret = Z_BUF_ERROR; ++ } + return ret; + } + +@@ -1080,6 +1164,8 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di + return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + ++ INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ ++ + /* copy dictionary */ + if (state->whave && dictionary != NULL) { + memcpy(dictionary, state->window + state->wnext, state->whave - state->wnext); +@@ -1109,9 +1195,11 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 + return Z_DATA_ERROR; + } + ++ INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ ++ + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ +- ret = updatewindow(strm, dictionary + dictLength, dictLength); ++ ret = updatewindow(strm, dictionary + dictLength, dictLength, 0); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; +@@ -1212,8 +1300,8 @@ int32_t Z_EXPORT PREFIX(inflateSync)(PREFIX3(stream) *strm) { + in = strm->total_in; + out = strm->total_out; + PREFIX(inflateReset)(strm); +- strm->total_in = (z_size_t)in; +- strm->total_out = (z_size_t)out; ++ strm->total_in = (z_uintmax_t)in; /* Can't use z_size_t here as it will overflow on 64-bit Windows */ ++ strm->total_out = (z_uintmax_t)out; + state->flags = flags; + state->mode = TYPE; + return Z_OK; +@@ -1240,8 +1328,6 @@ int32_t Z_EXPORT PREFIX(inflateSyncPoint)(PREFIX3(stream) *strm) { + int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source) { + struct 
inflate_state *state; + struct inflate_state *copy; +- unsigned char *window; +- unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == NULL) +@@ -1249,32 +1335,30 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou + state = (struct inflate_state *)source->state; + + /* allocate space */ +- copy = (struct inflate_state *)ZALLOC_STATE(source, 1, sizeof(struct inflate_state)); ++ copy = ZALLOC_INFLATE_STATE(source); + if (copy == NULL) + return Z_MEM_ERROR; +- window = NULL; +- if (state->window != NULL) { +- window = (unsigned char *)ZALLOC_WINDOW(source, 1U << state->wbits, sizeof(unsigned char)); +- if (window == NULL) { +- ZFREE_STATE(source, copy); +- return Z_MEM_ERROR; +- } +- } + + /* copy state */ + memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); +- ZCOPY_STATE((void *)copy, (void *)state, sizeof(struct inflate_state)); ++ ZCOPY_INFLATE_STATE(copy, state); + copy->strm = dest; + if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); +- if (window != NULL) { +- wsize = 1U << state->wbits; +- memcpy(window, state->window, wsize); ++ ++ /* window */ ++ if (state->window != NULL) { ++ copy->window = NULL; ++ if (PREFIX(inflate_ensure_window)(copy)) { ++ ZFREE_STATE(source, copy); ++ return Z_MEM_ERROR; ++ } ++ ZCOPY_WINDOW(copy->window, state->window, (size_t)state->wsize); + } +- copy->window = window; ++ + dest->state = (struct internal_state *)copy; + return Z_OK; + } +diff --git a/inflate.h b/inflate.h +index a427494..39cdf5d 100644 +--- a/inflate.h ++++ b/inflate.h +@@ -1,5 +1,5 @@ + /* inflate.h -- internal inflate state definition +- * Copyright (C) 1995-2016 Mark Adler ++ * Copyright (C) 1995-2019 Mark Adler + * For conditions of distribution and use, see 
copyright notice in zlib.h + */ + +@@ -11,6 +11,9 @@ + #ifndef INFLATE_H_ + #define INFLATE_H_ + ++#include "adler32_fold.h" ++#include "crc32_fold.h" ++ + /* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate(). + NO_GZIP would be used to avoid linking in the crc code when it is not needed. + For shared libraries, gzip decoding should be left enabled. */ +@@ -100,6 +103,9 @@ struct inflate_state { + uint32_t whave; /* valid bytes in the window */ + uint32_t wnext; /* window write index */ + unsigned char *window; /* allocated sliding window, if needed */ ++ ++ struct crc32_fold_s ALIGNED_(16) crc_fold; ++ + /* bit accumulator */ + uint32_t hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ +@@ -128,7 +134,7 @@ struct inflate_state { + uint32_t chunksize; /* size of memory copying chunk */ + }; + +-int Z_INTERNAL inflate_ensure_window(struct inflate_state *state); +-void Z_INTERNAL fixedtables(struct inflate_state *state); ++int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state); ++void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state); + + #endif /* INFLATE_H_ */ +diff --git a/inflate_p.h b/inflate_p.h +index 76fe2dc..eff7387 100644 +--- a/inflate_p.h ++++ b/inflate_p.h +@@ -5,6 +5,40 @@ + #ifndef INFLATE_P_H + #define INFLATE_P_H + ++#include <stdlib.h> ++ ++/* Architecture-specific hooks. */ ++#ifdef S390_DFLTCC_INFLATE ++# include "arch/s390/dfltcc_inflate.h" ++#else ++/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ ++# define ZALLOC_INFLATE_STATE(strm) ((struct inflate_state *)ZALLOC(strm, 1, sizeof(struct inflate_state))) ++# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) ++# define ZCOPY_INFLATE_STATE(dst, src) memcpy(dst, src, sizeof(struct inflate_state)) ++/* Memory management for the window. Useful for allocating the aligned window. 
*/ ++# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) ++# define ZCOPY_WINDOW(dest, src, n) memcpy(dest, src, n) ++# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) ++/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ ++# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) ++/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ ++# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) ++/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */ ++# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) ++/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */ ++# define INFLATE_NEED_CHECKSUM(strm) 1 ++/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */ ++# define INFLATE_NEED_UPDATEWINDOW(strm) 1 ++/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */ ++# define INFLATE_MARK_HOOK(strm) do {} while (0) ++/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */ ++# define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) ++/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data. */ ++# define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data. 
*/ ++# define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#endif ++ + /* + * Macros shared by inflate() and inflateBack() + */ +@@ -91,11 +125,106 @@ + bits -= bits & 7; \ + } while (0) + +-#endif +- + /* Set mode=BAD and prepare error message */ + #define SET_BAD(errmsg) \ + do { \ + state->mode = BAD; \ + strm->msg = (char *)errmsg; \ + } while (0) ++ ++#define INFLATE_FAST_MIN_HAVE 15 ++#define INFLATE_FAST_MIN_LEFT 260 ++ ++/* Load 64 bits from IN and place the bytes at offset BITS in the result. */ ++static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) { ++ uint64_t chunk; ++ memcpy(&chunk, in, sizeof(chunk)); ++ ++#if BYTE_ORDER == LITTLE_ENDIAN ++ return chunk << bits; ++#else ++ return ZSWAP64(chunk) << bits; ++#endif ++} ++ ++/* Behave like chunkcopy, but avoid writing beyond of legal output. */ ++static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, uint8_t *safe) { ++ uint64_t safelen = (safe - out) + 1; ++ len = MIN(len, safelen); ++ int32_t olap_src = from >= out && from < out + len; ++ int32_t olap_dst = out >= from && out < from + len; ++ uint64_t tocopy; ++ ++ /* For all cases without overlap, memcpy is ideal */ ++ if (!(olap_src || olap_dst)) { ++ memcpy(out, from, (size_t)len); ++ return out + len; ++ } ++ ++ /* Complete overlap: Source == destination */ ++ if (out == from) { ++ return out + len; ++ } ++ ++ /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior, ++ * we have to get a bit clever. First if the overlap is such that src falls between dst and dst+len, we can do the ++ * initial bulk memcpy of the nonoverlapping region. Then, we can leverage the size of this to determine the safest ++ * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe look ++ * behind or lookahead distance. 
*/ ++ uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows ++ ++ memcpy(out, from, (size_t)non_olap_size); ++ out += non_olap_size; ++ from += non_olap_size; ++ len -= non_olap_size; ++ ++ /* So this doesn't give us a worst case scenario of function calls in a loop, ++ * we want to instead break this down into copy blocks of fixed lengths */ ++ while (len) { ++ tocopy = MIN(non_olap_size, len); ++ len -= tocopy; ++ ++ while (tocopy >= 32) { ++ memcpy(out, from, 32); ++ out += 32; ++ from += 32; ++ tocopy -= 32; ++ } ++ ++ if (tocopy >= 16) { ++ memcpy(out, from, 16); ++ out += 16; ++ from += 16; ++ tocopy -= 16; ++ } ++ ++ if (tocopy >= 8) { ++ memcpy(out, from, 8); ++ out += 8; ++ from += 8; ++ tocopy -= 8; ++ } ++ ++ if (tocopy >= 4) { ++ memcpy(out, from, 4); ++ out += 4; ++ from += 4; ++ tocopy -= 4; ++ } ++ ++ if (tocopy >= 2) { ++ memcpy(out, from, 2); ++ out += 2; ++ from += 2; ++ tocopy -= 2; ++ } ++ ++ if (tocopy) { ++ *out++ = *from++; ++ } ++ } ++ ++ return out; ++} ++ ++#endif +diff --git a/inftrees.c b/inftrees.c +index faf1d24..423f7b4 100644 +--- a/inftrees.c ++++ b/inftrees.c +@@ -1,5 +1,5 @@ + /* inftrees.c -- generate Huffman trees for efficient decoding +- * Copyright (C) 1995-2016 Mark Adler ++ * Copyright (C) 1995-2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -7,9 +7,7 @@ + #include "zutil.h" + #include "inftrees.h" + +-#define MAXBITS 15 +- +-const char PREFIX(inflate_copyright)[] = " inflate 1.2.11.f Copyright 1995-2016 Mark Adler "; ++const char PREFIX(inflate_copyright)[] = " inflate 1.3.0 Copyright 1995-2023 Mark Adler "; + /* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. 
If for some reason you cannot +@@ -49,8 +47,8 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + const uint16_t *base; /* base value table to use */ + const uint16_t *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ +- uint16_t count[MAXBITS+1]; /* number of codes of each length */ +- uint16_t offs[MAXBITS+1]; /* offsets in table for each length */ ++ uint16_t count[MAX_BITS+1]; /* number of codes of each length */ ++ uint16_t offs[MAX_BITS+1]; /* offsets in table for each length */ + static const uint16_t lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; +@@ -98,17 +96,17 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ +- for (len = 0; len <= MAXBITS; len++) ++ for (len = 0; len <= MAX_BITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; +- for (max = MAXBITS; max >= 1; max--) ++ for (max = MAX_BITS; max >= 1; max--) + if (count[max] != 0) break; +- if (root > max) root = max; +- if (max == 0) { /* no symbols to code at all */ ++ root = MIN(root, max); ++ if (UNLIKELY(max == 0)) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (uint16_t)0; +@@ -119,11 +117,11 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; +- if (root < min) root = min; ++ root = MAX(root, min); + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; +- for (len = 1; len <= MAXBITS; len++) { ++ for (len = 1; len <= MAX_BITS; len++) { + left <<= 1; + left 
-= count[len]; + if (left < 0) return -1; /* over-subscribed */ +@@ -133,7 +131,7 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; +- for (len = 1; len < MAXBITS; len++) ++ for (len = 1; len < MAX_BITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ +@@ -208,12 +206,12 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); +- if (work[sym] + 1U < match) { +- here.op = (unsigned char)0; +- here.val = work[sym]; +- } else if (work[sym] >= match) { ++ if (LIKELY(work[sym] >= match)) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; ++ } else if (work[sym] + 1U < match) { ++ here.op = (unsigned char)0; ++ here.val = work[sym]; + } else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; +@@ -283,7 +281,7 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ +- if (huff != 0) { ++ if (UNLIKELY(huff != 0)) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (uint16_t)0; +diff --git a/inftrees.h b/inftrees.h +index 031c2a1..ad2be15 100644 +--- a/inftrees.h ++++ b/inftrees.h +@@ -2,7 +2,7 @@ + #define INFTREES_H_ + + /* inftrees.h -- header to use inftrees.c +- * Copyright (C) 1995-2005, 2010 Mark Adler ++ * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -39,17 +39,17 @@ typedef struct { + */ + + /* Maximum size of the dynamic table. 
The maximum number of code structures is +- 1444, which is the sum of 852 for literal/length codes and 592 for distance ++ 1924, which is the sum of 1332 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distributions. The arguments to that + program are the number of symbols, the initial root table size, and the +- maximum bit length of a code. "enough 286 9 15" for literal/length codes +- returns returns 852, and "enough 30 6 15" for distance codes returns 592. +- The initial root table size (9 or 6) is found in the fifth argument of the ++ maximum bit length of a code. "enough 286 10 15" for literal/length codes ++ returns 1332, and "enough 30 9 15" for distance codes returns 592. ++ The initial root table size (10 or 9) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +-#define ENOUGH_LENS 852 ++#define ENOUGH_LENS 1332 + #define ENOUGH_DISTS 592 + #define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +diff --git a/insert_string.c b/insert_string.c +index 4ddf9ae..cfe3983 100644 +--- a/insert_string.c ++++ b/insert_string.c +@@ -1,4 +1,4 @@ +-/* insert_string_c -- insert_string variant for c ++/* insert_string.c -- insert_string integer hash variant + * + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +@@ -8,18 +8,14 @@ + #include "zbuild.h" + #include "deflate.h" + +-/* =========================================================================== +- * Update a hash value with the given input byte +- * IN assertion: all calls to to UPDATE_HASH are made with consecutive +- * input characters, so that a running hash key can be computed from the +- * previous key instead of complete recalculation each time. 
+- */ +-#define HASH_SLIDE 16 // Number of bits to slide hash ++#define HASH_SLIDE 16 + +-#define UPDATE_HASH(s, h, val) \ +- h = ((val * 2654435761U) >> HASH_SLIDE); ++#define HASH_CALC(s, h, val) h = ((val * 2654435761U) >> HASH_SLIDE); ++#define HASH_CALC_VAR h ++#define HASH_CALC_VAR_INIT uint32_t h = 0 + +-#define INSERT_STRING insert_string_c +-#define QUICK_INSERT_STRING quick_insert_string_c ++#define UPDATE_HASH update_hash_c ++#define INSERT_STRING insert_string_c ++#define QUICK_INSERT_STRING quick_insert_string_c + + #include "insert_string_tpl.h" +diff --git a/insert_string_roll.c b/insert_string_roll.c +new file mode 100644 +index 0000000..dfea347 +--- /dev/null ++++ b/insert_string_roll.c +@@ -0,0 +1,24 @@ ++/* insert_string_roll.c -- insert_string rolling hash variant ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ */ ++ ++#include "zbuild.h" ++#include "deflate.h" ++ ++#define HASH_SLIDE 5 ++ ++#define HASH_CALC(s, h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) ++#define HASH_CALC_VAR s->ins_h ++#define HASH_CALC_VAR_INIT ++#define HASH_CALC_READ val = strstart[0] ++#define HASH_CALC_MASK (32768u - 1u) ++#define HASH_CALC_OFFSET (STD_MIN_MATCH-1) ++ ++#define UPDATE_HASH update_hash_roll ++#define INSERT_STRING insert_string_roll ++#define QUICK_INSERT_STRING quick_insert_string_roll ++ ++#include "insert_string_tpl.h" +diff --git a/insert_string_tpl.h b/insert_string_tpl.h +index 9796e51..c846177 100644 +--- a/insert_string_tpl.h ++++ b/insert_string_tpl.h +@@ -22,27 +22,52 @@ + * + */ + ++#ifndef HASH_CALC_OFFSET ++# define HASH_CALC_OFFSET 0 ++#endif ++#ifndef HASH_CALC_MASK ++# define HASH_CALC_MASK HASH_MASK ++#endif ++#ifndef HASH_CALC_READ ++# if BYTE_ORDER == LITTLE_ENDIAN ++# define HASH_CALC_READ \ ++ memcpy(&val, strstart, sizeof(val)); ++# else ++# define HASH_CALC_READ \ ++ val = ((uint32_t)(strstart[0])); \ ++ val |= 
((uint32_t)(strstart[1]) << 8); \ ++ val |= ((uint32_t)(strstart[2]) << 16); \ ++ val |= ((uint32_t)(strstart[3]) << 24); ++# endif ++#endif ++ ++/* =========================================================================== ++ * Update a hash value with the given input byte ++ * IN assertion: all calls to UPDATE_HASH are made with consecutive ++ * input characters, so that a running hash key can be computed from the ++ * previous key instead of complete recalculation each time. ++ */ ++Z_INTERNAL uint32_t UPDATE_HASH(deflate_state *const s, uint32_t h, uint32_t val) { ++ (void)s; ++ HASH_CALC(s, h, val); ++ return h & HASH_CALC_MASK; ++} ++ + /* =========================================================================== + * Quick insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + */ +-Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, const uint32_t str) { ++Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { + Pos head; +- uint8_t *strstart = s->window + str; +- uint32_t val, hm, h = 0; +- +-#ifdef UNALIGNED_OK +- val = *(uint32_t *)(strstart); +-#else +- val = ((uint32_t)(strstart[0])); +- val |= ((uint32_t)(strstart[1]) << 8); +- val |= ((uint32_t)(strstart[2]) << 16); +- val |= ((uint32_t)(strstart[3]) << 24); +-#endif ++ uint8_t *strstart = s->window + str + HASH_CALC_OFFSET; ++ uint32_t val, hm; + +- UPDATE_HASH(s, h, val); +- hm = h & HASH_MASK; ++ HASH_CALC_VAR_INIT; ++ HASH_CALC_READ; ++ HASH_CALC(s, HASH_CALC_VAR, val); ++ HASH_CALC_VAR &= HASH_CALC_MASK; ++ hm = HASH_CALC_VAR; + + head = s->head[hm]; + if (LIKELY(head != str)) { +@@ -56,28 +81,22 @@ Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, const uint32_t str) { + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). 
Return + * the previous length of the hash chain. +- * IN assertion: all calls to to INSERT_STRING are made with consecutive +- * input characters and the first MIN_MATCH bytes of str are valid +- * (except for the last MIN_MATCH-1 bytes of the input file). ++ * IN assertion: all calls to INSERT_STRING are made with consecutive ++ * input characters and the first STD_MIN_MATCH bytes of str are valid ++ * (except for the last STD_MIN_MATCH-1 bytes of the input file). + */ +-Z_INTERNAL void INSERT_STRING(deflate_state *const s, const uint32_t str, uint32_t count) { +- uint8_t *strstart = s->window + str; +- uint8_t *strend = strstart + count - 1; /* last position */ ++Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) { ++ uint8_t *strstart = s->window + str + HASH_CALC_OFFSET; ++ uint8_t *strend = strstart + count; + +- for (Pos idx = (Pos)str; strstart <= strend; idx++, strstart++) { +- uint32_t val, hm, h = 0; +- +-#ifdef UNALIGNED_OK +- val = *(uint32_t *)(strstart); +-#else +- val = ((uint32_t)(strstart[0])); +- val |= ((uint32_t)(strstart[1]) << 8); +- val |= ((uint32_t)(strstart[2]) << 16); +- val |= ((uint32_t)(strstart[3]) << 24); +-#endif ++ for (Pos idx = (Pos)str; strstart < strend; idx++, strstart++) { ++ uint32_t val, hm; + +- UPDATE_HASH(s, h, val); +- hm = h & HASH_MASK; ++ HASH_CALC_VAR_INIT; ++ HASH_CALC_READ; ++ HASH_CALC(s, HASH_CALC_VAR, val); ++ HASH_CALC_VAR &= HASH_CALC_MASK; ++ hm = HASH_CALC_VAR; + + Pos head = s->head[hm]; + if (LIKELY(head != idx)) { +diff --git a/match_tpl.h b/match_tpl.h +index b15ca17..d076798 100644 +--- a/match_tpl.h ++++ b/match_tpl.h +@@ -1,21 +1,21 @@ ++/* match_tpl.h -- find longest match template for compare256 variants ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ * ++ * Portions copyright (C) 2014-2021 Konstantin Nosov ++ * Fast-zlib optimized longest_match ++ * 
https://github.com/gildor2/fast_zlib ++ */ + + #include "zbuild.h" ++#include "zutil_p.h" + #include "deflate.h" + #include "functable.h" + + #ifndef MATCH_TPL_H + #define MATCH_TPL_H + +-#ifdef UNALIGNED_OK +-# ifdef UNALIGNED64_OK +-typedef uint64_t bestcmp_t; +-# else +-typedef uint32_t bestcmp_t; +-# endif +-#else +-typedef uint8_t bestcmp_t; +-#endif +- + #define EARLY_EXIT_TRIGGER_LEVEL 5 + + #endif +@@ -37,25 +37,28 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + Z_REGISTER unsigned char *mbase_end; + const Pos *prev = s->prev; + Pos limit; ++#ifdef LONGEST_MATCH_SLOW ++ Pos limit_base; ++#else + int32_t early_exit; ++#endif + uint32_t chain_length, nice_match, best_len, offset; + uint32_t lookahead = s->lookahead; +- bestcmp_t scan_end; +-#ifndef UNALIGNED_OK +- bestcmp_t scan_end0; +-#else +- bestcmp_t scan_start; ++ Pos match_offset = 0; ++#ifdef UNALIGNED_OK ++ uint8_t scan_start[8]; + #endif ++ uint8_t scan_end[8]; + + #define GOTO_NEXT_CHAIN \ + if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \ + continue; \ + return best_len; + +- /* The code is optimized for MAX_MATCH-2 multiple of 16. */ +- Assert(MAX_MATCH == 258, "Code too clever"); ++ /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */ ++ Assert(STD_MAX_MATCH == 258, "Code too clever"); + +- best_len = s->prev_length ? s->prev_length : 1; ++ best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1; + + /* Calculate read offset which should only extend an extra byte + * to find the next best match length. 
+@@ -71,17 +74,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + } + #endif + +- scan_end = *(bestcmp_t *)(scan+offset); +-#ifndef UNALIGNED_OK +- scan_end0 = *(bestcmp_t *)(scan+offset+1); ++#ifdef UNALIGNED64_OK ++ memcpy(scan_start, scan, sizeof(uint64_t)); ++ memcpy(scan_end, scan+offset, sizeof(uint64_t)); ++#elif defined(UNALIGNED_OK) ++ memcpy(scan_start, scan, sizeof(uint32_t)); ++ memcpy(scan_end, scan+offset, sizeof(uint32_t)); + #else +- scan_start = *(bestcmp_t *)(scan); ++ scan_end[0] = *(scan+offset); ++ scan_end[1] = *(scan+offset+1); + #endif + mbase_end = (mbase_start+offset); + + /* Do not waste too much time if we already have a good match */ + chain_length = s->max_chain_length; +- early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL; + if (best_len >= s->good_match) + chain_length >>= 2; + nice_match = (uint32_t)s->nice_match; +@@ -90,7 +96,41 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + * we prevent matches with the string of window index 0 + */ + limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0; ++#ifdef LONGEST_MATCH_SLOW ++ limit_base = limit; ++ if (best_len >= STD_MIN_MATCH) { ++ /* We're continuing search (lazy evaluation). */ ++ uint32_t i, hash; ++ Pos pos; ++ ++ /* Find a most distant chain starting from scan with index=1 (index=0 corresponds ++ * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because ++ * these strings are not yet inserted into the hash table. ++ */ ++ hash = s->update_hash(s, 0, scan[1]); ++ hash = s->update_hash(s, hash, scan[2]); + ++ for (i = 3; i <= best_len; i++) { ++ hash = s->update_hash(s, hash, scan[i]); ++ ++ /* If we're starting with best_len >= 3, we can use offset search. 
*/ ++ pos = s->head[hash]; ++ if (pos < cur_match) { ++ match_offset = (Pos)(i - 2); ++ cur_match = pos; ++ } ++ } ++ ++ /* Update offset-dependent variables */ ++ limit = limit_base+match_offset; ++ if (cur_match <= limit) ++ goto break_matching; ++ mbase_start -= match_offset; ++ mbase_end -= match_offset; ++ } ++#else ++ early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL; ++#endif + Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead"); + for (;;) { + if (cur_match >= strstart) +@@ -106,31 +146,31 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + #ifdef UNALIGNED_OK + if (best_len < sizeof(uint32_t)) { + for (;;) { +- if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end && +- *(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start) ++ if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 && ++ zng_memcmp_2(mbase_start+cur_match, scan_start) == 0) + break; + GOTO_NEXT_CHAIN; + } + # ifdef UNALIGNED64_OK + } else if (best_len >= sizeof(uint64_t)) { + for (;;) { +- if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end && +- *(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start) ++ if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 && ++ zng_memcmp_8(mbase_start+cur_match, scan_start) == 0) + break; + GOTO_NEXT_CHAIN; + } + # endif + } else { + for (;;) { +- if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end && +- *(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start) ++ if (zng_memcmp_4(mbase_end+cur_match, scan_end) == 0 && ++ zng_memcmp_4(mbase_start+cur_match, scan_start) == 0) + break; + GOTO_NEXT_CHAIN; + } + } + #else + for (;;) { +- if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 && ++ if (mbase_end[cur_match] == scan_end[0] && mbase_end[cur_match+1] == scan_end[1] && + mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1]) + break; + GOTO_NEXT_CHAIN; +@@ -140,7 +180,9 @@ Z_INTERNAL uint32_t 
LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan"); + + if (len > best_len) { +- s->match_start = cur_match; ++ uint32_t match_start = cur_match - match_offset; ++ s->match_start = match_start; ++ + /* Do not look for matches beyond the end of the input. */ + if (len > lookahead) + return lookahead; +@@ -158,23 +200,90 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { + #endif + } + #endif +- scan_end = *(bestcmp_t *)(scan+offset); +-#ifndef UNALIGNED_OK +- scan_end0 = *(bestcmp_t *)(scan+offset+1); ++ ++#ifdef UNALIGNED64_OK ++ memcpy(scan_end, scan+offset, sizeof(uint64_t)); ++#elif defined(UNALIGNED_OK) ++ memcpy(scan_end, scan+offset, sizeof(uint32_t)); ++#else ++ scan_end[0] = *(scan+offset); ++ scan_end[1] = *(scan+offset+1); ++#endif ++ ++#ifdef LONGEST_MATCH_SLOW ++ /* Look for a better string offset */ ++ if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) { ++ Pos pos, next_pos; ++ uint32_t i, hash; ++ unsigned char *scan_endstr; ++ ++ /* Go back to offset 0 */ ++ cur_match -= match_offset; ++ match_offset = 0; ++ next_pos = cur_match; ++ for (i = 0; i <= len - STD_MIN_MATCH; i++) { ++ pos = prev[(cur_match + i) & wmask]; ++ if (pos < next_pos) { ++ /* Hash chain is more distant, use it */ ++ if (pos <= limit_base + i) ++ goto break_matching; ++ next_pos = pos; ++ match_offset = (Pos)i; ++ } ++ } ++ /* Switch cur_match to next_pos chain */ ++ cur_match = next_pos; ++ ++ /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get ++ * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets ++ * us include one more byte into hash - the byte which will be checked ++ * in main loop now, and which allows to grow match by 1. 
++ */ ++ scan_endstr = scan + len - (STD_MIN_MATCH+1); ++ ++ hash = s->update_hash(s, 0, scan_endstr[0]); ++ hash = s->update_hash(s, hash, scan_endstr[1]); ++ hash = s->update_hash(s, hash, scan_endstr[2]); ++ ++ pos = s->head[hash]; ++ if (pos < cur_match) { ++ match_offset = (Pos)(len - (STD_MIN_MATCH+1)); ++ if (pos <= limit_base + match_offset) ++ goto break_matching; ++ cur_match = pos; ++ } ++ ++ /* Update offset-dependent variables */ ++ limit = limit_base+match_offset; ++ mbase_start = window-match_offset; ++ mbase_end = (mbase_start+offset); ++ continue; ++ } + #endif + mbase_end = (mbase_start+offset); +- } else if (UNLIKELY(early_exit)) { ++ } ++#ifndef LONGEST_MATCH_SLOW ++ else if (UNLIKELY(early_exit)) { + /* The probability of finding a match later if we here is pretty low, so for + * performance it's best to outright stop here for the lower compression levels + */ + break; + } ++#endif + GOTO_NEXT_CHAIN; + } +- + return best_len; ++ ++#ifdef LONGEST_MATCH_SLOW ++break_matching: ++ ++ if (best_len < s->lookahead) ++ return best_len; ++ ++ return s->lookahead; ++#endif + } + ++#undef LONGEST_MATCH_SLOW + #undef LONGEST_MATCH + #undef COMPARE256 +-#undef COMPARE258 +diff --git a/slide_hash.c b/slide_hash.c +new file mode 100644 +index 0000000..b9fbbdb +--- /dev/null ++++ b/slide_hash.c +@@ -0,0 +1,52 @@ ++/* slide_hash.c -- slide hash table C implementation ++ * ++ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zbuild.h" ++#include "deflate.h" ++ ++/* =========================================================================== ++ * Slide the hash table when sliding the window down (could be avoided with 32 ++ * bit values at the expense of memory usage). We slide even when level == 0 to ++ * keep the hash table consistent if we switch back to level > 0 later. 
++ */ ++static inline void slide_hash_c_chain(Pos *table, uint32_t entries, uint16_t wsize) { ++#ifdef NOT_TWEAK_COMPILER ++ table += entries; ++ do { ++ unsigned m; ++ m = *--table; ++ *table = (Pos)(m >= wsize ? m-wsize : 0); ++ /* If entries is not on any hash chain, prev[entries] is garbage but ++ * its value will never be used. ++ */ ++ } while (--entries); ++#else ++ { ++ /* As of I make this change, gcc (4.8.*) isn't able to vectorize ++ * this hot loop using saturated-subtraction on x86-64 architecture. ++ * To avoid this defect, we can change the loop such that ++ * o. the pointer advance forward, and ++ * o. demote the variable 'm' to be local to the loop, and ++ * choose type "Pos" (instead of 'unsigned int') for the ++ * variable to avoid unnecessary zero-extension. ++ */ ++ unsigned int i; ++ Pos *q = table; ++ for (i = 0; i < entries; i++) { ++ Pos m = *q; ++ Pos t = (Pos)wsize; ++ *q++ = (Pos)(m >= t ? m-t: 0); ++ } ++ } ++#endif /* NOT_TWEAK_COMPILER */ ++} ++ ++Z_INTERNAL void slide_hash_c(deflate_state *s) { ++ uint16_t wsize = (uint16_t)s->w_size; ++ ++ slide_hash_c_chain(s->head, HASH_SIZE, wsize); ++ slide_hash_c_chain(s->prev, wsize, wsize); ++} +diff --git a/test/Makefile.in b/test/Makefile.in +index 6a075d3..429e2c7 100644 +--- a/test/Makefile.in ++++ b/test/Makefile.in +@@ -9,46 +9,26 @@ SRCDIR= + SRCTOP= + LIBNAME= + TEST_LDFLAGS=-L.. 
../$(LIBNAME).a +-WITH_FUZZERS= + +-COMPATTESTS = +-QEMU_RUN= +-QEMU_VER:=$(shell command -v $(QEMU_RUN) --version 2> /dev/null) ++EMU_RUN= + +-all: oldtests cvetests $(COMPATTESTS) fuzzer ghtests ++all: alltests + +-oldtests: #set by ../configure ++alltests: #set by ../configure + check_cross_dep: +-ifneq (,$(QEMU_RUN)) ++ifneq (,$(findstring qemu,$(EMU_RUN))) ++QEMU_VER:=$(shell command -v $(EMU_RUN) --version 2> /dev/null) + ifeq (,$(QEMU_VER)) +- $(error "You need QEMU to run tests on non-native platform") ++$(error You need QEMU to run tests on non-native platform) + endif + endif + + ALL_SRC_FILES := $(wildcard ../*) + +-# Only check the fuzzer when it is a stand-alone executable. +-ifneq (,$(LIB_FUZZING_ENGINE)) +-fuzzer: +-else +- ifeq (0,$(WITH_FUZZERS)) +-fuzzer: +- else +-fuzzer: +- @${QEMU_RUN} ../checksum_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../compress_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../example_small_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../example_large_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../example_flush_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../example_dict_fuzzer$(EXE) $(ALL_SRC_FILES) && \ +- ${QEMU_RUN} ../minigzip_fuzzer$(EXE) $(ALL_SRC_FILES) +- endif +-endif +- + teststatic: check_cross_dep + @TMPST=tmpst_$$$$; \ + HELLOST=tmphellost_$$$$; \ +- if echo hello world | ${QEMU_RUN} ../minigzip$(EXE) > $$HELLOST && ${QEMU_RUN} ../minigzip$(EXE) -d < $$HELLOST && ${QEMU_RUN} ../example$(EXE) $$TMPST && ${QEMU_RUN} ../adler32_test$(EXE) && ${QEMU_RUN} ../crc32_test$(EXE); then \ ++ if echo hello world | ${EMU_RUN} ../minigzip$(EXE) > $$HELLOST && ${EMU_RUN} ../minigzip$(EXE) -d < $$HELLOST && ${EMU_RUN} ../example$(EXE) $$TMPST; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; exit 1; \ +@@ -62,55 +42,41 @@ testshared: check_cross_dep + SHLIB_PATH=`pwd`/..:$(SHLIB_PATH) ; export SHLIB_PATH; \ + TMPSH=tmpsh_$$$$; \ + HELLOSH=tmphellosh_$$$$; \ +- 
if echo hello world | ${QEMU_RUN} ../minigzipsh$(EXE) > $$HELLOSH && ${QEMU_RUN} ../minigzipsh$(EXE) -d < $$HELLOSH && ${QEMU_RUN} ../examplesh$(EXE) $$TMPSH && ${QEMU_RUN} ../adler32_testsh$(EXE) && ${QEMU_RUN} ../crc32_testsh$(EXE); then \ ++ if echo hello world | ${EMU_RUN} ../minigzipsh$(EXE) > $$HELLOSH && ${EMU_RUN} ../minigzipsh$(EXE) -d < $$HELLOSH && ${EMU_RUN} ../examplesh$(EXE) $$TMPSH; then \ + echo ' *** zlib shared test OK ***'; \ + else \ + echo ' *** zlib shared test FAILED ***'; exit 1; \ + fi; \ + rm -f $$TMPSH $$HELLOSH + +-cvetests: testCVEinputs +- +-# Tests requiring zlib-ng to be built with --zlib-compat +-compattests: testCVE-2003-0107 +- +-testCVEinputs: check_cross_dep +- @EXE=$(EXE) QEMU_RUN="${QEMU_RUN}" $(SRCDIR)/testCVEinputs.sh +- +-testCVE-2003-0107: CVE-2003-0107$(EXE) check_cross_dep +- @if ${QEMU_RUN} ./CVE-2003-0107$(EXE); then \ +- echo ' *** zlib not vulnerable to CVE-2003-0107 ***'; \ +- else \ +- echo ' *** zlib VULNERABLE to CVE-2003-0107 ***'; exit 1; \ +- fi +- +-CVE-2003-0107.o: $(SRCDIR)/CVE-2003-0107.c +- $(CC) $(CFLAGS) -I.. -I$(SRCTOP) -c -o $@ $(SRCDIR)/CVE-2003-0107.c +- +-CVE-2003-0107$(EXE): CVE-2003-0107.o +- $(CC) $(CFLAGS) -o $@ CVE-2003-0107.o $(TEST_LDFLAGS) +- + .PHONY: ghtests +-ghtests: testGH-361 testGH-364 testGH-751 ++ghtests: testGH-361 testGH-364 testGH-751 testGH-1235 + + .PHONY: testGH-361 + testGH-361: +- $(QEMU_RUN) ../minigzip$(EXE) -4 <$(SRCDIR)/GH-361/test.txt >/dev/null ++ $(EMU_RUN) ../minigzip$(EXE) -4 <$(SRCDIR)/GH-361/test.txt >/dev/null + + switchlevels$(EXE): $(SRCDIR)/switchlevels.c + $(CC) $(CFLAGS) -I.. 
-I$(SRCTOP) -o $@ $< $(TEST_LDFLAGS) + + .PHONY: testGH-364 + testGH-364: switchlevels$(EXE) +- $(QEMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null ++ $(EMU_RUN) ./switchlevels$(EXE) 1 5 9 3 <$(SRCDIR)/GH-364/test.bin >/dev/null + + .PHONY: testGH-751 + testGH-751: +- $(QEMU_RUN) ../minigzip$(EXE) <$(SRCDIR)/GH-751/test.txt | $(QEMU_RUN) ../minigzip$(EXE) -d >/dev/null ++ $(EMU_RUN) ../minigzip$(EXE) <$(SRCDIR)/GH-751/test.txt | $(EMU_RUN) ../minigzip$(EXE) -d >/dev/null ++ ++gh1235$(EXE): $(SRCDIR)/gh1235.c ++ $(CC) $(CFLAGS) -I.. -I$(SRCTOP) -o $@ $< $(TEST_LDFLAGS) ++ ++.PHONY: testGH-1235 ++testGH-1235: gh1235$(EXE) ++ $(EMU_RUN) ./gh1235$(EXE) + + clean: + rm -f *.o *.gcda *.gcno *.gcov +- rm -f CVE-2003-0107$(EXE) switchlevels$(EXE) ++ rm -f switchlevels$(EXE) gh1235$(EXE) + +-distclean: ++distclean: clean + rm -f Makefile +diff --git a/test/README.md b/test/README.md +index b15b01d..d844ba5 100644 +--- a/test/README.md ++++ b/test/README.md +@@ -3,11 +3,12 @@ Contents + + |Name|Description| + |-|-| +-|[CVE-2003-0107.c](https://nvd.nist.gov/vuln/detail/CVE-2003-0107)|Buffer overflow in the gzprintf function, requires ZLIB_COMPAT| ++|[CVE-2003-0107](https://nvd.nist.gov/vuln/detail/CVE-2003-0107)|Buffer overflow in the gzprintf function, requires ZLIB_COMPAT| + |[CVE-2002-0059](https://nvd.nist.gov/vuln/detail/CVE-2002-0059)|inflateEnd to release memory more than once| + |[CVE-2004-0797](https://nvd.nist.gov/vuln/detail/CVE-2004-0797)|Error handling in inflate and inflateBack causes crash| + |[CVE-2005-1849](https://nvd.nist.gov/vuln/detail/CVE-2005-1849)|inftrees.h bug causes crash| +-|[CVE-2005-2096](https://nvd.nist.gov/vuln/detail/CVE-2005-2096)|Buffer overflow when incomplete code description ++|[CVE-2005-2096](https://nvd.nist.gov/vuln/detail/CVE-2005-2096)|Buffer overflow when incomplete code description| ++|[CVE-2018-25032](https://nvd.nist.gov/vuln/detail/CVE-2018-25032)|Memory corruption when compressing if the input has 
many distant matches.| + |[GH-361](https://github.com/zlib-ng/zlib-ng/issues/361)|Test case for overlapping matches| + |[GH-364](https://github.com/zlib-ng/zlib-ng/issues/364)|Test case for switching compression levels| + |[GH-382](https://github.com/zlib-ng/zlib-ng/issues/382)|Test case for deflateEnd returning -3 in deflate quick| +@@ -28,7 +29,7 @@ Some of the files in _test_ are licensed differently: + which is licensed under the CC-BY license. See + https://www.ploscompbiol.org/static/license for more information. + +- - test/data/lcet10.txt is from Project Gutenberg. It does not have expired ++ - test/data/lcet10.txt is from Project Gutenberg. It does not have expired + copyright, but is still in the public domain according to the license information. + (https://www.gutenberg.org/ebooks/53). + +diff --git a/test/abi/ignore b/test/abi/ignore +index dba3639..583c921 100644 +--- a/test/abi/ignore ++++ b/test/abi/ignore +@@ -8,5 +8,5 @@ + + # Size varies with version number + [suppress_variable] +- name = zlibng_string ++ name_regexp = z(|ng|libng)_(|v)string + +diff --git a/test/abicheck.md b/test/abicheck.md +index 3e29126..57337f5 100644 +--- a/test/abicheck.md ++++ b/test/abicheck.md +@@ -29,7 +29,7 @@ means someone has to check out and build + the previous source tree and extract its .abi + using abidw. This can be slow. + +-If you don't mind the slowness, run abicheck.sh --refresh_if, ++If you don't mind the slowness, run abicheck.sh --refresh-if, + and it will download and build the reference version + and extract the .abi on the spot if needed. + (FIXME: should this be the default?) 
+diff --git a/test/abicheck.sh b/test/abicheck.sh +index 89199a5..1656711 100755 +--- a/test/abicheck.sh ++++ b/test/abicheck.sh +@@ -49,7 +49,7 @@ do + --refresh) + refresh=true + ;; +- --refresh_if) ++ --refresh-if) + refresh_if=true + ;; + --help) +@@ -67,17 +67,14 @@ done + # Choose reference repo and commit + if test "$suffix" = "" + then +- # Reference is zlib 1.2.11 ++ # Reference is zlib 1.2.13. + ABI_GIT_REPO=https://github.com/madler/zlib.git +- ABI_GIT_COMMIT=v1.2.11 ++ ABI_GIT_COMMIT=04f42ceca40f73e2978b50e93806c2a18c1281fc + else +- # Reference should be the tag for zlib-ng 2.0 +- # but until that bright, shining day, use some +- # random recent SHA. Annoyingly, can't shorten it. ++ # Reference is most recent zlib-ng develop with zlib 1.2.12 compatible api. + ABI_GIT_REPO=https://github.com/zlib-ng/zlib-ng.git +- ABI_GIT_COMMIT=56ce27343bf295ae9457f8e3d38ec96d2f949a1c ++ ABI_GIT_COMMIT=e4614ebcb9b3e5b108dc983c155e4baf80882311 + fi +-# FIXME: even when using a tag, check the hash. + + # Test compat build for ABI compatibility with zlib + if test "$CHOST" = "" +@@ -94,7 +91,11 @@ then + fi + + # Canonicalize CHOST to work around bug in original zlib's configure +-export CHOST=$(sh $TESTDIR/../tools/config.sub $CHOST) ++# (Don't export it if it wasn't already exported, else may cause ++# default compiler detection failure and shared library link error ++# when building both zlib and zlib-ng. ++# See https://github.com/zlib-ng/zlib-ng/issues/1219) ++CHOST=$(sh $TESTDIR/../tools/config.sub $CHOST) + + if test "$CHOST" = "" + then +@@ -121,7 +122,7 @@ then + git reset --hard FETCH_HEAD + cd .. + # Build unstripped, uninstalled, very debug shared library +- CFLAGS="$CFLAGS -ggdb" sh src.d/configure $CONFIGURE_ARGS ++ CFLAGS="$CFLAGS -ggdb" src.d/configure $CONFIGURE_ARGS + make -j2 + cd .. + # Find shared library, extract its abi +@@ -134,12 +135,10 @@ then + # caching abi files in git (but that would slow builds down). + fi + +-if test -f "$ABIFILE" ++if ! 
test -f "$ABIFILE" + then +- ABIFILE="$ABIFILE" +-else +- echo "abicheck: SKIP: $ABIFILE not found; rerun with --refresh or --refresh_if" +- exit 0 ++ echo "abicheck: SKIP: $ABIFILE not found; rerun with --refresh or --refresh-if" ++ exit 1 + fi + + # Build unstripped, uninstalled, very debug shared library +diff --git a/test/example.c b/test/example.c +index c31d1cf..d0e38c9 100644 +--- a/test/example.c ++++ b/test/example.c +@@ -12,19 +12,12 @@ + #include "deflate.h" + + #include +- +-#include +-#include +-#include +-#include + #include ++#include + +-#define TESTFILE "foo.gz" ++#include "test_shared_ng.h" + +-static const char hello[] = "hello, hello!"; +-/* "hello world" would be more standard, but the repeated "hello" +- * stresses the compression code better, sorry... +- */ ++#define TESTFILE "foo.gz" + + static const char dictionary[] = "hello"; + static unsigned long dictId = 0; /* Adler32 value of the dictionary */ +@@ -33,13 +26,13 @@ static unsigned long dictId = 0; /* Adler32 value of the dictionary */ + #define MAX_DICTIONARY_SIZE 32768 + + +-void test_compress (unsigned char *compr, z_size_t comprLen,unsigned char *uncompr, z_size_t uncomprLen); ++void test_compress (unsigned char *compr, z_uintmax_t comprLen, unsigned char *uncompr, z_uintmax_t uncomprLen); + void test_gzio (const char *fname, unsigned char *uncompr, z_size_t uncomprLen); + void test_deflate (unsigned char *compr, size_t comprLen); + void test_inflate (unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t uncomprLen); + void test_large_deflate (unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t uncomprLen, int zng_params); + void test_large_inflate (unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t uncomprLen); +-void test_flush (unsigned char *compr, z_size_t *comprLen); ++void test_flush (unsigned char *compr, z_uintmax_t *comprLen); + void test_sync (unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t 
uncomprLen); + void test_dict_deflate (unsigned char *compr, size_t comprLen); + void test_dict_inflate (unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t uncomprLen); +@@ -70,11 +63,11 @@ void error(const char *format, ...) { + /* =========================================================================== + * Test compress() and uncompress() + */ +-void test_compress(unsigned char *compr, z_size_t comprLen, unsigned char *uncompr, z_size_t uncomprLen) { ++void test_compress(unsigned char *compr, z_uintmax_t comprLen, unsigned char *uncompr, z_uintmax_t uncomprLen) { + int err; +- size_t len = strlen(hello)+1; ++ unsigned int len = (unsigned int)strlen(hello)+1; + +- err = PREFIX(compress)(compr, &comprLen, (const unsigned char*)hello, (z_size_t)len); ++ err = PREFIX(compress)(compr, &comprLen, (const unsigned char*)hello, len); + CHECK_ERR(err, "compress"); + + strcpy((char*)uncompr, "garbage"); +@@ -409,7 +402,7 @@ void test_large_inflate(unsigned char *compr, size_t comprLen, unsigned char *un + /* =========================================================================== + * Test deflate() with full flush + */ +-void test_flush(unsigned char *compr, z_size_t *comprLen) { ++void test_flush(unsigned char *compr, z_uintmax_t *comprLen) { + PREFIX3(stream) c_stream; /* compression stream */ + int err; + unsigned int len = (unsigned int)strlen(hello)+1; +@@ -441,8 +434,10 @@ void test_flush(unsigned char *compr, z_size_t *comprLen) { + *comprLen = (z_size_t)c_stream.total_out; + } + ++#ifdef ZLIBNG_ENABLE_TESTS + /* =========================================================================== + * Test inflateSync() ++ * We expect a certain compressed block layout, so skip this with the original zlib. 
+ */ + void test_sync(unsigned char *compr, size_t comprLen, unsigned char *uncompr, size_t uncomprLen) { + int err; +@@ -478,6 +473,7 @@ void test_sync(unsigned char *compr, size_t comprLen, unsigned char *uncompr, si + + printf("after inflateSync(): hel%s\n", (char *)uncompr); + } ++#endif + + /* =========================================================================== + * Test deflate() with preset dictionary +@@ -547,14 +543,14 @@ void test_dict_inflate(unsigned char *compr, size_t comprLen, unsigned char *unc + } + CHECK_ERR(err, "inflate with dict"); + } +- ++ + err = PREFIX(inflateGetDictionary)(&d_stream, NULL, &check_dictionary_len); + CHECK_ERR(err, "inflateGetDictionary"); + #ifndef S390_DFLTCC_INFLATE +- if (check_dictionary_len != sizeof(dictionary)) ++ if (check_dictionary_len < sizeof(dictionary)) + error("bad dictionary length\n"); + #endif +- ++ + err = PREFIX(inflateGetDictionary)(&d_stream, check_dictionary, &check_dictionary_len); + CHECK_ERR(err, "inflateGetDictionary"); + #ifndef S390_DFLTCC_INFLATE +@@ -789,7 +785,7 @@ void test_deflate_prime(unsigned char *compr, size_t comprLen, unsigned char *un + err = PREFIX(deflatePrime)(&c_stream, 5, 0x0); + CHECK_ERR(err, "deflatePrime"); + /* Gzip modified time */ +- err = PREFIX(deflatePrime)(&c_stream, 32, 0x0); ++ err = deflate_prime_32(&c_stream, 0); + CHECK_ERR(err, "deflatePrime"); + /* Gzip extra flags */ + err = PREFIX(deflatePrime)(&c_stream, 8, 0x0); +@@ -809,10 +805,10 @@ void test_deflate_prime(unsigned char *compr, size_t comprLen, unsigned char *un + + /* Gzip uncompressed data crc32 */ + crc = PREFIX(crc32)(0, (const uint8_t *)hello, (uint32_t)len); +- err = PREFIX(deflatePrime)(&c_stream, 32, crc); ++ err = deflate_prime_32(&c_stream, crc); + CHECK_ERR(err, "deflatePrime"); + /* Gzip uncompressed data length */ +- err = PREFIX(deflatePrime)(&c_stream, 32, (uint32_t)len); ++ err = deflate_prime_32(&c_stream, (uint32_t)len); + CHECK_ERR(err, "deflatePrime"); + + err = 
PREFIX(deflateEnd)(&c_stream); +@@ -957,8 +953,8 @@ void test_deflate_tune(unsigned char *compr, size_t comprLen) { + */ + int main(int argc, char *argv[]) { + unsigned char *compr, *uncompr; +- z_size_t comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ +- z_size_t uncomprLen = comprLen; ++ z_uintmax_t comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ ++ z_uintmax_t uncomprLen = comprLen; + static const char* myVersion = PREFIX2(VERSION); + + if (zVersion()[0] != myVersion[0]) { +@@ -966,11 +962,11 @@ int main(int argc, char *argv[]) { + exit(1); + + } else if (strcmp(zVersion(), PREFIX2(VERSION)) != 0) { +- fprintf(stderr, "warning: different zlib version\n"); ++ fprintf(stderr, "warning: different zlib version linked: %s\n", zVersion()); + } + +- printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", +- PREFIX2(VERSION), PREFIX2(VERNUM), PREFIX(zlibCompileFlags)()); ++ printf("zlib-ng version %s = 0x%08lx, compile flags = 0x%lx\n", ++ ZLIBNG_VERSION, ZLIBNG_VERNUM, PREFIX(zlibCompileFlags)()); + + compr = (unsigned char*)calloc((unsigned int)comprLen, 1); + uncompr = (unsigned char*)calloc((unsigned int)uncomprLen, 1); +@@ -997,7 +993,9 @@ int main(int argc, char *argv[]) { + #endif + + test_flush(compr, &comprLen); ++#ifdef ZLIBNG_ENABLE_TESTS + test_sync(compr, comprLen, uncompr, uncomprLen); ++#endif + comprLen = uncomprLen; + + test_dict_deflate(compr, comprLen); +diff --git a/test/fuzz/standalone_fuzz_target_runner.c b/test/fuzz/standalone_fuzz_target_runner.c +index 49f5e7f..810a560 100644 +--- a/test/fuzz/standalone_fuzz_target_runner.c ++++ b/test/fuzz/standalone_fuzz_target_runner.c +@@ -1,6 +1,5 @@ + #include + #include +-#include + + #include "zbuild.h" + +diff --git a/test/infcover.c b/test/infcover.c +index 3446289..6606d22 100644 +--- a/test/infcover.c ++++ b/test/infcover.c +@@ -11,17 +11,11 @@ + #undef NDEBUG + #include + #include +-#include + + /* get definition of internal structure so we can mess with it (see pull()), + 
and so we can call inflate_trees() (see cover5()) */ +-#define ZLIB_INTERNAL + #include "zbuild.h" +-#ifdef ZLIB_COMPAT +-# include "zlib.h" +-#else +-# include "zlib-ng.h" +-#endif ++#include "zutil.h" + #include "inftrees.h" + #include "inflate.h" + +@@ -374,12 +368,14 @@ static void cover_support(void) { + inf("3 0", "use fixed blocks", 0, -15, 1, Z_STREAM_END); + inf("", "bad window size", 0, 1, 0, Z_STREAM_ERROR); + ++#ifdef ZLIB_COMPAT + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = NULL; + ret = PREFIX(inflateInit_)(&strm, &PREFIX2(VERSION)[1], (int)sizeof(PREFIX3(stream))); + assert(ret == Z_VERSION_ERROR); + mem_done(&strm, "wrong version"); ++#endif + + strm.avail_in = 0; + strm.next_in = NULL; +@@ -480,8 +476,11 @@ static void cover_back(void) { + PREFIX3(stream) strm; + unsigned char win[32768]; + ++#ifdef ZLIB_COMPAT + ret = PREFIX(inflateBackInit_)(NULL, 0, win, 0, 0); + assert(ret == Z_VERSION_ERROR); ++#endif ++ + ret = PREFIX(inflateBackInit)(NULL, 0, win); + assert(ret == Z_STREAM_ERROR); + ret = PREFIX(inflateBack)(NULL, NULL, NULL, NULL, NULL); +@@ -670,6 +669,10 @@ static void cover_fast(void) { + Z_STREAM_END); + } + ++static void cover_cve_2022_37434(void) { ++ inf("1f 8b 08 04 61 62 63 64 61 62 52 51 1f 8b 08 04 61 62 63 64 61 62 52 51 1f 8b 08 04 61 62 63 64 61 62 52 51 1f 8b 08 04 61 62 63 64 61 62 52 51", "wtf", 13, 47, 12, Z_OK); ++} ++ + int main(void) { + fprintf(stderr, "%s\n", zVersion()); + cover_support(); +@@ -678,5 +681,6 @@ int main(void) { + cover_inflate(); + cover_trees(); + cover_fast(); ++ cover_cve_2022_37434(); + return 0; + } +diff --git a/test/minideflate.c b/test/minideflate.c +index ae04a29..6b16a03 100644 +--- a/test/minideflate.c ++++ b/test/minideflate.c +@@ -3,23 +3,12 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +-#define _POSIX_SOURCE 1 /* This file needs POSIX for fileno(). */ +-#define _POSIX_C_SOURCE 200112 /* For snprintf(). 
*/ ++#include "zbuild.h" + + #include +-#include +-#include +-#include + #include +-#include +-#include + +-#include "zbuild.h" +-#ifdef ZLIB_COMPAT +-# include "zlib.h" +-#else +-# include "zlib-ng.h" +-#endif ++#include "zutil.h" + + #if defined(_WIN32) || defined(__CYGWIN__) + # include +@@ -29,10 +18,11 @@ + # define SET_BINARY_MODE(file) + #endif + +-#if MAX_MEM_LEVEL >= 8 +-# define DEF_MEM_LEVEL 8 ++#ifdef _MSC_VER ++# include ++# define strcasecmp _stricmp + #else +-# define DEF_MEM_LEVEL MAX_MEM_LEVEL ++# include + #endif + + #define CHECK_ERR(err, msg) { \ +@@ -42,6 +32,9 @@ + } \ + } + ++/* Default read/write i/o buffer size based on GZBUFSIZE */ ++#define BUFSIZE 131072 ++ + /* =========================================================================== + * deflate() using specialized parameters + */ +@@ -112,7 +105,7 @@ void deflate_params(FILE *fin, FILE *fout, int32_t read_buf_size, int32_t write_ + err = PREFIX(deflate)(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "deflate"); +- } while (err == Z_OK); ++ } while (1); + } + + /* Output remaining data in write buffer */ +@@ -174,8 +167,14 @@ void inflate_params(FILE *fin, FILE *fout, int32_t read_buf_size, int32_t write_ + + do { + err = PREFIX(inflate)(&d_stream, flush); ++ ++ /* Ignore Z_BUF_ERROR if we are finishing and read buffer size is ++ * purposefully limited */ ++ if (flush == Z_FINISH && err == Z_BUF_ERROR && read_buf_size != BUFSIZE) ++ err = Z_OK; ++ + if (err == Z_STREAM_END) break; +- CHECK_ERR(err, "deflate"); ++ CHECK_ERR(err, "inflate"); + + if (d_stream.next_out == write_buf + write_buf_size) { + fwrite(write_buf, 1, write_buf_size, fout); +@@ -198,7 +197,7 @@ void inflate_params(FILE *fin, FILE *fout, int32_t read_buf_size, int32_t write_ + err = PREFIX(inflate)(&d_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "inflate"); +- } while (err == Z_OK); ++ } while (1); + } + + /* Output remaining data in write buffer */ +@@ -214,36 
+213,46 @@ void inflate_params(FILE *fin, FILE *fout, int32_t read_buf_size, int32_t write_ + } + + void show_help(void) { +- printf("Usage: minideflate [-c] [-f|-h|-R|-F] [-m level] [-r/-t size] [-s flush] [-w bits] [-0 to -9] [input file]\n\n" \ +- " -c : write to standard output\n" \ +- " -d : decompress\n" \ +- " -f : compress with Z_FILTERED\n" \ +- " -h : compress with Z_HUFFMAN_ONLY\n" \ +- " -R : compress with Z_RLE\n" \ +- " -F : compress with Z_FIXED\n" \ +- " -m : memory level (1 to 8)\n" \ +- " -w : window bits (8 to 15 for gzip, -8 to -15 for zlib)\n" \ +- " -s : flush type (0 to 5)\n" \ +- " -r : read buffer size\n" \ +- " -t : write buffer size\n" \ ++ printf("Usage: minideflate [-c][-d][-k] [-f|-h|-R|-F] [-m level] [-r/-t size] [-s flush] [-w bits] [-0 to -9] [input file]\n\n" ++ " -c : write to standard output\n" ++ " -d : decompress\n" ++ " -k : keep input file\n" ++ " -f : compress with Z_FILTERED\n" ++ " -h : compress with Z_HUFFMAN_ONLY\n" ++ " -R : compress with Z_RLE\n" ++ " -F : compress with Z_FIXED\n" ++ " -m : memory level (1 to 8)\n" ++ " -w : window bits..\n" ++ " : -1 to -15 for raw deflate\n" ++ " : 0 to 15 for deflate (adler32)\n" ++ " : 16 to 31 for gzip (crc32)\n" ++ " -s : flush type (0 to 5)\n" ++ " -r : read buffer size\n" ++ " -t : write buffer size\n" + " -0 to -9 : compression level\n\n"); + } + + int main(int argc, char **argv) { + int32_t i; + int32_t mem_level = DEF_MEM_LEVEL; +- int32_t window_bits = MAX_WBITS; ++ int32_t window_bits = INT32_MAX; + int32_t strategy = Z_DEFAULT_STRATEGY; + int32_t level = Z_DEFAULT_COMPRESSION; +- int32_t read_buf_size = 4096; +- int32_t write_buf_size = 4096; ++ int32_t read_buf_size = BUFSIZE; ++ int32_t write_buf_size = BUFSIZE; + int32_t flush = Z_NO_FLUSH; + uint8_t copyout = 0; + uint8_t uncompr = 0; +- char out_file[320]; ++ uint8_t keep = 0; + FILE *fin = stdin; + FILE *fout = stdout; + ++ ++ if (argc == 1) { ++ show_help(); ++ return 64; /* EX_USAGE */ ++ } ++ + for (i = 1; i < 
argc; i++) { + if ((strcmp(argv[i], "-m") == 0) && (i + 1 < argc)) + mem_level = atoi(argv[++i]); +@@ -259,8 +268,12 @@ int main(int argc, char **argv) { + copyout = 1; + else if (strcmp(argv[i], "-d") == 0) + uncompr = 1; ++ else if (strcmp(argv[i], "-k") == 0) ++ keep = 1; + else if (strcmp(argv[i], "-f") == 0) + strategy = Z_FILTERED; ++ else if (strcmp(argv[i], "-F") == 0) ++ strategy = Z_FIXED; + else if (strcmp(argv[i], "-h") == 0) + strategy = Z_HUFFMAN_ONLY; + else if (strcmp(argv[i], "-R") == 0) +@@ -273,12 +286,13 @@ int main(int argc, char **argv) { + } else if (argv[i][0] == '-') { + show_help(); + return 64; /* EX_USAGE */ +- } else ++ } else + break; + } + + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); ++ + if (i != argc) { + fin = fopen(argv[i], "rb+"); + if (fin == NULL) { +@@ -286,15 +300,53 @@ int main(int argc, char **argv) { + exit(1); + } + if (!copyout) { +- snprintf(out_file, sizeof(out_file), "%s%s", argv[i], (window_bits < 0) ? ".zz" : ".gz"); ++ char *out_file = (char *)calloc(1, strlen(argv[i]) + 6); ++ if (out_file == NULL) { ++ fprintf(stderr, "Not enough memory\n"); ++ exit(1); ++ } ++ strcat(out_file, argv[i]); ++ if (!uncompr) { ++ if (window_bits < 0) { ++ strcat(out_file, ".zraw"); ++ } else if (window_bits > MAX_WBITS) { ++ strcat(out_file, ".gz"); ++ } else { ++ strcat(out_file, ".z"); ++ } ++ } else { ++ char *out_ext = strrchr(out_file, '.'); ++ if (out_ext != NULL) { ++ if (strcasecmp(out_ext, ".zraw") == 0 && window_bits == INT32_MAX) { ++ fprintf(stderr, "Must specify window bits for raw deflate stream\n"); ++ exit(1); ++ } ++ *out_ext = 0; ++ } ++ } + fout = fopen(out_file, "wb"); + if (fout == NULL) { + fprintf(stderr, "Failed to open file: %s\n", out_file); + exit(1); + } ++ free(out_file); + } + } + ++ if (window_bits == INT32_MAX) { ++ window_bits = MAX_WBITS; ++ /* Auto-detect wrapper for inflateInit */ ++ if (uncompr) ++ window_bits += 32; ++ } ++ ++ if (window_bits == INT32_MAX) { ++ window_bits = MAX_WBITS; ++ 
/* Auto-detect wrapper for inflateInit */ ++ if (uncompr) ++ window_bits += 32; ++ } ++ + if (uncompr) { + inflate_params(fin, fout, read_buf_size, write_buf_size, window_bits, flush); + } else { +@@ -303,6 +355,9 @@ int main(int argc, char **argv) { + + if (fin != stdin) { + fclose(fin); ++ if (!copyout && !keep) { ++ unlink(argv[i]); ++ } + } + if (fout != stdout) { + fclose(fout); +diff --git a/test/minigzip.c b/test/minigzip.c +index 29729f3..537f64b 100644 +--- a/test/minigzip.c ++++ b/test/minigzip.c +@@ -12,9 +12,6 @@ + * real thing. + */ + +-#define _POSIX_SOURCE 1 /* This file needs POSIX for fdopen(). */ +-#define _POSIX_C_SOURCE 200112 /* For snprintf(). */ +- + #include "zbuild.h" + #ifdef ZLIB_COMPAT + # include "zlib.h" +@@ -64,6 +61,7 @@ extern int unlink (const char *); + static char *prog; + + void error (const char *msg); ++void gz_fatal (gzFile file); + void gz_compress (FILE *in, gzFile out); + #ifdef USE_MMAP + int gz_compress_mmap (FILE *in, gzFile out); +@@ -81,6 +79,17 @@ void error(const char *msg) { + exit(1); + } + ++/* =========================================================================== ++ * Display last error message of gzFile, close it and exit ++ */ ++ ++void gz_fatal(gzFile file) { ++ int err; ++ fprintf(stderr, "%s: %s\n", prog, PREFIX(gzerror)(file, &err)); ++ PREFIX(gzclose)(file); ++ exit(1); ++} ++ + /* =========================================================================== + * Compress input to output then close both files. + */ +@@ -88,7 +97,6 @@ void error(const char *msg) { + void gz_compress(FILE *in, gzFile out) { + char *buf; + int len; +- int err; + + #ifdef USE_MMAP + /* Try first compressing with mmap. 
If mmap fails (minigzip used in a +@@ -111,7 +119,7 @@ void gz_compress(FILE *in, gzFile out) { + } + if (len == 0) break; + +- if (PREFIX(gzwrite)(out, buf, (unsigned)len) != len) error(PREFIX(gzerror)(out, &err)); ++ if (PREFIX(gzwrite)(out, buf, (unsigned)len) != len) gz_fatal(out); + } + free(buf); + fclose(in); +@@ -121,11 +129,10 @@ void gz_compress(FILE *in, gzFile out) { + #ifdef USE_MMAP /* MMAP version, Miguel Albrecht */ + + /* Try compressing the input file at once using mmap. Return Z_OK if +- * if success, Z_ERRNO otherwise. ++ * success, Z_ERRNO otherwise. + */ + int gz_compress_mmap(FILE *in, gzFile out) { + int len; +- int err; + int ifd = fileno(in); + char *buf; /* mmap'ed buffer for the entire input file */ + off_t buf_len; /* length of the input file */ +@@ -143,7 +150,7 @@ int gz_compress_mmap(FILE *in, gzFile out) { + /* Compress the whole file at once: */ + len = PREFIX(gzwrite)(out, buf, (unsigned)buf_len); + +- if (len != (int)buf_len) error(PREFIX(gzerror)(out, &err)); ++ if (len != (int)buf_len) gz_fatal(out); + + munmap(buf, buf_len); + fclose(in); +@@ -158,7 +165,6 @@ int gz_compress_mmap(FILE *in, gzFile out) { + void gz_uncompress(gzFile in, FILE *out) { + char *buf = (char *)malloc(BUFLENW); + int len; +- int err; + + if (buf == NULL) error("out of memory"); + +@@ -166,7 +172,7 @@ void gz_uncompress(gzFile in, FILE *out) { + len = PREFIX(gzread)(in, buf, BUFLENW); + if (len < 0) { + free(buf); +- error(PREFIX(gzerror)(in, &err)); ++ gz_fatal(in); + } + if (len == 0) break; + +@@ -259,16 +265,16 @@ void file_uncompress(char *file, int keep) { + } + + void show_help(void) { +- printf("Usage: minigzip [-c] [-d] [-k] [-f|-h|-R|-F|-T] [-A] [-0 to -9] [files...]\n\n" \ +- " -c : write to standard output\n" \ +- " -d : decompress\n" \ +- " -k : keep input files\n" \ +- " -f : compress with Z_FILTERED\n" \ +- " -h : compress with Z_HUFFMAN_ONLY\n" \ +- " -R : compress with Z_RLE\n" \ +- " -F : compress with Z_FIXED\n" \ +- " -T : stored 
raw\n" \ +- " -A : auto detect type\n" \ ++ printf("Usage: minigzip [-c] [-d] [-k] [-f|-h|-R|-F|-T] [-A] [-0 to -9] [files...]\n\n" ++ " -c : write to standard output\n" ++ " -d : decompress\n" ++ " -k : keep input files\n" ++ " -f : compress with Z_FILTERED\n" ++ " -h : compress with Z_HUFFMAN_ONLY\n" ++ " -R : compress with Z_RLE\n" ++ " -F : compress with Z_FIXED\n" ++ " -T : stored raw\n" ++ " -A : auto detect type\n" + " -0 to -9 : compression level\n\n"); + } + +diff --git a/test/pigz/CMakeLists.txt b/test/pigz/CMakeLists.txt +index 43082cf..bc6830a 100644 +--- a/test/pigz/CMakeLists.txt ++++ b/test/pigz/CMakeLists.txt +@@ -13,6 +13,7 @@ + # WITH_CODE_COVERAGE - Enable code coverage reporting + # WITH_THREADS - Enable threading support + # PIGZ_ENABLE_TESTS - Enable adding unit tests ++# PIGZ_VERSION - Set the version of pigz to build + # ZLIB_ROOT - Path to the zlib source directory + # PTHREADS4W_ROOT - Path to pthreads4w source directory on Windows. + # If not specified then threading will be disabled. 
+@@ -28,6 +29,7 @@ include(../../cmake/detect-coverage.cmake) + option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF) + option(WITH_THREADS "Enable threading support" ON) + option(PIGZ_ENABLE_TESTS "Build unit tests" ON) ++option(PIGZ_VERSION "Set the version of pigz to build" "") + + project(pigz LANGUAGES C) + +@@ -54,11 +56,16 @@ elseif(WIN32) + endif() + + # Fetch pigz source code from official repository ++if(PIGZ_VERSION STREQUAL "") ++ set(PIGZ_TAG master) ++else() ++ set(PIGZ_TAG ${PIGZ_VERSION}) ++endif() + FetchContent_Declare(pigz +- GIT_REPOSITORY https://github.com/madler/pigz.git) +-FetchContent_MakeAvailable(pigz) +-FetchContent_GetProperties(pigz) ++ GIT_REPOSITORY https://github.com/madler/pigz.git ++ GIT_TAG ${PIGZ_TAG}) + ++FetchContent_GetProperties(pigz) + if(NOT pigz_POPULATED) + FetchContent_Populate(pigz) + endif() +@@ -159,7 +166,7 @@ if(PIGZ_ENABLE_TESTS) + "-DDECOMPRESS_ARGS=-d;-c" + -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/${path} + -DTEST_NAME=${test_id} +- -P ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/test-compress.cmake) ++ -P ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/compress-and-verify.cmake) + endif() + endmacro() + +diff --git a/test/pkgcheck.sh b/test/pkgcheck.sh +index 4c757df..9421401 100644 +--- a/test/pkgcheck.sh ++++ b/test/pkgcheck.sh +@@ -47,10 +47,6 @@ _EOF_ + + set -ex + +-# Caller can also set CMAKE_ARGS or CONFIGURE_ARGS if desired +-CMAKE_ARGS=${CMAKE_ARGS} +-CONFIGURE_ARGS=${CONFIGURE_ARGS} +- + case "$1" in + --zlib-compat) + suffix="" +@@ -117,7 +113,7 @@ cd .. + # Original build system + rm -rf btmp1 pkgtmp1 + mkdir btmp1 pkgtmp1 +-export DESTDIR=$(pwd)/pkgtmp1 ++export DESTDIR=$(pwd)/pkgtmp1/ + cd btmp1 + case $(uname) in + Darwin) +@@ -125,18 +121,20 @@ cd btmp1 + ;; + esac + ../configure $CONFIGURE_ARGS +- make ++ make -j2 + make install + cd .. + + repack_ar() { +- if ! cmp --silent pkgtmp1/usr/local/lib/libz$suffix.a pkgtmp2/usr/local/lib/libz$suffix.a ++ archive1=$(cd pkgtmp1; find . -type f -name '*.a'; cd ..) 
++ archive2=$(cd pkgtmp2; find . -type f -name '*.a'; cd ..) ++ if ! cmp --silent pkgtmp1/$archive1 pkgtmp2/$archive2 + then + echo "libz$suffix.a does not match. Probably filenames differ (.o vs .c.o). Unpacking and renaming..." + # Note: %% is posix shell syntax meaning "Remove Largest Suffix Pattern", see + # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02 +- cd pkgtmp1; ar x usr/local/lib/libz$suffix.a; rm usr/local/lib/libz$suffix.a; cd .. +- cd pkgtmp2; ar x usr/local/lib/libz$suffix.a; rm usr/local/lib/libz$suffix.a; for a in *.c.o; do mv $a ${a%%.c.o}.o; done; cd .. ++ cd pkgtmp1; ar x $archive1; rm $archive1; cd .. ++ cd pkgtmp2; ar x $archive2; rm $archive2; for a in *.c.o; do mv $a ${a%%.c.o}.o; done; cd .. + # Also, remove __.SYMDEF SORTED if present, as it has those funky .c.o names embedded in it. + rm -f pkgtmp[12]/__.SYMDEF\ SORTED + fi +@@ -152,6 +150,9 @@ Darwin) + ;; + esac + ++# Remove cmake target files to avoid mismatch with configure ++find pkgtmp2 -type f -name '*.cmake' -exec rm '{}' \; ++ + # The ar on newer systems defaults to -D (i.e. deterministic), + # but FreeBSD 12.1, Debian 8, and Ubuntu 14.04 seem to not do that. + # I had trouble passing -D safely to the ar inside CMakeLists.txt, +diff --git a/test/switchlevels.c b/test/switchlevels.c +index 1e1fb00..b31bc0f 100644 +--- a/test/switchlevels.c ++++ b/test/switchlevels.c +@@ -2,8 +2,6 @@ + * Each chunk is compressed with a user-specified level. + */ + +-#define _POSIX_SOURCE 1 /* This file needs POSIX for fileno(). 
*/ +- + #include "zbuild.h" + #ifdef ZLIB_COMPAT + # include "zlib.h" +@@ -12,8 +10,6 @@ + #endif + + #include +-#include +-#include + + #if defined(_WIN32) || defined(__CYGWIN__) + # include +@@ -115,7 +111,7 @@ done: + + void show_help(void) + { +- printf("Usage: switchlevels [-w bits] level1 size1 [level2 size2 ...]\n\n" \ ++ printf("Usage: switchlevels [-w bits] level1 size1 [level2 size2 ...]\n\n" + " -w : window bits (8 to 15 for gzip, -8 to -15 for zlib)\n\n"); + } + +diff --git a/tools/makecrct.c b/tools/makecrct.c +index 3f6b37b..5c3ba58 100644 +--- a/tools/makecrct.c ++++ b/tools/makecrct.c +@@ -1,177 +1,250 @@ +-/* crc32.c -- output crc32 tables +- * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler ++/* makecrct.c -- output crc32 tables ++ * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + + #include + #include + #include "zbuild.h" +-#include "deflate.h" +-#include "crc32_p.h" ++#include "zutil.h" + +-static uint32_t crc_table[8][256]; +-static uint32_t crc_comb[GF2_DIM][GF2_DIM]; ++/* ++ The crc32 table header file contains tables for both 32-bit and 64-bit ++ z_word_t's, and so requires a 64-bit type be available. In that case, ++ z_word_t must be defined to be 64-bits. This code then also generates ++ and writes out the tables for the case that z_word_t is 32 bits. ++*/ ++ ++#define W 8 /* Need a 64-bit integer type in order to generate crc32 tables. 
*/ ++ ++#include "crc32_braid_p.h" ++ ++static uint32_t crc_table[256]; ++static z_word_t crc_big_table[256]; ++ ++static uint32_t crc_braid_table[W][256]; ++static z_word_t crc_braid_big_table[W][256]; ++static uint32_t x2n_table[32]; ++ ++#include "crc32_braid_comb_p.h" + +-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat); + static void make_crc_table(void); +-static void make_crc_combine_table(void); + static void print_crc_table(void); +-static void print_crc_combine_table(void); +-static void write_table(const uint32_t *, int); + ++static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w); + +-/* ========================================================================= */ +-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) { +- int n; +- +- for (n = 0; n < GF2_DIM; n++) +- square[n] = gf2_matrix_times(mat, mat[n]); +-} ++static void write_table(const uint32_t *table, int k); ++static void write_table32hi(const z_word_t *table, int k); ++static void write_table64(const z_word_t *table, int k); + +-/* ========================================================================= ++/* ========================================================================= */ ++/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, +- with the lowest powers in the most significant bit. Then adding polynomials ++ with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by +- one. If we call the above polynomial p, and represent a byte as the ++ one. 
If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and +- taking the remainder. The register is initialized to zero, and for each ++ taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where +- x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by +- x (which is shifting right by one and adding x^32 mod p if the bit shifted +- out is a one). We start with the highest power (least significant bit) of +- q and repeat for all eight bits of q. +- +- The first table is simply the CRC of all possible eight bit values. This is +- all the information needed to generate CRCs on data a byte at a time for all +- combinations of CRC register values and incoming bytes. The remaining tables +- allow for word-at-a-time CRC calculation for both big-endian and little- +- endian machines, where a word is four bytes. ++ x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x ++ (which is shifting right by one and adding x^32 mod p if the bit shifted out ++ is a one). We start with the highest power (least significant bit) of q and ++ repeat for all eight bits of q. ++ ++ The table is simply the CRC of all possible eight bit values. This is all the ++ information needed to generate CRCs on data a byte at a time for all ++ combinations of CRC register values and incoming bytes. 
+ */ + static void make_crc_table(void) { +- int n, k; +- uint32_t c; +- uint32_t poly; /* polynomial exclusive-or pattern */ +- /* terms of polynomial defining this crc (except x^32): */ +- static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26}; +- +- /* make exclusive-or pattern from polynomial (0xedb88320) */ +- poly = 0; +- for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) +- poly |= (uint32_t)1 << (31 - p[n]); +- +- /* generate a crc for every 8-bit value */ +- for (n = 0; n < 256; n++) { +- c = (uint32_t)n; +- for (k = 0; k < 8; k++) +- c = c & 1 ? poly ^ (c >> 1) : c >> 1; +- crc_table[0][n] = c; ++ unsigned i, j, n; ++ uint32_t p; ++ ++ /* initialize the CRC of bytes tables */ ++ for (i = 0; i < 256; i++) { ++ p = i; ++ for (j = 0; j < 8; j++) ++ p = p & 1 ? (p >> 1) ^ POLY : p >> 1; ++ crc_table[i] = p; ++ crc_big_table[i] = ZSWAP64(p); + } + +- /* generate crc for each value followed by one, two, and three zeros, +- and then the byte reversal of those as well as the first table */ +- for (n = 0; n < 256; n++) { +- c = crc_table[0][n]; +- crc_table[4][n] = ZSWAP32(c); +- for (k = 1; k < 4; k++) { +- c = crc_table[0][c & 0xff] ^ (c >> 8); +- crc_table[k][n] = c; +- crc_table[k + 4][n] = ZSWAP32(c); +- } +- } ++ /* initialize the x^2^n mod p(x) table */ ++ p = (uint32_t)1 << 30; /* x^1 */ ++ x2n_table[0] = p; ++ for (n = 1; n < 32; n++) ++ x2n_table[n] = p = multmodp(p, p); ++ ++ /* initialize the braiding tables -- needs x2n_table[] */ ++ braid(crc_braid_table, crc_braid_big_table, N, W); + } + +-static void make_crc_combine_table(void) { +- int n, k; +- /* generate zero operators table for crc32_combine() */ +- +- /* generate the operator to apply a single zero bit to a CRC -- the +- first row adds the polynomial if the low bit is a 1, and the +- remaining rows shift the CRC right one bit */ +- k = GF2_DIM - 3; +- crc_comb[k][0] = 0xedb88320UL; /* CRC-32 polynomial */ +- uint32_t row = 1; +- for (n = 1; n < GF2_DIM; 
n++) { +- crc_comb[k][n] = row; +- row <<= 1; ++/* ++ Generate the little and big-endian braid tables for the given n and z_word_t ++ size w. Each array must have room for w blocks of 256 elements. ++ */ ++static void braid(uint32_t ltl[][256], z_word_t big[][256], int n, int w) { ++ int k; ++ uint32_t i, p, q; ++ for (k = 0; k < w; k++) { ++ p = x2nmodp(((z_off64_t)n * w + 3 - k) << 3, 0); ++ ltl[k][0] = 0; ++ big[w - 1 - k][0] = 0; ++ for (i = 1; i < 256; i++) { ++ ltl[k][i] = q = multmodp(i << 24, p); ++ big[w - 1 - k][i] = ZSWAP64(q); ++ } + } +- /* generate operators that apply 2, 4, and 8 zeros to a CRC, putting +- the last one, the operator for one zero byte, at the 0 position */ +- gf2_matrix_square(crc_comb[k + 1], crc_comb[k]); +- gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]); +- gf2_matrix_square(crc_comb[0], crc_comb[k + 2]); +- +- /* generate operators for applying 2^n zero bytes to a CRC, filling out +- the remainder of the table -- the operators repeat after GF2_DIM +- values of n, so the table only needs GF2_DIM entries, regardless of +- the size of the length being processed */ +- for (n = 1; n < k; n++) +- gf2_matrix_square(crc_comb[n], crc_comb[n - 1]); + } + ++/* ++ Write the 32-bit values in table[0..k-1] to out, five per line in ++ hexadecimal separated by commas. ++ */ + static void write_table(const uint32_t *table, int k) { + int n; + + for (n = 0; n < k; n++) +- printf("%s0x%08" PRIx32 "%s", n % 5 ? "" : " ", ++ printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ", + (uint32_t)(table[n]), +- n == k - 1 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); ++ n == k - 1 ? "" : (n % 5 == 4 ? 
",\n" : ", ")); + } + +-static void print_crc_table(void) { +- int k; +- printf("#ifndef CRC32_TBL_H_\n"); +- printf("#define CRC32_TBL_H_\n\n"); +- printf("/* crc32_tbl.h -- tables for rapid CRC calculation\n"); +- printf(" * Generated automatically by makecrct.c\n */\n\n"); ++/* ++ Write the high 32-bits of each value in table[0..k-1] to out, five per line ++ in hexadecimal separated by commas. ++ */ ++static void write_table32hi(const z_word_t *table, int k) { ++ int n; + +- /* print CRC table */ +- printf("static const uint32_t "); +- printf("crc_table[8][256] =\n{\n {\n"); +- write_table(crc_table[0], 256); +- for (k = 1; k < 8; k++) { +- printf(" },\n {\n"); +- write_table(crc_table[k], 256); +- } +- printf(" }\n};\n\n"); ++ for (n = 0; n < k; n++) ++ printf("%s0x%08" PRIx32 "%s", n == 0 || n % 5 ? "" : " ", ++ (uint32_t)(table[n] >> 32), ++ n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); ++} + +- printf("#endif /* CRC32_TBL_H_ */\n"); ++/* ++ Write the 64-bit values in table[0..k-1] to out, three per line in ++ hexadecimal separated by commas. This assumes that if there is a 64-bit ++ type, then there is also a long long integer type, and it is at least 64 ++ bits. If not, then the type cast and format string can be adjusted ++ accordingly. ++ */ ++static void write_table64(const z_word_t *table, int k) { ++ int n; ++ ++ for (n = 0; n < k; n++) ++ printf("%s0x%016" PRIx64 "%s", n == 0 || n % 3 ? "" : " ", ++ (uint64_t)(table[n]), ++ n == k - 1 ? "" : (n % 3 == 2 ? 
",\n" : ", ")); + } + +-static void print_crc_combine_table(void) { +- int k; +- printf("#ifndef CRC32_COMB_TBL_H_\n"); +- printf("#define CRC32_COMB_TBL_H_\n\n"); +- printf("/* crc32_comb_tbl.h -- zero operators table for CRC combine\n"); ++static void print_crc_table(void) { ++ int k, n; ++ uint32_t ltl[8][256]; ++ z_word_t big[8][256]; ++ ++ printf("#ifndef CRC32_BRAID_TBL_H_\n"); ++ printf("#define CRC32_BRAID_TBL_H_\n\n"); ++ printf("/* crc32_braid_tbl.h -- tables for braided CRC calculation\n"); + printf(" * Generated automatically by makecrct.c\n */\n\n"); + +- /* print zero operator table */ +- printf("static const uint32_t "); +- printf("crc_comb[%d][%d] =\n{\n {\n", GF2_DIM, GF2_DIM); +- write_table(crc_comb[0], GF2_DIM); +- for (k = 1; k < GF2_DIM; k++) { +- printf(" },\n {\n"); +- write_table(crc_comb[k], GF2_DIM); ++ /* print little-endian CRC table */ ++ printf("static const uint32_t crc_table[] = {\n"); ++ printf(" "); ++ write_table(crc_table, 256); ++ printf("};\n\n"); ++ ++ /* print big-endian CRC table for 64-bit z_word_t */ ++ printf("#ifdef W\n\n"); ++ printf("#if W == 8\n\n"); ++ printf("static const z_word_t crc_big_table[] = {\n"); ++ printf(" "); ++ write_table64(crc_big_table, 256); ++ printf("};\n\n"); ++ ++ /* print big-endian CRC table for 32-bit z_word_t */ ++ printf("#else /* W == 4 */\n\n"); ++ printf("static const z_word_t crc_big_table[] = {\n"); ++ printf(" "); ++ write_table32hi(crc_big_table, 256); ++ printf("};\n\n"); ++ printf("#endif\n\n"); ++ printf("#endif /* W */\n\n"); ++ ++ /* write out braid tables for each value of N */ ++ for (n = 1; n <= 6; n++) { ++ printf("#if N == %d\n", n); ++ ++ /* compute braid tables for this N and 64-bit word_t */ ++ braid(ltl, big, n, 8); ++ ++ /* write out braid tables for 64-bit z_word_t */ ++ printf("\n"); ++ printf("#if W == 8\n\n"); ++ printf("static const uint32_t crc_braid_table[][256] = {\n"); ++ for (k = 0; k < 8; k++) { ++ printf(" {"); ++ write_table(ltl[k], 256); ++ printf("}%s", 
k < 7 ? ",\n" : ""); ++ } ++ printf("};\n\n"); ++ printf("static const z_word_t crc_braid_big_table[][256] = {\n"); ++ for (k = 0; k < 8; k++) { ++ printf(" {"); ++ write_table64(big[k], 256); ++ printf("}%s", k < 7 ? ",\n" : ""); ++ } ++ printf("};\n"); ++ ++ /* compute braid tables for this N and 32-bit word_t */ ++ braid(ltl, big, n, 4); ++ ++ /* write out braid tables for 32-bit z_word_t */ ++ printf("\n"); ++ printf("#else /* W == 4 */\n\n"); ++ printf("static const uint32_t crc_braid_table[][256] = {\n"); ++ for (k = 0; k < 4; k++) { ++ printf(" {"); ++ write_table(ltl[k], 256); ++ printf("}%s", k < 3 ? ",\n" : ""); ++ } ++ printf("};\n\n"); ++ printf("static const z_word_t crc_braid_big_table[][256] = {\n"); ++ for (k = 0; k < 4; k++) { ++ printf(" {"); ++ write_table32hi(big[k], 256); ++ printf("}%s", k < 3 ? ",\n" : ""); ++ } ++ printf("};\n\n"); ++ printf("#endif /* W */\n\n"); ++ ++ printf("#endif /* N == %d */\n", n); + } +- printf(" }\n};\n\n"); ++ printf("\n"); ++ ++ /* write out zeros operator table */ ++ printf("static const uint32_t x2n_table[] = {\n"); ++ printf(" "); ++ write_table(x2n_table, 32); ++ printf("};\n"); + +- printf("#endif /* CRC32_COMB_TBL_H_ */\n"); ++ printf("\n"); ++ printf("#endif /* CRC32_BRAID_TBL_H_ */\n"); + } + +-// The output of this application can be piped out to recreate crc32.h ++// The output of this application can be piped out to recreate crc32 tables + int main(int argc, char *argv[]) { +- if (argc > 1 && strcmp(argv[1], "-c") == 0) { +- make_crc_combine_table(); +- print_crc_combine_table(); +- } else { +- make_crc_table(); +- print_crc_table(); +- } ++ Z_UNUSED(argc); ++ Z_UNUSED(argv); ++ ++ make_crc_table(); ++ print_crc_table(); + return 0; + } +diff --git a/tools/maketrees.c b/tools/maketrees.c +index 337f2fc..2c32cca 100644 +--- a/tools/maketrees.c ++++ b/tools/maketrees.c +@@ -23,11 +23,11 @@ static unsigned char dist_code[DIST_CODE_LEN]; + * the last 256 values correspond to the top 8 bits of the 15 bit 
distances. + */ + +-static unsigned char length_code[MAX_MATCH-MIN_MATCH+1]; +-/* length code for each normalized match length (0 == MIN_MATCH) */ ++static unsigned char length_code[STD_MAX_MATCH-STD_MIN_MATCH+1]; ++/* length code for each normalized match length (0 == STD_MIN_MATCH) */ + + static int base_length[LENGTH_CODES]; +-/* First normalized length for each code (0 = MIN_MATCH) */ ++/* First normalized length for each code (0 = STD_MIN_MATCH) */ + + static int base_dist[D_CODES]; + /* First normalized distance for each code (0 = distance of 1) */ +@@ -90,7 +90,7 @@ static void tr_static_init(void) { + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; +- static_dtree[n].Code = (uint16_t)bi_reverse((unsigned)n, 5); ++ static_dtree[n].Code = PREFIX(bi_reverse)((unsigned)n, 5); + } + } + +@@ -98,7 +98,7 @@ static void tr_static_init(void) { + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +-static void gen_trees_header() { ++static void gen_trees_header(void) { + int i; + + printf("#ifndef TREES_TBL_H_\n"); +@@ -121,9 +121,9 @@ static void gen_trees_header() { + printf("%2u%s", dist_code[i], SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + +- printf("const unsigned char Z_INTERNAL zng_length_code[MAX_MATCH-MIN_MATCH+1] = {\n"); +- for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { +- printf("%2u%s", length_code[i], SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); ++ printf("const unsigned char Z_INTERNAL zng_length_code[STD_MAX_MATCH-STD_MIN_MATCH+1] = {\n"); ++ for (i = 0; i < STD_MAX_MATCH-STD_MIN_MATCH+1; i++) { ++ printf("%2u%s", length_code[i], SEPARATOR(i, STD_MAX_MATCH-STD_MIN_MATCH, 20)); + } + + printf("Z_INTERNAL const int base_length[LENGTH_CODES] = {\n"); +diff --git a/trees.c b/trees.c +index c3d4849..5bb8838 100644 +--- a/trees.c ++++ b/trees.c +@@ -1,5 +1,5 @@ + /* trees.c -- output deflated data using Huffman coding +- * Copyright (C) 1995-2017 Jean-loup Gailly ++ * Copyright (C) 
1995-2021 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ +@@ -305,7 +305,7 @@ Z_INTERNAL void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count) { + if (len == 0) + continue; + /* Now reverse the bits */ +- tree[n].Code = (uint16_t)bi_reverse(next_code[len]++, len); ++ tree[n].Code = PREFIX(bi_reverse)(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr, "\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n & 0xff) ? n : ' '), len, tree[n].Code, next_code[len]-1)); +@@ -670,7 +670,7 @@ void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_ + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->sym_next / 3)); + +- if (static_lenb <= opt_lenb) ++ if (static_lenb <= opt_lenb || s->strategy == Z_FIXED) + opt_lenb = static_lenb; + + } else { +@@ -688,7 +688,7 @@ void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_ + */ + zng_tr_stored_block(s, buf, stored_len, last); + +- } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { ++ } else if (static_lenb == opt_lenb) { + zng_tr_emit_tree(s, STATIC_TREES, last); + compress_block(s, (const ct_data *)static_ltree, (const ct_data *)static_dtree); + cmpr_bits_add(s, s->static_len); +@@ -806,17 +806,13 @@ static void bi_flush(deflate_state *s) { + } + + /* =========================================================================== +- * Reverse the first len bits of a code, using straightforward code (a faster +- * method would use a table) +- * IN assertion: 1 <= len <= 15 ++ * Reverse the first len bits of a code using bit manipulation + */ +-Z_INTERNAL unsigned bi_reverse(unsigned code, int len) { ++Z_INTERNAL uint16_t PREFIX(bi_reverse)(unsigned code, int len) { + /* code: the value to invert */ + /* len: its bit length */ +- Z_REGISTER unsigned res = 0; +- do { +- res |= code & 1; +- code >>= 1, res <<= 1; +- } 
while (--len > 0); +- return res >> 1; ++ Assert(len >= 1 && len <= 15, "code length must be 1-15"); ++#define bitrev8(b) \ ++ (uint8_t)((((uint8_t)(b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32) ++ return (bitrev8(code >> 8) | (uint16_t)bitrev8(code) << 8) >> (16 - len); + } +diff --git a/trees_emit.h b/trees_emit.h +index 3280845..922daae 100644 +--- a/trees_emit.h ++++ b/trees_emit.h +@@ -7,7 +7,6 @@ + #ifdef ZLIB_DEBUG + # include + # include +-# include + #endif + + +@@ -16,7 +15,7 @@ extern Z_INTERNAL const ct_data static_ltree[L_CODES+2]; + extern Z_INTERNAL const ct_data static_dtree[D_CODES]; + + extern const unsigned char Z_INTERNAL zng_dist_code[DIST_CODE_LEN]; +-extern const unsigned char Z_INTERNAL zng_length_code[MAX_MATCH-MIN_MATCH+1]; ++extern const unsigned char Z_INTERNAL zng_length_code[STD_MAX_MATCH-STD_MIN_MATCH+1]; + + extern Z_INTERNAL const int base_length[LENGTH_CODES]; + extern Z_INTERNAL const int base_dist[D_CODES]; +@@ -126,7 +125,7 @@ static inline uint32_t zng_emit_dist(deflate_state *s, const ct_data *ltree, con + uint32_t bi_valid = s->bi_valid; + uint64_t bi_buf = s->bi_buf; + +- /* Send the length code, len is the match length - MIN_MATCH */ ++ /* Send the length code, len is the match length - STD_MIN_MATCH */ + code = zng_length_code[lc]; + c = code+LITERALS+1; + Assert(c < L_CODES, "bad l_code"); +diff --git a/trees_tbl.h b/trees_tbl.h +index a4c68a5..a3912b7 100644 +--- a/trees_tbl.h ++++ b/trees_tbl.h +@@ -102,7 +102,7 @@ const unsigned char Z_INTERNAL zng_dist_code[DIST_CODE_LEN] = { + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 + }; + +-const unsigned char Z_INTERNAL zng_length_code[MAX_MATCH-MIN_MATCH+1] = { ++const unsigned char Z_INTERNAL zng_length_code[STD_MAX_MATCH-STD_MIN_MATCH+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 
18, 19, 19, 19, 19, +diff --git a/uncompr.c b/uncompr.c +index 1435fab..311eca2 100644 +--- a/uncompr.c ++++ b/uncompr.c +@@ -3,13 +3,8 @@ + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +-#define Z_INTERNAL + #include "zbuild.h" +-#ifdef ZLIB_COMPAT +-# include "zlib.h" +-#else +-# include "zlib-ng.h" +-#endif ++#include "zutil.h" + + /* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is +@@ -27,11 +22,11 @@ + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. + */ +-int Z_EXPORT PREFIX(uncompress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t *sourceLen) { ++int Z_EXPORT PREFIX(uncompress2)(unsigned char *dest, z_uintmax_t *destLen, const unsigned char *source, z_uintmax_t *sourceLen) { + PREFIX3(stream) stream; + int err; + const unsigned int max = (unsigned int)-1; +- z_size_t len, left; ++ z_uintmax_t len, left; + unsigned char buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; +@@ -80,6 +75,6 @@ int Z_EXPORT PREFIX(uncompress2)(unsigned char *dest, z_size_t *destLen, const u + err; + } + +-int Z_EXPORT PREFIX(uncompress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) { ++int Z_EXPORT PREFIX(uncompress)(unsigned char *dest, z_uintmax_t *destLen, const unsigned char *source, z_uintmax_t sourceLen) { + return PREFIX(uncompress2)(dest, destLen, source, &sourceLen); + } +diff --git a/win32/Makefile.a64 b/win32/Makefile.a64 +index a2f2e6a..2a0f3cf 100644 +--- a/win32/Makefile.a64 ++++ b/win32/Makefile.a64 +@@ -16,6 +16,7 @@ LOC = + STATICLIB = zlib.lib + SHAREDLIB = zlib1.dll + IMPLIB = zdll.lib ++SYMBOL_PREFIX = + + CC = cl + LD = link +@@ -24,11 +25,10 @@ RC = rc + CP = copy /y + CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) + WFLAGS = \ ++ 
-D_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ + -D_CRT_SECURE_NO_DEPRECATE \ + -D_CRT_NONSTDC_NO_DEPRECATE \ +- -DUNALIGNED_OK \ +- -DUNALIGNED64_OK \ +- -D_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ ++ -DARM_NEON_HASLD4 \ + -DARM_FEATURES \ + # + LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest +@@ -43,23 +43,30 @@ SUFFIX = + + OBJS = \ + adler32.obj \ +- armfeature.obj \ ++ adler32_fold.obj \ ++ arm_features.obj \ + chunkset.obj \ +- compare258.obj \ ++ compare256.obj \ + compress.obj \ +- crc32.obj \ +- crc32_comb.obj \ ++ cpu_features.obj \ ++ crc32_braid.obj \ ++ crc32_braid_comb.obj \ ++ crc32_fold.obj \ + deflate.obj \ + deflate_fast.obj \ +- deflate_slow.obj \ ++ deflate_huff.obj \ + deflate_quick.obj \ + deflate_medium.obj \ ++ deflate_rle.obj \ ++ deflate_slow.obj \ ++ deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ +- inffast.obj \ + insert_string.obj \ ++ insert_string_roll.obj \ ++ slide_hash.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ +@@ -84,21 +91,36 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj + !endif + + WFLAGS = $(WFLAGS) \ +- -DARM_ACLE_CRC_HASH \ ++ -DARM_ACLE \ + -D__ARM_NEON__=1 \ +- -DARM_NEON_ADLER32 \ +- -DARM_NEON_CHUNKSET \ +- -DARM_NEON_SLIDEHASH \ ++ -DARM_NEON \ + -DARM_NOCHECK_NEON \ + # +-OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj slide_neon.obj ++OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj + + # targets + all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +-zconf: $(TOP)/zconf$(SUFFIX).h.in +- $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++!if "$(SYMBOL_PREFIX)" != "" ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" 
"$(SYMBOL_PREFIX)" ++!else ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty ++ $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h ++!endif ++ ++zlib$(SUFFIX).h: zlib$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++gzread.c: gzread.c.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h ++ $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++ ++$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in ++ cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + + $(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) +@@ -154,24 +176,31 @@ $(TOP)/zconf$(SUFFIX).h: zconf + SRCDIR = $(TOP) + # Keep the dependences in sync with top-level Makefile.in + adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h ++adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h + chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +-functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86.h ++functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h + gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h + uncompr.obj: 
$(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h +-crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_tbl.h +-crc32_comb.obj: $(SRCDIR)/crc32_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/crc32_comb_tbl.h ++cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h ++crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h ++crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h + deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +-deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h + deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h + deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h 
+-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h +-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h ++deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h ++inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h + inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h ++slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h ++slide_hash_neon.obj: $(SRCDIR)/arch/arm/slide_hash_neon.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h + trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h + zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h + +@@ -206,3 +235,9 @@ clean: + + distclean: clean + -del zconf$(SUFFIX).h ++ -del zlib$(SUFFIX).h ++ -del zlib_name_mangling$(SUFFIX).h ++ -del $(TOP)\win32\zlib.def ++ -del $(TOP)\win32\zlibcompat.def ++ -del $(TOP)\win32\zlib-ng.def ++ -del gzread.c +diff --git a/win32/Makefile.arm b/win32/Makefile.arm +index 5ed53f5..34dfe6b 100644 +--- a/win32/Makefile.arm ++++ b/win32/Makefile.arm +@@ -16,6 +16,7 @@ LOC = + STATICLIB = zlib.lib + SHAREDLIB = zlib1.dll + IMPLIB = zdll.lib ++SYMBOL_PREFIX = + + CC = cl + LD = link +@@ -24,11 +25,11 @@ RC = rc + CP = copy /y + CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) + WFLAGS = \ ++ -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ + -D_CRT_SECURE_NO_DEPRECATE \ + -D_CRT_NONSTDC_NO_DEPRECATE \ +- -DUNALIGNED_OK \ +- 
-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ + -DARM_FEATURES \ ++ -DARM_NEON_HASLD4 \ + # + LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest + ARFLAGS = -nologo +@@ -40,29 +41,37 @@ WITH_GZFILEOP = yes + ZLIB_COMPAT = + WITH_ACLE = + WITH_NEON = ++WITH_ARMV6 = + WITH_VFPV3 = + NEON_ARCH = /arch:VFPv4 + SUFFIX = + + OBJS = \ + adler32.obj \ +- armfeature.obj \ ++ adler32_fold.obj \ ++ arm_features.obj \ + chunkset.obj \ +- compare258.obj \ ++ compare256.obj \ + compress.obj \ +- crc32.obj \ +- crc32_comb.obj \ ++ cpu_features.obj \ ++ crc32_braid.obj \ ++ crc32_braid_comb.obj \ ++ crc32_fold.obj \ + deflate.obj \ + deflate_fast.obj \ +- deflate_slow.obj \ +- deflate_quick.obj \ ++ deflate_huff.obj \ + deflate_medium.obj \ ++ deflate_quick.obj \ ++ deflate_rle.obj \ ++ deflate_slow.obj \ ++ deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ +- inffast.obj \ + insert_string.obj \ ++ insert_string_roll.obj \ ++ slide_hash.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ +@@ -87,7 +96,7 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj + !endif + + !if "$(WITH_ACLE)" != "" +-WFLAGS = $(WFLAGS) -DARM_ACLE_CRC_HASH ++WFLAGS = $(WFLAGS) -DARM_ACLE + OBJS = $(OBJS) crc32_acle.obj insert_string_acle.obj + !endif + !if "$(WITH_VFPV3)" != "" +@@ -97,20 +106,42 @@ NEON_ARCH = /arch:VFPv3 + CFLAGS = $(CFLAGS) $(NEON_ARCH) + WFLAGS = $(WFLAGS) \ + -D__ARM_NEON__=1 \ +- -DARM_NEON_ADLER32 \ +- -DARM_NEON_CHUNKSET \ +- -DARM_NEON_SLIDEHASH \ ++ -DARM_NEON \ + -DARM_NOCHECK_NEON \ + # +-OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj slide_neon.obj ++OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj ++!endif ++!if "$(WITH_ARMV6)" != "" ++WFLAGS = $(WFLAGS) \ ++ -DARM_SIMD \ ++ -DARM_NOCHECK_SIMD \ ++ # ++OBJS = $(OBJS) slide_hash_armv6.obj + !endif + + # targets + all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +-zconf: 
$(TOP)/zconf$(SUFFIX).h.in +- $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++!if "$(SYMBOL_PREFIX)" != "" ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++!else ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty ++ $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h ++!endif ++ ++zlib$(SUFFIX).h: zlib$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++gzread.c: gzread.c.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h ++ $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++ ++$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in ++ cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + + $(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) +@@ -166,24 +197,30 @@ $(TOP)/zconf$(SUFFIX).h: zconf + SRCDIR = $(TOP) + # Keep the dependences in sync with top-level Makefile.in + adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +-functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86.h ++adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h ++functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h + gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzread.obj: 
$(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h + uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h + chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +-crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_tbl.h +-crc32_comb.obj: $(SRCDIR)/crc32_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/crc32_comb_tbl.h ++cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h ++crc32_braid_comb.obj: $(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h ++crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h + deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h ++deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h 
$(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h +-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h +-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h ++deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h ++inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h + inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h ++slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h + trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h + zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h + +@@ -218,3 +255,9 @@ clean: + + distclean: clean + -del zconf$(SUFFIX).h ++ -del zlib$(SUFFIX).h ++ -del zlib_name_mangling$(SUFFIX).h ++ -del $(TOP)\win32\zlib.def ++ -del $(TOP)\win32\zlibcompat.def ++ -del $(TOP)\win32\zlib-ng.def ++ -del gzread.c +diff --git a/win32/Makefile.msc b/win32/Makefile.msc +index f1c711c..3035072 100644 +--- a/win32/Makefile.msc ++++ b/win32/Makefile.msc +@@ -16,6 +16,7 @@ LOC = + STATICLIB = zlib.lib + SHAREDLIB = zlib1.dll + IMPLIB = zdll.lib ++SYMBOL_PREFIX = + + CC = cl + LD = link +@@ -29,14 +30,10 @@ WFLAGS = \ + -DX86_FEATURES \ + -DX86_PCLMULQDQ_CRC \ + -DX86_SSE2 \ +- -DX86_SSE42_CRC_INTRIN \ +- -DX86_SSE42_CRC_HASH \ +- -DX86_AVX2 \ +- 
-DX86_AVX_CHUNKSET \ +- -DX86_SSE2_CHUNKSET \ +- -DUNALIGNED_OK \ +- -DUNALIGNED64_OK \ +- # ++ -DX86_SSE42 \ ++ -DX86_SSSE3 \ ++ -DX86_AVX2 ++ + LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest + ARFLAGS = -nologo + RCFLAGS = /dWIN32 /r +@@ -49,34 +46,47 @@ SUFFIX = + + OBJS = \ + adler32.obj \ ++ adler32_avx2.obj \ ++ adler32_avx512.obj \ ++ adler32_avx512_vnni.obj \ ++ adler32_sse42.obj \ ++ adler32_ssse3.obj \ ++ adler32_fold.obj \ + chunkset.obj \ +- chunkset_avx.obj \ +- chunkset_sse.obj \ +- compare258.obj \ +- compare258_avx.obj \ +- compare258_sse.obj \ ++ chunkset_avx2.obj \ ++ chunkset_sse2.obj \ ++ chunkset_ssse3.obj \ ++ compare256.obj \ ++ compare256_avx2.obj \ ++ compare256_sse2.obj \ + compress.obj \ +- crc32.obj \ +- crc32_comb.obj \ +- crc_folding.obj \ ++ cpu_features.obj \ ++ crc32_braid.obj \ ++ crc32_braid_comb.obj \ ++ crc32_fold.obj \ ++ crc32_pclmulqdq.obj \ + deflate.obj \ + deflate_fast.obj \ ++ deflate_huff.obj \ ++ deflate_medium.obj \ + deflate_quick.obj \ ++ deflate_rle.obj \ + deflate_slow.obj \ +- deflate_medium.obj \ ++ deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ +- inffast.obj \ + insert_string.obj \ +- insert_string_sse.obj \ +- slide_avx.obj \ +- slide_sse.obj \ ++ insert_string_roll.obj \ ++ insert_string_sse42.obj \ ++ slide_hash.obj \ ++ slide_hash_avx2.obj \ ++ slide_hash_sse2.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ +- x86.obj \ ++ x86_features.obj \ + # + !if "$(ZLIB_COMPAT)" != "" + WITH_GZFILEOP = yes +@@ -101,8 +111,25 @@ OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj + all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +-zconf: $(TOP)/zconf$(SUFFIX).h.in +- $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++!if "$(SYMBOL_PREFIX)" != "" ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in 
zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++!else ++zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty ++ $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h ++!endif ++ ++zlib$(SUFFIX).h: zlib$(SUFFIX).h.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++gzread.c: gzread.c.in ++ cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" ++ ++zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h ++ $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h ++ ++$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in ++ cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + + $(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) +@@ -158,27 +185,44 @@ $(TOP)/zconf$(SUFFIX).h: zconf + SRCDIR = $(TOP) + # Keep the dependences in sync with top-level Makefile.in + adler32.obj: $(SRCDIR)/adler32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/functable.h $(SRCDIR)/adler32_p.h +-functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86.h ++adler32_avx2.obj: $(SRCDIR)/arch/x86/adler32_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/fallback_builtins.h ++adler32_avx512.obj: $(SRCDIR)/arch/x86/adler32_avx512.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h ++adler32_avx512_vnni.obj: $(SRCDIR)/arch/x86/adler32_avx512_vnni.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/arch/x86/adler32_avx512_p.h ++adler32_sse42.obj: $(SRCDIR)/arch/x86/adler32_sse42.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ ++ 
$(SRCDIR)/arch/x86/adler32_ssse3_p.h ++adler32_ssse3.obj: $(SRCDIR)/arch/x86/adler32_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/cpu_features.h $(SRCDIR)/adler32_p.h $(SRCDIR)/adler32_fold.h \ ++ $(SRCDIR)/arch/x86/adler32_ssse3_p.h ++adler32_fold.obj: $(SRCDIR)/adler32_fold.c $(SRCDIR)/zbuild.h $(SRCDIR)/adler32_fold.h $(SRCDIR)/functable.h ++functable.obj: $(SRCDIR)/functable.c $(SRCDIR)/zbuild.h $(SRCDIR)/functable.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/zendian.h $(SRCDIR)/arch/x86/x86_features.h + gzlib.obj: $(SRCDIR)/gzlib.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzread.obj: $(SRCDIR)/gzread.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + gzwrite.obj: $(SRCDIR)/gzwrite.c $(SRCDIR)/zbuild.h $(SRCDIR)/gzguts.h $(SRCDIR)/zutil_p.h + compress.obj: $(SRCDIR)/compress.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h + uncompr.obj: $(SRCDIR)/uncompr.c $(SRCDIR)/zbuild.h $(SRCDIR)/zlib$(SUFFIX).h + chunkset.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +-chunkset_avx.obj: $(SRCDIR)/arch/x86/chunkset_avx.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +-chunkset_sse.obj: $(SRCDIR)/arch/x86/chunkset_sse.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h +-crc32.obj: $(SRCDIR)/crc32.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_tbl.h +-crc32_comb.obj: $(SRCDIR)/crc32_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/crc32_comb_tbl.h ++chunkset_avx2.obj: $(SRCDIR)/arch/x86/chunkset_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++chunkset_sse2.obj: $(SRCDIR)/arch/x86/chunkset_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++chunkset_ssse3.obj: $(SRCDIR)/arch/x86/chunkset_ssse3.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++cpu_features.obj: $(SRCDIR)/cpu_features.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h ++crc32_braid.obj: $(SRCDIR)/crc32_braid.c $(SRCDIR)/zbuild.h $(SRCDIR)/zendian.h $(SRCDIR)/deflate.h $(SRCDIR)/functable.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h ++crc32_braid_comb.obj: 
$(SRCDIR)/crc32_braid_comb.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/crc32_braid_p.h $(SRCDIR)/crc32_braid_tbl.h $(SRCDIR)/crc32_braid_comb_p.h ++crc32_fold.obj: $(SRCDIR)/crc32_fold.c $(SRCDIR)/zbuild.h ++crc32_pclmulqdq.obj: $(SRCDIR)/arch/x86/crc32_pclmulqdq.c $(SRCDIR)/arch/x86/crc32_pclmulqdq_tpl.h $(SRCDIR)/arch/x86/crc32_fold_pclmulqdq_tpl.h \ ++ $(SRCDIR)/crc32_fold.h $(SRCDIR)/zbuild.h + deflate.obj: $(SRCDIR)/deflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_fast.obj: $(SRCDIR)/deflate_fast.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++deflate_huff.obj: $(SRCDIR)/deflate_huff.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_medium.obj: $(SRCDIR)/deflate_medium.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_quick.obj: $(SRCDIR)/deflate_quick.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/trees_emit.h ++deflate_rle.obj: $(SRCDIR)/deflate_rle.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h + deflate_slow.obj: $(SRCDIR)/deflate_slow.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h +-infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h +-inffast.obj: $(SRCDIR)/inffast.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h +-inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inffast.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h ++deflate_stored.obj: $(SRCDIR)/deflate_stored.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/deflate_p.h $(SRCDIR)/functable.h ++infback.obj: $(SRCDIR)/infback.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h 
$(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h ++inflate.obj: $(SRCDIR)/inflate.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h $(SRCDIR)/inflate.h $(SRCDIR)/inflate_p.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h $(SRCDIR)/functable.h + inftrees.obj: $(SRCDIR)/inftrees.c $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/inftrees.h +-slide_sse.obj: $(SRCDIR)/arch/x86/slide_sse.c $(SRCDIR)/deflate.h ++slide_hash.obj: $(SRCDIR)/slide_hash.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h ++slide_hash_avx2.obj: $(SRCDIR)/arch/x86/slide_hash_avx2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h ++slide_hash_sse2.obj: $(SRCDIR)/arch/x86/slide_hash_sse2.c $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h + trees.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/deflate.h $(SRCDIR)/trees_tbl.h + zutil.obj: $(SRCDIR)/zbuild.h $(SRCDIR)/zutil.h $(SRCDIR)/zutil_p.h + +@@ -213,3 +257,9 @@ clean: + + distclean: clean + -del zconf$(SUFFIX).h ++ -del zlib$(SUFFIX).h ++ -del zlib_name_mangling$(SUFFIX).h ++ -del $(TOP)\win32\zlib.def ++ -del $(TOP)\win32\zlibcompat.def ++ -del $(TOP)\win32\zlib-ng.def ++ -del gzread.c +diff --git a/win32/replace.vbs b/win32/replace.vbs +new file mode 100644 +index 0000000..6779971 +--- /dev/null ++++ b/win32/replace.vbs +@@ -0,0 +1,15 @@ ++strInputFileName = Wscript.Arguments(0) ++strOutputFileName = Wscript.Arguments(1) ++strOldText = Wscript.Arguments(2) ++strNewText = Wscript.Arguments(3) ++ ++Set objFSO = CreateObject("Scripting.FileSystemObject") ++Set objFile = objFSO.OpenTextFile(strInputFileName, 1) ++ ++strText = objFile.ReadAll ++objFile.Close ++strNewText = Replace(strText, strOldText, strNewText) ++ ++Set objFile = objFSO.OpenTextFile(strOutputFileName, 2, True) ++objFile.Write strNewText ++objFile.Close +diff --git a/win32/zlib-ng.def.in b/win32/zlib-ng.def.in +new file mode 100644 +index 0000000..53b2bc2 +--- /dev/null ++++ b/win32/zlib-ng.def.in +@@ -0,0 +1,60 @@ ++; zlib-ng data compression library ++EXPORTS ++; basic functions 
++ @ZLIB_SYMBOL_PREFIX@zlibng_version ++ @ZLIB_SYMBOL_PREFIX@zng_deflate ++ @ZLIB_SYMBOL_PREFIX@zng_deflateEnd ++ @ZLIB_SYMBOL_PREFIX@zng_deflateInit ++ @ZLIB_SYMBOL_PREFIX@zng_deflateInit2 ++ @ZLIB_SYMBOL_PREFIX@zng_inflate ++ @ZLIB_SYMBOL_PREFIX@zng_inflateEnd ++ @ZLIB_SYMBOL_PREFIX@zng_inflateInit ++ @ZLIB_SYMBOL_PREFIX@zng_inflateInit2 ++ @ZLIB_SYMBOL_PREFIX@zng_inflateBackInit ++; advanced functions ++ @ZLIB_SYMBOL_PREFIX@zng_deflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@zng_deflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@zng_deflateCopy ++ @ZLIB_SYMBOL_PREFIX@zng_deflateReset ++ @ZLIB_SYMBOL_PREFIX@zng_deflateParams ++ @ZLIB_SYMBOL_PREFIX@zng_deflateTune ++ @ZLIB_SYMBOL_PREFIX@zng_deflateBound ++ @ZLIB_SYMBOL_PREFIX@zng_deflatePending ++ @ZLIB_SYMBOL_PREFIX@zng_deflatePrime ++ @ZLIB_SYMBOL_PREFIX@zng_deflateSetHeader ++ @ZLIB_SYMBOL_PREFIX@zng_deflateSetParams ++ @ZLIB_SYMBOL_PREFIX@zng_deflateGetParams ++ @ZLIB_SYMBOL_PREFIX@zng_inflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@zng_inflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@zng_inflateSync ++ @ZLIB_SYMBOL_PREFIX@zng_inflateCopy ++ @ZLIB_SYMBOL_PREFIX@zng_inflateReset ++ @ZLIB_SYMBOL_PREFIX@zng_inflateReset2 ++ @ZLIB_SYMBOL_PREFIX@zng_inflatePrime ++ @ZLIB_SYMBOL_PREFIX@zng_inflateMark ++ @ZLIB_SYMBOL_PREFIX@zng_inflateGetHeader ++ @ZLIB_SYMBOL_PREFIX@zng_inflateBack ++ @ZLIB_SYMBOL_PREFIX@zng_inflateBackEnd ++ @ZLIB_SYMBOL_PREFIX@zng_zlibCompileFlags ++; utility functions ++ @ZLIB_SYMBOL_PREFIX@zng_compress ++ @ZLIB_SYMBOL_PREFIX@zng_compress2 ++ @ZLIB_SYMBOL_PREFIX@zng_compressBound ++ @ZLIB_SYMBOL_PREFIX@zng_uncompress ++ @ZLIB_SYMBOL_PREFIX@zng_uncompress2 ++; checksum functions ++ @ZLIB_SYMBOL_PREFIX@zng_adler32 ++ @ZLIB_SYMBOL_PREFIX@zng_adler32_z ++ @ZLIB_SYMBOL_PREFIX@zng_crc32 ++ @ZLIB_SYMBOL_PREFIX@zng_crc32_z ++ @ZLIB_SYMBOL_PREFIX@zng_adler32_combine ++ @ZLIB_SYMBOL_PREFIX@zng_crc32_combine ++; various hacks, don't look :) ++ @ZLIB_SYMBOL_PREFIX@zng_zError ++ @ZLIB_SYMBOL_PREFIX@zng_inflateSyncPoint ++ 
@ZLIB_SYMBOL_PREFIX@zng_get_crc_table ++ @ZLIB_SYMBOL_PREFIX@zng_inflateUndermine ++ @ZLIB_SYMBOL_PREFIX@zng_inflateValidate ++ @ZLIB_SYMBOL_PREFIX@zng_inflateCodesUsed ++ @ZLIB_SYMBOL_PREFIX@zng_inflateResetKeep ++ @ZLIB_SYMBOL_PREFIX@zng_deflateResetKeep +diff --git a/win32/zlib-ng1.rc b/win32/zlib-ng1.rc +index b539069..327f17f 100644 +--- a/win32/zlib-ng1.rc ++++ b/win32/zlib-ng1.rc +@@ -1,11 +1,7 @@ + #include +-#include "../zlib-ng.h" ++#include "zlib-ng.h" + +-#ifdef GCC_WINDRES + VS_VERSION_INFO VERSIONINFO +-#else +-VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE +-#endif + FILEVERSION ZLIBNG_VER_MAJOR,ZLIBNG_VER_MINOR,ZLIBNG_VER_REVISION,0 + PRODUCTVERSION ZLIBNG_VER_MAJOR,ZLIBNG_VER_MINOR,ZLIBNG_VER_REVISION,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +diff --git a/win32/zlib.def.in b/win32/zlib.def.in +new file mode 100644 +index 0000000..561a42f +--- /dev/null ++++ b/win32/zlib.def.in +@@ -0,0 +1,64 @@ ++; zlib data compression library ++EXPORTS ++; basic functions ++ @ZLIB_SYMBOL_PREFIX@zlibVersion ++ @ZLIB_SYMBOL_PREFIX@deflate ++ @ZLIB_SYMBOL_PREFIX@deflateEnd ++ @ZLIB_SYMBOL_PREFIX@inflate ++ @ZLIB_SYMBOL_PREFIX@inflateEnd ++; advanced functions ++ @ZLIB_SYMBOL_PREFIX@deflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@deflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@deflateCopy ++ @ZLIB_SYMBOL_PREFIX@deflateReset ++ @ZLIB_SYMBOL_PREFIX@deflateParams ++ @ZLIB_SYMBOL_PREFIX@deflateTune ++ @ZLIB_SYMBOL_PREFIX@deflateBound ++ @ZLIB_SYMBOL_PREFIX@deflatePending ++ @ZLIB_SYMBOL_PREFIX@deflatePrime ++ @ZLIB_SYMBOL_PREFIX@deflateSetHeader ++ @ZLIB_SYMBOL_PREFIX@inflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@inflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@inflateSync ++ @ZLIB_SYMBOL_PREFIX@inflateCopy ++ @ZLIB_SYMBOL_PREFIX@inflateReset ++ @ZLIB_SYMBOL_PREFIX@inflateReset2 ++ @ZLIB_SYMBOL_PREFIX@inflatePrime ++ @ZLIB_SYMBOL_PREFIX@inflateMark ++ @ZLIB_SYMBOL_PREFIX@inflateGetHeader ++ @ZLIB_SYMBOL_PREFIX@inflateBack ++ @ZLIB_SYMBOL_PREFIX@inflateBackEnd ++ 
@ZLIB_SYMBOL_PREFIX@zlibCompileFlags ++; utility functions ++ @ZLIB_SYMBOL_PREFIX@compress ++ @ZLIB_SYMBOL_PREFIX@compress2 ++ @ZLIB_SYMBOL_PREFIX@compressBound ++ @ZLIB_SYMBOL_PREFIX@uncompress ++ @ZLIB_SYMBOL_PREFIX@uncompress2 ++; large file functions ++ @ZLIB_SYMBOL_PREFIX@adler32_combine64 ++ @ZLIB_SYMBOL_PREFIX@crc32_combine64 ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 ++; checksum functions ++ @ZLIB_SYMBOL_PREFIX@adler32 ++ @ZLIB_SYMBOL_PREFIX@adler32_z ++ @ZLIB_SYMBOL_PREFIX@crc32 ++ @ZLIB_SYMBOL_PREFIX@crc32_z ++ @ZLIB_SYMBOL_PREFIX@adler32_combine ++ @ZLIB_SYMBOL_PREFIX@crc32_combine ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_gen ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_op ++; various hacks, don't look :) ++ @ZLIB_SYMBOL_PREFIX@deflateInit_ ++ @ZLIB_SYMBOL_PREFIX@deflateInit2_ ++ @ZLIB_SYMBOL_PREFIX@inflateInit_ ++ @ZLIB_SYMBOL_PREFIX@inflateInit2_ ++ @ZLIB_SYMBOL_PREFIX@inflateBackInit_ ++ @ZLIB_SYMBOL_PREFIX@zError ++ @ZLIB_SYMBOL_PREFIX@inflateSyncPoint ++ @ZLIB_SYMBOL_PREFIX@get_crc_table ++ @ZLIB_SYMBOL_PREFIX@inflateUndermine ++ @ZLIB_SYMBOL_PREFIX@inflateValidate ++ @ZLIB_SYMBOL_PREFIX@inflateCodesUsed ++ @ZLIB_SYMBOL_PREFIX@inflateResetKeep ++ @ZLIB_SYMBOL_PREFIX@deflateResetKeep +diff --git a/win32/zlib1.rc b/win32/zlib1.rc +index 11be5f4..73bc438 100644 +--- a/win32/zlib1.rc ++++ b/win32/zlib1.rc +@@ -1,11 +1,7 @@ + #include +-#include "../zlib.h" ++#include "zlib.h" + +-#ifdef GCC_WINDRES + VS_VERSION_INFO VERSIONINFO +-#else +-VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE +-#endif + FILEVERSION ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0 + PRODUCTVERSION ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +@@ -26,7 +22,7 @@ BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", ZLIB_VERSION "\0" + VALUE "InternalName", "zlib1.dll\0" +- VALUE "LegalCopyright", "(C) 1995-2013 Jean-loup Gailly & Mark Adler\0" ++ VALUE "LegalCopyright", "(C) 1995-2022 Jean-loup 
Gailly & Mark Adler\0" + VALUE "OriginalFilename", "zlib1.dll\0" + VALUE "ProductName", "zlib\0" + VALUE "ProductVersion", ZLIB_VERSION "\0" +diff --git a/win32/zlibcompat.def.in b/win32/zlibcompat.def.in +new file mode 100644 +index 0000000..52a713c +--- /dev/null ++++ b/win32/zlibcompat.def.in +@@ -0,0 +1,97 @@ ++; zlib data compression library ++EXPORTS ++; basic functions ++ @ZLIB_SYMBOL_PREFIX@zlibVersion ++ @ZLIB_SYMBOL_PREFIX@deflate ++ @ZLIB_SYMBOL_PREFIX@deflateEnd ++ @ZLIB_SYMBOL_PREFIX@inflate ++ @ZLIB_SYMBOL_PREFIX@inflateEnd ++; advanced functions ++ @ZLIB_SYMBOL_PREFIX@deflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@deflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@deflateCopy ++ @ZLIB_SYMBOL_PREFIX@deflateReset ++ @ZLIB_SYMBOL_PREFIX@deflateParams ++ @ZLIB_SYMBOL_PREFIX@deflateTune ++ @ZLIB_SYMBOL_PREFIX@deflateBound ++ @ZLIB_SYMBOL_PREFIX@deflatePending ++ @ZLIB_SYMBOL_PREFIX@deflatePrime ++ @ZLIB_SYMBOL_PREFIX@deflateSetHeader ++ @ZLIB_SYMBOL_PREFIX@inflateSetDictionary ++ @ZLIB_SYMBOL_PREFIX@inflateGetDictionary ++ @ZLIB_SYMBOL_PREFIX@inflateSync ++ @ZLIB_SYMBOL_PREFIX@inflateCopy ++ @ZLIB_SYMBOL_PREFIX@inflateReset ++ @ZLIB_SYMBOL_PREFIX@inflateReset2 ++ @ZLIB_SYMBOL_PREFIX@inflatePrime ++ @ZLIB_SYMBOL_PREFIX@inflateMark ++ @ZLIB_SYMBOL_PREFIX@inflateGetHeader ++ @ZLIB_SYMBOL_PREFIX@inflateBack ++ @ZLIB_SYMBOL_PREFIX@inflateBackEnd ++ @ZLIB_SYMBOL_PREFIX@zlibCompileFlags ++; utility functions ++ @ZLIB_SYMBOL_PREFIX@compress ++ @ZLIB_SYMBOL_PREFIX@compress2 ++ @ZLIB_SYMBOL_PREFIX@compressBound ++ @ZLIB_SYMBOL_PREFIX@uncompress ++ @ZLIB_SYMBOL_PREFIX@uncompress2 ++ @ZLIB_SYMBOL_PREFIX@gzopen ++ @ZLIB_SYMBOL_PREFIX@gzdopen ++ @ZLIB_SYMBOL_PREFIX@gzbuffer ++ @ZLIB_SYMBOL_PREFIX@gzsetparams ++ @ZLIB_SYMBOL_PREFIX@gzread ++ @ZLIB_SYMBOL_PREFIX@gzfread ++ @ZLIB_SYMBOL_PREFIX@gzwrite ++ @ZLIB_SYMBOL_PREFIX@gzfwrite ++ @ZLIB_SYMBOL_PREFIX@gzprintf ++ @ZLIB_SYMBOL_PREFIX@gzvprintf ++ @ZLIB_SYMBOL_PREFIX@gzputs ++ @ZLIB_SYMBOL_PREFIX@gzgets ++ 
@ZLIB_SYMBOL_PREFIX@gzputc ++ @ZLIB_SYMBOL_PREFIX@gzgetc ++ @ZLIB_SYMBOL_PREFIX@gzungetc ++ @ZLIB_SYMBOL_PREFIX@gzflush ++ @ZLIB_SYMBOL_PREFIX@gzseek ++ @ZLIB_SYMBOL_PREFIX@gzrewind ++ @ZLIB_SYMBOL_PREFIX@gztell ++ @ZLIB_SYMBOL_PREFIX@gzoffset ++ @ZLIB_SYMBOL_PREFIX@gzeof ++ @ZLIB_SYMBOL_PREFIX@gzdirect ++ @ZLIB_SYMBOL_PREFIX@gzclose ++ @ZLIB_SYMBOL_PREFIX@gzclose_r ++ @ZLIB_SYMBOL_PREFIX@gzclose_w ++ @ZLIB_SYMBOL_PREFIX@gzerror ++ @ZLIB_SYMBOL_PREFIX@gzclearerr ++; large file functions ++ @ZLIB_SYMBOL_PREFIX@gzopen64 ++ @ZLIB_SYMBOL_PREFIX@gzseek64 ++ @ZLIB_SYMBOL_PREFIX@gztell64 ++ @ZLIB_SYMBOL_PREFIX@gzoffset64 ++ @ZLIB_SYMBOL_PREFIX@adler32_combine64 ++ @ZLIB_SYMBOL_PREFIX@crc32_combine64 ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 ++; checksum functions ++ @ZLIB_SYMBOL_PREFIX@adler32 ++ @ZLIB_SYMBOL_PREFIX@adler32_z ++ @ZLIB_SYMBOL_PREFIX@crc32 ++ @ZLIB_SYMBOL_PREFIX@crc32_z ++ @ZLIB_SYMBOL_PREFIX@adler32_combine ++ @ZLIB_SYMBOL_PREFIX@crc32_combine ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_gen ++ @ZLIB_SYMBOL_PREFIX@crc32_combine_op ++; various hacks, don't look :) ++ @ZLIB_SYMBOL_PREFIX@deflateInit_ ++ @ZLIB_SYMBOL_PREFIX@deflateInit2_ ++ @ZLIB_SYMBOL_PREFIX@inflateInit_ ++ @ZLIB_SYMBOL_PREFIX@inflateInit2_ ++ @ZLIB_SYMBOL_PREFIX@inflateBackInit_ ++ @ZLIB_SYMBOL_PREFIX@gzgetc_ ++ @ZLIB_SYMBOL_PREFIX@zError ++ @ZLIB_SYMBOL_PREFIX@inflateSyncPoint ++ @ZLIB_SYMBOL_PREFIX@get_crc_table ++ @ZLIB_SYMBOL_PREFIX@inflateUndermine ++ @ZLIB_SYMBOL_PREFIX@inflateValidate ++ @ZLIB_SYMBOL_PREFIX@inflateCodesUsed ++ @ZLIB_SYMBOL_PREFIX@inflateResetKeep ++ @ZLIB_SYMBOL_PREFIX@deflateResetKeep ++ @ZLIB_SYMBOL_PREFIX@gzopen_w +diff --git a/zbuild.h b/zbuild.h +index 3c5e5fb..d550b4c 100644 +--- a/zbuild.h ++++ b/zbuild.h +@@ -1,6 +1,60 @@ + #ifndef _ZBUILD_H + #define _ZBUILD_H + ++#define _POSIX_SOURCE 1 /* fileno */ ++#ifndef _POSIX_C_SOURCE ++# define _POSIX_C_SOURCE 200809L /* snprintf, posix_memalign, strdup */ ++#endif ++#ifndef _ISOC11_SOURCE ++# define _ISOC11_SOURCE 1 /* 
aligned_alloc */ ++#endif ++#ifdef __OpenBSD__ ++# define _BSD_SOURCE 1 ++#endif ++ ++#include ++#include ++#include ++#include ++ ++/* Determine compiler version of C Standard */ ++#ifdef __STDC_VERSION__ ++# if __STDC_VERSION__ >= 199901L ++# ifndef STDC99 ++# define STDC99 ++# endif ++# endif ++# if __STDC_VERSION__ >= 201112L ++# ifndef STDC11 ++# define STDC11 ++# endif ++# endif ++#endif ++ ++#ifndef Z_HAS_ATTRIBUTE ++# if defined(__has_attribute) ++# define Z_HAS_ATTRIBUTE(a) __has_attribute(a) ++# else ++# define Z_HAS_ATTRIBUTE(a) 0 ++# endif ++#endif ++ ++#ifndef Z_FALLTHROUGH ++# if Z_HAS_ATTRIBUTE(__fallthrough__) || (defined(__GNUC__) && (__GNUC__ >= 7)) ++# define Z_FALLTHROUGH __attribute__((__fallthrough__)) ++# else ++# define Z_FALLTHROUGH do {} while(0) /* fallthrough */ ++# endif ++#endif ++ ++#ifndef Z_TARGET ++# if Z_HAS_ATTRIBUTE(__target__) ++# define Z_TARGET(x) __attribute__((__target__(x))) ++# else ++# define Z_TARGET(x) ++# endif ++#endif ++ + /* This has to be first include that defines any types */ + #if defined(_MSC_VER) + # if defined(_WIN64) +@@ -8,6 +62,18 @@ + # else + typedef long ssize_t; + # endif ++ ++# if defined(_WIN64) ++ #define SSIZE_MAX _I64_MAX ++# else ++ #define SSIZE_MAX LONG_MAX ++# endif ++#endif ++ ++/* MS Visual Studio does not allow inline in C, only C++. ++ But it provides __inline instead, so use that. 
*/ ++#if defined(_MSC_VER) && !defined(inline) && !defined(__cplusplus) ++# define inline __inline + #endif + + #if defined(ZLIB_COMPAT) +@@ -16,7 +82,6 @@ + # define PREFIX3(x) z_ ## x + # define PREFIX4(x) x ## 64 + # define zVersion zlibVersion +-# define z_size_t unsigned long + #else + # define PREFIX(x) zng_ ## x + # define PREFIX2(x) ZLIBNG_ ## x +@@ -26,6 +91,13 @@ + # define z_size_t size_t + #endif + ++/* In zlib-compat some functions and types use unsigned long, but zlib-ng uses size_t */ ++#if defined(ZLIB_COMPAT) ++# define z_uintmax_t unsigned long ++#else ++# define z_uintmax_t size_t ++#endif ++ + /* Minimum of a and b. */ + #define MIN(a, b) ((a) > (b) ? (b) : (a)) + /* Maximum of a and b. */ +@@ -33,4 +105,156 @@ + /* Ignore unused variable warning */ + #define Z_UNUSED(var) (void)(var) + ++#if defined(HAVE_VISIBILITY_INTERNAL) ++# define Z_INTERNAL __attribute__((visibility ("internal"))) ++#elif defined(HAVE_VISIBILITY_HIDDEN) ++# define Z_INTERNAL __attribute__((visibility ("hidden"))) ++#else ++# define Z_INTERNAL ++#endif ++ ++/* Symbol versioning helpers, allowing multiple versions of a function to exist. ++ * Functions using this must also be added to zlib-ng.map for each version. ++ * Double @@ means this is the default for newly compiled applications to link against. ++ * Single @ means this is kept for backwards compatibility. ++ * This is only used for Zlib-ng native API, and only on platforms supporting this. ++ */ ++#if defined(HAVE_SYMVER) ++# define ZSYMVER(func,alias,ver) __asm__(".symver " func ", " alias "@ZLIB_NG_" ver); ++# define ZSYMVER_DEF(func,alias,ver) __asm__(".symver " func ", " alias "@@ZLIB_NG_" ver); ++#else ++# define ZSYMVER(func,alias,ver) ++# define ZSYMVER_DEF(func,alias,ver) ++#endif ++ ++#ifndef __cplusplus ++# define Z_REGISTER register ++#else ++# define Z_REGISTER ++#endif ++ ++/* Reverse the bytes in a value. Use compiler intrinsics when ++ possible to take advantage of hardware implementations. 
*/ ++#if defined(_MSC_VER) && (_MSC_VER >= 1300) ++# include ++# pragma intrinsic(_byteswap_ulong) ++# define ZSWAP16(q) _byteswap_ushort(q) ++# define ZSWAP32(q) _byteswap_ulong(q) ++# define ZSWAP64(q) _byteswap_uint64(q) ++ ++#elif defined(__clang__) || (defined(__GNUC__) && \ ++ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) ++# define ZSWAP16(q) __builtin_bswap16(q) ++# define ZSWAP32(q) __builtin_bswap32(q) ++# define ZSWAP64(q) __builtin_bswap64(q) ++ ++#elif defined(__GNUC__) && (__GNUC__ >= 2) && defined(__linux__) ++# include ++# define ZSWAP16(q) bswap_16(q) ++# define ZSWAP32(q) bswap_32(q) ++# define ZSWAP64(q) bswap_64(q) ++ ++#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) ++# include ++# define ZSWAP16(q) bswap16(q) ++# define ZSWAP32(q) bswap32(q) ++# define ZSWAP64(q) bswap64(q) ++#elif defined(__OpenBSD__) ++# include ++# define ZSWAP16(q) swap16(q) ++# define ZSWAP32(q) swap32(q) ++# define ZSWAP64(q) swap64(q) ++#elif defined(__INTEL_COMPILER) ++/* ICC does not provide a two byte swap. 
*/ ++# define ZSWAP16(q) ((((q) & 0xff) << 8) | (((q) & 0xff00) >> 8)) ++# define ZSWAP32(q) _bswap(q) ++# define ZSWAP64(q) _bswap64(q) ++ ++#else ++# define ZSWAP16(q) ((((q) & 0xff) << 8) | (((q) & 0xff00) >> 8)) ++# define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ ++ (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) ++# define ZSWAP64(q) \ ++ (((q & 0xFF00000000000000u) >> 56u) | \ ++ ((q & 0x00FF000000000000u) >> 40u) | \ ++ ((q & 0x0000FF0000000000u) >> 24u) | \ ++ ((q & 0x000000FF00000000u) >> 8u) | \ ++ ((q & 0x00000000FF000000u) << 8u) | \ ++ ((q & 0x0000000000FF0000u) << 24u) | \ ++ ((q & 0x000000000000FF00u) << 40u) | \ ++ ((q & 0x00000000000000FFu) << 56u)) ++#endif ++ ++/* Only enable likely/unlikely if the compiler is known to support it */ ++#if (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__INTEL_COMPILER) || defined(__clang__) ++# define LIKELY_NULL(x) __builtin_expect((x) != 0, 0) ++# define LIKELY(x) __builtin_expect(!!(x), 1) ++# define UNLIKELY(x) __builtin_expect(!!(x), 0) ++#else ++# define LIKELY_NULL(x) x ++# define LIKELY(x) x ++# define UNLIKELY(x) x ++#endif /* (un)likely */ ++ ++#if defined(HAVE_ATTRIBUTE_ALIGNED) ++# define ALIGNED_(x) __attribute__ ((aligned(x))) ++#elif defined(_MSC_VER) ++# define ALIGNED_(x) __declspec(align(x)) ++#endif ++ ++/* Diagnostic functions */ ++#ifdef ZLIB_DEBUG ++# include ++ extern int Z_INTERNAL z_verbose; ++ extern void Z_INTERNAL z_error(const char *m); ++# define Assert(cond, msg) {if (!(cond)) z_error(msg);} ++# define Trace(x) {if (z_verbose >= 0) fprintf x;} ++# define Tracev(x) {if (z_verbose > 0) fprintf x;} ++# define Tracevv(x) {if (z_verbose > 1) fprintf x;} ++# define Tracec(c, x) {if (z_verbose > 0 && (c)) fprintf x;} ++# define Tracecv(c, x) {if (z_verbose > 1 && (c)) fprintf x;} ++#else ++# define Assert(cond, msg) ++# define Trace(x) ++# define Tracev(x) ++# define Tracevv(x) ++# define Tracec(c, x) ++# define Tracecv(c, x) ++#endif ++ ++#ifndef NO_UNALIGNED ++# if 
defined(__x86_64__) || defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) ++# define UNALIGNED_OK ++# define UNALIGNED64_OK ++# elif defined(__i386__) || defined(__i486__) || defined(__i586__) || \ ++ defined(__i686__) || defined(_X86_) || defined(_M_IX86) ++# define UNALIGNED_OK ++# elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) ++# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__) ++# define UNALIGNED_OK ++# define UNALIGNED64_OK ++# endif ++# elif defined(__arm__) || (_M_ARM >= 7) ++# if (defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED)) || !defined(__GNUC__) ++# define UNALIGNED_OK ++# endif ++# elif defined(__powerpc64__) || defined(__ppc64__) ++# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++# define UNALIGNED_OK ++# define UNALIGNED64_OK ++# endif ++# endif ++#endif ++ ++#if defined(__has_feature) ++# if __has_feature(memory_sanitizer) ++# define Z_MEMORY_SANITIZER 1 ++# include ++# endif ++#endif ++ ++#ifndef Z_MEMORY_SANITIZER ++# define __msan_unpoison(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) ++#endif ++ + #endif +diff --git a/zconf-ng.h.in b/zconf-ng.h.in +index 7d54668..226f06a 100644 +--- a/zconf-ng.h.in ++++ b/zconf-ng.h.in +@@ -6,18 +6,12 @@ + #ifndef ZCONFNG_H + #define ZCONFNG_H + ++#include "zlib_name_mangling-ng.h" ++ + #if !defined(_WIN32) && defined(__WIN32__) + # define _WIN32 + #endif + +-#ifdef __STDC_VERSION__ +-# if __STDC_VERSION__ >= 199901L +-# ifndef STDC99 +-# define STDC99 +-# endif +-# endif +-#endif +- + /* Clang macro for detecting declspec support + * https://clang.llvm.org/docs/LanguageExtensions.html#has-declspec-attribute + */ +@@ -38,6 +32,9 @@ + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) 
+ */ ++#ifndef MIN_WBITS ++# define MIN_WBITS 8 /* 256 LZ77 window */ ++#endif + #ifndef MAX_WBITS + # define MAX_WBITS 15 /* 32K LZ77 window */ + #endif +@@ -94,6 +91,9 @@ + # define Z_EXPORTVA + #endif + ++/* Conditional exports */ ++#define ZNG_CONDEXPORT Z_EXPORT ++ + /* Fallback for something that includes us. */ + typedef unsigned char Byte; + typedef Byte Bytef; +@@ -119,7 +119,6 @@ typedef PTRDIFF_TYPE ptrdiff_t; + #endif + + #include /* for off_t */ +-#include /* for va_list */ + + #include /* for wchar_t and NULL */ + +diff --git a/zconf.h.in b/zconf.h.in +index ae2a309..074f025 100644 +--- a/zconf.h.in ++++ b/zconf.h.in +@@ -6,18 +6,12 @@ + #ifndef ZCONF_H + #define ZCONF_H + ++#include "zlib_name_mangling.h" ++ + #if !defined(_WIN32) && defined(__WIN32__) + # define _WIN32 + #endif + +-#ifdef __STDC_VERSION__ +-# if __STDC_VERSION__ >= 199901L +-# ifndef STDC99 +-# define STDC99 +-# endif +-# endif +-#endif +- + /* Clang macro for detecting declspec support + * https://clang.llvm.org/docs/LanguageExtensions.html#has-declspec-attribute + */ +@@ -41,6 +35,9 @@ + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ ++#ifndef MIN_WBITS ++# define MIN_WBITS 8 /* 256 LZ77 window */ ++#endif + #ifndef MAX_WBITS + # define MAX_WBITS 15 /* 32K LZ77 window */ + #endif +@@ -85,6 +82,9 @@ + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ + #if defined(ZLIB_WINAPI) && defined(_WIN32) ++# ifndef WIN32_LEAN_AND_MEAN ++# define WIN32_LEAN_AND_MEAN ++# endif + # include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +@@ -102,6 +102,9 @@ + # define Z_EXPORTVA + #endif + ++/* Conditional exports */ ++#define ZNG_CONDEXPORT Z_INTERNAL ++ + /* For backwards compatibility */ + + #ifndef ZEXTERN +@@ -114,7 +117,7 @@ + # define ZEXPORTVA Z_EXPORTVA + #endif + +-/* Fallback for something that includes us. 
*/ ++/* Legacy zlib typedefs for backwards compatibility. Don't assume stdint.h is defined. */ + typedef unsigned char Byte; + typedef Byte Bytef; + +@@ -130,6 +133,8 @@ typedef void const *voidpc; + typedef void *voidpf; + typedef void *voidp; + ++typedef unsigned int z_crc_t; ++ + #ifdef HAVE_UNISTD_H /* may be set to #if 1 by configure/cmake/etc */ + # define Z_HAVE_UNISTD_H + #endif +@@ -139,7 +144,6 @@ typedef PTRDIFF_TYPE ptrdiff_t; + #endif + + #include /* for off_t */ +-#include /* for va_list */ + + #include /* for wchar_t and NULL */ + +@@ -194,4 +198,6 @@ typedef PTRDIFF_TYPE ptrdiff_t; + # endif + #endif + ++typedef size_t z_size_t; ++ + #endif /* ZCONF_H */ +diff --git a/zendian.h b/zendian.h +index 54718ed..28177a6 100644 +--- a/zendian.h ++++ b/zendian.h +@@ -27,14 +27,14 @@ + #elif defined(_WIN32) + # define LITTLE_ENDIAN 1234 + # define BIG_ENDIAN 4321 +-# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) || defined (_M_ARM64) ++# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) || defined (_M_ARM64) || defined (_M_ARM64EC) + # define BYTE_ORDER LITTLE_ENDIAN + # else + # error Unknown endianness! + # endif + #elif defined(__linux__) + # include +-#elif defined(__APPLE__) || defined(__arm__) || defined(__aarch64__) ++#elif defined(__APPLE__) + # include + #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) + # include +diff --git a/zlib-ng.h.in b/zlib-ng.h.in +new file mode 100644 +index 0000000..de811e0 +--- /dev/null ++++ b/zlib-ng.h.in +@@ -0,0 +1,1871 @@ ++#ifndef ZNGLIB_H_ ++#define ZNGLIB_H_ ++/* zlib-ng.h -- interface of the 'zlib-ng' compression library, forked from zlib. ++ ++ Copyright (C) 1995-2016 Jean-loup Gailly and Mark Adler ++ ++ This software is provided 'as-is', without any express or implied ++ warranty. 
In no event will the authors be held liable for any damages ++ arising from the use of this software. ++ ++ Permission is granted to anyone to use this software for any purpose, ++ including commercial applications, and to alter it and redistribute it ++ freely, subject to the following restrictions: ++ ++ 1. The origin of this software must not be misrepresented; you must not ++ claim that you wrote the original software. If you use this software ++ in a product, an acknowledgment in the product documentation would be ++ appreciated but is not required. ++ 2. Altered source versions must be plainly marked as such, and must not be ++ misrepresented as being the original software. ++ 3. This notice may not be removed or altered from any source distribution. ++ ++ Jean-loup Gailly Mark Adler ++ jloup@gzip.org madler@alumni.caltech.edu ++ ++ ++ The data format used by the zlib library is described by RFCs (Request for ++ Comments) 1950 to 1952 in the files https://tools.ietf.org/html/rfc1950 ++ (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). 
++*/ ++ ++#ifdef ZLIB_H_ ++# error Include zlib-ng.h for zlib-ng API or zlib.h for zlib-compat API but not both ++#endif ++ ++#ifndef RC_INVOKED ++#include ++#include ++ ++#include "zconf-ng.h" ++ ++#ifndef ZCONFNG_H ++# error Missing zconf-ng.h add binary output directory to include directories ++#endif ++#endif /* RC_INVOKED */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZLIBNG_VERSION "2.1.5" ++#define ZLIBNG_VERNUM 0x020105F0L /* MMNNRRSM: major minor revision status modified */ ++#define ZLIBNG_VER_MAJOR 2 ++#define ZLIBNG_VER_MINOR 1 ++#define ZLIBNG_VER_REVISION 5 ++#define ZLIBNG_VER_STATUS F /* 0=devel, 1-E=beta, F=Release (DEPRECATED) */ ++#define ZLIBNG_VER_STATUSH 0xF /* Hex values: 0=devel, 1-E=beta, F=Release */ ++#define ZLIBNG_VER_MODIFIED 0 /* non-zero if modified externally from zlib-ng */ ++ ++/* ++ The 'zlib' compression library provides in-memory compression and ++ decompression functions, including integrity checks of the uncompressed data. ++ This version of the library supports only one compression method (deflation) ++ but other algorithms will be added later and will have the same stream ++ interface. ++ ++ Compression can be done in a single step if the buffers are large enough, ++ or can be done by repeated calls of the compression function. In the latter ++ case, the application must provide more input and/or consume the output ++ (providing more output space) before each call. ++ ++ The compressed data format used by default by the in-memory functions is ++ the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped ++ around a deflate stream, which is itself documented in RFC 1951. ++ ++ The library also supports reading and writing files in gzip (.gz) format ++ with an interface similar to that of stdio using the functions that start ++ with "gz". The gzip format is different from the zlib format. gzip is a ++ gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. 
++ ++ This library can optionally read and write gzip and raw deflate streams in ++ memory as well. ++ ++ The zlib format was designed to be compact and fast for use in memory ++ and on communications channels. The gzip format was designed for single- ++ file compression on file systems, has a larger header than zlib to maintain ++ directory information, and uses a different, slower check method than zlib. ++ ++ The library does not install any signal handler. The decoder checks ++ the consistency of the compressed data, so the library should never crash ++ even in the case of corrupted input. ++*/ ++ ++typedef void *(*alloc_func) (void *opaque, unsigned int items, unsigned int size); ++typedef void (*free_func) (void *opaque, void *address); ++ ++struct internal_state; ++ ++typedef struct zng_stream_s { ++ const uint8_t *next_in; /* next input byte */ ++ uint32_t avail_in; /* number of bytes available at next_in */ ++ size_t total_in; /* total number of input bytes read so far */ ++ ++ uint8_t *next_out; /* next output byte will go here */ ++ uint32_t avail_out; /* remaining free space at next_out */ ++ size_t total_out; /* total number of bytes output so far */ ++ ++ const char *msg; /* last error message, NULL if no error */ ++ struct internal_state *state; /* not visible by applications */ ++ ++ alloc_func zalloc; /* used to allocate the internal state */ ++ free_func zfree; /* used to free the internal state */ ++ void *opaque; /* private data object passed to zalloc and zfree */ ++ ++ int data_type; /* best guess about the data type: binary or text ++ for deflate, or the decoding state for inflate */ ++ uint32_t adler; /* Adler-32 or CRC-32 value of the uncompressed data */ ++ unsigned long reserved; /* reserved for future use */ ++} zng_stream; ++ ++typedef zng_stream *zng_streamp; /* Obsolete type, retained for compatibility only */ ++ ++/* ++ gzip header information passed to and from zlib routines. 
See RFC 1952 ++ for more details on the meanings of these fields. ++*/ ++typedef struct zng_gz_header_s { ++ int32_t text; /* true if compressed data believed to be text */ ++ unsigned long time; /* modification time */ ++ int32_t xflags; /* extra flags (not used when writing a gzip file) */ ++ int32_t os; /* operating system */ ++ uint8_t *extra; /* pointer to extra field or NULL if none */ ++ uint32_t extra_len; /* extra field length (valid if extra != NULL) */ ++ uint32_t extra_max; /* space at extra (only when reading header) */ ++ uint8_t *name; /* pointer to zero-terminated file name or NULL */ ++ uint32_t name_max; /* space at name (only when reading header) */ ++ uint8_t *comment; /* pointer to zero-terminated comment or NULL */ ++ uint32_t comm_max; /* space at comment (only when reading header) */ ++ int32_t hcrc; /* true if there was or will be a header crc */ ++ int32_t done; /* true when done reading gzip header (not used when writing a gzip file) */ ++} zng_gz_header; ++ ++typedef zng_gz_header *zng_gz_headerp; ++ ++/* ++ The application must update next_in and avail_in when avail_in has dropped ++ to zero. It must update next_out and avail_out when avail_out has dropped ++ to zero. The application must initialize zalloc, zfree and opaque before ++ calling the init function. All other fields are set by the compression ++ library and must not be updated by the application. ++ ++ The opaque value provided by the application will be passed as the first ++ parameter for calls of zalloc and zfree. This can be useful for custom ++ memory management. The compression library attaches no meaning to the ++ opaque value. ++ ++ zalloc must return NULL if there is not enough memory for the object. ++ If zlib is used in a multi-threaded application, zalloc and zfree must be ++ thread safe. In that case, zlib is thread-safe. 
When zalloc and zfree are ++ Z_NULL on entry to the initialization function, they are set to internal ++ routines that use the standard library functions malloc() and free(). ++ ++ The fields total_in and total_out can be used for statistics or progress ++ reports. After compression, total_in holds the total size of the ++ uncompressed data and may be saved for use by the decompressor (particularly ++ if the decompressor wants to decompress everything in a single step). ++*/ ++ ++ /* constants */ ++ ++#define Z_NO_FLUSH 0 ++#define Z_PARTIAL_FLUSH 1 ++#define Z_SYNC_FLUSH 2 ++#define Z_FULL_FLUSH 3 ++#define Z_FINISH 4 ++#define Z_BLOCK 5 ++#define Z_TREES 6 ++/* Allowed flush values; see deflate() and inflate() below for details */ ++ ++#define Z_OK 0 ++#define Z_STREAM_END 1 ++#define Z_NEED_DICT 2 ++#define Z_ERRNO (-1) ++#define Z_STREAM_ERROR (-2) ++#define Z_DATA_ERROR (-3) ++#define Z_MEM_ERROR (-4) ++#define Z_BUF_ERROR (-5) ++#define Z_VERSION_ERROR (-6) ++/* Return codes for the compression/decompression functions. Negative values ++ * are errors, positive values are used for special but normal events. 
++ */ ++ ++#define Z_NO_COMPRESSION 0 ++#define Z_BEST_SPEED 1 ++#define Z_BEST_COMPRESSION 9 ++#define Z_DEFAULT_COMPRESSION (-1) ++/* compression levels */ ++ ++#define Z_FILTERED 1 ++#define Z_HUFFMAN_ONLY 2 ++#define Z_RLE 3 ++#define Z_FIXED 4 ++#define Z_DEFAULT_STRATEGY 0 ++/* compression strategy; see deflateInit2() below for details */ ++ ++#define Z_BINARY 0 ++#define Z_TEXT 1 ++#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ ++#define Z_UNKNOWN 2 ++/* Possible values of the data_type field for deflate() */ ++ ++#define Z_DEFLATED 8 ++/* The deflate compression method (the only one supported in this version) */ ++ ++#define Z_NULL NULL /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ ++ ++ ++ /* basic functions */ ++ ++Z_EXTERN Z_EXPORT ++const char *zlibng_version(void); ++/* The application can compare zlibng_version and ZLIBNG_VERSION for consistency. ++ If the first character differs, the library code actually used is not ++ compatible with the zlib-ng.h header file used by the application. ++ */ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateInit(zng_stream *strm, int32_t level); ++/* ++ Initializes the internal stream state for compression. The fields ++ zalloc, zfree and opaque must be initialized before by the caller. If ++ zalloc and zfree are set to Z_NULL, deflateInit updates them to use default ++ allocation functions. total_in, total_out, adler, and msg are initialized. ++ ++ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: ++ 1 gives best speed, 9 gives best compression, 0 gives no compression at all ++ (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION ++ requests a default compromise between speed and compression (currently ++ equivalent to level 6). ++ ++ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if level is not a valid compression level. 
++ msg is set to null if there is no error message. deflateInit does not perform ++ any compression: this will be done by deflate(). ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflate(zng_stream *strm, int32_t flush); ++/* ++ deflate compresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may introduce ++ some output latency (reading input without producing any output) except when ++ forced to flush. ++ ++ The detailed semantics are as follows. deflate performs one or both of the ++ following actions: ++ ++ - Compress more input starting at next_in and update next_in and avail_in ++ accordingly. If not all input can be processed (because there is not ++ enough room in the output buffer), next_in and avail_in are updated and ++ processing will resume at this point for the next call of deflate(). ++ ++ - Generate more output starting at next_out and update next_out and avail_out ++ accordingly. This action is forced if the parameter flush is non zero. ++ Forcing flush frequently degrades the compression ratio, so this parameter ++ should be set only when necessary. Some output may be provided even if ++ flush is zero. ++ ++ Before the call of deflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming more ++ output, and updating avail_in or avail_out accordingly; avail_out should ++ never be zero before the call. The application can consume the compressed ++ output when it wants, for example when the output buffer is full (avail_out ++ == 0), or after each call of deflate(). If deflate returns Z_OK and with ++ zero avail_out, it must be called again after making room in the output ++ buffer because there might be more output pending. See deflatePending(), ++ which can be used if desired to determine whether or not there is more output ++ in that case. 
++ ++ Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to ++ decide how much data to accumulate before producing output, in order to ++ maximize compression. ++ ++ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is ++ flushed to the output buffer and the output is aligned on a byte boundary, so ++ that the decompressor can get all input data available so far. (In ++ particular avail_in is zero after the call if enough output space has been ++ provided before the call.) Flushing may degrade compression for some ++ compression algorithms and so it should be used only when necessary. This ++ completes the current deflate block and follows it with an empty stored block ++ that is three bits plus filler bits to the next byte, followed by four bytes ++ (00 00 ff ff). ++ ++ If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the ++ output buffer, but the output is not aligned to a byte boundary. All of the ++ input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. ++ This completes the current deflate block and follows it with an empty fixed ++ codes block that is 10 bits long. This assures that enough bytes are output ++ in order for the decompressor to finish the block before the empty fixed ++ codes block. ++ ++ If flush is set to Z_BLOCK, a deflate block is completed and emitted, as ++ for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to ++ seven bits of the current block are held to be written as the next byte after ++ the next deflate block is completed. In this case, the decompressor may not ++ be provided enough bits at this point in order to complete decompression of ++ the data provided so far to the compressor. It may need to wait for the next ++ block to be emitted. This is for advanced applications that need to control ++ the emission of deflate blocks. 
++ ++ If flush is set to Z_FULL_FLUSH, all output is flushed as with ++ Z_SYNC_FLUSH, and the compression state is reset so that decompression can ++ restart from this point if previous compressed data has been damaged or if ++ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade ++ compression. ++ ++ If deflate returns with avail_out == 0, this function must be called again ++ with the same value of the flush parameter and more output space (updated ++ avail_out), until the flush is complete (deflate returns with non-zero ++ avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that ++ avail_out is greater than six when the flush marker begins, in order to avoid ++ repeated flush markers upon calling deflate() again when avail_out == 0. ++ ++ If the parameter flush is set to Z_FINISH, pending input is processed, ++ pending output is flushed and deflate returns with Z_STREAM_END if there was ++ enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this ++ function must be called again with Z_FINISH and more output space (updated ++ avail_out) but no more input data, until it returns with Z_STREAM_END or an ++ error. After deflate has returned Z_STREAM_END, the only possible operations ++ on the stream are deflateReset or deflateEnd. ++ ++ Z_FINISH can be used in the first deflate call after deflateInit if all the ++ compression is to be done in a single step. In order to complete in one ++ call, avail_out must be at least the value returned by deflateBound (see ++ below). Then deflate is guaranteed to return Z_STREAM_END. If not enough ++ output space is provided, deflate will not return Z_STREAM_END, and it must ++ be called again as described above. ++ ++ deflate() sets strm->adler to the Adler-32 checksum of all input read ++ so far (that is, total_in bytes). If a gzip stream is being generated, then ++ strm->adler will be the CRC-32 checksum of the input read so far. (See ++ deflateInit2 below.) 
++ ++ deflate() may update strm->data_type if it can make a good guess about ++ the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is ++ considered binary. This field is only for information purposes and does not ++ affect the compression algorithm in any manner. ++ ++ deflate() returns Z_OK if some progress has been made (more input ++ processed or more output produced), Z_STREAM_END if all input has been ++ consumed and all output has been produced (only when flush is set to ++ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example ++ if next_in or next_out was NULL or the state was inadvertently written over ++ by the application), or Z_BUF_ERROR if no progress is possible (for example ++ avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and ++ deflate() can be called again with more input and more output space to ++ continue compressing. ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateEnd(zng_stream *strm); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any pending ++ output. ++ ++ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the ++ stream state was inconsistent, Z_DATA_ERROR if the stream was freed ++ prematurely (some input or output was discarded). In the error case, msg ++ may be set but then points to a static string (which must not be ++ deallocated). ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateInit(zng_stream *strm); ++/* ++ Initializes the internal stream state for decompression. The fields ++ next_in, avail_in, zalloc, zfree and opaque must be initialized before by ++ the caller. In the current version of inflate, the provided input is not ++ read or consumed. The allocation of a sliding window will be deferred to ++ the first call of inflate (if the decompression does not complete on the ++ first call). 
If zalloc and zfree are set to Z_NULL, inflateInit updates ++ them to use default allocation functions. total_in, total_out, adler, and ++ msg are initialized. ++ ++ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, or Z_STREAM_ERROR if the parameters are invalid, such as a null ++ pointer to the structure. msg is set to null if there is no error message. ++ inflateInit does not perform any decompression. Actual decompression will ++ be done by inflate(). So next_in, and avail_in, next_out, and avail_out ++ are unused and unchanged. The current implementation of inflateInit() ++ does not process any header information -- that is deferred until inflate() ++ is called. ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflate(zng_stream *strm, int32_t flush); ++/* ++ inflate decompresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may introduce ++ some output latency (reading input without producing any output) except when ++ forced to flush. ++ ++ The detailed semantics are as follows. inflate performs one or both of the ++ following actions: ++ ++ - Decompress more input starting at next_in and update next_in and avail_in ++ accordingly. If not all input can be processed (because there is not ++ enough room in the output buffer), then next_in and avail_in are updated ++ accordingly, and processing will resume at this point for the next call of ++ inflate(). ++ ++ - Generate more output starting at next_out and update next_out and avail_out ++ accordingly. inflate() provides as much output as possible, until there is ++ no more input data or no more space in the output buffer (see below about ++ the flush parameter). ++ ++ Before the call of inflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming more ++ output, and updating the next_* and avail_* values accordingly. 
If the ++ caller of inflate() does not provide both available input and available ++ output space, it is possible that there will be no progress made. The ++ application can consume the uncompressed output when it wants, for example ++ when the output buffer is full (avail_out == 0), or after each call of ++ inflate(). If inflate returns Z_OK and with zero avail_out, it must be ++ called again after making room in the output buffer because there might be ++ more output pending. ++ ++ The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, ++ Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much ++ output as possible to the output buffer. Z_BLOCK requests that inflate() ++ stop if and when it gets to the next deflate block boundary. When decoding ++ the zlib or gzip format, this will cause inflate() to return immediately ++ after the header and before the first block. When doing a raw inflate, ++ inflate() will go ahead and process the first block, and will return when it ++ gets to the end of that block, or when it runs out of data. ++ ++ The Z_BLOCK option assists in appending to or combining deflate streams. ++ To assist in this, on return inflate() always sets strm->data_type to the ++ number of unused bits in the last byte taken from strm->next_in, plus 64 if ++ inflate() is currently decoding the last block in the deflate stream, plus ++ 128 if inflate() returned immediately after decoding an end-of-block code or ++ decoding the complete header up to just before the first byte of the deflate ++ stream. The end-of-block will not be indicated until all of the uncompressed ++ data from that block has been written to strm->next_out. The number of ++ unused bits may in general be greater than seven, except when bit 7 of ++ data_type is set, in which case the number of unused bits will be less than ++ eight. 
data_type is set as noted here every time inflate() returns for all ++ flush options, and so can be used to determine the amount of currently ++ consumed input in bits. ++ ++ The Z_TREES option behaves as Z_BLOCK does, but it also returns when the ++ end of each deflate block header is reached, before any actual data in that ++ block is decoded. This allows the caller to determine the length of the ++ deflate block header for later use in random access within a deflate block. ++ 256 is added to the value of strm->data_type when inflate() returns ++ immediately after reaching the end of the deflate block header. ++ ++ inflate() should normally be called until it returns Z_STREAM_END or an ++ error. However if all decompression is to be performed in a single step (a ++ single call of inflate), the parameter flush should be set to Z_FINISH. In ++ this case all pending input is processed and all pending output is flushed; ++ avail_out must be large enough to hold all of the uncompressed data for the ++ operation to complete. (The size of the uncompressed data may have been ++ saved by the compressor for this purpose.) The use of Z_FINISH is not ++ required to perform an inflation in one step. However it may be used to ++ inform inflate that a faster approach can be used for the single inflate() ++ call. Z_FINISH also informs inflate to not maintain a sliding window if the ++ stream completes, which reduces inflate's memory footprint. If the stream ++ does not complete, either because not all of the stream is provided or not ++ enough output space is provided, then a sliding window will be allocated and ++ inflate() can be called again to continue the operation as if Z_NO_FLUSH had ++ been used. ++ ++ In this implementation, inflate() always flushes as much output as ++ possible to the output buffer, and always uses the faster approach on the ++ first call. 
So the effects of the flush parameter in this implementation are ++ on the return value of inflate() as noted below, when inflate() returns early ++ when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of ++ memory for a sliding window when Z_FINISH is used. ++ ++ If a preset dictionary is needed after this call (see inflateSetDictionary ++ below), inflate sets strm->adler to the Adler-32 checksum of the dictionary ++ chosen by the compressor and returns Z_NEED_DICT; otherwise it sets ++ strm->adler to the Adler-32 checksum of all output produced so far (that is, ++ total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described ++ below. At the end of the stream, inflate() checks that its computed Adler-32 ++ checksum is equal to that saved by the compressor and returns Z_STREAM_END ++ only if the checksum is correct. ++ ++ inflate() can decompress and check either zlib-wrapped or gzip-wrapped ++ deflate data. The header type is detected automatically, if requested when ++ initializing with inflateInit2(). Any information contained in the gzip ++ header is not retained unless inflateGetHeader() is used. When processing ++ gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output ++ produced so far. The CRC-32 is checked against the gzip trailer, as is the ++ uncompressed length, modulo 2^32.
++ ++ inflate() returns Z_OK if some progress has been made (more input processed ++ or more output produced), Z_STREAM_END if the end of the compressed data has ++ been reached and all uncompressed output has been produced, Z_NEED_DICT if a ++ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was ++ corrupted (input stream not conforming to the zlib format or incorrect check ++ value, in which case strm->msg points to a string with a more specific ++ error), Z_STREAM_ERROR if the stream structure was inconsistent (for example ++ next_in or next_out was NULL, or the state was inadvertently written over ++ by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR ++ if no progress is possible or if there was not enough room in the output ++ buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and ++ inflate() can be called again with more input and more output space to ++ continue decompressing. If Z_DATA_ERROR is returned, the application may ++ then call inflateSync() to look for a good compression block if a partial ++ recovery of the data is to be attempted. ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateEnd(zng_stream *strm); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any pending ++ output. ++ ++ inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state ++ was inconsistent. ++*/ ++ ++ ++ /* Advanced functions */ ++ ++/* ++ The following functions are needed only in some special applications. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateInit2(zng_stream *strm, int32_t level, int32_t method, int32_t windowBits, int32_t memLevel, int32_t strategy); ++/* ++ This is another version of deflateInit with more compression options. The ++ fields zalloc, zfree and opaque must be initialized before by the caller. ++ ++ The method parameter is the compression method. 
It must be Z_DEFLATED in ++ this version of the library. ++ ++ The windowBits parameter is the base two logarithm of the window size ++ (the size of the history buffer). It should be in the range 8..15 for this ++ version of the library. Larger values of this parameter result in better ++ compression at the expense of memory usage. The default value is 15 if ++ deflateInit is used instead. ++ ++ For the current implementation of deflate(), a windowBits value of 8 (a ++ window size of 256 bytes) is not supported. As a result, a request for 8 ++ will result in 9 (a 512-byte window). In that case, providing 8 to ++ inflateInit2() will result in an error when the zlib header with 9 is ++ checked against the initialization of inflate(). The remedy is to not use 8 ++ with deflateInit2() with this initialization, or at least in that case use 9 ++ with inflateInit2(). ++ ++ windowBits can also be -8..-15 for raw deflate. In this case, -windowBits ++ determines the window size. deflate() will then generate raw deflate data ++ with no zlib header or trailer, and will not compute a check value. ++ ++ windowBits can also be greater than 15 for optional gzip encoding. Add ++ 16 to windowBits to write a simple gzip header and trailer around the ++ compressed data instead of a zlib wrapper. The gzip header will have no ++ file name, no extra data, no comment, no modification time (set to zero), no ++ header crc, and the operating system will be set to the appropriate value, ++ if the operating system was determined at compile time. If a gzip stream is ++ being written, strm->adler is a CRC-32 instead of an Adler-32. ++ ++ For raw deflate or gzip encoding, a request for a 256-byte window is ++ rejected as invalid, since only the zlib header provides a means of ++ transmitting the window size to the decompressor. ++ ++ The memLevel parameter specifies how much memory should be allocated ++ for the internal compression state. 
memLevel=1 uses minimum memory but is ++ slow and reduces compression ratio; memLevel=9 uses maximum memory for ++ optimal speed. The default value is 8. See zconf.h for total memory usage ++ as a function of windowBits and memLevel. ++ ++ The strategy parameter is used to tune the compression algorithm. Use the ++ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a ++ filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no ++ string match), or Z_RLE to limit match distances to one (run-length ++ encoding). Filtered data consists mostly of small values with a somewhat ++ random distribution. In this case, the compression algorithm is tuned to ++ compress them better. The effect of Z_FILTERED is to force more Huffman ++ coding and less string matching; it is somewhat intermediate between ++ Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as ++ fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The ++ strategy parameter only affects the compression ratio but not the ++ correctness of the compressed output even if it is not set appropriately. ++ Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler ++ decoder for special applications. ++ ++ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid method). ++ msg is set to null if there is no error message. deflateInit2 does not perform ++ any compression: this will be done by deflate(). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateSetDictionary(zng_stream *strm, const uint8_t *dictionary, uint32_t dictLength); ++/* ++ Initializes the compression dictionary from the given byte sequence ++ without producing any compressed output. When using the zlib format, this ++ function must be called immediately after deflateInit, deflateInit2 or ++ deflateReset, and before any call of deflate. 
When doing raw deflate, this ++ function must be called either before any call of deflate, or immediately ++ after the completion of a deflate block, i.e. after all input has been ++ consumed and all output has been delivered when using any of the flush ++ options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The ++ compressor and decompressor must use exactly the same dictionary (see ++ inflateSetDictionary). ++ ++ The dictionary should consist of strings (byte sequences) that are likely ++ to be encountered later in the data to be compressed, with the most commonly ++ used strings preferably put towards the end of the dictionary. Using a ++ dictionary is most useful when the data to be compressed is short and can be ++ predicted with good accuracy; the data can then be compressed better than ++ with the default empty dictionary. ++ ++ Depending on the size of the compression data structures selected by ++ deflateInit or deflateInit2, a part of the dictionary may in effect be ++ discarded, for example if the dictionary is larger than the window size ++ provided in deflateInit or deflateInit2. Thus the strings most likely to be ++ useful should be put at the end of the dictionary, not at the front. In ++ addition, the current implementation of deflate will use at most the window ++ size minus 262 bytes of the provided dictionary. ++ ++ Upon return of this function, strm->adler is set to the Adler-32 value ++ of the dictionary; the decompressor may later use this value to determine ++ which dictionary has been used by the compressor. (The Adler-32 value ++ applies to the whole dictionary even if only a subset of the dictionary is ++ actually used by the compressor.) If a raw deflate was requested, then the ++ Adler-32 value is not computed and strm->adler is not set. ++ ++ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a ++ parameter is invalid (e.g. 
dictionary being NULL) or the stream state is ++ inconsistent (for example if deflate has already been called for this stream ++ or if not at a block boundary for raw deflate). deflateSetDictionary does ++ not perform any compression: this will be done by deflate(). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateGetDictionary(zng_stream *strm, uint8_t *dictionary, uint32_t *dictLength); ++/* ++ Returns the sliding dictionary being maintained by deflate. dictLength is ++ set to the number of bytes in the dictionary, and that many bytes are copied ++ to dictionary. dictionary must have enough space, where 32768 bytes is ++ always enough. If deflateGetDictionary() is called with dictionary equal to ++ Z_NULL, then only the dictionary length is returned, and nothing is copied. ++ Similarly, if dictLength is Z_NULL, then it is not set. ++ ++ deflateGetDictionary() may return a length less than the window size, even ++ when more than the window size in input has been provided. It may return up ++ to 258 bytes less in that case, due to how zlib's implementation of deflate ++ manages the sliding window and lookahead for matches, where matches can be ++ up to 258 bytes long. If the application needs the last window-size bytes of ++ input, then that would need to be saved by the application outside of zlib. ++ ++ deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the ++ stream state is inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateCopy(zng_stream *dest, zng_stream *source); ++/* ++ Sets the destination stream as a complete copy of the source stream. ++ ++ This function can be useful when several compression strategies will be ++ tried, for example when there are several ways of pre-processing the input ++ data with a filter. The streams that will be discarded should then be freed ++ by calling deflateEnd. 
Note that deflateCopy duplicates the internal ++ compression state which can be quite large, so this strategy is slow and can ++ consume lots of memory. ++ ++ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent ++ (such as zalloc being NULL). msg is left unchanged in both source and ++ destination. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateReset(zng_stream *strm); ++/* ++ This function is equivalent to deflateEnd followed by deflateInit, but ++ does not free and reallocate the internal compression state. The stream ++ will leave the compression level and any other attributes that may have been ++ set unchanged. total_in, total_out, adler, and msg are initialized. ++ ++ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateParams(zng_stream *strm, int32_t level, int32_t strategy); ++/* ++ Dynamically update the compression level and compression strategy. The ++ interpretation of level and strategy is as in deflateInit2(). This can be ++ used to switch between compression and straight copy of the input data, or ++ to switch to a different kind of input data requiring a different strategy. ++ If the compression approach (which is a function of the level) or the ++ strategy is changed, and if there have been any deflate() calls since the ++ state was initialized or reset, then the input available so far is ++ compressed with the old level and strategy using deflate(strm, Z_BLOCK). ++ There are three approaches for the compression levels 0, 1..3, and 4..9 ++ respectively. The new level and strategy will take effect at the next call ++ of deflate(). ++ ++ If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does ++ not have enough output space to complete, then the parameter change will not ++ take effect. 
In this case, deflateParams() can be called again with the ++ same parameters and more output space to try again. ++ ++ In order to assure a change in the parameters on the first try, the ++ deflate stream should be flushed using deflate() with Z_BLOCK or other flush ++ request until strm.avail_out is not zero, before calling deflateParams(). ++ Then no more input data should be provided before the deflateParams() call. ++ If this is done, the old level and strategy will be applied to the data ++ compressed before deflateParams(), and the new level and strategy will be ++ applied to the data compressed after deflateParams(). ++ ++ deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream ++ state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if ++ there was not enough output space to complete the compression of the ++ available input data before a change in the strategy or approach. Note that ++ in the case of a Z_BUF_ERROR, the parameters are not changed. A return ++ value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be ++ retried with more output space. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateTune(zng_stream *strm, int32_t good_length, int32_t max_lazy, int32_t nice_length, int32_t max_chain); ++/* ++ Fine tune deflate's internal compression parameters. This should only be ++ used by someone who understands the algorithm used by zlib's deflate for ++ searching for the best matching string, and even then only by the most ++ fanatic optimizer trying to squeeze out the last compressed bit for their ++ specific input data. Read the deflate.c source code for the meaning of the ++ max_lazy, good_length, nice_length, and max_chain parameters. ++ ++ deflateTune() can be called after deflateInit() or deflateInit2(), and ++ returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. 
++ */ ++ ++Z_EXTERN Z_EXPORT ++unsigned long zng_deflateBound(zng_stream *strm, unsigned long sourceLen); ++/* ++ deflateBound() returns an upper bound on the compressed size after ++ deflation of sourceLen bytes. It must be called after deflateInit() or ++ deflateInit2(), and after deflateSetHeader(), if used. This would be used ++ to allocate an output buffer for deflation in a single pass, and so would be ++ called before deflate(). If that first deflate() call is provided the ++ sourceLen input bytes, an output buffer allocated to the size returned by ++ deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed ++ to return Z_STREAM_END. Note that it is possible for the compressed size to ++ be larger than the value returned by deflateBound() if flush options other ++ than Z_FINISH or Z_NO_FLUSH are used. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflatePending(zng_stream *strm, uint32_t *pending, int32_t *bits); ++/* ++ deflatePending() returns the number of bytes and bits of output that have ++ been generated, but not yet provided in the available output. The bytes not ++ provided would be due to the available output space having been consumed. ++ The number of bits of output not provided are between 0 and 7, where they ++ await more bits to join them in order to fill out a full byte. If pending ++ or bits are NULL, then those values are not set. ++ ++ deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++ */ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflatePrime(zng_stream *strm, int32_t bits, int32_t value); ++/* ++ deflatePrime() inserts bits in the deflate output stream. The intent ++ is that this function is used to start off the deflate output with the bits ++ leftover from a previous deflate stream when appending to it. As such, this ++ function can only be used for raw deflate, and must be used before the first ++ deflate() call after a deflateInit2() or deflateReset().
bits must be less ++ than or equal to 16, and that many of the least significant bits of value ++ will be inserted in the output. ++ ++ deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough ++ room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the ++ source stream state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateSetHeader(zng_stream *strm, zng_gz_headerp head); ++/* ++ deflateSetHeader() provides gzip header information for when a gzip ++ stream is requested by deflateInit2(). deflateSetHeader() may be called ++ after deflateInit2() or deflateReset() and before the first call of ++ deflate(). The text, time, os, extra field, name, and comment information ++ in the provided zng_gz_header structure are written to the gzip header (xflag is ++ ignored -- the extra flags are set according to the compression level). The ++ caller must assure that, if not NULL, name and comment are terminated with ++ a zero byte, and that if extra is not NULL, that extra_len bytes are ++ available there. If hcrc is true, a gzip header crc is included. Note that ++ the current versions of the command-line version of gzip (up through version ++ 1.3.x) do not support header crc's, and will report that it is a "multi-part ++ gzip file" and give up. ++ ++ If deflateSetHeader is not used, the default gzip header has text false, ++ the time set to zero, and os set to the current operating system, with no ++ extra, name, or comment fields. The gzip header is returned to the default ++ state by deflateReset(). ++ ++ deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateInit2(zng_stream *strm, int32_t windowBits); ++/* ++ This is another version of inflateInit with an extra parameter. The ++ fields next_in, avail_in, zalloc, zfree and opaque must be initialized ++ before by the caller. 
++ ++ The windowBits parameter is the base two logarithm of the maximum window ++ size (the size of the history buffer). It should be in the range 8..15 for ++ this version of the library. The default value is 15 if inflateInit is used ++ instead. windowBits must be greater than or equal to the windowBits value ++ provided to deflateInit2() while compressing, or it must be equal to 15 if ++ deflateInit2() was not used. If a compressed stream with a larger window ++ size is given as input, inflate() will return with the error code ++ Z_DATA_ERROR instead of trying to allocate a larger window. ++ ++ windowBits can also be zero to request that inflate use the window size in ++ the zlib header of the compressed stream. ++ ++ windowBits can also be -8..-15 for raw inflate. In this case, -windowBits ++ determines the window size. inflate() will then process raw deflate data, ++ not looking for a zlib or gzip header, not generating a check value, and not ++ looking for any check values for comparison at the end of the stream. This ++ is for use with other formats that use the deflate compressed data format ++ such as zip. Those formats provide their own check values. If a custom ++ format is developed using the raw deflate format for compressed data, it is ++ recommended that a check value such as an Adler-32 or a CRC-32 be applied to ++ the uncompressed data as is done in the zlib, gzip, and zip formats. For ++ most applications, the zlib format should be used as is. Note that comments ++ above on the use in deflateInit2() applies to the magnitude of windowBits. ++ ++ windowBits can also be greater than 15 for optional gzip decoding. Add ++ 32 to windowBits to enable zlib and gzip decoding with automatic header ++ detection, or add 16 to decode only the gzip format (the zlib format will ++ return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a ++ CRC-32 instead of an Adler-32. 
Unlike the gunzip utility and gzread() (see ++ below), inflate() will *not* automatically decode concatenated gzip members. ++ inflate() will return Z_STREAM_END at the end of the gzip member. The state ++ would need to be reset to continue decoding a subsequent gzip member. This ++ *must* be done if there is more data after a gzip member, in order for the ++ decompression to be compliant with the gzip standard (RFC 1952). ++ ++ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, or Z_STREAM_ERROR if the parameters are invalid, such as a null ++ pointer to the structure. msg is set to null if there is no error message. ++ inflateInit2 does not perform any decompression apart from possibly reading ++ the zlib header if present: actual decompression will be done by inflate(). ++ (So next_in and avail_in may be modified, but next_out and avail_out are ++ unused and unchanged.) The current implementation of inflateInit2() does not ++ process any header information -- that is deferred until inflate() is called. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateSetDictionary(zng_stream *strm, const uint8_t *dictionary, uint32_t dictLength); ++/* ++ Initializes the decompression dictionary from the given uncompressed byte ++ sequence. This function must be called immediately after a call of inflate, ++ if that call returned Z_NEED_DICT. The dictionary chosen by the compressor ++ can be determined from the Adler-32 value returned by that call of inflate. ++ The compressor and decompressor must use exactly the same dictionary (see ++ deflateSetDictionary). For raw inflate, this function can be called at any ++ time to set the dictionary. If the provided dictionary is smaller than the ++ window and there is already data in the window, then the provided dictionary ++ will amend what's there. The application must insure that the dictionary ++ that was used for compression is provided. 
++ ++ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a ++ parameter is invalid (e.g. dictionary being NULL) or the stream state is ++ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the ++ expected one (incorrect Adler-32 value). inflateSetDictionary does not ++ perform any decompression: this will be done by subsequent calls of ++ inflate(). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateGetDictionary(zng_stream *strm, uint8_t *dictionary, uint32_t *dictLength); ++/* ++ Returns the sliding dictionary being maintained by inflate. dictLength is ++ set to the number of bytes in the dictionary, and that many bytes are copied ++ to dictionary. dictionary must have enough space, where 32768 bytes is ++ always enough. If inflateGetDictionary() is called with dictionary equal to ++ NULL, then only the dictionary length is returned, and nothing is copied. ++ Similarly, if dictLength is NULL, then it is not set. ++ ++ inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the ++ stream state is inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateSync(zng_stream *strm); ++/* ++ Skips invalid compressed data until a possible full flush point (see above ++ for the description of deflate with Z_FULL_FLUSH) can be found, or until all ++ available input is skipped. No output is provided. ++ ++ inflateSync searches for a 00 00 FF FF pattern in the compressed data. ++ All full flush points have this pattern, but not all occurrences of this ++ pattern are full flush points. ++ ++ inflateSync returns Z_OK if a possible full flush point has been found, ++ Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point ++ has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. ++ In the success case, the application may save the current value of ++ total_in which indicates where valid compressed data was found.
In the ++ error case, the application may repeatedly call inflateSync, providing more ++ input each time, until success or end of the input data. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateCopy(zng_stream *dest, zng_stream *source); ++/* ++ Sets the destination stream as a complete copy of the source stream. ++ ++ This function can be useful when randomly accessing a large stream. The ++ first pass through the stream can periodically record the inflate state, ++ allowing restarting inflate at those points when randomly accessing the ++ stream. ++ ++ inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent ++ (such as zalloc being NULL). msg is left unchanged in both source and ++ destination. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateReset(zng_stream *strm); ++/* ++ This function is equivalent to inflateEnd followed by inflateInit, ++ but does not free and reallocate the internal decompression state. The ++ stream will keep attributes that may have been set by inflateInit2. ++ total_in, total_out, adler, and msg are initialized. ++ ++ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateReset2(zng_stream *strm, int32_t windowBits); ++/* ++ This function is the same as inflateReset, but it also permits changing ++ the wrap and window size requests. The windowBits parameter is interpreted ++ the same as it is for inflateInit2. If the window size is changed, then the ++ memory allocated for the window is freed, and the window will be reallocated ++ by inflate() if needed. ++ ++ inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL), or if ++ the windowBits parameter is invalid. 
++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflatePrime(zng_stream *strm, int32_t bits, int32_t value); ++/* ++ This function inserts bits in the inflate input stream. The intent is ++ that this function is used to start inflating at a bit position in the ++ middle of a byte. The provided bits will be used before any bytes are used ++ from next_in. This function should only be used with raw inflate, and ++ should be used before the first inflate() call after inflateInit2() or ++ inflateReset(). bits must be less than or equal to 16, and that many of the ++ least significant bits of value will be inserted in the input. ++ ++ If bits is negative, then the input stream bit buffer is emptied. Then ++ inflatePrime() can be called again to put bits in the buffer. This is used ++ to clear out bits leftover after feeding inflate a block description prior ++ to feeding inflate codes. ++ ++ inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++long zng_inflateMark(zng_stream *strm); ++/* ++ This function returns two values, one in the lower 16 bits of the return ++ value, and the other in the remaining upper bits, obtained by shifting the ++ return value down 16 bits. If the upper value is -1 and the lower value is ++ zero, then inflate() is currently decoding information outside of a block. ++ If the upper value is -1 and the lower value is non-zero, then inflate is in ++ the middle of a stored block, with the lower value equaling the number of ++ bytes from the input remaining to copy. If the upper value is not -1, then ++ it is the number of bits back from the current bit position in the input of ++ the code (literal or length/distance pair) currently being processed. In ++ that case the lower value is the number of bytes already emitted for that ++ code. 
++ ++ A code is being processed if inflate is waiting for more input to complete ++ decoding of the code, or if it has completed decoding but is waiting for ++ more output space to write the literal or match data. ++ ++ inflateMark() is used to mark locations in the input data for random ++ access, which may be at bit positions, and to note those cases where the ++ output of a code may span boundaries of random access blocks. The current ++ location in the input stream can be determined from avail_in and data_type ++ as noted in the description for the Z_BLOCK flush parameter for inflate. ++ ++ inflateMark returns the value noted above, or -65536 if the provided ++ source stream state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateGetHeader(zng_stream *strm, zng_gz_headerp head); ++/* ++ inflateGetHeader() requests that gzip header information be stored in the ++ provided zng_gz_header structure. inflateGetHeader() may be called after ++ inflateInit2() or inflateReset(), and before the first call of inflate(). ++ As inflate() processes the gzip stream, head->done is zero until the header ++ is completed, at which time head->done is set to one. If a zlib stream is ++ being decoded, then head->done is set to -1 to indicate that there will be ++ no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be ++ used to force inflate() to return immediately after header processing is ++ complete and before any actual data is decompressed. ++ ++ The text, time, xflags, and os fields are filled in with the gzip header ++ contents. hcrc is set to true if there is a header CRC. (The header CRC ++ was valid if done is set to one.) If extra is not NULL, then extra_max ++ contains the maximum number of bytes to write to extra. Once done is true, ++ extra_len contains the actual extra field length, and extra contains the ++ extra field, or that field truncated if extra_max is less than extra_len. 
++ If name is not NULL, then up to name_max characters are written there, ++ terminated with a zero unless the length is greater than name_max. If ++ comment is not NULL, then up to comm_max characters are written there, ++ terminated with a zero unless the length is greater than comm_max. When any ++ of extra, name, or comment are not NULL and the respective field is not ++ present in the header, then that field is set to NULL to signal its ++ absence. This allows the use of deflateSetHeader() with the returned ++ structure to duplicate the header. However if those fields are set to ++ allocated memory, then the application will need to save those pointers ++ elsewhere so that they can be eventually freed. ++ ++ If inflateGetHeader is not used, then the header information is simply ++ discarded. The header is always checked for validity, including the header ++ CRC if present. inflateReset() will reset the process to discard the header ++ information. The application would need to call inflateGetHeader() again to ++ retrieve the header from the next gzip stream. ++ ++ inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateBackInit(zng_stream *strm, int32_t windowBits, uint8_t *window); ++/* ++ Initialize the internal stream state for decompression using inflateBack() ++ calls. The fields zalloc, zfree and opaque in strm must be initialized ++ before the call. If zalloc and zfree are NULL, then the default library- ++ derived memory allocation routines are used. windowBits is the base two ++ logarithm of the window size, in the range 8..15. window is a caller ++ supplied buffer of that size. Except for special applications where it is ++ assured that deflate was used with small window sizes, windowBits must be 15 ++ and a 32K byte window must be supplied to be able to decompress general ++ deflate streams. ++ ++ See inflateBack() for the usage of these routines. 
++ ++ inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of ++ the parameters are invalid, Z_MEM_ERROR if the internal state could not be ++ allocated. ++*/ ++ ++typedef uint32_t (*in_func) (void *, const uint8_t * *); ++typedef int32_t (*out_func) (void *, uint8_t *, uint32_t); ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateBack(zng_stream *strm, in_func in, void *in_desc, out_func out, void *out_desc); ++/* ++ inflateBack() does a raw inflate with a single call using a call-back ++ interface for input and output. This is potentially more efficient than ++ inflate() for file i/o applications, in that it avoids copying between the ++ output and the sliding window by simply making the window itself the output ++ buffer. inflate() can be faster on modern CPUs when used with large ++ buffers. inflateBack() trusts the application to not change the output ++ buffer passed by the output function, at least until inflateBack() returns. ++ ++ inflateBackInit() must be called first to allocate the internal state ++ and to initialize the state with the user-provided window buffer. ++ inflateBack() may then be used multiple times to inflate a complete, raw ++ deflate stream with each call. inflateBackEnd() is then called to free the ++ allocated state. ++ ++ A raw deflate stream is one with no zlib or gzip header or trailer. ++ This routine would normally be used in a utility that reads zip or gzip ++ files and writes out uncompressed files. The utility would decode the ++ header and process the trailer on its own, hence this routine expects only ++ the raw deflate stream to decompress. This is different from the default ++ behavior of inflate(), which expects a zlib header and trailer around the ++ deflate stream. ++ ++ inflateBack() uses two subroutines supplied by the caller that are then ++ called by inflateBack() for input and output. 
inflateBack() calls those ++ routines until it reads a complete deflate stream and writes out all of the ++ uncompressed data, or until it encounters an error. The function's ++ parameters and return types are defined above in the in_func and out_func ++ typedefs. inflateBack() will call in(in_desc, &buf) which should return the ++ number of bytes of provided input, and a pointer to that input in buf. If ++ there is no input available, in() must return zero -- buf is ignored in that ++ case -- and inflateBack() will return a buffer error. inflateBack() will ++ call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. ++ out() should return zero on success, or non-zero on failure. If out() ++ returns non-zero, inflateBack() will return with an error. Neither in() nor ++ out() are permitted to change the contents of the window provided to ++ inflateBackInit(), which is also the buffer that out() uses to write from. ++ The length written by out() will be at most the window size. Any non-zero ++ amount of input may be provided by in(). ++ ++ For convenience, inflateBack() can be provided input on the first call by ++ setting strm->next_in and strm->avail_in. If that input is exhausted, then ++ in() will be called. Therefore strm->next_in must be initialized before ++ calling inflateBack(). If strm->next_in is NULL, then in() will be called ++ immediately for input. If strm->next_in is not NULL, then strm->avail_in ++ must also be initialized, and then if strm->avail_in is not zero, input will ++ initially be taken from strm->next_in[0 .. strm->avail_in - 1]. ++ ++ The in_desc and out_desc parameters of inflateBack() are passed as the ++ first parameter of in() and out() respectively when they are called. These ++ descriptors can be optionally used to pass any information that the caller- ++ supplied in() and out() functions need to do their job.
++ ++ On return, inflateBack() will set strm->next_in and strm->avail_in to ++ pass back any unused input that was provided by the last in() call. The ++ return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR ++ if in() or out() returned an error, Z_DATA_ERROR if there was a format error ++ in the deflate stream (in which case strm->msg is set to indicate the nature ++ of the error), or Z_STREAM_ERROR if the stream was not properly initialized. ++ In the case of Z_BUF_ERROR, an input or output error can be distinguished ++ using strm->next_in which will be NULL only if in() returned an error. If ++ strm->next_in is not NULL, then the Z_BUF_ERROR was due to out() returning ++ non-zero. (in() will always be called before out(), so strm->next_in is ++ assured to be defined if out() returns non-zero.) Note that inflateBack() ++ cannot return Z_OK. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_inflateBackEnd(zng_stream *strm); ++/* ++ All memory allocated by inflateBackInit() is freed. ++ ++ inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream ++ state was inconsistent. ++*/ ++ ++Z_EXTERN Z_EXPORT ++unsigned long zng_zlibCompileFlags(void); ++/* Return flags indicating compile-time options. 
++ ++ Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: ++ 1.0: size of unsigned int ++ 3.2: size of unsigned long ++ 5.4: size of void * (pointer) ++ 7.6: size of z_off_t ++ ++ Compiler, assembler, and debug options: ++ 8: ZLIB_DEBUG ++ 9: ASMV or ASMINF -- use ASM code ++ 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention ++ 11: 0 (reserved) ++ ++ One-time table building (smaller code, but not thread-safe if true): ++ 12: BUILDFIXED -- build static block decoding tables when needed (not supported by zlib-ng) ++ 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed ++ 14,15: 0 (reserved) ++ ++ Library content (indicates missing functionality): ++ 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking ++ deflate code when not needed) ++ 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect ++ and decode gzip streams (to avoid linking crc code) ++ 18-19: 0 (reserved) ++ ++ Operation variations (changes in library functionality): ++ 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate ++ 21: FASTEST -- deflate algorithm with only one, lowest compression level ++ 22,23: 0 (reserved) ++ ++ The sprintf variant used by gzprintf (zero is best): ++ 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format ++ 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! ++ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned ++ ++ Remainder: ++ 27-31: 0 (reserved) ++ */ ++ ++ ++ /* utility functions */ ++ ++/* ++ The following utility functions are implemented on top of the basic ++ stream-oriented functions. To simplify the interface, some default options ++ are assumed (compression level and memory usage, standard memory allocation ++ functions). The source code of these utility functions can be modified if ++ you need special options. 
++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_compress(uint8_t *dest, size_t *destLen, const uint8_t *source, size_t sourceLen); ++/* ++ Compresses the source buffer into the destination buffer. sourceLen is ++ the byte length of the source buffer. Upon entry, destLen is the total size ++ of the destination buffer, which must be at least the value returned by ++ compressBound(sourceLen). Upon exit, destLen is the actual size of the ++ compressed data. compress() is equivalent to compress2() with a level ++ parameter of Z_DEFAULT_COMPRESSION. ++ ++ compress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_compress2(uint8_t *dest, size_t *destLen, const uint8_t *source, size_t sourceLen, int32_t level); ++/* ++ Compresses the source buffer into the destination buffer. The level ++ parameter has the same meaning as in deflateInit. sourceLen is the byte ++ length of the source buffer. Upon entry, destLen is the total size of the ++ destination buffer, which must be at least the value returned by ++ compressBound(sourceLen). Upon exit, destLen is the actual size of the ++ compressed data. ++ ++ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_BUF_ERROR if there was not enough room in the output buffer, ++ Z_STREAM_ERROR if the level parameter is invalid. ++*/ ++ ++Z_EXTERN Z_EXPORT ++size_t zng_compressBound(size_t sourceLen); ++/* ++ compressBound() returns an upper bound on the compressed size after ++ compress() or compress2() on sourceLen bytes. It would be used before a ++ compress() or compress2() call to allocate the destination buffer. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_uncompress(uint8_t *dest, size_t *destLen, const uint8_t *source, size_t sourceLen); ++/* ++ Decompresses the source buffer into the destination buffer. sourceLen is ++ the byte length of the source buffer. 
Upon entry, destLen is the total size ++ of the destination buffer, which must be large enough to hold the entire ++ uncompressed data. (The size of the uncompressed data must have been saved ++ previously by the compressor and transmitted to the decompressor by some ++ mechanism outside the scope of this compression library.) Upon exit, destLen ++ is the actual size of the uncompressed data. ++ ++ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In ++ the case where there is not enough room, uncompress() will fill the output ++ buffer with the uncompressed data up to that point. ++*/ ++ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_uncompress2(uint8_t *dest, size_t *destLen, const uint8_t *source, size_t *sourceLen); ++/* ++ Same as uncompress, except that sourceLen is a pointer, where the ++ length of the source is *sourceLen. On return, *sourceLen is the number of ++ source bytes consumed. ++*/ ++ ++ ++#ifdef WITH_GZFILEOP ++ /* gzip file access functions */ ++ ++/* ++ This library supports reading and writing files in gzip (.gz) format with ++ an interface similar to that of stdio, using the functions that start with ++ "gz". The gzip format is different from the zlib format. gzip is a gzip ++ wrapper, documented in RFC 1952, wrapped around a deflate stream. ++*/ ++ ++typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ ++ ++Z_EXTERN Z_EXPORT ++gzFile zng_gzopen(const char *path, const char *mode); ++/* ++ Open the gzip (.gz) file at path for reading and decompressing, or ++ compressing and writing. The mode parameter is as in fopen ("rb" or "wb") ++ but can also include a compression level ("wb9") or a strategy: 'f' for ++ filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", ++ 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression ++ as in "wb9F". 
(See the description of deflateInit2 for more information ++ about the strategy parameter.) 'T' will request transparent writing or ++ appending with no compression and not using the gzip format. ++ ++ "a" can be used instead of "w" to request that the gzip stream that will ++ be written be appended to the file. "+" will result in an error, since ++ reading and writing to the same gzip file is not supported. The addition of ++ "x" when writing will create the file exclusively, which fails if the file ++ already exists. On systems that support it, the addition of "e" when ++ reading or writing will set the flag to close the file on an execve() call. ++ ++ These functions, as well as gzip, will read and decode a sequence of gzip ++ streams in a file. The append function of gzopen() can be used to create ++ such a file. (Also see gzflush() for another way to do this.) When ++ appending, gzopen does not test whether the file begins with a gzip stream, ++ nor does it look for the end of the gzip streams to begin appending. gzopen ++ will simply append a gzip stream to the existing file. ++ ++ gzopen can be used to read a file which is not in gzip format; in this ++ case gzread will directly read from the file without decompression. When ++ reading, this will be detected automatically by looking for the magic two- ++ byte gzip header. ++ ++ gzopen returns NULL if the file could not be opened, if there was ++ insufficient memory to allocate the gzFile state, or if an invalid mode was ++ specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). ++ errno can be checked to determine if the reason gzopen failed was that the ++ file could not be opened. ++*/ ++ ++Z_EXTERN Z_EXPORT ++gzFile zng_gzdopen(int fd, const char *mode); ++/* ++ Associate a gzFile with the file descriptor fd. File descriptors are ++ obtained from calls like open, dup, creat, pipe or fileno (if the file has ++ been previously opened with fopen). The mode parameter is as in gzopen. 
++ ++ The next call of gzclose on the returned gzFile will also close the file ++ descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor ++ fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, ++ mode);. The duplicated descriptor should be saved to avoid a leak, since ++ gzdopen does not close fd if it fails. If you are using fileno() to get the ++ file descriptor from a FILE *, then you will have to use dup() to avoid ++ double-close()ing the file descriptor. Both gzclose() and fclose() will ++ close the associated file descriptor, so they need to have different file ++ descriptors. ++ ++ gzdopen returns NULL if there was insufficient memory to allocate the ++ gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not ++ provided, or '+' was provided), or if fd is -1. The file descriptor is not ++ used until the next gz* read, write, seek, or close operation, so gzdopen ++ will not detect if fd is invalid (unless fd is -1). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzbuffer(gzFile file, uint32_t size); ++/* ++ Set the internal buffer size used by this library's functions for file to ++ size. The default buffer size is 8192 bytes. This function must be called ++ after gzopen() or gzdopen(), and before any other calls that read or write ++ the file. The buffer memory allocation is always deferred to the first read ++ or write. Three times that size in buffer space is allocated. A larger ++ buffer size of, for example, 64K or 128K bytes will noticeably increase the ++ speed of decompression (reading). ++ ++ The new buffer size also affects the maximum length for gzprintf(). ++ ++ gzbuffer() returns 0 on success, or -1 on failure, such as being called ++ too late. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzsetparams(gzFile file, int32_t level, int32_t strategy); ++/* ++ Dynamically update the compression level and strategy for file. See the ++ description of deflateInit2 for the meaning of these parameters. 
Previously ++ provided data is flushed before applying the parameter changes. ++ ++ gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not ++ opened for writing, Z_ERRNO if there is an error writing the flushed data, ++ or Z_MEM_ERROR if there is a memory allocation error. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzread(gzFile file, void *buf, uint32_t len); ++/* ++ Read and decompress up to len uncompressed bytes from file into buf. If ++ the input file is not in gzip format, gzread copies the given number of ++ bytes into the buffer directly from the file. ++ ++ After reaching the end of a gzip stream in the input, gzread will continue ++ to read, looking for another gzip stream. Any number of gzip streams may be ++ concatenated in the input file, and will all be decompressed by gzread(). ++ If something other than a gzip stream is encountered after a gzip stream, ++ that remaining trailing garbage is ignored (and no error is returned). ++ ++ gzread can be used to read a gzip file that is being concurrently written. ++ Upon reaching the end of the input, gzread will return with the available ++ data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then ++ gzclearerr can be used to clear the end of file indicator in order to permit ++ gzread to be tried again. Z_OK indicates that a gzip stream was completed ++ on the last gzread. Z_BUF_ERROR indicates that the input file ended in the ++ middle of a gzip stream. Note that gzread does not return -1 in the event ++ of an incomplete gzip stream. This error is deferred until gzclose(), which ++ will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip ++ stream. Alternatively, gzerror can be used before gzclose to detect this ++ case. ++ ++ gzread returns the number of uncompressed bytes actually read, less than ++ len for end of file, or -1 for error. 
If len is too large to fit in an int, ++ then nothing is read, -1 is returned, and the error state is set to ++ Z_STREAM_ERROR. ++*/ ++ ++Z_EXTERN Z_EXPORT ++size_t zng_gzfread(void *buf, size_t size, size_t nitems, gzFile file); ++/* ++ Read and decompress up to nitems items of size size from file into buf, ++ otherwise operating as gzread() does. This duplicates the interface of ++ stdio's fread(), with size_t request and return types. ++ ++ gzfread() returns the number of full items read of size size, or zero if ++ the end of the file was reached and a full item could not be read, or if ++ there was an error. gzerror() must be consulted if zero is returned in ++ order to determine if there was an error. If the multiplication of size and ++ nitems overflows, i.e. the product does not fit in a size_t, then nothing ++ is read, zero is returned, and the error state is set to Z_STREAM_ERROR. ++ ++ In the event that the end of file is reached and only a partial item is ++ available at the end, i.e. the remaining uncompressed data length is not a ++ multiple of size, then the final partial item is nevertheless read into buf ++ and the end-of-file flag is set. The length of the partial item read is not ++ provided, but could be inferred from the result of gztell(). This behavior ++ is the same as the behavior of fread() implementations in common libraries, ++ but it prevents the direct use of gzfread() to read a concurrently written ++ file, resetting and retrying on end-of-file, when size is not 1. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzwrite(gzFile file, void const *buf, uint32_t len); ++/* ++ Compress and write the len uncompressed bytes at buf to file. gzwrite ++ returns the number of uncompressed bytes written or 0 in case of error. 
++*/ ++ ++Z_EXTERN Z_EXPORT ++size_t zng_gzfwrite(void const *buf, size_t size, size_t nitems, gzFile file); ++/* ++ Compress and write nitems items of size size from buf to file, duplicating ++ the interface of stdio's fwrite(), with size_t request and return types. ++ ++ gzfwrite() returns the number of full items written of size size, or zero ++ if there was an error. If the multiplication of size and nitems overflows, ++ i.e. the product does not fit in a size_t, then nothing is written, zero ++ is returned, and the error state is set to Z_STREAM_ERROR. ++*/ ++ ++Z_EXTERN Z_EXPORTVA ++int32_t zng_gzprintf(gzFile file, const char *format, ...); ++/* ++ Convert, format, compress, and write the arguments (...) to file under ++ control of the string format, as in fprintf. gzprintf returns the number of ++ uncompressed bytes actually written, or a negative zlib error code in case ++ of error. The number of uncompressed bytes written is limited to 8191, or ++ one less than the buffer size given to gzbuffer(). The caller should assure ++ that this limit is not exceeded. If it is exceeded, then gzprintf() will ++ return an error (0) with nothing written. In this case, there may also be a ++ buffer overflow with unpredictable consequences, which is possible only if ++ zlib was compiled with the insecure functions sprintf() or vsprintf(), ++ because the secure snprintf() or vsnprintf() functions were not available. ++ This can be determined using zlibCompileFlags(). ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzputs(gzFile file, const char *s); ++/* ++ Compress and write the given null-terminated string s to file, excluding ++ the terminating null character. ++ ++ gzputs returns the number of characters written, or -1 in case of error. 
++*/ ++ ++Z_EXTERN Z_EXPORT ++char * zng_gzgets(gzFile file, char *buf, int32_t len); ++/* ++ Read and decompress bytes from file into buf, until len-1 characters are ++ read, or until a newline character is read and transferred to buf, or an ++ end-of-file condition is encountered. If any characters are read or if len ++ is one, the string is terminated with a null character. If no characters ++ are read due to an end-of-file or len is less than one, then the buffer is ++ left untouched. ++ ++ gzgets returns buf which is a null-terminated string, or it returns NULL ++ for end-of-file or in case of error. If there was an error, the contents at ++ buf are indeterminate. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzputc(gzFile file, int32_t c); ++/* ++ Compress and write c, converted to an unsigned char, into file. gzputc ++ returns the value that was written, or -1 in case of error. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzgetc(gzFile file); ++/* ++ Read and decompress one byte from file. gzgetc returns this byte or -1 ++ in case of end of file or error. This is implemented as a macro for speed. ++ As such, it does not do all of the checking the other functions do. I.e. ++ it does not check to see if file is NULL, nor whether the structure file ++ points to has been clobbered or not. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzungetc(int32_t c, gzFile file); ++/* ++ Push c back onto the stream for file to be read as the first character on ++ the next read. At least one character of push-back is always allowed. ++ gzungetc() returns the character pushed, or -1 on failure. gzungetc() will ++ fail if c is -1, and may fail if a character has been pushed but not read ++ yet. If gzungetc is used immediately after gzopen or gzdopen, at least the ++ output buffer size of pushed characters is allowed. (See gzbuffer above.) ++ The pushed character will be discarded if the stream is repositioned with ++ gzseek() or gzrewind(). 
++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzflush(gzFile file, int32_t flush); ++/* ++ Flush all pending output to file. The parameter flush is as in the ++ deflate() function. The return value is the zlib error number (see function ++ gzerror below). gzflush is only permitted when writing. ++ ++ If the flush parameter is Z_FINISH, the remaining data is written and the ++ gzip stream is completed in the output. If gzwrite() is called again, a new ++ gzip stream will be started in the output. gzread() is able to read such ++ concatenated gzip streams. ++ ++ gzflush should be called only when strictly necessary because it will ++ degrade compression if called too often. ++*/ ++ ++Z_EXTERN Z_EXPORT ++z_off64_t zng_gzseek(gzFile file, z_off64_t offset, int whence); ++/* ++ Set the starting position to offset relative to whence for the next gzread ++ or gzwrite on file. The offset represents a number of bytes in the ++ uncompressed data stream. The whence parameter is defined as in lseek(2); ++ the value SEEK_END is not supported. ++ ++ If the file is opened for reading, this function is emulated but can be ++ extremely slow. If the file is opened for writing, only forward seeks are ++ supported; gzseek then compresses a sequence of zeroes up to the new ++ starting position. ++ ++ gzseek returns the resulting offset location as measured in bytes from ++ the beginning of the uncompressed stream, or -1 in case of error, in ++ particular if the file is opened for writing and the new starting position ++ would be before the current position. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzrewind(gzFile file); ++/* ++ Rewind file. This function is supported only for reading. ++ ++ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). ++*/ ++ ++Z_EXTERN Z_EXPORT ++z_off64_t zng_gztell(gzFile file); ++/* ++ Return the starting position for the next gzread or gzwrite on file. 
++ This position represents a number of bytes in the uncompressed data stream, ++ and is zero when starting, even if appending or reading a gzip stream from ++ the middle of a file using gzdopen(). ++ ++ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) ++*/ ++ ++Z_EXTERN Z_EXPORT ++z_off64_t zng_gzoffset(gzFile file); ++/* ++ Return the current compressed (actual) read or write offset of file. This ++ offset includes the count of bytes that precede the gzip stream, for example ++ when appending or when using gzdopen() for reading. When reading, the ++ offset does not include as yet unused buffered input. This information can ++ be used for a progress indicator. On error, gzoffset() returns -1. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzeof(gzFile file); ++/* ++ Return true (1) if the end-of-file indicator for file has been set while ++ reading, false (0) otherwise. Note that the end-of-file indicator is set ++ only if the read tried to go past the end of the input, but came up short. ++ Therefore, just like feof(), gzeof() may return false even if there is no ++ more data to read, in the event that the last read request was for the exact ++ number of bytes remaining in the input file. This will happen if the input ++ file size is an exact multiple of the buffer size. ++ ++ If gzeof() returns true, then the read functions will return no more data, ++ unless the end-of-file indicator is reset by gzclearerr() and the input file ++ has grown since the previous end of file was detected. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzdirect(gzFile file); ++/* ++ Return true (1) if file is being copied directly while reading, or false ++ (0) if file is a gzip stream being decompressed. ++ ++ If the input file is empty, gzdirect() will return true, since the input ++ does not contain a gzip stream. 
++ ++ If gzdirect() is used immediately after gzopen() or gzdopen() it will ++ cause buffers to be allocated to allow reading the file to determine if it ++ is a gzip file. Therefore if gzbuffer() is used, it should be called before ++ gzdirect(). ++ ++ When writing, gzdirect() returns true (1) if transparent writing was ++ requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: ++ gzdirect() is not needed when writing. Transparent writing must be ++ explicitly requested, so the application already knows the answer. When ++ linking statically, using gzdirect() will include all of the zlib code for ++ gzip file reading and decompression, which may not be desired.) ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzclose(gzFile file); ++/* ++ Flush all pending output for file, if necessary, close file and ++ deallocate the (de)compression state. Note that once file is closed, you ++ cannot call gzerror with file, since its structures have been deallocated. ++ gzclose must not be called more than once on the same file, just as free ++ must not be called more than once on the same allocation. ++ ++ gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a ++ file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the ++ last read ended in the middle of a gzip stream, or Z_OK on success. ++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_gzclose_r(gzFile file); ++Z_EXTERN Z_EXPORT ++int32_t zng_gzclose_w(gzFile file); ++/* ++ Same as gzclose(), but gzclose_r() is only for use when reading, and ++ gzclose_w() is only for use when writing or appending. The advantage to ++ using these instead of gzclose() is that they avoid linking in zlib ++ compression or decompression code that is not used when only reading or only ++ writing respectively. If gzclose() is used, then both compression and ++ decompression code will be included in the application when linking to a static ++ zlib library.
++*/ ++ ++Z_EXTERN Z_EXPORT ++const char * zng_gzerror(gzFile file, int32_t *errnum); ++/* ++ Return the error message for the last error which occurred on file. ++ errnum is set to zlib error number. If an error occurred in the file system ++ and not in the compression library, errnum is set to Z_ERRNO and the ++ application may consult errno to get the exact error code. ++ ++ The application must not modify the returned string. Future calls to ++ this function may invalidate the previously returned string. If file is ++ closed, then the string previously returned by gzerror will no longer be ++ available. ++ ++ gzerror() should be used to distinguish errors from end-of-file for those ++ functions above that do not distinguish those cases in their return values. ++*/ ++ ++Z_EXTERN Z_EXPORT ++void zng_gzclearerr(gzFile file); ++/* ++ Clear the error and end-of-file flags for file. This is analogous to the ++ clearerr() function in stdio. This is useful for continuing to read a gzip ++ file that is being written concurrently. ++*/ ++ ++#endif /* WITH_GZFILEOP */ ++ ++ /* checksum functions */ ++ ++/* ++ These functions are not related to compression but are exported ++ anyway because they might be useful in applications using the compression ++ library. ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_adler32(uint32_t adler, const uint8_t *buf, uint32_t len); ++/* ++ Update a running Adler-32 checksum with the bytes buf[0..len-1] and ++ return the updated checksum. An Adler-32 value is in the range of a 32-bit ++ unsigned integer. If buf is Z_NULL, this function returns the required ++ initial value for the checksum. ++ ++ An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed ++ much faster. 
++ ++ Usage example: ++ ++ uint32_t adler = adler32(0L, NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ adler = adler32(adler, buffer, length); ++ } ++ if (adler != original_adler) error(); ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_adler32_z(uint32_t adler, const uint8_t *buf, size_t len); ++/* ++ Same as adler32(), but with a size_t length. ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_adler32_combine(uint32_t adler1, uint32_t adler2, z_off64_t len2); ++/* ++ Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 ++ and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for ++ each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of ++ seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note ++ that the z_off_t type (like off_t) is a signed integer. If len2 is ++ negative, the result has no meaning or utility. ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_crc32(uint32_t crc, const uint8_t *buf, uint32_t len); ++/* ++ Update a running CRC-32 with the bytes buf[0..len-1] and return the ++ updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. ++ If buf is Z_NULL, this function returns the required initial value for the ++ crc. Pre- and post-conditioning (one's complement) is performed within this ++ function so it shouldn't be done by the application. ++ ++ Usage example: ++ ++ uint32_t crc = crc32(0L, NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ crc = crc32(crc, buffer, length); ++ } ++ if (crc != original_crc) error(); ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_crc32_z(uint32_t crc, const uint8_t *buf, size_t len); ++/* ++ Same as crc32(), but with a size_t length. ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_crc32_combine(uint32_t crc1, uint32_t crc2, z_off64_t len2); ++ ++/* ++ Combine two CRC-32 check values into one. 
For two sequences of bytes, ++ seq1 and seq2 with lengths len1 and len2, CRC-32 check values were ++ calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 ++ check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and ++ len2. ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_crc32_combine_gen(z_off64_t len2); ++ ++/* ++ Return the operator corresponding to length len2, to be used with ++ crc32_combine_op(). ++*/ ++ ++Z_EXTERN Z_EXPORT ++uint32_t zng_crc32_combine_op(uint32_t crc1, uint32_t crc2, const uint32_t op); ++/* ++ Give the same result as crc32_combine(), using op in place of len2. op ++ is generated from len2 by crc32_combine_gen(). This will be faster than ++ crc32_combine() if the generated op is used more than once. ++*/ ++ ++ /* various hacks, don't look :) */ ++ ++#ifdef WITH_GZFILEOP ++ ++/* gzgetc() macro and its supporting function and exposed data structure. Note ++ * that the real internal state is much larger than the exposed structure. ++ * This abbreviated structure exposes just enough for the gzgetc() macro. The ++ * user should not mess with these exposed elements, since their names or ++ * behavior could change in the future, perhaps even capriciously. They can ++ * only be used by the gzgetc() macro. You have been warned. ++ */ ++struct gzFile_s { ++ unsigned have; ++ unsigned char *next; ++ z_off64_t pos; ++}; ++# define @ZLIB_SYMBOL_PREFIX@zng_gzgetc(g) ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (@ZLIB_SYMBOL_PREFIX@zng_gzgetc)(g)) ++ ++#endif /* WITH_GZFILEOP */ ++ ++ ++typedef enum { ++ Z_DEFLATE_LEVEL = 0, /* compression level, represented as an int */ ++ Z_DEFLATE_STRATEGY = 1, /* compression strategy, represented as an int */ ++ Z_DEFLATE_REPRODUCIBLE = 2, ++ /* ++ Whether reproducible compression results are required. Represented as an int, where 0 means that it is allowed ++ to trade reproducibility for e.g.
improved performance or compression ratio, and non-0 means that ++ reproducibility is strictly required. Reproducibility is guaranteed only when using an identical zlib-ng build. ++ Default is 0. ++ */ ++} zng_deflate_param; ++ ++typedef struct { ++ zng_deflate_param param; /* parameter ID */ ++ void *buf; /* parameter value */ ++ size_t size; /* parameter value size */ ++ int32_t status; /* result of the last set/get call */ ++} zng_deflate_param_value; ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateSetParams(zng_stream *strm, zng_deflate_param_value *params, size_t count); ++/* ++ Sets the values of the given zlib-ng deflate stream parameters. All the buffers are copied internally, so the ++ caller still owns them after this function returns. Returns Z_OK if success. ++ ++ If the size of at least one of the buffers is too small to hold the entire value of the corresponding parameter, ++ or if the same parameter is specified multiple times, Z_BUF_ERROR is returned. The caller may inspect status fields ++ in order to determine which of the parameters caused this error. No other changes are performed. ++ ++ If the stream state is inconsistent or if at least one of the values cannot be updated, Z_STREAM_ERROR is ++ returned. The caller may inspect status fields in order to determine which of the parameters caused this error. ++ Parameters, whose status field is equal to Z_OK, have been applied successfully. If all status fields are not equal ++ to Z_STREAM_ERROR, then the error was caused by a stream state inconsistency. ++ ++ If there are no other errors, but at least one parameter is not supported by the current zlib-ng version, ++ Z_VERSION_ERROR is returned. The caller may inspect status fields in order to determine which of the parameters ++ caused this error. 
++*/ ++ ++Z_EXTERN Z_EXPORT ++int32_t zng_deflateGetParams(zng_stream *strm, zng_deflate_param_value *params, size_t count); ++/* ++ Copies the values of the given zlib-ng deflate stream parameters into the user-provided buffers. Returns Z_OK if ++ success, Z_VERSION_ERROR if at least one parameter is not supported by the current zlib-ng version, Z_STREAM_ERROR ++ if the stream state is inconsistent, and Z_BUF_ERROR if the size of at least one buffer is too small to hold the ++ entire value of the corresponding parameter. ++*/ ++ ++/* undocumented functions */ ++Z_EXTERN Z_EXPORT const char * zng_zError (int32_t); ++Z_EXTERN Z_EXPORT int32_t zng_inflateSyncPoint (zng_stream *); ++Z_EXTERN Z_EXPORT const uint32_t * zng_get_crc_table (void); ++Z_EXTERN Z_EXPORT int32_t zng_inflateUndermine (zng_stream *, int32_t); ++Z_EXTERN Z_EXPORT int32_t zng_inflateValidate (zng_stream *, int32_t); ++Z_EXTERN Z_EXPORT unsigned long zng_inflateCodesUsed (zng_stream *); ++Z_EXTERN Z_EXPORT int32_t zng_inflateResetKeep (zng_stream *); ++Z_EXTERN Z_EXPORT int32_t zng_deflateResetKeep (zng_stream *); ++ ++#ifdef WITH_GZFILEOP ++# if defined(_WIN32) ++ Z_EXTERN Z_EXPORT gzFile zng_gzopen_w(const wchar_t *path, const char *mode); ++# endif ++Z_EXTERN Z_EXPORTVA int32_t zng_gzvprintf(gzFile file, const char *format, va_list va); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* ZNGLIB_H_ */ +diff --git a/zlib-ng.map b/zlib-ng.map +index b004603..64411ab 100644 +--- a/zlib-ng.map ++++ b/zlib-ng.map +@@ -1,7 +1,16 @@ ++ZLIB_NG_2.1.0 { ++ global: ++ zng_deflateInit; ++ zng_deflateInit2; ++ zng_inflateBackInit; ++ zng_inflateInit; ++ zng_inflateInit2; ++ zlibng_version; ++}; ++ + ZLIB_NG_2.0.0 { + global: + zng_adler32; +- zng_adler32_c; + zng_adler32_combine; + zng_adler32_z; + zng_compress; +@@ -55,18 +64,14 @@ ZLIB_NG_2.0.0 { + zng_uncompress2; + zng_zError; + zng_zlibCompileFlags; +- zng_zlibng_string; +- zng_version; ++ zng_vstring; + local: + zng_deflate_copyright; + 
zng_inflate_copyright; +- zng_inflate_fast; +- zng_inflate_table; + zng_zcalloc; + zng_zcfree; + zng_z_errmsg; +- zng_gz_error; +- zng_gz_intmax; ++ gz_error; + _*; + }; + +@@ -85,23 +90,22 @@ ZLIB_NG_GZ_2.0.0 { + zng_gzfread; + zng_gzfwrite; + zng_gzgetc; +- zng_gzgetc_; + zng_gzgets; + zng_gzoffset; +- zng_gzoffset64; + zng_gzopen; +- zng_gzopen64; + zng_gzprintf; + zng_gzputc; + zng_gzputs; + zng_gzread; + zng_gzrewind; + zng_gzseek; +- zng_gzseek64; + zng_gzsetparams; + zng_gztell; +- zng_gztell64; + zng_gzungetc; + zng_gzvprintf; + zng_gzwrite; + }; ++ ++FAIL { ++ local: *; ++}; +diff --git a/zlib.h.in b/zlib.h.in +new file mode 100644 +index 0000000..9e27d99 +--- /dev/null ++++ b/zlib.h.in +@@ -0,0 +1,1859 @@ ++#ifndef ZLIB_H_ ++#define ZLIB_H_ ++/* zlib.h -- interface of the 'zlib-ng' compression library ++ Forked from and compatible with zlib 1.2.13 ++ ++ Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler ++ ++ This software is provided 'as-is', without any express or implied ++ warranty. In no event will the authors be held liable for any damages ++ arising from the use of this software. ++ ++ Permission is granted to anyone to use this software for any purpose, ++ including commercial applications, and to alter it and redistribute it ++ freely, subject to the following restrictions: ++ ++ 1. The origin of this software must not be misrepresented; you must not ++ claim that you wrote the original software. If you use this software ++ in a product, an acknowledgment in the product documentation would be ++ appreciated but is not required. ++ 2. Altered source versions must be plainly marked as such, and must not be ++ misrepresented as being the original software. ++ 3. This notice may not be removed or altered from any source distribution. 
++ ++ Jean-loup Gailly Mark Adler ++ jloup@gzip.org madler@alumni.caltech.edu ++ ++ ++ The data format used by the zlib library is described by RFCs (Request for ++ Comments) 1950 to 1952 in the files https://tools.ietf.org/html/rfc1950 ++ (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). ++*/ ++ ++#ifdef ZNGLIB_H_ ++# error Include zlib-ng.h for zlib-ng API or zlib.h for zlib-compat API but not both ++#endif ++ ++#ifndef RC_INVOKED ++#include <stdint.h> ++#include <stdarg.h> ++ ++#include "zconf.h" ++ ++#ifndef ZCONF_H ++# error Missing zconf.h add binary output directory to include directories ++#endif ++#endif /* RC_INVOKED */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define ZLIBNG_VERSION "2.1.5" ++#define ZLIBNG_VERNUM 0x020105F0L /* MMNNRRSM: major minor revision status modified */ ++#define ZLIBNG_VER_MAJOR 2 ++#define ZLIBNG_VER_MINOR 1 ++#define ZLIBNG_VER_REVISION 5 ++#define ZLIBNG_VER_STATUS F /* 0=devel, 1-E=beta, F=Release (DEPRECATED) */ ++#define ZLIBNG_VER_STATUSH 0xF /* Hex values: 0=devel, 1-E=beta, F=Release */ ++#define ZLIBNG_VER_MODIFIED 0 /* non-zero if modified externally from zlib-ng */ ++ ++#define ZLIB_VERSION "1.3.0.zlib-ng" ++#define ZLIB_VERNUM 0x130f ++#define ZLIB_VER_MAJOR 1 ++#define ZLIB_VER_MINOR 3 ++#define ZLIB_VER_REVISION 0 ++#define ZLIB_VER_SUBREVISION 15 /* 15=fork (0xf) */ ++ ++/* ++ The 'zlib' compression library provides in-memory compression and ++ decompression functions, including integrity checks of the uncompressed data. ++ This version of the library supports only one compression method (deflation) ++ but other algorithms will be added later and will have the same stream ++ interface. ++ ++ Compression can be done in a single step if the buffers are large enough, ++ or can be done by repeated calls of the compression function. In the latter ++ case, the application must provide more input and/or consume the output ++ (providing more output space) before each call.
++ ++ The compressed data format used by default by the in-memory functions is ++ the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped ++ around a deflate stream, which is itself documented in RFC 1951. ++ ++ The library also supports reading and writing files in gzip (.gz) format ++ with an interface similar to that of stdio using the functions that start ++ with "gz". The gzip format is different from the zlib format. gzip is a ++ gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. ++ ++ This library can optionally read and write gzip and raw deflate streams in ++ memory as well. ++ ++ The zlib format was designed to be compact and fast for use in memory ++ and on communications channels. The gzip format was designed for single- ++ file compression on file systems, has a larger header than zlib to maintain ++ directory information, and uses a different, slower check method than zlib. ++ ++ The library does not install any signal handler. The decoder checks ++ the consistency of the compressed data, so the library should never crash ++ even in the case of corrupted input. 
++*/ ++ ++typedef void *(*alloc_func) (void *opaque, unsigned int items, unsigned int size); ++typedef void (*free_func) (void *opaque, void *address); ++ ++struct internal_state; ++ ++typedef struct z_stream_s { ++ z_const unsigned char *next_in; /* next input byte */ ++ uint32_t avail_in; /* number of bytes available at next_in */ ++ unsigned long total_in; /* total number of input bytes read so far */ ++ ++ unsigned char *next_out; /* next output byte will go here */ ++ uint32_t avail_out; /* remaining free space at next_out */ ++ unsigned long total_out; /* total number of bytes output so far */ ++ ++ z_const char *msg; /* last error message, NULL if no error */ ++ struct internal_state *state; /* not visible by applications */ ++ ++ alloc_func zalloc; /* used to allocate the internal state */ ++ free_func zfree; /* used to free the internal state */ ++ void *opaque; /* private data object passed to zalloc and zfree */ ++ ++ int data_type; /* best guess about the data type: binary or text ++ for deflate, or the decoding state for inflate */ ++ unsigned long adler; /* Adler-32 or CRC-32 value of the uncompressed data */ ++ unsigned long reserved; /* reserved for future use */ ++} z_stream; ++ ++typedef z_stream *z_streamp; /* Obsolete type, retained for compatibility only */ ++ ++/* ++ gzip header information passed to and from zlib routines. See RFC 1952 ++ for more details on the meanings of these fields. 
++*/ ++typedef struct gz_header_s { ++ int text; /* true if compressed data believed to be text */ ++ unsigned long time; /* modification time */ ++ int xflags; /* extra flags (not used when writing a gzip file) */ ++ int os; /* operating system */ ++ unsigned char *extra; /* pointer to extra field or NULL if none */ ++ unsigned int extra_len; /* extra field length (valid if extra != NULL) */ ++ unsigned int extra_max; /* space at extra (only when reading header) */ ++ unsigned char *name; /* pointer to zero-terminated file name or NULL */ ++ unsigned int name_max; /* space at name (only when reading header) */ ++ unsigned char *comment; /* pointer to zero-terminated comment or NULL */ ++ unsigned int comm_max; /* space at comment (only when reading header) */ ++ int hcrc; /* true if there was or will be a header crc */ ++ int done; /* true when done reading gzip header (not used when writing a gzip file) */ ++} gz_header; ++ ++typedef gz_header *gz_headerp; ++ ++/* ++ The application must update next_in and avail_in when avail_in has dropped ++ to zero. It must update next_out and avail_out when avail_out has dropped ++ to zero. The application must initialize zalloc, zfree and opaque before ++ calling the init function. All other fields are set by the compression ++ library and must not be updated by the application. ++ ++ The opaque value provided by the application will be passed as the first ++ parameter for calls of zalloc and zfree. This can be useful for custom ++ memory management. The compression library attaches no meaning to the ++ opaque value. ++ ++ zalloc must return NULL if there is not enough memory for the object. ++ If zlib is used in a multi-threaded application, zalloc and zfree must be ++ thread safe. In that case, zlib is thread-safe. When zalloc and zfree are ++ Z_NULL on entry to the initialization function, they are set to internal ++ routines that use the standard library functions malloc() and free(). 
++ ++ The fields total_in and total_out can be used for statistics or progress ++ reports. After compression, total_in holds the total size of the ++ uncompressed data and may be saved for use by the decompressor (particularly ++ if the decompressor wants to decompress everything in a single step). ++*/ ++ ++ /* constants */ ++ ++#define Z_NO_FLUSH 0 ++#define Z_PARTIAL_FLUSH 1 ++#define Z_SYNC_FLUSH 2 ++#define Z_FULL_FLUSH 3 ++#define Z_FINISH 4 ++#define Z_BLOCK 5 ++#define Z_TREES 6 ++/* Allowed flush values; see deflate() and inflate() below for details */ ++ ++#define Z_OK 0 ++#define Z_STREAM_END 1 ++#define Z_NEED_DICT 2 ++#define Z_ERRNO (-1) ++#define Z_STREAM_ERROR (-2) ++#define Z_DATA_ERROR (-3) ++#define Z_MEM_ERROR (-4) ++#define Z_BUF_ERROR (-5) ++#define Z_VERSION_ERROR (-6) ++/* Return codes for the compression/decompression functions. Negative values ++ * are errors, positive values are used for special but normal events. ++ */ ++ ++#define Z_NO_COMPRESSION 0 ++#define Z_BEST_SPEED 1 ++#define Z_BEST_COMPRESSION 9 ++#define Z_DEFAULT_COMPRESSION (-1) ++/* compression levels */ ++ ++#define Z_FILTERED 1 ++#define Z_HUFFMAN_ONLY 2 ++#define Z_RLE 3 ++#define Z_FIXED 4 ++#define Z_DEFAULT_STRATEGY 0 ++/* compression strategy; see deflateInit2() below for details */ ++ ++#define Z_BINARY 0 ++#define Z_TEXT 1 ++#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ ++#define Z_UNKNOWN 2 ++/* Possible values of the data_type field for deflate() */ ++ ++#define Z_DEFLATED 8 ++/* The deflate compression method (the only one supported in this version) */ ++ ++#define Z_NULL NULL /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ ++ ++#define zlib_version zlibVersion() ++/* for compatibility with versions < 1.0.2 */ ++ ++ ++ /* basic functions */ ++ ++Z_EXTERN const char * Z_EXPORT zlibVersion(void); ++/* The application can compare zlibVersion and ZLIB_VERSION for consistency. 
++ If the first character differs, the library code actually used is not ++ compatible with the zlib.h header file used by the application. This check ++ is automatically made by deflateInit and inflateInit. ++ */ ++ ++/* ++Z_EXTERN int Z_EXPORT deflateInit (z_stream *strm, int level); ++ ++ Initializes the internal stream state for compression. The fields ++ zalloc, zfree and opaque must be initialized before by the caller. If ++ zalloc and zfree are set to Z_NULL, deflateInit updates them to use default ++ allocation functions. total_in, total_out, adler, and msg are initialized. ++ ++ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: ++ 1 gives best speed, 9 gives best compression, 0 gives no compression at all ++ (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION ++ requests a default compromise between speed and compression (currently ++ equivalent to level 6). ++ ++ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if level is not a valid compression level, or ++ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible ++ with the version assumed by the caller (ZLIB_VERSION). msg is set to null ++ if there is no error message. deflateInit does not perform any compression: ++ this will be done by deflate(). ++*/ ++ ++ ++Z_EXTERN int Z_EXPORT deflate(z_stream *strm, int flush); ++/* ++ deflate compresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may introduce ++ some output latency (reading input without producing any output) except when ++ forced to flush. ++ ++ The detailed semantics are as follows. deflate performs one or both of the ++ following actions: ++ ++ - Compress more input starting at next_in and update next_in and avail_in ++ accordingly. 
If not all input can be processed (because there is not ++ enough room in the output buffer), next_in and avail_in are updated and ++ processing will resume at this point for the next call of deflate(). ++ ++ - Generate more output starting at next_out and update next_out and avail_out ++ accordingly. This action is forced if the parameter flush is non zero. ++ Forcing flush frequently degrades the compression ratio, so this parameter ++ should be set only when necessary. Some output may be provided even if ++ flush is zero. ++ ++ Before the call of deflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming more ++ output, and updating avail_in or avail_out accordingly; avail_out should ++ never be zero before the call. The application can consume the compressed ++ output when it wants, for example when the output buffer is full (avail_out ++ == 0), or after each call of deflate(). If deflate returns Z_OK and with ++ zero avail_out, it must be called again after making room in the output ++ buffer because there might be more output pending. See deflatePending(), ++ which can be used if desired to determine whether or not there is more output ++ in that case. ++ ++ Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to ++ decide how much data to accumulate before producing output, in order to ++ maximize compression. ++ ++ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is ++ flushed to the output buffer and the output is aligned on a byte boundary, so ++ that the decompressor can get all input data available so far. (In ++ particular avail_in is zero after the call if enough output space has been ++ provided before the call.) Flushing may degrade compression for some ++ compression algorithms and so it should be used only when necessary. 
This ++ completes the current deflate block and follows it with an empty stored block ++ that is three bits plus filler bits to the next byte, followed by four bytes ++ (00 00 ff ff). ++ ++ If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the ++ output buffer, but the output is not aligned to a byte boundary. All of the ++ input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. ++ This completes the current deflate block and follows it with an empty fixed ++ codes block that is 10 bits long. This assures that enough bytes are output ++ in order for the decompressor to finish the block before the empty fixed ++ codes block. ++ ++ If flush is set to Z_BLOCK, a deflate block is completed and emitted, as ++ for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to ++ seven bits of the current block are held to be written as the next byte after ++ the next deflate block is completed. In this case, the decompressor may not ++ be provided enough bits at this point in order to complete decompression of ++ the data provided so far to the compressor. It may need to wait for the next ++ block to be emitted. This is for advanced applications that need to control ++ the emission of deflate blocks. ++ ++ If flush is set to Z_FULL_FLUSH, all output is flushed as with ++ Z_SYNC_FLUSH, and the compression state is reset so that decompression can ++ restart from this point if previous compressed data has been damaged or if ++ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade ++ compression. ++ ++ If deflate returns with avail_out == 0, this function must be called again ++ with the same value of the flush parameter and more output space (updated ++ avail_out), until the flush is complete (deflate returns with non-zero ++ avail_out). 
In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that ++ avail_out is greater than six when the flush marker begins, in order to avoid ++ repeated flush markers upon calling deflate() again when avail_out == 0. ++ ++ If the parameter flush is set to Z_FINISH, pending input is processed, ++ pending output is flushed and deflate returns with Z_STREAM_END if there was ++ enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this ++ function must be called again with Z_FINISH and more output space (updated ++ avail_out) but no more input data, until it returns with Z_STREAM_END or an ++ error. After deflate has returned Z_STREAM_END, the only possible operations ++ on the stream are deflateReset or deflateEnd. ++ ++ Z_FINISH can be used in the first deflate call after deflateInit if all the ++ compression is to be done in a single step. In order to complete in one ++ call, avail_out must be at least the value returned by deflateBound (see ++ below). Then deflate is guaranteed to return Z_STREAM_END. If not enough ++ output space is provided, deflate will not return Z_STREAM_END, and it must ++ be called again as described above. ++ ++ deflate() sets strm->adler to the Adler-32 checksum of all input read ++ so far (that is, total_in bytes). If a gzip stream is being generated, then ++ strm->adler will be the CRC-32 checksum of the input read so far. (See ++ deflateInit2 below.) ++ ++ deflate() may update strm->data_type if it can make a good guess about ++ the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is ++ considered binary. This field is only for information purposes and does not ++ affect the compression algorithm in any manner. 
++ ++ deflate() returns Z_OK if some progress has been made (more input ++ processed or more output produced), Z_STREAM_END if all input has been ++ consumed and all output has been produced (only when flush is set to ++ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example ++ if next_in or next_out was NULL or the state was inadvertently written over ++ by the application), or Z_BUF_ERROR if no progress is possible (for example ++ avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and ++ deflate() can be called again with more input and more output space to ++ continue compressing. ++*/ ++ ++ ++Z_EXTERN int Z_EXPORT deflateEnd(z_stream *strm); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any pending ++ output. ++ ++ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the ++ stream state was inconsistent, Z_DATA_ERROR if the stream was freed ++ prematurely (some input or output was discarded). In the error case, msg ++ may be set but then points to a static string (which must not be ++ deallocated). ++*/ ++ ++ ++/* ++Z_EXTERN int Z_EXPORT inflateInit (z_stream *strm); ++ ++ Initializes the internal stream state for decompression. The fields ++ next_in, avail_in, zalloc, zfree and opaque must be initialized before by ++ the caller. In the current version of inflate, the provided input is not ++ read or consumed. The allocation of a sliding window will be deferred to ++ the first call of inflate (if the decompression does not complete on the ++ first call). If zalloc and zfree are set to Z_NULL, inflateInit updates ++ them to use default allocation functions. total_in, total_out, adler, and ++ msg are initialized.
++ ++ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the ++ version assumed by the caller, or Z_STREAM_ERROR if the parameters are ++ invalid, such as a null pointer to the structure. msg is set to null if ++ there is no error message. inflateInit does not perform any decompression. ++ Actual decompression will be done by inflate(). So next_in, and avail_in, ++ next_out, and avail_out are unused and unchanged. The current ++ implementation of inflateInit() does not process any header information -- ++ that is deferred until inflate() is called. ++*/ ++ ++ ++Z_EXTERN int Z_EXPORT inflate(z_stream *strm, int flush); ++/* ++ inflate decompresses as much data as possible, and stops when the input ++ buffer becomes empty or the output buffer becomes full. It may introduce ++ some output latency (reading input without producing any output) except when ++ forced to flush. ++ ++ The detailed semantics are as follows. inflate performs one or both of the ++ following actions: ++ ++ - Decompress more input starting at next_in and update next_in and avail_in ++ accordingly. If not all input can be processed (because there is not ++ enough room in the output buffer), then next_in and avail_in are updated ++ accordingly, and processing will resume at this point for the next call of ++ inflate(). ++ ++ - Generate more output starting at next_out and update next_out and avail_out ++ accordingly. inflate() provides as much output as possible, until there is ++ no more input data or no more space in the output buffer (see below about ++ the flush parameter). ++ ++ Before the call of inflate(), the application should ensure that at least ++ one of the actions is possible, by providing more input and/or consuming more ++ output, and updating the next_* and avail_* values accordingly. 
If the ++ caller of inflate() does not provide both available input and available ++ output space, it is possible that there will be no progress made. The ++ application can consume the uncompressed output when it wants, for example ++ when the output buffer is full (avail_out == 0), or after each call of ++ inflate(). If inflate returns Z_OK and with zero avail_out, it must be ++ called again after making room in the output buffer because there might be ++ more output pending. ++ ++ The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, ++ Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much ++ output as possible to the output buffer. Z_BLOCK requests that inflate() ++ stop if and when it gets to the next deflate block boundary. When decoding ++ the zlib or gzip format, this will cause inflate() to return immediately ++ after the header and before the first block. When doing a raw inflate, ++ inflate() will go ahead and process the first block, and will return when it ++ gets to the end of that block, or when it runs out of data. ++ ++ The Z_BLOCK option assists in appending to or combining deflate streams. ++ To assist in this, on return inflate() always sets strm->data_type to the ++ number of unused bits in the last byte taken from strm->next_in, plus 64 if ++ inflate() is currently decoding the last block in the deflate stream, plus ++ 128 if inflate() returned immediately after decoding an end-of-block code or ++ decoding the complete header up to just before the first byte of the deflate ++ stream. The end-of-block will not be indicated until all of the uncompressed ++ data from that block has been written to strm->next_out. The number of ++ unused bits may in general be greater than seven, except when bit 7 of ++ data_type is set, in which case the number of unused bits will be less than ++ eight. 
data_type is set as noted here every time inflate() returns for all ++ flush options, and so can be used to determine the amount of currently ++ consumed input in bits. ++ ++ The Z_TREES option behaves as Z_BLOCK does, but it also returns when the ++ end of each deflate block header is reached, before any actual data in that ++ block is decoded. This allows the caller to determine the length of the ++ deflate block header for later use in random access within a deflate block. ++ 256 is added to the value of strm->data_type when inflate() returns ++ immediately after reaching the end of the deflate block header. ++ ++ inflate() should normally be called until it returns Z_STREAM_END or an ++ error. However if all decompression is to be performed in a single step (a ++ single call of inflate), the parameter flush should be set to Z_FINISH. In ++ this case all pending input is processed and all pending output is flushed; ++ avail_out must be large enough to hold all of the uncompressed data for the ++ operation to complete. (The size of the uncompressed data may have been ++ saved by the compressor for this purpose.) The use of Z_FINISH is not ++ required to perform an inflation in one step. However it may be used to ++ inform inflate that a faster approach can be used for the single inflate() ++ call. Z_FINISH also informs inflate to not maintain a sliding window if the ++ stream completes, which reduces inflate's memory footprint. If the stream ++ does not complete, either because not all of the stream is provided or not ++ enough output space is provided, then a sliding window will be allocated and ++ inflate() can be called again to continue the operation as if Z_NO_FLUSH had ++ been used. ++ ++ In this implementation, inflate() always flushes as much output as ++ possible to the output buffer, and always uses the faster approach on the ++ first call. 
So the effects of the flush parameter in this implementation are ++ on the return value of inflate() as noted below, when inflate() returns early ++ when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of ++ memory for a sliding window when Z_FINISH is used. ++ ++ If a preset dictionary is needed after this call (see inflateSetDictionary ++ below), inflate sets strm->adler to the Adler-32 checksum of the dictionary ++ chosen by the compressor and returns Z_NEED_DICT; otherwise it sets ++ strm->adler to the Adler-32 checksum of all output produced so far (that is, ++ total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described ++ below. At the end of the stream, inflate() checks that its computed Adler-32 ++ checksum is equal to that saved by the compressor and returns Z_STREAM_END ++ only if the checksum is correct. ++ ++ inflate() can decompress and check either zlib-wrapped or gzip-wrapped ++ deflate data. The header type is detected automatically, if requested when ++ initializing with inflateInit2(). Any information contained in the gzip ++ header is not retained unless inflateGetHeader() is used. When processing ++ gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output ++ produced so far. The CRC-32 is checked against the gzip trailer, as is the ++ uncompressed length, modulo 2^32.
++ ++ inflate() returns Z_OK if some progress has been made (more input processed ++ or more output produced), Z_STREAM_END if the end of the compressed data has ++ been reached and all uncompressed output has been produced, Z_NEED_DICT if a ++ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was ++ corrupted (input stream not conforming to the zlib format or incorrect check ++ value, in which case strm->msg points to a string with a more specific ++ error), Z_STREAM_ERROR if the stream structure was inconsistent (for example ++ next_in or next_out was NULL, or the state was inadvertently written over ++ by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR ++ if no progress is possible or if there was not enough room in the output ++ buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and ++ inflate() can be called again with more input and more output space to ++ continue decompressing. If Z_DATA_ERROR is returned, the application may ++ then call inflateSync() to look for a good compression block if a partial ++ recovery of the data is to be attempted. ++*/ ++ ++ ++Z_EXTERN int Z_EXPORT inflateEnd(z_stream *strm); ++/* ++ All dynamically allocated data structures for this stream are freed. ++ This function discards any unprocessed input and does not flush any pending ++ output. ++ ++ inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state ++ was inconsistent. ++*/ ++ ++ ++ /* Advanced functions */ ++ ++/* ++ The following functions are needed only in some special applications. ++*/ ++ ++/* ++Z_EXTERN int Z_EXPORT deflateInit2 (z_stream *strm, ++ int level, ++ int method, ++ int windowBits, ++ int memLevel, ++ int strategy); ++ ++ This is another version of deflateInit with more compression options. The ++ fields zalloc, zfree and opaque must be initialized before by the caller. ++ ++ The method parameter is the compression method. 
It must be Z_DEFLATED in ++ this version of the library. ++ ++ The windowBits parameter is the base two logarithm of the window size ++ (the size of the history buffer). It should be in the range 8..15 for this ++ version of the library. Larger values of this parameter result in better ++ compression at the expense of memory usage. The default value is 15 if ++ deflateInit is used instead. ++ ++ For the current implementation of deflate(), a windowBits value of 8 (a ++ window size of 256 bytes) is not supported. As a result, a request for 8 ++ will result in 9 (a 512-byte window). In that case, providing 8 to ++ inflateInit2() will result in an error when the zlib header with 9 is ++ checked against the initialization of inflate(). The remedy is to not use 8 ++ with deflateInit2() with this initialization, or at least in that case use 9 ++ with inflateInit2(). ++ ++ windowBits can also be -8..-15 for raw deflate. In this case, -windowBits ++ determines the window size. deflate() will then generate raw deflate data ++ with no zlib header or trailer, and will not compute a check value. ++ ++ windowBits can also be greater than 15 for optional gzip encoding. Add ++ 16 to windowBits to write a simple gzip header and trailer around the ++ compressed data instead of a zlib wrapper. The gzip header will have no ++ file name, no extra data, no comment, no modification time (set to zero), no ++ header crc, and the operating system will be set to the appropriate value, ++ if the operating system was determined at compile time. If a gzip stream is ++ being written, strm->adler is a CRC-32 instead of an Adler-32. ++ ++ For raw deflate or gzip encoding, a request for a 256-byte window is ++ rejected as invalid, since only the zlib header provides a means of ++ transmitting the window size to the decompressor. ++ ++ The memLevel parameter specifies how much memory should be allocated ++ for the internal compression state. 
memLevel=1 uses minimum memory but is ++ slow and reduces compression ratio; memLevel=9 uses maximum memory for ++ optimal speed. The default value is 8. See zconf.h for total memory usage ++ as a function of windowBits and memLevel. ++ ++ The strategy parameter is used to tune the compression algorithm. Use the ++ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a ++ filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no ++ string match), or Z_RLE to limit match distances to one (run-length ++ encoding). Filtered data consists mostly of small values with a somewhat ++ random distribution. In this case, the compression algorithm is tuned to ++ compress them better. The effect of Z_FILTERED is to force more Huffman ++ coding and less string matching; it is somewhat intermediate between ++ Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as ++ fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The ++ strategy parameter only affects the compression ratio but not the ++ correctness of the compressed output even if it is not set appropriately. ++ Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler ++ decoder for special applications. ++ ++ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid ++ method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is ++ incompatible with the version assumed by the caller (ZLIB_VERSION). msg is ++ set to null if there is no error message. deflateInit2 does not perform any ++ compression: this will be done by deflate(). ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateSetDictionary(z_stream *strm, ++ const unsigned char *dictionary, ++ unsigned int dictLength); ++/* ++ Initializes the compression dictionary from the given byte sequence ++ without producing any compressed output. 
When using the zlib format, this ++ function must be called immediately after deflateInit, deflateInit2 or ++ deflateReset, and before any call of deflate. When doing raw deflate, this ++ function must be called either before any call of deflate, or immediately ++ after the completion of a deflate block, i.e. after all input has been ++ consumed and all output has been delivered when using any of the flush ++ options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The ++ compressor and decompressor must use exactly the same dictionary (see ++ inflateSetDictionary). ++ ++ The dictionary should consist of strings (byte sequences) that are likely ++ to be encountered later in the data to be compressed, with the most commonly ++ used strings preferably put towards the end of the dictionary. Using a ++ dictionary is most useful when the data to be compressed is short and can be ++ predicted with good accuracy; the data can then be compressed better than ++ with the default empty dictionary. ++ ++ Depending on the size of the compression data structures selected by ++ deflateInit or deflateInit2, a part of the dictionary may in effect be ++ discarded, for example if the dictionary is larger than the window size ++ provided in deflateInit or deflateInit2. Thus the strings most likely to be ++ useful should be put at the end of the dictionary, not at the front. In ++ addition, the current implementation of deflate will use at most the window ++ size minus 262 bytes of the provided dictionary. ++ ++ Upon return of this function, strm->adler is set to the Adler-32 value ++ of the dictionary; the decompressor may later use this value to determine ++ which dictionary has been used by the compressor. (The Adler-32 value ++ applies to the whole dictionary even if only a subset of the dictionary is ++ actually used by the compressor.) If a raw deflate was requested, then the ++ Adler-32 value is not computed and strm->adler is not set. 
++ ++ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a ++ parameter is invalid (e.g. dictionary being NULL) or the stream state is ++ inconsistent (for example if deflate has already been called for this stream ++ or if not at a block boundary for raw deflate). deflateSetDictionary does ++ not perform any compression: this will be done by deflate(). ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateGetDictionary (z_stream *strm, unsigned char *dictionary, unsigned int *dictLength); ++/* ++ Returns the sliding dictionary being maintained by deflate. dictLength is ++ set to the number of bytes in the dictionary, and that many bytes are copied ++ to dictionary. dictionary must have enough space, where 32768 bytes is ++ always enough. If deflateGetDictionary() is called with dictionary equal to ++ Z_NULL, then only the dictionary length is returned, and nothing is copied. ++ Similarly, if dictLength is Z_NULL, then it is not set. ++ ++ deflateGetDictionary() may return a length less than the window size, even ++ when more than the window size in input has been provided. It may return up ++ to 258 bytes less in that case, due to how zlib's implementation of deflate ++ manages the sliding window and lookahead for matches, where matches can be ++ up to 258 bytes long. If the application needs the last window-size bytes of ++ input, then that would need to be saved by the application outside of zlib. ++ ++ deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the ++ stream state is inconsistent. ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateCopy(z_stream *dest, z_stream *source); ++/* ++ Sets the destination stream as a complete copy of the source stream. ++ ++ This function can be useful when several compression strategies will be ++ tried, for example when there are several ways of pre-processing the input ++ data with a filter. The streams that will be discarded should then be freed ++ by calling deflateEnd. 
Note that deflateCopy duplicates the internal ++ compression state which can be quite large, so this strategy is slow and can ++ consume lots of memory. ++ ++ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent ++ (such as zalloc being NULL). msg is left unchanged in both source and ++ destination. ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateReset(z_stream *strm); ++/* ++ This function is equivalent to deflateEnd followed by deflateInit, but ++ does not free and reallocate the internal compression state. The stream ++ will leave the compression level and any other attributes that may have been ++ set unchanged. total_in, total_out, adler, and msg are initialized. ++ ++ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateParams(z_stream *strm, int level, int strategy); ++/* ++ Dynamically update the compression level and compression strategy. The ++ interpretation of level and strategy is as in deflateInit2(). This can be ++ used to switch between compression and straight copy of the input data, or ++ to switch to a different kind of input data requiring a different strategy. ++ If the compression approach (which is a function of the level) or the ++ strategy is changed, and if there have been any deflate() calls since the ++ state was initialized or reset, then the input available so far is ++ compressed with the old level and strategy using deflate(strm, Z_BLOCK). ++ There are three approaches for the compression levels 0, 1..3, and 4..9 ++ respectively. The new level and strategy will take effect at the next call ++ of deflate(). ++ ++ If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does ++ not have enough output space to complete, then the parameter change will not ++ take effect. 
In this case, deflateParams() can be called again with the ++ same parameters and more output space to try again. ++ ++ In order to assure a change in the parameters on the first try, the ++ deflate stream should be flushed using deflate() with Z_BLOCK or other flush ++ request until strm.avail_out is not zero, before calling deflateParams(). ++ Then no more input data should be provided before the deflateParams() call. ++ If this is done, the old level and strategy will be applied to the data ++ compressed before deflateParams(), and the new level and strategy will be ++ applied to the data compressed after deflateParams(). ++ ++ deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream ++ state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if ++ there was not enough output space to complete the compression of the ++ available input data before a change in the strategy or approach. Note that ++ in the case of a Z_BUF_ERROR, the parameters are not changed. A return ++ value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be ++ retried with more output space. ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateTune(z_stream *strm, int good_length, int max_lazy, int nice_length, int max_chain); ++/* ++ Fine tune deflate's internal compression parameters. This should only be ++ used by someone who understands the algorithm used by zlib's deflate for ++ searching for the best matching string, and even then only by the most ++ fanatic optimizer trying to squeeze out the last compressed bit for their ++ specific input data. Read the deflate.c source code for the meaning of the ++ max_lazy, good_length, nice_length, and max_chain parameters. ++ ++ deflateTune() can be called after deflateInit() or deflateInit2(), and ++ returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. 
++ */ ++ ++Z_EXTERN unsigned long Z_EXPORT deflateBound(z_stream *strm, unsigned long sourceLen); ++/* ++ deflateBound() returns an upper bound on the compressed size after ++ deflation of sourceLen bytes. It must be called after deflateInit() or ++ deflateInit2(), and after deflateSetHeader(), if used. This would be used ++ to allocate an output buffer for deflation in a single pass, and so would be ++ called before deflate(). If that first deflate() call is provided the ++ sourceLen input bytes, an output buffer allocated to the size returned by ++ deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed ++ to return Z_STREAM_END. Note that it is possible for the compressed size to ++ be larger than the value returned by deflateBound() if flush options other ++ than Z_FINISH or Z_NO_FLUSH are used. ++*/ ++ ++Z_EXTERN int Z_EXPORT deflatePending(z_stream *strm, uint32_t *pending, int *bits); ++/* ++ deflatePending() returns the number of bytes and bits of output that have ++ been generated, but not yet provided in the available output. The bytes not ++ provided would be due to the available output space having been consumed. ++ The number of bits of output not provided is between 0 and 7, where they ++ await more bits to join them in order to fill out a full byte. If pending ++ or bits are NULL, then those values are not set. ++ ++ deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++ */ ++ ++Z_EXTERN int Z_EXPORT deflatePrime(z_stream *strm, int bits, int value); ++/* ++ deflatePrime() inserts bits in the deflate output stream. The intent ++ is that this function is used to start off the deflate output with the bits ++ leftover from a previous deflate stream when appending to it. As such, this ++ function can only be used for raw deflate, and must be used before the first ++ deflate() call after a deflateInit2() or deflateReset().
bits must be less ++ than or equal to 16, and that many of the least significant bits of value ++ will be inserted in the output. ++ ++ deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough ++ room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the ++ source stream state was inconsistent. ++*/ ++ ++Z_EXTERN int Z_EXPORT deflateSetHeader(z_stream *strm, gz_headerp head); ++/* ++ deflateSetHeader() provides gzip header information for when a gzip ++ stream is requested by deflateInit2(). deflateSetHeader() may be called ++ after deflateInit2() or deflateReset() and before the first call of ++ deflate(). The text, time, os, extra field, name, and comment information ++ in the provided gz_header structure are written to the gzip header (xflag is ++ ignored -- the extra flags are set according to the compression level). The ++ caller must assure that, if not NULL, name and comment are terminated with ++ a zero byte, and that if extra is not NULL, that extra_len bytes are ++ available there. If hcrc is true, a gzip header crc is included. Note that ++ the current versions of the command-line version of gzip (up through version ++ 1.3.x) do not support header crc's, and will report that it is a "multi-part ++ gzip file" and give up. ++ ++ If deflateSetHeader is not used, the default gzip header has text false, ++ the time set to zero, and os set to the current operating system, with no ++ extra, name, or comment fields. The gzip header is returned to the default ++ state by deflateReset(). ++ ++ deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++/* ++Z_EXTERN int Z_EXPORT inflateInit2(z_stream *strm, int windowBits); ++ ++ This is another version of inflateInit with an extra parameter. The ++ fields next_in, avail_in, zalloc, zfree and opaque must be initialized ++ before by the caller. 
++ ++ The windowBits parameter is the base two logarithm of the maximum window ++ size (the size of the history buffer). It should be in the range 8..15 for ++ this version of the library. The default value is 15 if inflateInit is used ++ instead. windowBits must be greater than or equal to the windowBits value ++ provided to deflateInit2() while compressing, or it must be equal to 15 if ++ deflateInit2() was not used. If a compressed stream with a larger window ++ size is given as input, inflate() will return with the error code ++ Z_DATA_ERROR instead of trying to allocate a larger window. ++ ++ windowBits can also be zero to request that inflate use the window size in ++ the zlib header of the compressed stream. ++ ++ windowBits can also be -8..-15 for raw inflate. In this case, -windowBits ++ determines the window size. inflate() will then process raw deflate data, ++ not looking for a zlib or gzip header, not generating a check value, and not ++ looking for any check values for comparison at the end of the stream. This ++ is for use with other formats that use the deflate compressed data format ++ such as zip. Those formats provide their own check values. If a custom ++ format is developed using the raw deflate format for compressed data, it is ++ recommended that a check value such as an Adler-32 or a CRC-32 be applied to ++ the uncompressed data as is done in the zlib, gzip, and zip formats. For ++ most applications, the zlib format should be used as is. Note that comments ++ above on the use in deflateInit2() apply to the magnitude of windowBits. ++ ++ windowBits can also be greater than 15 for optional gzip decoding. Add ++ 32 to windowBits to enable zlib and gzip decoding with automatic header ++ detection, or add 16 to decode only the gzip format (the zlib format will ++ return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a ++ CRC-32 instead of an Adler-32.
Unlike the gunzip utility and gzread() (see ++ below), inflate() will *not* automatically decode concatenated gzip members. ++ inflate() will return Z_STREAM_END at the end of the gzip member. The state ++ would need to be reset to continue decoding a subsequent gzip member. This ++ *must* be done if there is more data after a gzip member, in order for the ++ decompression to be compliant with the gzip standard (RFC 1952). ++ ++ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the ++ version assumed by the caller, or Z_STREAM_ERROR if the parameters are ++ invalid, such as a null pointer to the structure. msg is set to null if ++ there is no error message. inflateInit2 does not perform any decompression ++ apart from possibly reading the zlib header if present: actual decompression ++ will be done by inflate(). (So next_in and avail_in may be modified, but ++ next_out and avail_out are unused and unchanged.) The current implementation ++ of inflateInit2() does not process any header information -- that is ++ deferred until inflate() is called. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateSetDictionary(z_stream *strm, const unsigned char *dictionary, unsigned int dictLength); ++/* ++ Initializes the decompression dictionary from the given uncompressed byte ++ sequence. This function must be called immediately after a call of inflate, ++ if that call returned Z_NEED_DICT. The dictionary chosen by the compressor ++ can be determined from the Adler-32 value returned by that call of inflate. ++ The compressor and decompressor must use exactly the same dictionary (see ++ deflateSetDictionary). For raw inflate, this function can be called at any ++ time to set the dictionary. If the provided dictionary is smaller than the ++ window and there is already data in the window, then the provided dictionary ++ will amend what's there. 
The application must ensure that the dictionary ++ that was used for compression is provided. ++ ++ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a ++ parameter is invalid (e.g. dictionary being NULL) or the stream state is ++ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the ++ expected one (incorrect Adler-32 value). inflateSetDictionary does not ++ perform any decompression: this will be done by subsequent calls of ++ inflate(). ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateGetDictionary(z_stream *strm, unsigned char *dictionary, unsigned int *dictLength); ++/* ++ Returns the sliding dictionary being maintained by inflate. dictLength is ++ set to the number of bytes in the dictionary, and that many bytes are copied ++ to dictionary. dictionary must have enough space, where 32768 bytes is ++ always enough. If inflateGetDictionary() is called with dictionary equal to ++ NULL, then only the dictionary length is returned, and nothing is copied. ++ Similarly, if dictLength is NULL, then it is not set. ++ ++ inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the ++ stream state is inconsistent. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateSync(z_stream *strm); ++/* ++ Skips invalid compressed data until a possible full flush point (see above ++ for the description of deflate with Z_FULL_FLUSH) can be found, or until all ++ available input is skipped. No output is provided. ++ ++ inflateSync searches for a 00 00 FF FF pattern in the compressed data. ++ All full flush points have this pattern, but not all occurrences of this ++ pattern are full flush points. ++ ++ inflateSync returns Z_OK if a possible full flush point has been found, ++ Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point ++ has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. ++ In the success case, the application may save the current value of ++ total_in which indicates where valid compressed data was found.
In the ++ error case, the application may repeatedly call inflateSync, providing more ++ input each time, until success or end of the input data. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateCopy(z_stream *dest, z_stream *source); ++/* ++ Sets the destination stream as a complete copy of the source stream. ++ ++ This function can be useful when randomly accessing a large stream. The ++ first pass through the stream can periodically record the inflate state, ++ allowing restarting inflate at those points when randomly accessing the ++ stream. ++ ++ inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent ++ (such as zalloc being NULL). msg is left unchanged in both source and ++ destination. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateReset(z_stream *strm); ++/* ++ This function is equivalent to inflateEnd followed by inflateInit, ++ but does not free and reallocate the internal decompression state. The ++ stream will keep attributes that may have been set by inflateInit2. ++ total_in, total_out, adler, and msg are initialized. ++ ++ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL). ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateReset2(z_stream *strm, int windowBits); ++/* ++ This function is the same as inflateReset, but it also permits changing ++ the wrap and window size requests. The windowBits parameter is interpreted ++ the same as it is for inflateInit2. If the window size is changed, then the ++ memory allocated for the window is freed, and the window will be reallocated ++ by inflate() if needed. ++ ++ inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent (such as zalloc or state being NULL), or if ++ the windowBits parameter is invalid. 
++*/ ++ ++Z_EXTERN int Z_EXPORT inflatePrime(z_stream *strm, int bits, int value); ++/* ++ This function inserts bits in the inflate input stream. The intent is ++ that this function is used to start inflating at a bit position in the ++ middle of a byte. The provided bits will be used before any bytes are used ++ from next_in. This function should only be used with raw inflate, and ++ should be used before the first inflate() call after inflateInit2() or ++ inflateReset(). bits must be less than or equal to 16, and that many of the ++ least significant bits of value will be inserted in the input. ++ ++ If bits is negative, then the input stream bit buffer is emptied. Then ++ inflatePrime() can be called again to put bits in the buffer. This is used ++ to clear out bits leftover after feeding inflate a block description prior ++ to feeding inflate codes. ++ ++ inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++Z_EXTERN long Z_EXPORT inflateMark(z_stream *strm); ++/* ++ This function returns two values, one in the lower 16 bits of the return ++ value, and the other in the remaining upper bits, obtained by shifting the ++ return value down 16 bits. If the upper value is -1 and the lower value is ++ zero, then inflate() is currently decoding information outside of a block. ++ If the upper value is -1 and the lower value is non-zero, then inflate is in ++ the middle of a stored block, with the lower value equaling the number of ++ bytes from the input remaining to copy. If the upper value is not -1, then ++ it is the number of bits back from the current bit position in the input of ++ the code (literal or length/distance pair) currently being processed. In ++ that case the lower value is the number of bytes already emitted for that ++ code. 
++ ++ A code is being processed if inflate is waiting for more input to complete ++ decoding of the code, or if it has completed decoding but is waiting for ++ more output space to write the literal or match data. ++ ++ inflateMark() is used to mark locations in the input data for random ++ access, which may be at bit positions, and to note those cases where the ++ output of a code may span boundaries of random access blocks. The current ++ location in the input stream can be determined from avail_in and data_type ++ as noted in the description for the Z_BLOCK flush parameter for inflate. ++ ++ inflateMark returns the value noted above, or -65536 if the provided ++ source stream state was inconsistent. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateGetHeader(z_stream *strm, gz_headerp head); ++/* ++ inflateGetHeader() requests that gzip header information be stored in the ++ provided gz_header structure. inflateGetHeader() may be called after ++ inflateInit2() or inflateReset(), and before the first call of inflate(). ++ As inflate() processes the gzip stream, head->done is zero until the header ++ is completed, at which time head->done is set to one. If a zlib stream is ++ being decoded, then head->done is set to -1 to indicate that there will be ++ no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be ++ used to force inflate() to return immediately after header processing is ++ complete and before any actual data is decompressed. ++ ++ The text, time, xflags, and os fields are filled in with the gzip header ++ contents. hcrc is set to true if there is a header CRC. (The header CRC ++ was valid if done is set to one.) If extra is not NULL, then extra_max ++ contains the maximum number of bytes to write to extra. Once done is true, ++ extra_len contains the actual extra field length, and extra contains the ++ extra field, or that field truncated if extra_max is less than extra_len. 
++ If name is not NULL, then up to name_max characters are written there, ++ terminated with a zero unless the length is greater than name_max. If ++ comment is not NULL, then up to comm_max characters are written there, ++ terminated with a zero unless the length is greater than comm_max. When any ++ of extra, name, or comment are not NULL and the respective field is not ++ present in the header, then that field is set to NULL to signal its ++ absence. This allows the use of deflateSetHeader() with the returned ++ structure to duplicate the header. However if those fields are set to ++ allocated memory, then the application will need to save those pointers ++ elsewhere so that they can be eventually freed. ++ ++ If inflateGetHeader is not used, then the header information is simply ++ discarded. The header is always checked for validity, including the header ++ CRC if present. inflateReset() will reset the process to discard the header ++ information. The application would need to call inflateGetHeader() again to ++ retrieve the header from the next gzip stream. ++ ++ inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source ++ stream state was inconsistent. ++*/ ++ ++/* ++Z_EXTERN int Z_EXPORT inflateBackInit (z_stream *strm, int windowBits, unsigned char *window); ++ ++ Initialize the internal stream state for decompression using inflateBack() ++ calls. The fields zalloc, zfree and opaque in strm must be initialized ++ before the call. If zalloc and zfree are NULL, then the default library- ++ derived memory allocation routines are used. windowBits is the base two ++ logarithm of the window size, in the range 8..15. window is a caller ++ supplied buffer of that size. Except for special applications where it is ++ assured that deflate was used with small window sizes, windowBits must be 15 ++ and a 32K byte window must be supplied to be able to decompress general ++ deflate streams. ++ ++ See inflateBack() for the usage of these routines. 
++ ++ inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of ++ the parameters are invalid, Z_MEM_ERROR if the internal state could not be ++ allocated, or Z_VERSION_ERROR if the version of the library does not match ++ the version of the header file. ++*/ ++ ++typedef uint32_t (*in_func) (void *, z_const unsigned char * *); ++typedef int (*out_func) (void *, unsigned char *, uint32_t); ++ ++Z_EXTERN int Z_EXPORT inflateBack(z_stream *strm, in_func in, void *in_desc, out_func out, void *out_desc); ++/* ++ inflateBack() does a raw inflate with a single call using a call-back ++ interface for input and output. This is potentially more efficient than ++ inflate() for file i/o applications, in that it avoids copying between the ++ output and the sliding window by simply making the window itself the output ++ buffer. inflate() can be faster on modern CPUs when used with large ++ buffers. inflateBack() trusts the application to not change the output ++ buffer passed by the output function, at least until inflateBack() returns. ++ ++ inflateBackInit() must be called first to allocate the internal state ++ and to initialize the state with the user-provided window buffer. ++ inflateBack() may then be used multiple times to inflate a complete, raw ++ deflate stream with each call. inflateBackEnd() is then called to free the ++ allocated state. ++ ++ A raw deflate stream is one with no zlib or gzip header or trailer. ++ This routine would normally be used in a utility that reads zip or gzip ++ files and writes out uncompressed files. The utility would decode the ++ header and process the trailer on its own, hence this routine expects only ++ the raw deflate stream to decompress. This is different from the default ++ behavior of inflate(), which expects a zlib header and trailer around the ++ deflate stream. ++ ++ inflateBack() uses two subroutines supplied by the caller that are then ++ called by inflateBack() for input and output. 
inflateBack() calls those ++ routines until it reads a complete deflate stream and writes out all of the ++ uncompressed data, or until it encounters an error. The function's ++ parameters and return types are defined above in the in_func and out_func ++ typedefs. inflateBack() will call in(in_desc, &buf) which should return the ++ number of bytes of provided input, and a pointer to that input in buf. If ++ there is no input available, in() must return zero -- buf is ignored in that ++ case -- and inflateBack() will return a buffer error. inflateBack() will ++ call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. ++ out() should return zero on success, or non-zero on failure. If out() ++ returns non-zero, inflateBack() will return with an error. Neither in() nor ++ out() are permitted to change the contents of the window provided to ++ inflateBackInit(), which is also the buffer that out() uses to write from. ++ The length written by out() will be at most the window size. Any non-zero ++ amount of input may be provided by in(). ++ ++ For convenience, inflateBack() can be provided input on the first call by ++ setting strm->next_in and strm->avail_in. If that input is exhausted, then ++ in() will be called. Therefore strm->next_in must be initialized before ++ calling inflateBack(). If strm->next_in is NULL, then in() will be called ++ immediately for input. If strm->next_in is not NULL, then strm->avail_in ++ must also be initialized, and then if strm->avail_in is not zero, input will ++ initially be taken from strm->next_in[0 .. strm->avail_in - 1]. ++ ++ The in_desc and out_desc parameters of inflateBack() are passed as the ++ first parameter of in() and out() respectively when they are called. These ++ descriptors can be optionally used to pass any information that the caller- ++ supplied in() and out() functions need to do their job.
++ ++ On return, inflateBack() will set strm->next_in and strm->avail_in to ++ pass back any unused input that was provided by the last in() call. The ++ return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR ++ if in() or out() returned an error, Z_DATA_ERROR if there was a format error ++ in the deflate stream (in which case strm->msg is set to indicate the nature ++ of the error), or Z_STREAM_ERROR if the stream was not properly initialized. ++ In the case of Z_BUF_ERROR, an input or output error can be distinguished ++ using strm->next_in which will be NULL only if in() returned an error. If ++ strm->next_in is not NULL, then the Z_BUF_ERROR was due to out() returning ++ non-zero. (in() will always be called before out(), so strm->next_in is ++ assured to be defined if out() returns non-zero.) Note that inflateBack() ++ cannot return Z_OK. ++*/ ++ ++Z_EXTERN int Z_EXPORT inflateBackEnd(z_stream *strm); ++/* ++ All memory allocated by inflateBackInit() is freed. ++ ++ inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream ++ state was inconsistent. ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT zlibCompileFlags(void); ++/* Return flags indicating compile-time options. 
++ ++ Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: ++ 1.0: size of unsigned int ++ 3.2: size of unsigned long ++ 5.4: size of void * (pointer) ++ 7.6: size of z_off_t ++ ++ Compiler, assembler, and debug options: ++ 8: ZLIB_DEBUG ++ 9: ASMV or ASMINF -- use ASM code ++ 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention ++ 11: 0 (reserved) ++ ++ One-time table building (smaller code, but not thread-safe if true): ++ 12: BUILDFIXED -- build static block decoding tables when needed (not supported by zlib-ng) ++ 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed ++ 14,15: 0 (reserved) ++ ++ Library content (indicates missing functionality): ++ 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking ++ deflate code when not needed) ++ 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect ++ and decode gzip streams (to avoid linking crc code) ++ 18-19: 0 (reserved) ++ ++ Operation variations (changes in library functionality): ++ 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate ++ 21: FASTEST -- deflate algorithm with only one, lowest compression level ++ 22,23: 0 (reserved) ++ ++ The sprintf variant used by gzprintf (zero is best): ++ 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format ++ 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! ++ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned ++ ++ Remainder: ++ 27-31: 0 (reserved) ++ */ ++ ++ ++#ifndef Z_SOLO ++ ++ /* utility functions */ ++ ++/* ++ The following utility functions are implemented on top of the basic ++ stream-oriented functions. To simplify the interface, some default options ++ are assumed (compression level and memory usage, standard memory allocation ++ functions). The source code of these utility functions can be modified if ++ you need special options. 
++*/ ++ ++Z_EXTERN int Z_EXPORT compress(unsigned char *dest, unsigned long *destLen, const unsigned char *source, unsigned long sourceLen); ++/* ++ Compresses the source buffer into the destination buffer. sourceLen is ++ the byte length of the source buffer. Upon entry, destLen is the total size ++ of the destination buffer, which must be at least the value returned by ++ compressBound(sourceLen). Upon exit, destLen is the actual size of the ++ compressed data. compress() is equivalent to compress2() with a level ++ parameter of Z_DEFAULT_COMPRESSION. ++ ++ compress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer. ++*/ ++ ++Z_EXTERN int Z_EXPORT compress2(unsigned char *dest, unsigned long *destLen, const unsigned char *source, ++ unsigned long sourceLen, int level); ++/* ++ Compresses the source buffer into the destination buffer. The level ++ parameter has the same meaning as in deflateInit. sourceLen is the byte ++ length of the source buffer. Upon entry, destLen is the total size of the ++ destination buffer, which must be at least the value returned by ++ compressBound(sourceLen). Upon exit, destLen is the actual size of the ++ compressed data. ++ ++ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough ++ memory, Z_BUF_ERROR if there was not enough room in the output buffer, ++ Z_STREAM_ERROR if the level parameter is invalid. ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT compressBound(unsigned long sourceLen); ++/* ++ compressBound() returns an upper bound on the compressed size after ++ compress() or compress2() on sourceLen bytes. It would be used before a ++ compress() or compress2() call to allocate the destination buffer. ++*/ ++ ++Z_EXTERN int Z_EXPORT uncompress(unsigned char *dest, unsigned long *destLen, const unsigned char *source, unsigned long sourceLen); ++/* ++ Decompresses the source buffer into the destination buffer. 
sourceLen is ++ the byte length of the source buffer. Upon entry, destLen is the total size ++ of the destination buffer, which must be large enough to hold the entire ++ uncompressed data. (The size of the uncompressed data must have been saved ++ previously by the compressor and transmitted to the decompressor by some ++ mechanism outside the scope of this compression library.) Upon exit, destLen ++ is the actual size of the uncompressed data. ++ ++ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not ++ enough memory, Z_BUF_ERROR if there was not enough room in the output ++ buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In ++ the case where there is not enough room, uncompress() will fill the output ++ buffer with the uncompressed data up to that point. ++*/ ++ ++ ++Z_EXTERN int Z_EXPORT uncompress2 (unsigned char *dest, unsigned long *destLen, ++ const unsigned char *source, unsigned long *sourceLen); ++/* ++ Same as uncompress, except that sourceLen is a pointer, where the ++ length of the source is *sourceLen. On return, *sourceLen is the number of ++ source bytes consumed. ++*/ ++ ++ ++ /* gzip file access functions */ ++ ++/* ++ This library supports reading and writing files in gzip (.gz) format with ++ an interface similar to that of stdio, using the functions that start with ++ "gz". The gzip format is different from the zlib format. gzip is a gzip ++ wrapper, documented in RFC 1952, wrapped around a deflate stream. ++*/ ++ ++typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ ++ ++/* ++Z_EXTERN gzFile Z_EXPORT gzopen(const char *path, const char *mode); ++ ++ Open the gzip (.gz) file at path for reading and decompressing, or ++ compressing and writing. 
The mode parameter is as in fopen ("rb" or "wb") ++ but can also include a compression level ("wb9") or a strategy: 'f' for ++ filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", ++ 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression ++ as in "wb9F". (See the description of deflateInit2 for more information ++ about the strategy parameter.) 'T' will request transparent writing or ++ appending with no compression and not using the gzip format. ++ ++ "a" can be used instead of "w" to request that the gzip stream that will ++ be written be appended to the file. "+" will result in an error, since ++ reading and writing to the same gzip file is not supported. The addition of ++ "x" when writing will create the file exclusively, which fails if the file ++ already exists. On systems that support it, the addition of "e" when ++ reading or writing will set the flag to close the file on an execve() call. ++ ++ These functions, as well as gzip, will read and decode a sequence of gzip ++ streams in a file. The append function of gzopen() can be used to create ++ such a file. (Also see gzflush() for another way to do this.) When ++ appending, gzopen does not test whether the file begins with a gzip stream, ++ nor does it look for the end of the gzip streams to begin appending. gzopen ++ will simply append a gzip stream to the existing file. ++ ++ gzopen can be used to read a file which is not in gzip format; in this ++ case gzread will directly read from the file without decompression. When ++ reading, this will be detected automatically by looking for the magic two- ++ byte gzip header. ++ ++ gzopen returns NULL if the file could not be opened, if there was ++ insufficient memory to allocate the gzFile state, or if an invalid mode was ++ specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). ++ errno can be checked to determine if the reason gzopen failed was that the ++ file could not be opened. 
++*/ ++ ++Z_EXTERN gzFile Z_EXPORT gzdopen(int fd, const char *mode); ++/* ++ Associate a gzFile with the file descriptor fd. File descriptors are ++ obtained from calls like open, dup, creat, pipe or fileno (if the file has ++ been previously opened with fopen). The mode parameter is as in gzopen. ++ ++ The next call of gzclose on the returned gzFile will also close the file ++ descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor ++ fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, ++ mode);. The duplicated descriptor should be saved to avoid a leak, since ++ gzdopen does not close fd if it fails. If you are using fileno() to get the ++ file descriptor from a FILE *, then you will have to use dup() to avoid ++ double-close()ing the file descriptor. Both gzclose() and fclose() will ++ close the associated file descriptor, so they need to have different file ++ descriptors. ++ ++ gzdopen returns NULL if there was insufficient memory to allocate the ++ gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not ++ provided, or '+' was provided), or if fd is -1. The file descriptor is not ++ used until the next gz* read, write, seek, or close operation, so gzdopen ++ will not detect if fd is invalid (unless fd is -1). ++*/ ++ ++Z_EXTERN int Z_EXPORT gzbuffer(gzFile file, unsigned size); ++/* ++ Set the internal buffer size used by this library's functions for file to ++ size. The default buffer size is 8192 bytes. This function must be called ++ after gzopen() or gzdopen(), and before any other calls that read or write ++ the file. The buffer memory allocation is always deferred to the first read ++ or write. Three times that size in buffer space is allocated. A larger ++ buffer size of, for example, 64K or 128K bytes will noticeably increase the ++ speed of decompression (reading). ++ ++ The new buffer size also affects the maximum length for gzprintf(). 
++ ++ gzbuffer() returns 0 on success, or -1 on failure, such as being called ++ too late. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzsetparams(gzFile file, int level, int strategy); ++/* ++ Dynamically update the compression level and strategy for file. See the ++ description of deflateInit2 for the meaning of these parameters. Previously ++ provided data is flushed before applying the parameter changes. ++ ++ gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not ++ opened for writing, Z_ERRNO if there is an error writing the flushed data, ++ or Z_MEM_ERROR if there is a memory allocation error. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzread(gzFile file, void *buf, unsigned len); ++/* ++ Read and decompress up to len uncompressed bytes from file into buf. If ++ the input file is not in gzip format, gzread copies the given number of ++ bytes into the buffer directly from the file. ++ ++ After reaching the end of a gzip stream in the input, gzread will continue ++ to read, looking for another gzip stream. Any number of gzip streams may be ++ concatenated in the input file, and will all be decompressed by gzread(). ++ If something other than a gzip stream is encountered after a gzip stream, ++ that remaining trailing garbage is ignored (and no error is returned). ++ ++ gzread can be used to read a gzip file that is being concurrently written. ++ Upon reaching the end of the input, gzread will return with the available ++ data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then ++ gzclearerr can be used to clear the end of file indicator in order to permit ++ gzread to be tried again. Z_OK indicates that a gzip stream was completed ++ on the last gzread. Z_BUF_ERROR indicates that the input file ended in the ++ middle of a gzip stream. Note that gzread does not return -1 in the event ++ of an incomplete gzip stream. This error is deferred until gzclose(), which ++ will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip ++ stream. 
Alternatively, gzerror can be used before gzclose to detect this ++ case. ++ ++ gzread returns the number of uncompressed bytes actually read, less than ++ len for end of file, or -1 for error. If len is too large to fit in an int, ++ then nothing is read, -1 is returned, and the error state is set to ++ Z_STREAM_ERROR. ++*/ ++ ++Z_EXTERN size_t Z_EXPORT gzfread (void *buf, size_t size, size_t nitems, gzFile file); ++/* ++ Read and decompress up to nitems items of size size from file into buf, ++ otherwise operating as gzread() does. This duplicates the interface of ++ stdio's fread(), with size_t request and return types. If the library ++ defines size_t, then z_size_t is identical to size_t. If not, then z_size_t ++ is an unsigned integer type that can contain a pointer. ++ ++ gzfread() returns the number of full items read of size size, or zero if ++ the end of the file was reached and a full item could not be read, or if ++ there was an error. gzerror() must be consulted if zero is returned in ++ order to determine if there was an error. If the multiplication of size and ++ nitems overflows, i.e. the product does not fit in a size_t, then nothing ++ is read, zero is returned, and the error state is set to Z_STREAM_ERROR. ++ ++ In the event that the end of file is reached and only a partial item is ++ available at the end, i.e. the remaining uncompressed data length is not a ++ multiple of size, then the final partial item is nevertheless read into buf ++ and the end-of-file flag is set. The length of the partial item read is not ++ provided, but could be inferred from the result of gztell(). This behavior ++ is the same as the behavior of fread() implementations in common libraries, ++ but it prevents the direct use of gzfread() to read a concurrently written ++ file, resetting and retrying on end-of-file, when size is not 1. 
++*/ ++ ++Z_EXTERN int Z_EXPORT gzwrite(gzFile file, void const *buf, unsigned len); ++/* ++ Compress and write the len uncompressed bytes at buf to file. gzwrite ++ returns the number of uncompressed bytes written or 0 in case of error. ++*/ ++ ++Z_EXTERN size_t Z_EXPORT gzfwrite(void const *buf, size_t size, size_t nitems, gzFile file); ++/* ++ Compress and write nitems items of size size from buf to file, duplicating ++ the interface of stdio's fwrite(), with size_t request and return types. ++ ++ gzfwrite() returns the number of full items written of size size, or zero ++ if there was an error. If the multiplication of size and nitems overflows, ++ i.e. the product does not fit in a size_t, then nothing is written, zero ++ is returned, and the error state is set to Z_STREAM_ERROR. ++*/ ++ ++Z_EXTERN int Z_EXPORTVA gzprintf(gzFile file, const char *format, ...); ++/* ++ Convert, format, compress, and write the arguments (...) to file under ++ control of the string format, as in fprintf. gzprintf returns the number of ++ uncompressed bytes actually written, or a negative zlib error code in case ++ of error. The number of uncompressed bytes written is limited to 8191, or ++ one less than the buffer size given to gzbuffer(). The caller should assure ++ that this limit is not exceeded. If it is exceeded, then gzprintf() will ++ return an error (0) with nothing written. In this case, there may also be a ++ buffer overflow with unpredictable consequences, which is possible only if ++ zlib was compiled with the insecure functions sprintf() or vsprintf(), ++ because the secure snprintf() or vsnprintf() functions were not available. ++ This can be determined using zlibCompileFlags(). ++*/ ++ ++Z_EXTERN int Z_EXPORT gzputs(gzFile file, const char *s); ++/* ++ Compress and write the given null-terminated string s to file, excluding ++ the terminating null character. ++ ++ gzputs returns the number of characters written, or -1 in case of error. 
++*/ ++ ++Z_EXTERN char * Z_EXPORT gzgets(gzFile file, char *buf, int len); ++/* ++ Read and decompress bytes from file into buf, until len-1 characters are ++ read, or until a newline character is read and transferred to buf, or an ++ end-of-file condition is encountered. If any characters are read or if len ++ is one, the string is terminated with a null character. If no characters ++ are read due to an end-of-file or len is less than one, then the buffer is ++ left untouched. ++ ++ gzgets returns buf which is a null-terminated string, or it returns NULL ++ for end-of-file or in case of error. If there was an error, the contents at ++ buf are indeterminate. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzputc(gzFile file, int c); ++/* ++ Compress and write c, converted to an unsigned char, into file. gzputc ++ returns the value that was written, or -1 in case of error. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzgetc(gzFile file); ++/* ++ Read and decompress one byte from file. gzgetc returns this byte or -1 ++ in case of end of file or error. This is implemented as a macro for speed. ++ As such, it does not do all of the checking the other functions do. I.e. ++ it does not check to see if file is NULL, nor whether the structure file ++ points to has been clobbered or not. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzungetc(int c, gzFile file); ++/* ++ Push c back onto the stream for file to be read as the first character on ++ the next read. At least one character of push-back is always allowed. ++ gzungetc() returns the character pushed, or -1 on failure. gzungetc() will ++ fail if c is -1, and may fail if a character has been pushed but not read ++ yet. If gzungetc is used immediately after gzopen or gzdopen, at least the ++ output buffer size of pushed characters is allowed. (See gzbuffer above.) ++ The pushed character will be discarded if the stream is repositioned with ++ gzseek() or gzrewind(). 
++*/ ++ ++Z_EXTERN int Z_EXPORT gzflush(gzFile file, int flush); ++/* ++ Flush all pending output to file. The parameter flush is as in the ++ deflate() function. The return value is the zlib error number (see function ++ gzerror below). gzflush is only permitted when writing. ++ ++ If the flush parameter is Z_FINISH, the remaining data is written and the ++ gzip stream is completed in the output. If gzwrite() is called again, a new ++ gzip stream will be started in the output. gzread() is able to read such ++ concatenated gzip streams. ++ ++ gzflush should be called only when strictly necessary because it will ++ degrade compression if called too often. ++*/ ++ ++/* ++Z_EXTERN z_off_t Z_EXPORT gzseek (gzFile file, z_off_t offset, int whence); ++ ++ Set the starting position to offset relative to whence for the next gzread ++ or gzwrite on file. The offset represents a number of bytes in the ++ uncompressed data stream. The whence parameter is defined as in lseek(2); ++ the value SEEK_END is not supported. ++ ++ If the file is opened for reading, this function is emulated but can be ++ extremely slow. If the file is opened for writing, only forward seeks are ++ supported; gzseek then compresses a sequence of zeroes up to the new ++ starting position. ++ ++ gzseek returns the resulting offset location as measured in bytes from ++ the beginning of the uncompressed stream, or -1 in case of error, in ++ particular if the file is opened for writing and the new starting position ++ would be before the current position. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzrewind(gzFile file); ++/* ++ Rewind file. This function is supported only for reading. ++ ++ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). ++*/ ++ ++/* ++Z_EXTERN z_off_t Z_EXPORT gztell(gzFile file); ++ ++ Return the starting position for the next gzread or gzwrite on file. 
++ This position represents a number of bytes in the uncompressed data stream, ++ and is zero when starting, even if appending or reading a gzip stream from ++ the middle of a file using gzdopen(). ++ ++ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) ++*/ ++ ++/* ++Z_EXTERN z_off_t Z_EXPORT gzoffset(gzFile file); ++ ++ Return the current compressed (actual) read or write offset of file. This ++ offset includes the count of bytes that precede the gzip stream, for example ++ when appending or when using gzdopen() for reading. When reading, the ++ offset does not include as yet unused buffered input. This information can ++ be used for a progress indicator. On error, gzoffset() returns -1. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzeof(gzFile file); ++/* ++ Return true (1) if the end-of-file indicator for file has been set while ++ reading, false (0) otherwise. Note that the end-of-file indicator is set ++ only if the read tried to go past the end of the input, but came up short. ++ Therefore, just like feof(), gzeof() may return false even if there is no ++ more data to read, in the event that the last read request was for the exact ++ number of bytes remaining in the input file. This will happen if the input ++ file size is an exact multiple of the buffer size. ++ ++ If gzeof() returns true, then the read functions will return no more data, ++ unless the end-of-file indicator is reset by gzclearerr() and the input file ++ has grown since the previous end of file was detected. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzdirect(gzFile file); ++/* ++ Return true (1) if file is being copied directly while reading, or false ++ (0) if file is a gzip stream being decompressed. ++ ++ If the input file is empty, gzdirect() will return true, since the input ++ does not contain a gzip stream. ++ ++ If gzdirect() is used immediately after gzopen() or gzdopen() it will ++ cause buffers to be allocated to allow reading the file to determine if it ++ is a gzip file. 
Therefore if gzbuffer() is used, it should be called before ++ gzdirect(). ++ ++ When writing, gzdirect() returns true (1) if transparent writing was ++ requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: ++ gzdirect() is not needed when writing. Transparent writing must be ++ explicitly requested, so the application already knows the answer. When ++ linking statically, using gzdirect() will include all of the zlib code for ++ gzip file reading and decompression, which may not be desired.) ++*/ ++ ++Z_EXTERN int Z_EXPORT gzclose(gzFile file); ++/* ++ Flush all pending output for file, if necessary, close file and ++ deallocate the (de)compression state. Note that once file is closed, you ++ cannot call gzerror with file, since its structures have been deallocated. ++ gzclose must not be called more than once on the same file, just as free ++ must not be called more than once on the same allocation. ++ ++ gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a ++ file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the ++ last read ended in the middle of a gzip stream, or Z_OK on success. ++*/ ++ ++Z_EXTERN int Z_EXPORT gzclose_r(gzFile file); ++Z_EXTERN int Z_EXPORT gzclose_w(gzFile file); ++/* ++ Same as gzclose(), but gzclose_r() is only for use when reading, and ++ gzclose_w() is only for use when writing or appending. The advantage to ++ using these instead of gzclose() is that they avoid linking in zlib ++ compression or decompression code that is not used when only reading or only ++ writing respectively. If gzclose() is used, then both compression and ++ decompression code will be included in the application when linking to a static ++ zlib library. ++*/ ++ ++Z_EXTERN const char * Z_EXPORT gzerror(gzFile file, int *errnum); ++/* ++ Return the error message for the last error which occurred on file. ++ errnum is set to zlib error number.
If an error occurred in the file system ++ and not in the compression library, errnum is set to Z_ERRNO and the ++ application may consult errno to get the exact error code. ++ ++ The application must not modify the returned string. Future calls to ++ this function may invalidate the previously returned string. If file is ++ closed, then the string previously returned by gzerror will no longer be ++ available. ++ ++ gzerror() should be used to distinguish errors from end-of-file for those ++ functions above that do not distinguish those cases in their return values. ++*/ ++ ++Z_EXTERN void Z_EXPORT gzclearerr(gzFile file); ++/* ++ Clear the error and end-of-file flags for file. This is analogous to the ++ clearerr() function in stdio. This is useful for continuing to read a gzip ++ file that is being written concurrently. ++*/ ++ ++#endif ++ ++ /* checksum functions */ ++ ++/* ++ These functions are not related to compression but are exported ++ anyway because they might be useful in applications using the compression ++ library. ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT adler32(unsigned long adler, const unsigned char *buf, unsigned int len); ++/* ++ Update a running Adler-32 checksum with the bytes buf[0..len-1] and ++ return the updated checksum. An Adler-32 value is in the range of a 32-bit ++ unsigned integer. If buf is Z_NULL, this function returns the required ++ initial value for the checksum. ++ ++ An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed ++ much faster. ++ ++ Usage example: ++ ++ uint32_t adler = adler32(0L, NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ adler = adler32(adler, buffer, length); ++ } ++ if (adler != original_adler) error(); ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT adler32_z(unsigned long adler, const unsigned char *buf, size_t len); ++/* ++ Same as adler32(), but with a size_t length. 
++*/ ++ ++/* ++Z_EXTERN unsigned long Z_EXPORT adler32_combine(unsigned long adler1, unsigned long adler2, z_off_t len2); ++ ++ Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 ++ and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for ++ each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of ++ seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note ++ that the z_off_t type (like off_t) is a signed integer. If len2 is ++ negative, the result has no meaning or utility. ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT crc32(unsigned long crc, const unsigned char *buf, unsigned int len); ++/* ++ Update a running CRC-32 with the bytes buf[0..len-1] and return the ++ updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. ++ If buf is Z_NULL, this function returns the required initial value for the ++ crc. Pre- and post-conditioning (one's complement) is performed within this ++ function so it shouldn't be done by the application. ++ ++ Usage example: ++ ++ uint32_t crc = crc32(0L, NULL, 0); ++ ++ while (read_buffer(buffer, length) != EOF) { ++ crc = crc32(crc, buffer, length); ++ } ++ if (crc != original_crc) error(); ++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT crc32_z(unsigned long crc, const unsigned char *buf, size_t len); ++/* ++ Same as crc32(), but with a size_t length. ++*/ ++ ++/* ++Z_EXTERN unsigned long Z_EXPORT crc32_combine(unsigned long crc1, unsigned long crc2, z_off64_t len2); ++ ++ Combine two CRC-32 check values into one. For two sequences of bytes, ++ seq1 and seq2 with lengths len1 and len2, CRC-32 check values were ++ calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 ++ check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and ++ len2. ++*/ ++ ++/* ++Z_EXTERN unsigned long Z_EXPORT crc32_combine_gen(z_off_t len2); ++ ++ Return the operator corresponding to length len2, to be used with ++ crc32_combine_op(). 
++*/ ++ ++Z_EXTERN unsigned long Z_EXPORT crc32_combine_op(unsigned long crc1, unsigned long crc2, ++ const unsigned long op); ++/* ++ Give the same result as crc32_combine(), using op in place of len2. op ++ is generated from len2 by crc32_combine_gen(). This will be faster than ++ crc32_combine() if the generated op is used more than once. ++*/ ++ ++ ++ /* various hacks, don't look :) */ ++ ++/* deflateInit and inflateInit are macros to allow checking the zlib version ++ * and the compiler's view of z_stream: ++ */ ++Z_EXTERN int Z_EXPORT deflateInit_(z_stream *strm, int level, const char *version, int stream_size); ++Z_EXTERN int Z_EXPORT inflateInit_(z_stream *strm, const char *version, int stream_size); ++Z_EXTERN int Z_EXPORT deflateInit2_(z_stream *strm, int level, int method, int windowBits, int memLevel, ++ int strategy, const char *version, int stream_size); ++Z_EXTERN int Z_EXPORT inflateInit2_(z_stream *strm, int windowBits, const char *version, int stream_size); ++Z_EXTERN int Z_EXPORT inflateBackInit_(z_stream *strm, int windowBits, unsigned char *window, ++ const char *version, int stream_size); ++#define @ZLIB_SYMBOL_PREFIX@deflateInit(strm, level) deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) ++#define @ZLIB_SYMBOL_PREFIX@inflateInit(strm) inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) ++#define @ZLIB_SYMBOL_PREFIX@deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ ++ deflateInit2_((strm), (level), (method), (windowBits), (memLevel), \ ++ (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) ++#define @ZLIB_SYMBOL_PREFIX@inflateInit2(strm, windowBits) inflateInit2_((strm), (windowBits), ZLIB_VERSION, (int)sizeof(z_stream)) ++#define @ZLIB_SYMBOL_PREFIX@inflateBackInit(strm, windowBits, window) \ ++ inflateBackInit_((strm), (windowBits), (window), ZLIB_VERSION, (int)sizeof(z_stream)) ++ ++ ++#ifndef Z_SOLO ++/* gzgetc() macro and its supporting function and exposed data structure.
Note ++ * that the real internal state is much larger than the exposed structure. ++ * This abbreviated structure exposes just enough for the gzgetc() macro. The ++ * user should not mess with these exposed elements, since their names or ++ * behavior could change in the future, perhaps even capriciously. They can ++ * only be used by the gzgetc() macro. You have been warned. ++ */ ++struct gzFile_s { ++ unsigned have; ++ unsigned char *next; ++ z_off64_t pos; ++}; ++Z_EXTERN int Z_EXPORT gzgetc_(gzFile file); /* backward compatibility */ ++# define @ZLIB_SYMBOL_PREFIX@gzgetc(g) ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (@ZLIB_SYMBOL_PREFIX@gzgetc)(g)) ++ ++/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or ++ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if ++ * both are true, the application gets the *64 functions, and the regular ++ * functions are changed to 64 bits) -- in case these are set on systems ++ * without large file support, _LFS64_LARGEFILE must also be true ++ */ ++#ifdef Z_LARGE64 ++ Z_EXTERN gzFile Z_EXPORT gzopen64(const char *, const char *); ++ Z_EXTERN z_off64_t Z_EXPORT gzseek64(gzFile, z_off64_t, int); ++ Z_EXTERN z_off64_t Z_EXPORT gztell64(gzFile); ++ Z_EXTERN z_off64_t Z_EXPORT gzoffset64(gzFile); ++ Z_EXTERN unsigned long Z_EXPORT adler32_combine64(unsigned long, unsigned long, z_off64_t); ++ Z_EXTERN unsigned long Z_EXPORT crc32_combine64(unsigned long, unsigned long, z_off64_t); ++ Z_EXTERN unsigned long Z_EXPORT crc32_combine_gen64(z_off64_t); ++#endif ++#endif ++ ++#if !defined(Z_SOLO) && !defined(Z_INTERNAL) && defined(Z_WANT64) ++# define @ZLIB_SYMBOL_PREFIX@gzopen @ZLIB_SYMBOL_PREFIX@gzopen64 ++# define @ZLIB_SYMBOL_PREFIX@gzseek @ZLIB_SYMBOL_PREFIX@gzseek64 ++# define @ZLIB_SYMBOL_PREFIX@gztell @ZLIB_SYMBOL_PREFIX@gztell64 ++# define @ZLIB_SYMBOL_PREFIX@gzoffset @ZLIB_SYMBOL_PREFIX@gzoffset64 ++# define @ZLIB_SYMBOL_PREFIX@adler32_combine 
@ZLIB_SYMBOL_PREFIX@adler32_combine64 ++# define @ZLIB_SYMBOL_PREFIX@crc32_combine @ZLIB_SYMBOL_PREFIX@crc32_combine64 ++# define @ZLIB_SYMBOL_PREFIX@crc32_combine_gen @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 ++# ifndef Z_LARGE64 ++ Z_EXTERN gzFile Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzopen64(const char *, const char *); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzseek64(gzFile, z_off_t, int); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gztell64(gzFile); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzoffset64(gzFile); ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@adler32_combine64(unsigned long, unsigned long, z_off_t); ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@crc32_combine64(unsigned long, unsigned long, z_off_t); ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64(z_off64_t); ++# endif ++#else ++# ifndef Z_SOLO ++ Z_EXTERN gzFile Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzopen(const char *, const char *); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzseek(gzFile, z_off_t, int); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gztell(gzFile); ++ Z_EXTERN z_off_t Z_EXPORT @ZLIB_SYMBOL_PREFIX@gzoffset(gzFile); ++# endif ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@adler32_combine(unsigned long, unsigned long, z_off_t); ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@crc32_combine(unsigned long, unsigned long, z_off_t); ++ Z_EXTERN unsigned long Z_EXPORT @ZLIB_SYMBOL_PREFIX@crc32_combine_gen(z_off_t); ++#endif ++ ++/* undocumented functions */ ++Z_EXTERN const char * Z_EXPORT zError (int); ++Z_EXTERN int Z_EXPORT inflateSyncPoint (z_stream *); ++Z_EXTERN const uint32_t * Z_EXPORT get_crc_table (void); ++Z_EXTERN int Z_EXPORT inflateUndermine (z_stream *, int); ++Z_EXTERN int Z_EXPORT inflateValidate (z_stream *, int); ++Z_EXTERN unsigned long Z_EXPORT inflateCodesUsed (z_stream *); ++Z_EXTERN int Z_EXPORT inflateResetKeep (z_stream *); ++Z_EXTERN int Z_EXPORT deflateResetKeep (z_stream *); ++ ++#ifndef 
Z_SOLO ++#if defined(_WIN32) ++ Z_EXTERN gzFile Z_EXPORT gzopen_w(const wchar_t *path, const char *mode); ++#endif ++Z_EXTERN int Z_EXPORTVA gzvprintf(gzFile file, const char *format, va_list va); ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* ZLIB_H_ */ +diff --git a/zlib.map b/zlib.map +index f608f2b..ebca10d 100644 +--- a/zlib.map ++++ b/zlib.map +@@ -9,13 +9,10 @@ ZLIB_1.2.0 { + local: + deflate_copyright; + inflate_copyright; +- inflate_fast; +- inflate_table; + zcalloc; + zcfree; + z_errmsg; + gz_error; +- gz_intmax; + _*; + }; + +diff --git a/zlib.pc.cmakein b/zlib.pc.cmakein +index 9b64252..df8bf9f 100644 +--- a/zlib.pc.cmakein ++++ b/zlib.pc.cmakein +@@ -1,5 +1,6 @@ + prefix=@CMAKE_INSTALL_PREFIX@ + exec_prefix=${prefix} ++symbol_prefix=@ZLIB_SYMBOL_PREFIX@ + libdir=@PC_LIB_INSTALL_DIR@ + sharedlibdir=${libdir} + includedir=@PC_INC_INSTALL_DIR@ +@@ -10,4 +11,4 @@ Version: @ZLIB_FULL_VERSION@ + + Requires: + Libs: -L${libdir} -L${sharedlibdir} -lz@SUFFIX@ +-Cflags: -I${includedir} ++Cflags: -I${includedir} @PKG_CONFIG_CFLAGS@ +diff --git a/zlib.pc.in b/zlib.pc.in +index d0a6766..45b3598 100644 +--- a/zlib.pc.in ++++ b/zlib.pc.in +@@ -1,5 +1,6 @@ + prefix=@prefix@ + exec_prefix=@exec_prefix@ ++symbol_prefix=@symbol_prefix@ + libdir=@libdir@ + sharedlibdir=@sharedlibdir@ + includedir=@includedir@ +@@ -10,4 +11,4 @@ Version: @VERSION@ + + Requires: + Libs: -L${libdir} -L${sharedlibdir} -lz@SUFFIX@ +-Cflags: -I${includedir} ++Cflags: -I${includedir} @PKG_CONFIG_CFLAGS@ +diff --git a/zlib_name_mangling-ng.h.in b/zlib_name_mangling-ng.h.in +new file mode 100644 +index 0000000..e90904a +--- /dev/null ++++ b/zlib_name_mangling-ng.h.in +@@ -0,0 +1,178 @@ ++/* zlib_name_mangling.h has been automatically generated from ++ * zlib_name_mangling.h.in because ZLIB_SYMBOL_PREFIX was set. 
++ */ ++ ++#ifndef ZLIB_NAME_MANGLING_H ++#define ZLIB_NAME_MANGLING_H ++ ++/* all linked symbols and init macros */ ++#define zng__dist_code @ZLIB_SYMBOL_PREFIX@zng__dist_code ++#define zng__length_code @ZLIB_SYMBOL_PREFIX@zng__length_code ++#define zng__tr_align @ZLIB_SYMBOL_PREFIX@zng__tr_align ++#define zng__tr_flush_bits @ZLIB_SYMBOL_PREFIX@zng__tr_flush_bits ++#define zng__tr_flush_block @ZLIB_SYMBOL_PREFIX@zng__tr_flush_block ++#define zng__tr_init @ZLIB_SYMBOL_PREFIX@zng__tr_init ++#define zng__tr_stored_block @ZLIB_SYMBOL_PREFIX@zng__tr_stored_block ++#define zng__tr_tally @ZLIB_SYMBOL_PREFIX@zng__tr_tally ++#define zng_adler32 @ZLIB_SYMBOL_PREFIX@zng_adler32 ++#define zng_adler32_combine @ZLIB_SYMBOL_PREFIX@zng_adler32_combine ++#define zng_adler32_combine64 @ZLIB_SYMBOL_PREFIX@zng_adler32_combine64 ++#define zng_adler32_z @ZLIB_SYMBOL_PREFIX@zng_adler32_z ++#define zng_compress @ZLIB_SYMBOL_PREFIX@zng_compress ++#define zng_compress2 @ZLIB_SYMBOL_PREFIX@zng_compress2 ++#define zng_compressBound @ZLIB_SYMBOL_PREFIX@zng_compressBound ++#define zng_crc32 @ZLIB_SYMBOL_PREFIX@zng_crc32 ++#define zng_crc32_combine @ZLIB_SYMBOL_PREFIX@zng_crc32_combine ++#define zng_crc32_combine64 @ZLIB_SYMBOL_PREFIX@zng_crc32_combine64 ++#define zng_crc32_combine_gen @ZLIB_SYMBOL_PREFIX@zng_crc32_combine_gen ++#define zng_crc32_combine_gen64 @ZLIB_SYMBOL_PREFIX@zng_crc32_combine_gen64 ++#define zng_crc32_combine_op @ZLIB_SYMBOL_PREFIX@zng_crc32_combine_op ++#define zng_crc32_z @ZLIB_SYMBOL_PREFIX@zng_crc32_z ++#define zng_deflate @ZLIB_SYMBOL_PREFIX@zng_deflate ++#define zng_deflateBound @ZLIB_SYMBOL_PREFIX@zng_deflateBound ++#define zng_deflateCopy @ZLIB_SYMBOL_PREFIX@zng_deflateCopy ++#define zng_deflateEnd @ZLIB_SYMBOL_PREFIX@zng_deflateEnd ++#define zng_deflateGetDictionary @ZLIB_SYMBOL_PREFIX@zng_deflateGetDictionary ++#define zng_deflateInit @ZLIB_SYMBOL_PREFIX@zng_deflateInit ++#define zng_deflateInit2 @ZLIB_SYMBOL_PREFIX@zng_deflateInit2 ++#define zng_deflateInit2_ 
@ZLIB_SYMBOL_PREFIX@zng_deflateInit2_ ++#define zng_deflateInit_ @ZLIB_SYMBOL_PREFIX@zng_deflateInit_ ++#define zng_deflateParams @ZLIB_SYMBOL_PREFIX@zng_deflateParams ++#define zng_deflatePending @ZLIB_SYMBOL_PREFIX@zng_deflatePending ++#define zng_deflatePrime @ZLIB_SYMBOL_PREFIX@zng_deflatePrime ++#define zng_deflateReset @ZLIB_SYMBOL_PREFIX@zng_deflateReset ++#define zng_deflateResetKeep @ZLIB_SYMBOL_PREFIX@zng_deflateResetKeep ++#define zng_deflateSetDictionary @ZLIB_SYMBOL_PREFIX@zng_deflateSetDictionary ++#define zng_deflateSetHeader @ZLIB_SYMBOL_PREFIX@zng_deflateSetHeader ++#define zng_deflateTune @ZLIB_SYMBOL_PREFIX@zng_deflateTune ++#define zng_deflate_copyright @ZLIB_SYMBOL_PREFIX@zng_deflate_copyright ++#define zng_fill_window @ZLIB_SYMBOL_PREFIX@zng_fill_window ++#define zng_fixedtables @ZLIB_SYMBOL_PREFIX@zng_fixedtables ++#define zng_flush_pending @ZLIB_SYMBOL_PREFIX@zng_flush_pending ++#define zng_get_crc_table @ZLIB_SYMBOL_PREFIX@zng_get_crc_table ++#ifdef WITH_GZFILEOP ++# define zng_gz_error @ZLIB_SYMBOL_PREFIX@zng_gz_error ++# define zng_gz_strwinerror @ZLIB_SYMBOL_PREFIX@zng_gz_strwinerror ++# define zng_gzbuffer @ZLIB_SYMBOL_PREFIX@zng_gzbuffer ++# define zng_gzclearerr @ZLIB_SYMBOL_PREFIX@zng_gzclearerr ++# define zng_gzclose @ZLIB_SYMBOL_PREFIX@zng_gzclose ++# define zng_gzclose_r @ZLIB_SYMBOL_PREFIX@zng_gzclose_r ++# define zng_gzclose_w @ZLIB_SYMBOL_PREFIX@zng_gzclose_w ++# define zng_gzdirect @ZLIB_SYMBOL_PREFIX@zng_gzdirect ++# define zng_gzdopen @ZLIB_SYMBOL_PREFIX@zng_gzdopen ++# define zng_gzeof @ZLIB_SYMBOL_PREFIX@zng_gzeof ++# define zng_gzerror @ZLIB_SYMBOL_PREFIX@zng_gzerror ++# define zng_gzflush @ZLIB_SYMBOL_PREFIX@zng_gzflush ++# define zng_gzfread @ZLIB_SYMBOL_PREFIX@zng_gzfread ++# define zng_gzfwrite @ZLIB_SYMBOL_PREFIX@zng_gzfwrite ++# define zng_gzgetc @ZLIB_SYMBOL_PREFIX@zng_gzgetc ++# define zng_gzgetc_ @ZLIB_SYMBOL_PREFIX@zng_gzgetc_ ++# define zng_gzgets @ZLIB_SYMBOL_PREFIX@zng_gzgets ++# define zng_gzoffset 
@ZLIB_SYMBOL_PREFIX@zng_gzoffset ++# define zng_gzoffset64 @ZLIB_SYMBOL_PREFIX@zng_gzoffset64 ++# define zng_gzopen @ZLIB_SYMBOL_PREFIX@zng_gzopen ++# define zng_gzopen64 @ZLIB_SYMBOL_PREFIX@zng_gzopen64 ++# ifdef _WIN32 ++# define zng_gzopen_w @ZLIB_SYMBOL_PREFIX@zng_gzopen_w ++# endif ++# define zng_gzprintf @ZLIB_SYMBOL_PREFIX@zng_gzprintf ++# define zng_gzputc @ZLIB_SYMBOL_PREFIX@zng_gzputc ++# define zng_gzputs @ZLIB_SYMBOL_PREFIX@zng_gzputs ++# define zng_gzread @ZLIB_SYMBOL_PREFIX@zng_gzread ++# define zng_gzrewind @ZLIB_SYMBOL_PREFIX@zng_gzrewind ++# define zng_gzseek @ZLIB_SYMBOL_PREFIX@zng_gzseek ++# define zng_gzseek64 @ZLIB_SYMBOL_PREFIX@zng_gzseek64 ++# define zng_gzsetparams @ZLIB_SYMBOL_PREFIX@zng_gzsetparams ++# define zng_gztell @ZLIB_SYMBOL_PREFIX@zng_gztell ++# define zng_gztell64 @ZLIB_SYMBOL_PREFIX@zng_gztell64 ++# define zng_gzungetc @ZLIB_SYMBOL_PREFIX@zng_gzungetc ++# define zng_gzvprintf @ZLIB_SYMBOL_PREFIX@zng_gzvprintf ++# define zng_gzwrite @ZLIB_SYMBOL_PREFIX@zng_gzwrite ++#endif ++#define zng_inflate @ZLIB_SYMBOL_PREFIX@zng_inflate ++#define zng_inflateBack @ZLIB_SYMBOL_PREFIX@zng_inflateBack ++#define zng_inflateBackEnd @ZLIB_SYMBOL_PREFIX@zng_inflateBackEnd ++#define zng_inflateBackInit @ZLIB_SYMBOL_PREFIX@zng_inflateBackInit ++#define zng_inflateBackInit_ @ZLIB_SYMBOL_PREFIX@zng_inflateBackInit_ ++#define zng_inflateCodesUsed @ZLIB_SYMBOL_PREFIX@zng_inflateCodesUsed ++#define zng_inflateCopy @ZLIB_SYMBOL_PREFIX@zng_inflateCopy ++#define zng_inflateEnd @ZLIB_SYMBOL_PREFIX@zng_inflateEnd ++#define zng_inflateGetDictionary @ZLIB_SYMBOL_PREFIX@zng_inflateGetDictionary ++#define zng_inflateGetHeader @ZLIB_SYMBOL_PREFIX@zng_inflateGetHeader ++#define zng_inflateInit @ZLIB_SYMBOL_PREFIX@zng_inflateInit ++#define zng_inflateInit2 @ZLIB_SYMBOL_PREFIX@zng_inflateInit2 ++#define zng_inflateInit2_ @ZLIB_SYMBOL_PREFIX@zng_inflateInit2_ ++#define zng_inflateInit_ @ZLIB_SYMBOL_PREFIX@zng_inflateInit_ ++#define zng_inflateMark 
@ZLIB_SYMBOL_PREFIX@zng_inflateMark ++#define zng_inflatePrime @ZLIB_SYMBOL_PREFIX@zng_inflatePrime ++#define zng_inflateReset @ZLIB_SYMBOL_PREFIX@zng_inflateReset ++#define zng_inflateReset2 @ZLIB_SYMBOL_PREFIX@zng_inflateReset2 ++#define zng_inflateResetKeep @ZLIB_SYMBOL_PREFIX@zng_inflateResetKeep ++#define zng_inflateSetDictionary @ZLIB_SYMBOL_PREFIX@zng_inflateSetDictionary ++#define zng_inflateSync @ZLIB_SYMBOL_PREFIX@zng_inflateSync ++#define zng_inflateSyncPoint @ZLIB_SYMBOL_PREFIX@zng_inflateSyncPoint ++#define zng_inflateUndermine @ZLIB_SYMBOL_PREFIX@zng_inflateUndermine ++#define zng_inflateValidate @ZLIB_SYMBOL_PREFIX@zng_inflateValidate ++#define zng_inflate_copyright @ZLIB_SYMBOL_PREFIX@zng_inflate_copyright ++#define zng_inflate_ensure_window @ZLIB_SYMBOL_PREFIX@zng_inflate_ensure_window ++#define zng_inflate_fast @ZLIB_SYMBOL_PREFIX@zng_inflate_fast ++#define zng_inflate_table @ZLIB_SYMBOL_PREFIX@zng_inflate_table ++#define zng_read_buf @ZLIB_SYMBOL_PREFIX@zng_read_buf ++#define zng_uncompress @ZLIB_SYMBOL_PREFIX@zng_uncompress ++#define zng_uncompress2 @ZLIB_SYMBOL_PREFIX@zng_uncompress2 ++#define zng_zError @ZLIB_SYMBOL_PREFIX@zng_zError ++#define zng_zcalloc @ZLIB_SYMBOL_PREFIX@zng_zcalloc ++#define zng_zcfree @ZLIB_SYMBOL_PREFIX@zng_zcfree ++#define zng_zlibCompileFlags @ZLIB_SYMBOL_PREFIX@zng_zlibCompileFlags ++#define zng_zlibVersion @ZLIB_SYMBOL_PREFIX@zng_zlibVersion ++ ++/* all zlib typedefs in zlib.h and zconf.h */ ++#define Byte @ZLIB_SYMBOL_PREFIX@Byte ++#define Bytef @ZLIB_SYMBOL_PREFIX@Bytef ++#define alloc_func @ZLIB_SYMBOL_PREFIX@alloc_func ++#define charf @ZLIB_SYMBOL_PREFIX@charf ++#define free_func @ZLIB_SYMBOL_PREFIX@free_func ++#ifdef WITH_GZFILEOP ++# define gzFile @ZLIB_SYMBOL_PREFIX@gzFile ++#endif ++#define gz_header @ZLIB_SYMBOL_PREFIX@gz_header ++#define gz_headerp @ZLIB_SYMBOL_PREFIX@gz_headerp ++#define in_func @ZLIB_SYMBOL_PREFIX@in_func ++#define intf @ZLIB_SYMBOL_PREFIX@intf ++#define out_func 
@ZLIB_SYMBOL_PREFIX@out_func ++#define uInt @ZLIB_SYMBOL_PREFIX@uInt ++#define uIntf @ZLIB_SYMBOL_PREFIX@uIntf ++#define uLong @ZLIB_SYMBOL_PREFIX@uLong ++#define uLongf @ZLIB_SYMBOL_PREFIX@uLongf ++#define voidp @ZLIB_SYMBOL_PREFIX@voidp ++#define voidpc @ZLIB_SYMBOL_PREFIX@voidpc ++#define voidpf @ZLIB_SYMBOL_PREFIX@voidpf ++ ++/* all zlib structs in zlib.h and zconf.h */ ++#define zng_gz_header_s @ZLIB_SYMBOL_PREFIX@zng_gz_header_s ++#define internal_state @ZLIB_SYMBOL_PREFIX@internal_state ++ ++/* zlib-ng specific symbols */ ++#define zng_deflate_param @ZLIB_SYMBOL_PREFIX@zng_deflate_param ++#define zng_deflate_param_value @ZLIB_SYMBOL_PREFIX@zng_deflate_param_value ++#define zng_deflateSetParams @ZLIB_SYMBOL_PREFIX@zng_deflateSetParams ++#define zng_deflateGetParams @ZLIB_SYMBOL_PREFIX@zng_deflateGetParams ++ ++#define zlibng_version @ZLIB_SYMBOL_PREFIX@zlibng_version ++#define zng_vstring @ZLIB_SYMBOL_PREFIX@zng_vstring ++#define zng_zError @ZLIB_SYMBOL_PREFIX@zng_zError ++ ++#define zng_alloc_aligned @ZLIB_SYMBOL_PREFIX@zng_alloc_aligned ++#define zng_free_aligned @ZLIB_SYMBOL_PREFIX@zng_free_aligned ++#define zng_get_crc_table @ZLIB_SYMBOL_PREFIX@zng_get_crc_table ++#define zng_inflateSyncPoint @ZLIB_SYMBOL_PREFIX@zng_inflateSyncPoint ++#define zng_inflateUndermine @ZLIB_SYMBOL_PREFIX@zng_inflateUndermine ++#define zng_inflateValidate @ZLIB_SYMBOL_PREFIX@zng_inflateValidate ++#define zng_inflateCodesUsed @ZLIB_SYMBOL_PREFIX@zng_inflateCodesUsed ++#define zng_inflateResetKeep @ZLIB_SYMBOL_PREFIX@zng_inflateResetKeep ++#define zng_deflateResetKeep @ZLIB_SYMBOL_PREFIX@zng_deflateResetKeep ++ ++#define zng_gzopen_w @ZLIB_SYMBOL_PREFIX@zng_gzopen_w ++#define zng_gzvprintf @ZLIB_SYMBOL_PREFIX@zng_gzvprintf ++ ++#endif /* ZLIB_NAME_MANGLING_H */ +diff --git a/zlib_name_mangling.h.empty b/zlib_name_mangling.h.empty +new file mode 100644 +index 0000000..b24cb83 +--- /dev/null ++++ b/zlib_name_mangling.h.empty +@@ -0,0 +1,8 @@ ++/* zlib_name_mangling.h has been 
automatically generated from ++ * zlib_name_mangling.h.empty because ZLIB_SYMBOL_PREFIX was NOT set. ++ */ ++ ++#ifndef ZLIB_NAME_MANGLING_H ++#define ZLIB_NAME_MANGLING_H ++ ++#endif /* ZLIB_NAME_MANGLING_H */ +diff --git a/zlib_name_mangling.h.in b/zlib_name_mangling.h.in +new file mode 100644 +index 0000000..f496158 +--- /dev/null ++++ b/zlib_name_mangling.h.in +@@ -0,0 +1,170 @@ ++/* zlib_name_mangling.h has been automatically generated from ++ * zlib_name_mangling.h.in because ZLIB_SYMBOL_PREFIX was set. ++ */ ++ ++#ifndef ZLIB_NAME_MANGLING_H ++#define ZLIB_NAME_MANGLING_H ++ ++/* all linked symbols and init macros */ ++#define _dist_code @ZLIB_SYMBOL_PREFIX@_dist_code ++#define _length_code @ZLIB_SYMBOL_PREFIX@_length_code ++#define _tr_align @ZLIB_SYMBOL_PREFIX@_tr_align ++#define _tr_flush_bits @ZLIB_SYMBOL_PREFIX@_tr_flush_bits ++#define _tr_flush_block @ZLIB_SYMBOL_PREFIX@_tr_flush_block ++#define _tr_init @ZLIB_SYMBOL_PREFIX@_tr_init ++#define _tr_stored_block @ZLIB_SYMBOL_PREFIX@_tr_stored_block ++#define _tr_tally @ZLIB_SYMBOL_PREFIX@_tr_tally ++#define adler32 @ZLIB_SYMBOL_PREFIX@adler32 ++#define adler32_combine @ZLIB_SYMBOL_PREFIX@adler32_combine ++#define adler32_combine64 @ZLIB_SYMBOL_PREFIX@adler32_combine64 ++#define adler32_z @ZLIB_SYMBOL_PREFIX@adler32_z ++#ifndef Z_SOLO ++# define compress @ZLIB_SYMBOL_PREFIX@compress ++# define compress2 @ZLIB_SYMBOL_PREFIX@compress2 ++# define compressBound @ZLIB_SYMBOL_PREFIX@compressBound ++#endif ++#define crc32 @ZLIB_SYMBOL_PREFIX@crc32 ++#define crc32_combine @ZLIB_SYMBOL_PREFIX@crc32_combine ++#define crc32_combine64 @ZLIB_SYMBOL_PREFIX@crc32_combine64 ++#define crc32_combine_gen @ZLIB_SYMBOL_PREFIX@crc32_combine_gen ++#define crc32_combine_gen64 @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 ++#define crc32_combine_op @ZLIB_SYMBOL_PREFIX@crc32_combine_op ++#define crc32_z @ZLIB_SYMBOL_PREFIX@crc32_z ++#define deflate @ZLIB_SYMBOL_PREFIX@deflate ++#define deflateBound @ZLIB_SYMBOL_PREFIX@deflateBound 
++#define deflateCopy @ZLIB_SYMBOL_PREFIX@deflateCopy ++#define deflateEnd @ZLIB_SYMBOL_PREFIX@deflateEnd ++#define deflateGetDictionary @ZLIB_SYMBOL_PREFIX@deflateGetDictionary ++#define deflateInit @ZLIB_SYMBOL_PREFIX@deflateInit ++#define deflateInit2 @ZLIB_SYMBOL_PREFIX@deflateInit2 ++#define deflateInit2_ @ZLIB_SYMBOL_PREFIX@deflateInit2_ ++#define deflateInit_ @ZLIB_SYMBOL_PREFIX@deflateInit_ ++#define deflateParams @ZLIB_SYMBOL_PREFIX@deflateParams ++#define deflatePending @ZLIB_SYMBOL_PREFIX@deflatePending ++#define deflatePrime @ZLIB_SYMBOL_PREFIX@deflatePrime ++#define deflateReset @ZLIB_SYMBOL_PREFIX@deflateReset ++#define deflateResetKeep @ZLIB_SYMBOL_PREFIX@deflateResetKeep ++#define deflateSetDictionary @ZLIB_SYMBOL_PREFIX@deflateSetDictionary ++#define deflateSetHeader @ZLIB_SYMBOL_PREFIX@deflateSetHeader ++#define deflateTune @ZLIB_SYMBOL_PREFIX@deflateTune ++#define deflate_copyright @ZLIB_SYMBOL_PREFIX@deflate_copyright ++#define fill_window @ZLIB_SYMBOL_PREFIX@fill_window ++#define fixedtables @ZLIB_SYMBOL_PREFIX@fixedtables ++#define flush_pending @ZLIB_SYMBOL_PREFIX@flush_pending ++#define get_crc_table @ZLIB_SYMBOL_PREFIX@get_crc_table ++#ifndef Z_SOLO ++# define gz_error @ZLIB_SYMBOL_PREFIX@gz_error ++# define gz_strwinerror @ZLIB_SYMBOL_PREFIX@gz_strwinerror ++# define gzbuffer @ZLIB_SYMBOL_PREFIX@gzbuffer ++# define gzclearerr @ZLIB_SYMBOL_PREFIX@gzclearerr ++# define gzclose @ZLIB_SYMBOL_PREFIX@gzclose ++# define gzclose_r @ZLIB_SYMBOL_PREFIX@gzclose_r ++# define gzclose_w @ZLIB_SYMBOL_PREFIX@gzclose_w ++# define gzdirect @ZLIB_SYMBOL_PREFIX@gzdirect ++# define gzdopen @ZLIB_SYMBOL_PREFIX@gzdopen ++# define gzeof @ZLIB_SYMBOL_PREFIX@gzeof ++# define gzerror @ZLIB_SYMBOL_PREFIX@gzerror ++# define gzflush @ZLIB_SYMBOL_PREFIX@gzflush ++# define gzfread @ZLIB_SYMBOL_PREFIX@gzfread ++# define gzfwrite @ZLIB_SYMBOL_PREFIX@gzfwrite ++# define gzgetc @ZLIB_SYMBOL_PREFIX@gzgetc ++# define gzgetc_ @ZLIB_SYMBOL_PREFIX@gzgetc_ ++# define gzgets 
@ZLIB_SYMBOL_PREFIX@gzgets ++# define gzoffset @ZLIB_SYMBOL_PREFIX@gzoffset ++# define gzoffset64 @ZLIB_SYMBOL_PREFIX@gzoffset64 ++# define gzopen @ZLIB_SYMBOL_PREFIX@gzopen ++# define gzopen64 @ZLIB_SYMBOL_PREFIX@gzopen64 ++# ifdef _WIN32 ++# define gzopen_w @ZLIB_SYMBOL_PREFIX@gzopen_w ++# endif ++# define gzprintf @ZLIB_SYMBOL_PREFIX@gzprintf ++# define gzputc @ZLIB_SYMBOL_PREFIX@gzputc ++# define gzputs @ZLIB_SYMBOL_PREFIX@gzputs ++# define gzread @ZLIB_SYMBOL_PREFIX@gzread ++# define gzrewind @ZLIB_SYMBOL_PREFIX@gzrewind ++# define gzseek @ZLIB_SYMBOL_PREFIX@gzseek ++# define gzseek64 @ZLIB_SYMBOL_PREFIX@gzseek64 ++# define gzsetparams @ZLIB_SYMBOL_PREFIX@gzsetparams ++# define gztell @ZLIB_SYMBOL_PREFIX@gztell ++# define gztell64 @ZLIB_SYMBOL_PREFIX@gztell64 ++# define gzungetc @ZLIB_SYMBOL_PREFIX@gzungetc ++# define gzvprintf @ZLIB_SYMBOL_PREFIX@gzvprintf ++# define gzwrite @ZLIB_SYMBOL_PREFIX@gzwrite ++#endif ++#define inflate @ZLIB_SYMBOL_PREFIX@inflate ++#define inflateBack @ZLIB_SYMBOL_PREFIX@inflateBack ++#define inflateBackEnd @ZLIB_SYMBOL_PREFIX@inflateBackEnd ++#define inflateBackInit @ZLIB_SYMBOL_PREFIX@inflateBackInit ++#define inflateBackInit_ @ZLIB_SYMBOL_PREFIX@inflateBackInit_ ++#define inflateCodesUsed @ZLIB_SYMBOL_PREFIX@inflateCodesUsed ++#define inflateCopy @ZLIB_SYMBOL_PREFIX@inflateCopy ++#define inflateEnd @ZLIB_SYMBOL_PREFIX@inflateEnd ++#define inflateGetDictionary @ZLIB_SYMBOL_PREFIX@inflateGetDictionary ++#define inflateGetHeader @ZLIB_SYMBOL_PREFIX@inflateGetHeader ++#define inflateInit @ZLIB_SYMBOL_PREFIX@inflateInit ++#define inflateInit2 @ZLIB_SYMBOL_PREFIX@inflateInit2 ++#define inflateInit2_ @ZLIB_SYMBOL_PREFIX@inflateInit2_ ++#define inflateInit_ @ZLIB_SYMBOL_PREFIX@inflateInit_ ++#define inflateMark @ZLIB_SYMBOL_PREFIX@inflateMark ++#define inflatePrime @ZLIB_SYMBOL_PREFIX@inflatePrime ++#define inflateReset @ZLIB_SYMBOL_PREFIX@inflateReset ++#define inflateReset2 @ZLIB_SYMBOL_PREFIX@inflateReset2 ++#define inflateResetKeep 
@ZLIB_SYMBOL_PREFIX@inflateResetKeep ++#define inflateSetDictionary @ZLIB_SYMBOL_PREFIX@inflateSetDictionary ++#define inflateSync @ZLIB_SYMBOL_PREFIX@inflateSync ++#define inflateSyncPoint @ZLIB_SYMBOL_PREFIX@inflateSyncPoint ++#define inflateUndermine @ZLIB_SYMBOL_PREFIX@inflateUndermine ++#define inflateValidate @ZLIB_SYMBOL_PREFIX@inflateValidate ++#define inflate_copyright @ZLIB_SYMBOL_PREFIX@inflate_copyright ++#define inflate_ensure_window @ZLIB_SYMBOL_PREFIX@inflate_ensure_window ++#define inflate_fast @ZLIB_SYMBOL_PREFIX@inflate_fast ++#define inflate_table @ZLIB_SYMBOL_PREFIX@inflate_table ++#define read_buf @ZLIB_SYMBOL_PREFIX@read_buf ++#ifndef Z_SOLO ++# define uncompress @ZLIB_SYMBOL_PREFIX@uncompress ++# define uncompress2 @ZLIB_SYMBOL_PREFIX@uncompress2 ++#endif ++#define zError @ZLIB_SYMBOL_PREFIX@zError ++#ifndef Z_SOLO ++# define zcalloc @ZLIB_SYMBOL_PREFIX@zcalloc ++# define zcfree @ZLIB_SYMBOL_PREFIX@zcfree ++#endif ++#define zlibCompileFlags @ZLIB_SYMBOL_PREFIX@zlibCompileFlags ++#define zlibVersion @ZLIB_SYMBOL_PREFIX@zlibVersion ++ ++/* all zlib typedefs in zlib.h and zconf.h */ ++#define Byte @ZLIB_SYMBOL_PREFIX@Byte ++#define Bytef @ZLIB_SYMBOL_PREFIX@Bytef ++#define alloc_func @ZLIB_SYMBOL_PREFIX@alloc_func ++#define charf @ZLIB_SYMBOL_PREFIX@charf ++#define free_func @ZLIB_SYMBOL_PREFIX@free_func ++#ifndef Z_SOLO ++# define gzFile @ZLIB_SYMBOL_PREFIX@gzFile ++#endif ++#define gz_header @ZLIB_SYMBOL_PREFIX@gz_header ++#define gz_headerp @ZLIB_SYMBOL_PREFIX@gz_headerp ++#define in_func @ZLIB_SYMBOL_PREFIX@in_func ++#define intf @ZLIB_SYMBOL_PREFIX@intf ++#define out_func @ZLIB_SYMBOL_PREFIX@out_func ++#define uInt @ZLIB_SYMBOL_PREFIX@uInt ++#define uIntf @ZLIB_SYMBOL_PREFIX@uIntf ++#define uLong @ZLIB_SYMBOL_PREFIX@uLong ++#define uLongf @ZLIB_SYMBOL_PREFIX@uLongf ++#define voidp @ZLIB_SYMBOL_PREFIX@voidp ++#define voidpc @ZLIB_SYMBOL_PREFIX@voidpc ++#define voidpf @ZLIB_SYMBOL_PREFIX@voidpf ++ ++/* all zlib structs in zlib.h and zconf.h 
*/ ++#define gz_header_s @ZLIB_SYMBOL_PREFIX@gz_header_s ++#define internal_state @ZLIB_SYMBOL_PREFIX@internal_state ++ ++/* all zlib structs in zutil.h */ ++#define z_errmsg @ZLIB_SYMBOL_PREFIX@z_errmsg ++#define z_vstring @ZLIB_SYMBOL_PREFIX@z_vstring ++#define zlibng_version @ZLIB_SYMBOL_PREFIX@zlibng_version ++ ++/* zlib-ng specific symbols */ ++#define zng_alloc_aligned @ZLIB_SYMBOL_PREFIX@zng_alloc_aligned ++#define zng_free_aligned @ZLIB_SYMBOL_PREFIX@zng_free_aligned ++ ++#endif /* ZLIB_NAME_MANGLING_H */ +diff --git a/zutil.c b/zutil.c +index 398e17e..bd937c5 100644 +--- a/zutil.c ++++ b/zutil.c +@@ -20,18 +20,18 @@ z_const char * const PREFIX(z_errmsg)[10] = { + (z_const char *)"" + }; + +-const char zlibng_string[] = +- " zlib-ng 2.0.6 forked from zlib"; ++const char PREFIX3(vstring)[] = ++ " zlib-ng 2.1.5"; + + #ifdef ZLIB_COMPAT + const char * Z_EXPORT zlibVersion(void) { + return ZLIB_VERSION; + } +-#endif +- ++#else + const char * Z_EXPORT zlibng_version(void) { + return ZLIBNG_VERSION; + } ++#endif + + unsigned long Z_EXPORT PREFIX(zlibCompileFlags)(void) { + unsigned long flags; +@@ -87,7 +87,7 @@ unsigned long Z_EXPORT PREFIX(zlibCompileFlags)(void) { + # endif + int Z_INTERNAL z_verbose = verbose; + +-void Z_INTERNAL z_error(char *m) { ++void Z_INTERNAL z_error(const char *m) { + fprintf(stderr, "%s\n", m); + exit(1); + } +@@ -100,12 +100,60 @@ const char * Z_EXPORT PREFIX(zError)(int err) { + return ERR_MSG(err); + } + +-void Z_INTERNAL *zng_calloc(void *opaque, unsigned items, unsigned size) { ++void Z_INTERNAL *PREFIX(zcalloc)(void *opaque, unsigned items, unsigned size) { + Z_UNUSED(opaque); + return zng_alloc((size_t)items * (size_t)size); + } + +-void Z_INTERNAL zng_cfree(void *opaque, void *ptr) { ++void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr) { + Z_UNUSED(opaque); + zng_free(ptr); + } ++ ++/* Since we support custom memory allocators, some which might not align memory as we expect, ++ * we have to ask for extra memory and 
return an aligned pointer. */ ++void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align) { ++ uintptr_t return_ptr, original_ptr; ++ uint32_t alloc_size, align_diff; ++ void *ptr; ++ ++ /* If no custom calloc function used then call zlib-ng's aligned calloc */ ++ if (zalloc == PREFIX(zcalloc)) ++ return PREFIX(zcalloc)(opaque, items, size); ++ ++ /* Allocate enough memory for proper alignment and to store the original memory pointer */ ++ alloc_size = sizeof(void *) + (items * size) + align; ++ ptr = zalloc(opaque, 1, alloc_size); ++ if (!ptr) ++ return NULL; ++ ++ /* Calculate return pointer address with space enough to store original pointer */ ++ align_diff = align - ((uintptr_t)ptr % align); ++ return_ptr = (uintptr_t)ptr + align_diff; ++ if (align_diff < sizeof(void *)) ++ return_ptr += align; ++ ++ /* Store the original pointer for free() */ ++ original_ptr = return_ptr - sizeof(void *); ++ memcpy((void *)original_ptr, &ptr, sizeof(void *)); ++ ++ /* Return properly aligned pointer in allocation */ ++ return (void *)return_ptr; ++} ++ ++void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr) { ++ /* If no custom cfree function used then call zlib-ng's aligned cfree */ ++ if (zfree == PREFIX(zcfree)) { ++ PREFIX(zcfree)(opaque, ptr); ++ return; ++ } ++ if (!ptr) ++ return; ++ ++ /* Calculate offset to original memory allocation pointer */ ++ void *original_ptr = (void *)((uintptr_t)ptr - sizeof(void *)); ++ void *free_ptr = *(void **)original_ptr; ++ ++ /* Free original memory allocation */ ++ zfree(opaque, free_ptr); ++} +diff --git a/zutil.h b/zutil.h +index 7578737..663616b 100644 +--- a/zutil.h ++++ b/zutil.h +@@ -1,7 +1,7 @@ + #ifndef ZUTIL_H_ + #define ZUTIL_H_ + /* zutil.h -- internal interface and configuration of the compression library +- * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler ++ * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler + * 
For conditions of distribution and use, see copyright notice in zlib.h + */ + +@@ -10,34 +10,12 @@ + subject to change. Applications should only use zlib.h. + */ + +-#if defined(HAVE_VISIBILITY_INTERNAL) +-# define Z_INTERNAL __attribute__((visibility ("internal"))) +-#elif defined(HAVE_VISIBILITY_HIDDEN) +-# define Z_INTERNAL __attribute__((visibility ("hidden"))) +-#else +-# define Z_INTERNAL +-#endif +- +-#ifndef __cplusplus +-# define Z_REGISTER register +-#else +-# define Z_REGISTER +-#endif +- +-#ifndef Z_TLS +-# define Z_TLS +-#endif +- +-#include +-#include +-#include +-#include ++#include "zbuild.h" + #ifdef ZLIB_COMPAT + # include "zlib.h" + #else + # include "zlib-ng.h" + #endif +-#include "zbuild.h" + + typedef unsigned char uch; /* Included for compatibility with external code only */ + typedef uint16_t ush; /* Included for compatibility with external code only */ +@@ -58,6 +36,11 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ + #endif + /* default windowBits for decompression. 
MAX_WBITS is for compression only */ + ++#define MAX_BITS 15 ++/* all codes must not exceed MAX_BITS bits */ ++#define MAX_DIST_EXTRA_BITS 13 ++/* maximum number of extra distance bits */ ++ + #if MAX_MEM_LEVEL >= 8 + # define DEF_MEM_LEVEL 8 + #else +@@ -70,13 +53,17 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ + #define DYN_TREES 2 + /* The three kinds of block type */ + +-#define MIN_MATCH 3 +-#define MAX_MATCH 258 +-/* The minimum and maximum match lengths */ ++#define STD_MIN_MATCH 3 ++#define STD_MAX_MATCH 258 ++/* The minimum and maximum match lengths mandated by the deflate standard */ ++ ++#define WANT_MIN_MATCH 4 ++/* The minimum wanted match length, affects deflate_quick, deflate_fast, deflate_medium and deflate_slow */ + + #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + #define ADLER32_INITIAL_VALUE 1 /* initial adler-32 hash value */ ++#define CRC32_INITIAL_VALUE 0 /* initial crc-32 hash value */ + + #define ZLIB_WRAPLEN 6 /* zlib format overhead */ + #define GZIP_WRAPLEN 18 /* gzip format overhead */ +@@ -92,8 +79,6 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ + /* deflate_quick worst-case overhead: 9 bits per literal, round up to next byte (+7) */ + + +-#define ZLIB_WRAPLEN 6 /* zlib format overhead */ +- + /* target dependencies */ + + #ifdef AMIGA +@@ -132,16 +117,6 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ + + #ifdef __APPLE__ + # define OS_CODE 19 +-#endif +- +-#if (defined(_MSC_VER) && (_MSC_VER > 600)) +-# define fdopen(fd, type) _fdopen(fd, type) +-#endif +- +-/* MS Visual Studio does not allow inline in C, only C++. +- But it provides __inline instead, so use that. 
*/ +-#if defined(_MSC_VER) && !defined(inline) && !defined(__cplusplus) +-# define inline __inline + #endif + + /* common defaults */ +@@ -150,125 +125,24 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ + # define OS_CODE 3 /* assume Unix */ + #endif + +- /* functions */ +- +-/* Diagnostic functions */ +-#ifdef ZLIB_DEBUG +-# include +- extern int Z_INTERNAL z_verbose; +- extern void Z_INTERNAL z_error(char *m); +-# define Assert(cond, msg) {if (!(cond)) z_error(msg);} +-# define Trace(x) {if (z_verbose >= 0) fprintf x;} +-# define Tracev(x) {if (z_verbose > 0) fprintf x;} +-# define Tracevv(x) {if (z_verbose > 1) fprintf x;} +-# define Tracec(c, x) {if (z_verbose > 0 && (c)) fprintf x;} +-# define Tracecv(c, x) {if (z_verbose > 1 && (c)) fprintf x;} +-#else +-# define Assert(cond, msg) +-# define Trace(x) +-# define Tracev(x) +-# define Tracevv(x) +-# define Tracec(c, x) +-# define Tracecv(c, x) +-#endif ++ /* macros */ + +-void Z_INTERNAL *zng_calloc(void *opaque, unsigned items, unsigned size); +-void Z_INTERNAL zng_cfree(void *opaque, void *ptr); ++#define CHECK_VER_STSIZE(_ver,_stsize) ((_ver) == NULL || (_ver)[0] != PREFIX2(VERSION)[0] || (_stsize) != (int32_t)sizeof(PREFIX3(stream))) + +-#define ZALLOC(strm, items, size) (*((strm)->zalloc))((strm)->opaque, (items), (size)) +-#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (void *)(addr)) +-#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} ++ /* memory allocation functions */ + +-/* Reverse the bytes in a value. Use compiler intrinsics when +- possible to take advantage of hardware implementations. 
*/ +-#if defined(_MSC_VER) && (_MSC_VER >= 1300) +-# pragma intrinsic(_byteswap_ulong) +-# define ZSWAP16(q) _byteswap_ushort(q) +-# define ZSWAP32(q) _byteswap_ulong(q) +-# define ZSWAP64(q) _byteswap_uint64(q) +- +-#elif defined(__Clang__) || (defined(__GNUC__) && \ +- (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) +-# define ZSWAP16(q) __builtin_bswap16(q) +-# define ZSWAP32(q) __builtin_bswap32(q) +-# define ZSWAP64(q) __builtin_bswap64(q) +- +-#elif defined(__GNUC__) && (__GNUC__ >= 2) && defined(__linux__) +-# include +-# define ZSWAP16(q) bswap_16(q) +-# define ZSWAP32(q) bswap_32(q) +-# define ZSWAP64(q) bswap_64(q) +- +-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +-# include +-# define ZSWAP16(q) bswap16(q) +-# define ZSWAP32(q) bswap32(q) +-# define ZSWAP64(q) bswap64(q) +-#elif defined(__OpenBSD__) +-# include +-# define ZSWAP16(q) swap16(q) +-# define ZSWAP32(q) swap32(q) +-# define ZSWAP64(q) swap64(q) +-#elif defined(__INTEL_COMPILER) +-/* ICC does not provide a two byte swap. 
*/ +-# define ZSWAP16(q) ((((q) & 0xff) << 8) | (((q) & 0xff00) >> 8)) +-# define ZSWAP32(q) _bswap(q) +-# define ZSWAP64(q) _bswap64(q) ++void Z_INTERNAL *PREFIX(zcalloc)(void *opaque, unsigned items, unsigned size); ++void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr); + +-#else +-# define ZSWAP16(q) ((((q) & 0xff) << 8) | (((q) & 0xff00) >> 8)) +-# define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ +- (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) +-# define ZSWAP64(q) \ +- (((q & 0xFF00000000000000u) >> 56u) | \ +- ((q & 0x00FF000000000000u) >> 40u) | \ +- ((q & 0x0000FF0000000000u) >> 24u) | \ +- ((q & 0x000000FF00000000u) >> 8u) | \ +- ((q & 0x00000000FF000000u) << 8u) | \ +- ((q & 0x0000000000FF0000u) << 24u) | \ +- ((q & 0x000000000000FF00u) << 40u) | \ +- ((q & 0x00000000000000FFu) << 56u)) +-#endif ++typedef void *zng_calloc_func(void *opaque, unsigned items, unsigned size); ++typedef void zng_cfree_func(void *opaque, void *ptr); + +-/* Only enable likely/unlikely if the compiler is known to support it */ +-#if (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__INTEL_COMPILER) || defined(__Clang__) +-# define LIKELY_NULL(x) __builtin_expect((x) != 0, 0) +-# define LIKELY(x) __builtin_expect(!!(x), 1) +-# define UNLIKELY(x) __builtin_expect(!!(x), 0) +-# define PREFETCH_L1(addr) __builtin_prefetch(addr, 0, 3) +-# define PREFETCH_L2(addr) __builtin_prefetch(addr, 0, 2) +-# define PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 2) +-#elif defined(__WIN__) +-# include +-# define LIKELY_NULL(x) x +-# define LIKELY(x) x +-# define UNLIKELY(x) x +-# define PREFETCH_L1(addr) _mm_prefetch((char *) addr, _MM_HINT_T0) +-# define PREFETCH_L2(addr) _mm_prefetch((char *) addr, _MM_HINT_T1) +-# define PREFETCH_RW(addr) _mm_prefetch((char *) addr, _MM_HINT_T1) +-#else +-# define LIKELY_NULL(x) x +-# define LIKELY(x) x +-# define UNLIKELY(x) x +-# define PREFETCH_L1(addr) addr +-# define PREFETCH_L2(addr) addr +-# define PREFETCH_RW(addr) addr +-#endif 
/* (un)likely */ +- +-#if defined(_MSC_VER) +-# define ALIGNED_(x) __declspec(align(x)) +-#else +-# if defined(__GNUC__) +-# define ALIGNED_(x) __attribute__ ((aligned(x))) +-# endif +-#endif ++void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align); ++void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr); + +-#if defined(X86_FEATURES) +-# include "arch/x86/x86.h" +-#elif defined(ARM_FEATURES) +-# include "arch/arm/arm.h" +-#elif defined(POWER_FEATURES) +-# include "arch/power/power.h" +-#endif ++#define ZALLOC(strm, items, size) PREFIX3(alloc_aligned)((strm)->zalloc, (strm)->opaque, (items), (size), 64) ++#define ZFREE(strm, addr) PREFIX3(free_aligned)((strm)->zfree, (strm)->opaque, (void *)(addr)) ++ ++#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + + #endif /* ZUTIL_H_ */ +diff --git a/zutil_p.h b/zutil_p.h +index 55f0061..caec91d 100644 +--- a/zutil_p.h ++++ b/zutil_p.h +@@ -1,15 +1,11 @@ + /* zutil_p.h -- Private inline functions used internally in zlib-ng +- * ++ * For conditions of distribution and use, see copyright notice in zlib.h + */ + + #ifndef ZUTIL_P_H + #define ZUTIL_P_H + +-#if defined(HAVE_POSIX_MEMALIGN) && !defined(_POSIX_C_SOURCE) +-# define _POSIX_C_SOURCE 200112L /* For posix_memalign(). 
*/ +-#endif +- +-#if defined(__APPLE__) || defined(HAVE_POSIX_MEMALIGN) ++#if defined(__APPLE__) || defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_ALIGNED_ALLOC) + # include + #elif defined(__FreeBSD__) + # include +@@ -27,6 +23,8 @@ static inline void *zng_alloc(size_t size) { + return (void *)_aligned_malloc(size, 64); + #elif defined(__APPLE__) + return (void *)malloc(size); /* MacOS always aligns to 16 bytes */ ++#elif defined(HAVE_ALIGNED_ALLOC) ++ return (void *)aligned_alloc(64, size); + #else + return (void *)memalign(64, size); + #endif +@@ -41,4 +39,33 @@ static inline void zng_free(void *ptr) { + #endif + } + ++/* Use memcpy instead of memcmp to avoid older compilers not converting memcmp calls to ++ unaligned comparisons when unaligned access is supported. */ ++static inline int32_t zng_memcmp_2(const void *src0, const void *src1) { ++ uint16_t src0_cmp, src1_cmp; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ ++ return src0_cmp != src1_cmp; ++} ++ ++static inline int32_t zng_memcmp_4(const void *src0, const void *src1) { ++ uint32_t src0_cmp, src1_cmp; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ ++ return src0_cmp != src1_cmp; ++} ++ ++static inline int32_t zng_memcmp_8(const void *src0, const void *src1) { ++ uint64_t src0_cmp, src1_cmp; ++ ++ memcpy(&src0_cmp, src0, sizeof(src0_cmp)); ++ memcpy(&src1_cmp, src1, sizeof(src1_cmp)); ++ ++ return src0_cmp != src1_cmp; ++} ++ + #endif +-- +2.25.1 + diff --git a/package/zlib-ng/zlib-ng.mk b/package/zlib-ng/zlib-ng.mk index aa65d843..9897bd85 100644 --- a/package/zlib-ng/zlib-ng.mk +++ b/package/zlib-ng/zlib-ng.mk @@ -23,6 +23,10 @@ ifeq ($(BR2_arm),y) ZLIB_NG_CONF_OPTS += -DWITH_ACLE=1 endif +ifeq ($(BR2_RISCV_64),y) +ZLIB_NG_CONF_OPTS += -DCMAKE_C_FLAGS="$(TARGET_CFLAGS) -march=rv64gcv" +endif + ifeq ($(BR2_ARM_CPU_HAS_NEON)$(BR2_aarch64),y) ZLIB_NG_CONF_OPTS += -DWITH_NEON=ON else