Fix and optimize openjpeg J2C encoder (#4017, #4032)

master
Ayane 2025-05-06 14:26:35 +02:00 committed by GitHub
parent efdb86dcbb
commit a30342a78b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 116 additions and 85 deletions

View File

@ -32,8 +32,6 @@
#include "event.h" #include "event.h"
#include "cio.h" #include "cio.h"
#define MAX_ENCODED_DISCARD_LEVELS 5
// Factory function: see declaration in llimagej2c.cpp // Factory function: see declaration in llimagej2c.cpp
LLImageJ2CImpl* fallbackCreateLLImageJ2CImpl() LLImageJ2CImpl* fallbackCreateLLImageJ2CImpl()
{ {
@ -132,73 +130,96 @@ static void opj_error(const char* msg, void* user_data)
static OPJ_SIZE_T opj_read(void * buffer, OPJ_SIZE_T bytes, void* user_data) static OPJ_SIZE_T opj_read(void * buffer, OPJ_SIZE_T bytes, void* user_data)
{ {
llassert(user_data); llassert(user_data && buffer);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
OPJ_SIZE_T remainder = (jpeg_codec->size - jpeg_codec->offset);
if (remainder <= 0) if (jpeg_codec->offset < 0 || static_cast<OPJ_SIZE_T>(jpeg_codec->offset) >= jpeg_codec->size)
{ {
jpeg_codec->offset = jpeg_codec->size; jpeg_codec->offset = jpeg_codec->size;
// Indicate end of stream (hacky?) return static_cast<OPJ_SIZE_T>(-1); // Indicate EOF
return (OPJ_OFF_T)-1;
} }
OPJ_SIZE_T to_read = llclamp(U32(bytes), U32(0), U32(remainder));
OPJ_SIZE_T remainder = jpeg_codec->size - static_cast<OPJ_SIZE_T>(jpeg_codec->offset);
OPJ_SIZE_T to_read = (bytes < remainder) ? bytes : remainder;
memcpy(buffer, jpeg_codec->buffer + jpeg_codec->offset, to_read); memcpy(buffer, jpeg_codec->buffer + jpeg_codec->offset, to_read);
jpeg_codec->offset += to_read; jpeg_codec->offset += to_read;
return to_read; return to_read;
} }
static OPJ_SIZE_T opj_write(void * buffer, OPJ_SIZE_T bytes, void* user_data) static OPJ_SIZE_T opj_write(void * buffer, OPJ_SIZE_T bytes, void* user_data)
{ {
llassert(user_data); llassert(user_data && buffer);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
OPJ_SIZE_T remainder = jpeg_codec->size - jpeg_codec->offset; OPJ_OFF_T required_offset = jpeg_codec->offset + static_cast<OPJ_OFF_T>(bytes);
if (remainder < bytes)
// Overflow check
if (required_offset < jpeg_codec->offset)
return 0; // Overflow detected
// Resize if needed (exponential growth)
if (required_offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
{ {
OPJ_SIZE_T new_size = jpeg_codec->size + (bytes - remainder); OPJ_SIZE_T new_size = jpeg_codec->size ? jpeg_codec->size : 1024;
while (required_offset > static_cast<OPJ_OFF_T>(new_size))
new_size *= 2;
const OPJ_SIZE_T MAX_BUFFER_SIZE = 512 * 1024 * 1024; // 512 MB, increase if needed
if (new_size > MAX_BUFFER_SIZE) return 0;
U8* new_buffer = (U8*)ll_aligned_malloc_16(new_size); U8* new_buffer = (U8*)ll_aligned_malloc_16(new_size);
memcpy(new_buffer, jpeg_codec->buffer, jpeg_codec->offset); if (!new_buffer) return 0; // Allocation failed
U8* old_buffer = jpeg_codec->buffer;
if (jpeg_codec->offset > 0)
memcpy(new_buffer, jpeg_codec->buffer, static_cast<size_t>(jpeg_codec->offset));
ll_aligned_free_16(jpeg_codec->buffer);
jpeg_codec->buffer = new_buffer; jpeg_codec->buffer = new_buffer;
ll_aligned_free_16(old_buffer);
jpeg_codec->size = new_size; jpeg_codec->size = new_size;
} }
memcpy(jpeg_codec->buffer + jpeg_codec->offset, buffer, bytes);
jpeg_codec->offset += bytes; memcpy(jpeg_codec->buffer + jpeg_codec->offset, buffer, static_cast<size_t>(bytes));
jpeg_codec->offset = required_offset;
return bytes; return bytes;
} }
static OPJ_OFF_T opj_skip(OPJ_OFF_T bytes, void* user_data) static OPJ_OFF_T opj_skip(OPJ_OFF_T bytes, void* user_data)
{ {
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
jpeg_codec->offset += bytes;
if (jpeg_codec->offset > (OPJ_OFF_T)jpeg_codec->size) OPJ_OFF_T new_offset = jpeg_codec->offset + bytes;
if (new_offset < 0 || new_offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
{ {
jpeg_codec->offset = jpeg_codec->size; // Clamp and indicate EOF or error
// Indicate end of stream jpeg_codec->offset = llclamp<OPJ_OFF_T>(new_offset, 0, static_cast<OPJ_OFF_T>(jpeg_codec->size));
return (OPJ_OFF_T)-1;
}
if (jpeg_codec->offset < 0)
{
// Shouldn't be possible?
jpeg_codec->offset = 0;
return (OPJ_OFF_T)-1; return (OPJ_OFF_T)-1;
} }
jpeg_codec->offset = new_offset;
return bytes; return bytes;
} }
static OPJ_BOOL opj_seek(OPJ_OFF_T bytes, void * user_data) static OPJ_BOOL opj_seek(OPJ_OFF_T offset, void * user_data)
{ {
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
jpeg_codec->offset = bytes;
jpeg_codec->offset = llclamp(U32(jpeg_codec->offset), U32(0), U32(jpeg_codec->size)); if (offset < 0 || offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
return OPJ_FALSE;
jpeg_codec->offset = offset;
return OPJ_TRUE; return OPJ_TRUE;
} }
static void opj_free_user_data(void * user_data) static void opj_free_user_data(void * user_data)
{ {
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
// Don't free, data is managed externally // Don't free, data is managed externally
jpeg_codec->buffer = nullptr; jpeg_codec->buffer = nullptr;
@ -208,14 +229,54 @@ static void opj_free_user_data(void * user_data)
static void opj_free_user_data_write(void * user_data) static void opj_free_user_data_write(void * user_data)
{ {
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data); JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
// Free, data was allocated here // Free, data was allocated here
ll_aligned_free_16(jpeg_codec->buffer); if (jpeg_codec->buffer)
jpeg_codec->buffer = nullptr; {
ll_aligned_free_16(jpeg_codec->buffer);
jpeg_codec->buffer = nullptr;
}
jpeg_codec->size = 0; jpeg_codec->size = 0;
jpeg_codec->offset = 0; jpeg_codec->offset = 0;
} }
/**
* Estimates the number of layers necessary depending on the image surface (w x h)
*/
static U32 estimate_num_layers(U32 surface)
{
if (surface <= 1024) return 2; // Tiny (≤32×32)
else if (surface <= 16384) return 3; // Small (≤128×128)
else if (surface <= 262144) return 4; // Medium (≤512×512)
else if (surface <= 1048576) return 5; // Up to ~1MP
else return 6; // Up to ~1.52MP
}
/**
* Sets the parameters.tcp_rates according to the number of layers and a last tcp_rate value (which equals to the final compression ratio).
*
* Example for 6 layers:
*
* i = 5, parameters.tcp_rates[6 - 1 - 5] = 8.0f * (1 << (5 << 1)) = 8192 // Layer 5 (lowest quality)
* i = 4, parameters.tcp_rates[6 - 1 - 4] = 8.0f * (1 << (4 << 1)) = 2048 // Layer 4
* i = 3, parameters.tcp_rates[6 - 1 - 3] = 8.0f * (1 << (3 << 1)) = 512 // Layer 3
* i = 2, parameters.tcp_rates[6 - 1 - 2] = 8.0f * (1 << (2 << 1)) = 128 // Layer 2
* i = 1, parameters.tcp_rates[6 - 1 - 1] = 8.0f * (1 << (1 << 1)) = 32 // Layer 1
* i = 0, parameters.tcp_rates[6 - 1 - 0] = 8.0f * (1 << (0 << 1)) = 8 // Layer 0 (highest quality)
*
*/
static void set_tcp_rates(opj_cparameters_t* parameters, U32 num_layers = 1, F32 last_tcp_rate = LAST_TCP_RATE)
{
parameters->tcp_numlayers = num_layers;
for (int i = num_layers - 1; i >= 0; i--)
{
parameters->tcp_rates[num_layers - 1 - i] = last_tcp_rate * static_cast<F32>(1 << (i << 1));
}
}
class JPEG2KDecode : public JPEG2KBase class JPEG2KDecode : public JPEG2KBase
{ {
public: public:
@ -430,15 +491,16 @@ public:
opj_set_default_encoder_parameters(&parameters); opj_set_default_encoder_parameters(&parameters);
parameters.cod_format = OPJ_CODEC_J2K; parameters.cod_format = OPJ_CODEC_J2K;
parameters.cp_disto_alloc = 1; parameters.prog_order = OPJ_RLCP; // should be the default, but, just in case
parameters.cp_disto_alloc = 1; // enable rate allocation by distortion
parameters.max_cs_size = 0; // do not cap max size because we're using tcp_rates and also irrelevant with lossless.
if (reversible) if (reversible)
{ {
parameters.max_cs_size = 0; // do not limit size for reversible compression
parameters.irreversible = 0; // should be the default, but, just in case parameters.irreversible = 0; // should be the default, but, just in case
parameters.tcp_numlayers = 1; parameters.tcp_numlayers = 1;
/* documentation seems to be wrong, should be 0.0f for lossless, not 1.0f /* documentation seems to be wrong, should be 0.0f for lossless, not 1.0f
see https://github.com/uclouvain/openjpeg/blob/39e8c50a2f9bdcf36810ee3d41bcbf1cc78968ae/src/lib/openjp2/j2k.c#L7755 see https://github.com/uclouvain/openjpeg/blob/e7453e398b110891778d8da19209792c69ca7169/src/lib/openjp2/j2k.c#L7817
*/ */
parameters.tcp_rates[0] = 0.0f; parameters.tcp_rates[0] = 0.0f;
} }
@ -493,53 +555,22 @@ public:
encoder = opj_create_compress(OPJ_CODEC_J2K); encoder = opj_create_compress(OPJ_CODEC_J2K);
parameters.tcp_mct = (image->numcomps >= 3) ? 1 : 0; parameters.tcp_mct = (image->numcomps >= 3) ? 1 : 0; // no color transform for RGBA images
parameters.cod_format = OPJ_CODEC_J2K;
parameters.prog_order = OPJ_RLCP;
parameters.cp_disto_alloc = 1;
// if not lossless compression, computes tcp_numlayers and max_cs_size depending on the image dimensions // if not lossless compression, computes tcp_numlayers and max_cs_size depending on the image dimensions
if( parameters.irreversible ) { if( parameters.irreversible )
{
// computes a number of layers // computes a number of layers
U32 surface = rawImageIn.getWidth() * rawImageIn.getHeight(); U32 surface = rawImageIn.getWidth() * rawImageIn.getHeight();
U32 nb_layers = 1;
U32 s = 64*64;
while (surface > s)
{
nb_layers++;
s *= 4;
}
nb_layers = llclamp(nb_layers, 1, 6);
parameters.tcp_numlayers = nb_layers; // gets the necessary number of layers
parameters.tcp_rates[nb_layers - 1] = (U32)(1.f / DEFAULT_COMPRESSION_RATE); // 1:8 by default U32 nb_layers = estimate_num_layers(surface);
// for each subsequent layer, computes its rate and adds surface * numcomps * 1/rate to the max_cs_size // fills parameters.tcp_rates and updates parameters.tcp_numlayers
U32 max_cs_size = (U32)(surface * image->numcomps * DEFAULT_COMPRESSION_RATE); set_tcp_rates(&parameters, nb_layers, LAST_TCP_RATE);
U32 multiplier;
for (int i = nb_layers - 2; i >= 0; i--)
{
if( i == nb_layers - 2 )
{
multiplier = 15;
}
else if( i == nb_layers - 3 )
{
multiplier = 4;
}
else
{
multiplier = 2;
}
parameters.tcp_rates[i] = parameters.tcp_rates[i + 1] * multiplier;
max_cs_size += (U32)(surface * image->numcomps * (1 / parameters.tcp_rates[i]));
}
//ensure that we have at least a minimal size
max_cs_size = llmax(max_cs_size, (U32)FIRST_PACKET_SIZE);
parameters.max_cs_size = max_cs_size;
} }
if (!opj_setup_encoder(encoder, &parameters, image)) if (!opj_setup_encoder(encoder, &parameters, image))
@ -579,7 +610,7 @@ public:
opj_stream_destroy(stream); opj_stream_destroy(stream);
} }
stream = opj_stream_create(data_size_guess, false); stream = opj_stream_create(data_size_guess, OPJ_FALSE);
if (!stream) if (!stream)
{ {
return false; return false;
@ -620,17 +651,15 @@ public:
void setImage(const LLImageRaw& raw) void setImage(const LLImageRaw& raw)
{ {
opj_image_cmptparm_t cmptparm[MAX_ENCODED_DISCARD_LEVELS];
memset(&cmptparm[0], 0, MAX_ENCODED_DISCARD_LEVELS * sizeof(opj_image_cmptparm_t));
S32 numcomps = raw.getComponents(); S32 numcomps = raw.getComponents();
S32 width = raw.getWidth(); S32 width = raw.getWidth();
S32 height = raw.getHeight(); S32 height = raw.getHeight();
std::vector<opj_image_cmptparm_t> cmptparm(numcomps);
for (S32 c = 0; c < numcomps; c++) for (S32 c = 0; c < numcomps; c++)
{ {
cmptparm[c].prec = 8; cmptparm[c].prec = 8; // replaces .bpp
cmptparm[c].bpp = 8;
cmptparm[c].sgnd = 0; cmptparm[c].sgnd = 0;
cmptparm[c].dx = parameters.subsampling_dx; cmptparm[c].dx = parameters.subsampling_dx;
cmptparm[c].dy = parameters.subsampling_dy; cmptparm[c].dy = parameters.subsampling_dy;
@ -638,7 +667,7 @@ public:
cmptparm[c].h = height; cmptparm[c].h = height;
} }
image = opj_image_create(numcomps, &cmptparm[0], OPJ_CLRSPC_SRGB); image = opj_image_create(numcomps, cmptparm.data(), OPJ_CLRSPC_SRGB);
image->x1 = width; image->x1 = width;
image->y1 = height; image->y1 = height;
@ -650,7 +679,7 @@ public:
{ {
for (S32 x = 0; x < width; x++) for (S32 x = 0; x < width; x++)
{ {
const U8 *pixel = src_datap + (y*width + x) * numcomps; const U8 *pixel = src_datap + (y * width + x) * numcomps;
for (S32 c = 0; c < numcomps; c++) for (S32 c = 0; c < numcomps; c++)
{ {
image->comps[c].data[i] = *pixel; image->comps[c].data[i] = *pixel;

View File

@ -29,6 +29,8 @@
#include "llimagej2c.h" #include "llimagej2c.h"
const F32 LAST_TCP_RATE = 1.f/DEFAULT_COMPRESSION_RATE; // should be 8, giving a 1:8 ratio
class LLImageJ2COJ : public LLImageJ2CImpl class LLImageJ2COJ : public LLImageJ2CImpl
{ {
public: public: