Fix and optimize openjpeg J2C encoder (#4017, #4032)

master
Ayane 2025-05-06 14:26:35 +02:00 committed by GitHub
parent efdb86dcbb
commit a30342a78b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 116 additions and 85 deletions

View File

@ -32,8 +32,6 @@
#include "event.h"
#include "cio.h"
#define MAX_ENCODED_DISCARD_LEVELS 5
// Factory function: see declaration in llimagej2c.cpp
LLImageJ2CImpl* fallbackCreateLLImageJ2CImpl()
{
@ -132,73 +130,96 @@ static void opj_error(const char* msg, void* user_data)
static OPJ_SIZE_T opj_read(void * buffer, OPJ_SIZE_T bytes, void* user_data)
{
llassert(user_data);
llassert(user_data && buffer);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
OPJ_SIZE_T remainder = (jpeg_codec->size - jpeg_codec->offset);
if (remainder <= 0)
if (jpeg_codec->offset < 0 || static_cast<OPJ_SIZE_T>(jpeg_codec->offset) >= jpeg_codec->size)
{
jpeg_codec->offset = jpeg_codec->size;
// Indicate end of stream (hacky?)
return (OPJ_OFF_T)-1;
return static_cast<OPJ_SIZE_T>(-1); // Indicate EOF
}
OPJ_SIZE_T to_read = llclamp(U32(bytes), U32(0), U32(remainder));
OPJ_SIZE_T remainder = jpeg_codec->size - static_cast<OPJ_SIZE_T>(jpeg_codec->offset);
OPJ_SIZE_T to_read = (bytes < remainder) ? bytes : remainder;
memcpy(buffer, jpeg_codec->buffer + jpeg_codec->offset, to_read);
jpeg_codec->offset += to_read;
return to_read;
}
static OPJ_SIZE_T opj_write(void * buffer, OPJ_SIZE_T bytes, void* user_data)
{
llassert(user_data);
llassert(user_data && buffer);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
OPJ_SIZE_T remainder = jpeg_codec->size - jpeg_codec->offset;
if (remainder < bytes)
OPJ_OFF_T required_offset = jpeg_codec->offset + static_cast<OPJ_OFF_T>(bytes);
// Overflow check
if (required_offset < jpeg_codec->offset)
return 0; // Overflow detected
// Resize if needed (exponential growth)
if (required_offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
{
OPJ_SIZE_T new_size = jpeg_codec->size + (bytes - remainder);
OPJ_SIZE_T new_size = jpeg_codec->size ? jpeg_codec->size : 1024;
while (required_offset > static_cast<OPJ_OFF_T>(new_size))
new_size *= 2;
const OPJ_SIZE_T MAX_BUFFER_SIZE = 512 * 1024 * 1024; // 512 MB, increase if needed
if (new_size > MAX_BUFFER_SIZE) return 0;
U8* new_buffer = (U8*)ll_aligned_malloc_16(new_size);
memcpy(new_buffer, jpeg_codec->buffer, jpeg_codec->offset);
U8* old_buffer = jpeg_codec->buffer;
if (!new_buffer) return 0; // Allocation failed
if (jpeg_codec->offset > 0)
memcpy(new_buffer, jpeg_codec->buffer, static_cast<size_t>(jpeg_codec->offset));
ll_aligned_free_16(jpeg_codec->buffer);
jpeg_codec->buffer = new_buffer;
ll_aligned_free_16(old_buffer);
jpeg_codec->size = new_size;
}
memcpy(jpeg_codec->buffer + jpeg_codec->offset, buffer, bytes);
jpeg_codec->offset += bytes;
memcpy(jpeg_codec->buffer + jpeg_codec->offset, buffer, static_cast<size_t>(bytes));
jpeg_codec->offset = required_offset;
return bytes;
}
static OPJ_OFF_T opj_skip(OPJ_OFF_T bytes, void* user_data)
{
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
jpeg_codec->offset += bytes;
if (jpeg_codec->offset > (OPJ_OFF_T)jpeg_codec->size)
OPJ_OFF_T new_offset = jpeg_codec->offset + bytes;
if (new_offset < 0 || new_offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
{
jpeg_codec->offset = jpeg_codec->size;
// Indicate end of stream
return (OPJ_OFF_T)-1;
}
if (jpeg_codec->offset < 0)
{
// Shouldn't be possible?
jpeg_codec->offset = 0;
// Clamp and indicate EOF or error
jpeg_codec->offset = llclamp<OPJ_OFF_T>(new_offset, 0, static_cast<OPJ_OFF_T>(jpeg_codec->size));
return (OPJ_OFF_T)-1;
}
jpeg_codec->offset = new_offset;
return bytes;
}
static OPJ_BOOL opj_seek(OPJ_OFF_T bytes, void * user_data)
static OPJ_BOOL opj_seek(OPJ_OFF_T offset, void * user_data)
{
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
jpeg_codec->offset = bytes;
jpeg_codec->offset = llclamp(U32(jpeg_codec->offset), U32(0), U32(jpeg_codec->size));
if (offset < 0 || offset > static_cast<OPJ_OFF_T>(jpeg_codec->size))
return OPJ_FALSE;
jpeg_codec->offset = offset;
return OPJ_TRUE;
}
static void opj_free_user_data(void * user_data)
{
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
// Don't free, data is managed externally
jpeg_codec->buffer = nullptr;
@ -208,14 +229,54 @@ static void opj_free_user_data(void * user_data)
static void opj_free_user_data_write(void * user_data)
{
llassert(user_data);
JPEG2KBase* jpeg_codec = static_cast<JPEG2KBase*>(user_data);
// Free, data was allocated here
ll_aligned_free_16(jpeg_codec->buffer);
jpeg_codec->buffer = nullptr;
if (jpeg_codec->buffer)
{
ll_aligned_free_16(jpeg_codec->buffer);
jpeg_codec->buffer = nullptr;
}
jpeg_codec->size = 0;
jpeg_codec->offset = 0;
}
/**
* Estimates the number of layers necessary depending on the image surface (w x h)
*/
static U32 estimate_num_layers(U32 surface)
{
if (surface <= 1024) return 2; // Tiny (≤32×32)
else if (surface <= 16384) return 3; // Small (≤128×128)
else if (surface <= 262144) return 4; // Medium (≤512×512)
else if (surface <= 1048576) return 5; // Up to ~1MP
else return 6; // Up to ~1.52MP
}
/**
* Sets the parameters.tcp_rates according to the number of layers and a last tcp_rate value (which equals to the final compression ratio).
*
* Example for 6 layers:
*
* i = 5, parameters.tcp_rates[6 - 1 - 5] = 8.0f * (1 << (5 << 1)) = 8192 // Layer 5 (lowest quality)
* i = 4, parameters.tcp_rates[6 - 1 - 4] = 8.0f * (1 << (4 << 1)) = 2048 // Layer 4
* i = 3, parameters.tcp_rates[6 - 1 - 3] = 8.0f * (1 << (3 << 1)) = 512 // Layer 3
* i = 2, parameters.tcp_rates[6 - 1 - 2] = 8.0f * (1 << (2 << 1)) = 128 // Layer 2
* i = 1, parameters.tcp_rates[6 - 1 - 1] = 8.0f * (1 << (1 << 1)) = 32 // Layer 1
* i = 0, parameters.tcp_rates[6 - 1 - 0] = 8.0f * (1 << (0 << 1)) = 8 // Layer 0 (highest quality)
*
*/
static void set_tcp_rates(opj_cparameters_t* parameters, U32 num_layers = 1, F32 last_tcp_rate = LAST_TCP_RATE)
{
parameters->tcp_numlayers = num_layers;
for (int i = num_layers - 1; i >= 0; i--)
{
parameters->tcp_rates[num_layers - 1 - i] = last_tcp_rate * static_cast<F32>(1 << (i << 1));
}
}
class JPEG2KDecode : public JPEG2KBase
{
public:
@ -430,15 +491,16 @@ public:
opj_set_default_encoder_parameters(&parameters);
parameters.cod_format = OPJ_CODEC_J2K;
parameters.cp_disto_alloc = 1;
parameters.prog_order = OPJ_RLCP; // should be the default, but, just in case
parameters.cp_disto_alloc = 1; // enable rate allocation by distortion
parameters.max_cs_size = 0; // do not cap max size because we're using tcp_rates and also irrelevant with lossless.
if (reversible)
{
parameters.max_cs_size = 0; // do not limit size for reversible compression
parameters.irreversible = 0; // should be the default, but, just in case
parameters.tcp_numlayers = 1;
/* documentation seems to be wrong, should be 0.0f for lossless, not 1.0f
see https://github.com/uclouvain/openjpeg/blob/39e8c50a2f9bdcf36810ee3d41bcbf1cc78968ae/src/lib/openjp2/j2k.c#L7755
see https://github.com/uclouvain/openjpeg/blob/e7453e398b110891778d8da19209792c69ca7169/src/lib/openjp2/j2k.c#L7817
*/
parameters.tcp_rates[0] = 0.0f;
}
@ -493,53 +555,22 @@ public:
encoder = opj_create_compress(OPJ_CODEC_J2K);
parameters.tcp_mct = (image->numcomps >= 3) ? 1 : 0;
parameters.cod_format = OPJ_CODEC_J2K;
parameters.prog_order = OPJ_RLCP;
parameters.cp_disto_alloc = 1;
parameters.tcp_mct = (image->numcomps >= 3) ? 1 : 0; // no color transform for RGBA images
// if not lossless compression, computes tcp_numlayers and max_cs_size depending on the image dimensions
if( parameters.irreversible ) {
if( parameters.irreversible )
{
// computes a number of layers
U32 surface = rawImageIn.getWidth() * rawImageIn.getHeight();
U32 nb_layers = 1;
U32 s = 64*64;
while (surface > s)
{
nb_layers++;
s *= 4;
}
nb_layers = llclamp(nb_layers, 1, 6);
parameters.tcp_numlayers = nb_layers;
parameters.tcp_rates[nb_layers - 1] = (U32)(1.f / DEFAULT_COMPRESSION_RATE); // 1:8 by default
// gets the necessary number of layers
U32 nb_layers = estimate_num_layers(surface);
// for each subsequent layer, computes its rate and adds surface * numcomps * 1/rate to the max_cs_size
U32 max_cs_size = (U32)(surface * image->numcomps * DEFAULT_COMPRESSION_RATE);
U32 multiplier;
for (int i = nb_layers - 2; i >= 0; i--)
{
if( i == nb_layers - 2 )
{
multiplier = 15;
}
else if( i == nb_layers - 3 )
{
multiplier = 4;
}
else
{
multiplier = 2;
}
parameters.tcp_rates[i] = parameters.tcp_rates[i + 1] * multiplier;
max_cs_size += (U32)(surface * image->numcomps * (1 / parameters.tcp_rates[i]));
}
// fills parameters.tcp_rates and updates parameters.tcp_numlayers
set_tcp_rates(&parameters, nb_layers, LAST_TCP_RATE);
//ensure that we have at least a minimal size
max_cs_size = llmax(max_cs_size, (U32)FIRST_PACKET_SIZE);
parameters.max_cs_size = max_cs_size;
}
if (!opj_setup_encoder(encoder, &parameters, image))
@ -579,7 +610,7 @@ public:
opj_stream_destroy(stream);
}
stream = opj_stream_create(data_size_guess, false);
stream = opj_stream_create(data_size_guess, OPJ_FALSE);
if (!stream)
{
return false;
@ -620,17 +651,15 @@ public:
void setImage(const LLImageRaw& raw)
{
opj_image_cmptparm_t cmptparm[MAX_ENCODED_DISCARD_LEVELS];
memset(&cmptparm[0], 0, MAX_ENCODED_DISCARD_LEVELS * sizeof(opj_image_cmptparm_t));
S32 numcomps = raw.getComponents();
S32 width = raw.getWidth();
S32 height = raw.getHeight();
S32 width = raw.getWidth();
S32 height = raw.getHeight();
std::vector<opj_image_cmptparm_t> cmptparm(numcomps);
for (S32 c = 0; c < numcomps; c++)
{
cmptparm[c].prec = 8;
cmptparm[c].bpp = 8;
cmptparm[c].prec = 8; // replaces .bpp
cmptparm[c].sgnd = 0;
cmptparm[c].dx = parameters.subsampling_dx;
cmptparm[c].dy = parameters.subsampling_dy;
@ -638,7 +667,7 @@ public:
cmptparm[c].h = height;
}
image = opj_image_create(numcomps, &cmptparm[0], OPJ_CLRSPC_SRGB);
image = opj_image_create(numcomps, cmptparm.data(), OPJ_CLRSPC_SRGB);
image->x1 = width;
image->y1 = height;
@ -650,7 +679,7 @@ public:
{
for (S32 x = 0; x < width; x++)
{
const U8 *pixel = src_datap + (y*width + x) * numcomps;
const U8 *pixel = src_datap + (y * width + x) * numcomps;
for (S32 c = 0; c < numcomps; c++)
{
image->comps[c].data[i] = *pixel;

View File

@ -29,6 +29,8 @@
#include "llimagej2c.h"
const F32 LAST_TCP_RATE = 1.f/DEFAULT_COMPRESSION_RATE; // should be 8, giving a 1:8 ratio
class LLImageJ2COJ : public LLImageJ2CImpl
{
public: