fix(ocr): Improve DVB subtitle OCR quality (fixes #243)

This commit addresses Issue #243 where DVB subtitles from Spanish
broadcasts were producing corrupt/garbled OCR output like
"alajentiegaranual dep jemios" instead of "a la entrega anual de premios".

Root cause analysis:
1. Image preprocessing was degrading quality - pixContrastNorm was
   causing issues for some DVB sources
2. Default quantization mode (ocr_quantmode=1) was too aggressive,
   reducing images to just 3 colors which lost important detail

Changes:
- Remove pixContrastNorm calls from ocr.c (both main OCR and color
  detection passes) - these were causing more harm than good
- Change default ocr_quantmode from 1 to 0 (no quantization) in both
  C code (ccx_common_option.c) and Rust code (options.rs)
- Add NULL checks in dvbsub_close_decoder() and telxcc_close() so they
  return early when passed a NULL or already-cleared context pointer
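- Clean up codec_private_data pointers: dinit_libraries() in lib_ccx.c
  now NULLs any cap_info entries that share a decoder's private data
  after closing it (prevents a double-free in dinit_cap), and dinit_cap()
  in ts_info.c frees any remaining private data (prevents memory leaks)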

Testing performed:
- Test 21 (English DVB): Completes in ~1 second with good OCR quality
- Test 239 (DVB timing): All 8 subtitles have correct timing
- Spanish DVB (Issue #243): Now produces readable text like
  "¡Bienvenidos a la entrega anual de premios" instead of garbage

Users can still use --quant 1 to restore the old quantization behavior
if needed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Carlos
2025-12-15 11:47:03 +01:00
parent b728ddadfa
commit 7e1a01447a
7 changed files with 47 additions and 24 deletions

View File

@@ -73,7 +73,7 @@ void init_options(struct ccx_s_options *options)
options->ocrlang = NULL; // By default, autodetect .traineddata file
options->ocr_oem = -1; // By default, OEM mode depends on the tesseract version
options->psm = 3; // Default PSM mode (3 is the default tesseract as well)
options->ocr_quantmode = 1; // CCExtractor's internal
options->ocr_quantmode = 0; // No quantization (better OCR accuracy for DVB subtitles)
options->mkvlang = NULL; // By default, all the languages are extracted
options->ignore_pts_jumps = 1;
options->analyze_video_stream = 0;

View File

@@ -528,9 +528,14 @@ void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr)
}
int dvbsub_close_decoder(void **dvb_ctx)
{
DVBSubContext *ctx = (DVBSubContext *)*dvb_ctx;
DVBSubContext *ctx;
DVBSubRegionDisplay *display;
if (!dvb_ctx || !*dvb_ctx)
return 0;
ctx = (DVBSubContext *)*dvb_ctx;
delete_regions(ctx);
delete_objects(ctx);

View File

@@ -224,6 +224,7 @@ void dinit_libraries(struct lib_ccx_ctx **ctx)
list_for_each_entry_safe(dec_ctx, dec_ctx1, &lctx->dec_ctx_head, list, struct lib_cc_decode)
{
LLONG cfts;
void *saved_private_data = dec_ctx->private_data; // Save before close NULLs it
if (dec_ctx->codec == CCX_CODEC_DVB)
dvbsub_close_decoder(&dec_ctx->private_data);
// Test memory for teletext
@@ -232,6 +233,18 @@ void dinit_libraries(struct lib_ccx_ctx **ctx)
else if (dec_ctx->codec == CCX_CODEC_ISDB_CC)
delete_isdb_decoder(&dec_ctx->private_data);
// Also NULL out any cinfo entries that shared this private_data pointer
// to prevent double-free in dinit_cap
if (saved_private_data && lctx->demux_ctx)
{
struct cap_info *cinfo_iter;
list_for_each_entry(cinfo_iter, &lctx->demux_ctx->cinfo_tree.all_stream, all_stream, struct cap_info)
{
if (cinfo_iter->codec_private_data == saved_private_data)
cinfo_iter->codec_private_data = NULL;
}
}
flush_cc_decode(dec_ctx, &dec_ctx->dec_sub);
cfts = get_fts(dec_ctx->timing, dec_ctx->current_field);
enc_ctx = get_encoder_by_pn(lctx, dec_ctx->program_number);

View File

@@ -387,17 +387,8 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
if (cpix_gs != NULL)
pixInvert(cpix_gs, cpix_gs);
// Apply contrast enhancement to improve OCR accuracy
// This stretches the histogram to use the full range, improving character recognition
if (cpix_gs != NULL)
{
PIX *enhanced = pixContrastNorm(NULL, cpix_gs, 100, 100, 55, 1, 1);
if (enhanced != NULL)
{
pixDestroy(&cpix_gs);
cpix_gs = enhanced;
}
}
// Note: Upscaling was removed - testing showed it degrades OCR quality for DVB subtitles
// The original bitmap quality (e.g., 520x84) is sufficient for Tesseract
if (cpix_gs == NULL)
tess_ret = -1;
@@ -455,12 +446,8 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
goto skip_color_detection;
}
pixInvert(color_pix_processed, color_pix_processed);
PIX *color_pix_enhanced = pixContrastNorm(NULL, color_pix_processed, 100, 100, 55, 1, 1);
if (color_pix_enhanced != NULL)
{
pixDestroy(&color_pix_processed);
color_pix_processed = color_pix_enhanced;
}
// Note: Upscaling removed from color detection pass as well
TessBaseAPISetImage2(ctx->api, color_pix_processed);
tess_ret = TessBaseAPIRecognize(ctx->api, NULL);

View File

@@ -1567,7 +1567,12 @@ void telxcc_update_gt(void *codec, uint32_t global_timestamp)
// Close output
void telxcc_close(void **ctx, struct cc_subtitle *sub)
{
struct TeletextCtx *ttext = *ctx;
struct TeletextCtx *ttext;
if (!ctx || !*ctx)
return;
ttext = *ctx;
if (!ttext)
return;

View File

@@ -206,15 +206,17 @@ int update_capinfo(struct ccx_demuxer *ctx, int pid, enum ccx_stream_type stream
if (codec != CCX_CODEC_NONE)
{
tmp->codec = codec;
tmp->codec_private_data = init_private_data(codec);
// Use provided private_data if available, otherwise create new one
if (private_data)
tmp->codec_private_data = private_data;
else
tmp->codec_private_data = init_private_data(codec);
}
tmp->saw_pesstart = 0;
tmp->capbuflen = 0;
tmp->capbufsize = 0;
tmp->ignore = 0;
if (private_data)
tmp->codec_private_data = private_data;
}
return CCX_OK;
}
@@ -269,6 +271,17 @@ void dinit_cap(struct ccx_demuxer *ctx)
iter = list_entry(ctx->cinfo_tree.all_stream.next, struct cap_info, all_stream);
list_del(&iter->all_stream);
freep(&iter->capbuf);
// Free codec-specific private data to prevent memory leaks
// The pointer may have been NULLed by dinit_libraries if it was shared
if (iter->codec_private_data)
{
if (iter->codec == CCX_CODEC_DVB)
dvbsub_close_decoder(&iter->codec_private_data);
else if (iter->codec == CCX_CODEC_TELETEXT)
telxcc_close(&iter->codec_private_data, NULL);
else
free(iter->codec_private_data);
}
free(iter);
}
INIT_LIST_HEAD(&ctx->cinfo_tree.all_stream);

View File

@@ -578,7 +578,7 @@ impl Default for Options {
ocrlang: Default::default(),
ocr_oem: -1,
psm: 3,
ocr_quantmode: 1,
ocr_quantmode: 0, // No quantization - better OCR accuracy for DVB subtitles
mkvlang: Default::default(),
analyze_video_stream: Default::default(),
hardsubx_ocr_mode: Default::default(),