Compare commits

...

182 Commits

Author SHA1 Message Date
Carlos Fernandez Sanz
270c89b7f8 [FEATURE]: Add Snap packaging support with Github workflow 2026-01-31 17:52:06 -08:00
Carlos Fernandez Sanz
032cd1c6b1 Merge pull request #2040 from THE-Amrit-mahto-05/fix/avc-sei-payload-size
Fix SEI payload type handling: change payload_type and payload_size from i32 to u32 for type safety, keeping `as usize` casts only where needed for indexing.
2026-01-31 17:35:40 -08:00
Carlos Fernandez Sanz
42e4e9a657 Merge pull request #2049 from THE-Amrit-mahto-05/fix-null-len-guard
Adds defensive null pointer and negative length checks to ccxr_verify_crc32 FFI function to prevent undefined behavior.
2026-01-31 17:18:31 -08:00
Carlos Fernandez Sanz
821e307333 Merge pull request #2076 from THE-Amrit-mahto-05/fix-miri-null-deref
Verified with Miri - fixes undefined behavior when calling dealloc() on null pointer in window row deallocation.
2026-01-31 13:58:48 -08:00
Amrit kumar Mahto
ae81f3ba3d Fix Miri-reported UB in window row deallocation and tests 2026-01-31 00:49:50 +05:30
Carlos Fernandez Sanz
b190751b2c [FIX] macOS: Fix hardsub pipeline failing due to arm64/x86_64 build mismatch 2026-01-28 18:30:38 -08:00
GAURAV KARMAKAR
f1bb0f4dce macOS: Fix hardsub pipeline failing due to arm64/x86_64 build mismatch 2026-01-29 00:12:09 +05:30
Amrit kumar Mahto
f147ac27f8 Re-running CI to pass checks 2026-01-27 21:03:19 +05:30
Amrit kumar Mahto
2dfb44d7d4 Re-running CI 2026-01-27 20:42:53 +05:30
Carlos Fernandez Sanz
580e721dfe fix: prevent heap overflow in parse_PAT/parse_PMT and null deref in processmp4 2026-01-23 23:06:35 -08:00
Carlos Fernandez
d0a82447ff fix(rust): resolve clippy unnecessary_unwrap warnings for Rust 1.93
Use if-let patterns instead of is_some() + unwrap() to satisfy
the stricter clippy::unnecessary_unwrap lint in Rust 1.93.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 20:58:03 -08:00
Carlos Fernandez
5c19c7b932 style: fix Rust formatting in parser.rs test
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 20:14:26 -08:00
Carlos Fernandez
fd7271bae2 fix: prevent heap overflow in parse_PAT/parse_PMT and null deref in processmp4
- parse_PAT: Add bounds check for payload_length >= 8 before accessing
  header fields (fixes #2053)
- parse_PMT: Add ES_info_length validation and 2-byte minimum check
  before reading descriptor_tag and desc_len in PRIVATE_USER_MPEG2
  and teletext parsing loops (fixes #2054)
- processmp4: Add NULL check for file parameter before passing to
  mprint (fixes #2055)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 20:12:09 -08:00
Chandragupt Singh
05c68349d5 Merge branch 'master' into feat/snap-distribution-support 2026-01-23 15:26:59 +05:30
Chandragupt Singh
09f21f64e4 fix(snap): resolve GPAC dependency and runtime issues in core22 snap 2026-01-23 15:23:33 +05:30
Carlos Fernandez Sanz
c65fb0874e fix(rust): correct mkvlang test to use MkvLangFilter type 2026-01-19 07:43:15 -08:00
Carlos Fernandez
9db727d593 fix(rust): correct mkvlang test to use MkvLangFilter type
The test_mkvlang_sets_mkv_language test was comparing against
Language::Eng, but the mkvlang field type was changed to MkvLangFilter
when BCP 47 language tag support was added in PR #2038.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 07:41:36 -08:00
Amrit kumar Mahto
fe6dad83b7 use u32 for SEI payload type and size 2026-01-19 14:16:50 +05:30
Carlos Fernandez Sanz
d494286082 ci: add workflow to build .deb packages 2026-01-18 20:37:22 -08:00
Carlos Fernandez
259e881483 fix(ci): add missing FFmpeg dependencies to hardsubx .deb packages
Add libavdevice, libswresample, and libavfilter dependencies for
the hardsubx variant on both Ubuntu 24.04 and Debian 13 workflows.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 20:11:10 -08:00
Carlos Fernandez
197069d3b8 ci: add Debian 13 (Trixie) .deb build workflow
Creates .deb packages for Debian 13 using a Docker container.
- Builds GPAC from source (abi-16.4 tag)
- Creates basic and hardsubx variants
- Uses Debian 13's library versions:
  - libtesseract5, libleptonica6
  - libavcodec61, libavformat61, libavutil59, libswscale8

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 20:02:16 -08:00
Carlos Fernandez
7a810d736d fix(ci): add libcurl3t64-gnutls dependency to .deb package
CCExtractor is linked against libcurl-gnutls which requires this
runtime dependency on Ubuntu 24.04.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:55:09 -08:00
Carlos Fernandez
1413c948c4 fix(ci): correct leptonica package name for Ubuntu 24.04
Ubuntu 24.04 uses liblept5, not libleptonica6 (which is Ubuntu 25.04).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:39:42 -08:00
Carlos Fernandez
bb5385913b fix(ci): use apt install to handle .deb dependencies in test step
apt install automatically resolves and installs dependencies,
unlike dpkg -i which fails if dependencies are missing.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:13:41 -08:00
Carlos Fernandez Sanz
f8981e8e1e refactor(rust): Rename parser tests with descriptive names and expand coverage 2026-01-18 19:12:34 -08:00
Carlos Fernandez
a1871abf04 fix(ci): switch .deb build to Ubuntu 24.04
- Use ubuntu-24.04 runner instead of ubuntu-22.04
- Update dependencies to match Ubuntu 24.04 library versions
  (libtesseract5, libleptonica6, libavcodec60, etc.)
- Update GPAC cache key for new Ubuntu version

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:08:09 -08:00
Carlos Fernandez
20b3773bb9 fix(ci): correct version and add missing dependencies in .deb workflow
- Update CMakeLists.txt version from 0.89 to 0.96 to match lib_ccx.h
- Extract version from lib_ccx.h instead of CMakeLists.txt for accuracy
- Add missing runtime dependencies: libtesseract, libleptonica
- Add FFmpeg dependencies for hardsubx variant

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:02:48 -08:00
Carlos Fernandez
8786b4cf75 fix(ci): correct LICENSE filename to LICENSE.txt 2026-01-18 18:08:04 -08:00
Carlos Fernandez
8632ecda5b ci: add workflow to build .deb packages
Add GitHub Actions workflow to build Debian packages (.deb) for Linux.

Features:
- Builds GPAC from source (abi-16.4 tag) since libgpac-dev is not
  available in newer Debian/Ubuntu releases
- Creates two variants: basic (with OCR) and hardsubx (with FFmpeg)
- Bundles GPAC library with the package using patchelf for rpath
- Includes proper Debian package structure with control, postinst, postrm
- Runs on releases, manual trigger, or workflow file changes
- Uploads packages as artifacts and attaches to releases

This provides an unofficial .deb package for users who prefer that
format over AppImage or snap.

Relates to #1610

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 18:00:45 -08:00
Carlos Fernandez Sanz
475153a9dd fix(build): resolve Rust-to-C linking issues on Linux 2026-01-18 17:39:27 -08:00
Carlos Fernandez
df90009f73 ci: add CMakeLists.txt to workflow path filters
Build workflows were not triggering on CMakeLists.txt changes.
Added **CMakeLists.txt and **.cmake patterns to path filters for:
- build_linux.yml
- build_mac.yml
- build_windows.yml
- build_docker.yml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 17:23:34 -08:00
Carlos Fernandez
2352ea21e3 fix(build): resolve Rust-to-C linking issues on Linux
Two fixes for static library linking:

1. Preserve CMAKE_C_FLAGS in lib_ccx/CMakeLists.txt instead of
   overwriting them. This allows passing include paths via
   -DCMAKE_C_FLAGS which is needed for some build configurations.

2. Add target_link_options with --undefined flags for C functions
   called from Rust (decode_vbi, do_cb, store_hdcc). With static
   libraries, the linker processes them in order and only pulls
   symbols that are currently unresolved. Since ccx is processed
   before ccx_rust, these symbols weren't being pulled in.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 17:11:44 -08:00
Carlos Fernandez Sanz
dc041a35e8 fix(rust): Support BCP 47 language tags in --mkvlang option 2026-01-18 16:33:39 -08:00
Carlos Fernandez Sanz
e99ba1d177 fix(rust): Remove dead code returning pointer to stack variable 2026-01-18 14:11:39 -08:00
Carlos Fernandez
298665faa4 chore: fix cargo fmt formatting
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 13:57:53 -08:00
Carlos Fernandez
735a01bf04 refactor(rust): rename parser tests with descriptive names and expand coverage
Replace poorly-named tests (options_1 through options_51, broken_1, etc.)
with 201 descriptively-named tests organized by category:

- Input/output format tests
- Encoding tests
- Stream/program selection tests
- CEA-708 service tests
- Codec selection tests
- Timing option tests
- Debug flag tests
- Teletext option tests
- XMLTV option tests
- Credits option tests
- Buffering option tests
- And more

Each test name now clearly indicates what CLI option is being tested
and what behavior is expected, e.g.:
- test_input_ts_sets_transport_stream_mode
- test_608_enables_decoder_608_debug
- test_service_enables_708_with_single_service

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 13:55:56 -08:00
Amrit kumar Mahto
3618c23b5a rust/avc: fix SEI payload size handling and type correctness 2026-01-19 03:09:07 +05:30
Carlos Fernandez Sanz
b7c9da75dd Revert "Automatic extraction of multiple DVB subtitle streams (--split-dvb-subs) fixes#447 #1864"
Was incorrectly merged
2026-01-18 13:37:53 -08:00
Carlos Fernandez Sanz
449d55d5e5 Revert "Automatic extraction of multiple DVB subtitle streams (--split-dvb-subs) fixes#447 #1864" 2026-01-18 13:37:26 -08:00
Carlos Fernandez Sanz
60aa370899 fix(rust): Correct version number in CLI parser 2026-01-18 13:35:25 -08:00
Carlos Fernandez
3d18b38c32 Revert "Merge pull request #1912 from Rahul-2k4/final"
This reverts commit 2a6d27f9ff, reversing
changes made to 74e64c0421.
2026-01-18 13:28:15 -08:00
Carlos Fernandez Sanz
2a6d27f9ff Merge pull request #1912 from Rahul-2k4/final
Automatic extraction of multiple DVB subtitle streams (--split-dvb-subs) fixes#447 #1864
2026-01-18 13:27:17 -08:00
Carlos Fernandez
91d3512bcc fix(rust): Support BCP 47 language tags in --mkvlang option
The --mkvlang option previously only supported single ISO 639-2 codes
due to using a Language enum with a fixed list of variants. Extended
codes (like "fre-ca") and multiple codes (like "eng,chi") would panic.

This change introduces MkvLangFilter, a proper type for language
filtering that:

- Validates language codes per BCP 47 specification
- Supports ISO 639-2 (3-letter codes like "eng")
- Supports BCP 47 tags (like "en-US", "zh-Hans-CN")
- Supports comma-separated multiple codes
- Provides clean error messages for invalid input
- Includes comprehensive unit tests

The C code continues to receive the raw string for strstr() matching,
maintaining backward compatibility.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 13:23:39 -08:00
Carlos Fernandez Sanz
74e64c0421 Merge pull request #2035 from THE-Amrit-mahto-05/fix/mkvlang-params-check
fix mkvlang_params_check: prevent panic on multi-byte characters
2026-01-18 13:07:44 -08:00
Carlos Fernandez
c175750ebe fix(rust): Correct version number in CLI parser
The Rust CLI parser was showing "CCExtractor 1.0" instead of the
actual version (0.96.5). This was a placeholder value from when
the parser was first ported to Rust in August 2024 that was never
updated.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 12:55:21 -08:00
Carlos Fernandez Sanz
e7dc4d19f7 Merge pull request #2036 from THE-Amrit-mahto-05/fix/process-word-file-safely
fix: process_word_file propagates errors instead of panicking
2026-01-18 12:51:33 -08:00
Carlos Fernandez Sanz
1fbb51056d Merge pull request #1992 from THE-Amrit-mahto-05/fix/teletext-panic
fix: Teletext decoder panic on malformed BCD data
2026-01-18 12:46:56 -08:00
Carlos Fernandez Sanz
5d9a8cc6f2 Merge pull request #2031 from THE-Amrit-mahto-05/fix/rust-userdata-uaf
Fix use-after-free bugs in Rust userdata handling
2026-01-18 12:24:10 -08:00
Amrit kumar Mahto
17abad79f2 fix: process_word_file propagates errors instead of panicking 2026-01-19 01:53:19 +05:30
Amrit kumar Mahto
707e1f01fe updating 2026-01-19 01:34:41 +05:30
Amrit kumar Mahto
efc8b791e7 fix mkvlang_params_check: prevent panic on multi-byte characters 2026-01-19 01:28:25 +05:30
Carlos Fernandez Sanz
a856bbde10 Merge pull request #2015 from Harsh-Sahu43/tests/validate-cc-pair
[FIX] rust: add defensive length check to validate_cc_pair
2026-01-18 11:52:49 -08:00
Carlos Fernandez Sanz
9390b876fa Merge pull request #2034 from THE-Amrit-mahto-05/fix/parser-atol-bug
Fix atol Parsing Bug in parser.rs for Numeric Values and Suffixes
2026-01-18 11:38:53 -08:00
Amrit kumar Mahto
ead0a4beed little fix 2026-01-19 00:45:30 +05:30
Amrit kumar Mahto
b2e9cb74c1 Fix atol parsing bug for numeric values and K/M/G suffixes 2026-01-19 00:31:25 +05:30
Amrit kumar Mahto
20b194aac4 Consolidate Rust userdata fixes: UAF, bounds checks, and VBI safety 2026-01-18 23:34:43 +05:30
Harsh Sahu
2d9b480972 Merge branch 'CCExtractor:master' into tests/validate-cc-pair 2026-01-18 14:48:46 +05:30
Harsh Sahu
1447b021cb Fixed: formatting 2026-01-18 13:58:31 +05:30
Amrit kumar Mahto
e0ac126cff Fix use-after-free bugs in Rust userdata handling 2026-01-18 05:37:44 +05:30
Carlos Fernandez Sanz
b8019bdb35 [FIX] Resolve output artifact on Linux/WSL (line clearing) 2026-01-17 06:02:59 -08:00
Carlos Fernandez Sanz
9d921dec43 fix(matroska): prevent out-of-bounds NAL parsing in AVC/HEVC blocks 2026-01-17 06:00:12 -08:00
Carlos Fernandez Sanz
3ada2b5002 fix(avc): prevent segfault in report-only mode (-out=report) 2026-01-17 05:58:03 -08:00
Rahul Tripathi
50ec9866db style: Fix clang-format ternary operator alignment 2026-01-17 14:12:59 +05:30
Rahul Tripathi
ce87d01fbd fix: Cap DVB subtitle duration to 10s to prevent 65s page timeout bug
Root cause: When FTS timestamps were invalid due to PTS discontinuities,
the code fell back to DVB page timeout (65 seconds) as subtitle duration.
This caused impossible 65-second subtitle durations in split output.

Fix: Added DVB_MAX_SUBTITLE_DURATION_MS constant (10s) and simplified the
duration capping logic to always enforce reasonable subtitle durations.

Tested with: multiprogram_spain.ts, BBC1.ts, BBC2.ts - all outputs now
have properly capped durations with no timestamps exceeding 10 seconds.
2026-01-17 12:14:12 +05:30
Carlos Fernandez
fecd24d08e fix(avc): prevent segfault in report-only mode (-out=report)
When using -out=report mode, the encoder context (enc_ctx) is NULL
because no output file needs to be created. The Rust FFI function
ccxr_process_avc was dereferencing this NULL pointer, causing a
segmentation fault.

Add NULL pointer checks at the FFI boundary to skip AVC processing
when enc_ctx is NULL. This is safe because report mode only needs
stream analysis, not caption extraction.

Fixes #2023

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 20:50:42 -08:00
Rahul Tripathi
482544c5bf docs: Add DVB deduplication feature and double-free fix to CHANGES.TXT 2026-01-16 16:41:46 +05:30
Rahul Tripathi
84a7a1fb41 style: Fix remaining clang-format indentation issues 2026-01-16 16:34:26 +05:30
Rahul Tripathi
f198bcd2ec style: Fix clang-format issues across modified files 2026-01-16 16:31:09 +05:30
Rahul Tripathi
4b6016ca1c style: Fix clang-format issues in dvb_dedup files 2026-01-16 16:26:28 +05:30
Rahul Tripathi
9c2ea47eda fix: Add dvb_dedup.c to Windows and Mac build systems 2026-01-16 16:24:52 +05:30
Rahul Tripathi
170b466a20 fix: Add dvb_dedup.c to autoconf build for GitHub Actions Linux CI 2026-01-16 16:23:43 +05:30
Rahul Tripathi
2bdcd20115 cleanup: Remove temporary debug, test, and tool artifacts from final branch
Remove 186 unwanted files including:
- Debug logs and diagnostic output (debug_*.log, debug_output/, diagnosis_output/)
- Test artifacts and binaries (linux/alltests_*, test_output/, test_split_verification/)
- Tool state files (.agent/, .claude/, .ralph/, .mcp.json, etc.)
- Root-level scripts and temporary Python utilities
- Working notes and temporary documentation (DVB_SPLIT_*.md, progress.json, etc.)
- Unfinished MCP server (tools/mcp-ccextractor/)
- Project-specific working notes (CLAUDE.md)

Update .gitignore to prevent re-adding unwanted artifacts.

Result: final branch now contains only DVB-split feature implementation
and core project files, matching upstream structure while preserving
all functional changes.
2026-01-16 16:18:02 +05:30
Rahul Tripathi
ab18d234d2 Merge branch 'CCExtractor:master' into final 2026-01-16 16:05:36 +05:30
Rahul Tripathi
3ff02617b0 fix: Resolve double-free crash in DVB split pipeline cleanup
- Remove redundant free() after free_subtitle() in pipeline cleanup
  (free_subtitle already frees the struct via freep(&sub))
- Add ctx->prev = NULL after free_encoder_context in dinit_encoder
- Keep free_encoder_context non-recursive for prev (dinit_encoder owns it)
- Remove debug output from general_loop.c
2026-01-16 16:02:59 +05:30
Rahul Tripathi
c7fad95e24 test: Fix DVB dedup test suite - DVB-005 and DVB-007 corrections
- DVB-005: Changed from Teletext-only file to proper DVB extraction using --program-number 530
- DVB-007: Fixed shell script globbing error and variable parsing for dedup effectiveness check
- All test cases now pass: DVB-004 (multilingual split), DVB-005 (single program), DVB-006 (non-DVB), DVB-007 (dedup check), DVB-008 (no-dedup flag)
- Verified: No 0-byte files, deduplication removes 19-29 duplicate lines per stream
2026-01-16 15:05:35 +05:30
Rahul Tripathi
c018f1f43c docs: Mark DVB-004 through DVB-008 as complete
- All deduplication infrastructure implemented and tested
- Test script validates code paths execute correctly
- Dedup ring buffer integrated into all DVB subtitle processing
- Full validation requires OCR build (-DWITH_OCR=ON)
- Code review confirms all 8 stories are complete
2026-01-16 14:15:44 +05:30
Rahul Tripathi
98b50b2a35 test: Add DVB dedup test suite script
- Created dvb_dedup_test.sh to test DVB-001 through DVB-008
- Tests multilingual split, single stream, non-DVB files
- Tests --no-dvb-dedup flag functionality
- Checks for excessive duplication in output
- Note: Requires OCR (Tesseract) for full validation
- Without OCR, files are empty but dedup logic still executes
2026-01-16 14:15:03 +05:30
Rahul Tripathi
46cee0893a feat: DVB-003 - Add --no-dvb-dedup CLI flag
- Added no_dvb_dedup field to ccx_s_options structure
- Initialized to 0 (deduplication enabled by default)
- Added --no-dvb-dedup CLI flag in Rust args parser
- Added flag to Options struct in lib_ccxr
- Wired flag through Rust-to-C FFI boundary in common.rs
- Modified dvbsub_handle_display_segment to respect flag
- Dedup logic only runs when no_dvb_dedup is false (default)
- Added help text describing flag purpose
2026-01-16 14:11:13 +05:30
Rahul Tripathi
42ad48ca7f feat: DVB-001 - Add per-stream dedup ring buffer
- Created dvb_dedup.h with dedup_entry and dedup_ring structures
- Implemented dvb_dedup.c with init, is_duplicate, and add functions
- Integrated dedup_ring into DVBSubContext structure
- Added deduplication check in dvbsub_handle_display_segment
- Dedup uses PTS + PID + composition_id + ancillary_id as unique key
- 8-slot ring buffer to track recently emitted subtitles
- Prevents duplicate subtitles from propagating to output files
2026-01-16 14:04:00 +05:30
Akhilesh
ed26a595bd style(matroska): apply clang-format 2026-01-14 13:42:22 +05:30
Akhilesh
b1c2aabb22 fix(matroska): prevent out-of-bounds NAL parsing in AVC/HEVC blocks 2026-01-14 13:20:23 +05:30
Rahul Tripathi
bb2ae1e70f Fix DVB subtitle repetition bug and memory safety issues 2026-01-13 20:29:44 +05:30
Rahul Tripathi
6464fa486e Fix DVB Split: Remove forced dirty flag, rely on natural dirty + clear 2026-01-13 18:16:41 +05:30
Rahul Tripathi
5aa747ab33 Fix DVB Split bugs: Prevent subtitle repetition and buffer overflow crash 2026-01-13 17:53:30 +05:30
Rahul Tripathi
39adfa59b0 Fix Bug 1: Clear OCR text leakage preventing subtitle repetition
- Clear enc_ctx->prev->last_str after encode_sub() in dvb_subtitle_decoder.c
- This prevents OCR-recognized text from leaking into subsequent subtitles
- Tested: All subtitle output shows unique text with zero duplicates
2026-01-12 11:00:27 +05:30
Carlos Fernandez Sanz
20287548cb fix: Correct progress time display for multi-program TS files 2026-01-11 20:56:59 +01:00
collectnis
b7b10419ec style: fix formatting alignment 2026-01-11 13:46:00 +00:00
collectnis
8fbfd68426 style: fix formatting alignment 2026-01-11 13:31:55 +00:00
collectnis
7159d0b6d0 fix: resolve merge conflict in changelog 2026-01-11 11:48:58 +00:00
collectnis
c515578e37 docs: update changelog 2026-01-11 11:30:54 +00:00
collectnis
e55b8eb764 [CLI] Fix output artifacts on Linux/WSL by clearing line on \r 2026-01-11 10:34:16 +00:00
Carlos Fernandez Sanz
0228fbcbfa fix: Skip moov box if buffer too small to verify mvhd 2026-01-11 10:30:32 +01:00
Carlos Fernandez Sanz
0e190e0962 docs: Add changelog for 0.96.6 2026-01-11 10:29:57 +01:00
Carlos Fernandez
13f1b5ab53 docs: Add changelog for 0.96.6
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 10:28:56 +01:00
Carlos Fernandez Sanz
b39f923c46 docs: Clarify PS probe limit calculation (explain magic number) 2026-01-11 08:55:17 +01:00
Harsh Sahu
7e32d6a553 Merge branch 'CCExtractor:master' into tests/validate-cc-pair 2026-01-11 04:51:33 +05:30
Carlos Fernandez
3bde3dceec fix: Skip moov box if buffer too small to verify mvhd
The previous fix (#1996) prevented a panic when the buffer was too small
to verify if a "moov" box contains "mvhd", but it incorrectly accepted
the box without verification.

The original intent was: "moov without mvhd is invalid, skip it."

This fix maintains that intent:
- If buffer too small to verify mvhd → skip the box
- If moov has mvhd → accept (valid)
- If moov lacks mvhd → skip (invalid)

This is safe for format detection since:
1. The probe reads up to 1MB of start bytes
2. The scoring system requires multiple valid boxes
3. Skipping an unverifiable box is safer than accepting it

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 00:13:11 +01:00
Carlos Fernandez
d5201b1129 docs: Clarify PS probe limit calculation with inline comment
Replace magic number 49997 with `50000 - 3` and add a comment explaining:
- Why we subtract 3 (the loop accesses i+3, so we stop 3 bytes early)
- Why we cap at 50000 (don't scan huge buffers entirely)
- Why we use saturating_sub (handle tiny buffers safely)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 00:07:35 +01:00
Carlos Fernandez Sanz
a199f4f8af Merge pull request #1996 from THE-Amrit-mahto-05/fix/demuxer-panics
fix: prevent MP4 & PS demuxer panics due to out-of-bounds/underflow
2026-01-11 00:06:35 +01:00
Harsh Sahu
eea049923d add defensive length check to validate_cc_pair 2026-01-11 04:21:00 +05:30
Carlos Fernandez Sanz
d999c3e0e0 Merge pull request #1985 from x15sr71/docs/homebrew-install
docs: Add Homebrew installation instructions to COMPILATION.MD
2026-01-10 23:43:42 +01:00
Carlos Fernandez
aac90d5a5f fix(rust): Remove dead code returning pointer to stack variable
Delete the unused `impl FromCType<*mut PMT_entry> for *mut PMTEntry`
implementation which had a critical bug: it returned a pointer to a
stack-allocated PMTEntry, causing undefined behavior (dangling pointer).

This code was never called anywhere in the codebase. The actual usage
in demuxer.rs uses the value-returning variant `FromCType<PMT_entry>
for PMTEntry` with explicit `Box::into_raw(Box::new(...))` wrapping,
which is the correct pattern.

Rather than fixing dead buggy code, just remove it.

Supersedes #1988

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 23:41:32 +01:00
Carlos Fernandez Sanz
618df184c6 Merge pull request #2011 from THE-Amrit-mahto-05/fix/demuxer-allocator-mismatch
Fix allocator mismatch in Rust demuxer (use malloc/free instead of Box)
2026-01-10 23:21:16 +01:00
Chandragupt
5e6aab8972 fix(snap): drop snap-injected command argument in runtime wrapper 2026-01-11 01:10:29 +05:30
Amrit kumar Mahto
a77c21c06c fix: allocator mismatch in demuxer (use malloc/free instead of Box) 2026-01-11 00:49:17 +05:30
Carlos Fernandez Sanz
4252703431 fix(matroska): Prevent infinite loop on truncated MKV files 2026-01-10 13:16:12 +01:00
Carlos Fernandez Sanz
1af2a29a3c fix: Prevent NULL pointer dereference in DVB subtitle decoder 2026-01-10 11:18:56 +01:00
Carlos Fernandez Sanz
8ab474c593 fix: Remove debug println that printed spurious numbers during processing 2026-01-10 11:18:20 +01:00
Carlos Fernandez
1c781c2a38 fix: Correct progress time display for multi-program TS files
Multi-program transport stream files can have different PCR (Program
Clock Reference) bases for each program. For example, one program might
have timestamps starting at 23 hours, another at 25 hours. This caused
the progress time display to show wildly incorrect values like "265:45"
for a 6-second file.

The fix tracks the minimum timestamp offset seen across all programs and
uses that as the baseline. When timestamps from programs with higher PCR
bases are encountered (offset > 60 seconds from minimum), the display
falls back to showing time relative to the minimum baseline.

Changes:
- Add min_global_timestamp_offset field to lib_ccx_ctx to track the
  minimum PCR-based offset seen
- Update progress display logic in general_loop.c to normalize times
  relative to the minimum offset
- Apply same fix to both live stream and file processing modes

Test results with multi-program DVB teletext sample (dvbt.ts):
- Before: 1% | 265:45, 2% | 00:00, 3% | 263:11, ... (jumping wildly)
- After:  1% | 00:00, 2% | 00:00, ... 87% | 00:05, 100% | 00:00 (stable)

Single-program files continue to work correctly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 10:57:57 +01:00
Carlos Fernandez
4d718378d5 fix: Remove debug println that printed spurious numbers during processing
Removes a debug println statement in the Rust timestamp conversion code
that was printing the hours value when it exceeded 24. This caused
spurious numbers (like "25") to appear in the output when processing
files with PTS timestamps that exceeded 24 hours.

The debug code was likely left over from development/debugging and
should not be present in production code.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 10:50:33 +01:00
Carlos Fernandez
1bd4cd5c0a fix: Prevent NULL pointer dereference in DVB subtitle decoder
Add NULL check for `region` before accessing `region->bgcolor` in
the OCR processing block of `write_dvb_sub()`.

The bug occurs when processing DVB subtitles where `get_region()`
returns NULL for all display items in the list. After the display
processing loop, `region` may be NULL, but the code attempted to
access `region->bgcolor` unconditionally, causing a segfault.

The crash manifested as:
- Valgrind: "Invalid read of size 4 at address 0x18"
- The 0x18 offset corresponds to the `bgcolor` field in DVBSubRegion

Testing with bbc_small.ts:
- Before: SIGSEGV crash at 0% processing
- After: 100% processing, 50+ subtitles extracted successfully

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 10:20:50 +01:00
Carlos Fernandez
067045ce92 fix(matroska): Prevent infinite loop on truncated MKV files
When parsing truncated MKV files, the Matroska parser would enter an
infinite loop. This happened because:

1. At EOF, fgetc() returns -1 which becomes 0xFF when cast to UBYTE
2. Reading 4 EOF bytes creates element code 0xFFFFFFFF (unknown element)
3. The "skip unknown element" logic reads another 0xFF as vint length (127)
4. FSEEK past EOF clears the EOF flag without error
5. The while loop condition (pos + len > get_current_byte) never becomes
   false because the recorded segment length is larger than the file

The fix adds feof() checks after each mkv_read_byte() call in all
parsing loops. This detects EOF immediately after reading and breaks
out of the loop cleanly.

Tested with truncated MKV samples (ticket1398-orig.mkv, azumi.mkv)
that previously caused timeouts - now complete in under a second.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 09:50:21 +01:00
Carlos Fernandez Sanz
2f2904041c prevent unsafe Vec::set_len causing heap corruption 2026-01-09 23:45:34 +01:00
Carlos Fernandez Sanz
d837c369e5 fix: prevent FFI memory leaks in demuxer sync 2026-01-09 23:44:52 +01:00
Carlos Fernandez Sanz
686ff69fdc Docs: clarify Linux autotools build and Rust dependency 2026-01-09 23:43:10 +01:00
Carlos Fernandez Sanz
126835d998 Merge pull request #1850 from gaurav02081/gaurav-v1
[FIX] -out=spupng with EIA608/teletext: offset values in XML may not be correct #893
2026-01-09 23:25:58 +01:00
Akhilesh
6e170cd812 Docs: clarify Linux autotools build and Rust dependency 2026-01-09 21:02:18 +05:30
Rahul Tripathi
fe921626e1 Fix: Off-by-one bounds check and encoding corruption
- telxcc.c: Use array_length macro for G0_LATIN_NATIONAL_SUBSETS
  bounds check instead of hardcoded value. Prevents potential
  access to uninitialized memory when index equals array size.
- misc.h: Fix UTF-8 encoding of author name (Iñaki García Etxebarria)
2026-01-09 16:02:10 +05:30
Amrit kumar Mahto
6578f0ff34 fix(avc): prevent unsafe Vec::set_len causing heap corruption 2026-01-09 05:15:57 +05:30
Amrit kumar Mahto
1911068e92 fix(rust): prevent FFI memory leaks in demuxer sync 2026-01-08 14:46:56 +05:30
Chandragupt
493495361d ci(snap): use stable GitHub Actions v6 and make runtime library resolution robust 2026-01-08 09:24:25 +05:30
Chandragupt
643857e98f docs: add changelog entry for Snap packaging 2026-01-08 06:09:33 +05:30
Chandragupt
05adb5f47e snap: add website and source-code metadata 2026-01-08 06:08:29 +05:30
Chandragupt
504877b928 ci(snap): remove temporary push trigger 2026-01-08 06:08:29 +05:30
Chandragupt
64ee63a560 ci(snap): enable push trigger for snap workflow (temporary) 2026-01-08 06:08:00 +05:30
Chandragupt
270c603bd2 ci(snap): add GitHub Actions workflow for Snapcraft-based builds 2026-01-08 06:06:13 +05:30
dependabot[bot]
6d356b4458 chore(deps): bump dawidd6/action-homebrew-bump-formula from 4 to 7 (#1989)
Bumps [dawidd6/action-homebrew-bump-formula](https://github.com/dawidd6/action-homebrew-bump-formula) from 4 to 7.
- [Release notes](https://github.com/dawidd6/action-homebrew-bump-formula/releases)
- [Commits](https://github.com/dawidd6/action-homebrew-bump-formula/compare/v4...v7)

---
updated-dependencies:
- dependency-name: dawidd6/action-homebrew-bump-formula
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-01-08 01:24:47 +01:00
Carlos Fernandez Sanz
cfb10d4b91 fix: Delete empty output files instead of leaving 0-byte files (#1282) (#1877)
When using --output-field both (formerly -12), CCExtractor creates
separate output files for each field. If one field has no captions,
a 0-byte file was left behind, which is confusing for users.

This fix checks the file size in dinit_write() before closing.
If the file is empty (0 bytes), it deletes the file and prints
an informational message.

This is a simpler approach than deferred file creation - files are
still created at initialization but cleaned up if they remain empty.

Fixes #1282

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-08 01:23:28 +01:00
Amrit kumar Mahto
ca2b708023 fix: prevent MP4 & PS demuxer panics due to out-of-bounds/underflow (#1995) 2026-01-08 02:36:30 +05:30
Amrit kumar Mahto
10ac5ca6ce add safety checks and comments in Teletext decoder 2026-01-08 01:42:09 +05:30
Amrit kumar Mahto
333cfb3726 fix: Teletext decoder panic on malformed BCD data (#1990) 2026-01-08 01:26:17 +05:30
GAURAV KARMAKAR
c609f66c02 Removed Build Artifact 2026-01-08 01:03:54 +05:30
Gaurav karmakar
91f254017b Merge branch 'master' into gaurav-v1 2026-01-08 00:47:22 +05:30
GAURAV KARMAKAR
1f5d3df0ae Merge branch 'master' of https://github.com/gaurav02081/ccextractor into gaurav-v1 2026-01-08 00:35:33 +05:30
Rahul Tripathi
e36d81c237 Git Cleanup: Update .gitignore and untrack build artifacts 2026-01-07 21:38:36 +05:30
Rahul Tripathi
8d338dc362 Fix DVB subtitle repeating bug: initialize nb_data 2026-01-07 21:37:23 +05:30
Rahul Tripathi
c78e01d186 Merge branch 'CCExtractor:master' into final 2026-01-06 12:31:17 +05:30
Chandragupt Singh
401ff6c105 docs: note Homebrew availability in changelog 2026-01-06 06:04:57 +05:30
Chandragupt Singh
83eb51ed6f docs: add Homebrew installation instructions 2026-01-06 06:01:56 +05:30
Carlos Fernandez
bce0c92fdd ci: Add Homebrew formula auto-bump workflow
Automatically creates a PR to homebrew-core when a new release
is published, updating the ccextractor formula to the new version.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-06 00:08:40 +01:00
Rahul Tripathi
ea4859fd54 Fix: Add split_dvb_subs to Options default 2026-01-05 21:39:54 +05:30
Rahul Tripathi
8d7890c743 Merge branch 'master' into final 2026-01-05 21:10:51 +05:30
Rahul Tripathi
29158b2c38 Merge branch 'master' into final 2026-01-02 14:18:45 +05:30
Rahul-2k4
1589c31774 fix: Revert credits text deep-copy to fix CI startcredits regressions 2025-12-31 15:23:55 +05:30
Rahul-2k4
c96d3ff3f1 fix(encoder): Deep copy start/end credits text to prevent memory corruption
The start_credits_text and end_credits_text pointers were being copied
directly from the encoder config options, but free_encoder_context()
would later free them. This caused memory corruption when the pointers
referred to memory owned by ccx_options.

Now these strings are deep-copied in init_encoder() so each encoder
context owns its own copy, fixing the --startcreditstext regression.
2025-12-31 14:18:29 +05:30
Rahul-2k4
598a48e260 style: Apply clang-format to pass CI formatting check 2025-12-31 12:45:56 +05:30
Rahul-2k4
0cc3626261 ci: Trigger workflow run 2025-12-31 12:18:27 +05:30
Rahul-2k4
e0e66bd0ba style: Apply clang-format and update CHANGES.TXT
- Run clang-format on all source files to fix CI formatting check
- Add Issue #447 DVB multi-stream feature to CHANGES.TXT
2025-12-31 12:08:56 +05:30
Rahul-2k4
2642ca8805 Merge upstream/master into final branch
Resolves conflicts while preserving Issue #447 fix for DVB multi-stream handling:
- Kept DVB metadata update logic in ts_tables.c for split mode
- Adapted to upstream's single-param dvbsub_init_decoder signature
- Updated lib_ccx.c and general_loop.c to match new API
2025-12-31 11:42:08 +05:30
Rahul-2k4
a108302dc0 fix(dvb): Reinitialize decoder after PAT change for continuous extraction
After PAT changes, the pipeline's decoder was NULLed out to prevent
crashes, but this caused all subsequent DVB data to be skipped.

Now the decoder is reinitialized when detected as NULL, allowing
subtitle extraction to continue across PAT changes.
2025-12-31 11:19:56 +05:30
Rahul-2k4
ce90b61923 fix(dvb): Add NULL checks to prevent crash after PAT change
Fixes segmentation fault at 99% when PAT changes occur during DVB
subtitle processing. The crash happened because decoder context
private_data was freed but still accessed.

Changes:
- Add NULL check in process_data() before dvbsub_decode call
- Add defensive NULL check at start of dvbsub_decode()
- Add defensive NULL check at start of write_dvb_sub()
- Deep copy DVB bitmap data in copy_subtitle() to avoid aliasing
- Safe DVBSubContext copy that doesn't alias linked list pointers
- Clean up pipeline decoder refs in dinit_cap() after PAT change
- Direct FTS calculation for DVB-only streams

Tested with 11GB TS file with 23 PAT changes - no crash.
2025-12-31 10:44:00 +05:30
Rahul-2k4
18566f2213 fix(dvb): Improve multi-stream DVB subtitle handling for Issue #447
- Replace spin-lock with proper mutex (CRITICAL_SECTION/pthread_mutex)
- Add per-pipeline OCR contexts for thread safety
- Include PID in output filenames to handle duplicate languages
- Add dvbsub_get_context_size() and dvbsub_copy_context() for state management
- Improve language code validation (ISO 639-2 compliant)
- Change fatal error to warning for oversized PES packets
- Better language lookup from potential_streams before cinfo fallback
- Reset potential_stream data in demuxer cleanup
2025-12-30 21:58:40 +05:30
Rahul-2k4
117c2fce69 fix(dvb): Apply 3 code review fixes for Issue #447
- Fix escaped newline in debug print (dvb_subtitle_decoder.c:1861)
- Replace hardcoded PID 0x106 with 0 in debug calls (lines 1822, 1835)
- Accept uppercase letters in language code validation (ts_tables.c:396)
2025-12-28 11:06:31 +05:30
Rahul-2k4
ffd6a34c30 Fix Windows CI: change PlatformToolset from v145 to v143 for VS 2022 2025-12-28 10:34:46 +05:30
Rahul-2k4
70af627078 Fix syntax errors in lib_ccx.c: add missing ocr.h include and fix brace structure 2025-12-28 10:32:08 +05:30
Rahul-2k4
b0a5c069ed style: fix clang-format issues for Linux CI compatibility 2025-12-28 10:22:44 +05:30
Rahul-2k4
53ee63894c style: apply clang-format to fix CI formatting check 2025-12-28 10:12:40 +05:30
Rahul-2k4
50ece42e0a style: apply clang-format and normalize line endings to all source files 2025-12-28 00:47:25 +05:30
Rahul-2k4
3d00e718f6 style: normalize line endings and apply clang-format 2025-12-28 00:26:17 +05:30
Rahul-2k4
86e5d47141 style: apply clang-format to all source files 2025-12-28 00:14:16 +05:30
Rahul-2k4
5b36356456 style: apply clang-format fixes 2025-12-28 00:04:26 +05:30
Rahul-2k4
ba04aedae1 fix: add missing set_pipeline_pts and dump_rect_and_log functions 2025-12-27 23:58:26 +05:30
Rahul-2k4
5001df0d6c fix(rust): add missing lang field to cap_info initializer 2025-12-27 23:56:26 +05:30
Rahul-2k4
28506fee7b Add lang member to struct cap_info for DVB split mode 2025-12-27 23:49:29 +05:30
Rahul-2k4
47d8aaddb9 Merge upstream/master into final: Resolve conflicts in option structs (kept both split_dvb_subs and scc_framerate) 2025-12-27 23:34:40 +05:30
Rahul-2k4
1b2254f911 Fix DVB split output: include core logic handling and memory safety fixes 2025-12-27 23:27:36 +05:30
Rahul-2k4
dc34b26afb Fix DVB split output: handle empty PBUS and missing OCR init (Issue #447) 2025-12-27 23:21:08 +05:30
Rahul-2k4
d3602ec938 Fix: Defensive handling of invalid caption_field in DVB subtitle timing (fixes #447) 2025-12-27 12:48:28 +05:30
Rahul-2k4
f9b5e081a7 Remove duplicate comment in parser.rs 2025-12-27 11:46:24 +05:30
Rahul-2k4
bdc3eaa81b Fix: update Rust parser to allow text based formats for DVB split 2025-12-27 10:16:36 +05:30
Rahul-2k4
43d5ba2f34 Improve error message for incompatible OutputFormat in Rust parser 2025-12-27 02:03:51 +05:30
Rahul-2k4
557774b202 Apply code style fixes from clang-format 2025-12-27 01:59:48 +05:30
Rahul-2k4
4e0472bddf Fix DVB split critical bugs: per-pipeline state separation and timing sync 2025-12-27 01:56:12 +05:30
Rahul-2k4
9a2fe6221e Switch platform toolset from v145 to v143 for GitHub Actions compatibility 2025-12-27 01:12:40 +05:30
Rahul Tripathi
182b23a283 Merge branch 'CCExtractor:master' into final 2025-12-27 00:13:39 +05:30
Rahul-2k4
77f3fd35f4 Fix #447: Resolve DVB split mode crash and routing logic
- Fixed NULL pointer dereference in dvb_subtitle_decoder.c (sub->prev check).
- Corrected logic in dvbsub_handle_display_segment to prevent dropped subtitles.
- Implemented robust encoder context swapping in general_loop.c for DVB streams.
- Added regression test: tests/regression/dvb_split.txt.
- Verified 100% completion in split mode and correct Teletext/DVB routing.
2025-12-27 00:11:09 +05:30
Rahul-2k4
6642973c63 CLI + option plumbing for --split-dvb-subs 2025-12-26 14:43:36 +05:30
GAURAV KARMAKAR
e42bc2b9f9 fixed the merged conflict in the ccx_encoders_common.h 2025-12-24 02:25:53 +05:30
Gaurav karmakar
bf9841a255 Merge branch 'master' into gaurav-v1 2025-12-24 01:55:53 +05:30
GAURAV KARMAKAR
6ed09ea397 SPUPNG: fix formatting to match clang-format 2025-12-22 13:22:25 +05:30
GAURAV KARMAKAR
2b708c4a31 Enhance SPUPNG offset calculations and XML tag handling in EIA608 encoder
- Introduced a forward declaration for .
- Updated  to calculate and set image dimensions before writing XML tags.
- Adjusted offset calculations based on screen size for better alignment of subtitles.
- Improved handling of the opening XML tag based on subtitle data presence.
2025-12-21 19:20:28 +05:30
GAURAV KARMAKAR
609a53f373 [BUG] -out=spupng with EIA608/teletext: offset values in XML may be not correct #893 2025-12-19 13:27:08 +05:30
49 changed files with 3088 additions and 899 deletions

283
.github/workflows/build_deb.yml vendored Normal file
View File

@@ -0,0 +1,283 @@
name: Build Linux .deb Package
on:
# Build on releases
release:
types: [published]
# Allow manual trigger
workflow_dispatch:
inputs:
build_type:
description: 'Build type (all, basic, hardsubx)'
required: false
default: 'all'
# Build on pushes to workflow file for testing
push:
paths:
- '.github/workflows/build_deb.yml'
jobs:
build-deb:
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
build_type: [basic, hardsubx]
steps:
# Decide whether this matrix variant is built in this run.
# Only a manual (workflow_dispatch) run can narrow the build: the variant is
# skipped when the requested type is neither "all" nor this variant.
# Every other trigger builds all variants.
# Context values are handed over through `env` rather than being expanded
# inside the script, so user-supplied input is never parsed as shell code.
- name: Check if should build this variant
  id: should_build
  env:
    EVENT_NAME: ${{ github.event_name }}
    INPUT_TYPE: ${{ github.event.inputs.build_type }}
    MATRIX_TYPE: ${{ matrix.build_type }}
  run: |
    should_build=true
    if [ "$EVENT_NAME" = "workflow_dispatch" ] \
      && [ "$INPUT_TYPE" != "all" ] \
      && [ "$INPUT_TYPE" != "$MATRIX_TYPE" ]; then
      should_build=false
    fi
    echo "should_build=$should_build" >> "$GITHUB_OUTPUT"
- name: Checkout repository
if: steps.should_build.outputs.should_build == 'true'
uses: actions/checkout@v6
# Resolve the package version and expose it as the `version` step output.
# Release runs use the release tag (minus an optional leading "v"); all other
# runs read the VERSION define from lib_ccx.h and fall back to "0.96".
# The tag name is passed through `env` so it is never expanded into the
# script text itself.
- name: Get version
  if: steps.should_build.outputs.should_build == 'true'
  id: version
  env:
    EVENT_NAME: ${{ github.event_name }}
    RELEASE_TAG: ${{ github.event.release.tag_name }}
  run: |
    # Extract version from source or use tag
    if [ "$EVENT_NAME" = "release" ]; then
      VERSION="${RELEASE_TAG#v}"  # Remove 'v' prefix if present
    else
      # Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
      VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
    fi
    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
    echo "Building version: $VERSION"
# Compiler toolchain, -dev libraries and packaging helpers needed by every
# build variant.
#   patchelf - rewrites the binary's rpath so it finds the bundled libgpac
#   help2man - generates the man page in the packaging step; that step
#              ignores help2man failures, so a missing tool would otherwise
#              go unnoticed
- name: Install base dependencies
  if: steps.should_build.outputs.should_build == 'true'
  run: |
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends \
      build-essential \
      cmake \
      pkg-config \
      zlib1g-dev \
      libpng-dev \
      libjpeg-dev \
      libfreetype-dev \
      libxml2-dev \
      libcurl4-gnutls-dev \
      libssl-dev \
      clang \
      libclang-dev \
      tesseract-ocr \
      libtesseract-dev \
      libleptonica-dev \
      patchelf \
      help2man
- name: Install FFmpeg dependencies (HardSubX)
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
run: |
sudo apt-get install -y --no-install-recommends \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
libswscale-dev \
libswresample-dev \
libavfilter-dev \
libavdevice-dev
- name: Install Rust toolchain
if: steps.should_build.outputs.should_build == 'true'
uses: dtolnay/rust-toolchain@stable
# Restore a previously built GPAC from ~/gpac-install.
# The cache key is a fixed string, so a stale cache is only dropped when the
# key is edited by hand.
- name: Cache GPAC build
if: steps.should_build.outputs.should_build == 'true'
id: cache-gpac
uses: actions/cache@v5
with:
path: ~/gpac-install
key: gpac-abi-16.4-ubuntu24-deb
# Build GPAC from source only when the cache step reported a miss.
# The library is configured for /usr but installed under $HOME/gpac-install
# via DESTDIR, which is the directory the cache step saves.
- name: Build GPAC
if: steps.should_build.outputs.should_build == 'true' && steps.cache-gpac.outputs.cache-hit != 'true'
run: |
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
cd gpac
./configure --prefix=/usr
make -j$(nproc)
make DESTDIR=$HOME/gpac-install install-lib
# Runs on both cache hit and miss: copy the staged libraries and headers
# into /usr so the CCExtractor build can find them, then refresh the
# linker cache.
- name: Install GPAC to system
if: steps.should_build.outputs.should_build == 'true'
run: |
sudo cp -r $HOME/gpac-install/usr/lib/* /usr/lib/
sudo cp -r $HOME/gpac-install/usr/include/* /usr/include/
sudo ldconfig
- name: Build CCExtractor
if: steps.should_build.outputs.should_build == 'true'
run: |
mkdir build && cd build
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
else
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
fi
make -j$(nproc)
- name: Test build
if: steps.should_build.outputs.should_build == 'true'
run: ./build/ccextractor --version
# Assemble the Debian package tree and build the .deb.
# Layout:
#   usr/bin/ccextractor           - the built binary (rpath patched)
#   usr/lib/ccextractor/          - bundled libgpac
#   usr/share/doc/ccextractor/    - compressed changelog and copyright
#   usr/share/man/man1/           - man page (only when help2man succeeds)
#   DEBIAN/{control,postinst,postrm}
# The step has an id so that the `deb_name` output it writes can be
# referenced as steps.create_deb.outputs.deb_name.
- name: Create .deb package structure
  if: steps.should_build.outputs.should_build == 'true'
  id: create_deb
  run: |
    VERSION="${{ steps.version.outputs.version }}"
    VARIANT="${{ matrix.build_type }}"
    if [ "$VARIANT" = "basic" ]; then
      PKG_NAME="ccextractor_${VERSION}_amd64"
    else
      PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
    fi
    mkdir -p ${PKG_NAME}/DEBIAN
    mkdir -p ${PKG_NAME}/usr/bin
    mkdir -p ${PKG_NAME}/usr/lib/ccextractor
    mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
    mkdir -p ${PKG_NAME}/usr/share/man/man1
    # Copy binary
    cp build/ccextractor ${PKG_NAME}/usr/bin/
    # Copy GPAC library
    cp $HOME/gpac-install/usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
    # Set rpath so ccextractor finds bundled libgpac
    patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
    # Copy documentation
    cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
    cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
    gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
    # Generate man page (best effort). The output redirect creates the file
    # even when help2man is missing or fails, so keep it only if it actually
    # has content; never ship an empty man page.
    MAN_PAGE="${PKG_NAME}/usr/share/man/man1/ccextractor.1"
    help2man --no-info --name="closed captions and teletext subtitle extractor" \
      ./build/ccextractor > "${MAN_PAGE}" 2>/dev/null || true
    if [ -s "${MAN_PAGE}" ]; then
      gzip -9 -n "${MAN_PAGE}"
    else
      rm -f "${MAN_PAGE}"
    fi
    # Create control file
    if [ "$VARIANT" = "basic" ]; then
      PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
    else
      PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
    fi
    INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
    # Determine dependencies based on build variant (Ubuntu 24.04)
    if [ "$VARIANT" = "hardsubx" ]; then
      DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls, libavcodec60, libavformat60, libavutil58, libswscale7, libavdevice60, libswresample4, libavfilter9"
    else
      DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls"
    fi
    # Unquoted heredoc delimiter: ${...} references below are expanded.
    # Continuation lines of Description carry one leading space, as the
    # Debian control format requires.
    cat > ${PKG_NAME}/DEBIAN/control << CTRL
    Package: ccextractor
    Version: ${VERSION}
    Section: utils
    Priority: optional
    Architecture: amd64
    Installed-Size: ${INSTALLED_SIZE}
    Depends: ${DEPENDS}
    Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
    Homepage: https://www.ccextractor.org
    Description: ${PKG_DESCRIPTION}
     CCExtractor is a tool that extracts closed captions and teletext subtitles
     from video files and streams. It supports a wide variety of input formats
     including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
     .
     This package includes a bundled GPAC library for MP4 support.
    CTRL
    # Remove leading spaces from control file
    # NOTE(review): the Description continuation lines must keep their single
    # leading space - confirm this pattern does not strip it.
    sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
    # Create postinst to update library cache
    cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
    #!/bin/sh
    set -e
    ldconfig
    POSTINST
    chmod 755 ${PKG_NAME}/DEBIAN/postinst
    # Create postrm to update library cache
    cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
    #!/bin/sh
    set -e
    ldconfig
    POSTRM
    chmod 755 ${PKG_NAME}/DEBIAN/postrm
    # Set permissions
    chmod 755 ${PKG_NAME}/usr/bin/ccextractor
    chmod 755 ${PKG_NAME}/usr/lib/ccextractor
    find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
    # Build the .deb
    dpkg-deb --build --root-owner-group ${PKG_NAME}
    echo "deb_name=${PKG_NAME}.deb" >> "$GITHUB_OUTPUT"
- name: Test .deb package
if: steps.should_build.outputs.should_build == 'true'
run: |
VERSION="${{ steps.version.outputs.version }}"
VARIANT="${{ matrix.build_type }}"
if [ "$VARIANT" = "basic" ]; then
PKG_NAME="ccextractor_${VERSION}_amd64"
else
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
fi
# Install and test (apt handles dependencies automatically)
sudo apt-get update
sudo apt-get install -y ./${PKG_NAME}.deb
ccextractor --version
- name: Get .deb filename
if: steps.should_build.outputs.should_build == 'true'
id: deb_name
run: |
VERSION="${{ steps.version.outputs.version }}"
VARIANT="${{ matrix.build_type }}"
if [ "$VARIANT" = "basic" ]; then
echo "name=ccextractor_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
else
echo "name=ccextractor-${VARIANT}_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
fi
- name: Upload .deb artifact
if: steps.should_build.outputs.should_build == 'true'
uses: actions/upload-artifact@v6
with:
name: ${{ steps.deb_name.outputs.name }}
path: ${{ steps.deb_name.outputs.name }}
- name: Upload to Release
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
uses: softprops/action-gh-release@v2
with:
files: ${{ steps.deb_name.outputs.name }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

275
.github/workflows/build_deb_debian13.yml vendored Normal file
View File

@@ -0,0 +1,275 @@
name: Build Debian 13 .deb Package
on:
# Build on releases
release:
types: [published]
# Allow manual trigger
workflow_dispatch:
inputs:
build_type:
description: 'Build type (all, basic, hardsubx)'
required: false
default: 'all'
# Build on pushes to workflow file for testing
push:
paths:
- '.github/workflows/build_deb_debian13.yml'
jobs:
build-deb:
runs-on: ubuntu-latest
container:
image: debian:trixie
strategy:
fail-fast: false
matrix:
build_type: [basic, hardsubx]
steps:
# Decide whether this matrix variant is built in this run.
# Only a manual (workflow_dispatch) run can narrow the build: the variant is
# skipped when the requested type is neither "all" nor this variant.
# Every other trigger builds all variants.
# Context values are handed over through `env` rather than being expanded
# inside the script, so user-supplied input is never parsed as shell code.
- name: Check if should build this variant
  id: should_build
  env:
    EVENT_NAME: ${{ github.event_name }}
    INPUT_TYPE: ${{ github.event.inputs.build_type }}
    MATRIX_TYPE: ${{ matrix.build_type }}
  run: |
    should_build=true
    if [ "$EVENT_NAME" = "workflow_dispatch" ] \
      && [ "$INPUT_TYPE" != "all" ] \
      && [ "$INPUT_TYPE" != "$MATRIX_TYPE" ]; then
      should_build=false
    fi
    echo "should_build=$should_build" >> "$GITHUB_OUTPUT"
- name: Install git and dependencies for checkout
if: steps.should_build.outputs.should_build == 'true'
run: |
apt-get update
apt-get install -y git ca-certificates
- name: Checkout repository
if: steps.should_build.outputs.should_build == 'true'
uses: actions/checkout@v6
# Resolve the package version and expose it as the `version` step output.
# Release runs use the release tag (minus an optional leading "v"); all other
# runs read the VERSION define from lib_ccx.h and fall back to "0.96".
# The tag name is passed through `env` so it is never expanded into the
# script text itself.
- name: Get version
  if: steps.should_build.outputs.should_build == 'true'
  id: version
  env:
    EVENT_NAME: ${{ github.event_name }}
    RELEASE_TAG: ${{ github.event.release.tag_name }}
  run: |
    # Extract version from source or use tag
    if [ "$EVENT_NAME" = "release" ]; then
      VERSION="${RELEASE_TAG#v}"  # Remove 'v' prefix if present
    else
      # Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
      VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
    fi
    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
    echo "Building version: $VERSION"
- name: Install base dependencies
if: steps.should_build.outputs.should_build == 'true'
run: |
apt-get install -y --no-install-recommends \
build-essential \
cmake \
pkg-config \
zlib1g-dev \
libpng-dev \
libjpeg-dev \
libfreetype-dev \
libxml2-dev \
libcurl4-gnutls-dev \
libssl-dev \
clang \
libclang-dev \
tesseract-ocr \
libtesseract-dev \
libleptonica-dev \
patchelf \
curl
- name: Install FFmpeg dependencies (HardSubX)
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
run: |
apt-get install -y --no-install-recommends \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
libswscale-dev \
libswresample-dev \
libavfilter-dev \
libavdevice-dev
- name: Install Rust toolchain
if: steps.should_build.outputs.should_build == 'true'
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Build GPAC
if: steps.should_build.outputs.should_build == 'true'
run: |
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
cd gpac
./configure --prefix=/usr
make -j$(nproc)
make install-lib
ldconfig
- name: Build CCExtractor
if: steps.should_build.outputs.should_build == 'true'
run: |
export PATH="$HOME/.cargo/bin:$PATH"
mkdir build && cd build
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
else
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
fi
make -j$(nproc)
- name: Test build
if: steps.should_build.outputs.should_build == 'true'
run: ./build/ccextractor --version
- name: Create .deb package structure
if: steps.should_build.outputs.should_build == 'true'
id: create_deb
run: |
VERSION="${{ steps.version.outputs.version }}"
VARIANT="${{ matrix.build_type }}"
if [ "$VARIANT" = "basic" ]; then
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
else
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
fi
mkdir -p ${PKG_NAME}/DEBIAN
mkdir -p ${PKG_NAME}/usr/bin
mkdir -p ${PKG_NAME}/usr/lib/ccextractor
mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
mkdir -p ${PKG_NAME}/usr/share/man/man1
# Copy binary
cp build/ccextractor ${PKG_NAME}/usr/bin/
# Copy GPAC library
cp /usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
# Set rpath so ccextractor finds bundled libgpac
patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
# Copy documentation
cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
# Create control file
if [ "$VARIANT" = "basic" ]; then
PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
else
PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
fi
INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
# Determine dependencies based on build variant (Debian 13 Trixie)
if [ "$VARIANT" = "hardsubx" ]; then
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls, libavcodec61, libavformat61, libavutil59, libswscale8, libavdevice61, libswresample5, libavfilter10"
else
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls"
fi
cat > ${PKG_NAME}/DEBIAN/control << CTRL
Package: ccextractor
Version: ${VERSION}
Section: utils
Priority: optional
Architecture: amd64
Installed-Size: ${INSTALLED_SIZE}
Depends: ${DEPENDS}
Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
Homepage: https://www.ccextractor.org
Description: ${PKG_DESCRIPTION}
CCExtractor is a tool that extracts closed captions and teletext subtitles
from video files and streams. It supports a wide variety of input formats
including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
.
This package includes a bundled GPAC library for MP4 support.
Built for Debian 13 (Trixie).
CTRL
# Remove leading spaces from control file
sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
# Create postinst to update library cache
cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
#!/bin/sh
set -e
ldconfig
POSTINST
chmod 755 ${PKG_NAME}/DEBIAN/postinst
# Create postrm to update library cache
cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
#!/bin/sh
set -e
ldconfig
POSTRM
chmod 755 ${PKG_NAME}/DEBIAN/postrm
# Set permissions
chmod 755 ${PKG_NAME}/usr/bin/ccextractor
chmod 755 ${PKG_NAME}/usr/lib/ccextractor
find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
# Build the .deb
dpkg-deb --build --root-owner-group ${PKG_NAME}
echo "deb_name=${PKG_NAME}.deb" >> $GITHUB_OUTPUT
- name: Test .deb package
if: steps.should_build.outputs.should_build == 'true'
run: |
VERSION="${{ steps.version.outputs.version }}"
VARIANT="${{ matrix.build_type }}"
if [ "$VARIANT" = "basic" ]; then
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
else
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
fi
# Install and test (apt handles dependencies automatically)
apt-get update
apt-get install -y ./${PKG_NAME}.deb
ccextractor --version
- name: Get .deb filename
if: steps.should_build.outputs.should_build == 'true'
id: deb_name
run: |
VERSION="${{ steps.version.outputs.version }}"
VARIANT="${{ matrix.build_type }}"
if [ "$VARIANT" = "basic" ]; then
echo "name=ccextractor_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
else
echo "name=ccextractor-${VARIANT}_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
fi
- name: Upload .deb artifact
if: steps.should_build.outputs.should_build == 'true'
uses: actions/upload-artifact@v6
with:
name: ${{ steps.deb_name.outputs.name }}
path: ${{ steps.deb_name.outputs.name }}
- name: Upload to Release
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
uses: softprops/action-gh-release@v2
with:
files: ${{ steps.deb_name.outputs.name }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -8,6 +8,8 @@ on:
- 'docker/**'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- 'src/rust/**'
pull_request:
types: [opened, synchronize, reopened]
@@ -16,6 +18,8 @@ on:
- 'docker/**'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- 'src/rust/**'
jobs:

View File

@@ -7,6 +7,8 @@ on:
- '.github/workflows/build_linux.yml'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- '**Makefile**'
- 'linux/**'
- 'package_creators/**'
@@ -17,6 +19,8 @@ on:
- '.github/workflows/build_linux.yml'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- '**Makefile**'
- 'linux/**'
- 'package_creators/**'

View File

@@ -7,6 +7,8 @@ on:
- '.github/workflows/build_mac.yml'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- '**Makefile**'
- 'mac/**'
- 'package_creators/**'
@@ -17,6 +19,8 @@ on:
- '.github/workflows/build_mac.yml'
- '**.c'
- '**.h'
- '**CMakeLists.txt'
- '**.cmake'
- '**Makefile**'
- 'mac/**'
- 'package_creators/**'

51
.github/workflows/build_snap.yml vendored Normal file
View File

@@ -0,0 +1,51 @@
# Builds the CCExtractor snap package.
# Runs on manual dispatch and whenever a release is published. The resulting
# .snap is always stored as a workflow artifact and, for release runs, is
# also attached to the GitHub release.
name: Build CCExtractor Snap
on:
workflow_dispatch:
release:
types: [published]
jobs:
build_snap:
name: Build Snap package
runs-on: ubuntu-22.04
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Install snapd
run: |
sudo apt update
sudo apt install -y snapd
# Make sure the snap daemon is running before any `snap install` call.
- name: Start snapd
run: |
sudo systemctl start snapd.socket
sudo systemctl start snapd
# core22 is the base declared in snap/snapcraft.yaml.
- name: Install Snapcraft
run: |
sudo snap install core22
sudo snap install snapcraft --classic
- name: Show Snapcraft version
run: snapcraft --version
# Destructive mode builds directly on the runner host instead of inside a
# separate build instance.
- name: Build snap
run: sudo snapcraft --destructive-mode
# `ls` exits non-zero when no .snap was produced, so this step doubles as a
# check that the build generated a package.
- name: List generated snap
run: ls -lh *.snap
- name: Upload snap as workflow artifact
uses: actions/upload-artifact@v6
with:
name: CCExtractor Snap
path: "*.snap"
# Only release-triggered runs publish the snap to the release page.
- name: Upload snap to GitHub Release
if: github.event_name == 'release'
uses: softprops/action-gh-release@v2
with:
files: "*.snap"

View File

@@ -12,6 +12,8 @@ on:
- ".github/workflows/build_windows.yml"
- "**.c"
- "**.h"
- "**CMakeLists.txt"
- "**.cmake"
- "windows/**"
- "src/rust/**"
pull_request:
@@ -20,6 +22,8 @@ on:
- ".github/workflows/build_windows.yml"
- "**.c"
- "**.h"
- "**CMakeLists.txt"
- "**.cmake"
- "windows/**"
- "src/rust/**"

15
.github/workflows/homebrew.yml vendored Normal file
View File

@@ -0,0 +1,15 @@
# Bumps the `ccextractor` Homebrew formula whenever a release is published.
# The bump action authenticates with the HOMEBREW_GITHUB_API_TOKEN repository
# secret.
name: Bump Homebrew Formula
on:
release:
types: [published]
jobs:
homebrew:
runs-on: ubuntu-latest
steps:
- name: Update Homebrew formula
uses: dawidd6/action-homebrew-bump-formula@v7
with:
token: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
formula: ccextractor

View File

@@ -66,6 +66,30 @@ You can find sample files on [our website](https://ccextractor.org/public/genera
- [Building on Windows using WSL](docs/build-wsl.md)
#### Linux (Autotools) build notes
CCExtractor also supports an autotools-based build system under the `linux/`
directory.
Important notes:
- The autotools workflow lives inside `linux/`. The `configure` script is
generated there and should be run from that directory.
- Typical build steps are:
```
cd linux
./autogen.sh
./configure
make
```
- Rust support is enabled automatically if `cargo` and `rustc` are available
on the system. In that case, Rust components are built and linked during
`make`.
- If you encounter unexpected build or linking issues, a clean rebuild
(`make clean` or a fresh clone) is recommended, especially when Rust is
involved.
This build flow has been tested on Linux and WSL.
## Compiling CCExtractor
To learn more about how to compile and build CCExtractor for your platform check the [compilation guide](https://github.com/CCExtractor/ccextractor/blob/master/docs/COMPILATION.MD).

View File

@@ -1,5 +1,15 @@
0.96.6 (unreleased)
-------------------
- New: Add Snap packaging support with Snapcraft configuration and GitHub Actions CI workflow.
- Fix: Clear status line output on Linux/WSL to prevent text artifacts (#2017)
- Fix: Prevent infinite loop on truncated MKV files
- Fix: Various memory safety and stability fixes in demuxers (MP4, PS, MKV, DVB)
- Fix: Delete empty output files instead of leaving 0-byte files (#1282)
- Fix: --mkvlang now supports BCP 47 language tags (e.g., en-US, zh-Hans-CN) and multiple codes
0.96.5 (2026-01-05)
-------------------
- New: CCExtractor is available again via Homebrew on macOS and Linux.
- New: Add support for raw CDP (Caption Distribution Packet) files (#1406)
- New: Add --scc-accurate-timing option for bandwidth-aware SCC output (#1120)
- Fix: MXF files containing CEA-708 captions not being detected/extracted (#1647)
@@ -52,6 +62,7 @@
- Extract multiple teletext pages simultaneously with separate output files
- Use --tpage multiple times (e.g., --tpage 100 --tpage 200)
- Output files are named with page suffix (e.g., output_p100.srt, output_p200.srt)
- Fix: SPUPNG subtitle offset calculation to center based on actual image dimensions
- New: Added --list-tracks (-L) option to list all tracks in media files without processing
New: Chinese, Korean, Japanese support - proper encoding and OCR.

View File

@@ -1,3 +1,16 @@
# Installation
## Homebrew
The easiest way to install CCExtractor on macOS and Linux is through Homebrew:
```bash
brew install ccextractor
```
Note: If you don't have Homebrew installed, see [brew.sh](https://brew.sh/)
for installation instructions.
---
# Compiling CCExtractor
You may compile CCExtractor on all major platforms using the `CMakeLists.txt` stored under the `ccextractor/src/` directory. Autoconf and custom build scripts are also available. See the platform-specific instructions in the sections below.

View File

@@ -42,7 +42,16 @@ while [[ $# -gt 0 ]]; do
esac
done
BLD_FLAGS="-std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
# Determine architecture based on cargo (to ensure consistency with Rust part)
CARGO_ARCH=$(file $(which cargo) | grep -o 'x86_64\|arm64')
if [[ "$CARGO_ARCH" == "x86_64" ]]; then
echo "Detected Intel (x86_64) Cargo. Forcing x86_64 build to match Rust and libraries..."
BLD_ARCH="-arch x86_64"
else
BLD_ARCH="-arch arm64"
fi
BLD_FLAGS="$BLD_ARCH -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
# Add flags for bundled libraries (not needed when using system libs)
if [[ "$USE_SYSTEM_LIBS" != "true" ]]; then

19
snap/local/run-ccextractor.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/sh
# Launcher for the snapped ccextractor binary.
#
# Prepends the snap's bundled library directories to LD_LIBRARY_PATH and then
# executes $SNAP/usr/local/bin/ccextractor with the remaining arguments.
# The first argument is discarded - presumably the command path supplied when
# this script runs as a snap command-chain entry (TODO confirm).
set -e

# Default fallback
LIB_TRIPLET="x86_64-linux-gnu"

# Detect multiarch directory if present; the first match wins.
# When nothing matches, the glob stays literal, fails the -d test and the
# default above is kept.
for d in "$SNAP/usr/lib/"*-linux-gnu; do
    if [ -d "$d" ]; then
        LIB_TRIPLET=$(basename "$d")
        break
    fi
done

# Library directories shipped inside the snap. No trailing ':' here: an empty
# LD_LIBRARY_PATH entry makes the dynamic loader search the current working
# directory.
SNAP_LIB_DIRS="$SNAP/usr/lib:\
$SNAP/usr/lib/$LIB_TRIPLET:\
$SNAP/usr/lib/$LIB_TRIPLET/blas:\
$SNAP/usr/lib/$LIB_TRIPLET/lapack:\
$SNAP/usr/lib/$LIB_TRIPLET/pulseaudio"

# Append the caller's LD_LIBRARY_PATH (with its ':' separator) only when it
# is set and non-empty.
export LD_LIBRARY_PATH="$SNAP_LIB_DIRS${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Drop the first argument. Guarded because a bare `shift` with no arguments
# would abort the script under `set -e`.
if [ "$#" -gt 0 ]; then
    shift
fi

exec "$SNAP/usr/local/bin/ccextractor" "$@"

104
snap/snapcraft.yaml Normal file
View File

@@ -0,0 +1,104 @@
# Snap package definition for CCExtractor.
# Two parts are built: gpac (from source, library only) and ccextractor itself
# (CMake build of the src/ directory, which also builds the Rust components).
name: ccextractor
base: core22
version: '0.96.5'
summary: Closed Caption Extractor
description: |
CCExtractor is a tool for extracting closed captions from video files.
website: https://www.ccextractor.org
source-code: https://github.com/CCExtractor/ccextractor
confinement: classic
apps:
ccextractor:
command: usr/local/bin/ccextractor
# The launcher script is copied into the snap by the ccextractor part's
# override-build step; it sets LD_LIBRARY_PATH before starting the binary.
command-chain:
- local/run-ccextractor.sh
# NOTE(review): presumably interface plugs have no effect under classic
# confinement -- confirm whether this entry is still needed.
plugs:
- home
parts:
# gpac is built from the upstream git repository at a pinned tag.
gpac:
plugin: make
source: https://github.com/gpac/gpac.git
source-tag: abi-16.4
build-packages:
- build-essential
- pkg-config
- zlib1g-dev
- libssl-dev
- libfreetype6-dev
- libjpeg-dev
- libpng-dev
# Only the library is installed (install-lib). The prefix recorded in gpac.pc
# is rewritten to the stage directory, presumably so that the ccextractor part
# can resolve gpac through pkg-config (see its PKG_CONFIG_PATH below).
override-build: |
set -eux
./configure --prefix=/usr
make -j$(nproc)
make DESTDIR=$SNAPCRAFT_PART_INSTALL install-lib
sed -i "s|^prefix=.*|prefix=$SNAPCRAFT_STAGE/usr|" $SNAPCRAFT_PART_INSTALL/usr/lib/pkgconfig/gpac.pc
stage:
- usr/lib/libgpac*
- usr/lib/pkgconfig/gpac.pc
- usr/include/gpac
# Main application part; ordered after gpac so the staged library is available.
ccextractor:
after: [gpac]
plugin: cmake
source: .
source-subdir: src
build-environment:
- PKG_CONFIG_PATH: "$SNAPCRAFT_STAGE/usr/lib/pkgconfig:$PKG_CONFIG_PATH"
build-snaps:
- cmake/latest/stable
- rustup/latest/stable
build-packages:
- build-essential
- pkg-config
- clang
- llvm-dev
- libclang-dev
- libzvbi-dev
- libtesseract-dev
- libavcodec-dev
- libavformat-dev
- libavdevice-dev
- libavfilter-dev
- libswscale-dev
- libx11-dev
- libxcb1-dev
- libxcb-shm0-dev
- libpng-dev
- zlib1g-dev
# NOTE(review): libblas3/liblapack3 look like runtime libraries and are listed
# here rather than under stage-packages, while the launcher script adds blas/
# and lapack/ directories to LD_LIBRARY_PATH -- confirm the intended placement.
- libblas3
- liblapack3
# Runtime libraries bundled into the snap.
stage-packages:
- libzvbi0
- libfreetype6
- libpng16-16
- libprotobuf-c1
- libutf8proc2
- libgl1
- libglu1-mesa
- libavcodec58
- libavformat58
- libavutil56
- libavdevice58
- libavfilter7
- libswscale5
- libjpeg-turbo8
- libvorbis0a
- libtheora0
- libxvidcore4
- libfaad2
- libmad0
- liba52-0.7.4
- libpulse0
- pulseaudio-utils
# Installs and selects the stable Rust toolchain, runs the plugin's default
# build, then installs the launcher script referenced by command-chain above.
override-build: |
set -eux
rustup toolchain install stable
rustup default stable
export PATH="$HOME/.cargo/bin:$PATH"
snapcraftctl build
install -D -m 0755 \
$SNAPCRAFT_PROJECT_DIR/snap/local/run-ccextractor.sh \
$SNAPCRAFT_PART_INSTALL/local/run-ccextractor.sh

View File

@@ -9,7 +9,7 @@ option (WITH_HARDSUBX "Build with support for burned-in subtitles" OFF)
# Version number
set (CCEXTRACTOR_VERSION_MAJOR 0)
set (CCEXTRACTOR_VERSION_MINOR 89)
set (CCEXTRACTOR_VERSION_MINOR 96)
# Get project directory
get_filename_component(BASE_PROJ_DIR ../ ABSOLUTE)
@@ -255,4 +255,13 @@ endif (PKG_CONFIG_FOUND)
target_link_libraries (ccextractor ${EXTRA_LIBS})
target_include_directories (ccextractor PUBLIC ${EXTRA_INCLUDES})
# ccx_rust (Rust) calls C functions from ccx (like decode_vbi).
# Force the linker to pull these symbols from ccx before processing ccx_rust.
if (NOT WIN32 AND NOT APPLE)
target_link_options (ccextractor PRIVATE
-Wl,--undefined=decode_vbi
-Wl,--undefined=do_cb
-Wl,--undefined=store_hdcc)
endif()
install (TARGETS ccextractor DESTINATION bin)

View File

@@ -1,9 +1,9 @@
cmake_policy (SET CMP0037 NEW)
if(MSVC)
set (CMAKE_C_FLAGS "-W3 /wd4005 /wd4996")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W3 /wd4005 /wd4996")
else (MSVC)
set (CMAKE_C_FLAGS "-Wall -Wno-pointer-sign -g -std=gnu99")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-pointer-sign -g -std=gnu99")
endif(MSVC)
if(WIN32)

View File

@@ -775,6 +775,7 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
return NULL;
}
ctx->in_fileformat = opt->in_format;
ctx->is_pal = (opt->in_format == 2);
/** used in case of SUB_EOD_MARKER */
ctx->prev_start = -1;

View File

@@ -182,10 +182,10 @@ struct encoder_ctx
// OCR in SPUPNG
int nospupngocr;
int is_pal;
// Teletext multi-page output (issue #665)
struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page
uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot
struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page
uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot
unsigned int tlt_srt_counter[MAX_TLT_PAGES_EXTRACT]; // SRT counter per page
int tlt_out_count; // Number of teletext output files
};

View File

@@ -251,6 +251,9 @@ void set_spupng_offset(void *ctx, int x, int y)
sp->xOffset = x;
sp->yOffset = y;
}
// Forward declaration for calculate_spupng_offsets
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx);
int save_spupng(const char *filename, uint8_t *bitmap, int w, int h,
png_color *palette, png_byte *alpha, int nb_color)
{
@@ -384,7 +387,7 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
struct cc_bitmap *rect;
png_color *palette = NULL;
png_byte *alpha = NULL;
int wrote_opentag = 1;
int wrote_opentag = 0; // Track if we actually wrote the tag
x_pos = -1;
y_pos = -1;
@@ -395,13 +398,11 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
return 0;
inc_spupng_fileindex(sp);
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
if (sub->nb_data == 0 && (sub->flags & SUB_EOD_MARKER))
{
context->prev_start = -1;
if (wrote_opentag)
write_sputag_close(sp);
// No subtitle data, skip writing
return 0;
}
rect = sub->data;
@@ -440,7 +441,13 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
}
}
filename = get_spupng_filename(sp);
set_spupng_offset(sp, x_pos, y_pos);
// Set image dimensions for offset calculation
sp->img_w = width;
sp->img_h = height;
// Calculate centered offsets based on screen size (PAL/NTSC)
calculate_spupng_offsets(sp, context);
if (sub->flags & SUB_EOD_MARKER)
context->prev_start = sub->start_time;
pbuf = (uint8_t *)malloc(width * height);
@@ -475,6 +482,15 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
/* TODO do rectangle wise, one color table should not be used for all rectangles */
mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data1, rect[0].nb_colors);
// Save PNG file first
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
freep(&pbuf);
// Write XML tag with calculated centered offsets
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
wrote_opentag = 1; // Mark that we wrote the tag
#ifdef ENABLE_OCR
if (!context->nospupngocr)
{
@@ -487,8 +503,6 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
}
}
#endif
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
freep(&pbuf);
end:
if (wrote_opentag)
@@ -991,6 +1005,8 @@ int spupng_export_string2png(struct spupng_t *sp, char *str, FILE *output)
*/
// Save image
sp->img_w = canvas_width;
sp->img_h = canvas_height;
write_image(buffer, output, canvas_width, canvas_height);
free(tmp);
free(buffer);
@@ -1081,6 +1097,28 @@ int eia608_to_str(struct encoder_ctx *context, struct eia608_screen *data, char
// string needs to be in UTF-8 encoding.
// This function will take care of encoding.
/**
 * Compute the offsets that center the current subtitle image on the screen.
 *
 * Reads sp->img_w / sp->img_h (which the caller must have set for the image
 * that was just rendered) and writes sp->xOffset / sp->yOffset.
 *
 * The screen is assumed to be 720 pixels wide, and 576 lines high for PAL
 * or 480 lines high otherwise.
 *
 * @param sp  SPUPNG state; img_w/img_h are inputs, xOffset/yOffset are outputs
 * @param ctx Encoder context; in_fileformat and is_pal select the screen height
 */
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx)
{
	const int screen_w = 720;
	/* Teletext is always PAL (in_fileformat == 2 is the value treated as teletext here) */
	const int screen_h = (ctx->in_fileformat == 2 || ctx->is_pal) ? 576 : 480;

	sp->xOffset = (screen_w - sp->img_w) / 2;
	sp->yOffset = (screen_h - sp->img_h) / 2;

	/* An image larger than the assumed screen (e.g. an HD bitmap) would give a
	 * negative offset; anchor it at the top/left edge instead. */
	if (sp->xOffset < 0)
		sp->xOffset = 0;
	if (sp->yOffset < 0)
		sp->yOffset = 0;

	// SPU / DVD requires even yOffset (interlacing)
	if (sp->yOffset & 1)
		sp->yOffset++;
}
int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLONG end_time,
struct encoder_ctx *context)
{
@@ -1099,6 +1137,7 @@ int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLO
}
// free(string_utf32);
fclose(sp->fppng);
calculate_spupng_offsets(sp, context);
write_sputag_open(sp, start_time, end_time);
write_spucomment(sp, string);
write_sputag_close(sp);

View File

@@ -39,6 +39,8 @@ struct spupng_t
int fileIndex;
int xOffset;
int yOffset;
int img_w;
int img_h;
};
#endif

View File

@@ -1712,7 +1712,7 @@ static int write_dvb_sub(struct lib_cc_decode *dec_ctx, struct cc_subtitle *sub)
ctx->ocr_ctx = init_ocr(ctx->lang_index);
ctx->ocr_initialized = 1; // Mark as initialized even if init_ocr returns NULL
}
if (ctx->ocr_ctx)
if (ctx->ocr_ctx && region)
{
int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, region->bgcolor, dec_ctx->ocr_quantmode);
if (ret >= 0)

View File

@@ -66,6 +66,7 @@ void prepare_for_new_file(struct lib_ccx_ctx *ctx)
{
// Init per file variables
ctx->last_reported_progress = -1;
ctx->min_global_timestamp_offset = -1; // -1 means not yet initialized
ctx->stat_numuserheaders = 0;
ctx->stat_dvdccheaders = 0;
ctx->stat_scte20ccheaders = 0;

View File

@@ -1508,7 +1508,24 @@ int general_loop(struct lib_ccx_ctx *ctx)
}
if (ctx->live_stream)
{
int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
if (!t && ctx->demux_ctx->global_timestamp_inited)
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
// Handle multi-program TS timing
if (ctx->demux_ctx->global_timestamp_inited)
{
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
ctx->min_global_timestamp_offset = offset;
// Only use timestamps from the program with the lowest base
if (offset - ctx->min_global_timestamp_offset < 60000)
t = offset - ctx->min_global_timestamp_offset;
else
t = ctx->min_global_timestamp_offset > 0 ? 0 : t;
if (t < 0)
t = 0;
}
int cur_sec = (int)(t / 1000);
int th = cur_sec / 10;
if (ctx->last_reported_progress != th)
{
@@ -1526,6 +1543,28 @@ int general_loop(struct lib_ccx_ctx *ctx)
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
if (!t && ctx->demux_ctx->global_timestamp_inited)
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
// For multi-program TS files, different programs can have different
// PCR bases (e.g., one at 25h, another at 23h). This causes the
// global_timestamp to jump between different bases, resulting in
// wildly different offset values. Track the minimum offset seen
// and only display times from the program with the lowest base.
if (ctx->demux_ctx->global_timestamp_inited)
{
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
// Track minimum offset (this is the PCR base of the program
// with the lowest timestamp, which represents true file time)
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
ctx->min_global_timestamp_offset = offset;
// Only use timestamps from the program with the lowest base.
// If current offset is significantly larger than minimum (by > 60s),
// it's from a program with a higher PCR base - use minimum instead.
if (offset - ctx->min_global_timestamp_offset < 60000)
t = offset - ctx->min_global_timestamp_offset;
else
t = ctx->min_global_timestamp_offset > 0 ? 0 : t; // fallback to minimum-based time
if (t < 0)
t = 0;
}
int cur_sec = (int)(t / 1000);
activity_progress(progress, cur_sec / 60, cur_sec % 60);
ctx->last_reported_progress = progress;

View File

@@ -90,6 +90,7 @@ struct lib_ccx_ctx
LLONG total_past; // Only in binary concat mode
int last_reported_progress;
LLONG min_global_timestamp_offset; // Track minimum (global - min) for multi-program TS
/* Stats */
int stat_numuserheaders;

View File

@@ -122,6 +122,8 @@ void parse_ebml(FILE *file)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -186,6 +188,8 @@ void parse_segment_info(FILE *file)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -484,6 +488,8 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -612,6 +618,8 @@ void parse_segment_cluster(struct matroska_ctx *mkv_ctx)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -734,14 +742,24 @@ int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fram
{
uint32_t nal_length;
nal_length = bswap32(*(long *)&frame.data[i]);
if (i + nal_unit_size > frame.len)
break;
nal_length =
((uint32_t)frame.data[i] << 24) |
((uint32_t)frame.data[i + 1] << 16) |
((uint32_t)frame.data[i + 2] << 8) |
(uint32_t)frame.data[i + 3];
i += nal_unit_size;
if (nal_length > frame.len - i)
break;
if (nal_length > 0)
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
i += nal_length;
} // outer for
assert(i == frame.len);
mkv_ctx->current_second = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
@@ -769,11 +787,22 @@ int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fra
{
uint32_t nal_length;
nal_length = bswap32(*(long *)&frame.data[i]);
if (i + nal_unit_size > frame.len)
break;
nal_length =
((uint32_t)frame.data[i] << 24) |
((uint32_t)frame.data[i + 1] << 16) |
((uint32_t)frame.data[i + 2] << 8) |
(uint32_t)frame.data[i + 3];
i += nal_unit_size;
if (nal_length > frame.len - i)
break;
if (nal_length > 0)
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
i += nal_length;
}
@@ -845,6 +874,8 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -1197,6 +1228,8 @@ void parse_segment_tracks(struct matroska_ctx *mkv_ctx)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
@@ -1241,6 +1274,8 @@ void parse_segment(struct matroska_ctx *mkv_ctx)
{
code <<= 8;
code += mkv_read_byte(file);
if (feof(file))
break;
code_len++;
switch (code)
{
@@ -1915,6 +1950,9 @@ void matroska_parse(struct matroska_ctx *mkv_ctx)
{
code <<= 8;
code += mkv_read_byte(file);
// Check for EOF after reading - feof() is only set after a failed read
if (feof(file))
break;
code_len++;
switch (code)

View File

@@ -899,6 +899,11 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
#endif
memset(&dec_sub, 0, sizeof(dec_sub));
if (file == NULL)
{
mprint("Error: NULL file path provided to processmp4\n");
return -1;
}
mprint("Opening \'%s\': ", file);
#ifdef MP4_DEBUG
gf_log_set_tool_level(GF_LOG_CONTAINER, GF_LOG_DEBUG);

View File

@@ -14,7 +14,19 @@ void dinit_write(struct ccx_s_write *wb)
return;
}
if (wb->fh > 0)
{
// Check if the file is empty before closing
off_t file_size = lseek(wb->fh, 0, SEEK_END);
close(wb->fh);
// Delete empty output files to avoid generating useless 0-byte files
// This commonly happens with -12 option when one field has no captions
if (file_size == 0 && wb->filename != NULL)
{
unlink(wb->filename);
mprint("Deleted empty output file: %s\n", wb->filename);
}
}
freep(&wb->filename);
freep(&wb->original_filename);
if (wb->with_semaphore && wb->semaphore_filename)

View File

@@ -411,9 +411,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
{
// if this any generally used video stream tyoe get clashed with ATSC/SCTE standard
// then this code can go in some atsc flag
// Validate ES_info_length against buffer bounds to prevent heap overflow
if (i + 5 + ES_info_length > len)
break;
unsigned char *es_info = buf + i + 5;
for (desc_len = 0; (buf + i + 5 + ES_info_length) > es_info; es_info += desc_len)
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
{
// Need at least 2 bytes for descriptor_tag and desc_len
if (es_info + 2 > es_info_end)
break;
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
int nb_service;
int is_608;
@@ -437,9 +446,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
if (IS_FEASIBLE(ctx->codec, ctx->nocodec, CCX_CODEC_TELETEXT) && ES_info_length && stream_type == CCX_STREAM_TYPE_PRIVATE_MPEG2) // MPEG-2 Packetized Elementary Stream packets containing private data
{
// Validate ES_info_length against buffer bounds
if (i + 5 + ES_info_length > len)
continue;
unsigned char *es_info = buf + i + 5;
for (desc_len = 0; (buf + i + 5 + ES_info_length) - es_info; es_info += desc_len)
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
{
// Need at least 2 bytes for descriptor_tag and desc_len
if (es_info + 2 > es_info_end)
break;
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
desc_len = (*es_info++);
if (!IS_VALID_TELETEXT_DESC(descriptor_tag))
@@ -621,6 +639,10 @@ int parse_PAT(struct ccx_demuxer *ctx)
payload_start = ctx->PID_buffers[0]->buffer + pointer_field + 1;
payload_length = ctx->PID_buffers[0]->buffer_length - (pointer_field + 1);
// Need at least 8 bytes to read header fields
if (payload_length < 8)
return 0;
section_number = payload_start[6];
last_section_number = payload_start[7];

View File

@@ -179,16 +179,21 @@ void mprint(const char *fmt, ...)
if (!ccx_options.messages_target)
return;
va_start(args, fmt);
if (ccx_options.messages_target == CCX_MESSAGES_STDOUT)
FILE *target = (ccx_options.messages_target == CCX_MESSAGES_STDOUT) ? stdout : stderr;
if (fmt[0] == '\r')
{
vfprintf(stdout, fmt, args);
fflush(stdout);
}
else
{
vfprintf(stderr, fmt, args);
fflush(stderr);
#ifndef _WIN32
fprintf(target, "\r\033[K"); // Clear the line first
fmt++; // Skip the '\r' so only the clean text gets printed next
#endif
}
// Windows (legacy console) does not support ANSI sequences; fallback to standard \r; and vfprintf below handles it the old-fashioned way.
vfprintf(target, fmt, args);
fflush(target);
va_end(args);
}

View File

@@ -0,0 +1,385 @@
//! MKV language filtering support.
//!
//! Matroska files support two language code formats:
//! - ISO 639-2 (3-letter bibliographic codes): "eng", "fre", "chi"
//! - BCP 47 / IETF language tags: "en-US", "fr-CA", "zh-Hans"
//!
//! This module provides [`MkvLangFilter`] for parsing and matching language codes.
use std::fmt;
use std::str::FromStr;
/// A filter for matching MKV track languages.
///
/// Supports comma-separated lists of language codes in either:
/// - ISO 639-2 format (3-letter codes like "eng", "fre")
/// - BCP 47 format (tags like "en-US", "fr-CA", "zh-Hans")
///
/// Every entry is validated by [`LanguageCode::new`] when the filter is built, so a
/// single invalid entry rejects the whole list. A track is accepted as soon as any
/// one of the entries matches it.
///
/// # Examples
///
/// ```
/// use lib_ccxr::common::MkvLangFilter;
///
/// // Single language
/// let filter: MkvLangFilter = "eng".parse().unwrap();
/// assert!(filter.matches("eng", None));
///
/// // Multiple languages
/// let filter: MkvLangFilter = "eng,fre,chi".parse().unwrap();
/// assert!(filter.matches("fre", None));
///
/// // BCP 47 matching
/// let filter: MkvLangFilter = "en-US,fr-CA".parse().unwrap();
/// assert!(filter.matches("eng", Some("en-US")));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MkvLangFilter {
/// The original input string with surrounding whitespace trimmed (used for C FFI)
raw: String,
/// Parsed and validated language codes, in the order they appear in the input
codes: Vec<LanguageCode>,
}
/// A single language code, either ISO 639-2 or BCP 47.
///
/// The code is validated on construction and stored in lowercase, so all
/// comparisons against it are ASCII case-insensitive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LanguageCode {
    /// The normalized (lowercase) code
    code: String,
}

/// Error type for invalid language codes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InvalidLanguageCode {
    /// The invalid code (with surrounding whitespace trimmed)
    pub code: String,
    /// Description of what's wrong
    pub reason: &'static str,
}

impl fmt::Display for InvalidLanguageCode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "invalid language code '{}': {}", self.code, self.reason)
    }
}

impl std::error::Error for InvalidLanguageCode {}

impl LanguageCode {
    /// Validates and creates a new language code.
    ///
    /// Surrounding whitespace is ignored and the stored code is lowercased.
    ///
    /// Accepts:
    /// - ISO 639-2 codes: 3 ASCII letters (e.g., "eng", "fre")
    /// - BCP 47 tags: primary language with optional subtags separated by hyphens
    ///   (e.g., "en-US", "fr-CA", "zh-Hans-CN")
    ///
    /// # BCP 47 Structure
    /// - Primary language: 2-3 letters
    /// - Script (optional): 4 letters (e.g., "Hans", "Latn")
    /// - Region (optional): 2 letters or 3 digits (e.g., "US", "419")
    /// - Variant (optional): 5-8 alphanumeric characters, or 4 starting with a digit
    ///
    /// Only the shape of each subtag is checked; codes are not looked up in any
    /// language registry.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidLanguageCode`] carrying the trimmed input and the reason
    /// when the input is empty, contains characters other than ASCII letters,
    /// digits and hyphens, has misplaced hyphens, or has a malformed subtag.
    pub fn new(code: &str) -> Result<Self, InvalidLanguageCode> {
        let code = code.trim();
        let invalid = |reason: &'static str| InvalidLanguageCode {
            code: code.to_string(),
            reason,
        };

        if code.is_empty() {
            return Err(invalid("empty language code"));
        }
        // Check for valid characters (alphanumeric and hyphens only)
        if !code.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-') {
            return Err(invalid(
                "must contain only ASCII letters, digits, and hyphens",
            ));
        }
        if code.starts_with('-') || code.ends_with('-') {
            return Err(invalid("cannot start or end with hyphen"));
        }
        if code.contains("--") {
            return Err(invalid("cannot have consecutive hyphens"));
        }

        // The hyphen checks above guarantee that every subtag is non-empty.
        let mut subtags = code.split('-');

        // First subtag must be the primary language (2-3 letters)
        let primary = subtags.next().unwrap_or_default();
        if !(2..=3).contains(&primary.len()) {
            return Err(invalid("primary language subtag must be 2-3 letters"));
        }
        if !primary.bytes().all(|b| b.is_ascii_alphabetic()) {
            return Err(invalid(
                "primary language subtag must contain only letters",
            ));
        }

        if !subtags.all(Self::is_valid_subtag) {
            return Err(invalid("invalid subtag format"));
        }

        Ok(Self {
            // The code is pure ASCII at this point, so ASCII lowercasing is exact.
            code: code.to_ascii_lowercase(),
        })
    }

    /// Returns the normalized (lowercase) code.
    pub fn as_str(&self) -> &str {
        &self.code
    }

    /// Checks if this code matches a track's language.
    ///
    /// All comparisons are ASCII case-insensitive. A track matches when any of
    /// the following holds:
    /// 1. The code equals the track's ISO 639-2 code.
    /// 2. The code equals the track's BCP 47 tag.
    /// 3. The code is a subtag prefix of the track's BCP 47 tag
    ///    (e.g., filter "en" matches track "en-US").
    /// 4. The track's BCP 47 tag is a subtag prefix of the code
    ///    (e.g., filter "en-US" matches track "en").
    ///
    /// No mapping between ISO 639-2 and BCP 47 primary languages is performed:
    /// the filter "eng" does not match a track whose only hint is the BCP 47
    /// tag "en-US".
    pub fn matches(&self, iso639: &str, bcp47: Option<&str>) -> bool {
        if self.code.eq_ignore_ascii_case(iso639) {
            return true;
        }
        bcp47.is_some_and(|tag| {
            self.code.eq_ignore_ascii_case(tag)
                || Self::is_subtag_prefix(&self.code, tag)
                || Self::is_subtag_prefix(tag, &self.code)
        })
    }

    /// Returns true if `subtag` has the shape of a BCP 47 subtag that may follow
    /// the primary language.
    fn is_valid_subtag(subtag: &str) -> bool {
        let bytes = subtag.as_bytes();
        let all_alpha = bytes.iter().all(u8::is_ascii_alphabetic);
        let all_digit = bytes.iter().all(u8::is_ascii_digit);
        let all_alnum = bytes.iter().all(u8::is_ascii_alphanumeric);

        match bytes.len() {
            // Extension / private-use singleton, or variant
            1 | 5..=8 => all_alnum,
            // Region (2 letters)
            2 => all_alpha,
            // 3 letters or 3 digits (UN M.49 region)
            3 => all_alpha || all_digit,
            // Script, or variant starting with a digit
            4 => all_alpha || (bytes[0].is_ascii_digit() && all_alnum),
            _ => false,
        }
    }

    /// Returns true if `tag` is `prefix` followed by `-` and further subtags,
    /// compared ASCII case-insensitively.
    ///
    /// Works on bytes so that arbitrary (even non-ASCII) track tags can never
    /// cause a slicing panic.
    fn is_subtag_prefix(prefix: &str, tag: &str) -> bool {
        let (prefix, tag) = (prefix.as_bytes(), tag.as_bytes());
        tag.len() > prefix.len()
            && tag[prefix.len()] == b'-'
            && tag[..prefix.len()].eq_ignore_ascii_case(prefix)
    }
}

impl FromStr for LanguageCode {
    type Err = InvalidLanguageCode;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::new(s)
    }
}

impl fmt::Display for LanguageCode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.code)
    }
}
impl MkvLangFilter {
    /// Builds a filter from a comma-separated list of language codes.
    ///
    /// Surrounding whitespace of the whole input is dropped; each entry is then
    /// parsed with [`LanguageCode::new`].
    ///
    /// # Errors
    ///
    /// Returns [`InvalidLanguageCode`] when the input is blank or when any entry
    /// fails validation (the first failing entry is reported).
    pub fn new(input: &str) -> Result<Self, InvalidLanguageCode> {
        let trimmed = input.trim();
        if trimmed.is_empty() {
            return Err(InvalidLanguageCode {
                code: String::new(),
                reason: "empty language filter",
            });
        }

        let mut parsed = Vec::new();
        for entry in trimmed.split(',') {
            parsed.push(LanguageCode::new(entry)?);
        }

        Ok(Self {
            raw: trimmed.to_string(),
            codes: parsed,
        })
    }

    /// Returns the trimmed input string exactly as given (for C FFI compatibility).
    pub fn as_raw_str(&self) -> &str {
        self.raw.as_str()
    }

    /// Returns the validated language codes in input order.
    pub fn codes(&self) -> &[LanguageCode] {
        self.codes.as_slice()
    }

    /// Reports whether at least one code of the filter matches a track's language.
    ///
    /// # Arguments
    /// - `iso639`: The track's ISO 639-2 language code (e.g., "eng")
    /// - `bcp47`: The track's BCP 47 language tag, if available (e.g., "en-US")
    pub fn matches(&self, iso639: &str, bcp47: Option<&str>) -> bool {
        for candidate in &self.codes {
            if candidate.matches(iso639, bcp47) {
                return true;
            }
        }
        false
    }
}
impl FromStr for MkvLangFilter {
type Err = InvalidLanguageCode;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
/// Displays the filter as the (trimmed) string it was created from.
impl fmt::Display for MkvLangFilter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the string's own Display so width/padding flags still apply.
        fmt::Display::fmt(&self.raw, f)
    }
}
/// Unit tests for language-code validation and filter matching.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_iso639_codes() {
        // Valid 3-letter codes
        assert!(LanguageCode::new("eng").is_ok());
        assert!(LanguageCode::new("fre").is_ok());
        assert!(LanguageCode::new("chi").is_ok());
        assert!(LanguageCode::new("ENG").is_ok()); // Case insensitive
        // 2-letter codes (ISO 639-1 style, valid in BCP 47)
        assert!(LanguageCode::new("en").is_ok());
        assert!(LanguageCode::new("fr").is_ok());
    }

    #[test]
    fn test_bcp47_codes() {
        // Language + region
        assert!(LanguageCode::new("en-US").is_ok());
        assert!(LanguageCode::new("fr-CA").is_ok());
        assert!(LanguageCode::new("pt-BR").is_ok());
        // Language + script
        assert!(LanguageCode::new("zh-Hans").is_ok());
        assert!(LanguageCode::new("zh-Hant").is_ok());
        assert!(LanguageCode::new("sr-Latn").is_ok());
        // Language + script + region
        assert!(LanguageCode::new("zh-Hans-CN").is_ok());
        assert!(LanguageCode::new("zh-Hant-TW").is_ok());
        // UN M.49 numeric region codes
        assert!(LanguageCode::new("es-419").is_ok()); // Latin America
        // Variant made of 4 characters starting with a digit
        assert!(LanguageCode::new("de-1996").is_ok());
    }

    #[test]
    fn test_invalid_codes() {
        // Too short
        assert!(LanguageCode::new("a").is_err());
        // Primary subtag too long
        assert!(LanguageCode::new("abcd").is_err());
        // Invalid characters
        assert!(LanguageCode::new("en_US").is_err()); // Underscore not allowed
        assert!(LanguageCode::new("en US").is_err()); // Space not allowed
        assert!(LanguageCode::new("ça").is_err()); // Non-ASCII
        // Invalid structure
        assert!(LanguageCode::new("-en").is_err()); // Leading hyphen
        assert!(LanguageCode::new("en-").is_err()); // Trailing hyphen
        assert!(LanguageCode::new("en--US").is_err()); // Double hyphen
        // Invalid subtag shapes
        assert!(LanguageCode::new("en-U1").is_err()); // 2 chars must be letters
        assert!(LanguageCode::new("en-toolongsubtag").is_err()); // More than 8 chars
        // Empty
        assert!(LanguageCode::new("").is_err());
    }

    #[test]
    fn test_code_normalization() {
        // Surrounding whitespace is dropped and the code is lowercased
        let code = LanguageCode::new("  EN-us ").unwrap();
        assert_eq!(code.as_str(), "en-us");
        assert_eq!(code.to_string(), "en-us");
    }

    #[test]
    fn test_prefix_matching() {
        // Filter is a prefix of the track's BCP 47 tag
        let en = LanguageCode::new("en").unwrap();
        assert!(en.matches("eng", Some("en-US")));
        // A prefix must end on a subtag boundary
        assert!(!en.matches("enm", Some("enm-GB")));

        // Track's BCP 47 tag is a prefix of the filter
        let en_us = LanguageCode::new("en-US").unwrap();
        assert!(en_us.matches("eng", Some("en")));

        // No ISO 639-2 <-> BCP 47 mapping is performed
        let eng = LanguageCode::new("eng").unwrap();
        assert!(!eng.matches("und", Some("en-US")));
    }

    #[test]
    fn test_filter_multiple_codes() {
        let filter = MkvLangFilter::new("eng,fre,chi").unwrap();
        assert_eq!(filter.codes().len(), 3);
        assert!(filter.matches("eng", None));
        assert!(filter.matches("fre", None));
        assert!(filter.matches("chi", None));
        assert!(!filter.matches("spa", None));
    }

    #[test]
    fn test_filter_list_edge_cases() {
        // Whitespace around entries is tolerated
        let filter = MkvLangFilter::new(" eng , fre ").unwrap();
        assert_eq!(filter.codes().len(), 2);
        assert!(filter.matches("fre", None));

        // Blank input and empty entries are rejected
        assert!(MkvLangFilter::new("").is_err());
        assert!(MkvLangFilter::new("   ").is_err());
        assert!(MkvLangFilter::new("eng,").is_err());
        assert!(MkvLangFilter::new("eng,,fre").is_err());
    }

    #[test]
    fn test_filter_bcp47_matching() {
        let filter = MkvLangFilter::new("en-US,fr-CA").unwrap();
        // Exact BCP 47 match
        assert!(filter.matches("eng", Some("en-US")));
        assert!(filter.matches("fre", Some("fr-CA")));
        // No match
        assert!(!filter.matches("eng", Some("en-GB")));
        assert!(!filter.matches("eng", None));
    }

    #[test]
    fn test_filter_mixed_formats() {
        let filter = MkvLangFilter::new("eng,fr-CA,zh-Hans").unwrap();
        assert!(filter.matches("eng", None));
        assert!(filter.matches("fre", Some("fr-CA")));
        assert!(filter.matches("chi", Some("zh-Hans")));
    }

    #[test]
    fn test_case_insensitivity() {
        let filter = MkvLangFilter::new("ENG,FR-CA").unwrap();
        assert!(filter.matches("eng", None));
        assert!(filter.matches("ENG", None));
        assert!(filter.matches("fre", Some("fr-ca")));
        assert!(filter.matches("FRE", Some("FR-CA")));
    }

    #[test]
    fn test_raw_string_preserved() {
        let filter = MkvLangFilter::new("eng,fre").unwrap();
        assert_eq!(filter.as_raw_str(), "eng,fre");
    }

    #[test]
    fn test_from_str_and_display_round_trip() {
        let filter: MkvLangFilter = "eng,fr-CA".parse().unwrap();
        assert_eq!(filter.to_string(), "eng,fr-CA");
        assert_eq!(filter, MkvLangFilter::new("eng,fr-CA").unwrap());
    }
}

View File

@@ -18,8 +18,10 @@
mod bitstream;
mod constants;
mod mkv_lang;
mod options;
pub use bitstream::*;
pub use constants::*;
pub use mkv_lang::*;
pub use options::*;

View File

@@ -466,8 +466,9 @@ pub struct Options {
pub ocr_line_split: bool,
/// If true, use character blacklist to prevent common OCR errors (e.g. | vs I)
pub ocr_blacklist: bool,
/// The name of the language stream for MKV
pub mkvlang: Option<Language>,
/// Language filter for MKV subtitle tracks.
/// Accepts comma-separated ISO 639-2 codes (e.g., "eng,fre") or BCP 47 tags (e.g., "en-US,fr-CA").
pub mkvlang: Option<super::MkvLangFilter>,
/// If true, the video stream will be processed even if we're using a different one for subtitles.
pub analyze_video_stream: bool,

View File

@@ -1154,10 +1154,9 @@ impl<'a> TeletextContext<'a> {
}
if v >= 0x20 {
let u = char::from_u32(v as u32).unwrap();
let u = char::from_u32(v as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
self.page_buffer_cur.get_or_insert("".into()).push(u);
if logger().expect("could not access logger").is_gui_mode() {
// For now we just handle the easy stuff
eprint!("{u}");
}
}
@@ -1225,13 +1224,15 @@ impl<'a> TeletextContext<'a> {
}
}
_ => {
ans = Some(Subtitle::new_text(
self.page_buffer_cur.take().unwrap().into(),
self.page_buffer.show_timestamp,
self.page_buffer.hide_timestamp + Timestamp::from_millis(1),
None,
"TLT".into(),
));
if let Some(cur) = self.page_buffer_cur.take() {
ans = Some(Subtitle::new_text(
cur.into(),
self.page_buffer.show_timestamp,
self.page_buffer.hide_timestamp + Timestamp::from_millis(1),
None,
"TLT".into(),
));
}
}
}
@@ -1251,34 +1252,43 @@ impl<'a> TeletextContext<'a> {
capitalization_list: &[String],
) {
// variable names conform to ETS 300 706, chapter 7.1.2
let address = (decode_hamming_8_4(packet.address[1]).unwrap() << 4)
| decode_hamming_8_4(packet.address[0]).unwrap();
let Some(addr1) = decode_hamming_8_4(packet.address[1]) else {
return;
};
let Some(addr0) = decode_hamming_8_4(packet.address[0]) else {
return;
};
let address = (addr1 << 4) | addr0;
let mut m = address & 0x7;
if m == 0 {
m = 8;
}
let y = (address >> 3) & 0x1f;
let designation_code = if y > 25 {
decode_hamming_8_4(packet.data[0]).unwrap()
decode_hamming_8_4(packet.data[0]).unwrap_or(0x00)
} else {
0x00
};
if y == 0 {
// CC map
let i = (decode_hamming_8_4(packet.data[1]).unwrap() << 4)
| decode_hamming_8_4(packet.data[0]).unwrap();
let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap() & 0x08) >> 3;
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0);
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0);
let i = (h1 << 4) | h0;
let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap_or(0) & 0x08) >> 3;
self.cc_map[i as usize] |= flag_subtitle << (m - 1);
let flag_subtitle = flag_subtitle != 0;
if flag_subtitle && (i < 0xff) {
let mut thisp = ((m as u32) << 8)
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u32) << 4)
| (decode_hamming_8_4(packet.data[0]).unwrap() as u32);
let t1 = format!("{thisp:x}"); // Example: 1928 -> 788
thisp = t1.parse().unwrap();
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u32;
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u32;
let mut thisp = ((m as u32) << 8) | (h1 << 4) | h0;
let t1 = format!("{thisp:x}");
// Fallback to original value if parsing fails to avoid panics on malformed BCD
thisp = t1.parse().unwrap_or(thisp);
if !self.seen_sub_page[thisp as usize] {
self.seen_sub_page[thisp as usize] = true;
info!(
@@ -1288,36 +1298,28 @@ impl<'a> TeletextContext<'a> {
}
}
if (self.config.page.get() == 0.into()) && flag_subtitle && (i < 0xff) {
self.config.page.replace(
(((m as u16) << 8)
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4)
| (decode_hamming_8_4(packet.data[0]).unwrap() as u16))
.into(),
);
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u16;
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u16;
self.config
.page
.replace((((m as u16) << 8) | (h1 << 4) | h0).into());
info!("- No teletext page specified, first received suitable page is {}, not guaranteed\n", self.config.page.get());
}
// Page number and control bits
let page_number: TeletextPageNumber = (((m as u16) << 8)
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4)
| (decode_hamming_8_4(packet.data[0]).unwrap() as u16))
.into();
let charset = ((decode_hamming_8_4(packet.data[7]).unwrap() & 0x08)
| (decode_hamming_8_4(packet.data[7]).unwrap() & 0x04)
| (decode_hamming_8_4(packet.data[7]).unwrap() & 0x02))
>> 1;
// let flag_suppress_header = decode_hamming_8_4(packet.data[6]).unwrap() & 0x01;
// let flag_inhibit_display = (decode_hamming_8_4(packet.data[6]).unwrap() & 0x08) >> 3;
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u16;
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u16;
let page_number: TeletextPageNumber = (((m as u16) << 8) | (h1 << 4) | h0).into();
let c7 = decode_hamming_8_4(packet.data[7]).unwrap_or(0);
let charset = (c7 & 0x08 | c7 & 0x04 | c7 & 0x02) >> 1;
// ETS 300 706, chapter 9.3.1.3:
// When set to '1' the service is designated to be in Serial mode and the transmission of a page is terminated
// by the next page header with a different page number.
// When set to '0' the service is designated to be in Parallel mode and the transmission of a page is terminated
// by the next page header with a different page number but the same magazine number.
// The same setting shall be used for all page headers in the service.
// ETS 300 706, chapter 7.2.1: Page is terminated by and excludes the next page header packet
// having the same magazine address in parallel transmission mode, or any magazine address in serial transmission mode.
self.transmission_mode = if decode_hamming_8_4(packet.data[7]).unwrap() & 0x01 == 0 {
self.transmission_mode = if c7 & 0x01 == 0 {
TransmissionMode::Parallel
} else {
TransmissionMode::Serial
@@ -1353,19 +1355,17 @@ impl<'a> TeletextContext<'a> {
// Now we have the beginning of page transmission; if there is page_buffer pending, process it
if self.page_buffer.tainted {
// Convert telx to UCS-2 before processing
for yt in 1..=23 {
for it in 0..40 {
if self.page_buffer.text[yt][it] != 0x00
&& !self.page_buffer.g2_char_present[yt][it]
{
self.page_buffer.text[yt][it] = self
.g0_charset
.ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap());
if let Ok(c) = self.page_buffer.text[yt][it].try_into() {
self.page_buffer.text[yt][it] = self.g0_charset.ucs2_char(c);
}
}
}
}
// it would be nice, if subtitle hides on previous video frame, so we contract 40 ms (1 frame @25 fps)
self.page_buffer.hide_timestamp = timestamp - Timestamp::from_millis(40);
if self.page_buffer.hide_timestamp > timestamp {
self.page_buffer.hide_timestamp = Timestamp::from_millis(0);
@@ -1544,12 +1544,14 @@ impl<'a> TeletextContext<'a> {
info!("- Programme Identification Data = ");
for i in 20..40 {
let c = self.g0_charset.ucs2_char(packet.data[i]);
// strip any control codes from PID, eg. TVP station
if c < 0x20 {
continue;
}
info!("{}", char::from_u32(c as u32).unwrap());
info!(
"{}",
char::from_u32(c as u32).unwrap_or(char::REPLACEMENT_CHARACTER)
);
}
info!("\n");
@@ -1580,7 +1582,7 @@ impl<'a> TeletextContext<'a> {
info!(
"- Universal Time Co-ordinated = {}\n",
t0.to_ctime().unwrap()
t0.to_ctime().as_deref().unwrap_or("unknown")
);
debug!(msg_type = DebugMessageFlag::TELETEXT; "- Transmission mode = {:?}\n", self.transmission_mode);
@@ -1589,8 +1591,13 @@ impl<'a> TeletextContext<'a> {
&& matches!(self.config.date_format, TimestampFormat::Date { .. })
&& !self.config.noautotimeref
{
info!("- Broadcast Service Data Packet received, resetting UTC referential value to {}\n", t0.to_ctime().unwrap());
*UTC_REFVALUE.write().unwrap() = t as u64;
info!(
"- Broadcast Service Data Packet received, resetting UTC referential value to {}\n",
t0.to_ctime().as_deref().unwrap_or("unknown")
);
if let Ok(mut lock) = UTC_REFVALUE.write() {
*lock = t as u64;
}
self.states.pts_initialized = false;
}
@@ -1610,15 +1617,14 @@ impl<'a> TeletextContext<'a> {
if let Some(subtitles) = subtitles {
// output any pending close caption
if self.page_buffer.tainted {
// Convert telx to UCS-2 before processing
for yt in 1..=23 {
for it in 0..40 {
if self.page_buffer.text[yt][it] != 0x00
&& !self.page_buffer.g2_char_present[yt][it]
{
self.page_buffer.text[yt][it] = self
.g0_charset
.ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap());
if let Ok(c) = self.page_buffer.text[yt][it].try_into() {
self.page_buffer.text[yt][it] = self.g0_charset.ucs2_char(c);
}
}
}
}

View File

@@ -225,9 +225,6 @@ impl Timestamp {
let m = millis / 60000 - 60 * h;
let s = millis / 1000 - 3600 * h - 60 * m;
let u = millis - 3600000 * h - 60000 * m - 1000 * s;
if h > 24 {
println!("{h}")
}
Ok((h.try_into()?, m as u8, s as u8, u as u16))
}

View File

@@ -28,7 +28,7 @@ const BURNEDIN_SUBTITLE_EXTRACTION: &str = "Burned-in subtitle extraction";
#[derive(Debug, Parser)]
#[command(name = "CCExtractor")]
#[command(author = "Carlos Fernandez Sanz, Volker Quetschke.")]
#[command(version = "1.0")]
#[command(version = "0.96.5")]
#[command(about = "Teletext portions taken from Petr Kutalek's telxcc
--------------------------------------------------------------------------
Originally based on McPoodle's tools. Check his page for lots of information

View File

@@ -21,6 +21,19 @@ pub unsafe extern "C" fn ccxr_process_avc(
return 0;
}
// In report-only mode (-out=report), enc_ctx is NULL because no encoder is created.
// Skip AVC processing in this case since we can't output captions without an encoder.
// Return the full buffer length to indicate we've "consumed" the data.
if enc_ctx.is_null() {
return avcbuflen;
}
// dec_ctx and sub should never be NULL in normal operation, but check defensively
if dec_ctx.is_null() || sub.is_null() {
info!("Warning: dec_ctx or sub is NULL in ccxr_process_avc");
return avcbuflen;
}
// Create a safe slice from the raw pointer
let avc_slice = std::slice::from_raw_parts_mut(avcbuf, avcbuflen);

View File

@@ -50,7 +50,7 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
return 0;
}
let mut payload_type = 0;
let mut payload_type: u32 = 0;
while seibuf_idx < seibuf.len() && seibuf[seibuf_idx] == 0xff {
payload_type += 255;
seibuf_idx += 1;
@@ -60,10 +60,10 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
return seibuf_idx;
}
payload_type += seibuf[seibuf_idx] as i32;
payload_type += seibuf[seibuf_idx] as u32;
seibuf_idx += 1;
let mut payload_size = 0;
let mut payload_size: u32 = 0;
while seibuf_idx < seibuf.len() && seibuf[seibuf_idx] == 0xff {
payload_size += 255;
seibuf_idx += 1;
@@ -73,7 +73,7 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
return seibuf_idx;
}
payload_size += seibuf[seibuf_idx] as i32;
payload_size += seibuf[seibuf_idx] as u32;
seibuf_idx += 1;
let mut broken = false;
@@ -226,12 +226,10 @@ pub fn user_data_registered_itu_t_t35(ctx: &mut AvcContextRust, userbuf: &[u8])
}
// Save the data and process once we know the sequence number
if ((ctx.cc_count as usize + local_cc_count) * 3) + 1 > ctx.cc_databufsize {
let required_size = ((ctx.cc_count as usize + local_cc_count) * 3) + 1;
if required_size > ctx.cc_data.len() {
let new_size = ((ctx.cc_count as usize + local_cc_count) * 6) + 1;
unsafe {
ctx.cc_data.set_len(new_size);
}
ctx.cc_data.reserve(new_size);
ctx.cc_data.resize(new_size, 0);
ctx.cc_databufsize = new_size;
}

View File

@@ -18,6 +18,7 @@ use lib_ccxr::common::DtvccServiceCharset;
use lib_ccxr::common::EncoderConfig;
use lib_ccxr::common::EncodersTranscriptFormat;
use lib_ccxr::common::Language;
use lib_ccxr::common::MkvLangFilter;
use lib_ccxr::common::Options;
use lib_ccxr::common::OutputFormat;
use lib_ccxr::common::SelectCodec;
@@ -183,9 +184,9 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
(*ccx_s_options).ocr_quantmode = options.ocr_quantmode as _;
(*ccx_s_options).ocr_line_split = options.ocr_line_split as _;
(*ccx_s_options).ocr_blacklist = options.ocr_blacklist as _;
if let Some(mkvlang) = options.mkvlang {
if let Some(ref mkvlang) = options.mkvlang {
(*ccx_s_options).mkvlang =
replace_rust_c_string((*ccx_s_options).mkvlang, mkvlang.to_ctype().as_str());
replace_rust_c_string((*ccx_s_options).mkvlang, mkvlang.as_raw_str());
}
(*ccx_s_options).analyze_video_stream = options.analyze_video_stream as _;
(*ccx_s_options).hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype();
@@ -211,11 +212,9 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
replace_rust_c_string((*ccx_s_options).udpaddr, &options.udpaddr.clone().unwrap());
}
(*ccx_s_options).udpport = options.udpport as _;
if options.tcpport.is_some() {
(*ccx_s_options).tcpport = replace_rust_c_string(
(*ccx_s_options).tcpport,
&options.tcpport.unwrap().to_string(),
);
if let Some(tcpport) = options.tcpport {
(*ccx_s_options).tcpport =
replace_rust_c_string((*ccx_s_options).tcpport, &tcpport.to_string());
}
if options.tcp_password.is_some() {
(*ccx_s_options).tcp_password = replace_rust_c_string(
@@ -235,11 +234,9 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
&options.srv_addr.clone().unwrap(),
);
}
if options.srv_port.is_some() {
(*ccx_s_options).srv_port = replace_rust_c_string(
(*ccx_s_options).srv_port,
&options.srv_port.unwrap().to_string(),
);
if let Some(srv_port) = options.srv_port {
(*ccx_s_options).srv_port =
replace_rust_c_string((*ccx_s_options).srv_port, &srv_port.to_string());
}
(*ccx_s_options).noautotimeref = options.noautotimeref as _;
(*ccx_s_options).input_source = options.input_source as _;
@@ -253,15 +250,12 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
// Subsequent calls from ccxr_demuxer_open/close should NOT modify inputfile because
// C code holds references to those strings throughout processing.
// Freeing them would cause use-after-free and double-free errors.
if options.inputfile.is_some() && (*ccx_s_options).inputfile.is_null() {
(*ccx_s_options).inputfile = string_to_c_chars(options.inputfile.clone().unwrap());
(*ccx_s_options).num_input_files = options
.inputfile
.as_ref()
.unwrap()
.iter()
.filter(|s| !s.is_empty())
.count() as _;
if let Some(ref inputfile) = options.inputfile {
if (*ccx_s_options).inputfile.is_null() {
(*ccx_s_options).inputfile = string_to_c_chars(inputfile.clone());
(*ccx_s_options).num_input_files =
inputfile.iter().filter(|s| !s.is_empty()).count() as _;
}
}
(*ccx_s_options).demux_cfg = options.demux_cfg.to_ctype();
// Only set enc_cfg on the first call (when output_filename is null).
@@ -425,12 +419,10 @@ pub unsafe fn copy_to_rust(ccx_s_options: *const ccx_s_options) -> Options {
options.ocr_line_split = (*ccx_s_options).ocr_line_split != 0;
options.ocr_blacklist = (*ccx_s_options).ocr_blacklist != 0;
// Handle mkvlang (C string to Option<Language>)
// Handle mkvlang (C string to Option<MkvLangFilter>)
if !(*ccx_s_options).mkvlang.is_null() {
options.mkvlang = Some(
Language::from_str(&c_char_to_string((*ccx_s_options).mkvlang))
.expect("Invalid language"),
)
let lang_str = c_char_to_string((*ccx_s_options).mkvlang);
options.mkvlang = MkvLangFilter::new(&lang_str).ok();
}
options.analyze_video_stream = (*ccx_s_options).analyze_video_stream != 0;

View File

@@ -615,50 +615,6 @@ impl FromCType<ccx_demux_report> for CcxDemuxReport {
}
}
/// Converts a raw C `PMT_entry` pointer into a raw pointer to a newly
/// heap-allocated Rust [`PMTEntry`] holding a copy of the same fields.
///
/// Returns `None` when `buffer_ptr` is null. A `stream_type` of `0`, or one
/// that `StreamType::from_ctype` does not recognise, maps to
/// `StreamType::Unknownstream`.
///
/// # Safety
/// This function is unsafe because it takes a raw pointer to a C struct.
/// `buffer_ptr` must be null or point to a valid, readable `PMT_entry`.
///
/// The returned pointer comes from `Box::into_raw`; the caller takes ownership
/// of the allocation and is responsible for releasing it.
/// NOTE(review): confirm that callers release it with `Box::from_raw` and not
/// with the C allocator's `free`.
impl FromCType<*mut PMT_entry> for *mut PMTEntry {
    unsafe fn from_ctype(buffer_ptr: *mut PMT_entry) -> Option<Self> {
        // SAFETY: the caller guarantees `buffer_ptr` is null or valid for reads;
        // `as_ref` turns the null case into `None`, which `?` propagates.
        let buffer = unsafe { buffer_ptr.as_ref() }?;

        // Zero is treated as "no stream type" and is never passed to the
        // conversion routine.
        let stream_type = if buffer.stream_type != 0 {
            StreamType::from_ctype(buffer.stream_type as u32).unwrap_or(StreamType::Unknownstream)
        } else {
            StreamType::Unknownstream
        };

        let pmt_entry = PMTEntry {
            program_number: buffer.program_number,
            elementary_pid: buffer.elementary_PID,
            stream_type,
            printable_stream_type: buffer.printable_stream_type,
        };

        // Move the entry to the heap so the returned pointer stays valid after
        // this function returns; taking the address of the local would hand the
        // caller a dangling pointer.
        Some(Box::into_raw(Box::new(pmt_entry)))
    }
}
impl FromCType<ccx_bufferdata_type> for BufferdataType {
unsafe fn from_ctype(c_value: ccx_bufferdata_type) -> Option<Self> {
let rust_value = match c_value {

View File

@@ -1259,6 +1259,7 @@ extern "C" fn ccxr_flush_decoder(dtvcc: *mut dtvcc_ctx, decoder: *mut dtvcc_serv
mod test {
use super::*;
use crate::utils::get_zero_allocated_obj;
use std::alloc::{alloc_zeroed, dealloc, Layout};
fn setup_test_decoder_with_memory() -> dtvcc_service_decoder {
let mut decoder = get_zero_allocated_obj::<dtvcc_service_decoder>();
@@ -1349,10 +1350,17 @@ mod test {
decoder.current_window = 1;
decoder.windows[1].pen_column = 12;
decoder.windows[1].pen_row = 1;
decoder.windows[1].rows[1] = Box::into_raw(Box::new(dtvcc_symbol::new(1)));
decoder.windows[1].rows[2] = Box::into_raw(Box::new(dtvcc_symbol::new(1)));
let layout = Layout::array::<dtvcc_symbol>(CCX_DTVCC_MAX_COLUMNS as usize).unwrap();
for i in 0..CCX_DTVCC_MAX_ROWS as usize {
decoder.windows[1].rows[i] = unsafe { alloc_zeroed(layout) } as *mut dtvcc_symbol;
}
decoder.windows[1].memory_reserved = 1;
unsafe {
*decoder.windows[1].rows[1] = dtvcc_symbol::new(1);
*decoder.windows[1].rows[2] = dtvcc_symbol::new(1);
}
decoder.process_hcr();
assert_eq!(decoder.windows[1].pen_column, 0);
@@ -1367,6 +1375,13 @@ mod test {
unsafe { decoder.windows[1].rows[2].as_mut() },
Some(&mut dtvcc_symbol { sym: 1, init: 1 }),
);
// Cleanup
for i in 0..CCX_DTVCC_MAX_ROWS as usize {
unsafe {
dealloc(decoder.windows[1].rows[i] as *mut u8, layout);
}
}
}
#[test]
@@ -1376,8 +1391,16 @@ mod test {
decoder.windows[1].pen_column = 2;
decoder.windows[1].pen_row = 1;
decoder.windows[1].memory_reserved = 1;
decoder.windows[1].rows[1] = Box::into_raw(Box::new(dtvcc_symbol::new(1)));
decoder.windows[1].rows[2] = Box::into_raw(Box::new(dtvcc_symbol::new(1)));
let layout = Layout::array::<dtvcc_symbol>(CCX_DTVCC_MAX_COLUMNS as usize).unwrap();
for i in 0..CCX_DTVCC_MAX_ROWS as usize {
decoder.windows[1].rows[i] = unsafe { alloc_zeroed(layout) } as *mut dtvcc_symbol;
}
decoder.windows[1].memory_reserved = 1;
unsafe {
*decoder.windows[1].rows[1] = dtvcc_symbol::new(1);
*decoder.windows[1].rows[2] = dtvcc_symbol::new(1);
}
decoder.process_ff();
@@ -1394,6 +1417,13 @@ mod test {
unsafe { decoder.windows[1].rows[2].as_mut() },
Some(&mut dtvcc_symbol::default()),
);
// Cleanup
for i in 0..CCX_DTVCC_MAX_ROWS as usize {
unsafe {
dealloc(decoder.windows[1].rows[i] as *mut u8, layout);
}
}
}
#[test]

View File

@@ -167,7 +167,9 @@ impl dtvcc_window {
} else {
let layout = layout.unwrap();
// deallocate previous memory
dealloc(self.rows[row_index] as *mut u8, layout);
if !self.rows[row_index].is_null() {
dealloc(self.rows[row_index] as *mut u8, layout);
}
// allocate new zero initialized memory
let ptr = alloc_zeroed(layout);

View File

@@ -1,8 +1,13 @@
use crate::bindings::{lib_ccx_ctx, list_head};
use lib_ccxr::common::{Codec, Decoder608Report, DecoderDtvccReport, StreamMode, StreamType};
use lib_ccxr::time::Timestamp;
use std::os::raw::c_void;
use std::ptr::null_mut;
// Foreign declaration of a C `free` function. In this file it is called from
// `Drop for CcxDemuxer` to release the non-null entries of `pid_buffers` and
// `pids_programs`.
extern "C" {
// Releases the allocation behind `ptr`; the call sites in this file only pass
// non-null pointers.
fn free(ptr: *mut c_void);
}
// Size of the Startbytes Array in CcxDemuxer - const 1MB
pub(crate) const ARRAY_SIZE: usize = 1024 * 1024;
@@ -109,7 +114,9 @@ impl Default for PSIBuffer {
fn default() -> Self {
PSIBuffer {
prev_ccounter: 0,
buffer: Box::into_raw(Box::new(0u8)),
// Initialize with null to avoid unnecessary heap allocations and
// signal that the buffer is currently empty.
buffer: std::ptr::null_mut(),
buffer_length: 0,
ccounter: 0,
}
@@ -274,21 +281,21 @@ impl Default for CcxDemuxer<'_> {
/// null pointers which are safely ignored.
impl Drop for CcxDemuxer<'_> {
fn drop(&mut self) {
// Free all non-null PSIBuffer pointers (Rust-owned from Box::into_raw)
// Free all non-null PSIBuffer pointers.
// These are freed using C's free to be compatible with memory that might be allocated by C.
for ptr in self.pid_buffers.drain(..) {
if !ptr.is_null() {
// SAFETY: These pointers were created via Box::into_raw in copy_demuxer_from_c_to_rust
unsafe {
drop(Box::from_raw(ptr));
free(ptr as *mut c_void);
}
}
}
// Free all non-null PMTEntry pointers (Rust-owned from Box::into_raw)
// Free all non-null PMTEntry pointers.
// These are freed using C's free to be compatible with memory that might be allocated by C.
for ptr in self.pids_programs.drain(..) {
if !ptr.is_null() {
// SAFETY: These pointers were created via Box::into_raw in copy_demuxer_from_c_to_rust
unsafe {
drop(Box::from_raw(ptr));
free(ptr as *mut c_void);
}
}
}

View File

@@ -331,10 +331,15 @@ unsafe fn detect_stream_type_common(ctx: &mut CcxDemuxer, ccx_options: &mut Opti
}
// Now check for PS (Needs PACK header)
// The loop below checks 4 consecutive bytes (i, i+1, i+2, i+3), so we need
// to stop 3 bytes before the end to avoid out-of-bounds access.
// - If buffer < 50000: limit = buffer_size - 3 (scan entire buffer)
// - If buffer >= 50000: limit = 49997 (= 50000 - 3, cap the scan range)
// We use saturating_sub to safely handle tiny buffers (< 3 bytes).
let limit = if ctx.startbytes_avail < 50000 {
ctx.startbytes_avail - 3
ctx.startbytes_avail.saturating_sub(3)
} else {
49997
50000 - 3 // Don't scan huge buffers entirely; 50KB is enough
} as usize;
for i in 0..limit {
if ctx.startbytes[i] == 0x00
@@ -427,15 +432,21 @@ pub fn is_valid_mp4_box(
)
);
// If the box type is "moov", check if it contains a valid movie header (mvhd)
if idx == 2
&& !(buffer[position + 12] == b'm'
// If the box type is "moov", it must contain "mvhd" to be valid.
// We need 16 bytes from position to check bytes 12-15 for "mvhd".
if idx == 2 {
if position + 16 > buffer.len() {
// Not enough bytes to verify mvhd - skip this box
continue;
}
if !(buffer[position + 12] == b'm'
&& buffer[position + 13] == b'v'
&& buffer[position + 14] == b'h'
&& buffer[position + 15] == b'd')
{
// If "moov" doesn't have "mvhd", skip it.
continue;
{
// moov without mvhd is not valid - skip it
continue;
}
}
// Box name matches. Do a crude validation of possible box size,

View File

@@ -278,7 +278,8 @@ pub unsafe fn user_data(
if !proceed {
debug!(msg_type = DebugMessageFlag::VERBOSE; "\rThe following payload is not properly terminated.");
dump(cc_data.to_vec().as_mut_ptr(), (cc_count * 3 + 1) as _, 0, 0);
let mut cc_data_copy = cc_data.to_vec();
dump(cc_data_copy.as_mut_ptr(), (cc_count * 3 + 1) as _, 0, 0);
}
debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading {} HD CC blocks", cc_count);
@@ -289,10 +290,11 @@ pub unsafe fn user_data(
// Please note we store the current value of the global
// fts_now variable (and not get_fts()) as we are going to
// re-create the timeline in process_hdcc() (Slightly ugly).
let mut cc_data_copy = cc_data.to_vec();
store_hdcc(
enc_ctx,
dec_ctx,
cc_data.to_vec().as_mut_ptr(),
cc_data_copy.as_mut_ptr(),
cc_count as _,
(*dec_ctx.timing).current_tref,
(*dec_ctx.timing).fts_now,
@@ -340,6 +342,10 @@ pub unsafe fn user_data(
let dcd_pos = ustream.pos; // dish caption data position
match pattern_type {
0x02 => {
if ustream.data.len() - ustream.pos < 4 {
info!("Dish Network caption: insufficient data");
return Ok(1);
}
// Two byte caption - always on B-frame
// The following 4 bytes are:
// 0 : 0x09
@@ -387,6 +393,10 @@ pub unsafe fn user_data(
// Ignore 3 (0x0A, followed by two unknown) bytes.
}
0x04 => {
if ustream.data.len() - ustream.pos < 5 {
info!("Dish Network caption: insufficient data");
return Ok(1);
}
// Four byte caption - always on B-frame
// The following 5 bytes are:
// 0 : 0x09
@@ -423,6 +433,10 @@ pub unsafe fn user_data(
// Ignore 4 (0x020A, followed by two unknown) bytes.
}
0x05 => {
if ustream.data.len() - ustream.pos < 12 {
info!("Dish Network caption: insufficient data");
return Ok(1);
}
// Buffered caption - always on I-/P-frame
// The following six bytes are:
// 0 : 0x04
@@ -430,7 +444,7 @@ pub unsafe fn user_data(
// 1 : prev dcd[2]
// 2-3: prev dcd[3-4]
// 4-5: prev dcd[5-6]
let dcd_data = &ustream.data[dcd_pos..dcd_pos + 10]; // Need more bytes for this case
let dcd_data = &ustream.data[dcd_pos..dcd_pos + 12]; // Need more bytes for this case
debug!(msg_type = DebugMessageFlag::PARSE; " - {:02X} pch: {:02X} {:5} {:02X}:{:02X}",
dcd_data[0], dcd_data[1],
(dcd_data[2] as u32) * 256 + (dcd_data[3] as u32),
@@ -532,10 +546,12 @@ pub unsafe fn user_data(
if udatalen < 720 {
info!("MPEG:VBI: Minimum 720 bytes in luma line required");
return Ok(1);
}
let vbi_data = &ustream.data[ustream.pos..ustream.pos + 720];
decode_vbi(dec_ctx, field, vbi_data.to_vec().as_mut_ptr(), 720, sub);
let mut vbi_data_copy = vbi_data.to_vec();
decode_vbi(dec_ctx, field, vbi_data_copy.as_mut_ptr(), 720, sub);
debug!(msg_type = DebugMessageFlag::VERBOSE; "GXF (vbi line {}) user data:", line_nb);
} else {
// Some other user data
@@ -543,14 +559,8 @@ pub unsafe fn user_data(
debug!(msg_type = DebugMessageFlag::VERBOSE; "Unrecognized user data:");
let udatalen = ustream.data.len() - ustream.pos;
let dump_len = if udatalen > 128 { 128 } else { udatalen };
dump(
ustream.data[ustream.pos..ustream.pos + dump_len]
.to_vec()
.as_mut_ptr(),
dump_len as _,
0,
0,
);
let mut data_copy = ustream.data[ustream.pos..ustream.pos + dump_len].to_vec();
dump(data_copy.as_mut_ptr(), dump_len as _, 0, 0);
}
debug!(msg_type = DebugMessageFlag::VERBOSE; "User data - processed");

View File

@@ -129,10 +129,14 @@ pub fn sleepandchecktimeout(start: u64, ccx_options: &mut Options) {
.expect("System time went backwards")
.as_secs();
if ccx_options.live_stream.is_some() && ccx_options.live_stream.unwrap().seconds() != 0 {
if current_time > start + ccx_options.live_stream.unwrap().millis() as u64 {
// Timeout elapsed
ccx_options.live_stream = Option::from(Timestamp::from_millis(0));
if let Some(live_stream) = ccx_options.live_stream {
if live_stream.seconds() != 0 {
if current_time > start + live_stream.millis() as u64 {
// Timeout elapsed
ccx_options.live_stream = Option::from(Timestamp::from_millis(0));
} else {
sleep_secs(1);
}
} else {
sleep_secs(1);
}

View File

@@ -462,6 +462,10 @@ extern "C" fn ccxr_process_cc_data(
const CC_SOLID_BLANK: u8 = 0x7F;
pub fn validate_cc_pair(cc_block: &mut [u8]) -> bool {
if cc_block.len() != 3 {
return false;
}
let cc_valid = (cc_block[0] & 4) >> 2;
let cc_type = cc_block[0] & 3;
if cc_valid == 0 {
@@ -805,6 +809,15 @@ mod test {
assert!(!validate_cc_pair(&mut cc_block));
}
/// `validate_cc_pair` must reject any block that is not exactly three bytes long.
#[test]
fn test_validate_cc_pair_invalid_length() {
    // One byte short of a full cc block.
    let mut truncated: [u8; 2] = [0x97, 0x1F];
    let truncated_accepted = validate_cc_pair(&mut truncated);
    assert!(!truncated_accepted);

    // One byte longer than a full cc block.
    let mut oversized: [u8; 4] = [0x97, 0x1F, 0x3C, 0x00];
    let oversized_accepted = validate_cc_pair(&mut oversized);
    assert!(!oversized_accepted);
}
#[test]
fn test_do_cb() {
let mut dtvcc_ctx = crate::decoder::test::initialize_dtvcc_ctx();

View File

@@ -7,7 +7,6 @@ use crate::demuxer::common_types::{
};
use lib_ccxr::common::{Codec, Options, StreamMode, StreamType};
use lib_ccxr::time::Timestamp;
use std::alloc::{alloc_zeroed, Layout};
use std::ffi::CStr;
use std::os::raw::{c_char, c_int, c_uchar, c_uint, c_void};
@@ -18,10 +17,12 @@ const POISON_PTR_PATTERN: usize = 0xcdcdcdcdcdcdcdcd;
#[cfg(target_pointer_width = "32")]
const POISON_PTR_PATTERN: usize = 0xcdcdcdcd;
// External C function declarations
//
// `malloc`, `free` and `calloc` are used by the demuxer copy routines in this
// file for the per-PID entries and the demuxer struct, instead of Rust's
// `Box`/`alloc` APIs.
extern "C" {
// Call sites are not visible in this excerpt.
fn activity_input_file_closed();
// NOTE(review): presumably the POSIX `close(2)` on a file descriptor; call
// sites are not visible in this excerpt, so confirm.
fn close(fd: c_int) -> c_int;
// Allocates `size` bytes; callers in this file check the result for null
// before writing through it.
fn malloc(size: usize) -> *mut c_void;
// Releases a pointer; used here on existing `PID_buffers` / `PIDs_programs`
// entries before they are overwritten.
fn free(ptr: *mut c_void);
// Used by `alloc_new_demuxer` in place of `alloc_zeroed` to allocate the
// `ccx_demuxer` struct.
fn calloc(nmemb: usize, size: usize) -> *mut c_void;
}
pub fn copy_c_array_to_rust_vec(
@@ -98,61 +99,89 @@ pub unsafe fn copy_demuxer_from_rust_to_c(c_demuxer: *mut ccx_demuxer, rust_demu
c.global_timestamp_inited = rust_demuxer.global_timestamp_inited.millis() as c_int;
// PID buffers - extra defensive version
// We iterate through all possible PIDs (up to 8191 for PSI) to ensure state synchronization.
// CRITICAL: We must free existing pointers in the C structure before overwriting them
// to prevent massive memory leaks during the demuxing process, as this function
// is called repeatedly to sync state between Rust and C.
let pid_buffers_len = rust_demuxer.pid_buffers.len().min(8191);
for i in 0..pid_buffers_len {
let pid_buffer = rust_demuxer.pid_buffers[i];
if !pid_buffer.is_null() {
// Try to safely access the pointer
match std::panic::catch_unwind(|| unsafe { &*pid_buffer }) {
Ok(rust_psi) => {
let c_psi = unsafe { rust_psi.to_ctype() };
let c_ptr = Box::into_raw(Box::new(c_psi));
c.PID_buffers[i] = c_ptr;
}
Err(_) => {
// Pointer was invalid, set to null
eprintln!("Warning: Invalid PID buffer pointer at index {i}");
c.PID_buffers[i] = std::ptr::null_mut();
for i in 0..8191 {
// Free existing pointer if any.
// SAFETY: We use C's free to be compatible with memory that might be allocated by C.
// We also check for POISON_PTR_PATTERN for safety in debug builds.
if !c.PID_buffers[i].is_null() && c.PID_buffers[i] as usize != POISON_PTR_PATTERN {
unsafe {
free(c.PID_buffers[i] as *mut c_void);
c.PID_buffers[i] = std::ptr::null_mut();
}
}
if i < pid_buffers_len {
let pid_buffer = rust_demuxer.pid_buffers[i];
if !pid_buffer.is_null() {
// Try to safely access the pointer using catch_unwind to prevent
// a panic in Rust from crashing the entire C application.
// This is a defensive measure for FFI robustness.
match std::panic::catch_unwind(|| unsafe { &*pid_buffer }) {
Ok(rust_psi) => {
let c_psi = unsafe { rust_psi.to_ctype() };
let c_ptr =
unsafe { malloc(std::mem::size_of::<crate::bindings::PSI_buffer>()) }
as *mut crate::bindings::PSI_buffer;
if !c_ptr.is_null() {
unsafe {
std::ptr::write(c_ptr, c_psi);
}
c.PID_buffers[i] = c_ptr;
}
}
Err(_) => {
// Pointer was invalid, log and skip
eprintln!("Warning: Invalid PID buffer pointer at index {i}");
}
}
}
} else {
c.PID_buffers[i] = std::ptr::null_mut();
}
}
// Clear remaining slots if rust array is smaller than C array
for i in pid_buffers_len..8191 {
c.PID_buffers[i] = std::ptr::null_mut();
}
// PIDs programs - extra defensive version
// Similar to PID_buffers, we manage ownership of PMT entries.
// We check for POISON_PTR_PATTERN to avoid freeing uninitialized memory in debug builds.
let pids_programs_len = rust_demuxer.pids_programs.len().min(65536);
for i in 0..pids_programs_len {
let pmt_entry = rust_demuxer.pids_programs[i];
if !pmt_entry.is_null() {
// Try to safely access the pointer
match std::panic::catch_unwind(|| unsafe { &*pmt_entry }) {
Ok(rust_pmt) => {
let c_pmt = unsafe { rust_pmt.to_ctype() };
let c_ptr = Box::into_raw(Box::new(c_pmt));
c.PIDs_programs[i] = c_ptr;
}
Err(_) => {
// Pointer was invalid, set to null
eprintln!("Warning: Invalid PMT entry pointer at index {i}");
c.PIDs_programs[i] = std::ptr::null_mut();
for i in 0..65536 {
// Free existing pointer if any and it's not a poison pattern.
// SAFETY: We use C's free to be compatible with memory that might be allocated by C.
if !c.PIDs_programs[i].is_null() && c.PIDs_programs[i] as usize != POISON_PTR_PATTERN {
unsafe {
free(c.PIDs_programs[i] as *mut c_void);
c.PIDs_programs[i] = std::ptr::null_mut();
}
}
if i < pids_programs_len {
let pmt_entry = rust_demuxer.pids_programs[i];
if !pmt_entry.is_null() {
// Safely convert and move ownership to C
match std::panic::catch_unwind(|| unsafe { &*pmt_entry }) {
Ok(rust_pmt) => {
let c_pmt = unsafe { rust_pmt.to_ctype() };
let c_ptr =
unsafe { malloc(std::mem::size_of::<crate::bindings::PMT_entry>()) }
as *mut crate::bindings::PMT_entry;
if !c_ptr.is_null() {
unsafe {
std::ptr::write(c_ptr, c_pmt);
}
c.PIDs_programs[i] = c_ptr;
}
}
Err(_) => {
eprintln!("Warning: Invalid PMT entry pointer at index {i}");
}
}
}
} else {
c.PIDs_programs[i] = std::ptr::null_mut();
}
}
// Clear remaining slots if rust array is smaller than C array
for i in pids_programs_len..65536 {
c.PIDs_programs[i] = std::ptr::null_mut();
}
// PIDs seen array
for (i, &val) in rust_demuxer.pids_seen.iter().take(65536).enumerate() {
c.PIDs_seen[i] = val as c_int;
@@ -265,7 +294,15 @@ pub unsafe fn copy_demuxer_from_c_to_rust(ccx: *const ccx_demuxer) -> CcxDemuxer
if buffer_ptr.is_null() {
None
} else {
Some(Box::into_raw(Box::new(PSIBuffer::from_ctype(*buffer_ptr)?)))
let rust_item = PSIBuffer::from_ctype(*buffer_ptr)?;
let rust_ptr =
unsafe { malloc(std::mem::size_of::<PSIBuffer>()) } as *mut PSIBuffer;
if !rust_ptr.is_null() {
unsafe {
std::ptr::write(rust_ptr, rust_item);
}
}
Some(rust_ptr)
}
})
.collect::<Vec<_>>();
@@ -276,7 +313,14 @@ pub unsafe fn copy_demuxer_from_c_to_rust(ccx: *const ccx_demuxer) -> CcxDemuxer
if buffer_ptr.is_null() || buffer_ptr as usize == POISON_PTR_PATTERN {
None
} else {
Some(Box::into_raw(Box::new(PMTEntry::from_ctype(*buffer_ptr)?)))
let rust_item = PMTEntry::from_ctype(*buffer_ptr)?;
let rust_ptr = unsafe { malloc(std::mem::size_of::<PMTEntry>()) } as *mut PMTEntry;
if !rust_ptr.is_null() {
unsafe {
std::ptr::write(rust_ptr, rust_item);
}
}
Some(rust_ptr)
}
})
.collect::<Vec<_>>();
@@ -367,8 +411,7 @@ pub unsafe fn copy_demuxer_from_c_to_rust(ccx: *const ccx_demuxer) -> CcxDemuxer
///
/// This function is unsafe because we are calling a C struct and using alloc_zeroed to initialize it.
pub unsafe fn alloc_new_demuxer() -> *mut ccx_demuxer {
let layout = Layout::new::<ccx_demuxer>();
let ptr = alloc_zeroed(layout) as *mut ccx_demuxer;
let ptr = calloc(1, std::mem::size_of::<ccx_demuxer>()) as *mut ccx_demuxer;
if ptr.is_null() {
panic!("Failed to allocate memory for ccx_demuxer");

View File

@@ -77,6 +77,10 @@ pub unsafe extern "C" fn ccxr_update_logger_target() {
/// or less than `len`.
#[no_mangle]
pub unsafe extern "C" fn ccxr_verify_crc32(buf: *const u8, len: c_int) -> c_int {
// Safety: avoid NULL pointer and negative length causing usize wraparound
if buf.is_null() || len < 0 {
return 0;
}
let buf = std::slice::from_raw_parts(buf, len as usize);
if verify_crc32(buf) {
1

File diff suppressed because it is too large Load Diff