mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2026-02-08 13:34:59 +00:00
Compare commits
447 Commits
fix/tesser
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd2931153e | ||
|
|
10288243b9 | ||
|
|
9762223105 | ||
|
|
cad4d3d62d | ||
|
|
a30b8d7a83 | ||
|
|
f920c16a53 | ||
|
|
5c05173c75 | ||
|
|
2582f628dd | ||
|
|
99f7d1955a | ||
|
|
78e94f85c2 | ||
|
|
36f13922d2 | ||
|
|
3a547fdeab | ||
|
|
528021b577 | ||
|
|
270c89b7f8 | ||
|
|
032cd1c6b1 | ||
|
|
42e4e9a657 | ||
|
|
821e307333 | ||
|
|
ae81f3ba3d | ||
|
|
b190751b2c | ||
|
|
f1bb0f4dce | ||
|
|
f147ac27f8 | ||
|
|
2dfb44d7d4 | ||
|
|
580e721dfe | ||
|
|
d0a82447ff | ||
|
|
5c19c7b932 | ||
|
|
fd7271bae2 | ||
|
|
05c68349d5 | ||
|
|
09f21f64e4 | ||
|
|
c65fb0874e | ||
|
|
9db727d593 | ||
|
|
fe6dad83b7 | ||
|
|
d494286082 | ||
|
|
259e881483 | ||
|
|
197069d3b8 | ||
|
|
7a810d736d | ||
|
|
1413c948c4 | ||
|
|
bb5385913b | ||
|
|
f8981e8e1e | ||
|
|
a1871abf04 | ||
|
|
20b3773bb9 | ||
|
|
8786b4cf75 | ||
|
|
8632ecda5b | ||
|
|
475153a9dd | ||
|
|
df90009f73 | ||
|
|
2352ea21e3 | ||
|
|
dc041a35e8 | ||
|
|
e99ba1d177 | ||
|
|
298665faa4 | ||
|
|
735a01bf04 | ||
|
|
3618c23b5a | ||
|
|
b7c9da75dd | ||
|
|
449d55d5e5 | ||
|
|
60aa370899 | ||
|
|
3d18b38c32 | ||
|
|
2a6d27f9ff | ||
|
|
91d3512bcc | ||
|
|
74e64c0421 | ||
|
|
c175750ebe | ||
|
|
e7dc4d19f7 | ||
|
|
1fbb51056d | ||
|
|
5d9a8cc6f2 | ||
|
|
17abad79f2 | ||
|
|
707e1f01fe | ||
|
|
efc8b791e7 | ||
|
|
a856bbde10 | ||
|
|
9390b876fa | ||
|
|
ead0a4beed | ||
|
|
b2e9cb74c1 | ||
|
|
20b194aac4 | ||
|
|
2d9b480972 | ||
|
|
1447b021cb | ||
|
|
e0ac126cff | ||
|
|
b8019bdb35 | ||
|
|
9d921dec43 | ||
|
|
3ada2b5002 | ||
|
|
50ec9866db | ||
|
|
ce87d01fbd | ||
|
|
fecd24d08e | ||
|
|
482544c5bf | ||
|
|
84a7a1fb41 | ||
|
|
f198bcd2ec | ||
|
|
4b6016ca1c | ||
|
|
9c2ea47eda | ||
|
|
170b466a20 | ||
|
|
2bdcd20115 | ||
|
|
ab18d234d2 | ||
|
|
3ff02617b0 | ||
|
|
c7fad95e24 | ||
|
|
c018f1f43c | ||
|
|
98b50b2a35 | ||
|
|
46cee0893a | ||
|
|
42ad48ca7f | ||
|
|
ed26a595bd | ||
|
|
b1c2aabb22 | ||
|
|
bb2ae1e70f | ||
|
|
6464fa486e | ||
|
|
5aa747ab33 | ||
|
|
39adfa59b0 | ||
|
|
20287548cb | ||
|
|
b7b10419ec | ||
|
|
8fbfd68426 | ||
|
|
7159d0b6d0 | ||
|
|
c515578e37 | ||
|
|
e55b8eb764 | ||
|
|
0228fbcbfa | ||
|
|
0e190e0962 | ||
|
|
13f1b5ab53 | ||
|
|
b39f923c46 | ||
|
|
7e32d6a553 | ||
|
|
3bde3dceec | ||
|
|
d5201b1129 | ||
|
|
a199f4f8af | ||
|
|
eea049923d | ||
|
|
d999c3e0e0 | ||
|
|
aac90d5a5f | ||
|
|
618df184c6 | ||
|
|
5e6aab8972 | ||
|
|
a77c21c06c | ||
|
|
4252703431 | ||
|
|
1af2a29a3c | ||
|
|
8ab474c593 | ||
|
|
1c781c2a38 | ||
|
|
4d718378d5 | ||
|
|
1bd4cd5c0a | ||
|
|
067045ce92 | ||
|
|
2f2904041c | ||
|
|
d837c369e5 | ||
|
|
686ff69fdc | ||
|
|
126835d998 | ||
|
|
6e170cd812 | ||
|
|
fe921626e1 | ||
|
|
6578f0ff34 | ||
|
|
1911068e92 | ||
|
|
493495361d | ||
|
|
643857e98f | ||
|
|
05adb5f47e | ||
|
|
504877b928 | ||
|
|
64ee63a560 | ||
|
|
270c603bd2 | ||
|
|
6d356b4458 | ||
|
|
cfb10d4b91 | ||
|
|
ca2b708023 | ||
|
|
10ac5ca6ce | ||
|
|
333cfb3726 | ||
|
|
c609f66c02 | ||
|
|
91f254017b | ||
|
|
1f5d3df0ae | ||
|
|
e36d81c237 | ||
|
|
8d338dc362 | ||
|
|
c78e01d186 | ||
|
|
401ff6c105 | ||
|
|
83eb51ed6f | ||
|
|
bce0c92fdd | ||
|
|
ea4859fd54 | ||
|
|
8d7890c743 | ||
|
|
477307e438 | ||
|
|
4a4911bcec | ||
|
|
dc946168e7 | ||
|
|
3a60b1268b | ||
|
|
e3d1c56ad0 | ||
|
|
b5bc0e2616 | ||
|
|
600a9a0e75 | ||
|
|
694b61f862 | ||
|
|
86925727e0 | ||
|
|
1c7515681e | ||
|
|
2bcac83761 | ||
|
|
efc28d87d5 | ||
|
|
b4d8e0ffaf | ||
|
|
0b7b7fd031 | ||
|
|
90041554a3 | ||
|
|
6950a7661e | ||
|
|
41fb966f6f | ||
|
|
04ed95f8b5 | ||
|
|
ddf29672fd | ||
|
|
0890e06d84 | ||
|
|
8c33412888 | ||
|
|
f40294cc5c | ||
|
|
22d5d35158 | ||
|
|
51cae1c2f0 | ||
|
|
dfaebd5db8 | ||
|
|
cfa7d912ca | ||
|
|
ad971f0e72 | ||
|
|
8aadbfb5f2 | ||
|
|
44eb665cd8 | ||
|
|
1255b318ae | ||
|
|
1b0e66bc67 | ||
|
|
f5dc1cf467 | ||
|
|
aaf937a135 | ||
|
|
317c66f14e | ||
|
|
946c5859d4 | ||
|
|
7166e48698 | ||
|
|
d31ea87c03 | ||
|
|
028ce9d0b5 | ||
|
|
cc7a43b5e2 | ||
|
|
3e1424cda8 | ||
|
|
82109e6cd9 | ||
|
|
5dc8292dd2 | ||
|
|
a5b8bc8bf6 | ||
|
|
29158b2c38 | ||
|
|
ad2ee70743 | ||
|
|
562de8893b | ||
|
|
12adb5e92b | ||
|
|
203eb23030 | ||
|
|
774c3a0d3a | ||
|
|
07f1ddc3fe | ||
|
|
303bec8d5d | ||
|
|
e43a6b5ced | ||
|
|
64484af49e | ||
|
|
7526da884c | ||
|
|
3529bb29b4 | ||
|
|
925560f773 | ||
|
|
200eb1750a | ||
|
|
6dcdb4b2d8 | ||
|
|
a2d2c4f063 | ||
|
|
4ab6c83c27 | ||
|
|
e66a0183c3 | ||
|
|
a8ec28630a | ||
|
|
432d4237ec | ||
|
|
e9519c4a67 | ||
|
|
fef005ddaf | ||
|
|
546c776e57 | ||
|
|
daeed5df71 | ||
|
|
b56ab005a8 | ||
|
|
f1681ee929 | ||
|
|
031f463b5c | ||
|
|
b23866f5a8 | ||
|
|
2ec93c3d3d | ||
|
|
5564aa8a54 | ||
|
|
868fac5423 | ||
|
|
9ca26171d6 | ||
|
|
ead4cbb278 | ||
|
|
dfd7101f54 | ||
|
|
9659d3cf4c | ||
|
|
34c7cd6d2e | ||
|
|
7448a260c7 | ||
|
|
54236f840c | ||
|
|
f2aeef167b | ||
|
|
6a4a1c97ec | ||
|
|
f369959096 | ||
|
|
1c2bcb5088 | ||
|
|
da79ee44d9 | ||
|
|
26434a7f89 | ||
|
|
718eb1a37f | ||
|
|
ace6361bfb | ||
|
|
7041441d39 | ||
|
|
1589c31774 | ||
|
|
c96d3ff3f1 | ||
|
|
598a48e260 | ||
|
|
0cc3626261 | ||
|
|
e0e66bd0ba | ||
|
|
2642ca8805 | ||
|
|
a108302dc0 | ||
|
|
ce90b61923 | ||
|
|
18566f2213 | ||
|
|
125c5e8821 | ||
|
|
64ce4ac84f | ||
|
|
674b859284 | ||
|
|
9a761331f8 | ||
|
|
046ee71eda | ||
|
|
b5fc3e63c4 | ||
|
|
5eaf805d27 | ||
|
|
0ba941e8c0 | ||
|
|
a9413a2312 | ||
|
|
a2eb03cb73 | ||
|
|
06063f26a4 | ||
|
|
82daa7fb2b | ||
|
|
a71687e19f | ||
|
|
25162fe40a | ||
|
|
3365a715a6 | ||
|
|
26e0f64720 | ||
|
|
a1ed940c8b | ||
|
|
f5f4768503 | ||
|
|
e4374204bd | ||
|
|
7f55ae5c1d | ||
|
|
8bf1bc16de | ||
|
|
5352a8b877 | ||
|
|
fd155285d2 | ||
|
|
a6fd8d468a | ||
|
|
5b05ce5073 | ||
|
|
d28bc4e114 | ||
|
|
285e81f9a7 | ||
|
|
730156f33b | ||
|
|
152bbd308c | ||
|
|
8c586bccbd | ||
|
|
434cd3959a | ||
|
|
3cb0f61b0c | ||
|
|
a18eaa2c96 | ||
|
|
69b7f9f4c3 | ||
|
|
63dde6f3b2 | ||
|
|
8f64eeb54f | ||
|
|
02d91c4a03 | ||
|
|
463a4a85a1 | ||
|
|
ba2833b819 | ||
|
|
635a305c37 | ||
|
|
6fe612db3e | ||
|
|
2930c61420 | ||
|
|
173db88dcf | ||
|
|
29c3f4e684 | ||
|
|
d4a7b1d6ed | ||
|
|
9d14766b0d | ||
|
|
6f2a73d706 | ||
|
|
1fccb783f2 | ||
|
|
ec30a79be9 | ||
|
|
5beb4389f6 | ||
|
|
a6ccf29630 | ||
|
|
b6d7c7e778 | ||
|
|
117c2fce69 | ||
|
|
ffd6a34c30 | ||
|
|
70af627078 | ||
|
|
b0a5c069ed | ||
|
|
53ee63894c | ||
|
|
50ece42e0a | ||
|
|
3d00e718f6 | ||
|
|
021b788461 | ||
|
|
86e5d47141 | ||
|
|
5b36356456 | ||
|
|
ba04aedae1 | ||
|
|
5001df0d6c | ||
|
|
28506fee7b | ||
|
|
47d8aaddb9 | ||
|
|
1b2254f911 | ||
|
|
dc34b26afb | ||
|
|
c06102678e | ||
|
|
b0800a112c | ||
|
|
2b0d9ed427 | ||
|
|
fd4db0e7bf | ||
|
|
00d8c9cb0a | ||
|
|
7829c14c60 | ||
|
|
d3602ec938 | ||
|
|
f9b5e081a7 | ||
|
|
bdc3eaa81b | ||
|
|
2820042c1d | ||
|
|
d4d228125a | ||
|
|
43d5ba2f34 | ||
|
|
557774b202 | ||
|
|
4e0472bddf | ||
|
|
9a2fe6221e | ||
|
|
182b23a283 | ||
|
|
77f3fd35f4 | ||
|
|
14e6919f2e | ||
|
|
353a37010d | ||
|
|
921cbe0c57 | ||
|
|
f0523ceaa3 | ||
|
|
7284430fc6 | ||
|
|
68d0d4094e | ||
|
|
7075f6291d | ||
|
|
170d769476 | ||
|
|
1ff3457744 | ||
|
|
dc352a2202 | ||
|
|
c8750e42d1 | ||
|
|
20448bfeb2 | ||
|
|
807df0339e | ||
|
|
6642973c63 | ||
|
|
f08fd658e6 | ||
|
|
5ae3116a6c | ||
|
|
826afcd991 | ||
|
|
46af5ce9bb | ||
|
|
123b35ae69 | ||
|
|
f6e9d55838 | ||
|
|
6f7d3f6169 | ||
|
|
07cc78c2f1 | ||
|
|
affa34848c | ||
|
|
45ee03aecc | ||
|
|
c6e27ca809 | ||
|
|
a8f25ce25e | ||
|
|
2781a7f7d6 | ||
|
|
903ccc1442 | ||
|
|
857a3bc9c6 | ||
|
|
c2c589d6f6 | ||
|
|
941604b33c | ||
|
|
1950f096b6 | ||
|
|
1fc5ec00d4 | ||
|
|
c0deae4b0c | ||
|
|
84692b5658 | ||
|
|
4a51ad114e | ||
|
|
6789376b92 | ||
|
|
ea5125f030 | ||
|
|
000b39775c | ||
|
|
23fe02f0d2 | ||
|
|
394fb39a9c | ||
|
|
294bf5bc18 | ||
|
|
4e52e61c91 | ||
|
|
faaaabf63c | ||
|
|
f5a9018ef0 | ||
|
|
e01720c05e | ||
|
|
f80b1f26ca | ||
|
|
e42bc2b9f9 | ||
|
|
f9ebfd2a32 | ||
|
|
bf9841a255 | ||
|
|
9f670de8ed | ||
|
|
fc4a14e7d6 | ||
|
|
4f13b861cd | ||
|
|
df692f296d | ||
|
|
419fc4694d | ||
|
|
fc230fc217 | ||
|
|
825e160e72 | ||
|
|
8e24c17c1e | ||
|
|
4e21fae053 | ||
|
|
be239a5c46 | ||
|
|
1d9f32239e | ||
|
|
cbb5f0b0a8 | ||
|
|
fd063931ea | ||
|
|
7a9acb7bd2 | ||
|
|
cbf180eb39 | ||
|
|
614e6c42b5 | ||
|
|
38bcb7ed85 | ||
|
|
d57354830e | ||
|
|
7b43201ce1 | ||
|
|
ea1c82ac17 | ||
|
|
b3f1e27f5c | ||
|
|
82c92d3910 | ||
|
|
5bf8e7de0d | ||
|
|
5b8a9709df | ||
|
|
063786c4b7 | ||
|
|
6ed09ea397 | ||
|
|
44363c0acd | ||
|
|
701271ec82 | ||
|
|
7c74ea4112 | ||
|
|
ed42525f44 | ||
|
|
b88d1ebab2 | ||
|
|
ec11b00f9f | ||
|
|
8c0fe08781 | ||
|
|
3304c1b094 | ||
|
|
5bad3732c3 | ||
|
|
e3b0defb49 | ||
|
|
2065c5509d | ||
|
|
5458370346 | ||
|
|
9e19c58edf | ||
|
|
0bb56d508a | ||
|
|
2c67381d2b | ||
|
|
2b708c4a31 | ||
|
|
94a43928ad | ||
|
|
25d68b75bd | ||
|
|
73cd19f5d0 | ||
|
|
d0caf23a82 | ||
|
|
da3dc52b45 | ||
|
|
0fdfb751ba | ||
|
|
0b5f13e2c4 | ||
|
|
60cec9e6de | ||
|
|
d758f3156a | ||
|
|
da802a0a39 | ||
|
|
8f78a8bbb2 | ||
|
|
e87807ec27 | ||
|
|
d097ec881c | ||
|
|
49b698259d | ||
|
|
9fddaab3b0 | ||
|
|
609a53f373 |
157
.github/workflows/build_appimage.yml
vendored
Normal file
157
.github/workflows/build_appimage.yml
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
name: Build Linux AppImage
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, minimal, ocr, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_appimage.yml'
|
||||
- 'linux/build_appimage.sh'
|
||||
|
||||
jobs:
|
||||
build-appimage:
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [minimal, ocr, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
pkg-config \
|
||||
wget \
|
||||
file \
|
||||
libfuse2 \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libjpeg-dev \
|
||||
libfreetype-dev \
|
||||
libxml2-dev \
|
||||
libcurl4-gnutls-dev \
|
||||
libssl-dev \
|
||||
clang \
|
||||
libclang-dev
|
||||
|
||||
- name: Install OCR dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true' && (matrix.build_type == 'ocr' || matrix.build_type == 'hardsubx')
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
tesseract-ocr \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
tesseract-ocr-eng
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Cache GPAC build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: cache-gpac
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: /usr/local/lib/libgpac*
|
||||
key: gpac-v2.4.0-ubuntu22
|
||||
|
||||
- name: Build and install GPAC
|
||||
if: steps.should_build.outputs.should_build == 'true' && steps.cache-gpac.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone -b v2.4.0 --depth 1 https://github.com/gpac/gpac
|
||||
cd gpac
|
||||
./configure
|
||||
make -j$(nproc) lib
|
||||
sudo make install-lib
|
||||
sudo ldconfig
|
||||
|
||||
- name: Update library cache
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: sudo ldconfig
|
||||
|
||||
- name: Build AppImage
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
cd linux
|
||||
chmod +x build_appimage.sh
|
||||
BUILD_TYPE=${{ matrix.build_type }} ./build_appimage.sh
|
||||
|
||||
- name: Get AppImage name
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: appimage_name
|
||||
run: |
|
||||
case "${{ matrix.build_type }}" in
|
||||
minimal)
|
||||
echo "name=ccextractor-minimal-x86_64.AppImage" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
ocr)
|
||||
echo "name=ccextractor-x86_64.AppImage" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
hardsubx)
|
||||
echo "name=ccextractor-hardsubx-x86_64.AppImage" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Test AppImage
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
chmod +x linux/${{ steps.appimage_name.outputs.name }}
|
||||
linux/${{ steps.appimage_name.outputs.name }} --version
|
||||
|
||||
- name: Upload AppImage artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.appimage_name.outputs.name }}
|
||||
path: linux/${{ steps.appimage_name.outputs.name }}
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: linux/${{ steps.appimage_name.outputs.name }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
283
.github/workflows/build_deb.yml
vendored
Normal file
283
.github/workflows/build_deb.yml
vendored
Normal file
@@ -0,0 +1,283 @@
|
||||
name: Build Linux .deb Package
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_deb.yml'
|
||||
|
||||
jobs:
|
||||
build-deb:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: version
|
||||
run: |
|
||||
# Extract version from source or use tag
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
VERSION="${{ github.event.release.tag_name }}"
|
||||
VERSION="${VERSION#v}" # Remove 'v' prefix if present
|
||||
else
|
||||
# Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
|
||||
VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
|
||||
fi
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "Building version: $VERSION"
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libjpeg-dev \
|
||||
libfreetype-dev \
|
||||
libxml2-dev \
|
||||
libcurl4-gnutls-dev \
|
||||
libssl-dev \
|
||||
clang \
|
||||
libclang-dev \
|
||||
tesseract-ocr \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
patchelf
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Cache GPAC build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: cache-gpac
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/gpac-install
|
||||
key: gpac-abi-16.4-ubuntu24-deb
|
||||
|
||||
- name: Build GPAC
|
||||
if: steps.should_build.outputs.should_build == 'true' && steps.cache-gpac.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
|
||||
cd gpac
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make DESTDIR=$HOME/gpac-install install-lib
|
||||
|
||||
- name: Install GPAC to system
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo cp -r $HOME/gpac-install/usr/lib/* /usr/lib/
|
||||
sudo cp -r $HOME/gpac-install/usr/include/* /usr/include/
|
||||
sudo ldconfig
|
||||
|
||||
- name: Build CCExtractor
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
|
||||
else
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
|
||||
fi
|
||||
make -j$(nproc)
|
||||
|
||||
- name: Test build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: ./build/ccextractor --version
|
||||
|
||||
- name: Create .deb package structure
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
|
||||
fi
|
||||
|
||||
mkdir -p ${PKG_NAME}/DEBIAN
|
||||
mkdir -p ${PKG_NAME}/usr/bin
|
||||
mkdir -p ${PKG_NAME}/usr/lib/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/man/man1
|
||||
|
||||
# Copy binary
|
||||
cp build/ccextractor ${PKG_NAME}/usr/bin/
|
||||
|
||||
# Copy GPAC library
|
||||
cp $HOME/gpac-install/usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
|
||||
|
||||
# Set rpath so ccextractor finds bundled libgpac
|
||||
patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
|
||||
|
||||
# Copy documentation
|
||||
cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
|
||||
# Generate man page
|
||||
help2man --no-info --name="closed captions and teletext subtitle extractor" \
|
||||
./build/ccextractor > ${PKG_NAME}/usr/share/man/man1/ccextractor.1 2>/dev/null || true
|
||||
if [ -f ${PKG_NAME}/usr/share/man/man1/ccextractor.1 ]; then
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/man/man1/ccextractor.1
|
||||
fi
|
||||
|
||||
# Create control file
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
|
||||
else
|
||||
PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
|
||||
fi
|
||||
|
||||
INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
|
||||
|
||||
# Determine dependencies based on build variant (Ubuntu 24.04)
|
||||
if [ "$VARIANT" = "hardsubx" ]; then
|
||||
DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls, libavcodec60, libavformat60, libavutil58, libswscale7, libavdevice60, libswresample4, libavfilter9"
|
||||
else
|
||||
DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls"
|
||||
fi
|
||||
|
||||
cat > ${PKG_NAME}/DEBIAN/control << CTRL
|
||||
Package: ccextractor
|
||||
Version: ${VERSION}
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Installed-Size: ${INSTALLED_SIZE}
|
||||
Depends: ${DEPENDS}
|
||||
Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
|
||||
Homepage: https://www.ccextractor.org
|
||||
Description: ${PKG_DESCRIPTION}
|
||||
CCExtractor is a tool that extracts closed captions and teletext subtitles
|
||||
from video files and streams. It supports a wide variety of input formats
|
||||
including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
|
||||
.
|
||||
This package includes a bundled GPAC library for MP4 support.
|
||||
CTRL
|
||||
|
||||
# Remove leading spaces from control file
|
||||
sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
|
||||
|
||||
# Create postinst to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTINST
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postinst
|
||||
|
||||
# Create postrm to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTRM
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postrm
|
||||
|
||||
# Set permissions
|
||||
chmod 755 ${PKG_NAME}/usr/bin/ccextractor
|
||||
chmod 755 ${PKG_NAME}/usr/lib/ccextractor
|
||||
find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
|
||||
|
||||
# Build the .deb
|
||||
dpkg-deb --build --root-owner-group ${PKG_NAME}
|
||||
|
||||
echo "deb_name=${PKG_NAME}.deb" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Test .deb package
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
|
||||
fi
|
||||
|
||||
# Install and test (apt handles dependencies automatically)
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ./${PKG_NAME}.deb
|
||||
ccextractor --version
|
||||
|
||||
- name: Get .deb filename
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: deb_name
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
echo "name=ccextractor_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "name=ccextractor-${VARIANT}_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Upload .deb artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.deb_name.outputs.name }}
|
||||
path: ${{ steps.deb_name.outputs.name }}
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.deb_name.outputs.name }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
275
.github/workflows/build_deb_debian13.yml
vendored
Normal file
275
.github/workflows/build_deb_debian13.yml
vendored
Normal file
@@ -0,0 +1,275 @@
|
||||
name: Build Debian 13 .deb Package
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_deb_debian13.yml'
|
||||
|
||||
jobs:
|
||||
build-deb:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: debian:trixie
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Install git and dependencies for checkout
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y git ca-certificates
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: version
|
||||
run: |
|
||||
# Extract version from source or use tag
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
VERSION="${{ github.event.release.tag_name }}"
|
||||
VERSION="${VERSION#v}" # Remove 'v' prefix if present
|
||||
else
|
||||
# Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
|
||||
VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
|
||||
fi
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "Building version: $VERSION"
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libjpeg-dev \
|
||||
libfreetype-dev \
|
||||
libxml2-dev \
|
||||
libcurl4-gnutls-dev \
|
||||
libssl-dev \
|
||||
clang \
|
||||
libclang-dev \
|
||||
tesseract-ocr \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
patchelf \
|
||||
curl
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build GPAC
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
|
||||
cd gpac
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make install-lib
|
||||
ldconfig
|
||||
|
||||
- name: Build CCExtractor
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
mkdir build && cd build
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
|
||||
else
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
|
||||
fi
|
||||
make -j$(nproc)
|
||||
|
||||
- name: Test build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: ./build/ccextractor --version
|
||||
|
||||
- name: Create .deb package structure
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: create_deb
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
|
||||
fi
|
||||
|
||||
mkdir -p ${PKG_NAME}/DEBIAN
|
||||
mkdir -p ${PKG_NAME}/usr/bin
|
||||
mkdir -p ${PKG_NAME}/usr/lib/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/man/man1
|
||||
|
||||
# Copy binary
|
||||
cp build/ccextractor ${PKG_NAME}/usr/bin/
|
||||
|
||||
# Copy GPAC library
|
||||
cp /usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
|
||||
|
||||
# Set rpath so ccextractor finds bundled libgpac
|
||||
patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
|
||||
|
||||
# Copy documentation
|
||||
cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
|
||||
# Create control file
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
|
||||
else
|
||||
PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
|
||||
fi
|
||||
|
||||
INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
|
||||
|
||||
# Determine dependencies based on build variant (Debian 13 Trixie)
|
||||
if [ "$VARIANT" = "hardsubx" ]; then
|
||||
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls, libavcodec61, libavformat61, libavutil59, libswscale8, libavdevice61, libswresample5, libavfilter10"
|
||||
else
|
||||
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls"
|
||||
fi
|
||||
|
||||
cat > ${PKG_NAME}/DEBIAN/control << CTRL
|
||||
Package: ccextractor
|
||||
Version: ${VERSION}
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Installed-Size: ${INSTALLED_SIZE}
|
||||
Depends: ${DEPENDS}
|
||||
Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
|
||||
Homepage: https://www.ccextractor.org
|
||||
Description: ${PKG_DESCRIPTION}
|
||||
CCExtractor is a tool that extracts closed captions and teletext subtitles
|
||||
from video files and streams. It supports a wide variety of input formats
|
||||
including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
|
||||
.
|
||||
This package includes a bundled GPAC library for MP4 support.
|
||||
Built for Debian 13 (Trixie).
|
||||
CTRL
|
||||
|
||||
# Remove leading spaces from control file
|
||||
sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
|
||||
|
||||
# Create postinst to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTINST
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postinst
|
||||
|
||||
# Create postrm to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTRM
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postrm
|
||||
|
||||
# Set permissions
|
||||
chmod 755 ${PKG_NAME}/usr/bin/ccextractor
|
||||
chmod 755 ${PKG_NAME}/usr/lib/ccextractor
|
||||
find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
|
||||
|
||||
# Build the .deb
|
||||
dpkg-deb --build --root-owner-group ${PKG_NAME}
|
||||
|
||||
echo "deb_name=${PKG_NAME}.deb" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Test .deb package
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
|
||||
fi
|
||||
|
||||
# Install and test (apt handles dependencies automatically)
|
||||
apt-get update
|
||||
apt-get install -y ./${PKG_NAME}.deb
|
||||
ccextractor --version
|
||||
|
||||
- name: Get .deb filename
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: deb_name
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
echo "name=ccextractor_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "name=ccextractor-${VARIANT}_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Upload .deb artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.deb_name.outputs.name }}
|
||||
path: ${{ steps.deb_name.outputs.name }}
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.deb_name.outputs.name }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
4
.github/workflows/build_docker.yml
vendored
4
.github/workflows/build_docker.yml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
- 'docker/**'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- 'src/rust/**'
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
@@ -16,6 +18,8 @@ on:
|
||||
- 'docker/**'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- 'src/rust/**'
|
||||
|
||||
jobs:
|
||||
|
||||
4
.github/workflows/build_linux.yml
vendored
4
.github/workflows/build_linux.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
- '.github/workflows/build_linux.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'linux/**'
|
||||
- 'package_creators/**'
|
||||
@@ -17,6 +19,8 @@ on:
|
||||
- '.github/workflows/build_linux.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'linux/**'
|
||||
- 'package_creators/**'
|
||||
|
||||
154
.github/workflows/build_linux_systemlibs.yml
vendored
Normal file
154
.github/workflows/build_linux_systemlibs.yml
vendored
Normal file
@@ -0,0 +1,154 @@
|
||||
name: Build Linux (System Libs)
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_linux_systemlibs.yml'
|
||||
- 'linux/build'
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
build-systemlibs:
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libfreetype-dev \
|
||||
libutf8proc-dev \
|
||||
libgpac-dev \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
tesseract-ocr-eng \
|
||||
clang \
|
||||
libclang-dev
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev \
|
||||
libxcb1-dev \
|
||||
libxcb-shm0-dev \
|
||||
libx11-dev \
|
||||
liblzma-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Build with system libraries
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
cd linux
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
./build -system-libs -hardsubx
|
||||
else
|
||||
./build -system-libs
|
||||
fi
|
||||
|
||||
- name: Verify build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
./linux/ccextractor --version
|
||||
echo "=== Library dependencies ==="
|
||||
ldd ./linux/ccextractor | grep -E 'freetype|png|utf8proc|tesseract|leptonica' || true
|
||||
|
||||
- name: Get output name
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: output_name
|
||||
run: |
|
||||
case "${{ matrix.build_type }}" in
|
||||
basic)
|
||||
echo "name=ccextractor-linux-systemlibs-x86_64" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
hardsubx)
|
||||
echo "name=ccextractor-linux-systemlibs-hardsubx-x86_64" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Package binary
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
mkdir -p package
|
||||
cp linux/ccextractor package/
|
||||
# Create a simple README for the package
|
||||
cat > package/README.txt << 'EOF'
|
||||
CCExtractor - System Libraries Build
|
||||
=====================================
|
||||
|
||||
This build uses system libraries (dynamic linking).
|
||||
|
||||
Required system packages (Debian/Ubuntu):
|
||||
sudo apt install libgpac12 libtesseract5 libleptonica6 \
|
||||
libpng16-16 libfreetype6 libutf8proc3
|
||||
|
||||
For HardSubX builds, also install:
|
||||
sudo apt install libavcodec60 libavformat60 libswscale7 libavfilter9
|
||||
|
||||
Run with: ./ccextractor --help
|
||||
EOF
|
||||
tar -czvf ${{ steps.output_name.outputs.name }}.tar.gz -C package .
|
||||
|
||||
- name: Upload artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.output_name.outputs.name }}
|
||||
path: ${{ steps.output_name.outputs.name }}.tar.gz
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.output_name.outputs.name }}.tar.gz
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
4
.github/workflows/build_mac.yml
vendored
4
.github/workflows/build_mac.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
- '.github/workflows/build_mac.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'mac/**'
|
||||
- 'package_creators/**'
|
||||
@@ -17,6 +19,8 @@ on:
|
||||
- '.github/workflows/build_mac.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'mac/**'
|
||||
- 'package_creators/**'
|
||||
|
||||
51
.github/workflows/build_snap.yml
vendored
Normal file
51
.github/workflows/build_snap.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: Build CCExtractor Snap
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
build_snap:
|
||||
name: Build Snap package
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install snapd
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y snapd
|
||||
|
||||
- name: Start snapd
|
||||
run: |
|
||||
sudo systemctl start snapd.socket
|
||||
sudo systemctl start snapd
|
||||
|
||||
- name: Install Snapcraft
|
||||
run: |
|
||||
sudo snap install core22
|
||||
sudo snap install snapcraft --classic
|
||||
|
||||
- name: Show Snapcraft version
|
||||
run: snapcraft --version
|
||||
|
||||
- name: Build snap
|
||||
run: sudo snapcraft --destructive-mode
|
||||
|
||||
- name: List generated snap
|
||||
run: ls -lh *.snap
|
||||
|
||||
- name: Upload snap as workflow artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Snap
|
||||
path: "*.snap"
|
||||
|
||||
- name: Upload snap to GitHub Release
|
||||
if: github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: "*.snap"
|
||||
129
.github/workflows/build_windows.yml
vendored
129
.github/workflows/build_windows.yml
vendored
@@ -3,7 +3,6 @@ name: Build CCExtractor on Windows
|
||||
env:
|
||||
RUSTFLAGS: -Ctarget-feature=+crt-static
|
||||
VCPKG_DEFAULT_TRIPLET: x64-windows-static
|
||||
VCPKG_DEFAULT_BINARY_CACHE: C:\vcpkg\.cache
|
||||
VCPKG_COMMIT: ab2977be50c702126336e5088f4836060733c899
|
||||
|
||||
on:
|
||||
@@ -13,6 +12,8 @@ on:
|
||||
- ".github/workflows/build_windows.yml"
|
||||
- "**.c"
|
||||
- "**.h"
|
||||
- "**CMakeLists.txt"
|
||||
- "**.cmake"
|
||||
- "windows/**"
|
||||
- "src/rust/**"
|
||||
pull_request:
|
||||
@@ -21,108 +22,118 @@ on:
|
||||
- ".github/workflows/build_windows.yml"
|
||||
- "**.c"
|
||||
- "**.h"
|
||||
- "**CMakeLists.txt"
|
||||
- "**.cmake"
|
||||
- "windows/**"
|
||||
- "src/rust/**"
|
||||
|
||||
jobs:
|
||||
build_release:
|
||||
build:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
with:
|
||||
msbuild-architecture: x64
|
||||
|
||||
# Install GPAC (fast, ~30s, not worth caching complexity)
|
||||
- name: Install gpac
|
||||
run: choco install gpac --version 2.4.0
|
||||
run: choco install gpac --version 2.4.0 --no-progress
|
||||
|
||||
# Use lukka/run-vcpkg for better caching
|
||||
- name: Setup vcpkg
|
||||
run: mkdir C:\vcpkg\.cache
|
||||
- name: Cache vcpkg
|
||||
id: cache
|
||||
uses: lukka/run-vcpkg@v11
|
||||
id: runvcpkg
|
||||
with:
|
||||
vcpkgGitCommitId: ${{ env.VCPKG_COMMIT }}
|
||||
vcpkgDirectory: ${{ github.workspace }}/vcpkg
|
||||
vcpkgJsonGlob: 'windows/vcpkg.json'
|
||||
|
||||
# Cache vcpkg installed packages separately for faster restores
|
||||
- name: Cache vcpkg installed packages
|
||||
id: vcpkg-installed-cache
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ${{ github.workspace }}/vcpkg/installed
|
||||
key: vcpkg-installed-${{ runner.os }}-${{ env.VCPKG_COMMIT }}-${{ hashFiles('windows/vcpkg.json') }}
|
||||
restore-keys: |
|
||||
vcpkg-installed-${{ runner.os }}-${{ env.VCPKG_COMMIT }}-
|
||||
|
||||
- name: Install vcpkg dependencies
|
||||
if: steps.vcpkg-installed-cache.outputs.cache-hit != 'true'
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
|
||||
# Cache Rust/Cargo artifacts
|
||||
- name: Cache Cargo registry
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
C:\vcpkg\.cache
|
||||
key: vcpkg-${{ runner.os }}-${{ env.VCPKG_COMMIT }}
|
||||
- name: Build vcpkg
|
||||
run: |
|
||||
git clone https://github.com/microsoft/vcpkg
|
||||
./vcpkg/bootstrap-vcpkg.bat
|
||||
- name: Install dependencies
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
- uses: actions-rs/toolchain@v1
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-registry-
|
||||
|
||||
# Cache Cargo build artifacts - rust.bat sets CARGO_TARGET_DIR to windows/
|
||||
# which results in artifacts at windows/x86_64-pc-windows-msvc/
|
||||
- name: Cache Cargo build artifacts
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
path: ${{ github.workspace }}/windows/x86_64-pc-windows-msvc
|
||||
key: ${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('src/rust/**/*.rs') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }}-
|
||||
${{ runner.os }}-cargo-build-
|
||||
|
||||
- name: Setup Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Install Win 10 SDK
|
||||
uses: ilammy/msvc-dev-cmd@v1
|
||||
- name: build Release-Full
|
||||
|
||||
# Build Release-Full
|
||||
- name: Build Release-Full
|
||||
env:
|
||||
LIBCLANG_PATH: "C:\\Program Files\\LLVM\\lib"
|
||||
LLVM_CONFIG_PATH: "C:\\Program Files\\LLVM\\bin\\llvm-config"
|
||||
CARGO_TARGET_DIR: "..\\..\\windows"
|
||||
BINDGEN_EXTRA_CLANG_ARGS: -fmsc-version=0
|
||||
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
|
||||
run: msbuild ccextractor.sln /p:Configuration=Release-Full /p:Platform=x64
|
||||
working-directory: ./windows
|
||||
- name: Display version information
|
||||
|
||||
- name: Display Release version information
|
||||
run: ./ccextractorwinfull.exe --version
|
||||
working-directory: ./windows/x64/Release-Full
|
||||
- uses: actions/upload-artifact@v6
|
||||
|
||||
- name: Upload Release artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Windows Release build
|
||||
path: |
|
||||
./windows/x64/Release-Full/ccextractorwinfull.exe
|
||||
./windows/x64/Release-Full/*.dll
|
||||
build_debug:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v6
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
with:
|
||||
msbuild-architecture: x64
|
||||
- name: Install gpac
|
||||
run: choco install gpac --version 2.4.0
|
||||
- name: Setup vcpkg
|
||||
run: mkdir C:\vcpkg\.cache
|
||||
- name: Cache vcpkg
|
||||
id: cache
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
C:\vcpkg\.cache
|
||||
key: vcpkg-${{ runner.os }}-${{ env.VCPKG_COMMIT }}
|
||||
- name: Build vcpkg
|
||||
run: |
|
||||
git clone https://github.com/microsoft/vcpkg
|
||||
./vcpkg/bootstrap-vcpkg.bat
|
||||
- name: Install dependencies
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Install Win 10 SDK
|
||||
uses: ilammy/msvc-dev-cmd@v1
|
||||
- name: build Debug-Full
|
||||
|
||||
# Build Debug-Full (reuses cached Cargo artifacts)
|
||||
- name: Build Debug-Full
|
||||
env:
|
||||
LIBCLANG_PATH: "C:\\Program Files\\LLVM\\lib"
|
||||
LLVM_CONFIG_PATH: "C:\\Program Files\\LLVM\\bin\\llvm-config"
|
||||
CARGO_TARGET_DIR: "..\\..\\windows"
|
||||
BINDGEN_EXTRA_CLANG_ARGS: -fmsc-version=0
|
||||
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
|
||||
run: msbuild ccextractor.sln /p:Configuration=Debug-Full /p:Platform=x64
|
||||
working-directory: ./windows
|
||||
- name: Display version information
|
||||
|
||||
- name: Display Debug version information
|
||||
continue-on-error: true
|
||||
run: ./ccextractorwinfull.exe --version
|
||||
working-directory: ./windows/x64/Debug-Full
|
||||
- uses: actions/upload-artifact@v6
|
||||
|
||||
- name: Upload Debug artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Windows Debug build
|
||||
path: |
|
||||
|
||||
15
.github/workflows/homebrew.yml
vendored
Normal file
15
.github/workflows/homebrew.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
name: Bump Homebrew Formula
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
homebrew:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Update Homebrew formula
|
||||
uses: dawidd6/action-homebrew-bump-formula@v7
|
||||
with:
|
||||
token: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
|
||||
formula: ccextractor
|
||||
136
.github/workflows/publish_chocolatey.yml
vendored
Normal file
136
.github/workflows/publish_chocolatey.yml
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
# Publish to Chocolatey Community Repository
|
||||
#
|
||||
# PREREQUISITES:
|
||||
# 1. Create a Chocolatey account at https://community.chocolatey.org/account/Register
|
||||
# 2. Get your API key from https://community.chocolatey.org/account
|
||||
# 3. Add the API key as repository secret: CHOCOLATEY_API_KEY
|
||||
#
|
||||
# Reference: https://docs.chocolatey.org/en-us/create/create-packages-quick-start
|
||||
|
||||
name: Publish to Chocolatey
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [released]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_tag:
|
||||
description: 'Release tag to publish (e.g., v0.96.1)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version from tag
|
||||
id: version
|
||||
shell: bash
|
||||
run: |
|
||||
TAG="${{ github.event.inputs.release_tag || github.event.release.tag_name }}"
|
||||
# Strip 'v' prefix if present
|
||||
VERSION="${TAG#v}"
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "tag=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Download MSI from release
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$tag = "${{ steps.version.outputs.tag }}"
|
||||
$msiUrl = "https://github.com/CCExtractor/ccextractor/releases/download/$tag/CCExtractor.$version.msi"
|
||||
|
||||
Write-Host "Downloading MSI from: $msiUrl"
|
||||
Invoke-WebRequest -Uri $msiUrl -OutFile "CCExtractor.msi"
|
||||
|
||||
# Calculate SHA256 checksum
|
||||
$hash = (Get-FileHash -Path "CCExtractor.msi" -Algorithm SHA256).Hash
|
||||
Write-Host "SHA256: $hash"
|
||||
echo "MSI_CHECKSUM=$hash" >> $env:GITHUB_ENV
|
||||
|
||||
- name: Update nuspec version
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$nuspecPath = "packaging/chocolatey/ccextractor.nuspec"
|
||||
|
||||
$content = Get-Content $nuspecPath -Raw
|
||||
$content = $content -replace '<version>.*</version>', "<version>$version</version>"
|
||||
Set-Content -Path $nuspecPath -Value $content
|
||||
|
||||
Write-Host "Updated nuspec to version $version"
|
||||
|
||||
- name: Update install script
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$tag = "${{ steps.version.outputs.tag }}"
|
||||
$checksum = $env:MSI_CHECKSUM
|
||||
$installScript = "packaging/chocolatey/tools/chocolateyInstall.ps1"
|
||||
|
||||
$content = Get-Content $installScript -Raw
|
||||
|
||||
# Update URL
|
||||
$newUrl = "https://github.com/CCExtractor/ccextractor/releases/download/$tag/CCExtractor.$version.msi"
|
||||
$content = $content -replace "url64bit\s*=\s*'[^']*'", "url64bit = '$newUrl'"
|
||||
|
||||
# Update checksum
|
||||
$content = $content -replace "checksum64\s*=\s*'[^']*'", "checksum64 = '$checksum'"
|
||||
|
||||
Set-Content -Path $installScript -Value $content
|
||||
|
||||
Write-Host "Updated install script with URL and checksum"
|
||||
|
||||
- name: Build Chocolatey package
|
||||
shell: pwsh
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
choco pack ccextractor.nuspec
|
||||
|
||||
# List the generated package
|
||||
Get-ChildItem *.nupkg
|
||||
|
||||
- name: Test package locally
|
||||
shell: pwsh
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
$nupkg = Get-ChildItem *.nupkg | Select-Object -First 1
|
||||
Write-Host "Testing package: $($nupkg.Name)"
|
||||
|
||||
# Install from local package
|
||||
choco install ccextractor --source="'.;https://community.chocolatey.org/api/v2/'" --yes --force
|
||||
|
||||
# Verify installation
|
||||
$ccx = Get-Command ccextractor -ErrorAction SilentlyContinue
|
||||
if ($ccx) {
|
||||
Write-Host "CCExtractor found at: $($ccx.Source)"
|
||||
& ccextractor --version
|
||||
} else {
|
||||
Write-Host "CCExtractor not found in PATH, checking Program Files..."
|
||||
$exePath = Join-Path $env:ProgramFiles "CCExtractor\ccextractor.exe"
|
||||
if (Test-Path $exePath) {
|
||||
& $exePath --version
|
||||
}
|
||||
}
|
||||
|
||||
- name: Push to Chocolatey
|
||||
shell: pwsh
|
||||
env:
|
||||
CHOCOLATEY_API_KEY: ${{ secrets.CHOCOLATEY_API_KEY }}
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
$nupkg = Get-ChildItem *.nupkg | Select-Object -First 1
|
||||
|
||||
Write-Host "Pushing $($nupkg.Name) to Chocolatey..."
|
||||
choco push $nupkg.Name --source="https://push.chocolatey.org/" --api-key="$env:CHOCOLATEY_API_KEY"
|
||||
|
||||
Write-Host "Package submitted to Chocolatey! It may take some time to be moderated and published."
|
||||
|
||||
- name: Upload package artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: chocolatey-package
|
||||
path: packaging/chocolatey/*.nupkg
|
||||
38
.github/workflows/publish_winget.yml
vendored
Normal file
38
.github/workflows/publish_winget.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# Publish to Windows Package Manager (winget)
|
||||
#
|
||||
# PREREQUISITES:
|
||||
# 1. CCExtractor must already have ONE version in winget-pkgs before this works
|
||||
# - Submit the initial manifest manually from packaging/winget/
|
||||
# - PR to: https://github.com/microsoft/winget-pkgs
|
||||
#
|
||||
# 2. Create a fork of microsoft/winget-pkgs under the CCExtractor organization
|
||||
# - https://github.com/CCExtractor/winget-pkgs (needs to be created)
|
||||
#
|
||||
# 3. Create a GitHub Personal Access Token (classic) with 'public_repo' scope
|
||||
# - Add as repository secret: WINGET_TOKEN
|
||||
#
|
||||
# Reference: https://github.com/vedantmgoyal9/winget-releaser
|
||||
|
||||
name: Publish to WinGet
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [released]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_tag:
|
||||
description: 'Release tag to publish (e.g., v0.96.1)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Publish to WinGet
|
||||
uses: vedantmgoyal9/winget-releaser@v2
|
||||
with:
|
||||
identifier: CCExtractor.CCExtractor
|
||||
installers-regex: '\.msi$' # Only use the MSI installer
|
||||
token: ${{ secrets.WINGET_TOKEN }}
|
||||
release-tag: ${{ github.event.inputs.release_tag || github.event.release.tag_name }}
|
||||
90
.github/workflows/release.yml
vendored
90
.github/workflows/release.yml
vendored
@@ -5,23 +5,64 @@ on:
|
||||
types:
|
||||
- created
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
env:
|
||||
RUSTFLAGS: -Ctarget-feature=+crt-static
|
||||
VCPKG_DEFAULT_TRIPLET: x64-windows-static
|
||||
VCPKG_DEFAULT_BINARY_CACHE: C:\vcpkg\.cache
|
||||
VCPKG_COMMIT: ab2977be50c702126336e5088f4836060733c899
|
||||
|
||||
jobs:
|
||||
build_windows:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v6
|
||||
- name: Get the version
|
||||
id: get_version
|
||||
run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\/v/}
|
||||
run: |
|
||||
# Extract version from tag, strip 'v' prefix and everything after first dash
|
||||
VERSION=${GITHUB_REF/refs\/tags\/v/}
|
||||
VERSION=${VERSION%%-*}
|
||||
# Save display version for filenames (e.g., 0.96.1)
|
||||
echo ::set-output name=DISPLAY_VERSION::$VERSION
|
||||
# Count dots to determine version format
|
||||
DOTS="${VERSION//[^.]}"
|
||||
PART_COUNT=$((${#DOTS} + 1))
|
||||
# MSI requires 4-part version (major.minor.build.revision)
|
||||
if [ "$PART_COUNT" -eq 2 ]; then
|
||||
MSI_VERSION="${VERSION}.0.0"
|
||||
elif [ "$PART_COUNT" -eq 3 ]; then
|
||||
MSI_VERSION="${VERSION}.0"
|
||||
else
|
||||
MSI_VERSION="${VERSION}"
|
||||
fi
|
||||
echo ::set-output name=VERSION::$MSI_VERSION
|
||||
shell: bash
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
- name: Install llvm and clang
|
||||
uses: egor-tensin/setup-clang@v1
|
||||
with:
|
||||
version: latest
|
||||
platform: x64
|
||||
msbuild-architecture: x64
|
||||
- name: Install gpac
|
||||
run: choco install gpac --version 2.4.0
|
||||
- name: Setup vcpkg
|
||||
run: mkdir C:\vcpkg\.cache
|
||||
- name: Cache vcpkg
|
||||
id: cache
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
C:\vcpkg\.cache
|
||||
key: vcpkg-${{ runner.os }}-${{ env.VCPKG_COMMIT }}
|
||||
- name: Build vcpkg
|
||||
run: |
|
||||
git clone https://github.com/microsoft/vcpkg
|
||||
./vcpkg/bootstrap-vcpkg.bat
|
||||
- name: Install dependencies
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
@@ -34,15 +75,24 @@ jobs:
|
||||
LLVM_CONFIG_PATH: "C:\\Program Files\\LLVM\\bin\\llvm-config"
|
||||
CARGO_TARGET_DIR: "..\\..\\windows"
|
||||
BINDGEN_EXTRA_CLANG_ARGS: -fmsc-version=0
|
||||
run: msbuild ccextractor.sln /p:Configuration=Release-Full /p:Platform=Win32
|
||||
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
|
||||
run: msbuild ccextractor.sln /p:Configuration=Release-Full /p:Platform=x64
|
||||
working-directory: ./windows
|
||||
- name: Copy files to directory for installer
|
||||
run: mkdir installer; cp ./Release-Full/ccextractorwinfull.exe ./installer; cp ./Release-Full/*.dll ./installer
|
||||
run: mkdir installer; cp ./x64/Release-Full/ccextractorwinfull.exe ./installer; cp ./x64/Release-Full/*.dll ./installer
|
||||
working-directory: ./windows
|
||||
- name: Download tessdata for OCR support
|
||||
run: |
|
||||
mkdir -p ./installer/tessdata
|
||||
# Download English traineddata from tessdata_fast (smaller, faster, good for most use cases)
|
||||
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata" -OutFile "./installer/tessdata/eng.traineddata"
|
||||
# Download OSD (Orientation and Script Detection) for automatic script detection
|
||||
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata" -OutFile "./installer/tessdata/osd.traineddata"
|
||||
working-directory: ./windows
|
||||
- name: install WiX
|
||||
run: dotnet tool install --global wix --version 4.0.0-preview.0 && wix extension -g add WixToolset.UI.wixext
|
||||
run: dotnet tool uninstall --global wix; dotnet tool install --global wix --version 6.0.2 && wix extension add -g WixToolset.UI.wixext/6.0.2
|
||||
- name: Make sure WiX works
|
||||
run: wix --version && wix extension -g list
|
||||
run: wix --version && wix extension list -g
|
||||
- name: Download Flutter GUI
|
||||
run: ((Invoke-WebRequest -UseBasicParsing https://api.github.com/repos/CCExtractor/ccextractorfluttergui/releases/latest).Content | ConvertFrom-Json).assets | ForEach-Object {if ($_.name -eq "windows.zip") { Invoke-WebRequest -UseBasicParsing -Uri $_.browser_download_url -OutFile windows.zip}}
|
||||
working-directory: ./windows
|
||||
@@ -50,32 +100,38 @@ jobs:
|
||||
run: ls
|
||||
working-directory: ./windows
|
||||
- name: Unzip Flutter GUI
|
||||
run: Expand-Archive -Path ./windows.zip -DestinationPath ./installer
|
||||
run: Expand-Archive -Path ./windows.zip -DestinationPath ./installer -Force
|
||||
working-directory: ./windows
|
||||
- name: Display installer folder contents
|
||||
run: Get-ChildItem -Recurse ./installer
|
||||
working-directory: ./windows
|
||||
- name: Create portable zip
|
||||
run: Compress-Archive -Path ./installer/* -DestinationPath ./CCExtractor_win_portable.zip
|
||||
run: Compress-Archive -Path ./installer/* -DestinationPath ./CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}_win_portable.zip
|
||||
working-directory: ./windows
|
||||
- name: Build installer
|
||||
run: wix build -ext "$HOME\.wix\extensions\WixToolset.UI.wixext\4.0.0-preview.0\tools\WixToolset.UI.wixext.dll" -d "AppVersion=${{ steps.get_version.outputs.VERSION }}.0.0" -o CCExtractor.msi installer.wxs
|
||||
run: wix build -arch x64 -ext WixToolset.UI.wixext -d "AppVersion=${{ steps.get_version.outputs.VERSION }}" -o CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.msi installer.wxs CustomUI.wxs
|
||||
working-directory: ./windows
|
||||
- name: Upload as asset
|
||||
uses: AButler/upload-release-assets@v3.0
|
||||
with:
|
||||
files: './windows/CCExtractor.msi;./windows/CCExtractor_win_portable.zip'
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
files: './windows/CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.msi;./windows/CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}_win_portable.zip'
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
create_linux_package:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: ./ccextractor
|
||||
- name: Get the version
|
||||
id: get_version
|
||||
run: |
|
||||
VERSION=${GITHUB_REF/refs\/tags\/v/}
|
||||
VERSION=${VERSION%%-*}
|
||||
echo ::set-output name=DISPLAY_VERSION::$VERSION
|
||||
- name: Create .tar.gz without git and windows folders
|
||||
run: tar -pczf ./ccextractor_minimal.tar.gz --exclude "ccextractor/windows" --exclude "ccextractor/.git" ccextractor
|
||||
run: tar -pczf ./ccextractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.tar.gz --exclude "ccextractor/windows" --exclude "ccextractor/.git" ccextractor
|
||||
- name: Upload as asset
|
||||
uses: AButler/upload-release-assets@v3.0
|
||||
with:
|
||||
files: './ccextractor_minimal.tar.gz'
|
||||
files: './ccextractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.tar.gz'
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -17,6 +17,7 @@ CVS
|
||||
mac/ccextractor
|
||||
linux/ccextractor
|
||||
linux/depend
|
||||
linux/build_scan/
|
||||
windows/x86_64-pc-windows-msvc/**
|
||||
windows/Debug/**
|
||||
windows/Debug-OCR/**
|
||||
@@ -28,6 +29,7 @@ windows/Debug-Full/**
|
||||
windows/x64/**
|
||||
windows/ccextractor.VC.db
|
||||
build/
|
||||
build_*/
|
||||
|
||||
####
|
||||
# Python
|
||||
@@ -143,6 +145,9 @@ bazel*
|
||||
#Intellij IDEs
|
||||
.idea/
|
||||
|
||||
# Plans (local only)
|
||||
plans/
|
||||
|
||||
# Rust build and MakeFiles (and CMake files)
|
||||
src/rust/CMakeFiles/
|
||||
src/rust/CMakeCache.txt
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
# CI Test Triage - December 2025
|
||||
|
||||
This PR is used to trigger CI runs and track the triage of failing regression tests.
|
||||
|
||||
## Purpose
|
||||
|
||||
Several PRs have been merged recently that improved CCExtractor behavior, but the Sample Platform
|
||||
considers them regressions because the "ground truth" baseline is outdated. This PR helps:
|
||||
|
||||
1. Get a fresh CI run against current master
|
||||
2. Systematically analyze each failing test
|
||||
3. Determine whether to update ground truth or fix code
|
||||
|
||||
## Merged Fixes
|
||||
|
||||
The following PRs have been merged and this run verifies their combined effect:
|
||||
|
||||
- **PR #1847**: Hardsubx crash fix, memory leak fixes, rcwt exit code fix
|
||||
- **PR #1848**: XDS empty content entries fix
|
||||
|
||||
## Status
|
||||
|
||||
- [x] PR #1847 merged
|
||||
- [x] PR #1848 merged
|
||||
- [ ] Verification CI run triggered
|
||||
- [ ] Results analyzed
|
||||
@@ -4,7 +4,7 @@ MAINTAINER = Marc Espie <espie@openbsd.org>
|
||||
CATEGORIES = multimedia
|
||||
COMMENT = closed caption subtitles extractor
|
||||
HOMEPAGE = https://ccextractor.org
|
||||
V = 0.95
|
||||
V = 0.96.5
|
||||
DISTFILES = ccextractor.${V:S/.//}-src.zip
|
||||
MASTER_SITES = ${MASTER_SITE_SOURCEFORGE:=ccextractor/}
|
||||
DISTNAME = ccextractor-$V
|
||||
|
||||
48
README.md
48
README.md
@@ -2,7 +2,6 @@
|
||||
|
||||
# CCExtractor
|
||||
|
||||
<a href="https://travis-ci.org/CCExtractor/ccextractor"><img src="https://raw.githubusercontent.com/CCExtractor/ccextractor-org-media/master/static/macOS-build-badge-logo.png" width="20"></a> [](https://travis-ci.org/CCExtractor/ccextractor)
|
||||
[](https://sampleplatform.ccextractor.org/test/master/windows)
|
||||
[](https://sampleplatform.ccextractor.org/test/master/linux)
|
||||
[](https://sourceforge.net/projects/ccextractor/)
|
||||
@@ -29,6 +28,25 @@ The core functionality is written in C. Other languages used include C++ and Pyt
|
||||
|
||||
Downloads for precompiled binaries and source code can be found [on our website](https://ccextractor.org/public/general/downloads/).
|
||||
|
||||
|
||||
### Windows Package Managers
|
||||
|
||||
**WinGet:**
|
||||
```powershell
|
||||
winget install CCExtractor.CCExtractor
|
||||
```
|
||||
|
||||
**Chocolatey:**
|
||||
```powershell
|
||||
choco install ccextractor
|
||||
```
|
||||
|
||||
**Scoop:**
|
||||
```powershell
|
||||
scoop bucket add extras
|
||||
scoop install ccextractor
|
||||
```
|
||||
|
||||
Extracting subtitles is relatively simple. Just run the following command:
|
||||
|
||||
`ccextractor <input>`
|
||||
@@ -44,6 +62,34 @@ You can also find the list of parameters and their brief description by running
|
||||
|
||||
You can find sample files on [our website](https://ccextractor.org/public/general/tvsamples/) to test the software.
|
||||
|
||||
### Building from Source
|
||||
|
||||
- [Building on Windows using WSL](docs/build-wsl.md)
|
||||
|
||||
#### Linux (Autotools) build notes
|
||||
|
||||
CCExtractor also supports an autotools-based build system under the `linux/`
|
||||
directory.
|
||||
|
||||
Important notes:
|
||||
- The autotools workflow lives inside `linux/`. The `configure` script is
|
||||
generated there and should be run from that directory.
|
||||
- Typical build steps are:
|
||||
```
|
||||
cd linux
|
||||
./autogen.sh
|
||||
./configure
|
||||
make
|
||||
```
|
||||
- Rust support is enabled automatically if `cargo` and `rustc` are available
|
||||
on the system. In that case, Rust components are built and linked during
|
||||
`make`.
|
||||
- If you encounter unexpected build or linking issues, a clean rebuild
|
||||
(`make clean` or a fresh clone) is recommended, especially when Rust is
|
||||
involved.
|
||||
|
||||
This build flow has been tested on Linux and WSL.
|
||||
|
||||
## Compiling CCExtractor
|
||||
|
||||
To learn more about how to compile and build CCExtractor for your platform check the [compilation guide](https://github.com/CCExtractor/ccextractor/blob/master/docs/COMPILATION.MD).
|
||||
|
||||
157
docs/Building_macos_system_libs.md
Normal file
157
docs/Building_macos_system_libs.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# Building CCExtractor on macOS using System Libraries (-system-libs)
|
||||
|
||||
## Overview
|
||||
|
||||
This document explains how to build CCExtractor on macOS using system-installed libraries instead of bundled third-party libraries.
|
||||
|
||||
This build mode is required for Homebrew compatibility and is enabled via the `-system-libs` flag introduced in PR #1862.
|
||||
|
||||
## Why is -system-libs needed?
|
||||
|
||||
### Background
|
||||
|
||||
CCExtractor was removed from Homebrew (homebrew-core) because:
|
||||
|
||||
- Homebrew does not allow bundling third-party libraries
|
||||
- The default CCExtractor build compiles libraries from `src/thirdparty/`
|
||||
- This violates Homebrew packaging policies
|
||||
|
||||
### What -system-libs fixes
|
||||
|
||||
The `-system-libs` flag allows CCExtractor to:
|
||||
|
||||
- Use system-installed libraries via Homebrew
|
||||
- Resolve headers and linker flags using `pkg-config`
|
||||
- Skip compiling bundled copies of common libraries
|
||||
|
||||
This makes CCExtractor acceptable for Homebrew packaging.
|
||||
|
||||
## Build Modes Explained
|
||||
|
||||
### 1️⃣ Default Build (Bundled Libraries)
|
||||
|
||||
**Command:**
|
||||
|
||||
```bash
|
||||
./mac/build.command
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
- Compiles bundled libraries:
|
||||
- `freetype`
|
||||
- `libpng`
|
||||
- `zlib`
|
||||
- `utf8proc`
|
||||
- Self-contained binary
|
||||
- Larger size
|
||||
- Suitable for standalone builds
|
||||
|
||||
### 2️⃣ System Libraries Build (Homebrew-compatible)
|
||||
|
||||
**Command:**
|
||||
|
||||
```bash
|
||||
./mac/build.command -system-libs
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
|
||||
- Uses system libraries via `pkg-config`
|
||||
- Does not compile bundled libraries
|
||||
- Smaller binary
|
||||
- Faster build
|
||||
- Required for Homebrew
|
||||
|
||||
## Required Homebrew Dependencies
|
||||
|
||||
Install required dependencies:
|
||||
|
||||
```bash
|
||||
brew install pkg-config autoconf automake libtool \
|
||||
gpac freetype libpng protobuf-c utf8proc zlib
|
||||
```
|
||||
|
||||
**Optional** (OCR / HARDSUBX support):
|
||||
|
||||
```bash
|
||||
brew install tesseract leptonica ffmpeg
|
||||
```
|
||||
|
||||
## How to Build
|
||||
|
||||
```bash
|
||||
cd mac
|
||||
./build.command -system-libs
|
||||
```
|
||||
|
||||
**Verify:**
|
||||
|
||||
```bash
|
||||
./ccextractor --version
|
||||
```
|
||||
|
||||
## What Changes Internally with -system-libs
|
||||
|
||||
### Libraries NOT compiled (system-provided)
|
||||
|
||||
- **FreeType**
|
||||
- **libpng**
|
||||
- **zlib**
|
||||
- **utf8proc**
|
||||
|
||||
### Libraries STILL bundled
|
||||
|
||||
- **lib_hash** (Custom SHA-256 implementation, no system equivalent)
|
||||
|
||||
## CI Coverage
|
||||
|
||||
A new CI job was added:
|
||||
|
||||
- `build_shell_system_libs`
|
||||
|
||||
**What it does:**
|
||||
|
||||
- Installs Homebrew dependencies
|
||||
- Runs `./build.command -system-libs`
|
||||
- Verifies the binary runs correctly
|
||||
|
||||
This ensures Homebrew-compatible builds stay working.
|
||||
|
||||
## Verification (Local)
|
||||
|
||||
You can confirm system libraries are used:
|
||||
|
||||
```bash
|
||||
otool -L mac/ccextractor
|
||||
```
|
||||
|
||||
**Expected output includes paths like:**
|
||||
|
||||
```
|
||||
/opt/homebrew/opt/gpac/lib/libgpac.dylib
|
||||
```
|
||||
|
||||
## Homebrew Formula Usage (Future)
|
||||
|
||||
Example formula snippet:
|
||||
|
||||
```ruby
|
||||
def install
|
||||
system "./mac/build.command", "-system-libs"
|
||||
bin.install "mac/ccextractor"
|
||||
end
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
- `-system-libs` is opt-in
|
||||
- Default build remains unchanged
|
||||
- Enables CCExtractor to return to Homebrew
|
||||
- Fully tested in CI and locally
|
||||
|
||||
## Related
|
||||
|
||||
- **PR #1862** — Add `-system-libs` flag
|
||||
- **Issue #1580** — Homebrew compatibility
|
||||
- **Issue #1534** — System library support
|
||||
173
docs/CHANGES.TXT
173
docs/CHANGES.TXT
@@ -1,103 +1,98 @@
|
||||
0.95 (2025-09-15)
|
||||
0.96.6 (unreleased)
|
||||
-------------------
|
||||
- Fix: DVB EIT start time BCD decoding in XMLTV output causing invalid timestamps (#1835)
|
||||
- New: Add Snap packaging support with Snapcraft configuration and GitHub Actions CI workflow.
|
||||
- Fix: Clear status line output on Linux/WSL to prevent text artifacts (#2017)
|
||||
- Fix: Prevent infinite loop on truncated MKV files
|
||||
- Fix: Various memory safety and stability fixes in demuxers (MP4, PS, MKV, DVB)
|
||||
- Fix: Delete empty output files instead of leaving 0-byte files (#1282)
|
||||
- Fix: --mkvlang now supports BCP 47 language tags (e.g., en-US, zh-Hans-CN) and multiple codes
|
||||
- Fix: segmentation fault when using --multiprogram
|
||||
|
||||
0.96.5 (2026-01-05)
|
||||
-------------------
|
||||
- New: CCExtractor is available again via Homebrew on macOS and Linux.
|
||||
- New: Add support for raw CDP (Caption Distribution Packet) files (#1406)
|
||||
- New: Add --scc-accurate-timing option for bandwidth-aware SCC output (#1120)
|
||||
- Fix: MXF files containing CEA-708 captions not being detected/extracted (#1647)
|
||||
- Docs: Add Windows WSL build instructions
|
||||
- Fix: Security fixes (out-of-bounds read/write) in a few places in the legacy C code.
|
||||
|
||||
0.96.4 (2026-01-01)
|
||||
-------------------
|
||||
- New: Persistent CEA-708 decoder context - maintains state across multiple calls for proper subtitle continuity
|
||||
- New: OCR character blacklist options (--ocr-blacklist, --ocr-blacklist-file) for improved accuracy
|
||||
- New: OCR line-split option (--ocr-splitontimechange) for better subtitle segmentation
|
||||
- Fix: 32-bit build failures on i686 and armv7l architectures
|
||||
- Fix: Legacy command-line argument compatibility (-1, -2, -12, --sc, --svc)
|
||||
- Fix: Prevent heap buffer overflow in Teletext processing (security fix)
|
||||
- Fix: Prevent integer overflow leading to heap buffer overflow in Transport Stream handling (security fix)
|
||||
- Fix: Lazy OCR initialization - only initialize when first DVB subtitle is encountered
|
||||
- Build: Optimized Windows CI workflow for faster builds
|
||||
- Fix: Updated GUI with version 0.7.1. A blind attempt to fix a hang on start on some Windows systems.
|
||||
|
||||
0.96.3 (2025-12-29)
|
||||
-------------------
|
||||
- New: VOBSUB subtitle extraction with OCR support for MP4 files
|
||||
- New: VOBSUB subtitle extraction support for MKV/Matroska files
|
||||
- New: Native SCC (Scenarist Closed Caption) input file support - CCExtractor can now read SCC files
|
||||
- New: Configurable frame rate (--scc-framerate) and styled PAC codes for SCC output
|
||||
- Fix: Apply --delay option to DVB/bitmap subtitles (previously only worked with text-based subtitles)
|
||||
- Fix: 200ms timing offset in MOV/MP4 caption extraction
|
||||
- Fix: utf8proc include path for system library builds
|
||||
- Fix: Use fixed-width integer types in MP4 bswap functions for better portability
|
||||
- Fix: Guard ocr_text access with ENABLE_OCR preprocessor check
|
||||
- Fix: Preserve FFmpeg libs when building with -system-libs -hardsubx
|
||||
- Build: Add vobsub_decoder to Windows and autoconf build systems
|
||||
- Build: Add winget and Chocolatey packaging workflows for Windows distribution
|
||||
- Docs: Add VOBSUB extraction documentation and subtile-ocr Dockerfile
|
||||
|
||||
0.96.2 (2025-12-26)
|
||||
-------------------
|
||||
- Fix: Resolve utf8proc header include path when building against system libraries on Linux.
|
||||
- Rebundle Windows version to include required runtime files to process hardcoded subtitles
|
||||
(hardsubx mode).
|
||||
- New: Add optional -system-libs flag to Linux build script for package manager compatibility
|
||||
|
||||
0.96.1 (2025-12-25)
|
||||
-------------------
|
||||
- Rebundle Windows version to include an updated GUI. No changes in CCExtractor itself.
|
||||
|
||||
0.96 (2025-12-23)
|
||||
-----------------
|
||||
- New: Multi-page teletext extraction support (#665)
|
||||
- Extract multiple teletext pages simultaneously with separate output files
|
||||
- Use --tpage multiple times (e.g., --tpage 100 --tpage 200)
|
||||
- Output files are named with page suffix (e.g., output_p100.srt, output_p200.srt)
|
||||
- Fix: SPUPNG subtitle offset calculation to center based on actual image dimensions
|
||||
|
||||
- New: Added --list-tracks (-L) option to list all tracks in media files without processing
|
||||
- Fix: Garbled captions from HDHomeRun and I/P-only H.264 streams (#1109)
|
||||
- Fix: Enable stdout output for CEA-708 captions on Windows (#1693)
|
||||
- Fix: McPoodle DVD raw format read/write - properly handle loop markers (#1524)
|
||||
- Fix: Variable shadowing in general_loop causing false "premature end of file" messages
|
||||
- Fix: Double-free crash in teletext cleanup when processing multiple files
|
||||
- Fix: Uninitialized memory and memory leaks found by Valgrind testing
|
||||
- Fix: Dangling pointers in Rust FFI copy_from_rust functions
|
||||
- New: Improve -out=report to show detected Teletext subtitle pages (#1034)
|
||||
- FIX: Include ATSC VCT virtual channel numbers and call signs in XMLTV output
|
||||
- FIX: Restore ATSC XMLTV generation with ETT parsing for extended descriptions, multi-segment handling, extended table ID's (EIT/VCT), corrected <programme> XMLTV formatting, buffer bounds fixes
|
||||
- Fix: DVB subtitle extraction improvements for Chinese broadcasts (#224):
|
||||
- Fix crash in parse_PMT() due to missing bounds checks
|
||||
- Fix negative timestamps in DVB subtitle output
|
||||
- Fix crash in ignore_alpha_at_edge() OCR cropping
|
||||
- Improve DVB subtitle OCR accuracy with image inversion
|
||||
- Fix --ocrlang to accept Tesseract language names (chi_tra, chi_sim, etc.)
|
||||
- Add case-insensitive matching for --dvblang parameter
|
||||
- FIX: Add HEVC/H.265 stream type recognition to prevent crashes on ATSC 3.0 streams
|
||||
- Fix: ARM64/aarch64 build failure due to c_char type mismatch in nal.rs
|
||||
- Fix: HardSubX OCR on Rust
|
||||
- Removed the Share Module
|
||||
- Fix: Regression failures on DVD files
|
||||
- Fix: Segmentation faults on MP4 files with CEA-708 captions
|
||||
- Refactor: Remove API structures from ccextractor
|
||||
- New: Add Encoder Module to Rust
|
||||
- Fix: Elementary stream regressions
|
||||
- Fix: Segmentation faults on XDS files
|
||||
- Fix: Clippy Errors Based on Rust 1.88
|
||||
- IMPROVEMENT: Refactor and optimize Dockerfile
|
||||
- New: Add demuxer and file_functions module in lib_ccxr (#1662)
|
||||
- Fix: Improved handling of IETF language tags in Matroska files (#1665)
|
||||
- New: Create unit test for rust code (#1615)
|
||||
- Breaking: Major argument flags revamp for CCExtractor (#1564 & #1619)
|
||||
New: Chinese, Korean, Japanese support - proper encoding and OCR.
|
||||
New: Correct McPoodle DVD raw format support
|
||||
Fix: Timing is now frame perfect (using FFMpeg timing dump as reference) in all formats.
|
||||
Fix: Solved garbling in all the pending issues we had on GitHub.
|
||||
Fix: All causes of "premature end of file" messages due to bugs and not actual file cuts.
|
||||
Fix: All memory leaks, double frees and usual C nastiness that valgrind could find.
|
||||
- Fix: Include ATSC VCT virtual channel numbers and call signs in XMLTV output
|
||||
- Fix: Restore ATSC XMLTV generation with ETT parsing for extended descriptions, multi-segment handling, extended table ID's (EIT/VCT), corrected <programme> XMLTV formatting, buffer bounds fixes
|
||||
- Fix: Add HEVC/H.265 stream type recognition to prevent crashes on ATSC 3.0 streams.
|
||||
Fix: Tolerance to damaged streams - recover where possible instead of terminating.
|
||||
Issues closed: Over 40! Too many to list here, but each of them was either a bug squashed or a feature implemented.
|
||||
|
||||
0.95 (2025-09-15 - never formally packaged)
|
||||
-----------------
|
||||
- New: Create a Docker image to simplify the CCExtractor usage without any environment hassle (#1611)
|
||||
- New: Add time units module in lib_ccxr (#1623)
|
||||
- New: Add bits and levenshtein module in lib_ccxr (#1627)
|
||||
- New: Add constants module in lib_ccxr (#1624)
|
||||
- New: Add log module in lib_ccxr (#1622)
|
||||
- New: Create `lib_ccxr` and `libccxr_exports` (#1621)
|
||||
- Fix: Unexpected behavior of get_write_interval (#1609)
|
||||
- Update: Bump rsmpeg to latest version for ffmpeg bindings (#1600)
|
||||
- New: Add SCC support for CEA-708 decoder (#1595)
|
||||
- Fix: respect `-stdout` even if multiple CC tracks are present in a Matroska input file (#1453)
|
||||
- Fix: crash in Rust decoder on ATSC1.0 TS Files (#1407)
|
||||
- Removed the --with-gui flag for linux/configure and mac/configure (use the Flutter GUI instead)
|
||||
Refactor: Lots of code ported to Rust.
|
||||
- Fix: Improved handling of IETF language tags in Matroska files (#1665)
|
||||
- Breaking: Major argument flags revamp for CCExtractor (#1564 & #1619)
|
||||
- Fix: segmentation fault when using hardsubx
|
||||
- New: Add function (and command) that extracts closed caption subtitles as well as burnt-in subtitles from a file in a single pass. (As proposed in issue 726)
|
||||
- Refactored: the `general_loop` function has some code moved to a new function
|
||||
- Fix: WebVTT X-TIMESTAMP-MAP placement (#1463)
|
||||
- Disable X-TIMESTAMP-MAP by default (changed option --no-timestamp-map to --timestamp-map)
|
||||
- Fix: missing `#` in color attribute of font tag
|
||||
- Fix: ffmpeg 5.0, tesseract 5.0 compatibility and remove deprecated methods
|
||||
- Fix: tesseract 5.x traineddata location in ocr
|
||||
- Fix: fix autoconf tesseract detection problem (#1503)
|
||||
- Fix: add missing compile_info_real.h source to Autotools build
|
||||
- Fix: add missing `-lavfilter` for hardsubx linking
|
||||
- Fix: make webvtt-full work correctly with multi-byte utf-8 characters
|
||||
- Fix: encoding of solid block in latin-1 and unicode
|
||||
- Fix: McPoodle Broadcast Raw format for field 1
|
||||
- Fix: Incorrect skipping of packets
|
||||
- Fix: Repeated values for enums
|
||||
- Cleanup: Remove the (unmaintained) Nuklear GUI code
|
||||
- Cleanup: Reduce the amount of Windows build options in the project file
|
||||
- Fix: infinite loop in MP4 file type detector.
|
||||
- Improvement: Use Corrosion to build Rust code
|
||||
- Improvement: Ignore MXF Caption Essence Container version byte to enhance SRT subtitle extraction compatibility
|
||||
- New: Add tesseract page segmentation modes control with `--psm` flag
|
||||
- Fix: Resolve compile-time error about implicit declarations (#1646)
|
||||
- Fix: fatal out of memory error extracting from a VOB PS
|
||||
- Fix: Unit Test Rust failing due to changes in Rust Version 1.86.0
|
||||
- Fix: handle row_count decrease in CEA-708 C decoder
|
||||
- Fix: Support for MINGW-w64 cross compiling
|
||||
- Fix: Build with ENABLE_FFMPEG to support ffmpeg 5
|
||||
- Fix: Bounds checks to prevent panic on malformed CEA-708 data
|
||||
- Fix: Multiprogram logic in is_decoder_processed_enough() causing false warnings
|
||||
- Fix: Write consistent 2-byte UTF-16BE encoding for CEA-708 captions (Japanese/Chinese)
|
||||
- New: Add --ttxtforcelatin option to force Latin G0 charset in Teletext
|
||||
- Fix: Add fallback for TS files without PAT/PMT tables
|
||||
- Fix: PTS jump handling to continue fts_now updates after jump
|
||||
- Fix: Null checks for unchecked memory allocations throughout codebase
|
||||
- Fix: Null checks and invalid UTF-8 handling in Rust FFI functions
|
||||
- Fix: Panics in timing code when processing multiple files
|
||||
- Fix: Caption start/end times to match FFmpeg timing in MP4/MPEG/TS
|
||||
- Fix: Correctly count and store multiple input files
|
||||
- Fix: Handle MP4 c608 tracks and improve garbage frame detection
|
||||
- Fix: Update fts_now for each frame in elementary streams
|
||||
- Fix: Preserve CR time during pop-on to roll-up transition
|
||||
- Fix: Defer min_pts until frame type is known
|
||||
- Fix: Skip leading non-I-frames when setting min_pts
|
||||
- Fix: Memory leaks in ts_tables_epg, ocr, and ccx_encoders_spupng
|
||||
- Fix: Buffer overruns in 708_output, mcc_encoder, utility, xds_decoder
|
||||
- Fix: Replace sprintf/strcpy with bounds-checked snprintf/strncpy in encoders
|
||||
- Fix: HHMMSSFFF format for ttxt output timestamps
|
||||
- Fix: Always emit position codes at start of SCC caption
|
||||
- Fix: Memory safety issues in ccx_decoders_common
|
||||
- Fix: Null checks after malloc calls in dvb_subtitle_decoder
|
||||
- Fix: Memory safety checks and memory leaks in Matroska parser
|
||||
|
||||
0.94 (2021-12-14)
|
||||
-----------------
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
# Installation
|
||||
|
||||
## Homebrew
|
||||
The easiest way to install CCExtractor for Mac and Linux is through Homebrew:
|
||||
|
||||
```bash
|
||||
brew install ccextractor
|
||||
```
|
||||
Note: If you don't have Homebrew installed, see [brew.sh](https://brew.sh/)
|
||||
for installation instructions.
|
||||
|
||||
---
|
||||
|
||||
# Compiling CCExtractor
|
||||
|
||||
You may compile CCExtractor across all major platforms using `CMakeLists.txt` stored under the `ccextractor/src/` directory. Autoconf and custom build scripts are also available. See platform-specific instructions in the sections below.
|
||||
|
||||
@@ -26,6 +26,14 @@ Running ccextractor without parameters shows the help screen. Usage is
|
||||
trivial - you just need to pass the input file and (optionally) some
|
||||
details about the input and output files.
|
||||
|
||||
Example:
|
||||
|
||||
ccextractor input_video.ts
|
||||
|
||||
This command extracts subtitles from the input video file and generates a subtitle output file
|
||||
(such as .srt) in the same directory.
|
||||
|
||||
|
||||
|
||||
## Languages
|
||||
Usually English captions are transmitted in line 21 field 1 data,
|
||||
|
||||
129
docs/VOBSUB.md
Normal file
129
docs/VOBSUB.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# VOBSUB Subtitle Extraction from MKV Files
|
||||
|
||||
CCExtractor supports extracting VOBSUB (S_VOBSUB) subtitles from Matroska (MKV) containers. VOBSUB is an image-based subtitle format originally from DVD video.
|
||||
|
||||
## Overview
|
||||
|
||||
VOBSUB subtitles consist of two files:
|
||||
- `.idx` - Index file containing metadata, palette, and timestamp/position entries
|
||||
- `.sub` - Binary file containing the actual subtitle bitmap data in MPEG Program Stream format
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```bash
|
||||
ccextractor movie.mkv
|
||||
```
|
||||
|
||||
This will extract all VOBSUB tracks and create paired `.idx` and `.sub` files:
|
||||
- `movie_eng.idx` + `movie_eng.sub` (first English track)
|
||||
- `movie_eng_1.idx` + `movie_eng_1.sub` (second English track, if present)
|
||||
- etc.
|
||||
|
||||
## Converting VOBSUB to SRT (Text)
|
||||
|
||||
Since VOBSUB subtitles are images, you need OCR (Optical Character Recognition) to convert them to text-based formats like SRT.
|
||||
|
||||
### Using subtile-ocr (Recommended)
|
||||
|
||||
[subtile-ocr](https://github.com/gwen-lg/subtile-ocr) is an actively maintained Rust tool that provides accurate OCR conversion.
|
||||
|
||||
#### Option 1: Docker (Easiest)
|
||||
|
||||
We provide a Dockerfile that builds subtile-ocr with all dependencies:
|
||||
|
||||
```bash
|
||||
# Build the Docker image (one-time)
|
||||
cd tools/vobsubocr
|
||||
docker build -t subtile-ocr .
|
||||
|
||||
# Extract VOBSUB from MKV
|
||||
ccextractor movie.mkv
|
||||
|
||||
# Convert to SRT using OCR
|
||||
docker run --rm -v $(pwd):/data subtile-ocr -l eng -o /data/movie_eng.srt /data/movie_eng.idx
|
||||
```
|
||||
|
||||
#### Option 2: Install subtile-ocr Natively
|
||||
|
||||
If you have Rust and Tesseract development libraries installed:
|
||||
|
||||
```bash
|
||||
# Install dependencies (Ubuntu/Debian)
|
||||
sudo apt-get install libleptonica-dev libtesseract-dev tesseract-ocr tesseract-ocr-eng
|
||||
|
||||
# Install subtile-ocr
|
||||
cargo install --git https://github.com/gwen-lg/subtile-ocr
|
||||
|
||||
# Convert
|
||||
subtile-ocr -l eng -o movie_eng.srt movie_eng.idx
|
||||
```
|
||||
|
||||
### subtile-ocr Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `-l, --lang <LANG>` | Tesseract language code (required). Examples: `eng`, `fra`, `deu`, `chi_sim` |
|
||||
| `-o, --output <FILE>` | Output SRT file (stdout if not specified) |
|
||||
| `-t, --threshold <0.0-1.0>` | Binarization threshold (default: 0.6) |
|
||||
| `-d, --dpi <DPI>` | Image DPI for OCR (default: 150) |
|
||||
| `--dump` | Save processed subtitle images as PNG files |
|
||||
|
||||
### Language Codes
|
||||
|
||||
Install additional Tesseract language packs as needed:
|
||||
|
||||
```bash
|
||||
# Examples
|
||||
sudo apt-get install tesseract-ocr-fra # French
|
||||
sudo apt-get install tesseract-ocr-deu # German
|
||||
sudo apt-get install tesseract-ocr-spa # Spanish
|
||||
sudo apt-get install tesseract-ocr-chi-sim # Simplified Chinese
|
||||
```
|
||||
|
||||
## Technical Details
|
||||
|
||||
### .idx File Format
|
||||
|
||||
The index file contains:
|
||||
1. Header with metadata (size, palette, alignment settings)
|
||||
2. Language identifier line
|
||||
3. Timestamp entries with file positions
|
||||
|
||||
Example:
|
||||
```
|
||||
# VobSub index file, v7 (do not modify this line!)
|
||||
size: 720x576
|
||||
palette: 000000, 828282, ...
|
||||
|
||||
id: eng, index: 0
|
||||
timestamp: 00:01:12:920, filepos: 000000000
|
||||
timestamp: 00:01:18:640, filepos: 000000800
|
||||
...
|
||||
```
|
||||
|
||||
### .sub File Format
|
||||
|
||||
The binary file contains MPEG Program Stream packets:
|
||||
- Each subtitle is wrapped in a PS Pack header (14 bytes) + PES header (15 bytes)
|
||||
- Subtitles are aligned to 2048-byte boundaries
|
||||
- Contains raw SPU (SubPicture Unit) bitmap data
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Empty output files
|
||||
- Ensure the MKV file actually contains VOBSUB tracks (check with `mediainfo` or `ffprobe`)
|
||||
- CCExtractor will report "No VOBSUB subtitles to write" if the track is empty
|
||||
|
||||
### OCR quality issues
|
||||
- Try adjusting the `-t` threshold parameter
|
||||
- Ensure the correct language pack is installed
|
||||
- Use `--dump` to inspect the processed images
|
||||
|
||||
### Docker permission issues
|
||||
- The output files may be owned by root; use `sudo chown` to fix ownership
|
||||
- Or run Docker with `--user $(id -u):$(id -g)`
|
||||
|
||||
## See Also
|
||||
|
||||
- [OCR.md](OCR.md) - General OCR support in CCExtractor
|
||||
- [subtile-ocr GitHub](https://github.com/gwen-lg/subtile-ocr) - OCR tool documentation
|
||||
137
docs/build-wsl.md
Normal file
137
docs/build-wsl.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# Building CCExtractor on Windows using WSL
|
||||
|
||||
This guide explains how to build CCExtractor on Windows using WSL (Ubuntu).
|
||||
It is based on a fresh setup and includes all required dependencies and
|
||||
common build issues encountered during compilation.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Windows 10 or Windows 11
|
||||
- WSL enabled
|
||||
- Ubuntu installed via Microsoft Store
|
||||
|
||||
---
|
||||
|
||||
## Install WSL and Ubuntu
|
||||
|
||||
From PowerShell (run as Administrator):
|
||||
|
||||
```powershell
|
||||
wsl --install -d Ubuntu
|
||||
```
|
||||
|
||||
Restart the system if prompted, then launch Ubuntu from the Start menu.
|
||||
|
||||
---
|
||||
|
||||
## Update system packages
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install basic build tools
|
||||
|
||||
```bash
|
||||
sudo apt install -y build-essential git pkg-config
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install Rust (required)
|
||||
|
||||
CCExtractor includes Rust components, so Rust and Cargo are required.
|
||||
|
||||
```bash
|
||||
curl https://sh.rustup.rs -sSf | sh
|
||||
source ~/.cargo/env
|
||||
```
|
||||
|
||||
Verify installation:
|
||||
|
||||
```bash
|
||||
cargo --version
|
||||
rustc --version
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install required libraries
|
||||
|
||||
```bash
|
||||
sudo apt install -y \
|
||||
libclang-dev clang \
|
||||
libtesseract-dev tesseract-ocr \
|
||||
libgpac-dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Clone the repository
|
||||
|
||||
```bash
|
||||
git clone https://github.com/CCExtractor/ccextractor.git
|
||||
cd ccextractor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Build CCExtractor
|
||||
|
||||
```bash
|
||||
cd linux
|
||||
./build
|
||||
```
|
||||
|
||||
After a successful build, verify by running:
|
||||
|
||||
```bash
|
||||
./ccextractor
|
||||
```
|
||||
|
||||
You should see the help/usage output.
|
||||
|
||||
---
|
||||
|
||||
## Common build issues
|
||||
|
||||
### cargo: command not found
|
||||
|
||||
```bash
|
||||
source ~/.cargo/env
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Unable to find libclang
|
||||
|
||||
```bash
|
||||
sudo apt install libclang-dev clang
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### gpac/isomedia.h: No such file or directory
|
||||
|
||||
```bash
|
||||
sudo apt install libgpac-dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### please install tesseract development library
|
||||
|
||||
```bash
|
||||
sudo apt install libtesseract-dev tesseract-ocr
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- Compiler warnings during the build process are expected and do not indicate failure.
|
||||
- This guide was tested on Ubuntu (WSL) running on Windows 11.
|
||||
@@ -151,6 +151,8 @@ ccextractor_SOURCES = \
|
||||
../src/lib_ccx/list.h \
|
||||
../src/lib_ccx/matroska.c \
|
||||
../src/lib_ccx/matroska.h \
|
||||
../src/lib_ccx/vobsub_decoder.c \
|
||||
../src/lib_ccx/vobsub_decoder.h \
|
||||
../src/lib_ccx/mp4.c \
|
||||
../src/lib_ccx/myth.c \
|
||||
../src/lib_ccx/networking.c \
|
||||
|
||||
64
linux/build
64
linux/build
@@ -2,6 +2,7 @@
|
||||
|
||||
RUST_LIB="rust/release/libccx_rust.a"
|
||||
RUST_PROFILE="--release"
|
||||
USE_SYSTEM_LIBS=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-debug)
|
||||
@@ -23,6 +24,10 @@ while [[ $# -gt 0 ]]; do
|
||||
BLD_LINKER="$BLD_LINKER -lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lxcb-shm -lxcb -lX11 -llzma -lswresample"
|
||||
shift
|
||||
;;
|
||||
-system-libs)
|
||||
USE_SYSTEM_LIBS=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
exit 1
|
||||
@@ -30,7 +35,42 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
BLD_FLAGS="$BLD_FLAGS -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -DENABLE_OCR -DFT2_BUILD_LIBRARY -DGPAC_DISABLE_VTT -DGPAC_DISABLE_OD_DUMP -DGPAC_DISABLE_REMOTERY -DNO_GZIP"
|
||||
if [ "$USE_SYSTEM_LIBS" = true ]; then
|
||||
command -v pkg-config >/dev/null || {
|
||||
echo "Error: pkg-config is required for -system-libs mode"
|
||||
exit 1
|
||||
}
|
||||
|
||||
MISSING=""
|
||||
for lib in libpng zlib freetype2 libutf8proc; do
|
||||
if ! pkg-config --exists "$lib" 2>/dev/null; then
|
||||
MISSING="$MISSING $lib"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$MISSING" ]; then
|
||||
echo "Error: Missing required system libraries:$MISSING"
|
||||
echo ""
|
||||
echo "On Debian/Ubuntu: sudo apt install libpng-dev zlib1g-dev libfreetype-dev libutf8proc-dev"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for hdr in leptonica/allheaders.h tesseract/capi.h; do
|
||||
if ! echo "#include <$hdr>" | gcc -E - >/dev/null 2>&1; then
|
||||
echo "Error: Missing headers for <$hdr>"
|
||||
echo "On Debian/Ubuntu: sudo apt install libleptonica-dev libtesseract-dev"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
PKG_CFLAGS="$(pkg-config --cflags libpng zlib freetype2 libutf8proc)"
|
||||
PKG_LIBS="$(pkg-config --libs libpng zlib freetype2 libutf8proc)"
|
||||
fi
|
||||
|
||||
BLD_FLAGS="$BLD_FLAGS -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -DENABLE_OCR -DGPAC_DISABLE_VTT -DGPAC_DISABLE_OD_DUMP -DGPAC_DISABLE_REMOTERY -DNO_GZIP"
|
||||
if [ "$USE_SYSTEM_LIBS" != true ]; then
|
||||
BLD_FLAGS="$BLD_FLAGS -DFT2_BUILD_LIBRARY"
|
||||
fi
|
||||
bit_os=$(getconf LONG_BIT)
|
||||
if [ "$bit_os" == "64" ]
|
||||
then
|
||||
@@ -87,6 +127,24 @@ SRC_FREETYPE="../src/thirdparty/freetype/autofit/autofit.c
|
||||
BLD_SOURCES="../src/ccextractor.c $SRC_CCX $SRC_GPAC $SRC_ZLIB $SRC_LIBPNG $SRC_HASH $SRC_UTF8PROC $SRC_FREETYPE"
|
||||
BLD_LINKER="$BLD_LINKER -lm -zmuldefs -l tesseract -l leptonica -lpthread -ldl -lgpac"
|
||||
|
||||
if [ "$USE_SYSTEM_LIBS" = true ]; then
|
||||
LEPTONICA_CFLAGS="$(pkg-config --cflags --silence-errors lept)"
|
||||
TESSERACT_CFLAGS="$(pkg-config --cflags --silence-errors tesseract)"
|
||||
GPAC_CFLAGS="$(pkg-config --cflags --silence-errors gpac)"
|
||||
|
||||
BLD_INCLUDE="-I../src -I../src/lib_ccx -I../src/lib_ccx/zvbi -I../src/thirdparty/lib_hash \
|
||||
$PKG_CFLAGS $LEPTONICA_CFLAGS $TESSERACT_CFLAGS $GPAC_CFLAGS"
|
||||
|
||||
BLD_SOURCES="../src/ccextractor.c $SRC_CCX $SRC_HASH"
|
||||
# Preserve FFmpeg libraries if -hardsubx was specified
|
||||
FFMPEG_LIBS=""
|
||||
if [ "$HARDSUBX" = true ]; then
|
||||
FFMPEG_LIBS="-lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lxcb-shm -lxcb -lX11 -llzma -lswresample"
|
||||
fi
|
||||
BLD_LINKER="$PKG_LIBS -ltesseract -lleptonica -lgpac -lpthread -ldl -lm $FFMPEG_LIBS"
|
||||
fi
|
||||
|
||||
|
||||
echo "Running pre-build script..."
|
||||
./pre-build.sh
|
||||
echo "Trying to compile..."
|
||||
@@ -149,3 +207,7 @@ if [[ "$out" != "" ]] ; then
|
||||
else
|
||||
echo "Compilation successful, no compiler messages."
|
||||
fi
|
||||
|
||||
if [ -d ./utf8proc_compat ]; then
|
||||
rm -rf ./utf8proc_compat
|
||||
fi
|
||||
|
||||
@@ -1,63 +1,230 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# CCExtractor AppImage Build Script
|
||||
#
|
||||
# Build variants via BUILD_TYPE environment variable:
|
||||
# - minimal: Basic CCExtractor without OCR (smallest size)
|
||||
# - ocr: CCExtractor with OCR support (default)
|
||||
# - hardsubx: CCExtractor with burned-in subtitle extraction (requires FFmpeg)
|
||||
#
|
||||
# Usage:
|
||||
# ./build_appimage.sh # Builds 'ocr' variant (default)
|
||||
# BUILD_TYPE=minimal ./build_appimage.sh
|
||||
# BUILD_TYPE=hardsubx ./build_appimage.sh
|
||||
#
|
||||
# Requirements:
|
||||
# - CMake, GCC, pkg-config, Rust toolchain
|
||||
# - For OCR: tesseract-ocr, libtesseract-dev, libleptonica-dev
|
||||
# - For HardSubX: libavcodec-dev, libavformat-dev, libswscale-dev, etc.
|
||||
# - wget for downloading linuxdeploy
|
||||
#
|
||||
|
||||
set -x
|
||||
set -e
|
||||
|
||||
# store the absolute path of the current working directory
|
||||
OLD_CWD=$(readlink -f .)
|
||||
# Build type: minimal, ocr, hardsubx (default: ocr)
|
||||
BUILD_TYPE="${BUILD_TYPE:-ocr}"
|
||||
|
||||
# store repo root as variable
|
||||
REPO_ROOT=$(dirname $OLD_CWD)
|
||||
echo "=========================================="
|
||||
echo "CCExtractor AppImage Builder"
|
||||
echo "Build type: $BUILD_TYPE"
|
||||
echo "=========================================="
|
||||
|
||||
# Make a temp directory for building stuff which will be cleaned automatically
|
||||
BUILD_DIR="$OLD_CWD/temp"
|
||||
# Validate build type
|
||||
case "$BUILD_TYPE" in
|
||||
minimal|ocr|hardsubx)
|
||||
;;
|
||||
*)
|
||||
echo "Error: Invalid BUILD_TYPE '$BUILD_TYPE'"
|
||||
echo "Valid options: minimal, ocr, hardsubx"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Check if the temp directory exists, and if so, remove its contents;
|
||||
# if not then create temp directory
|
||||
if [ -d "$BUILD_DIR" ]; then
|
||||
rm -r "$BUILD_DIR/*" | true
|
||||
else
|
||||
mkdir -p "$BUILD_DIR"
|
||||
fi
|
||||
# Store paths
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
|
||||
BUILD_DIR="$SCRIPT_DIR/appimage_build"
|
||||
|
||||
# make sure to clean up build dir, even if errors occur
|
||||
# Clean up function
|
||||
cleanup() {
|
||||
if [ -d "$BUILD_DIR" ]; then
|
||||
rm -rf "$BUILD_DIR"
|
||||
fi
|
||||
if [ -d "$BUILD_DIR" ]; then
|
||||
echo "Cleaning up build directory..."
|
||||
rm -rf "$BUILD_DIR"
|
||||
fi
|
||||
}
|
||||
|
||||
# Automatically trigger Cleanup function
|
||||
# Cleanup on exit (comment out for debugging)
|
||||
trap cleanup EXIT
|
||||
|
||||
# switch to build dir
|
||||
pushd "$BUILD_DIR"
|
||||
# Create fresh build directory
|
||||
rm -rf "$BUILD_DIR" 2>/dev/null || true
|
||||
mkdir -p "$BUILD_DIR"
|
||||
|
||||
# configure build files with CMake
|
||||
# we need to explicitly set the install prefix, as CMake's default is /usr/local for some reason...
|
||||
cmake "$REPO_ROOT/src"
|
||||
cd "$BUILD_DIR"
|
||||
|
||||
# build project and install files into AppDir
|
||||
make -j$(nproc) ENABLE_OCR=yes
|
||||
# Determine CMake options based on build type
|
||||
CMAKE_OPTIONS=""
|
||||
case "$BUILD_TYPE" in
|
||||
minimal)
|
||||
CMAKE_OPTIONS=""
|
||||
;;
|
||||
ocr)
|
||||
CMAKE_OPTIONS="-DWITH_OCR=ON"
|
||||
;;
|
||||
hardsubx)
|
||||
CMAKE_OPTIONS="-DWITH_OCR=ON -DWITH_HARDSUBX=ON -DWITH_FFMPEG=ON"
|
||||
;;
|
||||
esac
|
||||
|
||||
# download linuxdeploy tool
|
||||
wget https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage
|
||||
echo "CMake options: $CMAKE_OPTIONS"
|
||||
|
||||
# make them executable
|
||||
chmod +x linuxdeploy*.AppImage
|
||||
# Configure with CMake
|
||||
echo "Configuring with CMake..."
|
||||
cmake $CMAKE_OPTIONS "$REPO_ROOT/src"
|
||||
|
||||
# Create AppDir
|
||||
mkdir -p "$BUILD_DIR/AppDir"
|
||||
# Build
|
||||
echo "Building CCExtractor..."
|
||||
make -j$(nproc)
|
||||
|
||||
# Link to a CCExtractor image in any of these resolutions (8x8, 16x16, 20x20, 22x22, 24x24, 28x28, 32x32, 36x36, 42x42,
|
||||
# 48x48, 64x64, 72x72, 96x96, 128x128, 160x160, 192x192, 256x256, 384x384, 480x480, 512x512) in PNG format
|
||||
PNG_LINK="https://ccextractor.org/images/ccextractor.png"
|
||||
# Verify binary was built
|
||||
if [ ! -f "$BUILD_DIR/ccextractor" ]; then
|
||||
echo "Error: ccextractor binary not found after build"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Download the image and put it in AppDir
|
||||
wget "$PNG_LINK" -P AppDir
|
||||
echo "Build successful!"
|
||||
"$BUILD_DIR/ccextractor" --version
|
||||
|
||||
# now, build AppImage using linuxdeploy
|
||||
./linuxdeploy-x86_64.AppImage --appdir=AppDir -e ccextractor --create-desktop-file --output appimage -i AppDir/ccextractor.png
|
||||
# Download linuxdeploy
|
||||
echo "Downloading linuxdeploy..."
|
||||
LINUXDEPLOY_URL="https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage"
|
||||
wget -q --show-progress "$LINUXDEPLOY_URL" -O linuxdeploy-x86_64.AppImage
|
||||
chmod +x linuxdeploy-x86_64.AppImage
|
||||
|
||||
# Move the resulting AppImage binary to the base directory
|
||||
mv ccextractor*.AppImage "$OLD_CWD"
|
||||
# Create AppDir structure
|
||||
echo "Creating AppDir structure..."
|
||||
mkdir -p AppDir/usr/bin
|
||||
mkdir -p AppDir/usr/share/icons/hicolor/256x256/apps
|
||||
mkdir -p AppDir/usr/share/applications
|
||||
mkdir -p AppDir/usr/share/tessdata
|
||||
|
||||
# Copy binary
|
||||
cp "$BUILD_DIR/ccextractor" AppDir/usr/bin/
|
||||
|
||||
# Download icon
|
||||
echo "Downloading icon..."
|
||||
PNG_URL="https://ccextractor.org/images/ccextractor.png"
|
||||
if wget -q "$PNG_URL" -O AppDir/usr/share/icons/hicolor/256x256/apps/ccextractor.png 2>/dev/null; then
|
||||
echo "Icon downloaded successfully"
|
||||
else
|
||||
# Create a simple placeholder icon if download fails
|
||||
echo "Warning: Could not download icon, creating placeholder"
|
||||
convert -size 256x256 xc:navy -fill white -gravity center -pointsize 40 -annotate 0 "CCX" \
|
||||
AppDir/usr/share/icons/hicolor/256x256/apps/ccextractor.png 2>/dev/null || \
|
||||
echo "P3 256 256 255" > AppDir/usr/share/icons/hicolor/256x256/apps/ccextractor.ppm
|
||||
fi
|
||||
|
||||
# Create desktop file
|
||||
cat > AppDir/usr/share/applications/ccextractor.desktop << 'EOF'
|
||||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=CCExtractor
|
||||
Comment=Extract closed captions and subtitles from video files
|
||||
Exec=ccextractor
|
||||
Icon=ccextractor
|
||||
Categories=AudioVideo;Video;
|
||||
Terminal=true
|
||||
NoDisplay=true
|
||||
EOF
|
||||
|
||||
# Copy desktop file to AppDir root (required by linuxdeploy)
|
||||
cp AppDir/usr/share/applications/ccextractor.desktop AppDir/
|
||||
|
||||
# Copy icon to AppDir root
|
||||
cp AppDir/usr/share/icons/hicolor/256x256/apps/ccextractor.png AppDir/ 2>/dev/null || true
|
||||
|
||||
# For OCR builds, bundle tessdata
|
||||
if [ "$BUILD_TYPE" = "ocr" ] || [ "$BUILD_TYPE" = "hardsubx" ]; then
|
||||
echo "Bundling tessdata for OCR support..."
|
||||
|
||||
# Try to find system tessdata
|
||||
TESSDATA_PATHS=(
|
||||
"/usr/share/tesseract-ocr/5/tessdata"
|
||||
"/usr/share/tesseract-ocr/4.00/tessdata"
|
||||
"/usr/share/tessdata"
|
||||
"/usr/local/share/tessdata"
|
||||
)
|
||||
|
||||
TESSDATA_SRC=""
|
||||
for path in "${TESSDATA_PATHS[@]}"; do
|
||||
if [ -d "$path" ] && [ -f "$path/eng.traineddata" ]; then
|
||||
TESSDATA_SRC="$path"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$TESSDATA_SRC" ]; then
|
||||
echo "Found tessdata at: $TESSDATA_SRC"
|
||||
# Copy English language data (most common)
|
||||
cp "$TESSDATA_SRC/eng.traineddata" AppDir/usr/share/tessdata/ 2>/dev/null || true
|
||||
# Copy OSD (orientation and script detection) if available
|
||||
cp "$TESSDATA_SRC/osd.traineddata" AppDir/usr/share/tessdata/ 2>/dev/null || true
|
||||
else
|
||||
echo "Warning: tessdata not found, downloading English language data..."
|
||||
wget -q "https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata" \
|
||||
-O AppDir/usr/share/tessdata/eng.traineddata || true
|
||||
fi
|
||||
|
||||
# Create wrapper script that sets TESSDATA_PREFIX
|
||||
mv AppDir/usr/bin/ccextractor AppDir/usr/bin/ccextractor.bin
|
||||
cat > AppDir/usr/bin/ccextractor << 'WRAPPER'
|
||||
#!/bin/bash
|
||||
SELF_DIR="$(dirname "$(readlink -f "$0")")"
|
||||
export TESSDATA_PREFIX="${SELF_DIR}/../share/tessdata"
|
||||
exec "${SELF_DIR}/ccextractor.bin" "$@"
|
||||
WRAPPER
|
||||
chmod +x AppDir/usr/bin/ccextractor
|
||||
fi
|
||||
|
||||
# Determine output name based on build type
|
||||
ARCH="x86_64"
|
||||
case "$BUILD_TYPE" in
|
||||
minimal)
|
||||
OUTPUT_NAME="ccextractor-minimal-${ARCH}.AppImage"
|
||||
;;
|
||||
ocr)
|
||||
OUTPUT_NAME="ccextractor-${ARCH}.AppImage"
|
||||
;;
|
||||
hardsubx)
|
||||
OUTPUT_NAME="ccextractor-hardsubx-${ARCH}.AppImage"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Build AppImage
|
||||
echo "Building AppImage..."
|
||||
export OUTPUT="$OUTPUT_NAME"
|
||||
|
||||
# Determine which executable to pass to linuxdeploy
|
||||
# For OCR builds, we have a wrapper script, so pass the actual binary (.bin)
|
||||
if [ -f "AppDir/usr/bin/ccextractor.bin" ]; then
|
||||
LINUXDEPLOY_EXEC="AppDir/usr/bin/ccextractor.bin"
|
||||
else
|
||||
LINUXDEPLOY_EXEC="AppDir/usr/bin/ccextractor"
|
||||
fi
|
||||
|
||||
./linuxdeploy-x86_64.AppImage \
|
||||
--appdir=AppDir \
|
||||
--executable="$LINUXDEPLOY_EXEC" \
|
||||
--desktop-file=AppDir/ccextractor.desktop \
|
||||
--icon-file=AppDir/ccextractor.png \
|
||||
--output=appimage
|
||||
|
||||
# Move to output directory
|
||||
mv "$OUTPUT_NAME" "$SCRIPT_DIR/"
|
||||
|
||||
echo "=========================================="
|
||||
echo "AppImage built successfully!"
|
||||
echo "Output: $SCRIPT_DIR/$OUTPUT_NAME"
|
||||
echo ""
|
||||
echo "Test with: $SCRIPT_DIR/$OUTPUT_NAME --version"
|
||||
echo "=========================================="
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ([2.71])
|
||||
AC_INIT([CCExtractor], [0.95], [carlos@ccextractor.org])
|
||||
AC_INIT([CCExtractor], [0.96.5], [carlos@ccextractor.org])
|
||||
AC_CONFIG_AUX_DIR([build-conf])
|
||||
AC_CONFIG_SRCDIR([../src/ccextractor.c])
|
||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||
|
||||
@@ -123,6 +123,8 @@ ccextractor_SOURCES = \
|
||||
../src/lib_ccx/list.h \
|
||||
../src/lib_ccx/matroska.c \
|
||||
../src/lib_ccx/matroska.h \
|
||||
../src/lib_ccx/vobsub_decoder.c \
|
||||
../src/lib_ccx/vobsub_decoder.h \
|
||||
../src/lib_ccx/mp4.c \
|
||||
../src/lib_ccx/myth.c \
|
||||
../src/lib_ccx/networking.c \
|
||||
|
||||
@@ -42,7 +42,16 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
BLD_FLAGS="-std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
|
||||
# Determine architecture based on cargo (to ensure consistency with Rust part)
|
||||
CARGO_ARCH=$(file $(which cargo) | grep -o 'x86_64\|arm64')
|
||||
if [[ "$CARGO_ARCH" == "x86_64" ]]; then
|
||||
echo "Detected Intel (x86_64) Cargo. Forcing x86_64 build to match Rust and libraries..."
|
||||
BLD_ARCH="-arch x86_64"
|
||||
else
|
||||
BLD_ARCH="-arch arm64"
|
||||
fi
|
||||
|
||||
BLD_FLAGS="$BLD_ARCH -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
|
||||
|
||||
# Add flags for bundled libraries (not needed when using system libs)
|
||||
if [[ "$USE_SYSTEM_LIBS" != "true" ]]; then
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ([2.71])
|
||||
AC_INIT([CCExtractor],[0.95],[carlos@ccextractor.org])
|
||||
AC_INIT([CCExtractor],[0.96.5],[carlos@ccextractor.org])
|
||||
AC_CONFIG_AUX_DIR([build-conf])
|
||||
AC_CONFIG_SRCDIR([../src/ccextractor.c])
|
||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
pkgname=ccextractor
|
||||
pkgver=0.95
|
||||
pkgver=0.96.5
|
||||
pkgrel=1
|
||||
pkgdesc="A closed captions and teletext subtitles extractor for video streams."
|
||||
arch=('i686' 'x86_64')
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Name: ccextractor
|
||||
Version: 0.95
|
||||
Version: 0.96.5
|
||||
Release: 1
|
||||
Summary: A closed captions and teletext subtitles extractor for video streams.
|
||||
Group: Applications/Internet
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
TYPE="debian" # can be one of 'slackware', 'debian', 'rpm'
|
||||
PROGRAM_NAME="ccextractor"
|
||||
VERSION="0.95"
|
||||
VERSION="0.96.5"
|
||||
RELEASE="1"
|
||||
LICENSE="GPL-2.0"
|
||||
MAINTAINER="carlos@ccextractor.org"
|
||||
|
||||
96
packaging/README.md
Normal file
96
packaging/README.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# CCExtractor Packaging
|
||||
|
||||
This directory contains packaging configurations for Windows package managers.
|
||||
|
||||
## Windows Package Manager (winget)
|
||||
|
||||
### Initial Setup (One-time)
|
||||
|
||||
1. **Calculate MSI hash** for the current release:
|
||||
```powershell
|
||||
certutil -hashfile CCExtractor.0.96.5.msi SHA256
|
||||
```
|
||||
|
||||
2. **Update the manifest files** in `winget/` with the SHA256 hash
|
||||
|
||||
3. **Fork microsoft/winget-pkgs** to the CCExtractor organization:
|
||||
- Go to https://github.com/microsoft/winget-pkgs
|
||||
- Fork to https://github.com/CCExtractor/winget-pkgs
|
||||
|
||||
4. **Submit initial manifest** via PR:
|
||||
- Clone your fork
|
||||
- Create directory: `manifests/c/CCExtractor/CCExtractor/0.96.5/`
|
||||
- Copy the three YAML files from `winget/`
|
||||
- Submit PR to microsoft/winget-pkgs
|
||||
|
||||
5. **Create GitHub token** for automation:
|
||||
- Go to GitHub Settings > Developer settings > Personal access tokens > Tokens (classic)
|
||||
- Create token with `public_repo` scope
|
||||
- Add as secret `WINGET_TOKEN` in CCExtractor/ccextractor repository
|
||||
|
||||
### Automated Updates
|
||||
|
||||
After the initial submission is merged, the `publish_winget.yml` workflow will automatically submit PRs for new releases.
|
||||
|
||||
## Chocolatey
|
||||
|
||||
### Initial Setup (One-time)
|
||||
|
||||
1. **Create Chocolatey account**:
|
||||
- Register at https://community.chocolatey.org/account/Register
|
||||
|
||||
2. **Get API key**:
|
||||
- Go to https://community.chocolatey.org/account
|
||||
- Copy your API key
|
||||
|
||||
3. **Add secret**:
|
||||
- Add `CHOCOLATEY_API_KEY` secret to CCExtractor/ccextractor repository
|
||||
|
||||
### Package Structure
|
||||
|
||||
```
|
||||
chocolatey/
|
||||
├── ccextractor.nuspec # Package metadata
|
||||
└── tools/
|
||||
├── chocolateyInstall.ps1 # Installation script
|
||||
└── chocolateyUninstall.ps1 # Uninstallation script
|
||||
```
|
||||
|
||||
### Manual Testing
|
||||
|
||||
```powershell
|
||||
cd packaging/chocolatey
|
||||
|
||||
# Update version and checksum in files first, then:
|
||||
choco pack ccextractor.nuspec
|
||||
|
||||
# Test locally
|
||||
choco install ccextractor --source="'.'" --yes --force
|
||||
|
||||
# Verify
|
||||
ccextractor --version
|
||||
```
|
||||
|
||||
### Automated Updates
|
||||
|
||||
The `publish_chocolatey.yml` workflow automatically:
|
||||
1. Downloads the MSI from the release
|
||||
2. Calculates the SHA256 checksum
|
||||
3. Updates the nuspec and install script
|
||||
4. Builds and tests the package
|
||||
5. Pushes to Chocolatey
|
||||
|
||||
Note: Chocolatey packages go through moderation before being publicly available.
|
||||
|
||||
## Workflow Triggers
|
||||
|
||||
Both workflows trigger on:
|
||||
- **Release published**: Automatic publishing when a new release is created
|
||||
- **Manual dispatch**: Can be triggered manually with a specific tag
|
||||
|
||||
## Secrets Required
|
||||
|
||||
| Secret | Purpose |
|
||||
|--------|---------|
|
||||
| `WINGET_TOKEN` | GitHub PAT with `public_repo` scope for winget PRs |
|
||||
| `CHOCOLATEY_API_KEY` | Chocolatey API key for package uploads |
|
||||
43
packaging/chocolatey/ccextractor.nuspec
Normal file
43
packaging/chocolatey/ccextractor.nuspec
Normal file
@@ -0,0 +1,43 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
|
||||
<metadata>
|
||||
<id>ccextractor</id>
|
||||
<version>0.96.5</version>
|
||||
<title>CCExtractor</title>
|
||||
<authors>CCExtractor Development Team</authors>
|
||||
<owners>CCExtractor</owners>
|
||||
<licenseUrl>https://github.com/CCExtractor/ccextractor/blob/master/LICENSE.txt</licenseUrl>
|
||||
<projectUrl>https://ccextractor.org</projectUrl>
|
||||
<iconUrl>https://raw.githubusercontent.com/CCExtractor/ccextractor/master/windows/CCX.ico</iconUrl>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>CCExtractor is a tool that analyzes video files and produces independent subtitle files from the closed captions data.
|
||||
|
||||
### Features
|
||||
- Extracts closed captions from various video formats (MPEG, H.264, MKV, MP4, etc.)
|
||||
- Supports multiple input sources including DVDs, DVRs, and live TV captures
|
||||
- Outputs to multiple formats (SRT, WebVTT, SAMI, transcript, etc.)
|
||||
- OCR support for bitmap-based subtitles (DVB, teletext)
|
||||
- Includes a graphical user interface
|
||||
|
||||
### Usage
|
||||
After installation, run `ccextractor` from the command line or use the GUI.
|
||||
|
||||
```
|
||||
ccextractor video.ts -o output.srt
|
||||
```
|
||||
|
||||
For more options: `ccextractor --help`
|
||||
</description>
|
||||
<summary>Extract closed captions and subtitles from video files</summary>
|
||||
<releaseNotes>https://github.com/CCExtractor/ccextractor/releases</releaseNotes>
|
||||
<copyright>Copyright (c) CCExtractor Development</copyright>
|
||||
<tags>subtitles closed-captions video extraction accessibility srt dvb teletext ocr media cli</tags>
|
||||
<projectSourceUrl>https://github.com/CCExtractor/ccextractor</projectSourceUrl>
|
||||
<packageSourceUrl>https://github.com/CCExtractor/ccextractor/tree/master/packaging/chocolatey</packageSourceUrl>
|
||||
<docsUrl>https://github.com/CCExtractor/ccextractor/wiki</docsUrl>
|
||||
<bugTrackerUrl>https://github.com/CCExtractor/ccextractor/issues</bugTrackerUrl>
|
||||
</metadata>
|
||||
<files>
|
||||
<file src="tools\**" target="tools" />
|
||||
</files>
|
||||
</package>
|
||||
24
packaging/chocolatey/tools/chocolateyInstall.ps1
Normal file
24
packaging/chocolatey/tools/chocolateyInstall.ps1
Normal file
@@ -0,0 +1,24 @@
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
$packageName = 'ccextractor'
|
||||
$toolsDir = "$(Split-Path -parent $MyInvocation.MyCommand.Definition)"
|
||||
|
||||
# Package parameters
|
||||
$packageArgs = @{
|
||||
packageName = $packageName
|
||||
fileType = 'MSI'
|
||||
url64bit = 'https://github.com/CCExtractor/ccextractor/releases/download/v0.96.5/CCExtractor.0.96.5.msi'
|
||||
checksum64 = 'FFCAB0D766180AFC2832277397CDEC885D15270DECE33A9A51947B790F1F095B'
|
||||
checksumType64 = 'sha256'
|
||||
silentArgs = '/quiet /norestart'
|
||||
validExitCodes = @(0, 3010, 1641)
|
||||
}
|
||||
|
||||
Install-ChocolateyPackage @packageArgs
|
||||
|
||||
# Add to PATH if not already there
|
||||
$installPath = Join-Path $env:ProgramFiles 'CCExtractor'
|
||||
if (Test-Path $installPath) {
|
||||
Install-ChocolateyPath -PathToInstall $installPath -PathType 'Machine'
|
||||
Write-Host "CCExtractor installed to: $installPath"
|
||||
}
|
||||
23
packaging/chocolatey/tools/chocolateyUninstall.ps1
Normal file
23
packaging/chocolatey/tools/chocolateyUninstall.ps1
Normal file
@@ -0,0 +1,23 @@
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
$packageName = 'ccextractor'
|
||||
|
||||
# Get the uninstall registry key
|
||||
$regKey = Get-UninstallRegistryKey -SoftwareName 'CCExtractor*'
|
||||
|
||||
if ($regKey) {
|
||||
$silentArgs = '/quiet /norestart'
|
||||
$file = $regKey.UninstallString -replace 'msiexec.exe','msiexec.exe ' -replace '/I','/X'
|
||||
|
||||
$packageArgs = @{
|
||||
packageName = $packageName
|
||||
fileType = 'MSI'
|
||||
silentArgs = "$($regKey.PSChildName) $silentArgs"
|
||||
file = ''
|
||||
validExitCodes = @(0, 3010, 1605, 1614, 1641)
|
||||
}
|
||||
|
||||
Uninstall-ChocolateyPackage @packageArgs
|
||||
} else {
|
||||
Write-Warning "CCExtractor was not found in the registry. It may have been uninstalled already."
|
||||
}
|
||||
21
packaging/winget/CCExtractor.CCExtractor.installer.yaml
Normal file
21
packaging/winget/CCExtractor.CCExtractor.installer.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.installer.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
Platform:
|
||||
- Windows.Desktop
|
||||
MinimumOSVersion: 10.0.0.0
|
||||
InstallModes:
|
||||
- interactive
|
||||
- silent
|
||||
- silentWithProgress
|
||||
InstallerSwitches:
|
||||
Silent: /quiet
|
||||
SilentWithProgress: /passive
|
||||
UpgradeBehavior: install
|
||||
Installers:
|
||||
- Architecture: x64
|
||||
InstallerType: msi
|
||||
InstallerUrl: https://github.com/CCExtractor/ccextractor/releases/download/v0.96.5/CCExtractor.0.96.5.msi
|
||||
InstallerSha256: FFCAB0D766180AFC2832277397CDEC885D15270DECE33A9A51947B790F1F095B
|
||||
ManifestType: installer
|
||||
ManifestVersion: 1.9.0
|
||||
39
packaging/winget/CCExtractor.CCExtractor.locale.en-US.yaml
Normal file
39
packaging/winget/CCExtractor.CCExtractor.locale.en-US.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.defaultLocale.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
PackageLocale: en-US
|
||||
Publisher: CCExtractor Development
|
||||
PublisherUrl: https://ccextractor.org
|
||||
PublisherSupportUrl: https://github.com/CCExtractor/ccextractor/issues
|
||||
Author: CCExtractor Development Team
|
||||
PackageName: CCExtractor
|
||||
PackageUrl: https://ccextractor.org
|
||||
License: GPL-2.0
|
||||
LicenseUrl: https://github.com/CCExtractor/ccextractor/blob/master/LICENSE.txt
|
||||
Copyright: Copyright (c) CCExtractor Development
|
||||
ShortDescription: A tool to extract subtitles from video files
|
||||
Description: |-
|
||||
CCExtractor is a tool that analyzes video files and produces independent subtitle files from the closed captions data.
|
||||
|
||||
Key features:
|
||||
- Extracts closed captions from various video formats (MPEG, H.264, MKV, MP4, etc.)
|
||||
- Supports multiple input sources including DVDs, DVRs, and live TV captures
|
||||
- Outputs to multiple formats (SRT, WebVTT, SAMI, transcript, etc.)
|
||||
- OCR support for bitmap-based subtitles (DVB, teletext)
|
||||
- Cross-platform (Windows, Linux, macOS)
|
||||
- Includes a GUI for easy operation
|
||||
Moniker: ccextractor
|
||||
Tags:
|
||||
- subtitles
|
||||
- closed-captions
|
||||
- video
|
||||
- extraction
|
||||
- accessibility
|
||||
- srt
|
||||
- dvb
|
||||
- teletext
|
||||
- ocr
|
||||
- media
|
||||
ReleaseNotesUrl: https://github.com/CCExtractor/ccextractor/releases
|
||||
ManifestType: defaultLocale
|
||||
ManifestVersion: 1.9.0
|
||||
6
packaging/winget/CCExtractor.CCExtractor.yaml
Normal file
6
packaging/winget/CCExtractor.CCExtractor.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.version.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
DefaultLocale: en-US
|
||||
ManifestType: version
|
||||
ManifestVersion: 1.9.0
|
||||
@@ -1,745 +0,0 @@
|
||||
# Batch Timing Verification Plan
|
||||
|
||||
## Objective
|
||||
|
||||
Compare caption timing between CCExtractor and FFmpeg for all media files in `/home/cfsmp3/media_samples/`, identify any timing discrepancies, and fix them if possible.
|
||||
|
||||
---
|
||||
|
||||
## EXECUTION COMPLETE
|
||||
|
||||
### Summary Statistics
|
||||
|
||||
| Category | Count |
|
||||
|----------|-------|
|
||||
| Total files processed | 165 |
|
||||
| Completed (captions found in both) | 55 |
|
||||
| Failed (no FFmpeg captions or error) | 110 |
|
||||
| Timing OK (within 50ms) | 12 |
|
||||
| Timing Issues (>50ms offset) | 43 |
|
||||
|
||||
### Timing Issue Breakdown
|
||||
|
||||
| Severity | Count | Description |
|
||||
|----------|-------|-------------|
|
||||
| Minor (50-500ms) | 5 | 1-15 frames late |
|
||||
| Moderate (500ms-5s) | 16 | Noticeable delay |
|
||||
| Severe (>5s) | 22 | Major discrepancy (likely matching issues) |
|
||||
|
||||
---
|
||||
|
||||
## Complete File List (Ordered by Fix Difficulty)
|
||||
|
||||
All files are located in `/home/cfsmp3/media_samples/completed/`
|
||||
|
||||
### Category 1: Timing OK (within 50ms) - NO FIX NEEDED
|
||||
|
||||
These files have timing that matches FFmpeg within acceptable tolerance.
|
||||
|
||||
| File | Max Offset | Matched Captions | Avg Offset |
|
||||
|------|------------|------------------|------------|
|
||||
| c37ea08ccbec663a4d58977eff8a9cf176e38912e67a9222abdfae882516ea63.ts | 0ms | 0 | 0.0ms |
|
||||
| d0291cdcf69f765bdb990a726a9014a08808a40e32b27002325859c1d24029e4.ts | 0ms | 0 | 0.0ms |
|
||||
| c4dd893cb9d67be50f88bdbd2368111e16b9d1887741d66932ff2732969d9478.ts | 0ms | 0 | 0.0ms |
|
||||
| bd6f33a6697e1bef8a5d74501ae943bb4ddd2ec285054ed2c63f177ee0142a47.wtv | 0ms | 0 | 0.0ms |
|
||||
| a65d39ccb3ee798824434b8ebf2d7598ec30cf17ccae3bd88e73dd0c482ae44e.ts | 0ms | 0 | 0.0ms |
|
||||
| f41d4c29a153f81a2be067d9d8b005dad616a1fae92f8f4f40c84c992daadd09.ts | 0ms | 0 | 0.0ms |
|
||||
| 56c9f345482c635f20340d13001f1083a7c1913c787075d6055c112fe8e2fcaa.mpg | 0ms | 0 | 0.0ms |
|
||||
| f23a544ba8a081498f5dee2b6858f59be30245e62098da4876be4510e44e88de.wtv | 0ms | 0 | 0.0ms |
|
||||
| 5df914ce773d212423591cce19c9c48d41c77e9c043421e8e21fcea8ecb0e2df.mp4 | 1ms | 8 | 0.6ms |
|
||||
| 80848c45f86a747b8e6d95acd878309fed3fd61892d2e53191c33528be94e45c.mpg | 1ms | 2 | 0.5ms |
|
||||
| da904de35dbe6e08cdc450db259d84c7ce338598c2f2989485f9090ddae66e83.mpg | 1ms | 2 | 0.5ms |
|
||||
| e9b9008fdf37afa7d0487452b1b2b4a69e160a8b3255b5e02ed22bea8edc3eeb.mpg | 1ms | 24 | 0.5ms |
|
||||
|
||||
**Notes**: Files with 0 matched captions had captions that couldn't be matched by text (different formatting), so no offset could be measured for them. Their 0ms values mean that no timing issue was detected, not that the timing was verified.
|
||||
|
||||
---
|
||||
|
||||
### Category 2: Minor Issues (50-500ms) - EASIEST TO FIX
|
||||
|
||||
These show consistent small offsets that suggest systematic timing bugs.
|
||||
|
||||
| File | Max Offset | Matched | Avg Offset | Issues | Notes |
|
||||
|------|------------|---------|------------|--------|-------|
|
||||
| addf5e2fc9c2f8f3827d1b9f143848cab82e619895c3c402cc1c0263a5b289db.ts | 68ms | 3 | 67.7ms | 3 | ~2 frames late |
|
||||
| 8e8229b88bc6b3cecabe6d90e6243922fc8a0e947062a7abedec54055e21c2bf.mpg | 101ms | 14 | 11.4ms | 1 | Only 1st caption affected |
|
||||
| 7f41299cc70a9fe48ea396791b35a94c1a759baf77cbb7a8d49fb399ceb436ad.ts | 134ms | 117 | 111.5ms | 98 | ~4 frames late, consistent |
|
||||
| c032183ef018ec67c22f9cb54964b803a8bd6a0fa42cb11cb6a8793198547b6a.ts | 284ms | 10 | 284.0ms | 10 | ~8.5 frames late, very consistent |
|
||||
| add511677cc42400d053afeeb31fee183cb5fc99b122cd40d5e40f256ea6d538.vob | 366ms | 7 | 242.7ms | 7 | ~11 frames late, DVD VOB |
|
||||
|
||||
**Priority for investigation**: These files have consistent offsets suggesting a fixable timing bug in CCExtractor.
|
||||
|
||||
---
|
||||
|
||||
### Category 3: Moderate Issues (500ms-5s) - MEDIUM DIFFICULTY
|
||||
|
||||
| File | Max Offset | Matched | Avg Offset | Issues | Notes |
|
||||
|------|------------|---------|------------|--------|-------|
|
||||
| 5ae2007a798576767b1098da580e5af650a0f21607ad7ad568b02a1ee2c30aa9.vob | 501ms | 8 | 400.1ms | 8 | DVD VOB |
|
||||
| 725a49f871dc5a2ebe9094cf9f838095aae86126e9629f96ca6f31eb0f4ba968.mpg | 535ms | 8 | -62.5ms | 1 | Single outlier, avg is good |
|
||||
| ab9cf8cfad69d039a7c97fbfb0a7a5eadf980518779b056a1e0e1d520d1b504b.mpg | 567ms | 7 | 385.7ms | 7 | |
|
||||
| 97cc394d877bb28a06921555f65238602799a4ca7e951c065b54b5b94241fe2f.wtv | 752ms | 4 | 751.5ms | 4 | Windows TV recording |
|
||||
| c83f765c661595e1bfa4750756a54c006c6f2c697a436bc0726986f71f0706cd.ts | 1302ms | 3 | -389.3ms | 3 | Negative avg = CCX early? |
|
||||
| dc7169d7c4e5098dbbe7abf323bdb866615f35495059cc9509e895e020890eb5.h264 | 1303ms | 3 | -392.0ms | 3 | Raw H.264 stream |
|
||||
| 6395b281adf0932dfe6e96514f212ce54d72e405681e05f3a3b677068d501800.asf | 1368ms | 3 | -455.3ms | 1 | ASF container |
|
||||
| 0069dffd21806a08d21a0f2ef8209c00c84a5a7e5cd5468ad326898f7431eb8e.mpg | 1836ms | 20 | 96.4ms | 17 | MPEG, most captions OK |
|
||||
| b2771c84c2a3e7914a8aa7acbb053c2f74eea5e5fba558b3274ca909757d556e.mp4 | 2102ms | 19 | 15.6ms | 19 | MP4, low avg |
|
||||
| 5cbb21adb6e0a67de88aa864bf096214126e3ded46e5a2a9cfcbe5b0a1028969.dvr-ms | 2270ms | 97 | 550.8ms | 97 | DVR-MS recording |
|
||||
| 70000200c0b9421b983a8cba0f0ccd90ca600a86d39692144eeeeb270d2f8446.mpg | 3304ms | 1458 | 56.9ms | 1458 | Large file, small avg |
|
||||
| acf871cbfd8498c9f7e0aa36fa44ff6061618960b881c1bb8e0583209fbfc180.dvr-ms | 3504ms | 177 | 510.8ms | 177 | DVR-MS recording |
|
||||
| 132d7df7e993d5d4f033860074a7dc4ddf2fb3432f39ae0b2e825d75670cb7df.mov | 3537ms | 885 | 28.1ms | 471 | MOV, most captions OK |
|
||||
| 01509e4d27bddb1e47b52f5b42e7c88dfea9c5b64fd056d49e9a49bc5f4a2699.ts | 3704ms | 947 | 162.4ms | 947 | TS, consistent offset |
|
||||
| 31a4ca6ac179acb335bd6d77e7099b9caed31ccec1d70efd9e529b4d68e0ba64.mp4 | 4571ms | 136 | 973.8ms | 134 | MP4 |
|
||||
| c41f73056aed397a51c0c0c7bb971e27341194e2591f58fc4a74d2cd8afec55d.mpg | 4738ms | 520 | 1440.9ms | 520 | MPEG, ~1.4s consistent |
|
||||
|
||||
**Notes**:
|
||||
- A negative average offset means CCExtractor is sometimes *earlier* than FFmpeg for that file
|
||||
- Files with low average but high max likely have a few badly matched captions
|
||||
|
||||
---
|
||||
|
||||
### Category 4: Severe Issues (>5s) - HARDEST TO FIX (Likely matching issues)
|
||||
|
||||
These large offsets are most likely caused by caption text matching failures in the comparison, not actual timing bugs.
|
||||
|
||||
| File | Max Offset | Matched | Avg Offset | Issues | Notes |
|
||||
|------|------------|---------|------------|--------|-------|
|
||||
| 7236304cfcfce141c7cec31647c1268a3886063390ce43c2f71188c70f5494c4.ts | 5406ms | 7 | -762.4ms | 1 | Single bad match |
|
||||
| d037c7509e0ac518b0945247d0f968517f94904eaa391366633da60c1cdcc85f.ts | 5406ms | 1030 | -14.8ms | 6 | Mostly OK, 6 bad matches |
|
||||
| 83b03036a2fa19a8a02e20bcbf2c3597fb00e8b367bcca6c4ed8383657fd209f.ts | 7041ms | 615 | -13.7ms | 4 | Mostly OK |
|
||||
| 1974a299f0502fc8199dabcaadb20e422e79df45972e554d58d1d025ef7d0686.mov | 8576ms | 914 | 36.1ms | 469 | Half have issues |
|
||||
| c8dc039a880df340d795a777fbb9be8a5fbee39bbb4851ac83a05234af78b9e9.ts | 13247ms | 1254 | -33.1ms | 22 | Mostly OK |
|
||||
| 53339f345506e34cd0fea7a5c7c88098ee18f75de578e3e1ee3b7f727241a66a.ts | 14548ms | 15 | 909.5ms | 15 | All captions affected |
|
||||
| b22260d065ab537899baaf34e78a5184671f4bcb2df0414d05e6345adfd7812f.ts | 47415ms | 598 | 7.2ms | 23 | Very low avg |
|
||||
| 5c70576bf37e33d9425e18605fc836041d92e03de8d83a92f98b08e0205bd317.mp4 | 63.5s | 37 | 2.8s | 36 | Wrong captions matched |
|
||||
| 1e44efd810c020884ea97b2792b5ba6b9d3a6e0198ee5284d3a5afaaf348c055.vob | 80.9s | 44 | 29.5s | 43 | DVD, different program? |
|
||||
| d7e7dbdf6807321c450774288664d195b9830314aa8b5ffe9ed934c10fd09e6a.wtv | 382.4s | 341 | 3.7s | 323 | WTV recording |
|
||||
| 7d3f25c32c1c91060ecfc97f7bfe5d45e764ca6fcc879855bcab7d2c7f47244b.ts | 438.1s | 474 | 0.9s | 2 | Only 2 bad matches |
|
||||
| 15feae913371b8cf7596f122f40806c34c1ef8354165593ac81a6a4c889f288a.ts | 1087.0s | 741 | 9.2s | 677 | ~18 minutes max |
|
||||
| ae6327683e6bb1491b98a318a8910ae4194515fd139ae0421cf4c90a5ec19ffd.wtv | 1127.4s | 333 | 21.4s | 333 | WTV, all affected |
|
||||
| 8849331ddae9c3169024d569ce17b9a4fdd917401cd6c6bfb8dc1fd59c6af21e.mp4 | 1156.5s | 295 | -0.8s | 295 | Low avg, matching issue |
|
||||
| 7d2730d38e71353446e205c84bb262abc993692a91275493df4dcc161e58f252.ts | 1434.8s | 382 | 18.6s | 17 | ~24 min max |
|
||||
| 7aad20907e88c8297a724a9e54961e45c9c856b82acbfde5ed9e2baf00920848.mpg | 1685.7s | 638 | -2.8s | 638 | ~28 min max |
|
||||
| c813e713a0b665e93149440bc3f877203052e20fa99414474b3e273b7c786c3d.ts | 1770.8s | 599 | 16.7s | 249 | ~30 min max |
|
||||
| 27fab4dbb603753d76e8b7701bbf0a5b1b9d41d2748759bb22f7fe6afc75597b.ts | 1857.2s | 659 | 20.8s | 26 | ~31 min max |
|
||||
| 99e5eaafdc55b90c163aef470e540341c5d6992f50098933a7f21f7595b4d72d.mov | 2210.3s | 1246 | 8.9s | 719 | Original test file |
|
||||
| e274a73653ce721a1876bfe1ed387dd7365515ba15fe28a570541c076561ba7c.ts | 2245.2s | 612 | 25.2s | 37 | ~37 min max |
|
||||
| 88cd42b89aa0795c40388e562bc05c128027c856ecbb61b039bdf72645ac018f.ts | 2509.1s | 531 | 39.0s | 150 | ~42 min max |
|
||||
| 5d3a29f9f87a131402e16376b3e45ecaef8b050bb2e4f71ef06e46dd85d79684.mpg | 3668.8s | 1143 | 1.6s | 1143 | ~61 min max, low avg |
|
||||
|
||||
**Analysis**:
|
||||
- Files with low average but high max offset have a few badly matched captions skewing results
|
||||
- Files with consistently high average likely have different programs being compared (e.g., FFmpeg matched program 1, CCExtractor matched program 2)
|
||||
- These need manual verification before treating as bugs
|
||||
|
||||
---
|
||||
|
||||
## Root Cause Analysis
|
||||
|
||||
### Investigated Case: c032183ef018ec67c22f9cb54964b803a8bd6a0fa42cb11cb6a8793198547b6a.ts
|
||||
|
||||
**FFmpeg**: First caption at `00:00:01,552`
|
||||
**CCExtractor**: First caption at `00:00:01,836`
|
||||
**Offset**: 284ms (consistently throughout file)
|
||||
|
||||
This offset persists even with the timing fix from PR #1808, suggesting a different root cause than the cb_field offset accumulation.
|
||||
|
||||
Possible causes:
|
||||
1. **B-frame reordering**: CCExtractor may use DTS instead of PTS somewhere
|
||||
2. **GOP timing offset**: Initial GOP time might add delay
|
||||
3. **Caption buffering**: Pop-on caption buffering might add delay
|
||||
4. **Different frame association**: Different interpretation of which frame contains the caption
|
||||
|
||||
### Comparison Methodology Notes
|
||||
|
||||
The comparison script matches captions by text content. For files with very different caption counts or text formats, matching can fail and produce false large offsets.
|
||||
|
||||
---
|
||||
|
||||
## Recommendations
|
||||
|
||||
### Short-term (Fix Real Timing Issues)
|
||||
|
||||
1. **Investigate consistent offset patterns**:
|
||||
- 68ms (2 frames) - addf5e2fc9...ts
|
||||
- 134ms (4 frames) - 7f41299cc70...ts
|
||||
- 284ms (8-9 frames) - c032183ef01...ts
|
||||
|
||||
These suggest systematic issues in timestamp calculation.
|
||||
|
||||
2. **Check B-frame handling**: Ensure PTS (not DTS) is used for caption timing.
|
||||
|
||||
3. **Review GOP time application**: The "Initial GOP time" in CCExtractor output may be affecting timestamps.
|
||||
|
||||
### Medium-term
|
||||
|
||||
1. **Improve comparison methodology**: Match captions by timestamp proximity as well as text.
|
||||
|
||||
2. **Add timing accuracy tests**: Include files with known correct timing in regression suite.
|
||||
|
||||
### Files for Further Investigation
|
||||
|
||||
Priority files for debugging (consistent small offset):
|
||||
1. `addf5e2fc9c2f8f3827d1b9f143848cab82e619895c3c402cc1c0263a5b289db.ts` (68ms) - Easiest
|
||||
2. `7f41299cc70a9fe48ea396791b35a94c1a759baf77cbb7a8d49fb399ceb436ad.ts` (134ms)
|
||||
3. `c032183ef018ec67c22f9cb54964b803a8bd6a0fa42cb11cb6a8793198547b6a.ts` (284ms)
|
||||
4. `add511677cc42400d053afeeb31fee183cb5fc99b122cd40d5e40f256ea6d538.vob` (366ms) - DVD format
|
||||
|
||||
---
|
||||
|
||||
## Files Moved
|
||||
|
||||
### To `completed/` (55 files)
|
||||
Files where both FFmpeg and CCExtractor produced captions, comparison completed.
|
||||
|
||||
### To `failed/` (110 files)
|
||||
Files where:
|
||||
- FFmpeg couldn't extract captions (no CEA-608 data in video stream)
|
||||
- Unsupported format (bin, txt, png, raw)
|
||||
- Processing error
|
||||
|
||||
---
|
||||
|
||||
## Idempotency
|
||||
|
||||
State file: `/home/cfsmp3/media_samples/.timing_check_state.json`
|
||||
|
||||
Contains full processing results for all 165 files. Can be used to:
|
||||
- Resume interrupted processing
|
||||
- Generate reports
|
||||
- Track which files have been analyzed
|
||||
|
||||
---
|
||||
|
||||
## Current Status
|
||||
|
||||
**Phase**: Regressions Fixed - Ready for Merge
|
||||
**Date**: 2025-12-13
|
||||
**Latest Update**: All regressions fixed. MP4 c608 track timing fixed (666ms -> 0ms). MPEG-PS 66ms offset is a known limitation due to FFmpeg using different reference points for MPEG-PS vs TS containers.
|
||||
|
||||
---
|
||||
|
||||
## FULL RETEST RESULTS (2025-12-13)
|
||||
|
||||
### Summary Comparison
|
||||
|
||||
| Metric | Before Fixes | After Fixes | Change |
|
||||
|--------|--------------|-------------|--------|
|
||||
| Timing OK (≤50ms) | 12 | 20 | **+8** |
|
||||
| Timing Issues (>50ms) | 43 | 34 | **-9** |
|
||||
| Failed | 1 | 1 | 0 |
|
||||
|
||||
### Files Successfully Fixed (11 files)
|
||||
|
||||
All original Category 2 (Minor) issues have been resolved, along with five Category 3 files and one Category 4 file:
|
||||
|
||||
| File | Before | After | Status |
|
||||
|------|--------|-------|--------|
|
||||
| `addf5e2fc9c2f8...ts` | 68ms | 1ms | ✅ FIXED |
|
||||
| `8e8229b88bc6b3...mpg` | 101ms | 18ms | ✅ FIXED |
|
||||
| `7f41299cc70a9f...ts` | 134ms | 1ms | ✅ FIXED |
|
||||
| `c032183ef018ec...ts` | 284ms | 1ms | ✅ FIXED |
|
||||
| `add511677cc424...vob` | 366ms | 34ms | ✅ FIXED |
|
||||
| `5ae2007a798576...vob` | 501ms | 34ms | ✅ FIXED |
|
||||
| `ab9cf8cfad69d0...mpg` | 567ms | 34ms | ✅ FIXED |
|
||||
| `c83f765c661595...ts` | 1302ms | 1ms | ✅ FIXED |
|
||||
| `6395b281adf093...asf` | 1368ms | 1ms | ✅ FIXED |
|
||||
| `01509e4d27bddb...ts` | 3704ms | 1ms | ✅ FIXED |
|
||||
| `7236304cfcfce1...ts` | 5406ms | 17ms | ✅ FIXED |
|
||||
|
||||
### ✅ REGRESSIONS FIXED (2025-12-13)
|
||||
|
||||
All regressions introduced by Fixes 1-6 have been addressed (either fixed or documented as a known limitation):
|
||||
|
||||
| File | Before | After Fix 7 | Status |
|
||||
|------|--------|-------------|--------|
|
||||
| `5df914ce773d21...mp4` | 666ms | **0ms** | ✅ **FIXED** by Fix 7 |
|
||||
| `80848c45f86a74...mpg` | 66ms | 66ms | ⚠️ **KNOWN LIMITATION** - FFmpeg uses different timing reference for MPEG-PS |
|
||||
| `da904de35dbe6e...mpg` | 66ms | 66ms | ⚠️ **KNOWN LIMITATION** - FFmpeg uses different timing reference for MPEG-PS |
|
||||
|
||||
#### Resolution Details
|
||||
|
||||
**`5df914ce...mp4` (666ms → 0ms) ✅ FIXED**:
|
||||
- Root cause: MP4 c608/c708 caption tracks have no video frames, so frame type stayed Unknown
|
||||
- `min_pts` was never set because we wait for I-frame to set it
|
||||
- Fix: Set frame type to I-frame for caption tracks in mp4.c
|
||||
- Also fixed: premature `pts_set = MinPtsSet` assignment in timing.rs
|
||||
|
||||
**`80848c45...mpg` and `da904de3...mpg` (66ms) - KNOWN LIMITATION**:
|
||||
- This is NOT a bug in CCExtractor - it's a difference in timing reference between containers
|
||||
- For MPEG-PS (Program Stream): FFmpeg uses the lowest PTS (from B-frames) as reference
|
||||
- For MPEG-TS (Transport Stream): FFmpeg uses the I-frame PTS as reference
|
||||
- CCExtractor now consistently uses I-frame PTS as reference for all containers
|
||||
- The 66ms (2 frames) offset is the difference between I-frame and first B-frame PTS
|
||||
- **Decision**: Accept this as a known limitation. CCExtractor's behavior is more consistent
|
||||
and matches FFmpeg's TS behavior. The 66ms offset for MPEG-PS is acceptable.
|
||||
|
||||
### Remaining Issues (34 files with timing issues)
|
||||
|
||||
Most remaining issues fall into categories that are NOT timing bugs:
|
||||
|
||||
1. **Very large offsets (>100 seconds)** - Caption text matching failures in comparison script
|
||||
2. **WTV files (751ms)** - Known MSTV vs video-embedded CEA-608 timing difference (pre-existing)
|
||||
3. **Multi-program streams** - FFmpeg and CCExtractor extracting different caption programs
|
||||
|
||||
---
|
||||
|
||||
## TODO: Next Steps
|
||||
|
||||
### ✅ COMPLETED: Fix Regressions Before Merge
|
||||
|
||||
1. **[✅] Fix "First Caption = 0ms" regression** - FIXED by Fix 7
|
||||
- Fixed by ensuring `pts_set = MinPtsSet` is only set AFTER `min_pts` is actually set
|
||||
- `fts_now` calculation now properly gated by `pts_set == MinPtsSet`
|
||||
|
||||
2. **[✅] Fix `5df914ce...mp4` regression** - FIXED by Fix 7
|
||||
- MP4 c608/c708 caption tracks now set frame type to I-frame
|
||||
- This triggers proper min_pts initialization from first sample
|
||||
- Result: 666ms → 0ms offset
|
||||
|
||||
3. **[✅] 66ms early regression** - DOCUMENTED AS KNOWN LIMITATION
|
||||
- Files: `80848c45...mpg`, `da904de3...mpg`
|
||||
- This is a container-specific timing reference difference, not a bug
|
||||
- FFmpeg uses B-frame PTS for MPEG-PS, I-frame PTS for MPEG-TS
|
||||
- CCExtractor now consistently uses I-frame PTS for all containers
|
||||
- 66ms offset is acceptable (within 2 frames)
|
||||
|
||||
### PRIORITY 1: Validation (RECOMMENDED)
|
||||
|
||||
4. **[ ] Run CCExtractor regression test suite**
|
||||
- Ensure no existing tests are broken by timing changes
|
||||
|
||||
5. **[✅] Re-run batch timing verification** - DONE
|
||||
- All regressions verified fixed or documented as known limitations
|
||||
|
||||
### PRIORITY 2: Optional Improvements (LOW)
|
||||
|
||||
6. **[ ] WTV timing offset**
|
||||
- 751ms offset caused by MSTV vs video-embedded caption sources
|
||||
- Pre-existing architectural difference, not caused by recent fixes
|
||||
|
||||
7. **[ ] Consider adding timing accuracy tests**
|
||||
- Add test files with known-correct timing to regression suite
|
||||
|
||||
---
|
||||
|
||||
## Category 4 Investigation Results (2025-12-13)
|
||||
|
||||
Investigation of 3 Category 4 files revealed:
|
||||
|
||||
| File | Original Issue | Actual Status |
|
||||
|------|---------------|---------------|
|
||||
| `d037c7509e...ts` | 5406ms max | First caption = 0ms bug, rest OK (1ms accuracy) |
|
||||
| `53339f345...ts` | 909ms avg | NOT A BUG - comparison methodology issue |
|
||||
| `83b03036a...ts` | 7041ms max | First caption = 0ms bug, rest OK (0ms accuracy) |
|
||||
|
||||
**Key Finding**: Most Category 4 "severe issues" are NOT actual timing bugs:
|
||||
- Large offsets come from comparison script text matching failures
|
||||
- FFmpeg shows intermediate roll-up states, CCExtractor shows complete states
|
||||
- When comparing equivalent caption states, timing matches within 1-2ms
|
||||
|
||||
**New Regression Identified**: "First Caption = 0ms" affects pop-on captions that
|
||||
arrive before the first I-frame establishes timing. This is a side effect of
|
||||
Fixes 2 and 3, which defer setting `min_pts` until an I-frame is seen.
|
||||
|
||||
---
|
||||
|
||||
## FIXES IMPLEMENTED
|
||||
|
||||
### Fix 1: B-Frame PTS Timing Issue (MAJOR FIX)
|
||||
|
||||
**Root Cause Identified**: CCExtractor was updating `min_pts` whenever it encountered a frame with a lower PTS. This caused B-frames (which have earlier PTS than I-frames due to temporal reordering) to shift the timing baseline earlier, resulting in consistent timing offsets.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/rust/lib_ccxr/src/time/timing.rs` - Modified `set_fts()` to only set `min_pts` from the FIRST valid frame (when `min_pts` is still at initial value `0x01FFFFFFFF`). This prevents B-frames from shifting the timing baseline.
|
||||
|
||||
2. `src/lib_ccx/ccx_decoders_common.c` - Added checks to NOT increment `cb_field1`, `cb_field2`, `cb_708` counters for container formats (`CCX_H264` and `CCX_PES`), since container formats associate all captions with the frame's PTS directly.
|
||||
|
||||
3. `src/lib_ccx/sequencing.c` - Extended `reset_cb` logic to include `CCX_PES` in addition to `CCX_H264` for resetting cb_field counters in `process_hdcc()`.
|
||||
|
||||
**Test Results for `addf5e2fc9...ts` (68ms offset file)**:
|
||||
- Before fix: CCExtractor 4,472ms vs FFmpeg 4,404ms = **68ms offset**
|
||||
- After fix: CCExtractor 4,405ms vs FFmpeg 4,404ms = **1ms offset** ✓
|
||||
|
||||
**Technical Explanation**:
|
||||
- MPEG-2 video uses B-frame reordering where B-frames have lower PTS but display after I/P-frames
|
||||
- CCExtractor was using the lowest PTS (from a B-frame) as the timing baseline
|
||||
- FFmpeg uses the first I-frame's PTS as the baseline
|
||||
- This caused a consistent offset equal to (I-frame PTS - first B-frame PTS)
|
||||
- For the test file: 144335 - 138329 = 6006 clock ticks = 66.7ms at 90kHz
|
||||
|
||||
### Fix 2: I-Frame Only min_pts (Additional Fix for c032183)
|
||||
|
||||
**Root Cause Identified**: For `c032183ef01...ts`, the 284ms offset persisted after Fix 1 because the issue was different:
|
||||
- The stream contains leading video PES packets from a truncated GOP (trailing B/P frames)
|
||||
- These packets have earlier PTS values than the first complete I-frame
|
||||
- CCExtractor was setting min_pts from the first PES packet (PTS=2508198438)
|
||||
- FFmpeg uses the first *decoded* frame PTS (PTS=2508223963)
|
||||
- Difference: 25525 clock ticks = 283.6ms ≈ 284ms
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/rust/lib_ccxr/src/time/timing.rs` - Modified `set_fts()` to only set `min_pts` when `current_picture_coding_type == IFrame`. This ensures min_pts is set from the first decodable I-frame, not from leading garbage frames.
|
||||
|
||||
**Test Results**:
|
||||
- Before fix: CCExtractor 1,836ms vs FFmpeg 1,552ms = **284ms offset**
|
||||
- After fix: CCExtractor 1,552ms vs FFmpeg 1,552ms = **0ms offset** ✓
|
||||
|
||||
**Technical Explanation**:
|
||||
- MPEG-2 TS recordings often start mid-GOP due to hardware recording limitations
|
||||
- The first packets contain B/P frame data from the previous GOP that cannot be decoded
|
||||
- These packets have PTS values earlier than the first complete I-frame
|
||||
- FFmpeg's decoder naturally skips these and starts from the first I-frame
|
||||
- CCExtractor was using the earliest PTS (from undecodable frames) as the timing reference
|
||||
- The fix ensures CCExtractor uses the same reference point as FFmpeg
|
||||
|
||||
### Fix 3: Defer min_pts Until Frame Type is Known (MAJOR FIX)
|
||||
|
||||
**Root Cause Identified**: The previous fixes were being bypassed because `set_fts()` is called
|
||||
multiple times per frame - first from the PES/TS layer (with unknown frame type) and later from
|
||||
the ES parsing layer (with known frame type). The first call was setting `min_pts` before we
|
||||
knew whether it was an I-frame.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/rust/lib_ccxr/src/time/timing.rs` - Modified `set_fts()` logic:
|
||||
- When frame type is unknown, track PTS in `pending_min_pts` but DON'T set `min_pts`
|
||||
- Only set `min_pts` when frame type is known AND it's an I-frame
|
||||
- Added `unknown_frame_count` for fallback handling of H.264 streams
|
||||
|
||||
2. `src/lib_ccx/ccx_common_timing.h` - Added `unknown_frame_count` field to timing struct
|
||||
|
||||
3. `src/lib_ccx/ccx_common_timing.c` - Initialize `unknown_frame_count` to 0
|
||||
|
||||
4. Multiple Rust FFI files updated to handle the new field
|
||||
|
||||
### Final Test Results (Category 2 Files)
|
||||
|
||||
| File | Original | After Fix | Status |
|
||||
|------|----------|-----------|--------|
|
||||
| `8e8229b88bc6...mpg` | 101ms | **1ms** | ✓ FIXED |
|
||||
| `c032183ef018...ts` | 284ms | **0ms** | ✓ FIXED |
|
||||
| `7f41299cc70a9...ts` | 134ms | N/A | Different caption streams (not comparable) |
|
||||
| `addf5e2fc9c2f8...ts` | 68ms | N/A | Different caption streams (not comparable) |
|
||||
| `add511677cc42...vob` | 366ms | **34ms** | ✓ FIXED (within 1 frame) |
|
||||
|
||||
**Note on "different caption streams"**: For `7f41299cc70a9...ts` and `addf5e2fc9c2f8...ts`,
|
||||
FFmpeg and CCExtractor extract different caption content. FFmpeg shows "THE VIEW" program
|
||||
captions starting at 1s, while CCExtractor shows different program captions starting at 12.5s.
|
||||
This is a caption extraction difference (different programs/channels), not a timing bug.
|
||||
|
||||
### Fix 4: Pop-on to Roll-up Mode Transition Timing (725a49f8...mpg)
|
||||
|
||||
**Root Cause Identified**: When transitioning from pop-on to roll-up mode, CCExtractor was setting
|
||||
the caption start time when the first character was typed (1,501ms). FFmpeg uses the time when
|
||||
the display state changed to show multiple lines (~1,985ms). This caused the first roll-up caption
|
||||
after a mode switch to be timestamped too early.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/lib_ccx/ccx_decoders_608.h` - Added `rollup_from_popon` flag to track mode transitions
|
||||
|
||||
2. `src/lib_ccx/ccx_decoders_608.c` - Multiple changes:
|
||||
- Fixed duplicate init bug where `ts_start_of_current_line` was set to 0 instead of -1
|
||||
- Set `rollup_from_popon=1` and reset `ts_start_of_current_line=-1` on mode switch
|
||||
- Modified `write_char()` to NOT set `current_visible_start_ms` when `rollup_from_popon` is set
|
||||
- Modified CR handler to use `ts_start_of_current_line` when buffer scrolls and `rollup_from_popon` is set
|
||||
|
||||
**Test Results**:
|
||||
- Before fix: CCExtractor 1,501ms vs FFmpeg 1,985ms = **484ms early**
|
||||
- After fix: CCExtractor 2,118ms vs FFmpeg 1,985ms = **133ms late** (~4 frames)
|
||||
|
||||
**Technical Explanation**:
|
||||
- Pop-on mode shows captions as complete screens (e.g., "(door opens)")
|
||||
- Roll-up mode shows captions character-by-character in real-time
|
||||
- When switching modes, CCExtractor was using the time when the first character of the new roll-up caption was typed
|
||||
- FFmpeg tracks display state changes and uses the time when the multi-line state was reached
|
||||
- The fix defers setting the start time until the first CR command causes scrolling, using the
|
||||
time when the current (second) line started being typed
|
||||
- ~~The remaining 133ms difference is acceptable variation between decoder implementations~~ (Fixed by Fix 5)
|
||||
|
||||
### Fix 5: Roll-up First CR Timing (c83f765c...ts and 725a49f8...mpg)
|
||||
|
||||
**Root Cause Identified**: When transitioning from pop-on to roll-up mode, Fix 4 handled the case
|
||||
where `changes=1` (a line scrolls off). However, when the first CR happens with only one line
|
||||
visible (`changes=0`), `ts_start_of_current_line` was reset to -1. This caused the next caption's
|
||||
start time to be set when characters were typed (~133ms later), not when the CR happened.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/lib_ccx/ccx_decoders_608.c` - Modified CR handler:
|
||||
- When `rollup_from_popon=1` and `changes=0` (first CR with only 1 line), set
|
||||
`ts_start_of_current_line` to the current CR time instead of -1
|
||||
- This preserves the CR time so subsequent characters don't overwrite it
|
||||
- The next CR (with `changes=1`) uses the saved CR time for the caption start
|
||||
|
||||
**Test Results**:
|
||||
```
|
||||
c83f765c...ts:
|
||||
Before Fix 5: CCExtractor 2,469ms vs FFmpeg 2,336ms = 133ms late
|
||||
After Fix 5: CCExtractor 2,335ms vs FFmpeg 2,336ms = 1ms difference ✓
|
||||
|
||||
725a49f8...mpg:
|
||||
After Fix 4: CCExtractor 2,118ms vs FFmpeg 1,985ms = 133ms late
|
||||
After Fix 5: CCExtractor 1,985ms vs FFmpeg 1,985ms = 0ms difference ✓
|
||||
```
|
||||
|
||||
**Technical Explanation**:
|
||||
- In roll-up mode, the first CR after switching from pop-on mode scrolls the first line up
|
||||
- At this point, only 1 line is visible, so `check_roll_up()` returns 0 (no line would disappear)
|
||||
- Previously, `ts_start_of_current_line` was reset to -1 after every CR
|
||||
- This caused the next line's start time to be set when characters were typed (~133ms later)
|
||||
- The fix preserves the CR time when `rollup_from_popon=1` and `changes=0`
|
||||
- This ensures the caption start time matches when the display state changed (CR command)
|
||||
|
||||
### Fix 6: Elementary Stream (ES) Frame-by-Frame Timing (dc7169...h264)
|
||||
|
||||
**Root Cause Identified**: For elementary streams (raw MPEG-2 video without a container), CCExtractor
|
||||
uses GOP timing (`use_gop_as_pts = 1`). However, `fts_now` was only updated when a GOP header was
|
||||
parsed, not for each frame. This meant all frames within a GOP had the same timestamp (the GOP start
|
||||
time), causing caption timestamps to be nearly 0ms instead of proper frame-based times.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/rust/src/es/pic.rs` - Added frame-by-frame `fts_now` update when `use_gop_as_pts == 1`:
|
||||
- After incrementing `frames_since_last_gop`, calculate frame offset in ms
|
||||
- Set `fts_now = fts_at_gop_start + frame_offset_ms`
|
||||
- Update `fts_max` if needed
|
||||
|
||||
**Test Results for `dc7169d7...h264`**:
|
||||
```
|
||||
Before fix: CCExtractor 1ms, 9ms, 17ms, 25ms (completely broken)
|
||||
After fix: CCExtractor 2867ms, 4634ms, 6368ms (meaningful timestamps)
|
||||
FFmpeg: 2336ms, 4471ms, 5839ms
|
||||
|
||||
Offset: ~500ms (CCExtractor is late, acceptable for roll-up caption interpretation difference)
|
||||
```
|
||||
|
||||
**Technical Explanation**:
|
||||
- Elementary streams don't have PTS in the bitstream, so CCExtractor uses GOP timecodes
|
||||
- When `use_gop_as_pts == 1`, `set_fts()` is skipped in `read_pic_info()` to avoid PTS-based timing
|
||||
- However, this meant `fts_now` was never updated between GOPs
|
||||
- The fix calculates `fts_now` for each frame based on `fts_at_gop_start + (frames * 1000/fps)`
|
||||
- The remaining ~500ms offset is due to different interpretations of roll-up caption timing:
|
||||
- FFmpeg shows intermediate states (as lines appear)
|
||||
- CCExtractor shows completed states (when lines scroll off)
|
||||
|
||||
### Fix 7: MP4 c608 Track Timing and Garbage Frame Detection (MAJOR FIX)
|
||||
|
||||
**Root Cause Identified**: MP4 files with dedicated c608/c708 caption tracks (clcp subtype) had
|
||||
completely broken timing (666ms offset). The caption track has no video frames, so the frame type
|
||||
stayed Unknown forever. Since Fix 2/3 only set `min_pts` from I-frames, `min_pts` was never set,
|
||||
and `fts_now` used uninitialized values.
|
||||
|
||||
Additionally, the "First Caption = 0ms" regression was caused by `pts_set` being set to `MinPtsSet`
|
||||
before `min_pts` was actually updated - the old code set `pts_set` unconditionally, but the
|
||||
`min_pts` update was conditional on frame type.
|
||||
|
||||
**Files Changed**:
|
||||
1. `src/lib_ccx/mp4.c` - Set frame type to I-frame for c608/c708 caption tracks:
|
||||
```c
|
||||
if (type == GF_ISOM_MEDIA_CLOSED_CAPTION)
|
||||
dec_ctx->timing->current_picture_coding_type = CCX_FRAME_TYPE_I_FRAME;
|
||||
```
|
||||
|
||||
2. `src/rust/lib_ccxr/src/time/timing.rs` - Multiple fixes:
|
||||
- Only set `pts_set = MinPtsSet` AFTER actually setting `min_pts`
|
||||
- Gate `fts_now` calculation on `pts_set == MinPtsSet`
|
||||
- Add garbage frame detection threshold (100ms gap indicates garbage, not B-frame)
|
||||
- Track `pending_min_pts` for ALL frames (not just unknown types)
|
||||
|
||||
**Test Results**:
|
||||
```
|
||||
5df914ce...mp4:
|
||||
Before: 666ms offset (first caption at 1ms instead of 667ms)
|
||||
After: 0ms offset ✓
|
||||
|
||||
c032183e...ts:
|
||||
Before: 284ms (was fixed by earlier commits)
|
||||
After: 0ms (still fixed) ✓
|
||||
|
||||
addf5e2f...ts:
|
||||
Before: 68ms (was fixed by earlier commits)
|
||||
After: 1ms (still fixed) ✓
|
||||
|
||||
80848c45...mpg:
|
||||
Before: 66ms (regression)
|
||||
After: 66ms (KNOWN LIMITATION - FFmpeg uses B-frame PTS for MPEG-PS)
|
||||
|
||||
da904de3...mpg:
|
||||
Before: 66ms (regression)
|
||||
After: 66ms (KNOWN LIMITATION - FFmpeg uses B-frame PTS for MPEG-PS)
|
||||
```
|
||||
|
||||
**Technical Explanation**:
|
||||
- MP4 c608/c708 caption tracks are separate from video - they contain only caption samples
|
||||
- Without video frames to parse, frame type stays Unknown
|
||||
- By setting frame type to I-frame for caption tracks, we trigger proper `min_pts` initialization
|
||||
- The garbage frame detection (100ms threshold) distinguishes between:
|
||||
- B-frames with lower PTS (valid, use pending_min_pts)
|
||||
- Garbage frames from truncated GOPs (invalid, use I-frame PTS)
|
||||
- For MPEG-PS, the 66ms offset is because FFmpeg uses B-frame PTS as reference while
|
||||
CCExtractor now consistently uses I-frame PTS across all container formats
|
||||
|
||||
### Next Steps
|
||||
|
||||
1. ~~Test remaining Category 2 files with the fix~~ ✓ DONE
|
||||
2. ~~Commit the timing fix~~ ✓ DONE (commit a1a00941)
|
||||
3. ~~Investigate `725a49f871dc...mpg`~~ ✓ FIXED (0ms with Fix 5!)
|
||||
4. ~~Investigate `c83f765c...ts`~~ ✓ FIXED (1ms with Fix 5!)
|
||||
5. ~~**Investigate `97cc394d877b...wtv`**~~ ✓ ROOT CAUSE IDENTIFIED (see below)
|
||||
- 751ms offset caused by MSTV vs video-embedded CEA-608 timing difference
|
||||
- Pre-existing issue, not caused by recent fixes
|
||||
- LOW PRIORITY: Requires WTV-specific timing adjustment or `-wtvmpeg2` fix
|
||||
6. ~~Investigate `dc7169d7...h264`~~ ✓ FIXED (Fix 6 - ES frame timing)
|
||||
7. ~~Fix MP4 c608 track regression~~ ✓ FIXED (Fix 7)
|
||||
8. ~~Fix "First Caption = 0ms" regression~~ ✓ FIXED (Fix 7)
|
||||
9. ~~Document MPEG-PS 66ms offset as known limitation~~ ✓ DONE
|
||||
10. Run full regression tests before merging
|
||||
11. Consider investigating the caption channel selection difference for multi-program streams
|
||||
|
||||
---
|
||||
|
||||
## Category 3 Testing Results (After Fix)
|
||||
|
||||
### Files Fixed (within 1 frame ~33ms)
|
||||
|
||||
| File | Original Max | Original Avg | After Fix | Status |
|
||||
|------|--------------|--------------|-----------|--------|
|
||||
| `5ae2007a...vob` | 501ms | 400.1ms | **33ms** | ✓ FIXED |
|
||||
| `ab9cf8cf...mpg` | 567ms | 385.7ms | **33ms** | ✓ FIXED |
|
||||
|
||||
### Files Fully Fixed by Fix 5 (0ms difference!)
|
||||
|
||||
| File | Original Max | Original Avg | After Fix 4 | After Fix 5 | Status |
|
||||
|------|--------------|--------------|-------------|-------------|--------|
|
||||
| `c83f765c...ts` | 1302ms | -389.3ms | N/A (not tested) | **1ms** | ✓ FIXED |
|
||||
| `725a49f8...mpg` | 535ms | -62.5ms | 133ms | **0ms** | ✓ FIXED |
|
||||
|
||||
### Files Fixed by Fix 6 (Elementary Stream Timing)
|
||||
|
||||
| File | Original Max | Original Avg | After Fix | Status | Notes |
|
||||
|------|--------------|--------------|-----------|--------|-------|
|
||||
| `dc7169d7...h264` | 1303ms | -392.0ms | **~500ms** | ✓ FIXED | Raw MPEG-2 ES, acceptable roll-up timing diff |
|
||||
|
||||
### Files Already Fixed (verified after all fixes)
|
||||
|
||||
| File | Original Max | Original Avg | After Fix | Status | Notes |
|
||||
|------|--------------|--------------|-----------|--------|-------|
|
||||
| `6395b281...asf` | 1368ms | -455.3ms | **1ms** | ✓ FIXED | ASF container, perfect timing now |
|
||||
| `0069dffd...mpg` | 1836ms | 96.4ms | **N/A** | ⚠️ COMPARISON INVALID | FFmpeg mixes English+Spanish CC; CCExtractor correct |
|
||||
| `b2771c84...mp4` | 2102ms | 15.6ms | **N/A** | ⚠️ NO CAPTIONS | Both CCExtractor and FFmpeg find no captions |
|
||||
|
||||
### Files NOT Fixed (>1 frame offset)
|
||||
|
||||
| File | Original Max | Original Avg | After Fix | Status | Notes |
|
||||
|------|--------------|--------------|-----------|--------|-------|
|
||||
| `97cc394d...wtv` | 752ms | 751.5ms | **751ms** | ❌ NOT FIXED | WTV container, consistent 751ms offset |
|
||||
|
||||
### 725a49f8...mpg - FIXED ✓
|
||||
|
||||
**File**: `725a49f871dc5a2ebe9094cf9f838095aae86126e9629f96ca6f31eb0f4ba968.mpg`
|
||||
|
||||
**Original Symptoms**:
|
||||
- First caption was 484ms EARLY compared to FFmpeg
|
||||
- Subsequent captions matched perfectly (0-2ms offset)
|
||||
- FFmpeg extracts "(door opens)" caption at 00:00:00,501 that CCExtractor misses
|
||||
|
||||
**Root Cause**: Pop-on to roll-up mode transition timing. See Fix 4 above.
|
||||
|
||||
**After Fix**:
|
||||
```
|
||||
Caption "HI, HONEY / I'M HOME":
|
||||
CCExtractor: 00:00:02,118
|
||||
FFmpeg: 00:00:01,985
|
||||
Offset: +133ms (CCX late, ~4 frames - acceptable)
|
||||
|
||||
Caption "I'M HOME / DEAR JOHN":
|
||||
CCExtractor: 00:00:02,503
|
||||
FFmpeg: 00:00:02,503
|
||||
Offset: 0ms ✓
|
||||
|
||||
Subsequent captions: 0-2ms offset ✓
|
||||
```
|
||||
|
||||
**Note**: The "(door opens)" pop-on caption is still not extracted by CCExtractor because it
|
||||
uses a different caption mode. This is expected behavior - CCExtractor extracts roll-up captions
|
||||
while FFmpeg's cc_dec shows intermediate display states.
|
||||
|
||||
### WTV File Investigation - ROOT CAUSE IDENTIFIED
|
||||
|
||||
**File**: `97cc394d877bb28a06921555f65238602799a4ca7e951c065b54b5b94241fe2f.wtv`
|
||||
|
||||
**Symptoms**:
|
||||
- Consistent ~751ms offset (22-23 frames at 29.97fps)
|
||||
- Offset is uniform across all captions
|
||||
- WTV (Windows TV Recording) container format
|
||||
- Pre-existing issue (NOT caused by recent timing fixes)
|
||||
|
||||
**Comparison Data**:
|
||||
```
|
||||
Caption 1: CCX 00:00:02,970 vs FFmpeg 00:00:02,219 = +751ms
|
||||
Caption 2: CCX 00:00:05,473 vs FFmpeg 00:00:04,721 = +752ms
|
||||
Caption 3: CCX 00:00:07,308 vs FFmpeg 00:00:06,557 = +751ms
|
||||
Caption 4: CCX 00:00:08,843 vs FFmpeg 00:00:08,091 = +752ms
|
||||
Caption 5: CCX 00:00:10,344 vs FFmpeg 00:00:09,593 = +751ms
|
||||
```
|
||||
|
||||
**Root Cause Analysis**:
|
||||
|
||||
The 751ms offset is caused by **different caption data sources** used by CCExtractor vs FFmpeg:
|
||||
|
||||
1. **CCExtractor** uses the **MSTV caption stream** (`WTV_STREAM_MSTVCAPTION`)
|
||||
- Reads from a dedicated caption stream with its own timing packets
|
||||
- Uses WTV_TIMING GUIDs associated with stream 0xD (MSTV captions)
|
||||
- First timing value: 23732572 (100ns units) = 2373ms → min_pts
|
||||
|
||||
2. **FFmpeg** uses the **video-embedded CEA-608 data**
|
||||
- Uses lavfi movie filter with `subcc` output
|
||||
- Extracts CEA-608 from MPEG-2 video frame user data
|
||||
- Timestamps based on video frame PTS, potentially with different reference
|
||||
|
||||
**Timing Math**:
|
||||
```
|
||||
CCExtractor min_pts: 2373ms (from first WTV_TIMING packet)
|
||||
CCExtractor first caption: 2970ms
|
||||
FFmpeg first caption: 2219ms
|
||||
Offset: 751ms
|
||||
|
||||
If FFmpeg's implied min_pts = caption_pts - ffmpeg_output
|
||||
= 5343ms - 2219ms = 3124ms   (caption_pts = 2373ms min_pts + 2970ms first caption = 5343ms)
|
||||
Difference: 3124ms - 2373ms = 751ms ✓
|
||||
```
|
||||
|
||||
**Conclusion**:
|
||||
The MSTV caption stream uses a different timestamp epoch than the video-embedded CEA-608.
|
||||
CCExtractor's min_pts (2373ms) is 751ms earlier than what FFmpeg uses as reference (3124ms).
|
||||
|
||||
**Recommendation**: LOW PRIORITY FIX
|
||||
- This is a pre-existing architectural difference, not a bug from recent changes
|
||||
- Fixing would require understanding the WTV container's timestamp relationship between
|
||||
MSTV caption stream and video stream
|
||||
- Consider adding a WTV-specific timing offset if investigation reveals a consistent
|
||||
relationship between these two timestamp sources
|
||||
- Alternatively, use `-wtvmpeg2` mode to read captions from video stream (matches FFmpeg)
|
||||
but this mode currently crashes and needs separate fixing
|
||||
|
||||
---
|
||||
|
||||
## Technical Notes
|
||||
|
||||
### Frame Rate Math
|
||||
```
|
||||
29.97fps: 1001/30000 seconds/frame = 33.367ms
|
||||
68ms ≈ 2 frames
|
||||
134ms ≈ 4 frames
|
||||
284ms ≈ 8.5 frames
|
||||
366ms ≈ 11 frames
|
||||
```
|
||||
|
||||
### CEA-608 Caption Flow
|
||||
1. Caption data embedded in H.264 SEI or MPEG user data
|
||||
2. Pop-on captions: data sent to buffer, EOC command makes visible
|
||||
3. Start time should be PTS of frame with EOC command
|
||||
4. FFmpeg's cc_dec uses frame PTS directly
|
||||
|
||||
### CCExtractor Timing Flow
|
||||
1. `set_fts()` called per video frame with frame PTS
|
||||
2. `get_fts()` returns `fts_now + fts_global + cb_field * 1001/30`
|
||||
3. `get_visible_start()` returns start time when caption becomes visible
|
||||
4. PR #1808 fixed cb_field offset for container formats
|
||||
19
snap/local/run-ccextractor.sh
Executable file
19
snap/local/run-ccextractor.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/sh
# Launcher for the snapped CCExtractor binary.
#
# Builds LD_LIBRARY_PATH from the library directories bundled under $SNAP,
# then replaces this process with $SNAP/usr/local/bin/ccextractor, forwarding
# the remaining arguments unchanged.
set -e

# Default fallback
LIB_TRIPLET="x86_64-linux-gnu"

# Detect multiarch directory if present. If the glob matches nothing it stays
# literal, fails the -d test, and the fallback above is kept.
for d in "$SNAP/usr/lib/"*-linux-gnu; do
    if [ -d "$d" ]; then
        LIB_TRIPLET=$(basename "$d")
        break
    fi
done

# The last bundled directory must NOT end with ':' because the
# ${LD_LIBRARY_PATH:+:...} expansion supplies its own separator. A trailing
# ':' or an embedded '::' creates an empty path element, which the dynamic
# loader interprets as the current working directory.
export LD_LIBRARY_PATH="$SNAP/usr/lib:\
$SNAP/usr/lib/$LIB_TRIPLET:\
$SNAP/usr/lib/$LIB_TRIPLET/blas:\
$SNAP/usr/lib/$LIB_TRIPLET/lapack:\
$SNAP/usr/lib/$LIB_TRIPLET/pulseaudio\
${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Drop the first argument before forwarding the rest.
# NOTE(review): presumably this is the command path that snapd passes to a
# command-chain entry - confirm. Guarded so that an empty argument list does
# not abort the script under `set -e`.
if [ "$#" -gt 0 ]; then
    shift
fi

exec "$SNAP/usr/local/bin/ccextractor" "$@"
|
||||
104
snap/snapcraft.yaml
Normal file
104
snap/snapcraft.yaml
Normal file
@@ -0,0 +1,104 @@
|
||||
name: ccextractor
|
||||
base: core22
|
||||
version: '0.96.5'
|
||||
summary: Closed Caption Extractor
|
||||
description: |
|
||||
CCExtractor is a tool for extracting closed captions from video files.
|
||||
website: https://www.ccextractor.org
|
||||
source-code: https://github.com/CCExtractor/ccextractor
|
||||
confinement: classic
|
||||
|
||||
apps:
|
||||
ccextractor:
|
||||
command: usr/local/bin/ccextractor
|
||||
command-chain:
|
||||
- local/run-ccextractor.sh
|
||||
plugs:
|
||||
- home
|
||||
|
||||
parts:
|
||||
gpac:
|
||||
plugin: make
|
||||
source: https://github.com/gpac/gpac.git
|
||||
source-tag: abi-16.4
|
||||
build-packages:
|
||||
- build-essential
|
||||
- pkg-config
|
||||
- zlib1g-dev
|
||||
- libssl-dev
|
||||
- libfreetype6-dev
|
||||
- libjpeg-dev
|
||||
- libpng-dev
|
||||
override-build: |
|
||||
set -eux
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make DESTDIR=$SNAPCRAFT_PART_INSTALL install-lib
|
||||
sed -i "s|^prefix=.*|prefix=$SNAPCRAFT_STAGE/usr|" $SNAPCRAFT_PART_INSTALL/usr/lib/pkgconfig/gpac.pc
|
||||
stage:
|
||||
- usr/lib/libgpac*
|
||||
- usr/lib/pkgconfig/gpac.pc
|
||||
- usr/include/gpac
|
||||
|
||||
ccextractor:
|
||||
after: [gpac]
|
||||
plugin: cmake
|
||||
source: .
|
||||
source-subdir: src
|
||||
build-environment:
|
||||
- PKG_CONFIG_PATH: "$SNAPCRAFT_STAGE/usr/lib/pkgconfig:$PKG_CONFIG_PATH"
|
||||
build-snaps:
|
||||
- cmake/latest/stable
|
||||
- rustup/latest/stable
|
||||
build-packages:
|
||||
- build-essential
|
||||
- pkg-config
|
||||
- clang
|
||||
- llvm-dev
|
||||
- libclang-dev
|
||||
- libzvbi-dev
|
||||
- libtesseract-dev
|
||||
- libavcodec-dev
|
||||
- libavformat-dev
|
||||
- libavdevice-dev
|
||||
- libavfilter-dev
|
||||
- libswscale-dev
|
||||
- libx11-dev
|
||||
- libxcb1-dev
|
||||
- libxcb-shm0-dev
|
||||
- libpng-dev
|
||||
- zlib1g-dev
|
||||
- libblas3
|
||||
- liblapack3
|
||||
stage-packages:
|
||||
- libzvbi0
|
||||
- libfreetype6
|
||||
- libpng16-16
|
||||
- libprotobuf-c1
|
||||
- libutf8proc2
|
||||
- libgl1
|
||||
- libglu1-mesa
|
||||
- libavcodec58
|
||||
- libavformat58
|
||||
- libavutil56
|
||||
- libavdevice58
|
||||
- libavfilter7
|
||||
- libswscale5
|
||||
- libjpeg-turbo8
|
||||
- libvorbis0a
|
||||
- libtheora0
|
||||
- libxvidcore4
|
||||
- libfaad2
|
||||
- libmad0
|
||||
- liba52-0.7.4
|
||||
- libpulse0
|
||||
- pulseaudio-utils
|
||||
override-build: |
|
||||
set -eux
|
||||
rustup toolchain install stable
|
||||
rustup default stable
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
snapcraftctl build
|
||||
install -D -m 0755 \
|
||||
$SNAPCRAFT_PROJECT_DIR/snap/local/run-ccextractor.sh \
|
||||
$SNAPCRAFT_PART_INSTALL/local/run-ccextractor.sh
|
||||
@@ -9,7 +9,7 @@ option (WITH_HARDSUBX "Build with support for burned-in subtitles" OFF)
|
||||
|
||||
# Version number
|
||||
set (CCEXTRACTOR_VERSION_MAJOR 0)
|
||||
set (CCEXTRACTOR_VERSION_MINOR 89)
|
||||
set (CCEXTRACTOR_VERSION_MINOR 96)
|
||||
|
||||
# Get project directory
|
||||
get_filename_component(BASE_PROJ_DIR ../ ABSOLUTE)
|
||||
@@ -255,4 +255,13 @@ endif (PKG_CONFIG_FOUND)
|
||||
target_link_libraries (ccextractor ${EXTRA_LIBS})
|
||||
target_include_directories (ccextractor PUBLIC ${EXTRA_INCLUDES})
|
||||
|
||||
# ccx_rust (Rust) calls C functions from ccx (like decode_vbi).
|
||||
# Force the linker to pull these symbols from ccx before processing ccx_rust.
|
||||
if (NOT WIN32 AND NOT APPLE)
|
||||
target_link_options (ccextractor PRIVATE
|
||||
-Wl,--undefined=decode_vbi
|
||||
-Wl,--undefined=do_cb
|
||||
-Wl,--undefined=store_hdcc)
|
||||
endif()
|
||||
|
||||
install (TARGETS ccextractor DESTINATION bin)
|
||||
|
||||
@@ -186,6 +186,11 @@ int start_ccx()
|
||||
ccx_options.use_gop_as_pts = 0;
|
||||
if (ccx_options.ignore_pts_jumps)
|
||||
ccx_common_timing_settings.disable_sync_check = 1;
|
||||
// When using GOP timing (--goptime), disable sync check because
|
||||
// GOP time (wall-clock) and PES PTS (stream-relative) are in
|
||||
// different time bases and will always appear as huge jumps.
|
||||
if (ccx_options.use_gop_as_pts == 1)
|
||||
ccx_common_timing_settings.disable_sync_check = 1;
|
||||
mprint("\rAnalyzing data in general mode\n");
|
||||
tmp = general_loop(ctx);
|
||||
if (!ret)
|
||||
@@ -197,6 +202,12 @@ int start_ccx()
|
||||
if (!ret)
|
||||
ret = tmp;
|
||||
break;
|
||||
case CCX_SM_SCC:
|
||||
mprint("\rAnalyzing data in SCC (Scenarist Closed Caption) mode\n");
|
||||
tmp = raw_loop(ctx);
|
||||
if (!ret)
|
||||
ret = tmp;
|
||||
break;
|
||||
case CCX_SM_RCWT:
|
||||
mprint("\rAnalyzing data in CCExtractor's binary format\n");
|
||||
tmp = rcwt_loop(ctx);
|
||||
@@ -424,6 +435,9 @@ int main(int argc, char *argv[])
|
||||
|
||||
int compile_ret = ccxr_parse_parameters(argc, argv);
|
||||
|
||||
// Update the Rust logger target after parsing so --quiet is respected
|
||||
ccxr_update_logger_target();
|
||||
|
||||
if (compile_ret == EXIT_NO_INPUT_FILES)
|
||||
{
|
||||
print_usage();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
cmake_policy (SET CMP0037 NEW)
|
||||
|
||||
if(MSVC)
|
||||
set (CMAKE_C_FLAGS "-W3 /wd4005 /wd4996")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W3 /wd4005 /wd4996")
|
||||
else (MSVC)
|
||||
set (CMAKE_C_FLAGS "-Wall -Wno-pointer-sign -g -std=gnu99")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-pointer-sign -g -std=gnu99")
|
||||
endif(MSVC)
|
||||
|
||||
if(WIN32)
|
||||
|
||||
@@ -53,7 +53,7 @@ typedef struct
|
||||
{
|
||||
// Generic buffer to hold data
|
||||
unsigned char *parsebuf;
|
||||
long parsebufsize;
|
||||
int64_t parsebufsize;
|
||||
// Header Object variables
|
||||
int64_t HeaderObjectSize;
|
||||
int64_t FileSize;
|
||||
|
||||
@@ -379,11 +379,10 @@ void sei_rbsp(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: This really really looks bad
|
||||
mprint("WARNING: Unexpected SEI unit length...trying to continue.");
|
||||
temp_debug = 1;
|
||||
mprint("\n Failed block (at sei_rbsp) was:\n");
|
||||
dump(CCX_DMT_GENERIC_NOTICES, (unsigned char *)seibuf, seiend - seibuf, 0, 0);
|
||||
// Unexpected SEI length - common with malformed streams, don't spam output
|
||||
dbg_print(CCX_DMT_VERBOSE, "WARNING: Unexpected SEI unit length (parsed to %p, expected %p)...trying to continue.\n",
|
||||
(void *)tbuf, (void *)(seiend - 1));
|
||||
dump(CCX_DMT_VERBOSE, (unsigned char *)seibuf, seiend - seibuf, 0, 0);
|
||||
|
||||
ctx->num_unexpected_sei_length++;
|
||||
}
|
||||
@@ -393,20 +392,24 @@ void sei_rbsp(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
unsigned char *sei_message(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
{
|
||||
int payload_type = 0;
|
||||
while (*seibuf == 0xff)
|
||||
while (seibuf < seiend && *seibuf == 0xff)
|
||||
{
|
||||
payload_type += 255;
|
||||
seibuf++;
|
||||
}
|
||||
if (seibuf >= seiend)
|
||||
return NULL;
|
||||
payload_type += *seibuf;
|
||||
seibuf++;
|
||||
|
||||
int payload_size = 0;
|
||||
while (*seibuf == 0xff)
|
||||
while (seibuf < seiend && *seibuf == 0xff)
|
||||
{
|
||||
payload_size += 255;
|
||||
seibuf++;
|
||||
}
|
||||
if (seibuf >= seiend)
|
||||
return NULL;
|
||||
payload_size += *seibuf;
|
||||
seibuf++;
|
||||
|
||||
@@ -904,10 +907,10 @@ void seq_parameter_set_rbsp(struct avc_ctx *ctx, unsigned char *seqbuf, unsigned
|
||||
dvprint("vcl_hrd_parameters_present_flag= %llX\n", tmp1);
|
||||
if (tmp)
|
||||
{
|
||||
// TODO.
|
||||
mprint("vcl_hrd. Not implemented for now. Hopefully not needed. Skipping rest of NAL\n");
|
||||
// VCL HRD parameters are for video buffering compliance, not needed for caption extraction.
|
||||
// Just skip and continue - this doesn't affect our ability to extract captions.
|
||||
mprint("Skipping VCL HRD parameters (not needed for caption extraction)\n");
|
||||
ctx->num_vcl_hrd++;
|
||||
// exit(1);
|
||||
}
|
||||
if (tmp || tmp1)
|
||||
{
|
||||
@@ -954,6 +957,15 @@ void slice_header(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, un
|
||||
dvprint("first_mb_in_slice= % 4lld (%#llX)\n", tmp, tmp);
|
||||
slice_type = read_exp_golomb_unsigned(&q1);
|
||||
dvprint("slice_type= % 4llX\n", slice_type);
|
||||
|
||||
// Validate slice_type to prevent buffer overflow in slice_types[] array
|
||||
// Valid H.264 slice_type values are 0-9 (H.264 spec Table 7-6)
|
||||
if (slice_type >= 10)
|
||||
{
|
||||
mprint("Invalid slice_type %lld in slice header, skipping.\n", slice_type);
|
||||
return;
|
||||
}
|
||||
|
||||
tmp = read_exp_golomb_unsigned(&q1);
|
||||
dvprint("pic_parameter_set_id= % 4lld (%#llX)\n", tmp, tmp);
|
||||
|
||||
@@ -984,9 +996,9 @@ void slice_header(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, un
|
||||
|
||||
if (nal_unit_type == 5)
|
||||
{
|
||||
// idr_pic_id: Read to advance bitstream position; value not needed for caption extraction
|
||||
tmp = read_exp_golomb_unsigned(&q1);
|
||||
dvprint("idr_pic_id= % 4lld (%#llX)\n", tmp, tmp);
|
||||
// TODO
|
||||
}
|
||||
if (dec_ctx->avc_ctx->pic_order_cnt_type == 0)
|
||||
{
|
||||
|
||||
@@ -6,7 +6,7 @@ struct avc_ctx
|
||||
unsigned char cc_count;
|
||||
// buffer to hold cc data
|
||||
unsigned char *cc_data;
|
||||
long cc_databufsize;
|
||||
int64_t cc_databufsize;
|
||||
int cc_buffer_saved; // Was the CC buffer saved after it was last updated?
|
||||
|
||||
int is_hevc; // Flag to indicate HEVC (H.265) mode vs H.264
|
||||
@@ -19,11 +19,11 @@ struct avc_ctx
|
||||
int frame_mbs_only_flag;
|
||||
|
||||
// Use and throw stats for debug, remove this ugliness soon
|
||||
long num_nal_unit_type_7;
|
||||
long num_vcl_hrd;
|
||||
long num_nal_hrd;
|
||||
long num_jump_in_frames;
|
||||
long num_unexpected_sei_length;
|
||||
int64_t num_nal_unit_type_7;
|
||||
int64_t num_vcl_hrd;
|
||||
int64_t num_nal_hrd;
|
||||
int64_t num_jump_in_frames;
|
||||
int64_t num_unexpected_sei_length;
|
||||
|
||||
int ccblocks_in_avc_total;
|
||||
int ccblocks_in_avc_lost;
|
||||
|
||||
@@ -212,6 +212,7 @@ enum ccx_stream_mode_enum
|
||||
CCX_SM_GXF = 11,
|
||||
CCX_SM_MKV = 12,
|
||||
CCX_SM_MXF = 13,
|
||||
CCX_SM_SCC = 14, // Scenarist Closed Caption input
|
||||
|
||||
CCX_SM_AUTODETECT = 16
|
||||
};
|
||||
|
||||
@@ -74,6 +74,8 @@ void init_options(struct ccx_s_options *options)
|
||||
options->ocr_oem = -1; // By default, OEM mode depends on the tesseract version
|
||||
options->psm = 3; // Default PSM mode (3 is the default tesseract as well)
|
||||
options->ocr_quantmode = 0; // No quantization (better OCR accuracy for DVB subtitles)
|
||||
options->ocr_line_split = 0; // By default, don't split images into lines (pending testing)
|
||||
options->ocr_blacklist = 1; // By default, use character blacklist to prevent common OCR errors (| vs I, etc.)
|
||||
options->mkvlang = NULL; // By default, all the languages are extracted
|
||||
options->ignore_pts_jumps = 1;
|
||||
options->analyze_video_stream = 0;
|
||||
@@ -139,7 +141,9 @@ void init_options(struct ccx_s_options *options)
|
||||
options->enc_cfg.services_charsets = NULL;
|
||||
options->enc_cfg.all_services_charset = NULL;
|
||||
options->enc_cfg.with_semaphore = 0;
|
||||
options->enc_cfg.force_dropframe = 0; // Assume No Drop Frame for MCC Encode.
|
||||
options->enc_cfg.force_dropframe = 0; // Assume No Drop Frame for MCC Encode.
|
||||
options->enc_cfg.scc_framerate = 0; // Default: 29.97fps for SCC output
|
||||
options->enc_cfg.scc_accurate_timing = 0; // Default: off for backwards compatibility (issue #1120)
|
||||
options->enc_cfg.extract_only_708 = 0;
|
||||
|
||||
options->settings_dtvcc.enabled = 0;
|
||||
@@ -152,6 +156,8 @@ void init_options(struct ccx_s_options *options)
|
||||
options->settings_dtvcc.services_enabled, 0,
|
||||
CCX_DTVCC_MAX_SERVICES * sizeof(options->settings_dtvcc.services_enabled[0]));
|
||||
|
||||
options->scc_framerate = 0; // Default: 29.97fps
|
||||
|
||||
#ifdef WITH_LIBCURL
|
||||
options->curlposturl = NULL;
|
||||
#endif
|
||||
|
||||
@@ -75,6 +75,10 @@ struct encoder_cfg
|
||||
// MCC File
|
||||
int force_dropframe; // 1 if dropframe frame count should be used. defaults to no drop frame.
|
||||
|
||||
// SCC output framerate
|
||||
int scc_framerate; // SCC output framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
int scc_accurate_timing; // If 1, use bandwidth-aware timing for broadcast compliance (issue #1120)
|
||||
|
||||
// text -> png (text render)
|
||||
char *render_font; // The font used to render text if needed (e.g. teletext->spupng)
|
||||
char *render_font_italics;
|
||||
@@ -149,6 +153,8 @@ struct ccx_s_options // Options from user parameters
|
||||
int ocr_oem; // The Tesseract OEM mode, could be 0 (default), 1 or 2
|
||||
int psm; // The Tesseract PSM mode, could be between 0 and 13. 3 is tesseract default
|
||||
int ocr_quantmode; // How to quantize the bitmap before passing it to tesseract (0=no quantization at all, 1=CCExtractor's internal)
|
||||
int ocr_line_split; // If 1, split images into lines before OCR (uses PSM 7 for better accuracy)
|
||||
int ocr_blacklist; // If 1, use character blacklist to prevent common OCR errors (default: enabled)
|
||||
char *mkvlang; // The name of the language stream for MKV
|
||||
int analyze_video_stream; // If 1, the video stream will be processed even if we're using a different one for subtitles.
|
||||
|
||||
@@ -195,6 +201,7 @@ struct ccx_s_options // Options from user parameters
|
||||
int multiprogram;
|
||||
int out_interval;
|
||||
int segment_on_key_frames_only;
|
||||
int scc_framerate; // SCC input framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
#ifdef WITH_LIBCURL
|
||||
char *curlposturl;
|
||||
#endif
|
||||
|
||||
@@ -84,6 +84,9 @@ struct cc_subtitle
|
||||
/** Raw PTS value when this subtitle started (for DVB timing) */
|
||||
LLONG start_pts;
|
||||
|
||||
/** Teletext page number (for multi-page extraction, issue #665) */
|
||||
uint16_t teletext_page;
|
||||
|
||||
struct cc_subtitle *next;
|
||||
struct cc_subtitle *prev;
|
||||
};
|
||||
|
||||
@@ -201,6 +201,9 @@ void delete_to_end_of_row(ccx_decoder_608_context *context)
|
||||
{
|
||||
if (context->mode != MODE_TEXT)
|
||||
{
|
||||
if (context->cursor_row >= CCX_DECODER_608_SCREEN_ROWS)
|
||||
return;
|
||||
|
||||
struct eia608_screen *use_buffer = get_writing_buffer(context);
|
||||
for (int i = context->cursor_column; i <= CCX_DECODER_608_SCREEN_WIDTH - 1; i++)
|
||||
{
|
||||
@@ -221,6 +224,10 @@ void write_char(const unsigned char c, ccx_decoder_608_context *context)
|
||||
/* printf ("\rWriting char [%c] at %s:%d:%d\n",c,
|
||||
use_buffer == &wb->data608->buffer1?"B1":"B2",
|
||||
wb->data608->cursor_row,wb->data608->cursor_column); */
|
||||
|
||||
if (context->cursor_row >= CCX_DECODER_608_SCREEN_ROWS || context->cursor_column >= CCX_DECODER_608_SCREEN_WIDTH)
|
||||
return;
|
||||
|
||||
use_buffer->characters[context->cursor_row][context->cursor_column] = c;
|
||||
use_buffer->colors[context->cursor_row][context->cursor_column] = context->current_color;
|
||||
use_buffer->fonts[context->cursor_row][context->cursor_column] = context->font;
|
||||
@@ -316,10 +323,20 @@ int write_cc_buffer(ccx_decoder_608_context *context, struct cc_subtitle *sub)
|
||||
|
||||
if (!data->empty && context->output_format != CCX_OF_NULL)
|
||||
{
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, (sub->nb_data + 1) * sizeof(*data));
|
||||
size_t new_size;
|
||||
|
||||
if (sub->nb_data + 1 > SIZE_MAX / sizeof(struct eia608_screen))
|
||||
{
|
||||
ccx_common_logging.log_ftn("Too many screens, cannot allocate more memory.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
new_size = (sub->nb_data + 1) * sizeof(struct eia608_screen);
|
||||
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, new_size);
|
||||
if (!new_data)
|
||||
{
|
||||
ccx_common_logging.log_ftn("No Memory left");
|
||||
ccx_common_logging.log_ftn("Out of memory while reallocating screen buffer\n");
|
||||
return 0;
|
||||
}
|
||||
sub->data = new_data;
|
||||
@@ -386,10 +403,20 @@ int write_cc_line(ccx_decoder_608_context *context, struct cc_subtitle *sub)
|
||||
|
||||
if (!data->empty)
|
||||
{
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, (sub->nb_data + 1) * sizeof(*data));
|
||||
size_t new_size;
|
||||
|
||||
if (sub->nb_data + 1 > SIZE_MAX / sizeof(struct eia608_screen))
|
||||
{
|
||||
ccx_common_logging.log_ftn("Too many screens, cannot allocate more memory.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
new_size = (sub->nb_data + 1) * sizeof(struct eia608_screen);
|
||||
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, new_size);
|
||||
if (!new_data)
|
||||
{
|
||||
ccx_common_logging.log_ftn("No Memory left");
|
||||
ccx_common_logging.log_ftn("Out of memory while reallocating screen buffer\n");
|
||||
return 0;
|
||||
}
|
||||
sub->data = new_data;
|
||||
|
||||
@@ -48,7 +48,7 @@ typedef struct ccx_decoder_608_context
|
||||
int my_field; // Used for sanity checks
|
||||
int my_channel; // Used for sanity checks
|
||||
int rollup_from_popon; // Track transition from pop-on/paint-on to roll-up mode
|
||||
long bytes_processed_608; // To be written ONLY by process_608
|
||||
int64_t bytes_processed_608; // To be written ONLY by process_608
|
||||
int have_cursor_position;
|
||||
|
||||
int *halt; // Can be used to halt the feeding of caption data. Set to 1 if screens_to_progress != -1 && screenfuls_counter >= screens_to_process
|
||||
|
||||
@@ -998,6 +998,14 @@ void dtvcc_handle_DFx_DefineWindow(dtvcc_service_decoder *decoder, int window_id
|
||||
int row_count = (data[4] & 0xf) + 1; // according to CEA-708-D
|
||||
int anchor_point = data[4] >> 4;
|
||||
int col_count = (data[5] & 0x3f) + 1; // according to CEA-708-D
|
||||
|
||||
if (row_count > CCX_DTVCC_MAX_ROWS || col_count > CCX_DTVCC_MAX_COLUMNS)
|
||||
{
|
||||
ccx_common_logging.log_ftn("[CEA-708] Invalid window size %dx%d (max %dx%d), rejecting window definition\n",
|
||||
row_count, col_count, CCX_DTVCC_MAX_ROWS, CCX_DTVCC_MAX_COLUMNS);
|
||||
return;
|
||||
}
|
||||
|
||||
int pen_style = data[6] & 0x7;
|
||||
int win_style = (data[6] >> 3) & 0x7;
|
||||
|
||||
@@ -1341,6 +1349,14 @@ void dtvcc_handle_SPL_SetPenLocation(dtvcc_service_decoder *decoder, unsigned ch
|
||||
}
|
||||
|
||||
dtvcc_window *window = &decoder->windows[decoder->current_window];
|
||||
if (row >= window->row_count || col >= window->col_count)
|
||||
{
|
||||
ccx_common_logging.log_ftn("[CEA-708] dtvcc_handle_SPL_SetPenLocation: "
|
||||
"Invalid pen location %d:%d for window size %dx%d, rejecting command\n",
|
||||
row, col, window->row_count, window->col_count);
|
||||
return;
|
||||
}
|
||||
|
||||
window->pen_row = row;
|
||||
window->pen_column = col;
|
||||
}
|
||||
@@ -1479,7 +1495,12 @@ int dtvcc_handle_C0(dtvcc_ctx *dtvcc,
|
||||
else if (c0 >= 0x18 && c0 <= 0x1F)
|
||||
{
|
||||
if (c0 == DTVCC_C0_P16) // PE16
|
||||
dtvcc_handle_C0_P16(decoder, data + 1);
|
||||
{
|
||||
if (data_length >= 3)
|
||||
dtvcc_handle_C0_P16(decoder, data + 1);
|
||||
else
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_handle_C0: Not enough data for P16\n");
|
||||
}
|
||||
len = 3;
|
||||
}
|
||||
if (len == -1)
|
||||
@@ -1633,6 +1654,9 @@ int dtvcc_handle_extended_char(dtvcc_service_decoder *decoder, unsigned char *da
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] In dtvcc_handle_extended_char, "
|
||||
"first data code: [%c], length: [%u]\n",
|
||||
data[0], data_length);
|
||||
if (data_length < 1)
|
||||
return 0;
|
||||
|
||||
unsigned char c = 0x20; // Default to space
|
||||
unsigned char code = data[0];
|
||||
if (/* data[i]>=0x00 && */ code <= 0x1F) // Comment to silence warning
|
||||
@@ -1701,8 +1725,17 @@ void dtvcc_process_service_block(dtvcc_ctx *dtvcc,
|
||||
}
|
||||
else // Use extended set
|
||||
{
|
||||
used = dtvcc_handle_extended_char(decoder, data + i + 1, data_length - 1);
|
||||
used++; // Since we had DTVCC_C0_EXT1
|
||||
if (i + 1 >= data_length)
|
||||
{
|
||||
used = 1; // skip EXT1
|
||||
}
|
||||
else
|
||||
{
|
||||
used = dtvcc_handle_extended_char(decoder,
|
||||
data + i + 1,
|
||||
data_length - i - 1) +
|
||||
1;
|
||||
}
|
||||
}
|
||||
i += used;
|
||||
}
|
||||
@@ -1754,6 +1787,12 @@ void dtvcc_process_current_packet(dtvcc_ctx *dtvcc, int len)
|
||||
|
||||
if (service_number == 7) // There is an extended header
|
||||
{
|
||||
if (pos + 1 >= dtvcc->current_packet + len)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_current_packet: "
|
||||
"Truncated extended header, stopping.\n");
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
service_number = (pos[0] & 0x3F); // 6 more significant bits
|
||||
// printf ("Extended header: Service number: [%d]\n",service_number);
|
||||
|
||||
@@ -224,7 +224,12 @@ int do_cb(struct lib_cc_decode *ctx, unsigned char *cc_block, struct cc_subtitle
|
||||
void dinit_cc_decode(struct lib_cc_decode **ctx)
|
||||
{
|
||||
struct lib_cc_decode *lctx = *ctx;
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_free(lctx->dtvcc_rust);
|
||||
lctx->dtvcc_rust = NULL;
|
||||
#else
|
||||
dtvcc_free(&lctx->dtvcc);
|
||||
#endif
|
||||
dinit_avc(&lctx->avc_ctx);
|
||||
ccx_decoder_608_dinit_library(&lctx->context_cc608_field_1);
|
||||
ccx_decoder_608_dinit_library(&lctx->context_cc608_field_2);
|
||||
@@ -294,10 +299,16 @@ struct lib_cc_decode *init_cc_decode(struct ccx_decoders_common_settings_t *sett
|
||||
ctx->no_rollup = setting->no_rollup;
|
||||
ctx->noscte20 = setting->noscte20;
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
ctx->dtvcc_rust = ccxr_dtvcc_init(setting->settings_dtvcc);
|
||||
ctx->dtvcc = NULL; // Not used when Rust is enabled
|
||||
#else
|
||||
ctx->dtvcc = dtvcc_init(setting->settings_dtvcc);
|
||||
if (!ctx->dtvcc)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In init_cc_decode: Out of memory initializing dtvcc.");
|
||||
ctx->dtvcc->is_active = setting->settings_dtvcc->enabled;
|
||||
ctx->dtvcc_rust = NULL;
|
||||
#endif
|
||||
|
||||
if (setting->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
@@ -477,6 +488,13 @@ void flush_cc_decode(struct lib_cc_decode *ctx, struct cc_subtitle *sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef DISABLE_RUST
|
||||
if (ccxr_dtvcc_is_active(ctx->dtvcc_rust))
|
||||
{
|
||||
ctx->current_field = 3;
|
||||
ccxr_flush_active_decoders(ctx->dtvcc_rust);
|
||||
}
|
||||
#else
|
||||
if (ctx->dtvcc->is_active)
|
||||
{
|
||||
for (int i = 0; i < CCX_DTVCC_MAX_SERVICES; i++)
|
||||
@@ -491,6 +509,7 @@ void flush_cc_decode(struct lib_cc_decode *ctx, struct cc_subtitle *sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
struct encoder_ctx *copy_encoder_context(struct encoder_ctx *ctx)
|
||||
{
|
||||
|
||||
@@ -32,4 +32,10 @@ struct cc_subtitle *copy_subtitle(struct cc_subtitle *sub);
|
||||
void free_encoder_context(struct encoder_ctx *ctx);
|
||||
void free_decoder_context(struct lib_cc_decode *ctx);
|
||||
void free_subtitle(struct cc_subtitle *sub);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI function to flush active CEA-708 service decoders
|
||||
extern void ccxr_flush_active_decoders(void *dtvcc_rust);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -724,16 +724,17 @@ static int parse_csi(ISDBSubContext *ctx, const uint8_t *buf, int len)
|
||||
// Copy buf in arg
|
||||
for (i = 0; *buf != 0x20; i++)
|
||||
{
|
||||
if (i >= (sizeof(arg)) + 1)
|
||||
if (i >= sizeof(arg) - 1)
|
||||
{
|
||||
isdb_log("UnExpected CSI %d >= %d", sizeof(arg) + 1, i);
|
||||
isdb_log("UnExpected CSI: too long");
|
||||
break;
|
||||
}
|
||||
arg[i] = *buf;
|
||||
buf++;
|
||||
}
|
||||
/* ignore terminating 0x20 character */
|
||||
arg[i] = *buf++;
|
||||
if (i < sizeof(arg))
|
||||
arg[i] = *buf++;
|
||||
|
||||
switch (*buf)
|
||||
{
|
||||
|
||||
@@ -208,6 +208,7 @@ struct lib_cc_decode
|
||||
int false_pict_header;
|
||||
|
||||
dtvcc_ctx *dtvcc;
|
||||
void *dtvcc_rust; // Persistent Rust CEA-708 decoder context
|
||||
int current_field;
|
||||
// Analyse/use the picture information
|
||||
int maxtref; // Use to remember the temporal reference number
|
||||
|
||||
@@ -285,6 +285,9 @@ static void ccx_demuxer_print_cfg(struct ccx_demuxer *ctx)
|
||||
case CCX_SM_MXF:
|
||||
mprint("MXF");
|
||||
break;
|
||||
case CCX_SM_SCC:
|
||||
mprint("SCC");
|
||||
break;
|
||||
#ifdef WTV_DEBUG
|
||||
case CCX_SM_HEX_DUMP:
|
||||
mprint("Hex");
|
||||
@@ -348,7 +351,6 @@ struct ccx_demuxer *init_demuxer(void *parent, struct demuxer_cfg *cfg)
|
||||
{
|
||||
ctx->pinfo[i].got_important_streams_min_pts[j] = UINT64_MAX;
|
||||
}
|
||||
ctx->pinfo[i].initialized_ocr = 0;
|
||||
ctx->pinfo[i].version = 0xFF; // Not real in a real stream since it's 5 bits. FF => Not initialized
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ struct program_info
|
||||
{
|
||||
int pid;
|
||||
int program_number;
|
||||
int initialized_ocr; // Avoid initializing the OCR more than once
|
||||
uint8_t analysed_PMT_once : 1;
|
||||
uint8_t version;
|
||||
uint8_t saved_section[1021];
|
||||
@@ -57,9 +56,9 @@ struct cap_info
|
||||
int program_number;
|
||||
enum ccx_stream_type stream;
|
||||
enum ccx_code_type codec;
|
||||
long capbufsize;
|
||||
int64_t capbufsize;
|
||||
unsigned char *capbuf;
|
||||
long capbuflen; // Bytes read in capbuf
|
||||
int64_t capbuflen; // Bytes read in capbuf
|
||||
int saw_pesstart;
|
||||
int prev_counter;
|
||||
void *codec_private_data;
|
||||
|
||||
@@ -75,12 +75,15 @@ enum MXFLocalTag
|
||||
void update_tid_lut(struct MXFContext *ctx, uint32_t track_id, uint8_t *track_number, struct ccx_rational edit_rate)
|
||||
{
|
||||
int i;
|
||||
debug("update_tid_lut: track_id=%u (0x%x), track_number=%02X%02X%02X%02X, cap_track_id=%u\n",
|
||||
track_id, track_id, track_number[0], track_number[1], track_number[2], track_number[3], ctx->cap_track_id);
|
||||
// Update essence element key if we have track Id of caption
|
||||
if (ctx->cap_track_id == track_id)
|
||||
{
|
||||
memcpy(ctx->cap_essence_key, mxf_essence_element_key, 12);
|
||||
memcpy(ctx->cap_essence_key + 12, track_number, 4);
|
||||
ctx->edit_rate = edit_rate;
|
||||
debug("MXF: Found caption track, track_id=%u\n", track_id);
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->nb_tracks; i++)
|
||||
@@ -248,6 +251,7 @@ static int mxf_read_vanc_vbi_desc(struct ccx_demuxer *demux, uint64_t size)
|
||||
{
|
||||
case MXF_TAG_LTRACK_ID:
|
||||
ctx->cap_track_id = buffered_get_be32(demux);
|
||||
debug("MXF: VANC/VBI descriptor found, Linked Track ID = %u\n", ctx->cap_track_id);
|
||||
update_cap_essence_key(ctx, ctx->cap_track_id);
|
||||
break;
|
||||
default:
|
||||
@@ -304,6 +308,17 @@ static int mxf_read_cdp_data(struct ccx_demuxer *demux, int size, struct demuxer
|
||||
log("Incomplete CDP packet\n");
|
||||
|
||||
ret = buffered_read(demux, data->buffer + data->len, cc_count * 3);
|
||||
// Log first few bytes of cc_data for debugging
|
||||
if (cc_count > 0)
|
||||
{
|
||||
unsigned char *cc_ptr = data->buffer + data->len;
|
||||
debug("cc_data (first 6 triplets): ");
|
||||
for (int j = 0; j < (cc_count < 6 ? cc_count : 6); j++)
|
||||
{
|
||||
debug("%02X%02X%02X ", cc_ptr[j * 3], cc_ptr[j * 3 + 1], cc_ptr[j * 3 + 2]);
|
||||
}
|
||||
debug("\n");
|
||||
}
|
||||
data->len += cc_count * 3;
|
||||
demux->past += cc_count * 3;
|
||||
len += ret;
|
||||
@@ -361,7 +376,10 @@ static int mxf_read_vanc_data(struct ccx_demuxer *demux, uint64_t size, struct d
|
||||
// uint8_t count; /* Currently unused */
|
||||
|
||||
if (size < 19)
|
||||
{
|
||||
debug("VANC data too small: %" PRIu64 " < 19\n", size);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = buffered_read(demux, vanc_header, 16);
|
||||
|
||||
@@ -370,31 +388,39 @@ static int mxf_read_vanc_data(struct ccx_demuxer *demux, uint64_t size, struct d
|
||||
return CCX_EOF;
|
||||
len += ret;
|
||||
|
||||
debug("VANC header: num_packets=%d, line=0x%02x%02x, wrap_type=0x%02x, sample_config=0x%02x\n",
|
||||
vanc_header[1], vanc_header[2], vanc_header[3], vanc_header[4], vanc_header[5]);
|
||||
|
||||
for (int i = 0; i < vanc_header[1]; i++)
|
||||
{
|
||||
DID = buffered_get_byte(demux);
|
||||
len++;
|
||||
debug("VANC packet %d: DID=0x%02x\n", i, DID);
|
||||
if (!(DID == 0x61 || DID == 0x80))
|
||||
{
|
||||
debug("DID 0x%02x not recognized as caption DID\n", DID);
|
||||
goto error;
|
||||
}
|
||||
|
||||
SDID = buffered_get_byte(demux);
|
||||
len++;
|
||||
debug("VANC packet %d: SDID=0x%02x\n", i, SDID);
|
||||
if (SDID == 0x01)
|
||||
debug("Caption Type 708\n");
|
||||
else if (SDID == 0x02)
|
||||
debug("Caption Type 608\n");
|
||||
|
||||
cdp_size = buffered_get_byte(demux);
|
||||
debug("VANC packet %d: cdp_size=%d\n", i, cdp_size);
|
||||
if (cdp_size + 19 > size)
|
||||
{
|
||||
debug("Incomplete cdp(%d) in anc data(%d)\n", cdp_size, size);
|
||||
log("Incomplete cdp(%d) in anc data(%" PRIu64 ")\n", cdp_size, size);
|
||||
goto error;
|
||||
}
|
||||
len++;
|
||||
|
||||
ret = mxf_read_cdp_data(demux, cdp_size, data);
|
||||
debug("mxf_read_cdp_data returned %d, data->len=%d\n", ret, data->len);
|
||||
len += ret;
|
||||
// len += (3 + count + 4);
|
||||
}
|
||||
@@ -411,15 +437,33 @@ static int mxf_read_essence_element(struct ccx_demuxer *demux, uint64_t size, st
|
||||
int ret;
|
||||
struct MXFContext *ctx = demux->private_data;
|
||||
|
||||
debug("mxf_read_essence_element: ctx->type=%d (ANC=%d, VBI=%d), size=%" PRIu64 "\n",
|
||||
ctx->type, MXF_CT_ANC, MXF_CT_VBI, size);
|
||||
|
||||
if (ctx->type == MXF_CT_ANC)
|
||||
{
|
||||
data->bufferdatatype = CCX_RAW_TYPE;
|
||||
ret = mxf_read_vanc_data(demux, size, data);
|
||||
data->pts = ctx->cap_count;
|
||||
debug("mxf_read_vanc_data returned %d, data->len=%d\n", ret, data->len);
|
||||
// Calculate PTS in 90kHz units from frame count and edit rate
|
||||
// edit_rate is frames per second (e.g., 25/1 for 25fps)
|
||||
// PTS = frame_count * 90000 / fps = frame_count * 90000 * edit_rate.den / edit_rate.num
|
||||
if (ctx->edit_rate.num > 0 && ctx->edit_rate.den > 0)
|
||||
{
|
||||
data->pts = (int64_t)ctx->cap_count * 90000 * ctx->edit_rate.den / ctx->edit_rate.num;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback to 25fps if edit_rate not set
|
||||
data->pts = (int64_t)ctx->cap_count * 90000 / 25;
|
||||
}
|
||||
debug("Frame %d, PTS=%" PRId64 " (edit_rate=%d/%d)\n",
|
||||
ctx->cap_count, data->pts, ctx->edit_rate.num, ctx->edit_rate.den);
|
||||
ctx->cap_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
debug("Skipping essence element (not ANC type)\n");
|
||||
ret = buffered_skip(demux, size);
|
||||
demux->past += ret;
|
||||
}
|
||||
@@ -514,6 +558,7 @@ static int read_packet(struct ccx_demuxer *demux, struct demuxer_data *data)
|
||||
KLVPacket klv;
|
||||
const MXFReadTableEntry *reader;
|
||||
struct MXFContext *ctx = demux->private_data;
|
||||
static int first_essence_logged = 0;
|
||||
while ((ret = klv_read_packet(&klv, demux)) == 0)
|
||||
{
|
||||
debug("Key %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X size %" PRIu64 "\n",
|
||||
@@ -523,8 +568,25 @@ static int read_packet(struct ccx_demuxer *demux, struct demuxer_data *data)
|
||||
klv.key[12], klv.key[13], klv.key[14], klv.key[15],
|
||||
klv.length);
|
||||
|
||||
// Check if this is an essence element key (first 12 bytes match)
|
||||
if (IS_KLV_KEY(klv.key, mxf_essence_element_key) && !first_essence_logged)
|
||||
{
|
||||
debug("MXF: First essence element key: %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X\n",
|
||||
klv.key[0], klv.key[1], klv.key[2], klv.key[3],
|
||||
klv.key[4], klv.key[5], klv.key[6], klv.key[7],
|
||||
klv.key[8], klv.key[9], klv.key[10], klv.key[11],
|
||||
klv.key[12], klv.key[13], klv.key[14], klv.key[15]);
|
||||
debug("MXF: cap_essence_key: %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X\n",
|
||||
ctx->cap_essence_key[0], ctx->cap_essence_key[1], ctx->cap_essence_key[2], ctx->cap_essence_key[3],
|
||||
ctx->cap_essence_key[4], ctx->cap_essence_key[5], ctx->cap_essence_key[6], ctx->cap_essence_key[7],
|
||||
ctx->cap_essence_key[8], ctx->cap_essence_key[9], ctx->cap_essence_key[10], ctx->cap_essence_key[11],
|
||||
ctx->cap_essence_key[12], ctx->cap_essence_key[13], ctx->cap_essence_key[14], ctx->cap_essence_key[15]);
|
||||
first_essence_logged = 1;
|
||||
}
|
||||
|
||||
if (IS_KLV_KEY(klv.key, ctx->cap_essence_key))
|
||||
{
|
||||
debug("MXF: Found ANC essence element, size=%" PRIu64 "\n", klv.length);
|
||||
mxf_read_essence_element(demux, klv.length, data);
|
||||
if (data->len > 0)
|
||||
break;
|
||||
@@ -566,8 +628,15 @@ int ccx_mxf_getmoredata(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
data->program_number = 1;
|
||||
data->stream_pid = 1;
|
||||
data->codec = CCX_CODEC_ATSC_CC;
|
||||
data->tb.num = 1001;
|
||||
data->tb.den = 30000;
|
||||
// PTS is already calculated in 90kHz units by mxf_read_essence_element
|
||||
data->tb.num = 1;
|
||||
data->tb.den = 90000;
|
||||
|
||||
// Enable CEA-708 (DTVCC) decoder for MXF files with VANC captions
|
||||
if (ctx->dec_global_setting && ctx->dec_global_setting->settings_dtvcc)
|
||||
{
|
||||
ctx->dec_global_setting->settings_dtvcc->enabled = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -576,6 +645,11 @@ int ccx_mxf_getmoredata(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
|
||||
ret = read_packet(ctx->demux_ctx, data);
|
||||
|
||||
// Ensure timebase is 90kHz since PTS is calculated in 90kHz units
|
||||
// CDP parsing may have set a frame-based timebase which would cause incorrect conversion
|
||||
data->tb.num = 1;
|
||||
data->tb.den = 90000;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: DTVCC Channel Packet Data\n");
|
||||
if (cc_valid && dtvcc->is_current_packet_header_parsed)
|
||||
{
|
||||
if (dtvcc->current_packet_length > 253)
|
||||
if (dtvcc->current_packet_length + 2 > CCX_DTVCC_MAX_PACKET_LENGTH)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: "
|
||||
"Warning: Legal packet size exceeded (1), data not added.\n");
|
||||
@@ -51,7 +51,7 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: DTVCC Channel Packet Start\n");
|
||||
if (cc_valid)
|
||||
{
|
||||
if (dtvcc->current_packet_length > CCX_DTVCC_MAX_PACKET_LENGTH - 1)
|
||||
if (dtvcc->current_packet_length + 2 > CCX_DTVCC_MAX_PACKET_LENGTH)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: "
|
||||
"Warning: Legal packet size exceeded (2), data not added.\n");
|
||||
|
||||
@@ -10,4 +10,14 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
dtvcc_ctx *dtvcc_init(ccx_decoder_dtvcc_settings *opts);
|
||||
void dtvcc_free(dtvcc_ctx **);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI functions for persistent CEA-708 decoder
|
||||
extern void *ccxr_dtvcc_init(struct ccx_decoder_dtvcc_settings *settings_dtvcc);
|
||||
extern void ccxr_dtvcc_free(void *dtvcc_rust);
|
||||
extern void ccxr_dtvcc_process_data(void *dtvcc_rust, const unsigned char cc_valid,
|
||||
const unsigned char cc_type, const unsigned char data1, const unsigned char data2);
|
||||
extern int ccxr_dtvcc_is_active(void *dtvcc_rust);
|
||||
extern void ccxr_dtvcc_set_active(void *dtvcc_rust, int active);
|
||||
#endif
|
||||
|
||||
#endif // CCEXTRACTOR_CCX_DTVCC_H
|
||||
|
||||
@@ -176,6 +176,14 @@ int write_subtitle_file_footer(struct encoder_ctx *ctx, struct ccx_s_write *out)
|
||||
case CCX_OF_CCD:
|
||||
ret = write(out->fh, ctx->encoded_crlf, ctx->encoded_crlf_length);
|
||||
break;
|
||||
case CCX_OF_WEBVTT:
|
||||
// Ensure WebVTT header is written even if no subtitles were found (issue #1743)
|
||||
// This is required for HLS compatibility
|
||||
if (!ctx->wrote_webvtt_header)
|
||||
{
|
||||
write_webvtt_header(ctx);
|
||||
}
|
||||
break;
|
||||
default: // Nothing to do, no footer on this format
|
||||
break;
|
||||
}
|
||||
@@ -719,6 +727,9 @@ void dinit_encoder(struct encoder_ctx **arg, LLONG current_fts)
|
||||
write_subtitle_file_footer(ctx, ctx->out + i);
|
||||
}
|
||||
|
||||
// Clean up teletext multi-page output files (issue #665)
|
||||
dinit_teletext_outputs(ctx);
|
||||
|
||||
free_encoder_context(ctx->prev);
|
||||
dinit_output_ctx(ctx);
|
||||
freep(&ctx->subline);
|
||||
@@ -772,6 +783,7 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
|
||||
return NULL;
|
||||
}
|
||||
ctx->in_fileformat = opt->in_format;
|
||||
ctx->is_pal = (opt->in_format == 2);
|
||||
|
||||
/** used in case of SUB_EOD_MARKER */
|
||||
ctx->prev_start = -1;
|
||||
@@ -837,6 +849,19 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
|
||||
ctx->segment_pending = 0;
|
||||
ctx->segment_last_key_frame = 0;
|
||||
ctx->nospupngocr = opt->nospupngocr;
|
||||
ctx->scc_framerate = opt->scc_framerate;
|
||||
ctx->scc_accurate_timing = opt->scc_accurate_timing;
|
||||
ctx->scc_last_transmission_end = 0;
|
||||
ctx->scc_last_display_end = 0;
|
||||
|
||||
// Initialize teletext multi-page output arrays (issue #665)
|
||||
ctx->tlt_out_count = 0;
|
||||
for (int i = 0; i < MAX_TLT_PAGES_EXTRACT; i++)
|
||||
{
|
||||
ctx->tlt_out[i] = NULL;
|
||||
ctx->tlt_out_pages[i] = 0;
|
||||
ctx->tlt_srt_counter[i] = 0;
|
||||
}
|
||||
|
||||
ctx->prev = NULL;
|
||||
return ctx;
|
||||
@@ -1033,6 +1058,28 @@ int encode_sub(struct encoder_ctx *context, struct cc_subtitle *sub)
|
||||
freep(&sub->data);
|
||||
break;
|
||||
case CC_BITMAP:;
|
||||
// Apply subs_delay to bitmap subtitles (DVB, DVD, etc.)
|
||||
// This is the same as what's done for CC_608 above
|
||||
sub->start_time += context->subs_delay;
|
||||
sub->end_time += context->subs_delay;
|
||||
|
||||
// After adding delay, if start/end time is lower than 0, skip this subtitle
|
||||
if (sub->start_time < 0 || sub->end_time <= 0)
|
||||
{
|
||||
// Free bitmap data to avoid memory leak
|
||||
if (sub->datatype == CC_DATATYPE_DVB)
|
||||
{
|
||||
struct cc_bitmap *bitmap_tmp = (struct cc_bitmap *)sub->data;
|
||||
if (bitmap_tmp)
|
||||
{
|
||||
freep(&bitmap_tmp->data0);
|
||||
freep(&bitmap_tmp->data1);
|
||||
}
|
||||
}
|
||||
freep(&sub->data);
|
||||
sub->nb_data = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
struct cc_bitmap *rect;
|
||||
@@ -1298,3 +1345,168 @@ void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, in
|
||||
enc_ctx->cea_708_counter = 0;
|
||||
enc_ctx->srt_counter = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get or create the output file for a specific teletext page (issue #665)
|
||||
* Creates output files on-demand with suffix _pNNN (e.g., output_p891.srt)
|
||||
* Returns NULL if we're in stdout mode or if too many pages are being extracted
|
||||
*/
|
||||
struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page)
|
||||
{
|
||||
// If teletext_page is 0, use the default output
|
||||
if (teletext_page == 0 || ctx->out == NULL)
|
||||
return ctx->out;
|
||||
|
||||
// Check if we're sending to stdout - can't do multi-page in that case
|
||||
if (ctx->out[0].fh == STDOUT_FILENO)
|
||||
return ctx->out;
|
||||
|
||||
// Check if we already have an output file for this page
|
||||
for (int i = 0; i < ctx->tlt_out_count; i++)
|
||||
{
|
||||
if (ctx->tlt_out_pages[i] == teletext_page)
|
||||
return ctx->tlt_out[i];
|
||||
}
|
||||
|
||||
// If we only have one teletext page requested, use the default output
|
||||
// (no suffix needed for backward compatibility)
|
||||
extern struct ccx_s_teletext_config tlt_config;
|
||||
if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages)
|
||||
return ctx->out;
|
||||
|
||||
// Need to create a new output file for this page
|
||||
if (ctx->tlt_out_count >= MAX_TLT_PAGES_EXTRACT)
|
||||
{
|
||||
mprint("Warning: Too many teletext pages to extract (max %d), using default output for page %03d\n",
|
||||
MAX_TLT_PAGES_EXTRACT, teletext_page);
|
||||
return ctx->out;
|
||||
}
|
||||
|
||||
// Allocate the new write structure
|
||||
struct ccx_s_write *new_out = (struct ccx_s_write *)malloc(sizeof(struct ccx_s_write));
|
||||
if (!new_out)
|
||||
{
|
||||
mprint("Error: Memory allocation failed for teletext output\n");
|
||||
return ctx->out;
|
||||
}
|
||||
memset(new_out, 0, sizeof(struct ccx_s_write));
|
||||
|
||||
// Create the filename with page suffix
|
||||
const char *ext = get_file_extension(ctx->write_format);
|
||||
char suffix[16];
|
||||
snprintf(suffix, sizeof(suffix), "_p%03d", teletext_page);
|
||||
|
||||
char *basefilename = NULL;
|
||||
if (ctx->out[0].filename != NULL)
|
||||
{
|
||||
basefilename = get_basename(ctx->out[0].filename);
|
||||
}
|
||||
else if (ctx->first_input_file != NULL)
|
||||
{
|
||||
basefilename = get_basename(ctx->first_input_file);
|
||||
}
|
||||
else
|
||||
{
|
||||
basefilename = strdup("untitled");
|
||||
}
|
||||
|
||||
if (basefilename == NULL)
|
||||
{
|
||||
free(new_out);
|
||||
return ctx->out;
|
||||
}
|
||||
|
||||
char *filename = create_outfilename(basefilename, suffix, ext);
|
||||
free(basefilename);
|
||||
|
||||
if (filename == NULL)
|
||||
{
|
||||
free(new_out);
|
||||
return ctx->out;
|
||||
}
|
||||
|
||||
// Open the file
|
||||
new_out->filename = filename;
|
||||
new_out->fh = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE);
|
||||
if (new_out->fh == -1)
|
||||
{
|
||||
mprint("Error: Failed to open output file %s: %s\n", filename, strerror(errno));
|
||||
free(filename);
|
||||
free(new_out);
|
||||
return ctx->out;
|
||||
}
|
||||
|
||||
mprint("Creating teletext output file: %s\n", filename);
|
||||
|
||||
// Store in our array
|
||||
int idx = ctx->tlt_out_count;
|
||||
ctx->tlt_out[idx] = new_out;
|
||||
ctx->tlt_out_pages[idx] = teletext_page;
|
||||
ctx->tlt_srt_counter[idx] = 0;
|
||||
ctx->tlt_out_count++;
|
||||
|
||||
// Write the subtitle file header
|
||||
write_subtitle_file_header(ctx, new_out);
|
||||
|
||||
return new_out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the SRT counter for a specific teletext page (issue #665)
|
||||
* Returns pointer to the counter, or NULL if page not found
|
||||
*/
|
||||
unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page)
|
||||
{
|
||||
// If teletext_page is 0, use the default counter
|
||||
if (teletext_page == 0)
|
||||
return &ctx->srt_counter;
|
||||
|
||||
// Check if we're using multi-page mode
|
||||
extern struct ccx_s_teletext_config tlt_config;
|
||||
if (tlt_config.num_user_pages <= 1 && !tlt_config.extract_all_pages)
|
||||
return &ctx->srt_counter;
|
||||
|
||||
// Find the counter for this page
|
||||
for (int i = 0; i < ctx->tlt_out_count; i++)
|
||||
{
|
||||
if (ctx->tlt_out_pages[i] == teletext_page)
|
||||
return &ctx->tlt_srt_counter[i];
|
||||
}
|
||||
|
||||
// Not found, use default counter
|
||||
return &ctx->srt_counter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up all teletext output files (issue #665)
|
||||
*/
|
||||
void dinit_teletext_outputs(struct encoder_ctx *ctx)
|
||||
{
|
||||
if (!ctx)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < ctx->tlt_out_count; i++)
|
||||
{
|
||||
if (ctx->tlt_out[i] != NULL)
|
||||
{
|
||||
// Write footer
|
||||
write_subtitle_file_footer(ctx, ctx->tlt_out[i]);
|
||||
|
||||
// Close file
|
||||
if (ctx->tlt_out[i]->fh != -1)
|
||||
{
|
||||
close(ctx->tlt_out[i]->fh);
|
||||
}
|
||||
|
||||
// Free filename
|
||||
if (ctx->tlt_out[i]->filename != NULL)
|
||||
{
|
||||
free(ctx->tlt_out[i]->filename);
|
||||
}
|
||||
|
||||
free(ctx->tlt_out[i]);
|
||||
ctx->tlt_out[i] = NULL;
|
||||
}
|
||||
}
|
||||
ctx->tlt_out_count = 0;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,11 @@
|
||||
#include "ccx_encoders_structs.h"
|
||||
#include "ccx_common_option.h"
|
||||
|
||||
// Maximum number of teletext pages to extract simultaneously (issue #665)
|
||||
#ifndef MAX_TLT_PAGES_EXTRACT
|
||||
#define MAX_TLT_PAGES_EXTRACT 8
|
||||
#endif
|
||||
|
||||
#define REQUEST_BUFFER_CAPACITY(ctx, length) \
|
||||
if (length > ctx->capacity) \
|
||||
{ \
|
||||
@@ -148,6 +153,14 @@ struct encoder_ctx
|
||||
unsigned int cdp_hdr_seq;
|
||||
int force_dropframe;
|
||||
|
||||
// SCC output framerate
|
||||
int scc_framerate; // SCC output framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
|
||||
// SCC accurate timing (issue #1120)
|
||||
int scc_accurate_timing; // If 1, use bandwidth-aware timing for broadcast compliance
|
||||
LLONG scc_last_transmission_end; // When last caption transmission ends (ms)
|
||||
LLONG scc_last_display_end; // When last caption display ends (ms)
|
||||
|
||||
int new_sentence; // Capitalize next letter?
|
||||
|
||||
int program_number;
|
||||
@@ -169,6 +182,12 @@ struct encoder_ctx
|
||||
|
||||
// OCR in SPUPNG
|
||||
int nospupngocr;
|
||||
int is_pal;
|
||||
|
||||
struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page
|
||||
uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot
|
||||
unsigned int tlt_srt_counter[MAX_TLT_PAGES_EXTRACT]; // SRT counter per page
|
||||
int tlt_out_count; // Number of teletext output files
|
||||
};
|
||||
|
||||
#define INITIAL_ENC_BUFFER_CAPACITY 2048
|
||||
@@ -244,6 +263,9 @@ int write_cc_bitmap_as_libcurl(struct cc_subtitle *sub, struct encoder_ctx *cont
|
||||
void write_spumux_header(struct encoder_ctx *ctx, struct ccx_s_write *out);
|
||||
void write_spumux_footer(struct ccx_s_write *out);
|
||||
|
||||
// WebVTT header writer (issue #1743 - ensures header is written even for empty files)
|
||||
void write_webvtt_header(struct encoder_ctx *context);
|
||||
|
||||
struct cc_subtitle *reformat_cc_bitmap_through_sentence_buffer(struct cc_subtitle *sub, struct encoder_ctx *context);
|
||||
|
||||
void set_encoder_last_displayed_subs_ms(struct encoder_ctx *ctx, LLONG last_displayed_subs_ms);
|
||||
@@ -263,4 +285,9 @@ unsigned int get_font_encoded(struct encoder_ctx *ctx, unsigned char *buffer, in
|
||||
|
||||
struct lib_ccx_ctx;
|
||||
void switch_output_file(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx, int track_id);
|
||||
|
||||
// Teletext multi-page output (issue #665)
|
||||
struct ccx_s_write *get_teletext_output(struct encoder_ctx *ctx, uint16_t teletext_page);
|
||||
unsigned int *get_teletext_srt_counter(struct encoder_ctx *ctx, uint16_t teletext_page);
|
||||
void dinit_teletext_outputs(struct encoder_ctx *ctx);
|
||||
#endif
|
||||
|
||||
@@ -10,6 +10,171 @@ unsigned char odd_parity(const unsigned char byte)
|
||||
return byte | !(cc608_parity(byte) % 2) << 7;
|
||||
}
|
||||
|
||||
/**
|
||||
* SCC Accurate Timing Implementation (Issue #1120)
|
||||
*
|
||||
* EIA-608 bandwidth constraints:
|
||||
* - 2 bytes per frame at 29.97 FPS (or configured frame rate)
|
||||
* - Captions must be pre-loaded before display time
|
||||
* - Each control code takes 2 bytes (sent twice for reliability = 4 bytes total)
|
||||
* - Text characters take 1 byte each
|
||||
*/
|
||||
|
||||
// Map the scc_framerate setting to frames per second.
// 1 -> 24, 2 -> 25, 3 -> 30; 0 and any other value -> 29.97 (default).
static float get_scc_fps_internal(int scc_framerate)
{
	if (scc_framerate == 1)
		return 24.0f;
	if (scc_framerate == 2)
		return 25.0f;
	if (scc_framerate == 3)
		return 30.0f;

	return 29.97f;
}
|
||||
|
||||
/**
|
||||
* Calculate total bytes needed to transmit a caption
|
||||
*
|
||||
* Byte costs:
|
||||
* - Control code (RCL, EOC, ENM, EDM): 2 bytes x 2 (sent twice) = 4 bytes
|
||||
* - Preamble code: 2 bytes x 2 = 4 bytes
|
||||
* - Tab offset: 2 bytes x 2 = 4 bytes
|
||||
* - Mid-row code (color/style): 2 bytes x 2 = 4 bytes
|
||||
* - Text character: 1 byte each
|
||||
* - Padding: 1 byte if odd number of text bytes
|
||||
*/
|
||||
static unsigned int calculate_caption_bytes(const struct eia608_screen *data)
|
||||
{
|
||||
unsigned int total_bytes = 0;
|
||||
|
||||
// RCL (Resume Caption Loading): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
for (unsigned char row = 0; row < 15; ++row)
|
||||
{
|
||||
if (!data->row_used[row])
|
||||
continue;
|
||||
|
||||
int first, last;
|
||||
find_limit_characters(data->characters[row], &first, &last, CCX_DECODER_608_SCREEN_WIDTH);
|
||||
|
||||
if (first > last)
|
||||
continue;
|
||||
|
||||
// Assume we need at least one preamble per row: 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
// Count characters on this row
|
||||
unsigned int char_count = 0;
|
||||
enum font_bits prev_font = FONT_REGULAR;
|
||||
enum ccx_decoder_608_color_code prev_color = COL_WHITE;
|
||||
int prev_col = -1;
|
||||
|
||||
for (int col = first; col <= last; ++col)
|
||||
{
|
||||
// Check if we need position codes
|
||||
if (prev_col != col - 1 && prev_col != -1)
|
||||
{
|
||||
// Need preamble + possible tab offset: 4-8 bytes
|
||||
total_bytes += 4;
|
||||
if (col % 4 != 0)
|
||||
total_bytes += 4; // Tab offset
|
||||
}
|
||||
|
||||
// Check if we need mid-row style codes
|
||||
if (data->fonts[row][col] != prev_font || data->colors[row][col] != prev_color)
|
||||
{
|
||||
total_bytes += 4; // Mid-row code
|
||||
prev_font = data->fonts[row][col];
|
||||
prev_color = data->colors[row][col];
|
||||
}
|
||||
|
||||
// Text character
|
||||
char_count++;
|
||||
prev_col = col;
|
||||
}
|
||||
|
||||
// Add text bytes (1 per character, rounded up to even)
|
||||
total_bytes += char_count;
|
||||
if (char_count % 2 == 1)
|
||||
total_bytes++; // Padding
|
||||
}
|
||||
|
||||
// EOC (End of Caption): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
// ENM (Erase Non-displayed Memory): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
return total_bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the pre-roll start time for a caption
|
||||
*
|
||||
* @param display_time When the caption should appear on screen (ms)
|
||||
* @param total_bytes Total bytes to transmit
|
||||
* @param fps Frame rate
|
||||
* @return Time to begin loading the caption (ms)
|
||||
*/
|
||||
static LLONG calculate_preroll_time(LLONG display_time, unsigned int total_bytes, float fps)
|
||||
{
|
||||
// Calculate transmission time in milliseconds
|
||||
// 2 bytes per frame, so frames_needed = (total_bytes + 1) / 2
|
||||
float ms_per_frame = 1000.0f / fps;
|
||||
unsigned int frames_needed = (total_bytes + 1) / 2;
|
||||
LLONG transmission_time_ms = (LLONG)(frames_needed * ms_per_frame);
|
||||
|
||||
// Add 1 frame for EOC to be sent before display
|
||||
LLONG one_frame_ms = (LLONG)ms_per_frame;
|
||||
|
||||
LLONG preroll_start = display_time - transmission_time_ms - one_frame_ms;
|
||||
|
||||
// Don't go negative
|
||||
if (preroll_start < 0)
|
||||
preroll_start = 0;
|
||||
|
||||
return preroll_start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for collision with previous caption transmission and resolve it
|
||||
*
|
||||
* @param context Encoder context with timing state
|
||||
* @param preroll_start Proposed pre-roll start time (will be modified if collision)
|
||||
* @param display_time Caption display time (may be adjusted)
|
||||
* @param fps Frame rate
|
||||
* @return true if timing was adjusted due to collision
|
||||
*/
|
||||
static bool resolve_collision(struct encoder_ctx *context, LLONG *preroll_start,
|
||||
LLONG *display_time, float fps)
|
||||
{
|
||||
// Check if our preroll would start before previous caption finishes transmitting
|
||||
// This prevents bandwidth collision but allows visual overlap (like scc_tools)
|
||||
// Visual overlap is fine - the EOC command swaps buffers atomically
|
||||
if (context->scc_last_transmission_end > 0 &&
|
||||
*preroll_start < context->scc_last_transmission_end)
|
||||
{
|
||||
// Bandwidth collision detected - shift our caption forward
|
||||
// Add 1 frame buffer to ensure no overlap
|
||||
LLONG one_frame_ms = (LLONG)(1000.0f / fps);
|
||||
LLONG new_preroll = context->scc_last_transmission_end + one_frame_ms;
|
||||
LLONG shift = new_preroll - *preroll_start;
|
||||
|
||||
*preroll_start = new_preroll;
|
||||
*display_time += shift;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct control_code_info
|
||||
{
|
||||
unsigned int byte1_odd;
|
||||
@@ -484,14 +649,156 @@ void write_control_code(const int fd, const unsigned char channel, const enum co
|
||||
* @param row 0-14 (inclusive)
|
||||
* @param column 0-31 (inclusive)
|
||||
*
|
||||
* //TODO: Preamble code need to take into account font as well
|
||||
*
|
||||
* Returns an indent-based preamble code (positions cursor at column with white color)
|
||||
*/
|
||||
enum control_code get_preamble_code(const unsigned char row, const unsigned char column)
|
||||
{
|
||||
return PREAMBLE_CC_START + 1 + (row * 8) + (column / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get byte2 value for a styled PAC (color/font at column 0)
|
||||
* Returns 0x40-0x4F or 0x60-0x6F depending on the style
|
||||
*
|
||||
* @param color The color to use
|
||||
* @param font The font style to use
|
||||
* @param use_high_range If true, use 0x60-0x6F range instead of 0x40-0x4F
|
||||
*
|
||||
* PAC style encoding (byte2):
|
||||
* 0x40/0x60: white, regular 0x41/0x61: white, underline
|
||||
* 0x42/0x62: green, regular 0x43/0x63: green, underline
|
||||
* 0x44/0x64: blue, regular 0x45/0x65: blue, underline
|
||||
* 0x46/0x66: cyan, regular 0x47/0x67: cyan, underline
|
||||
* 0x48/0x68: red, regular 0x49/0x69: red, underline
|
||||
* 0x4a/0x6a: yellow, regular 0x4b/0x6b: yellow, underline
|
||||
* 0x4c/0x6c: magenta, regular 0x4d/0x6d: magenta, underline
|
||||
* 0x4e/0x6e: white, italics 0x4f/0x6f: white, italic underline
|
||||
*/
|
||||
static unsigned char get_styled_pac_byte2(enum ccx_decoder_608_color_code color, enum font_bits font, bool use_high_range)
|
||||
{
|
||||
unsigned char base = use_high_range ? 0x60 : 0x40;
|
||||
unsigned char style_offset;
|
||||
|
||||
// Handle italics specially - they're always white
|
||||
if (font == FONT_ITALICS)
|
||||
return base + 0x0e;
|
||||
if (font == FONT_UNDERLINED_ITALICS)
|
||||
return base + 0x0f;
|
||||
|
||||
// Map color to base offset (0, 2, 4, 6, 8, 10, 12)
|
||||
switch (color)
|
||||
{
|
||||
case COL_WHITE:
|
||||
style_offset = 0x00;
|
||||
break;
|
||||
case COL_GREEN:
|
||||
style_offset = 0x02;
|
||||
break;
|
||||
case COL_BLUE:
|
||||
style_offset = 0x04;
|
||||
break;
|
||||
case COL_CYAN:
|
||||
style_offset = 0x06;
|
||||
break;
|
||||
case COL_RED:
|
||||
style_offset = 0x08;
|
||||
break;
|
||||
case COL_YELLOW:
|
||||
style_offset = 0x0a;
|
||||
break;
|
||||
case COL_MAGENTA:
|
||||
style_offset = 0x0c;
|
||||
break;
|
||||
default:
|
||||
// For unsupported colors (black, transparent, userdefined), fall back to white
|
||||
style_offset = 0x00;
|
||||
break;
|
||||
}
|
||||
|
||||
// Add 1 for underlined
|
||||
if (font == FONT_UNDERLINED)
|
||||
style_offset += 1;
|
||||
|
||||
return base + style_offset;
|
||||
}
|
||||
|
||||
/**
 * Tell whether styled PACs for the given row use the high byte2 range
 * (0x60-0x6F) rather than the low one (0x40-0x4F).
 *
 * @param row Zero-based row index
 *
 * Returns true for zero-based rows 1, 3, 5, 7, 9, 12 and 14, i.e. the
 * one-based rows 2, 4, 6, 8, 10, 13 and 15 of the preamble code table
 * (the rows whose indent PACs use byte2 0x70-0x7F).
 */
static bool row_uses_high_range(unsigned char row)
{
	switch (row)
	{
		case 1:
		case 3:
		case 5:
		case 7:
		case 9:
		case 12:
		case 14:
			return true;
		default:
			return false;
	}
}
|
||||
|
||||
/**
|
||||
* Write a styled PAC code (color/font at column 0) directly
|
||||
* This is more efficient than using indent PAC + mid-row code when at column 0
|
||||
*
|
||||
* @param fd File descriptor
|
||||
* @param channel Caption channel (1-4)
|
||||
* @param row Row number (0-14)
|
||||
* @param color Color to set
|
||||
* @param font Font style to set
|
||||
* @param disassemble If true, output assembly format
|
||||
* @param bytes_written Pointer to byte counter
|
||||
*/
|
||||
static void write_styled_preamble(const int fd, const unsigned char channel, const unsigned char row,
|
||||
enum ccx_decoder_608_color_code color, enum font_bits font,
|
||||
const bool disassemble, unsigned int *bytes_written)
|
||||
{
|
||||
// Get the preamble code for column 0 to obtain byte1
|
||||
enum control_code base_preamble = get_preamble_code(row, 0);
|
||||
unsigned char byte1 = odd_parity(get_first_byte(channel, base_preamble));
|
||||
|
||||
// Get styled byte2
|
||||
bool use_high_range = row_uses_high_range(row);
|
||||
unsigned char byte2 = odd_parity(get_styled_pac_byte2(color, font, use_high_range));
|
||||
|
||||
check_padding(fd, disassemble, bytes_written);
|
||||
|
||||
if (disassemble)
|
||||
{
|
||||
// Output assembly format like {0100Gr} for row 1, green
|
||||
const char *color_names[] = {"Wh", "Gr", "Bl", "Cy", "R", "Y", "Ma", "Wh", "Bk", "Wh"};
|
||||
const char *font_suffix = "";
|
||||
if (font == FONT_UNDERLINED)
|
||||
font_suffix = "U";
|
||||
else if (font == FONT_ITALICS)
|
||||
font_suffix = "I";
|
||||
else if (font == FONT_UNDERLINED_ITALICS)
|
||||
font_suffix = "IU";
|
||||
|
||||
fdprintf(fd, "{%02d00%s%s}", row + 1, color_names[color], font_suffix);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (*bytes_written % 2 == 0)
|
||||
write_wrapped(fd, " ", 1);
|
||||
fdprintf(fd, "%02x%02x", byte1, byte2);
|
||||
}
|
||||
*bytes_written += 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a styled PAC can be used (when color/font differs from white/regular and column is 0)
|
||||
*/
|
||||
static bool can_use_styled_pac(enum ccx_decoder_608_color_code color, enum font_bits font, unsigned char column)
|
||||
{
|
||||
// Styled PACs can only be used at column 0
|
||||
if (column != 0)
|
||||
return false;
|
||||
|
||||
// If style is already white/regular, no need for styled PAC
|
||||
if (color == COL_WHITE && font == FONT_REGULAR)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum control_code get_tab_offset_code(const unsigned char column)
|
||||
{
|
||||
int offset = column % 4;
|
||||
@@ -519,6 +826,23 @@ enum control_code get_font_code(enum font_bits font, enum ccx_decoder_608_color_
|
||||
}
|
||||
}
|
||||
|
||||
// Translate the scc_framerate setting into frames per second.
// 1 -> 24, 2 -> 25, 3 -> 30; 0 and every other value -> 29.97 (default)
static float get_scc_fps(int scc_framerate)
{
	static const float rates[] = {29.97f, 24.0f, 25.0f, 30.0f};

	if (scc_framerate < 1 || scc_framerate > 3)
		return rates[0];

	return rates[scc_framerate];
}
|
||||
|
||||
void add_timestamp(const struct encoder_ctx *context, LLONG time, const bool disassemble)
|
||||
{
|
||||
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
@@ -528,9 +852,15 @@ void add_timestamp(const struct encoder_ctx *context, LLONG time, const bool dis
|
||||
unsigned hour, minute, second, milli;
|
||||
millis_to_time(time, &hour, &minute, &second, &milli);
|
||||
|
||||
// SMPTE format
|
||||
float frame = milli * 29.97 / 1000;
|
||||
fdprintf(context->out->fh, "%02u:%02u:%02u:%02.f\t", hour, minute, second, frame);
|
||||
// SMPTE format - use configurable frame rate (issue #1191)
|
||||
float fps = get_scc_fps(context->scc_framerate);
|
||||
// Calculate frame number from milliseconds, ensuring it stays in valid range 0 to fps-1
|
||||
// Use floor to avoid rounding up to fps (e.g., 29.97 -> 30 is invalid)
|
||||
int max_frames = (int)fps;
|
||||
int frame = (int)(milli * fps / 1000.0f);
|
||||
if (frame >= max_frames)
|
||||
frame = max_frames - 1; // Cap at max valid frame (e.g., 29 for 29.97fps)
|
||||
fdprintf(context->out->fh, "%02u:%02u:%02u:%02d\t", hour, minute, second, frame);
|
||||
}
|
||||
|
||||
void clear_screen(const struct encoder_ctx *context, LLONG end_time, const unsigned char channel, const bool disassemble)
|
||||
@@ -550,8 +880,51 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
unsigned char current_row = UINT8_MAX;
|
||||
unsigned char current_column = UINT8_MAX;
|
||||
|
||||
// 1. Load the caption
|
||||
add_timestamp(context, data->start_time, disassemble);
|
||||
// Timing variables for accurate timing mode (issue #1120)
|
||||
LLONG actual_start_time = data->start_time; // When caption should display
|
||||
LLONG actual_end_time = data->end_time; // When caption should clear
|
||||
LLONG preroll_start = data->start_time; // When to start loading (default: same as display)
|
||||
float fps = get_scc_fps_internal(context->scc_framerate);
|
||||
bool use_separate_display_time = false; // Whether to write EOC at separate timestamp
|
||||
|
||||
// If accurate timing is enabled, calculate pre-roll and handle collisions
|
||||
if (context->scc_accurate_timing)
|
||||
{
|
||||
// Calculate total bytes needed for this caption
|
||||
unsigned int total_bytes = calculate_caption_bytes(data);
|
||||
|
||||
// Calculate when we need to start loading
|
||||
preroll_start = calculate_preroll_time(actual_start_time, total_bytes, fps);
|
||||
|
||||
// Check for collisions with previous caption and resolve
|
||||
if (resolve_collision(context, &preroll_start, &actual_start_time, fps))
|
||||
{
|
||||
// Timing was adjusted due to collision
|
||||
// Also adjust end time by the same amount
|
||||
LLONG shift = actual_start_time - data->start_time;
|
||||
actual_end_time = data->end_time + shift;
|
||||
}
|
||||
|
||||
// Update timing state for next caption
|
||||
float ms_per_frame = 1000.0f / fps;
|
||||
unsigned int frames_needed = (total_bytes + 1) / 2;
|
||||
LLONG transmission_time_ms = (LLONG)(frames_needed * ms_per_frame);
|
||||
context->scc_last_transmission_end = preroll_start + transmission_time_ms;
|
||||
context->scc_last_display_end = actual_end_time;
|
||||
|
||||
// Enable separate display timing (like scc_tools)
|
||||
use_separate_display_time = true;
|
||||
|
||||
// 1. Load the caption at pre-roll time
|
||||
add_timestamp(context, preroll_start, disassemble);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy mode: use original timing
|
||||
// 1. Load the caption
|
||||
add_timestamp(context, data->start_time, disassemble);
|
||||
}
|
||||
|
||||
write_control_code(context->out->fh, data->channel, RCL, disassemble, &bytes_written);
|
||||
for (uint8_t row = 0; row < 15; ++row)
|
||||
{
|
||||
@@ -578,6 +951,23 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
{
|
||||
if (switch_font || switch_color)
|
||||
{
|
||||
// Optimization (issue #1191): Use styled PAC when at column 0 with non-default style
|
||||
// This avoids needing a separate mid-row code
|
||||
if (column == 0 && can_use_styled_pac(data->colors[row][column], data->fonts[row][column], 0))
|
||||
{
|
||||
write_styled_preamble(context->out->fh, data->channel, row,
|
||||
data->colors[row][column], data->fonts[row][column],
|
||||
disassemble, &bytes_written);
|
||||
current_row = row;
|
||||
current_column = 0;
|
||||
current_font = data->fonts[row][column];
|
||||
current_color = data->colors[row][column];
|
||||
// Write the character and continue
|
||||
write_character(context->out->fh, data->characters[row][column], disassemble, &bytes_written);
|
||||
++current_column;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (data->characters[row][column] == ' ')
|
||||
{
|
||||
// The MID-ROW code is going to move the cursor to the
|
||||
@@ -617,12 +1007,26 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
check_padding(context->out->fh, disassemble, &bytes_written);
|
||||
}
|
||||
|
||||
// 2. Show the caption
|
||||
// 2. Show the caption (EOC = End of Caption, makes it visible)
|
||||
if (use_separate_display_time)
|
||||
{
|
||||
// For accurate timing: write display command at actual display time
|
||||
// This matches scc_tools behavior where load and display are separate
|
||||
add_timestamp(context, actual_start_time, disassemble);
|
||||
}
|
||||
write_control_code(context->out->fh, data->channel, EOC, disassemble, &bytes_written);
|
||||
write_control_code(context->out->fh, data->channel, ENM, disassemble, &bytes_written);
|
||||
|
||||
// 3. Clear the caption
|
||||
clear_screen(context, data->end_time, data->channel, disassemble);
|
||||
// 3. Clear the caption at the end time
|
||||
// In accurate timing mode, skip clear - the next caption's EOC will handle the transition
|
||||
// This matches scc_tools behavior which doesn't write EDM between consecutive captions
|
||||
if (!use_separate_display_time)
|
||||
{
|
||||
// Legacy mode: always write clear
|
||||
clear_screen(context, actual_end_time, data->channel, disassemble);
|
||||
}
|
||||
// In accurate timing mode, scc_last_display_end is still tracked for reference
|
||||
// but we don't write the clear command to avoid out-of-order timestamps
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -251,6 +251,9 @@ void set_spupng_offset(void *ctx, int x, int y)
|
||||
sp->xOffset = x;
|
||||
sp->yOffset = y;
|
||||
}
|
||||
|
||||
// Forward declaration for calculate_spupng_offsets
|
||||
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx);
|
||||
int save_spupng(const char *filename, uint8_t *bitmap, int w, int h,
|
||||
png_color *palette, png_byte *alpha, int nb_color)
|
||||
{
|
||||
@@ -384,7 +387,7 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
struct cc_bitmap *rect;
|
||||
png_color *palette = NULL;
|
||||
png_byte *alpha = NULL;
|
||||
int wrote_opentag = 1;
|
||||
int wrote_opentag = 0; // Track if we actually wrote the tag
|
||||
|
||||
x_pos = -1;
|
||||
y_pos = -1;
|
||||
@@ -395,13 +398,11 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
return 0;
|
||||
|
||||
inc_spupng_fileindex(sp);
|
||||
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
|
||||
|
||||
if (sub->nb_data == 0 && (sub->flags & SUB_EOD_MARKER))
|
||||
{
|
||||
context->prev_start = -1;
|
||||
if (wrote_opentag)
|
||||
write_sputag_close(sp);
|
||||
// No subtitle data, skip writing
|
||||
return 0;
|
||||
}
|
||||
rect = sub->data;
|
||||
@@ -440,7 +441,13 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
}
|
||||
}
|
||||
filename = get_spupng_filename(sp);
|
||||
set_spupng_offset(sp, x_pos, y_pos);
|
||||
|
||||
// Set image dimensions for offset calculation
|
||||
sp->img_w = width;
|
||||
sp->img_h = height;
|
||||
|
||||
// Calculate centered offsets based on screen size (PAL/NTSC)
|
||||
calculate_spupng_offsets(sp, context);
|
||||
if (sub->flags & SUB_EOD_MARKER)
|
||||
context->prev_start = sub->start_time;
|
||||
pbuf = (uint8_t *)malloc(width * height);
|
||||
@@ -475,6 +482,15 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
|
||||
/* TODO do rectangle wise, one color table should not be used for all rectangles */
|
||||
mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data1, rect[0].nb_colors);
|
||||
|
||||
// Save PNG file first
|
||||
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
|
||||
freep(&pbuf);
|
||||
|
||||
// Write XML tag with calculated centered offsets
|
||||
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
|
||||
wrote_opentag = 1; // Mark that we wrote the tag
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (!context->nospupngocr)
|
||||
{
|
||||
@@ -487,8 +503,6 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
}
|
||||
}
|
||||
#endif
|
||||
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
|
||||
freep(&pbuf);
|
||||
|
||||
end:
|
||||
if (wrote_opentag)
|
||||
@@ -991,6 +1005,8 @@ int spupng_export_string2png(struct spupng_t *sp, char *str, FILE *output)
|
||||
*/
|
||||
|
||||
// Save image
|
||||
sp->img_w = canvas_width;
|
||||
sp->img_h = canvas_height;
|
||||
write_image(buffer, output, canvas_width, canvas_height);
|
||||
free(tmp);
|
||||
free(buffer);
|
||||
@@ -1081,6 +1097,28 @@ int eia608_to_str(struct encoder_ctx *context, struct eia608_screen *data, char
|
||||
|
||||
// string needs to be in UTF-8 encoding.
|
||||
// This function will take care of encoding.
|
||||
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx)
|
||||
{
|
||||
int screen_w = 720;
|
||||
int screen_h;
|
||||
|
||||
/* Teletext is always PAL */
|
||||
if (ctx->in_fileformat == 2 || ctx->is_pal)
|
||||
{
|
||||
screen_h = 576;
|
||||
}
|
||||
else
|
||||
{
|
||||
screen_h = 480;
|
||||
}
|
||||
|
||||
sp->xOffset = (screen_w - sp->img_w) / 2;
|
||||
sp->yOffset = (screen_h - sp->img_h) / 2;
|
||||
|
||||
// SPU / DVD requires even yOffset (interlacing)
|
||||
if (sp->yOffset & 1)
|
||||
sp->yOffset++;
|
||||
}
|
||||
int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLONG end_time,
|
||||
struct encoder_ctx *context)
|
||||
{
|
||||
@@ -1099,6 +1137,7 @@ int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLO
|
||||
}
|
||||
// free(string_utf32);
|
||||
fclose(sp->fppng);
|
||||
calculate_spupng_offsets(sp, context);
|
||||
write_sputag_open(sp, start_time, end_time);
|
||||
write_spucomment(sp, string);
|
||||
write_sputag_close(sp);
|
||||
|
||||
@@ -6,9 +6,10 @@
|
||||
#include "ocr.h"
|
||||
#include "ccextractor.h"
|
||||
|
||||
/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for
|
||||
if there is any */
|
||||
int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end)
|
||||
/* Helper function to write SRT to a specific output file (issue #665 - teletext multi-page)
|
||||
Takes output file descriptor and counter pointer as parameters */
|
||||
static int write_stringz_as_srt_to_output(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end,
|
||||
int out_fh, unsigned int *srt_counter)
|
||||
{
|
||||
int used;
|
||||
unsigned h1, m1, s1, ms1;
|
||||
@@ -20,17 +21,17 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta
|
||||
|
||||
millis_to_time(ms_start, &h1, &m1, &s1, &ms1);
|
||||
millis_to_time(ms_end - 1, &h2, &m2, &s2, &ms2); // -1 To prevent overlapping with next line.
|
||||
context->srt_counter++;
|
||||
snprintf(timeline, sizeof(timeline), "%u%s", context->srt_counter, context->encoded_crlf);
|
||||
(*srt_counter)++;
|
||||
snprintf(timeline, sizeof(timeline), "%u%s", *srt_counter, context->encoded_crlf);
|
||||
used = encode_line(context, context->buffer, (unsigned char *)timeline);
|
||||
write_wrapped(context->out->fh, context->buffer, used);
|
||||
write_wrapped(out_fh, context->buffer, used);
|
||||
snprintf(timeline, sizeof(timeline), "%02u:%02u:%02u,%03u --> %02u:%02u:%02u,%03u%s",
|
||||
h1, m1, s1, ms1, h2, m2, s2, ms2, context->encoded_crlf);
|
||||
used = encode_line(context, context->buffer, (unsigned char *)timeline);
|
||||
dbg_print(CCX_DMT_DECODER_608, "\n- - - SRT caption - - -\n");
|
||||
dbg_print(CCX_DMT_DECODER_608, "%s", timeline);
|
||||
|
||||
write_wrapped(context->out->fh, context->buffer, used);
|
||||
write_wrapped(out_fh, context->buffer, used);
|
||||
int len = strlen(string);
|
||||
unsigned char *unescaped = (unsigned char *)malloc(len + 1);
|
||||
if (!unescaped)
|
||||
@@ -69,20 +70,28 @@ int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_sta
|
||||
dbg_print(CCX_DMT_DECODER_608, "\r");
|
||||
dbg_print(CCX_DMT_DECODER_608, "%s\n", context->subline);
|
||||
}
|
||||
write_wrapped(context->out->fh, el, u);
|
||||
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
write_wrapped(out_fh, el, u);
|
||||
write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
begin += strlen((const char *)begin) + 1;
|
||||
}
|
||||
|
||||
dbg_print(CCX_DMT_DECODER_608, "- - - - - - - - - - - -\r\n");
|
||||
|
||||
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
write_wrapped(out_fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
free(el);
|
||||
free(unescaped);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The timing here is not PTS based, but output based, i.e. user delay must be accounted for
|
||||
if there is any */
|
||||
int write_stringz_as_srt(char *string, struct encoder_ctx *context, LLONG ms_start, LLONG ms_end)
|
||||
{
|
||||
return write_stringz_as_srt_to_output(string, context, ms_start, ms_end,
|
||||
context->out->fh, &context->srt_counter);
|
||||
}
|
||||
|
||||
int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -155,7 +164,18 @@ int write_cc_subtitle_as_srt(struct cc_subtitle *sub, struct encoder_ctx *contex
|
||||
{
|
||||
if (sub->type == CC_TEXT)
|
||||
{
|
||||
ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time);
|
||||
// For teletext multi-page extraction (issue #665), use page-specific output
|
||||
struct ccx_s_write *out = get_teletext_output(context, sub->teletext_page);
|
||||
unsigned int *counter = get_teletext_srt_counter(context, sub->teletext_page);
|
||||
if (out && counter)
|
||||
{
|
||||
ret = write_stringz_as_srt_to_output(sub->data, context, sub->start_time, sub->end_time,
|
||||
out->fh, counter);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = write_stringz_as_srt(sub->data, context, sub->start_time, sub->end_time);
|
||||
}
|
||||
freep(&sub->data);
|
||||
sub->nb_data = 0;
|
||||
ret = 1;
|
||||
|
||||
@@ -39,6 +39,8 @@ struct spupng_t
|
||||
int fileIndex;
|
||||
int xOffset;
|
||||
int yOffset;
|
||||
int img_w;
|
||||
int img_h;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -220,8 +220,9 @@ void write_webvtt_header(struct encoder_ctx *context)
|
||||
millis_to_time(context->timing->sync_pts2fts_fts, &h1, &m1, &s1, &ms1);
|
||||
|
||||
// If the user has enabled X-TIMESTAMP-MAP
|
||||
snprintf(header_string, sizeof(header_string), "X-TIMESTAMP-MAP=MPEGTS:%ld,LOCAL:%02u:%02u:%02u.%03u%s",
|
||||
context->timing->sync_pts2fts_pts, h1, m1, s1, ms1,
|
||||
// LOCAL must come before MPEGTS for HLS compatibility (issue #1743)
|
||||
snprintf(header_string, sizeof(header_string), "X-TIMESTAMP-MAP=LOCAL:%02u:%02u:%02u.%03u,MPEGTS:%ld%s",
|
||||
h1, m1, s1, ms1, context->timing->sync_pts2fts_pts,
|
||||
ccx_options.enc_cfg.line_terminator_lf ? "\n\n" : "\r\n\r\n");
|
||||
|
||||
used = encode_line(context, context->buffer, (unsigned char *)header_string);
|
||||
|
||||
@@ -182,6 +182,7 @@ typedef struct DVBSubContext
|
||||
LLONG time_out;
|
||||
#ifdef ENABLE_OCR
|
||||
void *ocr_ctx;
|
||||
int ocr_initialized; // Flag to track if OCR has been lazily initialized
|
||||
#endif
|
||||
DVBSubRegion *region_list;
|
||||
DVBSubCLUT *clut_list;
|
||||
@@ -418,7 +419,7 @@ static void delete_regions(DVBSubContext *ctx)
|
||||
* @return DVB context kept as void* for abstraction
|
||||
*
|
||||
*/
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr)
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg)
|
||||
{
|
||||
int i, r, g, b, a = 0;
|
||||
DVBSubContext *ctx = (DVBSubContext *)malloc(sizeof(DVBSubContext));
|
||||
@@ -442,8 +443,11 @@ void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr)
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (!initialized_ocr)
|
||||
ctx->ocr_ctx = init_ocr(ctx->lang_index);
|
||||
// Lazy OCR initialization: don't init here, wait until a bitmap actually needs OCR
|
||||
// This avoids ~10 second Tesseract startup overhead for files that have DVB streams
|
||||
// but don't actually produce any bitmap subtitles (e.g., files with CEA-608 captions)
|
||||
ctx->ocr_ctx = NULL;
|
||||
ctx->ocr_initialized = 0;
|
||||
#endif
|
||||
ctx->version = -1;
|
||||
|
||||
@@ -1117,7 +1121,7 @@ static int dvbsub_parse_object_segment(void *dvb_ctx, const uint8_t *buf,
|
||||
}
|
||||
else if (coding_method == 1)
|
||||
{
|
||||
mprint("FIXME support for sring coding standard\n");
|
||||
mprint("FIXME support for string coding standard\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1702,7 +1706,13 @@ static int write_dvb_sub(struct lib_cc_decode *dec_ctx, struct cc_subtitle *sub)
|
||||
// Perform OCR
|
||||
#ifdef ENABLE_OCR
|
||||
char *ocr_str = NULL;
|
||||
if (ctx->ocr_ctx)
|
||||
// Lazy OCR initialization: only init when we actually have a bitmap to process
|
||||
if (!ctx->ocr_initialized)
|
||||
{
|
||||
ctx->ocr_ctx = init_ocr(ctx->lang_index);
|
||||
ctx->ocr_initialized = 1; // Mark as initialized even if init_ocr returns NULL
|
||||
}
|
||||
if (ctx->ocr_ctx && region)
|
||||
{
|
||||
int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, region->bgcolor, dec_ctx->ocr_quantmode);
|
||||
if (ret >= 0)
|
||||
|
||||
@@ -42,7 +42,7 @@ extern "C"
|
||||
* @return DVB context kept as void* for abstraction
|
||||
*
|
||||
*/
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr);
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg);
|
||||
|
||||
int dvbsub_close_decoder(void **dvb_ctx);
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
{
|
||||
if ((ud_header[1] & 0x7F) == 0x01)
|
||||
{
|
||||
unsigned char cc_data[3 * 31 + 1]; // Maximum cc_count is 31
|
||||
unsigned char cc_data[3 * 32]; // Increased for safety margin, 31 is max count
|
||||
|
||||
dec_ctx->stat_scte20ccheaders++;
|
||||
read_bytes(ustream, 2); // "03 01"
|
||||
@@ -370,6 +370,7 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
dbg_print(CCX_DMT_PARSE, "%s", debug_608_to_ASC(dishdata, 0));
|
||||
dbg_print(CCX_DMT_PARSE, "%s:\n", debug_608_to_ASC(dishdata + 3, 0));
|
||||
|
||||
dishdata[cc_count * 3] = 0xFF; // Ensure termination for store_hdcc
|
||||
store_hdcc(enc_ctx, dec_ctx, dishdata, cc_count, dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, sub);
|
||||
|
||||
// Ignore 4 (0x020A, followed by two unknown) bytes.
|
||||
@@ -484,7 +485,10 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
mprint("MPEG:VBI: only support Luma line\n");
|
||||
|
||||
if (udatalen < 720)
|
||||
mprint("MPEG:VBI: Minimum 720 bytes in luma line required\n");
|
||||
{
|
||||
mprint("MPEG:VBI: Minimum 720 bytes in luma line required, skipping truncated packet.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
decode_vbi(dec_ctx, field, ustream->pos, 720, sub);
|
||||
dbg_print(CCX_DMT_VERBOSE, "GXF (vbi line %d) user data:\n", line_nb);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#include "ccx_common_option.h"
|
||||
#include "activity.h"
|
||||
#include "file_buffer.h"
|
||||
long FILEBUFFERSIZE = 1024 * 1024 * 16; // 16 Mbytes no less. Minimize number of real read calls()
|
||||
int64_t FILEBUFFERSIZE = 1024 * 1024 * 16; // 16 Mbytes no less. Minimize number of real read calls()
|
||||
|
||||
#ifdef _WIN32
|
||||
WSADATA wsaData = {0};
|
||||
@@ -66,6 +66,7 @@ void prepare_for_new_file(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
// Init per file variables
|
||||
ctx->last_reported_progress = -1;
|
||||
ctx->min_global_timestamp_offset = -1; // -1 means not yet initialized
|
||||
ctx->stat_numuserheaders = 0;
|
||||
ctx->stat_dvdccheaders = 0;
|
||||
ctx->stat_scte20ccheaders = 0;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "ccx_gxf.h"
|
||||
#include "dvd_subtitle_decoder.h"
|
||||
#include "ccx_demuxer_mxf.h"
|
||||
#include "ccx_dtvcc.h"
|
||||
|
||||
int end_of_file = 0; // End of file?
|
||||
|
||||
@@ -75,7 +76,7 @@ int ps_get_more_data(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
if (!ctx->demux_ctx->strangeheader)
|
||||
{
|
||||
mprint("\nNot a recognized header. Searching for next header.\n");
|
||||
dump(CCX_DMT_GENERIC_NOTICES, nextheader, 6, 0, 0);
|
||||
dump(CCX_DMT_PARSE, nextheader, 6, 0, 0);
|
||||
// Only print the message once per loop / unrecognized header
|
||||
ctx->demux_ctx->strangeheader = 1;
|
||||
}
|
||||
@@ -566,6 +567,104 @@ static size_t process_raw_for_mcc(struct encoder_ctx *enc_ctx, struct lib_cc_dec
|
||||
}
|
||||
|
||||
// Raw file process
|
||||
// Parse raw CDP (Caption Distribution Packet) data
|
||||
// Returns number of bytes processed
|
||||
static size_t process_raw_cdp(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx,
|
||||
struct cc_subtitle *sub, unsigned char *buffer, size_t len)
|
||||
{
|
||||
size_t pos = 0;
|
||||
int cdp_count = 0;
|
||||
|
||||
while (pos + 10 < len) // Minimum CDP size
|
||||
{
|
||||
// Check for CDP identifier
|
||||
if (buffer[pos] != 0x96 || buffer[pos + 1] != 0x69)
|
||||
{
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned char cdp_length = buffer[pos + 2];
|
||||
if (pos + cdp_length > len)
|
||||
break; // Incomplete CDP packet
|
||||
|
||||
unsigned char framerate_byte = buffer[pos + 3];
|
||||
int framerate_code = framerate_byte >> 4;
|
||||
|
||||
// Skip to find cc_data section (0x72)
|
||||
size_t cdp_pos = pos + 4; // After identifier, length, framerate
|
||||
int cc_count = 0;
|
||||
unsigned char *cc_data = NULL;
|
||||
|
||||
// Skip header sequence counter (2 bytes)
|
||||
cdp_pos += 2;
|
||||
|
||||
// Look for cc_data section (0x72) within CDP
|
||||
while (cdp_pos < pos + cdp_length - 4)
|
||||
{
|
||||
if (buffer[cdp_pos] == 0x72) // cc_data section
|
||||
{
|
||||
cc_count = buffer[cdp_pos + 1] & 0x1F;
|
||||
cc_data = buffer + cdp_pos + 2;
|
||||
break;
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x71) // time code section
|
||||
{
|
||||
cdp_pos += 5; // Skip time code section
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x73) // service info section
|
||||
{
|
||||
break; // Past cc_data
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x74) // footer
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
cdp_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (cc_count > 0 && cc_data != NULL)
|
||||
{
|
||||
// Calculate PTS based on CDP frame count and frame rate
|
||||
static const int fps_table[] = {0, 24, 24, 25, 30, 30, 50, 60, 60};
|
||||
int fps = (framerate_code < 9) ? fps_table[framerate_code] : 30;
|
||||
LLONG pts = (LLONG)cdp_count * 90000 / fps;
|
||||
|
||||
// Set timing if not already set
|
||||
if (dec_ctx->timing->pts_set == 0)
|
||||
{
|
||||
dec_ctx->timing->min_pts = pts;
|
||||
dec_ctx->timing->pts_set = 2;
|
||||
dec_ctx->timing->sync_pts = pts;
|
||||
}
|
||||
set_current_pts(dec_ctx->timing, pts);
|
||||
set_fts(dec_ctx->timing);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Enable DTVCC decoder for CEA-708 captions
|
||||
if (dec_ctx->dtvcc_rust)
|
||||
{
|
||||
int is_active = ccxr_dtvcc_is_active(dec_ctx->dtvcc_rust);
|
||||
if (!is_active)
|
||||
{
|
||||
ccxr_dtvcc_set_active(dec_ctx->dtvcc_rust, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Process cc_data triplets through process_cc_data for 708 support
|
||||
process_cc_data(enc_ctx, dec_ctx, cc_data, cc_count, sub);
|
||||
cdp_count++;
|
||||
}
|
||||
|
||||
pos += cdp_length;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
LLONG ret;
|
||||
@@ -575,6 +674,8 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
struct lib_cc_decode *dec_ctx = NULL;
|
||||
int caps = 0;
|
||||
int is_dvdraw = 0; // Flag to track if this is DVD raw format
|
||||
int is_scc = 0; // Flag to track if this is SCC format
|
||||
int is_cdp = 0; // Flag to track if this is raw CDP format
|
||||
int is_mcc_output = 0; // Flag for MCC output format
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
@@ -607,13 +708,28 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
break;
|
||||
|
||||
// Check if this is DVD raw format using Rust detection
|
||||
if (!is_dvdraw && ccxr_is_dvdraw_header(data->buffer, (unsigned int)data->len))
|
||||
if (!is_dvdraw && !is_scc && ccxr_is_dvdraw_header(data->buffer, (unsigned int)data->len))
|
||||
{
|
||||
is_dvdraw = 1;
|
||||
mprint("Detected McPoodle's DVD raw format\n");
|
||||
}
|
||||
|
||||
if (is_mcc_output && !is_dvdraw)
|
||||
// Check if this is SCC format using Rust detection
|
||||
if (!is_scc && !is_dvdraw && ccxr_is_scc_file(data->buffer, (unsigned int)data->len))
|
||||
{
|
||||
is_scc = 1;
|
||||
mprint("Detected SCC (Scenarist Closed Caption) format\n");
|
||||
}
|
||||
|
||||
// Check if this is raw CDP format (starts with 0x9669)
|
||||
if (!is_cdp && !is_scc && !is_dvdraw && data->len >= 2 &&
|
||||
data->buffer[0] == 0x96 && data->buffer[1] == 0x69)
|
||||
{
|
||||
is_cdp = 1;
|
||||
mprint("Detected raw CDP (Caption Distribution Packet) format\n");
|
||||
}
|
||||
|
||||
if (is_mcc_output && !is_dvdraw && !is_scc && !is_cdp)
|
||||
{
|
||||
// For MCC output, encode raw data directly without decoding
|
||||
// This preserves the original CEA-608 byte pairs in CDP format
|
||||
@@ -626,6 +742,18 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
// Use Rust implementation - handles timing internally
|
||||
ret = ccxr_process_dvdraw(dec_ctx, dec_sub, data->buffer, (unsigned int)data->len);
|
||||
}
|
||||
else if (is_scc)
|
||||
{
|
||||
// Use Rust SCC implementation - handles timing internally via SMPTE timecodes
|
||||
ret = ccxr_process_scc(dec_ctx, dec_sub, data->buffer, (unsigned int)data->len, ccx_options.scc_framerate);
|
||||
}
|
||||
else if (is_cdp)
|
||||
{
|
||||
// Process raw CDP packets (e.g., from SDI VANC capture)
|
||||
ret = process_raw_cdp(enc_ctx, dec_ctx, dec_sub, data->buffer, data->len);
|
||||
if (ret > 0)
|
||||
caps = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = process_raw(dec_ctx, dec_sub, data->buffer, data->len);
|
||||
@@ -796,10 +924,6 @@ int process_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, str
|
||||
got = data_node->len;
|
||||
}
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_PRIVATE_MPEG2_CC)
|
||||
{
|
||||
got = data_node->len; // Do nothing. Still don't know how to process it
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_RAW) // Raw two byte 608 data from DVR-MS/ASF
|
||||
{
|
||||
// The asf_get_more_data() loop sets current_pts when possible
|
||||
@@ -852,7 +976,34 @@ int process_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, str
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_RAW_TYPE)
|
||||
{
|
||||
got = process_raw_with_field(dec_ctx, dec_sub, data_node->buffer, data_node->len);
|
||||
// CCX_RAW_TYPE contains cc_data triplets (cc_type + 2 data bytes each)
|
||||
// Used by MXF and GXF demuxers
|
||||
|
||||
// Initialize timing if not set (use caption PTS as reference)
|
||||
if (dec_ctx->timing->pts_set == 0 && data_node->pts != CCX_NOPTS)
|
||||
{
|
||||
dec_ctx->timing->min_pts = data_node->pts;
|
||||
dec_ctx->timing->pts_set = 2; // MinPtsSet
|
||||
dec_ctx->timing->sync_pts = data_node->pts;
|
||||
set_fts(dec_ctx->timing);
|
||||
}
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Enable DTVCC decoder for CEA-708 captions from MXF/GXF
|
||||
if (dec_ctx->dtvcc_rust)
|
||||
{
|
||||
int is_active = ccxr_dtvcc_is_active(dec_ctx->dtvcc_rust);
|
||||
if (!is_active)
|
||||
{
|
||||
ccxr_dtvcc_set_active(dec_ctx->dtvcc_rust, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Use process_cc_data to properly invoke DTVCC decoder for 708 captions
|
||||
int cc_count = data_node->len / 3;
|
||||
process_cc_data(enc_ctx, dec_ctx, data_node->buffer, cc_count, dec_sub);
|
||||
got = data_node->len;
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_ISDB_SUBTITLE)
|
||||
{
|
||||
@@ -1018,8 +1169,15 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx *ctx,
|
||||
pts = data_node_video->pts;
|
||||
}
|
||||
|
||||
set_current_pts(dec_ctx_video->timing, pts);
|
||||
set_fts(dec_ctx_video->timing);
|
||||
// When using GOP timing (--goptime), timing is set from GOP headers
|
||||
// in gop_header(), not from PES PTS. Skip PTS-based timing here
|
||||
// to avoid conflicts between GOP time (absolute time-of-day) and
|
||||
// PTS (relative stream time) that cause sync detection failures.
|
||||
if (ccx_options.use_gop_as_pts != 1)
|
||||
{
|
||||
set_current_pts(dec_ctx_video->timing, pts);
|
||||
set_fts(dec_ctx_video->timing);
|
||||
}
|
||||
}
|
||||
size_t got = process_m2v(*enc_ctx, dec_ctx_video, data_node_video->buffer, data_node_video->len, dec_sub_video);
|
||||
if (got > 0)
|
||||
@@ -1034,7 +1192,11 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx *ctx,
|
||||
cinfo = get_cinfo(ctx->demux_ctx, pid);
|
||||
*enc_ctx = update_encoder_list_cinfo(ctx, cinfo);
|
||||
*dec_ctx = update_decoder_list_cinfo(ctx, cinfo);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder((*dec_ctx)->dtvcc_rust, *enc_ctx);
|
||||
#else
|
||||
(*dec_ctx)->dtvcc->encoder = (void *)(*enc_ctx);
|
||||
#endif
|
||||
|
||||
if ((*dec_ctx)->timing->min_pts == 0x01FFFFFFFFLL) // if we didn't set the min_pts of the program
|
||||
{
|
||||
@@ -1258,7 +1420,11 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
|
||||
enc_ctx = update_encoder_list_cinfo(ctx, cinfo);
|
||||
dec_ctx = update_decoder_list_cinfo(ctx, cinfo);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx; // WARN: otherwise cea-708 will not work
|
||||
#endif
|
||||
|
||||
if (dec_ctx->timing->min_pts == 0x01FFFFFFFFLL) // if we didn't set the min_pts of the program
|
||||
{
|
||||
@@ -1342,7 +1508,24 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
}
|
||||
if (ctx->live_stream)
|
||||
{
|
||||
int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
|
||||
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
|
||||
if (!t && ctx->demux_ctx->global_timestamp_inited)
|
||||
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// Handle multi-program TS timing
|
||||
if (ctx->demux_ctx->global_timestamp_inited)
|
||||
{
|
||||
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
|
||||
ctx->min_global_timestamp_offset = offset;
|
||||
// Only use timestamps from the program with the lowest base
|
||||
if (offset - ctx->min_global_timestamp_offset < 60000)
|
||||
t = offset - ctx->min_global_timestamp_offset;
|
||||
else
|
||||
t = ctx->min_global_timestamp_offset > 0 ? 0 : t;
|
||||
if (t < 0)
|
||||
t = 0;
|
||||
}
|
||||
int cur_sec = (int)(t / 1000);
|
||||
int th = cur_sec / 10;
|
||||
if (ctx->last_reported_progress != th)
|
||||
{
|
||||
@@ -1360,6 +1543,28 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
|
||||
if (!t && ctx->demux_ctx->global_timestamp_inited)
|
||||
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// For multi-program TS files, different programs can have different
|
||||
// PCR bases (e.g., one at 25h, another at 23h). This causes the
|
||||
// global_timestamp to jump between different bases, resulting in
|
||||
// wildly different offset values. Track the minimum offset seen
|
||||
// and only display times from the program with the lowest base.
|
||||
if (ctx->demux_ctx->global_timestamp_inited)
|
||||
{
|
||||
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// Track minimum offset (this is the PCR base of the program
|
||||
// with the lowest timestamp, which represents true file time)
|
||||
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
|
||||
ctx->min_global_timestamp_offset = offset;
|
||||
// Only use timestamps from the program with the lowest base.
|
||||
// If current offset is significantly larger than minimum (by > 60s),
|
||||
// it's from a program with a higher PCR base - use minimum instead.
|
||||
if (offset - ctx->min_global_timestamp_offset < 60000)
|
||||
t = offset - ctx->min_global_timestamp_offset;
|
||||
else
|
||||
t = ctx->min_global_timestamp_offset > 0 ? 0 : t; // fallback to minimum-based time
|
||||
if (t < 0)
|
||||
t = 0;
|
||||
}
|
||||
int cur_sec = (int)(t / 1000);
|
||||
activity_progress(progress, cur_sec / 60, cur_sec % 60);
|
||||
ctx->last_reported_progress = progress;
|
||||
@@ -1468,7 +1673,11 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
|
||||
}
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx; // WARN: otherwise cea-708 will not work
|
||||
#endif
|
||||
if (parsebuf[6] == 0 && parsebuf[7] == 2)
|
||||
{
|
||||
dec_ctx->codec = CCX_CODEC_TELETEXT;
|
||||
|
||||
@@ -473,7 +473,7 @@ struct encoder_ctx *update_encoder_list_cinfo(struct lib_ccx_ctx *ctx, struct ca
|
||||
}
|
||||
|
||||
list_add_tail(&(enc_ctx->list), &(ctx->enc_ctx_head));
|
||||
freep(ccx_options.enc_cfg.output_filename);
|
||||
freep(&ccx_options.enc_cfg.output_filename);
|
||||
}
|
||||
// DVB related
|
||||
enc_ctx->prev = NULL;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CCX_CCEXTRACTOR_H
|
||||
#define CCX_CCEXTRACTOR_H
|
||||
|
||||
#define VERSION "0.95"
|
||||
#define VERSION "0.96.5"
|
||||
|
||||
// Load common includes and constants for library usage
|
||||
#include "ccx_common_platform.h"
|
||||
@@ -43,17 +43,23 @@ struct file_report
|
||||
};
|
||||
|
||||
// Stuff for telxcc.c
|
||||
#define MAX_TLT_PAGES_EXTRACT 8 // Maximum number of teletext pages to extract simultaneously
|
||||
|
||||
struct ccx_s_teletext_config
|
||||
{
|
||||
uint8_t verbose : 1; // should telxcc be verbose?
|
||||
uint16_t page; // teletext page containing cc we want to filter
|
||||
uint16_t page; // teletext page containing cc we want to filter (legacy, first page)
|
||||
uint16_t tid; // 13-bit packet ID for teletext stream
|
||||
double offset; // time offset in seconds
|
||||
uint8_t bom : 1; // print UTF-8 BOM characters at the beginning of output
|
||||
uint8_t nonempty : 1; // produce at least one (dummy) frame
|
||||
// uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format
|
||||
// uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too
|
||||
uint16_t user_page; // Page selected by user, which MIGHT be different to 'page' depending on autodetection stuff
|
||||
uint16_t user_page; // Page selected by user (legacy, first page)
|
||||
// Multi-page teletext extraction (issue #665)
|
||||
uint16_t user_pages[MAX_TLT_PAGES_EXTRACT]; // Pages selected by user for extraction
|
||||
int num_user_pages; // Number of pages to extract (0 = auto-detect single page)
|
||||
int extract_all_pages; // If 1, extract all detected subtitle pages
|
||||
int dolevdist; // 0=Don't attempt to correct errors
|
||||
int levdistmincnt, levdistmaxpct; // Means 2 fails or less is "the same", 10% or less is also "the same"
|
||||
struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process
|
||||
@@ -84,6 +90,7 @@ struct lib_ccx_ctx
|
||||
LLONG total_past; // Only in binary concat mode
|
||||
|
||||
int last_reported_progress;
|
||||
LLONG min_global_timestamp_offset; // Track minimum (global - min) for multi-program TS
|
||||
|
||||
/* Stats */
|
||||
int stat_numuserheaders;
|
||||
@@ -154,6 +161,7 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt);
|
||||
void dinit_libraries(struct lib_ccx_ctx **ctx);
|
||||
|
||||
extern void ccxr_init_basic_logger();
|
||||
extern void ccxr_update_logger_target();
|
||||
|
||||
// ccextractor.c
|
||||
void print_end_msg(void);
|
||||
@@ -177,6 +185,10 @@ size_t process_raw(struct lib_cc_decode *ctx, struct cc_subtitle *sub, unsigned
|
||||
unsigned int ccxr_process_dvdraw(struct lib_cc_decode *ctx, struct cc_subtitle *sub, const unsigned char *buffer, unsigned int len);
|
||||
int ccxr_is_dvdraw_header(const unsigned char *buffer, unsigned int len);
|
||||
|
||||
// Rust FFI: SCC (Scenarist Closed Caption) format processing (see src/rust/src/demuxer/scc.rs)
|
||||
unsigned int ccxr_process_scc(struct lib_cc_decode *ctx, struct cc_subtitle *sub, const unsigned char *buffer, unsigned int len, int framerate);
|
||||
int ccxr_is_scc_file(const unsigned char *buffer, unsigned int len);
|
||||
|
||||
int general_loop(struct lib_ccx_ctx *ctx);
|
||||
void process_hex(struct lib_ccx_ctx *ctx, char *filename);
|
||||
int rcwt_loop(struct lib_ccx_ctx *ctx);
|
||||
@@ -237,7 +249,7 @@ int read_video_pes_header(struct ccx_demuxer *ctx, struct demuxer_data *data, un
|
||||
// ts_functions.c
|
||||
void init_ts(struct ccx_demuxer *ctx);
|
||||
int ts_readpacket(struct ccx_demuxer *ctx, struct ts_payload *payload);
|
||||
long ts_readstream(struct ccx_demuxer *ctx, struct demuxer_data **data);
|
||||
int64_t ts_readstream(struct ccx_demuxer *ctx, struct demuxer_data **data);
|
||||
int ts_get_more_data(struct lib_ccx_ctx *ctx, struct demuxer_data **data);
|
||||
int write_section(struct ccx_demuxer *ctx, struct ts_payload *payload, unsigned char *buf, int size, struct program_info *pinfo);
|
||||
void ts_buffer_psi_packet(struct ccx_demuxer *ctx);
|
||||
@@ -294,7 +306,7 @@ extern int strangeheader;
|
||||
|
||||
extern const char *desc[256];
|
||||
|
||||
extern long FILEBUFFERSIZE; // Uppercase because it used to be a define
|
||||
extern int64_t FILEBUFFERSIZE; // Uppercase because it used to be a define
|
||||
|
||||
extern int firstcall;
|
||||
|
||||
@@ -331,4 +343,9 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx *ctx,
|
||||
void segment_output_file(struct lib_ccx_ctx *ctx, struct lib_cc_decode *dec_ctx);
|
||||
int decode_vbi(struct lib_cc_decode *dec_ctx, uint8_t field, unsigned char *buffer, size_t len, struct cc_subtitle *sub);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI function to set encoder on persistent CEA-708 decoder
|
||||
void ccxr_dtvcc_set_encoder(void *dtvcc_rust, struct encoder_ctx *encoder);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
#include "dvb_subtitle_decoder.h"
|
||||
#include "vobsub_decoder.h"
|
||||
|
||||
void skip_bytes(FILE *file, ULLONG n)
|
||||
{
|
||||
@@ -121,6 +122,8 @@ void parse_ebml(FILE *file)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -185,6 +188,8 @@ void parse_segment_info(FILE *file)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -483,6 +488,8 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -611,6 +618,8 @@ void parse_segment_cluster(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -678,9 +687,13 @@ void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp)
|
||||
|
||||
ULLONG track = read_vint_length(file);
|
||||
|
||||
if (track != mkv_ctx->avc_track_number)
|
||||
// Check if this is an AVC or HEVC track
|
||||
int is_avc = (track == mkv_ctx->avc_track_number);
|
||||
int is_hevc = (track == mkv_ctx->hevc_track_number);
|
||||
|
||||
if (!is_avc && !is_hevc)
|
||||
{
|
||||
// Skip everything except AVC track
|
||||
// Skip everything except AVC/HEVC tracks
|
||||
skip_bytes(file, len - 1); // 1 byte for track
|
||||
return;
|
||||
}
|
||||
@@ -695,7 +708,10 @@ void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp)
|
||||
frame.data = read_byte_block(file, frame.len);
|
||||
frame.FTS = frame_timestamp + timecode;
|
||||
|
||||
process_avc_frame_mkv(mkv_ctx, frame);
|
||||
if (is_hevc)
|
||||
process_hevc_frame_mkv(mkv_ctx, frame);
|
||||
else
|
||||
process_avc_frame_mkv(mkv_ctx, frame);
|
||||
|
||||
free(frame.data);
|
||||
}
|
||||
@@ -726,14 +742,80 @@ int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fram
|
||||
{
|
||||
uint32_t nal_length;
|
||||
|
||||
nal_length = bswap32(*(long *)&frame.data[i]);
|
||||
if (i + nal_unit_size > frame.len)
|
||||
break;
|
||||
|
||||
nal_length =
|
||||
((uint32_t)frame.data[i] << 24) |
|
||||
((uint32_t)frame.data[i + 1] << 16) |
|
||||
((uint32_t)frame.data[i + 2] << 8) |
|
||||
(uint32_t)frame.data[i + 3];
|
||||
|
||||
i += nal_unit_size;
|
||||
|
||||
if (nal_length > frame.len - i)
|
||||
break;
|
||||
|
||||
if (nal_length > 0)
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
|
||||
i += nal_length;
|
||||
} // outer for
|
||||
assert(i == frame.len);
|
||||
|
||||
mkv_ctx->current_second = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame)
|
||||
{
|
||||
int status = 0;
|
||||
uint32_t i;
|
||||
struct lib_cc_decode *dec_ctx = update_decoder_list(mkv_ctx->ctx);
|
||||
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
|
||||
|
||||
// Set timing
|
||||
set_current_pts(dec_ctx->timing, frame.FTS * (MPEG_CLOCK_FREQ / 1000));
|
||||
set_fts(dec_ctx->timing);
|
||||
|
||||
// Set HEVC mode for NAL parsing
|
||||
dec_ctx->avc_ctx->is_hevc = 1;
|
||||
|
||||
// NAL unit length is assumed to be 4 (same as AVC in Matroska)
|
||||
uint8_t nal_unit_size = 4;
|
||||
|
||||
for (i = 0; i < frame.len;)
|
||||
{
|
||||
uint32_t nal_length;
|
||||
|
||||
if (i + nal_unit_size > frame.len)
|
||||
break;
|
||||
|
||||
nal_length =
|
||||
((uint32_t)frame.data[i] << 24) |
|
||||
((uint32_t)frame.data[i + 1] << 16) |
|
||||
((uint32_t)frame.data[i + 2] << 8) |
|
||||
(uint32_t)frame.data[i + 3];
|
||||
|
||||
i += nal_unit_size;
|
||||
|
||||
if (nal_length > frame.len - i)
|
||||
break;
|
||||
|
||||
if (nal_length > 0)
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
|
||||
i += nal_length;
|
||||
}
|
||||
|
||||
// Flush any accumulated CC data after processing this frame
|
||||
// This is critical for HEVC because store_hdcc() is normally called from
|
||||
// slice_header() which is AVC-only
|
||||
if (dec_ctx->avc_ctx->cc_count > 0)
|
||||
{
|
||||
store_hdcc(enc_ctx, dec_ctx, dec_ctx->avc_ctx->cc_data, dec_ctx->avc_ctx->cc_count,
|
||||
dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, &mkv_ctx->dec_sub);
|
||||
dec_ctx->avc_ctx->cc_buffer_saved = CCX_TRUE;
|
||||
dec_ctx->avc_ctx->cc_count = 0;
|
||||
}
|
||||
|
||||
mkv_ctx->current_second = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
|
||||
|
||||
@@ -792,6 +874,8 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -851,9 +935,11 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
|
||||
codec_id_string = read_vint_block_string(file);
|
||||
codec_id = get_track_subtitle_codec_id(codec_id_string);
|
||||
mprint(" Codec ID: %s\n", codec_id_string);
|
||||
// We only support AVC by now for EIA-608
|
||||
// Detect AVC and HEVC tracks for EIA-608/708 caption extraction
|
||||
if (strcmp((const char *)codec_id_string, (const char *)avc_codec_id) == 0)
|
||||
mkv_ctx->avc_track_number = track_number;
|
||||
else if (strcmp((const char *)codec_id_string, (const char *)hevc_codec_id) == 0)
|
||||
mkv_ctx->hevc_track_number = track_number;
|
||||
MATROSKA_SWITCH_BREAK(code, code_len);
|
||||
case MATROSKA_SEGMENT_TRACK_CODEC_PRIVATE:
|
||||
// We handle DVB's private data differently
|
||||
@@ -1028,6 +1114,65 @@ void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_strin
|
||||
data = read_byte_block(file, size);
|
||||
do_NAL(enc_ctx, dec_ctx, data, size, &mkv_ctx->dec_sub);
|
||||
}
|
||||
else if ((strcmp((const char *)codec_id_string, (const char *)hevc_codec_id) == 0) && mkv_ctx->hevc_track_number == track_number)
|
||||
{
|
||||
// HEVC uses HEVCDecoderConfigurationRecord format
|
||||
// We need to parse this to extract VPS/SPS/PPS NAL units
|
||||
dec_ctx->avc_ctx->is_hevc = 1;
|
||||
|
||||
data = read_byte_block(file, len);
|
||||
|
||||
// HEVCDecoderConfigurationRecord structure:
|
||||
// - configurationVersion (1 byte)
|
||||
// - general_profile_space, general_tier_flag, general_profile_idc (1 byte)
|
||||
// - general_profile_compatibility_flags (4 bytes)
|
||||
// - general_constraint_indicator_flags (6 bytes)
|
||||
// - general_level_idc (1 byte)
|
||||
// - reserved + min_spatial_segmentation_idc (2 bytes)
|
||||
// - reserved + parallelismType (1 byte)
|
||||
// - reserved + chromaFormat (1 byte)
|
||||
// - reserved + bitDepthLumaMinus8 (1 byte)
|
||||
// - reserved + bitDepthChromaMinus8 (1 byte)
|
||||
// - avgFrameRate (2 bytes)
|
||||
// - constantFrameRate, numTemporalLayers, temporalIdNested, lengthSizeMinusOne (1 byte)
|
||||
// - numOfArrays (1 byte)
|
||||
// Total header: 23 bytes
|
||||
|
||||
if (len >= 23)
|
||||
{
|
||||
uint8_t num_arrays = data[22];
|
||||
size_t offset = 23;
|
||||
|
||||
for (uint8_t arr = 0; arr < num_arrays && offset < len; arr++)
|
||||
{
|
||||
if (offset + 3 > len)
|
||||
break;
|
||||
|
||||
// uint8_t array_completeness = (data[offset] >> 7) & 1;
|
||||
// uint8_t nal_unit_type = data[offset] & 0x3F;
|
||||
offset++;
|
||||
|
||||
uint16_t num_nalus = (data[offset] << 8) | data[offset + 1];
|
||||
offset += 2;
|
||||
|
||||
for (uint16_t n = 0; n < num_nalus && offset < len; n++)
|
||||
{
|
||||
if (offset + 2 > len)
|
||||
break;
|
||||
|
||||
uint16_t nal_unit_length = (data[offset] << 8) | data[offset + 1];
|
||||
offset += 2;
|
||||
|
||||
if (offset + nal_unit_length > len)
|
||||
break;
|
||||
|
||||
// Process this NAL unit (VPS, SPS, or PPS)
|
||||
do_NAL(enc_ctx, dec_ctx, &data[offset], nal_unit_length, &mkv_ctx->dec_sub);
|
||||
offset += nal_unit_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (strcmp((const char *)codec_id_string, (const char *)dvb_codec_id) == 0)
|
||||
{
|
||||
enc_ctx->write_previous = 0;
|
||||
@@ -1059,7 +1204,7 @@ void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_strin
|
||||
memset((void *)&cnf, 0, sizeof(struct dvb_config));
|
||||
|
||||
parse_dvb_description(&cnf, codec_data, 8);
|
||||
dec_ctx->private_data = dvbsub_init_decoder(&cnf, 0);
|
||||
dec_ctx->private_data = dvbsub_init_decoder(&cnf);
|
||||
|
||||
free(codec_data);
|
||||
}
|
||||
@@ -1083,6 +1228,8 @@ void parse_segment_tracks(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -1127,6 +1274,8 @@ void parse_segment(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
switch (code)
|
||||
{
|
||||
@@ -1221,11 +1370,362 @@ char *ass_ssa_sentence_erase_read_order(char *text)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate PS Pack header
|
||||
* The PS Pack header is 14 bytes:
|
||||
* - 4 bytes: start code (00 00 01 ba)
|
||||
* - 6 bytes: SCR (System Clock Reference) in MPEG-2 format
|
||||
* - 3 bytes: mux rate
|
||||
* - 1 byte: stuffing length (0)
|
||||
*/
|
||||
static void generate_ps_pack_header(unsigned char *buf, ULLONG pts_90khz)
|
||||
{
|
||||
// PS Pack start code
|
||||
buf[0] = 0x00;
|
||||
buf[1] = 0x00;
|
||||
buf[2] = 0x01;
|
||||
buf[3] = 0xBA;
|
||||
|
||||
// SCR (System Clock Reference) - use PTS as SCR base, SCR extension = 0
|
||||
// MPEG-2 format: 01 SCR[32:30] 1 SCR[29:15] 1 SCR[14:0] 1 SCR_ext[8:0] 1
|
||||
ULLONG scr = pts_90khz;
|
||||
ULLONG scr_base = scr;
|
||||
int scr_ext = 0;
|
||||
|
||||
buf[4] = 0x44 | ((scr_base >> 27) & 0x38) | ((scr_base >> 28) & 0x03);
|
||||
buf[5] = (scr_base >> 20) & 0xFF;
|
||||
buf[6] = 0x04 | ((scr_base >> 12) & 0xF8) | ((scr_base >> 13) & 0x03);
|
||||
buf[7] = (scr_base >> 5) & 0xFF;
|
||||
buf[8] = 0x04 | ((scr_base << 3) & 0xF8) | ((scr_ext >> 7) & 0x03);
|
||||
buf[9] = ((scr_ext << 1) & 0xFE) | 0x01;
|
||||
|
||||
// Mux rate (10080 = standard DVD rate)
|
||||
int mux_rate = 10080;
|
||||
buf[10] = (mux_rate >> 14) & 0xFF;
|
||||
buf[11] = (mux_rate >> 6) & 0xFF;
|
||||
buf[12] = ((mux_rate << 2) & 0xFC) | 0x03;
|
||||
|
||||
// Stuffing length = 0, with marker bits
|
||||
buf[13] = 0xF8;
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate PES header for private stream 1
|
||||
* Returns the total header size (variable based on PTS)
|
||||
*/
|
||||
static int generate_pes_header(unsigned char *buf, ULLONG pts_90khz, int payload_size, int stream_id)
|
||||
{
|
||||
// PES start code for private stream 1
|
||||
buf[0] = 0x00;
|
||||
buf[1] = 0x00;
|
||||
buf[2] = 0x01;
|
||||
buf[3] = 0xBD; // Private stream 1
|
||||
|
||||
// PES packet length = header data (3 + 5 for PTS) + 1 (substream ID) + payload
|
||||
int pes_header_data_len = 5; // PTS only
|
||||
int pes_packet_len = 3 + pes_header_data_len + 1 + payload_size;
|
||||
buf[4] = (pes_packet_len >> 8) & 0xFF;
|
||||
buf[5] = pes_packet_len & 0xFF;
|
||||
|
||||
// PES flags: MPEG-2, original
|
||||
buf[6] = 0x81;
|
||||
// PTS_DTS_flags = 10 (PTS only)
|
||||
buf[7] = 0x80;
|
||||
// PES header data length
|
||||
buf[8] = pes_header_data_len;
|
||||
|
||||
// PTS (5 bytes): '0010' | PTS[32:30] | '1' | PTS[29:15] | '1' | PTS[14:0] | '1'
|
||||
buf[9] = 0x21 | ((pts_90khz >> 29) & 0x0E);
|
||||
buf[10] = (pts_90khz >> 22) & 0xFF;
|
||||
buf[11] = 0x01 | ((pts_90khz >> 14) & 0xFE);
|
||||
buf[12] = (pts_90khz >> 7) & 0xFF;
|
||||
buf[13] = 0x01 | ((pts_90khz << 1) & 0xFE);
|
||||
|
||||
// Substream ID (0x20 = first VOBSUB stream)
|
||||
buf[14] = 0x20 + stream_id;
|
||||
|
||||
return 15; // Total PES header size
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate timestamp string for .idx file
|
||||
* Format: HH:MM:SS:mmm (where mmm is milliseconds)
|
||||
*/
|
||||
static void generate_vobsub_timestamp(char *buf, size_t bufsize, ULLONG milliseconds)
|
||||
{
|
||||
ULLONG ms = milliseconds % 1000;
|
||||
milliseconds /= 1000;
|
||||
ULLONG seconds = milliseconds % 60;
|
||||
milliseconds /= 60;
|
||||
ULLONG minutes = milliseconds % 60;
|
||||
milliseconds /= 60;
|
||||
ULLONG hours = milliseconds;
|
||||
|
||||
snprintf(buf, bufsize, "%02" LLU_M ":%02" LLU_M ":%02" LLU_M ":%03" LLU_M,
|
||||
hours, minutes, seconds, ms);
|
||||
}
|
||||
|
||||
/* Check if output format is text-based (requires OCR for bitmap subtitles) */
|
||||
static int is_text_output_format(enum ccx_output_format format)
|
||||
{
|
||||
return (format == CCX_OF_SRT || format == CCX_OF_SSA ||
|
||||
format == CCX_OF_WEBVTT || format == CCX_OF_TRANSCRIPT ||
|
||||
format == CCX_OF_SAMI || format == CCX_OF_SMPTETT);
|
||||
}
|
||||
|
||||
/* VOBSUB support: Process VOBSUB track with OCR and output text format */
|
||||
static void process_vobsub_track_ocr(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
if (track->sentence_count == 0)
|
||||
{
|
||||
mprint("\nNo VOBSUB subtitles to process");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if OCR is available */
|
||||
if (!vobsub_ocr_available())
|
||||
{
|
||||
fatal(EXIT_NOT_CLASSIFIED,
|
||||
"VOBSUB to text conversion requires OCR support.\n"
|
||||
"Please rebuild CCExtractor with -DWITH_OCR=ON or use raw output (--out=idx)");
|
||||
}
|
||||
|
||||
/* Initialize VOBSUB decoder */
|
||||
struct vobsub_ctx *vob_ctx = init_vobsub_decoder();
|
||||
if (!vob_ctx)
|
||||
{
|
||||
fatal(EXIT_NOT_CLASSIFIED,
|
||||
"VOBSUB to text conversion requires OCR, but initialization failed.\n"
|
||||
"Please ensure Tesseract is installed with language data.");
|
||||
}
|
||||
|
||||
/* Parse palette from track header (CodecPrivate) */
|
||||
if (track->header)
|
||||
{
|
||||
vobsub_parse_palette(vob_ctx, track->header);
|
||||
}
|
||||
|
||||
mprint("\nProcessing VOBSUB track with OCR (%d subtitles)", track->sentence_count);
|
||||
|
||||
/* Get encoder context for output */
|
||||
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
|
||||
|
||||
/* Process each subtitle */
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
mkv_ctx->sentence_count++;
|
||||
|
||||
/* Calculate end time (use next subtitle start if not specified) */
|
||||
ULLONG end_time = sentence->time_end;
|
||||
if (end_time == 0 && i + 1 < track->sentence_count)
|
||||
{
|
||||
end_time = track->sentences[i + 1]->time_start - 1;
|
||||
}
|
||||
else if (end_time == 0)
|
||||
{
|
||||
end_time = sentence->time_start + 5000; /* Default 5 second duration */
|
||||
}
|
||||
|
||||
/* Decode SPU and run OCR */
|
||||
struct cc_subtitle sub;
|
||||
memset(&sub, 0, sizeof(sub));
|
||||
|
||||
int ret = vobsub_decode_spu(vob_ctx,
|
||||
(unsigned char *)sentence->text,
|
||||
sentence->text_size,
|
||||
sentence->time_start,
|
||||
end_time,
|
||||
&sub);
|
||||
|
||||
if (ret == 0 && sub.got_output)
|
||||
{
|
||||
/* Encode the subtitle to output format */
|
||||
encode_sub(enc_ctx, &sub);
|
||||
|
||||
/* Free subtitle data */
|
||||
if (sub.data)
|
||||
{
|
||||
struct cc_bitmap *rect = (struct cc_bitmap *)sub.data;
|
||||
for (int j = 0; j < sub.nb_data; j++)
|
||||
{
|
||||
if (rect[j].data0)
|
||||
free(rect[j].data0);
|
||||
if (rect[j].data1)
|
||||
free(rect[j].data1);
|
||||
#ifdef ENABLE_OCR
|
||||
if (rect[j].ocr_text)
|
||||
free(rect[j].ocr_text);
|
||||
#endif
|
||||
}
|
||||
free(sub.data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Progress indicator */
|
||||
if ((i + 1) % 50 == 0 || i + 1 == track->sentence_count)
|
||||
{
|
||||
mprint("\rProcessing VOBSUB: %d/%d subtitles", i + 1, track->sentence_count);
|
||||
}
|
||||
}
|
||||
|
||||
delete_vobsub_decoder(&vob_ctx);
|
||||
mprint("\nVOBSUB OCR processing complete");
|
||||
}
|
||||
|
||||
/* VOBSUB support: Save VOBSUB track to .idx and .sub files */
|
||||
#define VOBSUB_BLOCK_SIZE 2048
|
||||
static void save_vobsub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
if (track->sentence_count == 0)
|
||||
{
|
||||
mprint("\nNo VOBSUB subtitles to write");
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate base filename (without extension)
|
||||
const char *lang_to_use = track->lang_ietf ? track->lang_ietf : track->lang;
|
||||
const char *basename = get_basename(mkv_ctx->filename);
|
||||
size_t needed = strlen(basename) + strlen(lang_to_use) + 32;
|
||||
char *base_filename = malloc(needed);
|
||||
if (base_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
|
||||
if (track->lang_index == 0)
|
||||
snprintf(base_filename, needed, "%s_%s", basename, lang_to_use);
|
||||
else
|
||||
snprintf(base_filename, needed, "%s_%s_" LLD, basename, lang_to_use, track->lang_index);
|
||||
|
||||
// Create .sub filename
|
||||
char *sub_filename = malloc(needed + 5);
|
||||
if (sub_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(sub_filename, needed + 5, "%s.sub", base_filename);
|
||||
|
||||
// Create .idx filename
|
||||
char *idx_filename = malloc(needed + 5);
|
||||
if (idx_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(idx_filename, needed + 5, "%s.idx", base_filename);
|
||||
|
||||
mprint("\nOutput files: %s, %s", idx_filename, sub_filename);
|
||||
|
||||
// Open .sub file
|
||||
int sub_desc;
|
||||
#ifdef WIN32
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (sub_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .sub file");
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Open .idx file
|
||||
int idx_desc;
|
||||
#ifdef WIN32
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (idx_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .idx file");
|
||||
close(sub_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write .idx header (from CodecPrivate)
|
||||
if (track->header != NULL)
|
||||
write_wrapped(idx_desc, track->header, strlen(track->header));
|
||||
|
||||
// Add language identifier line
|
||||
char lang_line[128];
|
||||
snprintf(lang_line, sizeof(lang_line), "\nid: %s, index: 0\n", lang_to_use);
|
||||
write_wrapped(idx_desc, lang_line, strlen(lang_line));
|
||||
|
||||
// Buffer for PS/PES headers and padding
|
||||
unsigned char header_buf[32];
|
||||
unsigned char zero_buf[VOBSUB_BLOCK_SIZE];
|
||||
memset(zero_buf, 0, VOBSUB_BLOCK_SIZE);
|
||||
|
||||
ULLONG file_pos = 0;
|
||||
|
||||
// Write each subtitle
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
mkv_ctx->sentence_count++;
|
||||
|
||||
// Convert timestamp to 90kHz PTS
|
||||
ULLONG pts_90khz = sentence->time_start * 90;
|
||||
|
||||
// Write timestamp entry to .idx
|
||||
char timestamp[32];
|
||||
generate_vobsub_timestamp(timestamp, sizeof(timestamp), sentence->time_start);
|
||||
char idx_entry[128];
|
||||
snprintf(idx_entry, sizeof(idx_entry), "timestamp: %s, filepos: %09" LLX_M "\n",
|
||||
timestamp, file_pos);
|
||||
write_wrapped(idx_desc, idx_entry, strlen(idx_entry));
|
||||
|
||||
// Generate PS Pack header (14 bytes)
|
||||
generate_ps_pack_header(header_buf, pts_90khz);
|
||||
write_wrapped(sub_desc, (char *)header_buf, 14);
|
||||
|
||||
// Generate PES header (15 bytes)
|
||||
int pes_header_len = generate_pes_header(header_buf, pts_90khz, sentence->text_size, 0);
|
||||
write_wrapped(sub_desc, (char *)header_buf, pes_header_len);
|
||||
|
||||
// Write SPU data
|
||||
write_wrapped(sub_desc, sentence->text, sentence->text_size);
|
||||
|
||||
// Calculate bytes written and pad to block boundary
|
||||
ULLONG bytes_written = 14 + pes_header_len + sentence->text_size;
|
||||
ULLONG padding_needed = VOBSUB_BLOCK_SIZE - (bytes_written % VOBSUB_BLOCK_SIZE);
|
||||
if (padding_needed < VOBSUB_BLOCK_SIZE)
|
||||
{
|
||||
write_wrapped(sub_desc, (char *)zero_buf, padding_needed);
|
||||
bytes_written += padding_needed;
|
||||
}
|
||||
|
||||
file_pos += bytes_written;
|
||||
}
|
||||
|
||||
close(sub_desc);
|
||||
close(idx_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
}
|
||||
|
||||
void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
char *filename;
|
||||
int desc;
|
||||
|
||||
// VOBSUB tracks need special handling
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
// Check if user wants text output (SRT, SSA, WebVTT, etc.)
|
||||
if (ccx_options.write_format_rewritten &&
|
||||
is_text_output_format(ccx_options.enc_cfg.write_format))
|
||||
{
|
||||
// Use OCR to convert VOBSUB to text
|
||||
process_vobsub_track_ocr(mkv_ctx, track);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Output raw idx/sub files
|
||||
save_vobsub_track(mkv_ctx, track);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (mkv_ctx->ctx->cc_to_stdout == CCX_TRUE)
|
||||
{
|
||||
desc = 1; // file descriptor of stdout
|
||||
@@ -1245,11 +1745,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
if (track->header != NULL)
|
||||
write_wrapped(desc, track->header, strlen(track->header));
|
||||
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
mprint("\nError: VOBSUB not supported");
|
||||
}
|
||||
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
@@ -1384,10 +1879,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
free(timestamp_start);
|
||||
free(timestamp_end);
|
||||
}
|
||||
else if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
// TODO: Add support for VOBSUB
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1459,6 +1950,9 @@ void matroska_parse(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
// Check for EOF after reading - feof() is only set after a failed read
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -1510,8 +2004,13 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
if (ccx_options.write_format_rewritten)
|
||||
{
|
||||
mprint(MATROSKA_WARNING "You are using --out=<format>, but Matroska parser extract subtitles in a recorded format\n");
|
||||
mprint("--out=<format> will be ignored\n");
|
||||
/* Note: For VOBSUB tracks, text output formats (SRT, SSA, etc.) are
|
||||
* supported via OCR. For other subtitle types, the native format is used. */
|
||||
if (!is_text_output_format(ccx_options.enc_cfg.write_format))
|
||||
{
|
||||
mprint(MATROSKA_WARNING "You are using --out=<format>, but Matroska parser extracts subtitles in their recorded format\n");
|
||||
mprint("--out=<format> will be ignored for non-VOBSUB tracks\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Don't need generated input file
|
||||
@@ -1530,9 +2029,10 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
|
||||
mkv_ctx->sub_tracks = malloc(sizeof(struct matroska_sub_track **));
|
||||
if (mkv_ctx->sub_tracks == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In matroska_loop: Out of memory allocating sub_tracks.");
|
||||
// EIA-608
|
||||
// EIA-608/708
|
||||
memset(&mkv_ctx->dec_sub, 0, sizeof(mkv_ctx->dec_sub));
|
||||
mkv_ctx->avc_track_number = -1;
|
||||
mkv_ctx->hevc_track_number = -1;
|
||||
|
||||
matroska_parse(mkv_ctx);
|
||||
|
||||
@@ -1545,17 +2045,22 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
|
||||
// Save values before freeing mkv_ctx
|
||||
int sentence_count = mkv_ctx->sentence_count;
|
||||
int avc_track_found = mkv_ctx->avc_track_number > -1;
|
||||
int hevc_track_found = mkv_ctx->hevc_track_number > -1;
|
||||
int got_output = mkv_ctx->dec_sub.got_output;
|
||||
|
||||
matroska_free_all(mkv_ctx);
|
||||
|
||||
mprint("\n\n");
|
||||
|
||||
// Support only one AVC track by now
|
||||
if (avc_track_found)
|
||||
// Report video tracks found
|
||||
if (avc_track_found && hevc_track_found)
|
||||
mprint("Found AVC and HEVC tracks. ");
|
||||
else if (avc_track_found)
|
||||
mprint("Found AVC track. ");
|
||||
else if (hevc_track_found)
|
||||
mprint("Found HEVC track. ");
|
||||
else
|
||||
mprint("Found no AVC track. ");
|
||||
mprint("Found no AVC/HEVC track. ");
|
||||
|
||||
if (got_output)
|
||||
return 1;
|
||||
|
||||
@@ -5,26 +5,31 @@
|
||||
#if (defined(WIN32) || defined(_WIN32_WCE)) && (defined(__MINGW32__) || !defined(__GNUC__))
|
||||
#define LLD_M "I64d"
|
||||
#define LLU_M "I64u"
|
||||
#define LLX_M "I64x"
|
||||
#define LLD "%I64d"
|
||||
#define LLU "%I64u"
|
||||
#elif defined(__SYMBIAN32__)
|
||||
#define LLD_M "d"
|
||||
#define LLU_M "u"
|
||||
#define LLX_M "x"
|
||||
#define LLD "%d"
|
||||
#define LLU "%u"
|
||||
#elif defined(__DARWIN__) || defined(__APPLE__)
|
||||
#define LLD_M "lld"
|
||||
#define LLU_M "llu"
|
||||
#define LLX_M "llx"
|
||||
#define LLD "%lld"
|
||||
#define LLU "%llu"
|
||||
#elif defined(_LP64) /* Unix 64 bits */
|
||||
#define LLD_M "ld"
|
||||
#define LLU_M "lu"
|
||||
#define LLX_M "lx"
|
||||
#define LLD "%ld"
|
||||
#define LLU "%lu"
|
||||
#else /* Unix 32 bits */
|
||||
#define LLD_M "lld"
|
||||
#define LLU_M "llu"
|
||||
#define LLX_M "llx"
|
||||
#define LLD "%lld"
|
||||
#define LLU "%llu"
|
||||
#endif
|
||||
@@ -178,6 +183,7 @@ char *matroska_track_text_subtitle_id_extensions[] = {
|
||||
};
|
||||
|
||||
char *avc_codec_id = "V_MPEG4/ISO/AVC";
|
||||
char *hevc_codec_id = "V_MPEGH/ISO/HEVC";
|
||||
char *dvb_codec_id = "S_DVBSUB";
|
||||
|
||||
/* Messages */
|
||||
@@ -239,7 +245,8 @@ struct matroska_ctx
|
||||
struct matroska_sub_track **sub_tracks;
|
||||
struct lib_ccx_ctx *ctx;
|
||||
struct cc_subtitle dec_sub;
|
||||
int avc_track_number; // ID of AVC track. -1 if there is none
|
||||
int avc_track_number; // ID of AVC track. -1 if there is none
|
||||
int hevc_track_number; // ID of HEVC track. -1 if there is none
|
||||
int sub_tracks_count;
|
||||
int block_index;
|
||||
int sentence_count;
|
||||
@@ -270,6 +277,7 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
|
||||
void parse_segment_cluster(struct matroska_ctx *mkv_ctx);
|
||||
void parse_simple_block(struct matroska_ctx *mkv_ctx, ULLONG frame_timestamp);
|
||||
int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame);
|
||||
int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_frame frame);
|
||||
void parse_segment_track_entry(struct matroska_ctx *mkv_ctx);
|
||||
void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_string, ULLONG track_number, char *lang);
|
||||
void parse_segment_tracks(struct matroska_ctx *mkv_ctx);
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <gpac/isomedia.h>
|
||||
#include <gpac/mpeg4_odf.h>
|
||||
#include "lib_ccx.h"
|
||||
#include "utility.h"
|
||||
#include "ccx_encoders_common.h"
|
||||
@@ -11,20 +12,36 @@
|
||||
#include "ccx_mp4.h"
|
||||
#include "activity.h"
|
||||
#include "ccx_dtvcc.h"
|
||||
#include "vobsub_decoder.h"
|
||||
|
||||
#define MEDIA_TYPE(type, subtype) (((u64)(type) << 32) + (subtype))
|
||||
|
||||
#define GF_ISOM_SUBTYPE_C708 GF_4CC('c', '7', '0', '8')
|
||||
|
||||
static short bswap16(short v)
|
||||
// HEVC subtypes (hev1, hvc1)
|
||||
#ifndef GF_ISOM_SUBTYPE_HEV1
|
||||
#define GF_ISOM_SUBTYPE_HEV1 GF_4CC('h', 'e', 'v', '1')
|
||||
#endif
|
||||
#ifndef GF_ISOM_SUBTYPE_HVC1
|
||||
#define GF_ISOM_SUBTYPE_HVC1 GF_4CC('h', 'v', 'c', '1')
|
||||
#endif
|
||||
|
||||
// VOBSUB subtype (mp4s or MPEG)
|
||||
#ifndef GF_ISOM_SUBTYPE_MPEG4
|
||||
#define GF_ISOM_SUBTYPE_MPEG4 GF_4CC('M', 'P', 'E', 'G')
|
||||
#endif
|
||||
|
||||
static int16_t bswap16(int16_t v)
|
||||
{
|
||||
return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00);
|
||||
}
|
||||
|
||||
static long bswap32(long v)
|
||||
static int32_t bswap32(int32_t v)
|
||||
{
|
||||
// For 0x12345678 returns 78563412
|
||||
long swapped = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) | ((v & 0xFF000000) >> 24);
|
||||
// Use int32_t instead of long for consistent behavior across platforms
|
||||
// (long is 4 bytes on Windows x64 but 8 bytes on Linux x64)
|
||||
int32_t swapped = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) | ((v & 0xFF000000) >> 24);
|
||||
return swapped;
|
||||
}
|
||||
static struct
|
||||
@@ -67,10 +84,10 @@ static int process_avc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_AVCConf
|
||||
nal_length = s->data[i];
|
||||
break;
|
||||
case 2:
|
||||
nal_length = bswap16(*(short *)&s->data[i]);
|
||||
nal_length = bswap16(*(int16_t *)&s->data[i]);
|
||||
break;
|
||||
case 4:
|
||||
nal_length = bswap32(*(long *)&s->data[i]);
|
||||
nal_length = bswap32(*(int32_t *)&s->data[i]);
|
||||
break;
|
||||
}
|
||||
const u32 previous_index = i;
|
||||
@@ -101,6 +118,88 @@ static int process_avc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_AVCConf
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
 * Walks the length-prefixed NAL units of one HEVC sample and hands each one
 * to do_NAL(), then stores any caption data collected for the sample.
 *
 * @param ctx       Library context used to look up the decoder/encoder contexts.
 * @param timescale Media timescale used to convert DTS + CTS offset to a PTS.
 * @param c         HEVC configuration; only nal_unit_size (bytes per length
 *                  prefix: 1, 2 or 4) is read here.
 * @param s         Sample whose data/dataLength/DTS/CTS_Offset are read.
 * @param sub       Subtitle accumulator passed through to do_NAL()/store_hdcc().
 * @return          The local status value, which is never changed from 0 in
 *                  this function (corrupt input is logged and parsing stops).
 */
static int process_hevc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_HEVCConfig *c, GF_ISOSample *s, struct cc_subtitle *sub)
{
	int status = 0;
	u32 i;
	s32 signed_cts = (s32)s->CTS_Offset;
	struct lib_cc_decode *dec_ctx = NULL;
	struct encoder_ctx *enc_ctx = NULL;

	dec_ctx = update_decoder_list(ctx);
	enc_ctx = update_encoder_list(ctx);

	// Enable HEVC mode for NAL parsing
	dec_ctx->avc_ctx->is_hevc = 1;

	set_current_pts(dec_ctx->timing, (s->DTS + signed_cts) * MPEG_CLOCK_FREQ / timescale);
	set_fts(dec_ctx->timing);

	for (i = 0; i < s->dataLength;)
	{
		u32 nal_length;
		// Read the prefix byte-wise as unsigned values: this avoids misaligned
		// int16_t/int32_t loads, host-endianness assumptions and sign extension.
		const unsigned char *prefix = (const unsigned char *)&s->data[i];

		// i < dataLength here, so the subtraction cannot wrap.
		if (c->nal_unit_size > s->dataLength - i)
		{
			mprint("Corrupted packet detected in process_hevc_sample. dataLength "
			       "%u is less than index %u + nal_unit_size %u. Ignoring.\n",
			       s->dataLength, i, c->nal_unit_size);
			return status;
		}
		switch (c->nal_unit_size)
		{
			case 1:
				nal_length = prefix[0];
				break;
			case 2:
				nal_length = ((u32)prefix[0] << 8) | (u32)prefix[1];
				break;
			case 4:
				nal_length = ((u32)prefix[0] << 24) | ((u32)prefix[1] << 16) |
					     ((u32)prefix[2] << 8) | (u32)prefix[3];
				break;
			default:
				mprint("Unexpected nal_unit_size %u in HEVC config\n", c->nal_unit_size);
				return status;
		}
		const u32 previous_index = i;
		i += c->nal_unit_size;
		// Compare against the bytes that remain instead of computing
		// i + nal_length: that sum can wrap around 32 bits (e.g. a prefix of
		// 0xFFFFFFFF) and slip past a range check, leading to an out-of-bounds read.
		if (nal_length > s->dataLength - i)
		{
			mprint("Corrupted sample detected in process_hevc_sample. dataLength %u "
			       "is less than index %u + nal_unit_size %u + nal_length %u. Ignoring.\n",
			       s->dataLength, previous_index, c->nal_unit_size, nal_length);
			return status;
		}

		s_nalu_stats.total += 1;
		temp_debug = 0;

		if (nal_length > 0)
		{
			// For HEVC, NAL type is in bits [6:1] of byte 0
			u8 nal_type = ((unsigned char)s->data[i] >> 1) & 0x3F;
			if (nal_type < 32)
				s_nalu_stats.type[nal_type] += 1;
			do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(s->data[i]), nal_length, sub);
		}
		i += nal_length;
	}
	assert(i == s->dataLength);

	// For HEVC, we need to flush CC data after each sample (unlike H.264 which does this in slice_header)
	// This is because HEVC SEI messages contain the CC data and we don't parse slice headers
	if (dec_ctx->avc_ctx->cc_count > 0)
	{
		// Store the CC data for processing
		store_hdcc(enc_ctx, dec_ctx, dec_ctx->avc_ctx->cc_data, dec_ctx->avc_ctx->cc_count,
			   dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, sub);
		dec_ctx->avc_ctx->cc_buffer_saved = CCX_TRUE;
		dec_ctx->avc_ctx->cc_count = 0;
	}

	return status;
}
|
||||
static int process_xdvb_track(struct lib_ccx_ctx *ctx, const char *basename, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
|
||||
{
|
||||
u32 timescale, i, sample_count;
|
||||
@@ -111,6 +210,13 @@ static int process_xdvb_track(struct lib_ccx_ctx *ctx, const char *basename, GF_
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
enc_ctx = update_encoder_list(ctx);
|
||||
|
||||
// Set buffer data type to CCX_PES for MP4/MOV MPEG-2 tracks.
|
||||
// This ensures cb_field counters are not incremented in do_cb(),
|
||||
// which is correct because container formats associate captions
|
||||
// with the frame's PTS directly.
|
||||
dec_ctx->in_bufferdatatype = CCX_PES;
|
||||
|
||||
if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
|
||||
{
|
||||
return 0;
|
||||
@@ -158,6 +264,12 @@ static int process_avc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_I
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
|
||||
// Set buffer data type to CCX_H264 for MP4/MOV AVC tracks.
|
||||
// This ensures cb_field counters are not incremented in do_cb(),
|
||||
// which is correct because container formats associate captions
|
||||
// with the frame's PTS directly.
|
||||
dec_ctx->in_bufferdatatype = CCX_H264;
|
||||
|
||||
if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
|
||||
{
|
||||
return 0;
|
||||
@@ -223,6 +335,227 @@ static int process_avc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_I
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
 * Iterates over every sample of an HEVC track and passes each one to
 * process_hevc_sample(), re-fetching the HEVC configuration whenever the
 * sample description index changes.
 *
 * @param ctx      Library context (decoder lookup, progress bookkeeping).
 * @param basename Not used inside this function.
 * @param f        Open ISO media file.
 * @param track    Track number handed to the gf_isom_* calls.
 * @param sub      Subtitle accumulator forwarded to process_hevc_sample().
 * @return         0 when the track has no samples or all samples were handled,
 *                 -1 if an HEVC configuration could not be obtained, otherwise
 *                 the first non-zero status from process_hevc_sample().
 */
static int process_hevc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
{
	GF_HEVCConfig *hevc_cfg = NULL;
	u32 active_sdi = 0;
	int result = 0;
	struct lib_cc_decode *dec_ctx = update_decoder_list(ctx);

	// Switch the shared AVC context into HEVC mode.
	dec_ctx->avc_ctx->is_hevc = 1;

	// Container tracks carry captions tied directly to the frame PTS, so the
	// buffer type is set to CCX_H264 to keep do_cb() from bumping the
	// cb_field counters.
	dec_ctx->in_bufferdatatype = CCX_H264;

	const u32 total_samples = gf_isom_get_sample_count(f, track);
	if (total_samples < 1)
		return 0;

	const u32 timescale = gf_isom_get_media_timescale(f, track);

	for (u32 n = 0; n < total_samples; n++)
	{
		u32 sdi;
		GF_ISOSample *sample = gf_isom_get_sample(f, track, n + 1, &sdi);

		if (sample != NULL)
		{
			// A new sample description means a new decoder configuration.
			if (sdi != active_sdi)
			{
				if (hevc_cfg != NULL)
					gf_odf_hevc_cfg_del(hevc_cfg);

				hevc_cfg = gf_isom_hevc_config_get(f, track, sdi);
				if (hevc_cfg != NULL)
					active_sdi = sdi;
			}

			if (hevc_cfg != NULL)
				result = process_hevc_sample(ctx, timescale, hevc_cfg, sample, sub);
			else
				result = -1;

			gf_isom_sample_del(&sample);

			if (result != 0)
				break;
		}

		// Only report when the integer percentage actually changes.
		const int percent = (int)((n * 100) / total_samples);
		if (percent != ctx->last_reported_progress)
		{
			const int elapsed = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
			activity_progress(percent, elapsed / 60, elapsed % 60);
			ctx->last_reported_progress = percent;
		}
	}

	const int final_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
	activity_progress(100, final_sec / 60, final_sec % 60);

	if (hevc_cfg != NULL)
	{
		gf_odf_hevc_cfg_del(hevc_cfg);
		hevc_cfg = NULL;
	}

	return result;
}
|
||||
|
||||
/**
 * Decodes every sample of a VOBSUB (subpicture) track with the VOBSUB decoder
 * and sends each decoded subtitle to the encoder.
 *
 * Each subtitle is shown until the next sample's timestamp; when that cannot
 * be determined (last sample, failed fetch, or a computed value of 0) a
 * 5 second duration is used.
 *
 * @param ctx   Library context (decoder/encoder lookup, progress bookkeeping).
 * @param f     Open ISO media file.
 * @param track Track number handed to the gf_isom_* calls.
 * @param sub   Only sub->got_output is written, set to 1 once any subtitle
 *              has been encoded.
 * @return      0 on completion or for an empty track, -1 if the track reports
 *              a zero timescale. Missing OCR support or decoder
 *              initialisation failure is reported through fatal().
 */
static int process_vobsub_track(struct lib_ccx_ctx *ctx, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
{
	u32 timescale, i, sample_count;
	int status = 0;
	struct lib_cc_decode *dec_ctx = NULL;
	struct encoder_ctx *enc_ctx = NULL;
	struct vobsub_ctx *vob_ctx = NULL;

	dec_ctx = update_decoder_list(ctx);
	enc_ctx = update_encoder_list(ctx);

	if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
	{
		return 0;
	}

	timescale = gf_isom_get_media_timescale(f, track);
	if (timescale == 0)
	{
		// Every timestamp below divides by the timescale; bail out instead of
		// hitting an integer division by zero on a malformed track.
		mprint("Error: VOBSUB track reports a timescale of 0, cannot compute timestamps\n");
		return -1;
	}

	/* Check if OCR is available */
	if (!vobsub_ocr_available())
	{
		fatal(EXIT_NOT_CLASSIFIED,
		      "VOBSUB to text conversion requires OCR support.\n"
		      "Please rebuild CCExtractor with -DWITH_OCR=ON");
	}

	/* Initialize VOBSUB decoder */
	vob_ctx = init_vobsub_decoder();
	if (!vob_ctx)
	{
		fatal(EXIT_NOT_CLASSIFIED,
		      "VOBSUB decoder initialization failed.\n"
		      "Please ensure Tesseract is installed with language data.");
	}

	/* Try to get decoder config for palette info */
	GF_GenericSampleDescription *gdesc = gf_isom_get_generic_sample_description(f, track, 1);
	if (gdesc && gdesc->extension_buf && gdesc->extension_buf_size > 0)
	{
		/* The extension buffer may contain an idx-like header with palette.
		 * Copy it so it can be NUL-terminated before being parsed as text. */
		char *header = malloc(gdesc->extension_buf_size + 1);
		if (header)
		{
			memcpy(header, gdesc->extension_buf, gdesc->extension_buf_size);
			header[gdesc->extension_buf_size] = '\0';
			vobsub_parse_palette(vob_ctx, header);
			free(header);
		}
	}
	if (gdesc)
	{
		/* The descriptor owns a separately allocated extension buffer;
		 * releasing only the struct would leak it. */
		if (gdesc->extension_buf)
			free(gdesc->extension_buf);
		free(gdesc);
	}

	mprint("Processing VOBSUB track (%u samples)\n", sample_count);

	for (i = 0; i < sample_count; i++)
	{
		u32 sdi;
		GF_ISOSample *s = gf_isom_get_sample(f, track, i + 1, &sdi);

		if (s != NULL)
		{
			s32 signed_cts = (s32)s->CTS_Offset;
			LLONG start_time_ms = (LLONG)((s->DTS + signed_cts) * 1000) / timescale;

			/* Calculate end time from next sample if available */
			LLONG end_time_ms = 0;
			if (i + 1 < sample_count)
			{
				u32 next_sdi;
				GF_ISOSample *next_s = gf_isom_get_sample(f, track, i + 2, &next_sdi);
				if (next_s)
				{
					s32 next_signed_cts = (s32)next_s->CTS_Offset;
					end_time_ms = (LLONG)((next_s->DTS + next_signed_cts) * 1000) / timescale;
					gf_isom_sample_del(&next_s);
				}
			}
			if (end_time_ms == 0)
				end_time_ms = start_time_ms + 5000; /* Default 5 second duration */

			set_current_pts(dec_ctx->timing, (s->DTS + signed_cts) * MPEG_CLOCK_FREQ / timescale);
			set_fts(dec_ctx->timing);

			/* Decode SPU and run OCR */
			struct cc_subtitle vob_sub;
			memset(&vob_sub, 0, sizeof(vob_sub));

			int ret = vobsub_decode_spu(vob_ctx,
						    (unsigned char *)s->data, s->dataLength,
						    start_time_ms, end_time_ms,
						    &vob_sub);

			/* NOTE(review): vob_sub.data is only released on this success path;
			 * confirm vobsub_decode_spu() never leaves data allocated otherwise. */
			if (ret == 0 && vob_sub.got_output)
			{
				/* Encode the subtitle to output format */
				encode_sub(enc_ctx, &vob_sub);
				sub->got_output = 1;

				/* Free subtitle data */
				if (vob_sub.data)
				{
					struct cc_bitmap *rect = (struct cc_bitmap *)vob_sub.data;
					for (int j = 0; j < vob_sub.nb_data; j++)
					{
						if (rect[j].data0)
							free(rect[j].data0);
						if (rect[j].data1)
							free(rect[j].data1);
#ifdef ENABLE_OCR
						if (rect[j].ocr_text)
							free(rect[j].ocr_text);
#endif
					}
					free(vob_sub.data);
				}
			}

			gf_isom_sample_del(&s);
		}

		int progress = (int)((i * 100) / sample_count);
		if (ctx->last_reported_progress != progress)
		{
			int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
			activity_progress(progress, cur_sec / 60, cur_sec % 60);
			ctx->last_reported_progress = progress;
		}
	}

	int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
	activity_progress(100, cur_sec / 60, cur_sec % 60);

	delete_vobsub_decoder(&vob_ctx);
	mprint("VOBSUB processing complete\n");

	return status;
}
|
||||
|
||||
static char *format_duration(u64 dur, u32 timescale, char *szDur, size_t szDur_size)
|
||||
{
|
||||
u32 h, m, s, ms;
|
||||
@@ -416,7 +749,11 @@ static int process_clcp(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx,
|
||||
dbg_print(CCX_DMT_PARSE, "MP4-708: atom skipped (cc_type < 2)\n");
|
||||
continue;
|
||||
}
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_process_data(dec_ctx->dtvcc_rust, cc_valid, cc_type, temp[2], temp[3]);
|
||||
#else
|
||||
dtvcc_process_data(dec_ctx->dtvcc, (unsigned char *)temp);
|
||||
#endif
|
||||
cb_708++;
|
||||
}
|
||||
if (ctx->write_format == CCX_OF_MCC)
|
||||
@@ -544,7 +881,7 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
{
|
||||
int mp4_ret = 0;
|
||||
GF_ISOFile *f;
|
||||
u32 i, j, track_count, avc_track_count, cc_track_count;
|
||||
u32 i, j, track_count, avc_track_count, hevc_track_count, cc_track_count;
|
||||
struct cc_subtitle dec_sub;
|
||||
struct lib_cc_decode *dec_ctx = NULL;
|
||||
struct encoder_ctx *enc_ctx = update_encoder_list(ctx);
|
||||
@@ -554,10 +891,19 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
if (enc_ctx)
|
||||
enc_ctx->timing = dec_ctx->timing;
|
||||
|
||||
// WARN: otherwise cea-708 will not work
|
||||
// WARN: otherwise cea-708 will not work
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx;
|
||||
#endif
|
||||
|
||||
memset(&dec_sub, 0, sizeof(dec_sub));
|
||||
if (file == NULL)
|
||||
{
|
||||
mprint("Error: NULL file path provided to processmp4\n");
|
||||
return -1;
|
||||
}
|
||||
mprint("Opening \'%s\': ", file);
|
||||
#ifdef MP4_DEBUG
|
||||
gf_log_set_tool_level(GF_LOG_CONTAINER, GF_LOG_DEBUG);
|
||||
@@ -575,7 +921,9 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
track_count = gf_isom_get_track_count(f);
|
||||
|
||||
avc_track_count = 0;
|
||||
hevc_track_count = 0;
|
||||
cc_track_count = 0;
|
||||
u32 vobsub_track_count = 0;
|
||||
|
||||
for (i = 0; i < track_count; i++)
|
||||
{
|
||||
@@ -589,9 +937,13 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
cc_track_count++;
|
||||
if (type == GF_ISOM_MEDIA_VISUAL && subtype == GF_ISOM_SUBTYPE_AVC_H264)
|
||||
avc_track_count++;
|
||||
if (type == GF_ISOM_MEDIA_VISUAL && (subtype == GF_ISOM_SUBTYPE_HEV1 || subtype == GF_ISOM_SUBTYPE_HVC1))
|
||||
hevc_track_count++;
|
||||
if (type == GF_ISOM_MEDIA_SUBPIC && subtype == GF_ISOM_SUBTYPE_MPEG4)
|
||||
vobsub_track_count++;
|
||||
}
|
||||
|
||||
mprint("MP4: found %u tracks: %u avc and %u cc\n", track_count, avc_track_count, cc_track_count);
|
||||
mprint("MP4: found %u tracks: %u avc, %u hevc, %u cc, %u vobsub\n", track_count, avc_track_count, hevc_track_count, cc_track_count, vobsub_track_count);
|
||||
|
||||
for (i = 0; i < track_count; i++)
|
||||
{
|
||||
@@ -661,6 +1013,72 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
}
|
||||
break;
|
||||
|
||||
case MEDIA_TYPE(GF_ISOM_MEDIA_VISUAL, GF_ISOM_SUBTYPE_HEV1): // vide:hev1 (HEVC)
|
||||
case MEDIA_TYPE(GF_ISOM_MEDIA_VISUAL, GF_ISOM_SUBTYPE_HVC1): // vide:hvc1 (HEVC)
|
||||
if (cc_track_count && !cfg->mp4vidtrack)
|
||||
continue;
|
||||
// If there are multiple tracks, change fd for different tracks
|
||||
if (hevc_track_count > 1)
|
||||
{
|
||||
switch_output_file(ctx, enc_ctx, i);
|
||||
}
|
||||
// Enable HEVC mode for caption extraction
|
||||
dec_ctx->avc_ctx->is_hevc = 1;
|
||||
|
||||
// Process VPS/SPS/PPS from HEVC config to enable SEI parsing
|
||||
GF_HEVCConfig *hevc_cnf = gf_isom_hevc_config_get(f, i + 1, 1);
|
||||
if (hevc_cnf != NULL)
|
||||
{
|
||||
// Process parameter sets from config
|
||||
for (j = 0; j < gf_list_count(hevc_cnf->param_array); j++)
|
||||
{
|
||||
GF_NALUFFParamArray *ar = (GF_NALUFFParamArray *)gf_list_get(hevc_cnf->param_array, j);
|
||||
if (ar)
|
||||
{
|
||||
for (u32 k = 0; k < gf_list_count(ar->nalus); k++)
|
||||
{
|
||||
GF_NALUFFParam *sl = (GF_NALUFFParam *)gf_list_get(ar->nalus, k);
|
||||
if (sl && sl->data && sl->size > 0)
|
||||
{
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)sl->data, sl->size, &dec_sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
gf_odf_hevc_cfg_del(hevc_cnf);
|
||||
}
|
||||
if (process_hevc_track(ctx, file, f, i + 1, &dec_sub) != 0)
|
||||
{
|
||||
mprint("Error on process_hevc_track()\n");
|
||||
free(dec_ctx->xds_ctx);
|
||||
return -3;
|
||||
}
|
||||
if (dec_sub.got_output)
|
||||
{
|
||||
mp4_ret = 1;
|
||||
encode_sub(enc_ctx, &dec_sub);
|
||||
dec_sub.got_output = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case MEDIA_TYPE(GF_ISOM_MEDIA_SUBPIC, GF_ISOM_SUBTYPE_MPEG4): // subp:MPEG (VOBSUB)
|
||||
// If there are multiple VOBSUB tracks, change fd for different tracks
|
||||
if (vobsub_track_count > 1)
|
||||
{
|
||||
switch_output_file(ctx, enc_ctx, i);
|
||||
}
|
||||
if (process_vobsub_track(ctx, f, i + 1, &dec_sub) != 0)
|
||||
{
|
||||
mprint("Error on process_vobsub_track()\n");
|
||||
free(dec_ctx->xds_ctx);
|
||||
return -3;
|
||||
}
|
||||
if (dec_sub.got_output)
|
||||
{
|
||||
mp4_ret = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (type != GF_ISOM_MEDIA_CLOSED_CAPTION && type != GF_ISOM_MEDIA_SUBT && type != GF_ISOM_MEDIA_TEXT)
|
||||
break; // ignore non cc track
|
||||
@@ -794,10 +1212,20 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
else
|
||||
mprint("Found no AVC track(s). ");
|
||||
|
||||
if (cc_track_count)
|
||||
mprint("Found %d CC track(s).\n", cc_track_count);
|
||||
if (hevc_track_count)
|
||||
mprint("Found %d HEVC track(s). ", hevc_track_count);
|
||||
else
|
||||
mprint("Found no dedicated CC track(s).\n");
|
||||
mprint("Found no HEVC track(s). ");
|
||||
|
||||
if (cc_track_count)
|
||||
mprint("Found %d CC track(s). ", cc_track_count);
|
||||
else
|
||||
mprint("Found no dedicated CC track(s). ");
|
||||
|
||||
if (vobsub_track_count)
|
||||
mprint("Found %d VOBSUB track(s).\n", vobsub_track_count);
|
||||
else
|
||||
mprint("\n");
|
||||
|
||||
ctx->freport.mp4_cc_track_cnt = cc_track_count;
|
||||
|
||||
|
||||
@@ -103,7 +103,8 @@ int set_nonblocking(int fd);
|
||||
void connect_to_srv(const char *addr, const char *port, const char *cc_desc, const char *pwd)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_connect_to_srv(addr, port, cc_desc, pwd);
|
||||
(void)ccxr_connect_to_srv(addr, port, cc_desc, pwd);
|
||||
return;
|
||||
#endif
|
||||
if (NULL == addr)
|
||||
{
|
||||
@@ -137,7 +138,8 @@ void connect_to_srv(const char *addr, const char *port, const char *cc_desc, con
|
||||
void net_send_header(const unsigned char *data, size_t len)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_send_header(data, len);
|
||||
(void)ccxr_net_send_header(data, len);
|
||||
return;
|
||||
#endif
|
||||
assert(srv_sd > 0);
|
||||
|
||||
@@ -188,7 +190,8 @@ int net_send_cc(const unsigned char *data, int len, void *private_data, struct c
|
||||
void net_check_conn()
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_check_conn();
|
||||
ccxr_net_check_conn();
|
||||
return;
|
||||
#endif
|
||||
time_t now;
|
||||
static time_t last_ping = 0;
|
||||
@@ -252,7 +255,8 @@ void net_send_epg(
|
||||
const char *category)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_send_epg(start, stop, title, desc, lang, category);
|
||||
(void)ccxr_net_send_epg(start, stop, title, desc, lang, category);
|
||||
return;
|
||||
#endif
|
||||
size_t st;
|
||||
size_t sp;
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
#include <dirent.h>
|
||||
#include "ccx_encoders_helpers.h"
|
||||
#include "ccx_encoders_spupng.h"
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach-o/dyld.h>
|
||||
#endif
|
||||
#include "ocr.h"
|
||||
|
||||
struct ocrCtx
|
||||
@@ -100,6 +105,68 @@ void delete_ocr(void **arg)
|
||||
freep(arg);
|
||||
}
|
||||
|
||||
/**
|
||||
* get_executable_directory
|
||||
*
|
||||
* Returns the directory containing the executable.
|
||||
* Returns a pointer to a static buffer, or NULL on failure.
|
||||
*/
|
||||
static const char *get_executable_directory(void)
{
	/* Result is computed once and cached; an empty string means "unknown". */
	static char cached_dir[1024] = {0};
	static int probed = 0;

	if (!probed)
	{
		probed = 1;

		/* Step 1: obtain the full executable path the platform-specific way. */
#ifdef _WIN32
		char full_path[MAX_PATH];
		const char separator = '\\';
		DWORD got = GetModuleFileNameA(NULL, full_path, MAX_PATH);
		int have_path = (got != 0 && got < MAX_PATH);
#elif defined(__linux__)
		char full_path[1024];
		const char separator = '/';
		ssize_t got = readlink("/proc/self/exe", full_path, sizeof(full_path) - 1);
		int have_path = (got > 0);
		if (have_path)
			full_path[got] = '\0'; /* readlink() does not NUL-terminate */
#elif defined(__APPLE__)
		char full_path[1024];
		const char separator = '/';
		uint32_t capacity = sizeof(full_path);
		int have_path = (_NSGetExecutablePath(full_path, &capacity) == 0);
#else
		/* No lookup implemented for this platform: leave the cache empty. */
		char full_path[1] = {'\0'};
		const char separator = '/';
		int have_path = 0;
#endif

		/* Step 2: cut the path at its last separator and cache the directory. */
		if (have_path)
		{
			char *cut = strrchr(full_path, separator);
			if (cut != NULL)
			{
				*cut = '\0';
				strncpy(cached_dir, full_path, sizeof(cached_dir) - 1);
				cached_dir[sizeof(cached_dir) - 1] = '\0';
			}
		}
	}

	return cached_dir[0] ? cached_dir : NULL;
}
|
||||
|
||||
/**
|
||||
* probe_tessdata_location
|
||||
*
|
||||
@@ -107,8 +174,10 @@ void delete_ocr(void **arg)
|
||||
*
|
||||
* Priority of Tesseract traineddata file search paths:-
|
||||
* 1. tessdata in TESSDATA_PREFIX, if it is specified. Overrides others
|
||||
* 2. tessdata in current working directory
|
||||
* 3. tessdata in /usr/share
|
||||
* 2. tessdata in executable directory (for bundled tessdata)
|
||||
* 3. tessdata in current working directory
|
||||
* 4. tessdata in system locations (/usr/share, etc.)
|
||||
* 5. tessdata in default Tesseract install location (Windows)
|
||||
*/
|
||||
char *probe_tessdata_location(const char *lang)
|
||||
{
|
||||
@@ -116,6 +185,7 @@ char *probe_tessdata_location(const char *lang)
|
||||
|
||||
const char *paths[] = {
|
||||
getenv("TESSDATA_PREFIX"),
|
||||
get_executable_directory(),
|
||||
"./",
|
||||
"/usr/share/",
|
||||
"/usr/local/share/",
|
||||
@@ -211,6 +281,13 @@ void *init_ocr(int lang_index)
|
||||
// set PSM mode
|
||||
TessBaseAPISetPageSegMode(ctx->api, ccx_options.psm);
|
||||
|
||||
// Set character blacklist to prevent common OCR errors (e.g. | vs I)
|
||||
// These characters are rarely used in subtitles but often misrecognized
|
||||
if (ccx_options.ocr_blacklist)
|
||||
{
|
||||
TessBaseAPISetVariable(ctx->api, "tessedit_char_blacklist", "|\\`_~");
|
||||
}
|
||||
|
||||
free(pars_vec);
|
||||
free(pars_values);
|
||||
|
||||
@@ -281,6 +358,176 @@ BOX *ignore_alpha_at_edge(png_byte *alpha, unsigned char *indata, int w, int h,
|
||||
return cropWindow;
|
||||
}
|
||||
|
||||
/**
 * Vertical extent of one detected text line inside a subtitle bitmap.
 * Both coordinates are row indices and both are inclusive.
 */
struct line_bounds
{
	int start_y; /* first (top) row belonging to the line */
	int end_y;   /* last (bottom) row belonging to the line */
};
|
||||
|
||||
/**
|
||||
* Detects horizontal text line boundaries in a bitmap by finding rows of
|
||||
* fully transparent pixels that separate lines of text.
|
||||
*
|
||||
* @param alpha Palette alpha values (indexed by pixel value)
|
||||
* @param indata Bitmap pixel data (palette indices, w*h bytes)
|
||||
* @param w Image width
|
||||
* @param h Image height
|
||||
* @param lines Output: allocated array of line boundaries (caller must free)
|
||||
* @param num_lines Output: number of lines found
|
||||
* @param min_gap Minimum consecutive transparent rows to count as line separator
|
||||
* @return 0 on success, -1 on failure
|
||||
*/
|
||||
static int detect_text_lines(png_byte *alpha, unsigned char *indata,
|
||||
int w, int h,
|
||||
struct line_bounds **lines, int *num_lines,
|
||||
int min_gap)
|
||||
{
|
||||
if (!alpha || !indata || !lines || !num_lines || w <= 0 || h <= 0)
|
||||
return -1;
|
||||
|
||||
*lines = NULL;
|
||||
*num_lines = 0;
|
||||
|
||||
// Allocate array to track which rows have visible content
|
||||
int *row_has_content = (int *)malloc(h * sizeof(int));
|
||||
if (!row_has_content)
|
||||
return -1;
|
||||
|
||||
// Scan each row to determine if it has any visible (non-transparent) pixels
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
row_has_content[i] = 0;
|
||||
for (int j = 0; j < w; j++)
|
||||
{
|
||||
int index = indata[i * w + j];
|
||||
if (alpha[index] != 0)
|
||||
{
|
||||
row_has_content[i] = 1;
|
||||
break; // Found visible pixel, no need to check rest of row
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Count lines by finding runs of content rows separated by gaps
|
||||
int max_lines = (h / 2) + 1; // Conservative upper bound
|
||||
struct line_bounds *temp_lines = (struct line_bounds *)malloc(max_lines * sizeof(struct line_bounds));
|
||||
if (!temp_lines)
|
||||
{
|
||||
free(row_has_content);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int line_count = 0;
|
||||
int in_line = 0;
|
||||
int line_start = 0;
|
||||
int gap_count = 0;
|
||||
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
if (row_has_content[i])
|
||||
{
|
||||
if (!in_line)
|
||||
{
|
||||
// Start of a new line
|
||||
line_start = i;
|
||||
in_line = 1;
|
||||
}
|
||||
gap_count = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (in_line)
|
||||
{
|
||||
gap_count++;
|
||||
if (gap_count >= min_gap)
|
||||
{
|
||||
// End of line found (gap is large enough)
|
||||
if (line_count < max_lines)
|
||||
{
|
||||
temp_lines[line_count].start_y = line_start;
|
||||
temp_lines[line_count].end_y = i - gap_count;
|
||||
line_count++;
|
||||
}
|
||||
in_line = 0;
|
||||
gap_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle last line if we ended while still in a line
|
||||
if (in_line && line_count < max_lines)
|
||||
{
|
||||
temp_lines[line_count].start_y = line_start;
|
||||
// Find the last row with content
|
||||
int last_content = h - 1;
|
||||
while (last_content > line_start && !row_has_content[last_content])
|
||||
last_content--;
|
||||
temp_lines[line_count].end_y = last_content;
|
||||
line_count++;
|
||||
}
|
||||
|
||||
free(row_has_content);
|
||||
|
||||
if (line_count == 0)
|
||||
{
|
||||
free(temp_lines);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Shrink allocation to actual size
|
||||
*lines = (struct line_bounds *)realloc(temp_lines, line_count * sizeof(struct line_bounds));
|
||||
if (!*lines)
|
||||
{
|
||||
*lines = temp_lines; // Keep original if realloc fails
|
||||
}
|
||||
*num_lines = line_count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs OCR on a single text line image using PSM 7 (single line mode).
|
||||
*
|
||||
* @param ctx OCR context (contains Tesseract API)
|
||||
* @param line_pix Pre-processed PIX for single line (grayscale, inverted)
|
||||
* @return Recognized text (caller must free with free()), or NULL on failure
|
||||
*/
|
||||
static char *ocr_single_line(struct ocrCtx *ctx, PIX *line_pix)
|
||||
{
|
||||
if (!ctx || !ctx->api || !line_pix)
|
||||
return NULL;
|
||||
|
||||
// Save current PSM
|
||||
int saved_psm = TessBaseAPIGetPageSegMode(ctx->api);
|
||||
|
||||
// Set PSM 7 for single line recognition
|
||||
TessBaseAPISetPageSegMode(ctx->api, 7); // PSM_SINGLE_LINE
|
||||
|
||||
// Perform OCR
|
||||
TessBaseAPISetImage2(ctx->api, line_pix);
|
||||
BOOL ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
|
||||
char *text = NULL;
|
||||
if (!ret)
|
||||
{
|
||||
char *tess_text = TessBaseAPIGetUTF8Text(ctx->api);
|
||||
if (tess_text)
|
||||
{
|
||||
text = strdup(tess_text);
|
||||
TessDeleteText(tess_text);
|
||||
}
|
||||
}
|
||||
|
||||
// Restore original PSM
|
||||
TessBaseAPISetPageSegMode(ctx->api, saved_psm);
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
void debug_tesseract(struct ocrCtx *ctx, char *dump_path)
|
||||
{
|
||||
#ifdef OCR_DEBUG
|
||||
@@ -327,6 +574,8 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
unsigned int *data, *ppixel;
|
||||
BOOL tess_ret = FALSE;
|
||||
struct ocrCtx *ctx = arg;
|
||||
char *combined_text = NULL; // Used by line-split mode
|
||||
size_t combined_len = 0; // Used by line-split mode
|
||||
pix = pixCreate(w, h, 32);
|
||||
color_pix = pixCreate(w, h, 32);
|
||||
if (pix == NULL || color_pix == NULL)
|
||||
@@ -391,30 +640,146 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
// The original bitmap quality (e.g., 520x84) is sufficient for Tesseract
|
||||
|
||||
if (cpix_gs == NULL)
|
||||
tess_ret = -1;
|
||||
else
|
||||
{
|
||||
TessBaseAPISetImage2(ctx->api, cpix_gs);
|
||||
tess_ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
debug_tesseract(ctx, "./temp/");
|
||||
if (tess_ret)
|
||||
// Grayscale conversion failed (likely due to invalid/corrupt bitmap data)
|
||||
// Skip this bitmap instead of crashing - this can happen with
|
||||
// corrupted DVB subtitle packets or live stream discontinuities
|
||||
mprint("\nIn ocr_bitmap: Failed to convert bitmap to grayscale. Skipped.\n");
|
||||
|
||||
boxDestroy(&crop_points);
|
||||
pixDestroy(&pix);
|
||||
pixDestroy(&cpix);
|
||||
pixDestroy(&color_pix);
|
||||
pixDestroy(&color_pix_out);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Line splitting mode: detect lines and OCR each separately with PSM 7
|
||||
if (ccx_options.ocr_line_split && h > 30)
|
||||
{
|
||||
struct line_bounds *lines = NULL;
|
||||
int num_lines = 0;
|
||||
|
||||
// Use min_gap of 3 rows to detect line boundaries
|
||||
if (detect_text_lines(alpha, indata, w, h, &lines, &num_lines, 3) == 0 && num_lines > 1)
|
||||
{
|
||||
mprint("\nIn ocr_bitmap: Failed to perform OCR. Skipped.\n");
|
||||
// Multiple lines detected - process each separately with PSM 7
|
||||
// (combined_text and combined_len are declared at function scope)
|
||||
|
||||
boxDestroy(&crop_points);
|
||||
pixDestroy(&pix);
|
||||
pixDestroy(&cpix);
|
||||
pixDestroy(&cpix_gs);
|
||||
pixDestroy(&color_pix);
|
||||
pixDestroy(&color_pix_out);
|
||||
for (int line_idx = 0; line_idx < num_lines; line_idx++)
|
||||
{
|
||||
int line_h = lines[line_idx].end_y - lines[line_idx].start_y + 1;
|
||||
if (line_h <= 0)
|
||||
continue;
|
||||
|
||||
return NULL;
|
||||
// Extract line region from the grayscale image
|
||||
BOX *line_box = boxCreate(0, lines[line_idx].start_y,
|
||||
pixGetWidth(cpix_gs), line_h);
|
||||
PIX *line_pix_raw = pixClipRectangle(cpix_gs, line_box, NULL);
|
||||
boxDestroy(&line_box);
|
||||
|
||||
if (line_pix_raw)
|
||||
{
|
||||
// Add white padding around the line (helps Tesseract with edge characters)
|
||||
// The image is inverted (dark text on light bg), so add white (255) border
|
||||
int padding = 10;
|
||||
PIX *line_pix = pixAddBorderGeneral(line_pix_raw, padding, padding, padding, padding, 255);
|
||||
pixDestroy(&line_pix_raw);
|
||||
if (!line_pix)
|
||||
continue;
|
||||
char *line_text = ocr_single_line(ctx, line_pix);
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
if (line_text)
|
||||
{
|
||||
// Trim trailing whitespace from line
|
||||
size_t line_len = strlen(line_text);
|
||||
while (line_len > 0 && (line_text[line_len - 1] == '\n' ||
|
||||
line_text[line_len - 1] == '\r' ||
|
||||
line_text[line_len - 1] == ' '))
|
||||
{
|
||||
line_text[--line_len] = '\0';
|
||||
}
|
||||
|
||||
if (line_len > 0)
|
||||
{
|
||||
// Append to combined result
|
||||
size_t new_len = combined_len + line_len + 2; // +1 for newline, +1 for null
|
||||
char *new_combined = (char *)realloc(combined_text, new_len);
|
||||
if (new_combined)
|
||||
{
|
||||
combined_text = new_combined;
|
||||
if (combined_len > 0)
|
||||
{
|
||||
combined_text[combined_len++] = '\n';
|
||||
}
|
||||
strcpy(combined_text + combined_len, line_text);
|
||||
combined_len += line_len;
|
||||
}
|
||||
}
|
||||
free(line_text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(lines);
|
||||
|
||||
if (combined_text && combined_len > 0)
|
||||
{
|
||||
// Successfully processed lines - skip whole-image OCR
|
||||
// but continue to color detection below
|
||||
goto line_split_color_detection;
|
||||
}
|
||||
|
||||
// If we got here, line splitting didn't produce results
|
||||
// Fall through to whole-image OCR
|
||||
if (combined_text)
|
||||
free(combined_text);
|
||||
combined_text = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Line detection failed or only 1 line - fall through to whole-image OCR
|
||||
if (lines)
|
||||
free(lines);
|
||||
}
|
||||
}
|
||||
|
||||
// Standard whole-image OCR path
|
||||
TessBaseAPISetImage2(ctx->api, cpix_gs);
|
||||
tess_ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
debug_tesseract(ctx, "./temp/");
|
||||
if (tess_ret)
|
||||
{
|
||||
mprint("\nIn ocr_bitmap: Failed to perform OCR. Skipped.\n");
|
||||
|
||||
boxDestroy(&crop_points);
|
||||
pixDestroy(&pix);
|
||||
pixDestroy(&cpix);
|
||||
pixDestroy(&cpix_gs);
|
||||
pixDestroy(&color_pix);
|
||||
pixDestroy(&color_pix_out);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *text_out_from_tes = TessBaseAPIGetUTF8Text(ctx->api);
|
||||
if (text_out_from_tes == NULL)
|
||||
fatal(CCX_COMMON_EXIT_BUG_BUG, "In ocr_bitmap: Failed to perform OCR - Failed to get text. Please report.\n", errno);
|
||||
{
|
||||
// OCR succeeded but no text was recognized - this is not a fatal error,
|
||||
// it just means the bitmap didn't contain recognizable text
|
||||
mprint("\nIn ocr_bitmap: OCR returned no text. Skipped.\n");
|
||||
|
||||
boxDestroy(&crop_points);
|
||||
pixDestroy(&pix);
|
||||
pixDestroy(&cpix);
|
||||
pixDestroy(&cpix_gs);
|
||||
pixDestroy(&color_pix);
|
||||
pixDestroy(&color_pix_out);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
// Make a copy and get rid of the one from Tesseract since we're going to be operating on it
|
||||
// and using it directly causes new/free() warnings.
|
||||
char *text_out = strdup(text_out_from_tes);
|
||||
@@ -424,6 +789,14 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In ocr_bitmap: Out of memory allocating text_out.");
|
||||
}
|
||||
|
||||
// Jump target for line-split mode: use combined_text and continue with color detection
|
||||
if (0)
|
||||
{
|
||||
line_split_color_detection:
|
||||
text_out = combined_text;
|
||||
combined_text = NULL; // Transfer ownership
|
||||
}
|
||||
|
||||
// Begin color detection
|
||||
// Using tlt_config.nofontcolor or ccx_options.nofontcolor (true when "--no-fontcolor" parameter used) to skip color detection if not required
|
||||
// This is also skipped if --no-spupngocr is set since the OCR output won't be used anyway
|
||||
|
||||
@@ -14,7 +14,19 @@ void dinit_write(struct ccx_s_write *wb)
|
||||
return;
|
||||
}
|
||||
if (wb->fh > 0)
|
||||
{
|
||||
// Check if the file is empty before closing
|
||||
off_t file_size = lseek(wb->fh, 0, SEEK_END);
|
||||
close(wb->fh);
|
||||
|
||||
// Delete empty output files to avoid generating useless 0-byte files
|
||||
// This commonly happens with -12 option when one field has no captions
|
||||
if (file_size == 0 && wb->filename != NULL)
|
||||
{
|
||||
unlink(wb->filename);
|
||||
mprint("Deleted empty output file: %s\n", wb->filename);
|
||||
}
|
||||
}
|
||||
freep(&wb->filename);
|
||||
freep(&wb->original_filename);
|
||||
if (wb->with_semaphore && wb->semaphore_filename)
|
||||
@@ -237,9 +249,22 @@ void writercwtdata(struct lib_cc_decode *ctx, const unsigned char *data, struct
|
||||
LLONG currfts = ctx->timing->fts_now + ctx->timing->fts_global;
|
||||
static uint16_t cbcount = 0;
|
||||
static int cbempty = 0;
|
||||
static unsigned char cbbuffer[0xFFFF * 3]; // TODO: use malloc
|
||||
static unsigned char *cbbuffer = NULL;
|
||||
static int cbbuffer_initialized = 0;
|
||||
static unsigned char cbheader[8 + 2];
|
||||
|
||||
if (!cbbuffer_initialized)
|
||||
{
|
||||
cbbuffer = (unsigned char *)malloc(0xFFFF * 3);
|
||||
if (cbbuffer == NULL)
|
||||
{
|
||||
mprint("Error: Failed to allocate memory for cbbuffer\n");
|
||||
return;
|
||||
}
|
||||
cbbuffer_initialized = 1;
|
||||
dbg_print(CCX_DMT_VERBOSE, "Allocated RCWT buffer (%d bytes)\n", 0xFFFF * 3);
|
||||
}
|
||||
|
||||
if ((prevfts != currfts && prevfts != -1) || data == NULL || cbcount == 0xFFFF)
|
||||
{
|
||||
// Remove trailing empty or 608 padding caption blocks
|
||||
|
||||
@@ -13,8 +13,11 @@
|
||||
#include "../lib_hash/sha2.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#if __has_include(<utf8proc.h>)
|
||||
#include <utf8proc.h>
|
||||
#else
|
||||
#include <utf8proc/utf8proc.h>
|
||||
|
||||
#endif
|
||||
#ifdef ENABLE_OCR
|
||||
#include <tesseract/capi.h>
|
||||
#include <leptonica/allheaders.h>
|
||||
@@ -398,6 +401,13 @@ void print_usage(void)
|
||||
mprint(" 12 Sparse text with OSD.\n");
|
||||
mprint(" 13 Raw line. Treat the image as a single text line,\n");
|
||||
mprint(" bypassing hacks that are Tesseract-specific.\n");
|
||||
mprint(" --ocr-line-split: Split subtitle images into lines before OCR.\n");
|
||||
mprint(" Uses PSM 7 (single text line mode) for each line,\n");
|
||||
mprint(" which can improve accuracy for multi-line bitmap subtitles\n");
|
||||
mprint(" (VOBSUB, DVD, DVB).\n");
|
||||
mprint(" --no-ocr-blacklist: Disable the OCR character blacklist. By default,\n");
|
||||
mprint(" CCExtractor blacklists characters like |, \\, `, _, ~\n");
|
||||
mprint(" that are commonly misrecognized (e.g. 'I' as '|').\n");
|
||||
mprint(" --mkvlang: For MKV subtitles, select which language's caption\n");
|
||||
mprint(" stream will be processed. e.g. 'eng' for English.\n");
|
||||
mprint(" Language codes can be either the 3 letters bibliographic\n");
|
||||
|
||||
@@ -78,6 +78,30 @@ void detect_stream_type(struct ccx_demuxer *ctx)
|
||||
ctx->startbytes[7] == 0xf8)
|
||||
ctx->stream_mode = CCX_SM_MCPOODLESRAW;
|
||||
}
|
||||
// Check for SCC (Scenarist Closed Caption) text format
|
||||
// SCC files start with "Scenarist_SCC V1.0" (18 bytes), optionally with UTF-8 BOM (3 bytes)
|
||||
if (ctx->stream_mode == CCX_SM_ELEMENTARY_OR_NOT_FOUND)
|
||||
{
|
||||
unsigned char *check_buf = ctx->startbytes;
|
||||
int check_pos = 0;
|
||||
|
||||
// Skip UTF-8 BOM if present
|
||||
if (ctx->startbytes_avail >= 3 &&
|
||||
ctx->startbytes[0] == 0xEF &&
|
||||
ctx->startbytes[1] == 0xBB &&
|
||||
ctx->startbytes[2] == 0xBF)
|
||||
{
|
||||
check_buf += 3;
|
||||
check_pos = 3;
|
||||
}
|
||||
|
||||
if (ctx->startbytes_avail >= check_pos + 18 &&
|
||||
memcmp(check_buf, "Scenarist_SCC V1.0", 18) == 0)
|
||||
{
|
||||
ctx->stream_mode = CCX_SM_SCC;
|
||||
mprint("Detected SCC (Scenarist Closed Caption) format\n");
|
||||
}
|
||||
}
|
||||
#ifdef WTV_DEBUG
|
||||
if (ctx->stream_mode == CCX_SM_ELEMENTARY_OR_NOT_FOUND && ctx->startbytes_avail >= 6)
|
||||
{
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
// #include <inttypes.h>
|
||||
|
||||
#define MAX_TLT_PAGES 1000
|
||||
#define MAX_TLT_PAGES_EXTRACT 8 // Maximum pages to extract simultaneously (must match lib_ccx.h)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -18,6 +19,28 @@ typedef struct
|
||||
uint8_t tainted; // 1 = text variable contains any data
|
||||
} teletext_page_t;
|
||||
|
||||
// Per-page state for multi-page extraction (issue #665)
|
||||
typedef struct
|
||||
{
|
||||
uint16_t page_number; // BCD-encoded page number (0 = unused slot)
|
||||
teletext_page_t page_buffer; // Current page content being received
|
||||
char *page_buffer_prev; // Previous formatted output
|
||||
char *page_buffer_cur; // Current formatted output
|
||||
unsigned page_buffer_cur_size;
|
||||
unsigned page_buffer_cur_used;
|
||||
unsigned page_buffer_prev_size;
|
||||
unsigned page_buffer_prev_used;
|
||||
uint64_t *ucs2_buffer_prev; // Previous comparison string
|
||||
uint64_t *ucs2_buffer_cur; // Current comparison string
|
||||
unsigned ucs2_buffer_cur_size;
|
||||
unsigned ucs2_buffer_cur_used;
|
||||
unsigned ucs2_buffer_prev_size;
|
||||
unsigned ucs2_buffer_prev_used;
|
||||
uint64_t prev_hide_timestamp;
|
||||
uint64_t prev_show_timestamp;
|
||||
uint8_t receiving_data; // Currently receiving data for this page
|
||||
} teletext_page_state_t;
|
||||
|
||||
// application states -- flags for notices that should be printed only once
|
||||
struct s_states
|
||||
{
|
||||
@@ -62,8 +85,14 @@ struct TeletextCtx
|
||||
char millis_separator;
|
||||
uint32_t global_timestamp;
|
||||
|
||||
// Current and previous page buffers. This is the output written to file when
|
||||
// the time comes.
|
||||
// Multi-page extraction state (issue #665)
|
||||
teletext_page_state_t page_states[MAX_TLT_PAGES_EXTRACT]; // Per-page state
|
||||
int num_active_pages; // Number of pages being extracted
|
||||
int current_page_idx; // Index of page currently receiving data (-1 = none)
|
||||
int multi_page_mode; // 1 = multi-page mode active
|
||||
|
||||
// Current and previous page buffers (legacy single-page mode)
|
||||
// These are still used when multi_page_mode == 0 for backward compatibility
|
||||
teletext_page_t page_buffer;
|
||||
char *page_buffer_prev;
|
||||
char *page_buffer_cur;
|
||||
|
||||
@@ -434,10 +434,21 @@ void remap_g0_charset(uint8_t c)
|
||||
{
|
||||
if (c != primary_charset.current)
|
||||
{
|
||||
if (c >= 56)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset ID 0x%1x.%1x is out of bounds\n", (c >> 3), (c & 0x7));
|
||||
return;
|
||||
}
|
||||
uint8_t m = G0_LATIN_NATIONAL_SUBSETS_MAP[c];
|
||||
if (m == 0xff)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset ID 0x%1x.%1x is not implemented\n", (c >> 3), (c & 0x7));
|
||||
return;
|
||||
}
|
||||
else if (m >= 14)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset index %d is out of bounds\n", m);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -538,6 +549,13 @@ void telxcc_dump_prev_page(struct TeletextCtx *ctx, struct cc_subtitle *sub)
|
||||
add_cc_sub_text(sub, ctx->page_buffer_prev, ctx->prev_show_timestamp,
|
||||
ctx->prev_hide_timestamp, info, "TLT", CCX_ENC_UTF_8);
|
||||
|
||||
// Set teletext page number for multi-page extraction (issue #665)
|
||||
// Find the last subtitle node and set its teletext_page (in decimal format)
|
||||
struct cc_subtitle *last_sub = sub;
|
||||
while (last_sub->next)
|
||||
last_sub = last_sub->next;
|
||||
last_sub->teletext_page = bcd_page_to_int(tlt_config.page);
|
||||
|
||||
if (ctx->page_buffer_prev)
|
||||
free(ctx->page_buffer_prev);
|
||||
if (ctx->ucs2_buffer_prev)
|
||||
@@ -875,6 +893,13 @@ page_is_empty:
|
||||
default:
|
||||
add_cc_sub_text(sub, ctx->page_buffer_cur, page->show_timestamp,
|
||||
page->hide_timestamp + 1, NULL, "TLT", CCX_ENC_UTF_8);
|
||||
// Set teletext page number for multi-page extraction (issue #665)
|
||||
{
|
||||
struct cc_subtitle *last_sub = sub;
|
||||
while (last_sub->next)
|
||||
last_sub = last_sub->next;
|
||||
last_sub->teletext_page = bcd_page_to_int(tlt_config.page);
|
||||
}
|
||||
}
|
||||
|
||||
// Also update GUI...
|
||||
@@ -886,6 +911,44 @@ page_is_empty:
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to check if a page should be accepted for extraction (issue #665)
|
||||
* @param page_number The teletext page number in BCD format
|
||||
* @param is_subtitle_page Whether this page is marked as a subtitle page
|
||||
* @return 1 if the page should be accepted, 0 otherwise
|
||||
*/
|
||||
static int should_accept_page(uint16_t page_number, int is_subtitle_page)
|
||||
{
|
||||
// If extract_all_pages is set, accept all subtitle pages
|
||||
if (tlt_config.extract_all_pages && is_subtitle_page)
|
||||
return 1;
|
||||
|
||||
// If multiple pages are specified, check against the list
|
||||
if (tlt_config.num_user_pages > 0)
|
||||
{
|
||||
// Convert BCD page_number to decimal for comparison
|
||||
int page_dec = bcd_page_to_int(page_number);
|
||||
for (int i = 0; i < tlt_config.num_user_pages && i < MAX_TLT_PAGES_EXTRACT; i++)
|
||||
{
|
||||
if (tlt_config.user_pages[i] == page_dec)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Legacy single-page mode: check against tlt_config.page
|
||||
if (tlt_config.page == 0) // Auto-detect mode
|
||||
return is_subtitle_page;
|
||||
|
||||
return (page_number == tlt_config.page);
|
||||
}
|
||||
|
||||
// Check if we're in multi-page extraction mode
|
||||
static int is_multi_page_mode(void)
|
||||
{
|
||||
return (tlt_config.extract_all_pages || tlt_config.num_user_pages > 1);
|
||||
}
|
||||
|
||||
void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, teletext_packet_payload_t *packet, uint64_t timestamp, struct cc_subtitle *sub)
|
||||
{
|
||||
// variable names conform to ETS 300 706, chapter 7.1.2
|
||||
@@ -923,7 +986,8 @@ void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, tele
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((tlt_config.page == 0) && (flag_subtitle == YES) && (i < 0xff))
|
||||
// Auto-detect page if none specified (and not in extract_all mode)
|
||||
if ((tlt_config.page == 0) && !tlt_config.extract_all_pages && (tlt_config.num_user_pages == 0) && (flag_subtitle == YES) && (i < 0xff))
|
||||
{
|
||||
tlt_config.page = (m << 8) | (unham_8_4(packet->data[1]) << 4) | unham_8_4(packet->data[0]);
|
||||
mprint("- No teletext page specified, first received suitable page is %03x, not guaranteed\n", tlt_config.page);
|
||||
@@ -949,18 +1013,35 @@ void process_telx_packet(struct TeletextCtx *ctx, data_unit_t data_unit_id, tele
|
||||
if ((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (data_unit_id != DATA_UNIT_EBU_TELETEXT_SUBTITLE) && !(de_ctr && flag_subtitle && ctx->receiving_data == YES))
|
||||
return;
|
||||
|
||||
// Check if this page should be accepted for extraction (issue #665)
|
||||
int accept_this_page = should_accept_page(page_number, flag_subtitle);
|
||||
|
||||
// Handle page transition - if we were receiving a different page, stop
|
||||
if ((ctx->receiving_data == YES) && (((ctx->transmission_mode == TRANSMISSION_MODE_SERIAL) && (PAGE(page_number) != PAGE(tlt_config.page))) ||
|
||||
((ctx->transmission_mode == TRANSMISSION_MODE_PARALLEL) && (PAGE(page_number) != PAGE(tlt_config.page)) && (m == MAGAZINE(tlt_config.page)))))
|
||||
{
|
||||
ctx->receiving_data = NO;
|
||||
if (!(de_ctr && flag_subtitle))
|
||||
return;
|
||||
{
|
||||
// In multi-page mode, check if this new page should be accepted
|
||||
if (!accept_this_page)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Page transmission is terminated, however now we are waiting for our new page
|
||||
if (page_number != tlt_config.page && !(de_ctr && flag_subtitle && ctx->receiving_data == YES))
|
||||
// Modified for multi-page support (issue #665)
|
||||
if (!accept_this_page && !(de_ctr && flag_subtitle && ctx->receiving_data == YES))
|
||||
return;
|
||||
|
||||
// Update tlt_config.page to track the current page being received (multi-page mode only)
|
||||
// In single-page mode, tlt_config.page is set by auto-detect logic or user specification
|
||||
// This prevents overwriting auto-detect selection with an arbitrary page number
|
||||
if (is_multi_page_mode() && accept_this_page && page_number != tlt_config.page)
|
||||
{
|
||||
tlt_config.page = page_number;
|
||||
}
|
||||
|
||||
// Now we have the beginning of page transmission; if there is page_buffer pending, process it
|
||||
if (ctx->page_buffer.tainted == YES)
|
||||
{
|
||||
@@ -1322,7 +1403,7 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
uint8_t pes_ext_flag;
|
||||
// extension
|
||||
uint32_t t = 0;
|
||||
uint16_t i;
|
||||
uint32_t i;
|
||||
struct TeletextCtx *ctx = dec_ctx->private_data;
|
||||
ctx->sentence_cap = sentence_cap;
|
||||
|
||||
@@ -1398,6 +1479,9 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
if (pes_packet_length > size)
|
||||
pes_packet_length = size;
|
||||
|
||||
if (size < 9)
|
||||
return CCX_OK;
|
||||
|
||||
// optional PES header marker bits (10.. ....)
|
||||
if ((buffer[6] & 0xc0) == 0x80)
|
||||
{
|
||||
@@ -1410,8 +1494,16 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
{
|
||||
if ((optional_pes_header_included == YES) && ((buffer[7] & 0x80) > 0))
|
||||
{
|
||||
ctx->using_pts = YES;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS available\n");
|
||||
if (size < 14)
|
||||
{
|
||||
ctx->using_pts = NO;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS signaled but packet too short, using TS PCR\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->using_pts = YES;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS available\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1484,11 +1576,17 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
if (optional_pes_header_included == YES)
|
||||
i += 3 + optional_pes_header_length;
|
||||
|
||||
while (i <= pes_packet_length - 6)
|
||||
while (i + 2 <= pes_packet_length)
|
||||
{
|
||||
uint8_t data_unit_id = buffer[i++];
|
||||
uint8_t data_unit_len = buffer[i++];
|
||||
|
||||
if (i + data_unit_len > pes_packet_length)
|
||||
{
|
||||
dbg_print(CCX_DMT_TELETEXT, "- Teletext data unit length %u exceeds PES packet length, stopping.\n", data_unit_len);
|
||||
break;
|
||||
}
|
||||
|
||||
if ((data_unit_id == DATA_UNIT_EBU_TELETEXT_NONSUBTITLE) || (data_unit_id == DATA_UNIT_EBU_TELETEXT_SUBTITLE))
|
||||
{
|
||||
// teletext payload has always size 44 bytes
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "dvb_subtitle_decoder.h"
|
||||
#include "ccx_decoders_isdb.h"
|
||||
#include "file_buffer.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef DEBUG_SAVE_TS_PACKETS
|
||||
#include <sys/types.h>
|
||||
@@ -153,12 +154,11 @@ enum ccx_bufferdata_type get_buffer_type(struct cap_info *cinfo)
|
||||
{
|
||||
return CCX_TELETEXT;
|
||||
}
|
||||
else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
return CCX_PRIVATE_MPEG2_CC;
|
||||
}
|
||||
else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
else if ((cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 ||
|
||||
cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2) &&
|
||||
cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
// ATSC CC can be in either private stream type - process both as PES
|
||||
return CCX_PES;
|
||||
}
|
||||
else
|
||||
@@ -567,17 +567,15 @@ int copy_capbuf_demux_data(struct ccx_demuxer *ctx, struct demuxer_data **data,
|
||||
if (!cinfo->capbuf || !cinfo->capbuflen)
|
||||
return -1;
|
||||
|
||||
if (ptr->bufferdatatype == CCX_PRIVATE_MPEG2_CC)
|
||||
{
|
||||
dump(CCX_DMT_GENERIC_NOTICES, cinfo->capbuf, cinfo->capbuflen, 0, 1);
|
||||
// Bogus data, so we return something
|
||||
ptr->buffer[ptr->len++] = 0xFA;
|
||||
ptr->buffer[ptr->len++] = 0x80;
|
||||
ptr->buffer[ptr->len++] = 0x80;
|
||||
return CCX_OK;
|
||||
}
|
||||
if (cinfo->codec == CCX_CODEC_TELETEXT)
|
||||
{
|
||||
if (cinfo->capbuflen > BUFSIZE - ptr->len)
|
||||
{
|
||||
fatal(CCX_COMMON_EXIT_BUG_BUG,
|
||||
"Teletext packet (%" PRId64 ") larger than remaining buffer (%" PRId64 ").\n",
|
||||
cinfo->capbuflen, (int64_t)(BUFSIZE - ptr->len));
|
||||
}
|
||||
|
||||
memcpy(ptr->buffer + ptr->len, cinfo->capbuf, cinfo->capbuflen);
|
||||
ptr->len += cinfo->capbuflen;
|
||||
return CCX_OK;
|
||||
@@ -672,7 +670,6 @@ void cinfo_cremation(struct ccx_demuxer *ctx, struct demuxer_data **data)
|
||||
|
||||
int copy_payload_to_capbuf(struct cap_info *cinfo, struct ts_payload *payload)
|
||||
{
|
||||
int newcapbuflen;
|
||||
|
||||
if (cinfo->ignore == CCX_TRUE &&
|
||||
((cinfo->stream != CCX_STREAM_TYPE_VIDEO_MPEG2 &&
|
||||
@@ -698,17 +695,22 @@ int copy_payload_to_capbuf(struct cap_info *cinfo, struct ts_payload *payload)
|
||||
}
|
||||
|
||||
// copy payload to capbuf
|
||||
newcapbuflen = cinfo->capbuflen + payload->length;
|
||||
if (newcapbuflen > cinfo->capbufsize)
|
||||
if (payload->length > INT64_MAX - cinfo->capbuflen)
|
||||
{
|
||||
unsigned char *new_capbuf = (unsigned char *)realloc(cinfo->capbuf, newcapbuflen);
|
||||
mprint("Error: capbuf size overflow\n");
|
||||
return -1;
|
||||
}
|
||||
int64_t newcapbuflen = (int64_t)cinfo->capbuflen + payload->length;
|
||||
if (newcapbuflen > (int64_t)cinfo->capbufsize)
|
||||
{
|
||||
unsigned char *new_capbuf = (unsigned char *)realloc(cinfo->capbuf, (size_t)newcapbuflen);
|
||||
if (!new_capbuf)
|
||||
return -1;
|
||||
cinfo->capbuf = new_capbuf;
|
||||
cinfo->capbufsize = newcapbuflen;
|
||||
cinfo->capbufsize = newcapbuflen; // Note: capbufsize is int in struct cap_info
|
||||
}
|
||||
memcpy(cinfo->capbuf + cinfo->capbuflen, payload->start, payload->length);
|
||||
cinfo->capbuflen = newcapbuflen;
|
||||
cinfo->capbuflen = newcapbuflen; // Note: capbuflen is int in struct cap_info
|
||||
|
||||
return CCX_OK;
|
||||
}
|
||||
@@ -754,7 +756,7 @@ uint64_t get_pts(uint8_t *buffer)
|
||||
// Threshold for enabling packet analysis mode when no PAT is found (in bytes)
|
||||
#define NO_PAT_THRESHOLD (188 * 1000) // After ~1000 packets
|
||||
|
||||
long ts_readstream(struct ccx_demuxer *ctx, struct demuxer_data **data)
|
||||
int64_t ts_readstream(struct ccx_demuxer *ctx, struct demuxer_data **data)
|
||||
{
|
||||
int gotpes = 0;
|
||||
long pespcount = 0; // count packets in PES with captions
|
||||
|
||||
@@ -50,8 +50,8 @@ struct EPG_rating
|
||||
struct EPG_event
|
||||
{
|
||||
uint32_t id;
|
||||
char start_time_string[21]; //"YYYYMMDDHHMMSS +0000" = 20 chars
|
||||
char end_time_string[21];
|
||||
char start_time_string[74]; // "YYYYMMDDHHMMSS +0000" = 20 chars, 74 to silence compiler warning
|
||||
char end_time_string[74];
|
||||
uint8_t running_status;
|
||||
uint8_t free_ca_mode;
|
||||
char ISO_639_language_code[4];
|
||||
|
||||
@@ -173,7 +173,7 @@ static void *init_private_data(enum ccx_code_type codec)
|
||||
case CCX_CODEC_TELETEXT:
|
||||
return telxcc_init();
|
||||
case CCX_CODEC_DVB:
|
||||
return dvbsub_init_decoder(NULL, 0);
|
||||
return dvbsub_init_decoder(NULL);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -254,7 +254,9 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
ctx->PIDs_programs[elementary_PID]->printable_stream_type = get_printable_stream_type(stream_type);
|
||||
dbg_print(CCX_DMT_VERBOSE, "%6u | %3X (%3u) | %s\n", elementary_PID, stream_type, stream_type,
|
||||
desc[ctx->PIDs_programs[elementary_PID]->printable_stream_type]);
|
||||
process_ccx_mpeg_descriptor(buf + i + 5, ES_info_length);
|
||||
// Validate ES_info_length against buffer bounds to prevent heap overflow
|
||||
if (i + 5 + ES_info_length <= len)
|
||||
process_ccx_mpeg_descriptor(buf + i + 5, ES_info_length);
|
||||
i += ES_info_length;
|
||||
}
|
||||
dbg_print(CCX_DMT_VERBOSE, "---\n");
|
||||
@@ -275,12 +277,28 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) > es_info; es_info += desc_len)
|
||||
// Validate ES_info_length against buffer bounds to prevent heap overflow
|
||||
if (i + 5 + ES_info_length > len)
|
||||
{
|
||||
dbg_print(CCX_DMT_GENERIC_NOTICES, "Warning: ES_info_length exceeds buffer, skipping.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
desc_len = (*es_info++);
|
||||
|
||||
// Validate desc_len doesn't exceed remaining buffer
|
||||
if (es_info + desc_len > es_info_end)
|
||||
break;
|
||||
|
||||
if (descriptor_tag == CCX_MPEG_DSC_DVB_SUBTITLE)
|
||||
{
|
||||
int k = 0;
|
||||
@@ -324,12 +342,29 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
if (stream_type == CCX_STREAM_TYPE_PRIVATE_MPEG2 &&
|
||||
ES_info_length)
|
||||
{
|
||||
// Validate ES_info_length against buffer bounds to prevent heap overflow
|
||||
if (i + 5 + ES_info_length > len)
|
||||
{
|
||||
dbg_print(CCX_DMT_GENERIC_NOTICES, "Warning: ES_info_length exceeds buffer, skipping.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) > es_info; es_info += desc_len)
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
void *ptr;
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
desc_len = (*es_info++);
|
||||
|
||||
// Validate desc_len doesn't exceed remaining buffer
|
||||
if (es_info + desc_len > es_info_end)
|
||||
break;
|
||||
|
||||
if (CCX_MPEG_DESC_DATA_COMP == descriptor_tag)
|
||||
{
|
||||
int16_t component_id = 0;
|
||||
@@ -364,9 +399,7 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
ret = parse_dvb_description(&cnf, es_info, desc_len);
|
||||
if (ret < 0)
|
||||
break;
|
||||
ptr = dvbsub_init_decoder(&cnf, pinfo->initialized_ocr);
|
||||
if (!pinfo->initialized_ocr)
|
||||
pinfo->initialized_ocr = 1;
|
||||
ptr = dvbsub_init_decoder(&cnf);
|
||||
if (ptr == NULL)
|
||||
break;
|
||||
update_capinfo(ctx, elementary_PID, stream_type, CCX_CODEC_DVB, program_number, ptr);
|
||||
@@ -378,9 +411,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
{
|
||||
// if any generally used video stream type clashes with the ATSC/SCTE standard
|
||||
// then this code can go in some atsc flag
|
||||
// Validate ES_info_length against buffer bounds to prevent heap overflow
|
||||
if (i + 5 + ES_info_length > len)
|
||||
break;
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) > es_info; es_info += desc_len)
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
int nb_service;
|
||||
int is_608;
|
||||
@@ -404,9 +446,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
|
||||
if (IS_FEASIBLE(ctx->codec, ctx->nocodec, CCX_CODEC_TELETEXT) && ES_info_length && stream_type == CCX_STREAM_TYPE_PRIVATE_MPEG2) // MPEG-2 Packetized Elementary Stream packets containing private data
|
||||
{
|
||||
// Validate ES_info_length against buffer bounds
|
||||
if (i + 5 + ES_info_length > len)
|
||||
continue;
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) - es_info; es_info += desc_len)
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
desc_len = (*es_info++);
|
||||
if (!IS_VALID_TELETEXT_DESC(descriptor_tag))
|
||||
@@ -541,6 +592,15 @@ void ts_buffer_psi_packet(struct ccx_demuxer *ctx)
|
||||
else if (ccounter == ctx->PID_buffers[pid]->prev_ccounter + 1 || (ctx->PID_buffers[pid]->prev_ccounter == 0x0f && ccounter == 0))
|
||||
{
|
||||
ctx->PID_buffers[pid]->prev_ccounter = ccounter;
|
||||
// Check for integer overflow and reasonable size limit (1MB)
|
||||
if (ctx->PID_buffers[pid]->buffer_length > 1024 * 1024 ||
|
||||
payload_length > 1024 * 1024 ||
|
||||
ctx->PID_buffers[pid]->buffer_length + payload_length > 1024 * 1024)
|
||||
{
|
||||
dbg_print(CCX_DMT_GENERIC_NOTICES, "\rWarning: PSI buffer for PID %u exceeded reasonable limit (1MB), discarding.\n", pid);
|
||||
return;
|
||||
}
|
||||
|
||||
void *tmp = realloc(ctx->PID_buffers[pid]->buffer, ctx->PID_buffers[pid]->buffer_length + payload_length);
|
||||
if (tmp == NULL)
|
||||
{
|
||||
@@ -579,6 +639,10 @@ int parse_PAT(struct ccx_demuxer *ctx)
|
||||
payload_start = ctx->PID_buffers[0]->buffer + pointer_field + 1;
|
||||
payload_length = ctx->PID_buffers[0]->buffer_length - (pointer_field + 1);
|
||||
|
||||
// Need at least 8 bytes to read header fields
|
||||
if (payload_length < 8)
|
||||
return 0;
|
||||
|
||||
section_number = payload_start[6];
|
||||
last_section_number = payload_start[7];
|
||||
|
||||
|
||||
@@ -87,13 +87,11 @@ void EPG_ATSC_decode_ETT_text(uint8_t *offset, uint32_t length, struct EPG_event
|
||||
|
||||
for (j = 0; j < number_segments && offset < offset_end; j++)
|
||||
{
|
||||
uint8_t compression_type, mode, number_bytes;
|
||||
uint8_t number_bytes;
|
||||
|
||||
if (offset + 3 > offset_end)
|
||||
return;
|
||||
|
||||
compression_type = offset[0];
|
||||
mode = offset[1];
|
||||
number_bytes = offset[2];
|
||||
offset += 3;
|
||||
|
||||
@@ -127,7 +125,7 @@ void EPG_ATSC_calc_time(char *output, uint32_t time)
|
||||
timeinfo.tm_hour = 0;
|
||||
timeinfo.tm_isdst = -1;
|
||||
mktime(&timeinfo);
|
||||
snprintf(output, 21, "%02d%02d%02d%02d%02d%02d +0000", timeinfo.tm_year + 1900, timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min, timeinfo.tm_sec);
|
||||
snprintf(output, 74, "%02d%02d%02d%02d%02d%02d +0000", timeinfo.tm_year + 1900, timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min, timeinfo.tm_sec);
|
||||
}
|
||||
|
||||
// Fills event.start_time_string in XMLTV format with passed DVB time
|
||||
@@ -138,6 +136,7 @@ void EPG_DVB_calc_start_time(struct EPG_event *event, uint64_t time)
|
||||
if (mjd > 0)
|
||||
{
|
||||
long y, m, d, k;
|
||||
struct tm timeinfo = {0};
|
||||
|
||||
// algo: ETSI EN 300 468 - ANNEX C
|
||||
y = (long)((mjd - 15078.2) / 365.25);
|
||||
@@ -147,7 +146,32 @@ void EPG_DVB_calc_start_time(struct EPG_event *event, uint64_t time)
|
||||
y = y + k + 1900;
|
||||
m = m - 1 - k * 12;
|
||||
|
||||
snprintf(event->start_time_string, sizeof(event->start_time_string), "%02ld%02ld%02ld%06" PRIu64 "+0000", y, m, d, time & 0xffffff);
|
||||
timeinfo.tm_year = y - 1900;
|
||||
timeinfo.tm_mon = m - 1;
|
||||
timeinfo.tm_mday = d;
|
||||
|
||||
// Decode BCD time (lower 24 bits: HHMMSS)
|
||||
uint32_t bcd = (uint32_t)(time & 0xFFFFFF);
|
||||
|
||||
timeinfo.tm_sec = (bcd & 0x0f) + (10 * ((bcd & 0xf0) >> 4));
|
||||
|
||||
timeinfo.tm_min = ((bcd & 0x0f00) >> 8) + (10 * ((bcd & 0xf000) >> 12));
|
||||
|
||||
timeinfo.tm_hour = ((bcd & 0x0f0000) >> 16) + (10 * ((bcd & 0xf00000) >> 20));
|
||||
|
||||
timeinfo.tm_isdst = -1;
|
||||
|
||||
mktime(&timeinfo);
|
||||
|
||||
snprintf(event->start_time_string,
|
||||
sizeof(event->start_time_string),
|
||||
"%04d%02d%02d%02d%02d%02d +0000",
|
||||
timeinfo.tm_year + 1900,
|
||||
timeinfo.tm_mon + 1,
|
||||
timeinfo.tm_mday,
|
||||
timeinfo.tm_hour,
|
||||
timeinfo.tm_min,
|
||||
timeinfo.tm_sec);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -179,16 +179,21 @@ void mprint(const char *fmt, ...)
|
||||
if (!ccx_options.messages_target)
|
||||
return;
|
||||
va_start(args, fmt);
|
||||
if (ccx_options.messages_target == CCX_MESSAGES_STDOUT)
|
||||
|
||||
FILE *target = (ccx_options.messages_target == CCX_MESSAGES_STDOUT) ? stdout : stderr;
|
||||
|
||||
if (fmt[0] == '\r')
|
||||
{
|
||||
vfprintf(stdout, fmt, args);
|
||||
fflush(stdout);
|
||||
}
|
||||
else
|
||||
{
|
||||
vfprintf(stderr, fmt, args);
|
||||
fflush(stderr);
|
||||
#ifndef _WIN32
|
||||
fprintf(target, "\r\033[K"); // Clear the line first
|
||||
fmt++; // Skip the '\r' so only the clean text gets printed next
|
||||
#endif
|
||||
}
|
||||
// Windows (legacy console) does not support ANSI sequences; fallback to standard \r; and vfprintf below handles it the old-fashioned way.
|
||||
|
||||
vfprintf(target, fmt, args);
|
||||
fflush(target);
|
||||
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
||||
517
src/lib_ccx/vobsub_decoder.c
Normal file
517
src/lib_ccx/vobsub_decoder.c
Normal file
@@ -0,0 +1,517 @@
|
||||
/**
|
||||
* VOBSUB decoder with OCR support
|
||||
*
|
||||
* Decodes VOBSUB (DVD bitmap) subtitles from MKV, MP4, or standalone idx/sub files
|
||||
* and optionally performs OCR to convert to text.
|
||||
*
|
||||
* SPU (SubPicture Unit) format:
|
||||
* - 2 bytes: total SPU size
|
||||
* - 2 bytes: offset to control sequence
|
||||
* - RLE-encoded pixel data (interlaced)
|
||||
* - Control sequence with timing, colors, coordinates
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lib_ccx.h"
|
||||
#include "vobsub_decoder.h"
|
||||
#include "ccx_common_common.h"
|
||||
#include "ccx_decoders_structs.h"
|
||||
#include "ccx_common_constants.h"
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
#include "ocr.h"
|
||||
#endif
|
||||
|
||||
#define RGBA(r, g, b, a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
|
||||
|
||||
/*
 * Display parameters collected from one SPU control sequence.
 * Zeroed and then filled in by vobsub_decode_control().
 */
struct vobsub_ctrl_seq
{
	uint8_t color[4];      /* Color indices (4-bit values) */
	uint8_t alpha[4];      /* Alpha values (4-bit values, 0 = transparent) */
	uint16_t coord[4];     /* x1, x2, y1, y2 */
	uint16_t pixoffset[2]; /* Offset to 1st and 2nd graphic line */
	/*
	 * Times are computed as (date << 10) / 90 from a 16-bit date field,
	 * which can reach 745642 and does not fit in 16 bits, so these must be
	 * at least 32 bits wide to avoid silent truncation.
	 */
	uint32_t start_time;
	uint32_t stop_time;
};

/* Decoder state kept between SPU packets */
struct vobsub_ctx
{
	uint32_t palette[16]; /* RGB palette from idx header, indexed by color index */
	int palette_parsed;   /* 1 if palette has been parsed */
	struct vobsub_ctrl_seq ctrl; /* Control data of the SPU being decoded */
	unsigned char *bitmap; /* Decoded bitmap (one color index per pixel), or NULL */
#ifdef ENABLE_OCR
	void *ocr_ctx; /* OCR context, NULL if OCR initialization failed */
#endif
};
|
||||
|
||||
/* Get 4 bits from buffer for RLE decoding */
|
||||
static int vobsub_get_bits(unsigned char *buffer, uint8_t *nextbyte, int *pos, int *m)
|
||||
{
|
||||
int ret;
|
||||
ret = (*nextbyte & 0xf0) >> 4;
|
||||
if (*m == 0)
|
||||
*pos += 1;
|
||||
*nextbyte = (*nextbyte << 4) | ((*m) ? (buffer[*pos] & 0x0f) : ((buffer[*pos] & 0xf0) >> 4));
|
||||
*m = (*m + 1) % 2;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* RLE decode to get run length and color */
|
||||
static int vobsub_rle_decode(unsigned char *buffer, int *color, uint8_t *nextbyte, int *pos, int *m)
|
||||
{
|
||||
int val = 4;
|
||||
uint16_t rlen = vobsub_get_bits(buffer, nextbyte, pos, m);
|
||||
while (rlen < val && val <= 0x40)
|
||||
{
|
||||
rlen = (rlen << 4) | vobsub_get_bits(buffer, nextbyte, pos, m);
|
||||
val = val << 2;
|
||||
}
|
||||
*color = rlen & 0x3;
|
||||
rlen = rlen >> 2;
|
||||
return rlen;
|
||||
}
|
||||
|
||||
/* Decode bitmap from RLE-encoded SPU data */
|
||||
static void vobsub_get_bitmap(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size)
|
||||
{
|
||||
int w, h, x, lineno;
|
||||
int pos, color, m;
|
||||
int len;
|
||||
uint8_t nextbyte;
|
||||
unsigned char *buffp;
|
||||
|
||||
w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
|
||||
h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
|
||||
|
||||
if (w <= 0 || h <= 0 || w > 4096 || h > 4096)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid dimensions w=%d h=%d\n", w, h);
|
||||
return;
|
||||
}
|
||||
|
||||
pos = ctx->ctrl.pixoffset[0];
|
||||
if (pos >= (int)buf_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Pixel offset out of bounds\n");
|
||||
return;
|
||||
}
|
||||
|
||||
m = 0;
|
||||
nextbyte = buffer[pos];
|
||||
|
||||
ctx->bitmap = malloc(w * h);
|
||||
if (!ctx->bitmap)
|
||||
return;
|
||||
memset(ctx->bitmap, 0, w * h);
|
||||
|
||||
buffp = ctx->bitmap;
|
||||
x = 0;
|
||||
lineno = 0;
|
||||
|
||||
/* Decode first field (odd lines in interlaced) */
|
||||
while (lineno < (h + 1) / 2 && pos < (int)buf_size)
|
||||
{
|
||||
len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
|
||||
if (len > (w - x) || len == 0)
|
||||
len = w - x;
|
||||
|
||||
memset(buffp + x, color, len);
|
||||
x += len;
|
||||
if (x >= w)
|
||||
{
|
||||
x = 0;
|
||||
++lineno;
|
||||
buffp += (2 * w); /* Skip 1 line due to interlacing */
|
||||
if ((m == 1))
|
||||
{
|
||||
vobsub_get_bits(buffer, &nextbyte, &pos, &m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Decode second field (even lines) */
|
||||
if (pos > ctx->ctrl.pixoffset[1])
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Error creating bitmap - overlapping fields\n");
|
||||
return;
|
||||
}
|
||||
|
||||
pos = ctx->ctrl.pixoffset[1];
|
||||
if (pos >= (int)buf_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Second field offset out of bounds\n");
|
||||
return;
|
||||
}
|
||||
|
||||
buffp = ctx->bitmap + w;
|
||||
x = 0;
|
||||
lineno = 0;
|
||||
m = 0;
|
||||
nextbyte = buffer[pos];
|
||||
|
||||
while (lineno < h / 2 && pos < (int)buf_size)
|
||||
{
|
||||
len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
|
||||
if (len > (w - x) || len == 0)
|
||||
len = w - x;
|
||||
|
||||
memset(buffp + x, color, len);
|
||||
x += len;
|
||||
if (x >= w)
|
||||
{
|
||||
x = 0;
|
||||
++lineno;
|
||||
buffp += (2 * w);
|
||||
if ((m == 1))
|
||||
{
|
||||
vobsub_get_bits(buffer, &nextbyte, &pos, &m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse control sequence from SPU data */
|
||||
static void vobsub_decode_control(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size, uint16_t ctrl_offset)
|
||||
{
|
||||
int pos = ctrl_offset;
|
||||
int pack_end = 0;
|
||||
uint16_t date, next_ctrl;
|
||||
|
||||
memset(&ctx->ctrl, 0, sizeof(ctx->ctrl));
|
||||
|
||||
while (pos + 4 <= (int)buf_size && pack_end == 0)
|
||||
{
|
||||
date = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
next_ctrl = (buffer[pos + 2] << 8) | buffer[pos + 3];
|
||||
if (next_ctrl == pos)
|
||||
pack_end = 1;
|
||||
pos += 4;
|
||||
|
||||
int seq_end = 0;
|
||||
while (seq_end == 0 && pos < (int)buf_size)
|
||||
{
|
||||
int command = buffer[pos++];
|
||||
switch (command)
|
||||
{
|
||||
case 0x01: /* Start display */
|
||||
ctx->ctrl.start_time = (date << 10) / 90;
|
||||
break;
|
||||
case 0x02: /* Stop display */
|
||||
ctx->ctrl.stop_time = (date << 10) / 90;
|
||||
break;
|
||||
case 0x03: /* SET_COLOR */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.color[3] = (buffer[pos] & 0xf0) >> 4;
|
||||
ctx->ctrl.color[2] = buffer[pos] & 0x0f;
|
||||
ctx->ctrl.color[1] = (buffer[pos + 1] & 0xf0) >> 4;
|
||||
ctx->ctrl.color[0] = buffer[pos + 1] & 0x0f;
|
||||
pos += 2;
|
||||
break;
|
||||
case 0x04: /* SET_CONTR (alpha) */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.alpha[3] = (buffer[pos] & 0xf0) >> 4;
|
||||
ctx->ctrl.alpha[2] = buffer[pos] & 0x0f;
|
||||
ctx->ctrl.alpha[1] = (buffer[pos + 1] & 0xf0) >> 4;
|
||||
ctx->ctrl.alpha[0] = buffer[pos + 1] & 0x0f;
|
||||
pos += 2;
|
||||
break;
|
||||
case 0x05: /* SET_DAREA (coordinates) */
|
||||
if (pos + 6 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.coord[0] = ((buffer[pos] << 8) | (buffer[pos + 1] & 0xf0)) >> 4;
|
||||
ctx->ctrl.coord[1] = ((buffer[pos + 1] & 0x0f) << 8) | buffer[pos + 2];
|
||||
ctx->ctrl.coord[2] = ((buffer[pos + 3] << 8) | (buffer[pos + 4] & 0xf0)) >> 4;
|
||||
ctx->ctrl.coord[3] = ((buffer[pos + 4] & 0x0f) << 8) | buffer[pos + 5];
|
||||
pos += 6;
|
||||
break;
|
||||
case 0x06: /* SET_DSPXA (pixel offset) */
|
||||
if (pos + 4 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.pixoffset[0] = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
ctx->ctrl.pixoffset[1] = (buffer[pos + 2] << 8) | buffer[pos + 3];
|
||||
pos += 4;
|
||||
break;
|
||||
case 0x07: /* Extended command */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
{
|
||||
uint16_t skip = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
pos += skip;
|
||||
}
|
||||
break;
|
||||
case 0xff: /* End of control sequence */
|
||||
seq_end = 1;
|
||||
break;
|
||||
default:
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Unknown control command 0x%02x\n", command);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate RGBA palette from color/alpha indices using parsed palette */
|
||||
static void vobsub_generate_rgba_palette(struct vobsub_ctx *ctx, uint32_t *rgba_palette)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
if (ctx->ctrl.alpha[i] == 0)
|
||||
{
|
||||
rgba_palette[i] = 0; /* Fully transparent */
|
||||
}
|
||||
else if (ctx->palette_parsed)
|
||||
{
|
||||
/* Use parsed palette from idx header */
|
||||
uint32_t color = ctx->palette[ctx->ctrl.color[i] & 0x0f];
|
||||
uint8_t r = (color >> 16) & 0xff;
|
||||
uint8_t g = (color >> 8) & 0xff;
|
||||
uint8_t b = color & 0xff;
|
||||
uint8_t a = ctx->ctrl.alpha[i] * 17; /* Scale 0-15 to 0-255 */
|
||||
rgba_palette[i] = RGBA(r, g, b, a);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Fallback: guess palette (grayscale levels) */
|
||||
static const uint8_t level_map[4][4] = {
|
||||
{0xff},
|
||||
{0x00, 0xff},
|
||||
{0x00, 0x80, 0xff},
|
||||
{0x00, 0x55, 0xaa, 0xff},
|
||||
};
|
||||
|
||||
/* Count opaque colors */
|
||||
int nb_opaque = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (ctx->ctrl.alpha[j] != 0)
|
||||
nb_opaque++;
|
||||
|
||||
if (nb_opaque == 0)
|
||||
nb_opaque = 1;
|
||||
if (nb_opaque > 4)
|
||||
nb_opaque = 4;
|
||||
|
||||
int level = level_map[nb_opaque - 1][i < nb_opaque ? i : nb_opaque - 1];
|
||||
uint8_t a = ctx->ctrl.alpha[i] * 17;
|
||||
rgba_palette[i] = RGBA(level, level, level, a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct vobsub_ctx *init_vobsub_decoder(void)
|
||||
{
|
||||
struct vobsub_ctx *ctx = malloc(sizeof(struct vobsub_ctx));
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
|
||||
memset(ctx, 0, sizeof(struct vobsub_ctx));
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
ctx->ocr_ctx = init_ocr(1); /* 1 = default language index (English) */
|
||||
if (!ctx->ocr_ctx)
|
||||
{
|
||||
mprint("VOBSUB: Warning - OCR initialization failed\n");
|
||||
/* Continue anyway - OCR will just not work */
|
||||
}
|
||||
#endif
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header)
|
||||
{
|
||||
if (!ctx || !idx_header)
|
||||
return -1;
|
||||
|
||||
/* Find "palette:" line */
|
||||
const char *palette_line = strstr(idx_header, "palette:");
|
||||
if (!palette_line)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: No palette line found in idx header\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
palette_line += 8; /* Skip "palette:" */
|
||||
|
||||
/* Skip whitespace */
|
||||
while (*palette_line == ' ' || *palette_line == '\t')
|
||||
palette_line++;
|
||||
|
||||
/* Parse 16 hex RGB colors */
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
unsigned int color;
|
||||
if (sscanf(palette_line, "%x", &color) != 1)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to parse palette color %d\n", i);
|
||||
break;
|
||||
}
|
||||
ctx->palette[i] = color;
|
||||
|
||||
/* Skip to next color (past comma and whitespace) */
|
||||
while (*palette_line && *palette_line != ',' && *palette_line != '\n')
|
||||
palette_line++;
|
||||
if (*palette_line == ',')
|
||||
palette_line++;
|
||||
while (*palette_line == ' ' || *palette_line == '\t')
|
||||
palette_line++;
|
||||
}
|
||||
|
||||
ctx->palette_parsed = 1;
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Parsed palette from idx header\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vobsub_decode_spu(struct vobsub_ctx *ctx,
|
||||
unsigned char *spu_data, size_t spu_size,
|
||||
long long start_time, long long end_time,
|
||||
struct cc_subtitle *sub)
|
||||
{
|
||||
if (!ctx || !spu_data || spu_size < 4 || !sub)
|
||||
return -1;
|
||||
|
||||
/* Parse SPU header */
|
||||
uint16_t size_spu = (spu_data[0] << 8) | spu_data[1];
|
||||
uint16_t ctrl_offset = (spu_data[2] << 8) | spu_data[3];
|
||||
|
||||
if (ctrl_offset > spu_size || size_spu > spu_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid SPU header (size=%u, ctrl=%u, buf=%zu)\n",
|
||||
size_spu, ctrl_offset, spu_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Parse control sequence */
|
||||
vobsub_decode_control(ctx, spu_data, spu_size, ctrl_offset);
|
||||
|
||||
/* Free any previous bitmap */
|
||||
if (ctx->bitmap)
|
||||
{
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
}
|
||||
|
||||
/* Decode bitmap */
|
||||
vobsub_get_bitmap(ctx, spu_data, spu_size);
|
||||
if (!ctx->bitmap)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to decode bitmap\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Build cc_subtitle structure */
|
||||
int w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
|
||||
int h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
|
||||
|
||||
if (w <= 0 || h <= 0)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid bitmap dimensions\n");
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
sub->type = CC_BITMAP;
|
||||
sub->nb_data = 1;
|
||||
sub->got_output = 1;
|
||||
|
||||
struct cc_bitmap *rect = malloc(sizeof(struct cc_bitmap));
|
||||
if (!rect)
|
||||
{
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memset(rect, 0, sizeof(struct cc_bitmap));
|
||||
|
||||
sub->data = rect;
|
||||
sub->datatype = CC_DATATYPE_GENERIC;
|
||||
sub->start_time = start_time;
|
||||
sub->end_time = end_time > 0 ? end_time : start_time + ctx->ctrl.stop_time;
|
||||
|
||||
/* Copy bitmap data */
|
||||
rect->data0 = malloc(w * h);
|
||||
if (!rect->data0)
|
||||
{
|
||||
free(rect);
|
||||
sub->data = NULL;
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memcpy(rect->data0, ctx->bitmap, w * h);
|
||||
|
||||
/* Generate RGBA palette */
|
||||
rect->data1 = malloc(1024); /* Space for 256 colors */
|
||||
if (!rect->data1)
|
||||
{
|
||||
free(rect->data0);
|
||||
free(rect);
|
||||
sub->data = NULL;
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memset(rect->data1, 0, 1024);
|
||||
vobsub_generate_rgba_palette(ctx, (uint32_t *)rect->data1);
|
||||
|
||||
rect->nb_colors = 4;
|
||||
rect->x = ctx->ctrl.coord[0];
|
||||
rect->y = ctx->ctrl.coord[2];
|
||||
rect->w = w;
|
||||
rect->h = h;
|
||||
rect->linesize0 = w;
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
/* Run OCR if available */
|
||||
if (ctx->ocr_ctx)
|
||||
{
|
||||
char *ocr_str = NULL;
|
||||
int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, 0, 1); /* quantmode=1 */
|
||||
if (ret >= 0 && ocr_str)
|
||||
{
|
||||
rect->ocr_text = ocr_str;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Tell whether this build includes OCR support for VOBSUB subtitles.
 * Returns 1 when compiled with ENABLE_OCR, 0 otherwise.
 */
int vobsub_ocr_available(void)
{
#ifdef ENABLE_OCR
	const int available = 1;
#else
	const int available = 0;
#endif

	return available;
}
|
||||
|
||||
void delete_vobsub_decoder(struct vobsub_ctx **ctx)
|
||||
{
|
||||
if (!ctx || !*ctx)
|
||||
return;
|
||||
|
||||
struct vobsub_ctx *c = *ctx;
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (c->ocr_ctx)
|
||||
delete_ocr(&c->ocr_ctx);
|
||||
#endif
|
||||
|
||||
if (c->bitmap)
|
||||
free(c->bitmap);
|
||||
|
||||
free(c);
|
||||
*ctx = NULL;
|
||||
}
|
||||
53
src/lib_ccx/vobsub_decoder.h
Normal file
53
src/lib_ccx/vobsub_decoder.h
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef VOBSUB_DECODER_H
|
||||
#define VOBSUB_DECODER_H
|
||||
|
||||
#include "ccx_decoders_structs.h"
|
||||
|
||||
/**
|
||||
* VOBSUB decoder context - opaque structure
|
||||
*/
|
||||
struct vobsub_ctx;
|
||||
|
||||
/**
|
||||
* Initialize VOBSUB decoder context
|
||||
* @return Pointer to context, or NULL on failure
|
||||
*/
|
||||
struct vobsub_ctx *init_vobsub_decoder(void);
|
||||
|
||||
/**
|
||||
* Parse palette from idx header string (e.g., from MKV CodecPrivate)
|
||||
* Looks for "palette:" line and parses 16 hex RGB colors
|
||||
* @param ctx VOBSUB decoder context
|
||||
* @param idx_header The idx header string containing palette info
|
||||
* @return 0 on success, -1 on failure
|
||||
*/
|
||||
int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header);
|
||||
|
||||
/**
|
||||
* Decode single SPU packet and optionally perform OCR
|
||||
* @param ctx VOBSUB decoder context
|
||||
* @param spu_data Raw SPU data (starting with 2-byte size)
|
||||
* @param spu_size Size of SPU data
|
||||
* @param start_time Start time in milliseconds
|
||||
* @param end_time End time in milliseconds (0 if unknown)
|
||||
* @param sub Output subtitle structure
|
||||
* @return 0 on success, -1 on error
|
||||
*/
|
||||
int vobsub_decode_spu(struct vobsub_ctx *ctx,
|
||||
unsigned char *spu_data, size_t spu_size,
|
||||
long long start_time, long long end_time,
|
||||
struct cc_subtitle *sub);
|
||||
|
||||
/**
|
||||
* Check if VOBSUB OCR is available (compiled with OCR support)
|
||||
* @return 1 if OCR available, 0 otherwise
|
||||
*/
|
||||
int vobsub_ocr_available(void);
|
||||
|
||||
/**
|
||||
* Free VOBSUB decoder context and resources
|
||||
* @param ctx Pointer to context pointer (will be set to NULL)
|
||||
*/
|
||||
void delete_vobsub_decoder(struct vobsub_ctx **ctx);
|
||||
|
||||
#endif /* VOBSUB_DECODER_H */
|
||||
@@ -6,6 +6,8 @@
|
||||
#define WTV_STREAM_VIDEO "\x76\x69\x64\x73\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"
|
||||
#define WTV_STREAM_AUDIO "\x61\x75\x64\x73\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"
|
||||
#define WTV_STREAM_MSTVCAPTION "\x89\x8A\x8B\xB8\x49\xB0\x80\x4C\xAD\xCF\x58\x98\x98\x5E\x22\xC1"
|
||||
// DVB Teletext stream type (VBI teletext data in PES format)
|
||||
#define WTV_STREAM_TELETEXT "\xE3\x76\x2A\xF7\x0A\xEB\xD0\x11\xAC\xE4\x00\x00\xC0\xCC\x16\xBA"
|
||||
#define WTV_EOF "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
#define WTV_TIMING "\x5B\x05\xE6\x1B\x97\xA9\x49\x43\x88\x17\x1A\x65\x5A\x29\x8A\x97"
|
||||
|
||||
|
||||
@@ -36,6 +36,16 @@ int check_stream_id(int stream_id, int video_streams[], int num_streams)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Report whether stream_id appears among the first num_teletext_streams
// entries of teletext_streams. Returns 1 on a match, 0 otherwise (including
// when the count is zero or negative).
int check_teletext_stream_id(int stream_id, int teletext_streams[], int num_teletext_streams)
{
	const int *candidate = teletext_streams;
	int remaining;

	for (remaining = num_teletext_streams; remaining > 0; remaining--)
	{
		if (*candidate++ == stream_id)
			return 1;
	}
	return 0;
}
|
||||
|
||||
// Init passes wtv_chunked_buffer struct
|
||||
void init_chunked_buffer(struct wtv_chunked_buffer *cb)
|
||||
{
|
||||
@@ -335,9 +345,12 @@ int read_header(struct ccx_demuxer *ctx, struct wtv_chunked_buffer *cb)
|
||||
LLONG get_data(struct lib_ccx_ctx *ctx, struct wtv_chunked_buffer *cb, struct demuxer_data *data)
|
||||
{
|
||||
static int video_streams[32];
|
||||
static int teletext_streams[32];
|
||||
static int alt_stream; // Stream to use for timestamps if the cc stream has broken timestamps
|
||||
static int use_alt_stream = 0;
|
||||
static int num_streams = 0;
|
||||
static int num_teletext_streams = 0;
|
||||
static int has_teletext = 0; // Flag to indicate we found teletext streams
|
||||
int64_t result;
|
||||
struct lib_cc_decode *dec_ctx = update_decoder_list(ctx);
|
||||
|
||||
@@ -403,12 +416,18 @@ LLONG get_data(struct lib_ccx_ctx *ctx, struct wtv_chunked_buffer *cb, struct de
|
||||
{
|
||||
// The WTV_STREAM2 GUID appears near the start of the data dir
|
||||
// It maps stream_ids to the type of stream
|
||||
// Structure based on WTV file analysis:
|
||||
// Offset 0x0C: mediatype GUID (16 bytes)
|
||||
// Offset 0x4C: teletext format GUID (16 bytes) - for MSTVCAPTION streams
|
||||
// We read enough data (96 bytes) to get all the info we need
|
||||
dbg_print(CCX_DMT_PARSE, "WTV STREAM2\n");
|
||||
get_sized_buffer(ctx->demux_ctx, cb, 0xc + 16);
|
||||
// Read 96 bytes to get mediatype at 0x0C and format_subtype at 0x4C
|
||||
uint32_t read_size = (len > 96) ? 96 : len;
|
||||
get_sized_buffer(ctx->demux_ctx, cb, read_size);
|
||||
if (cb->buffer == NULL)
|
||||
return CCX_EOF;
|
||||
static unsigned char stream_type[16];
|
||||
memcpy(&stream_type, cb->buffer + 0xc, 16); // Read the stream type GUID
|
||||
memcpy(&stream_type, cb->buffer + 0xc, 16); // Read the mediatype GUID at offset 12
|
||||
const void *stream_guid;
|
||||
if (ccx_options.wtvmpeg2)
|
||||
stream_guid = WTV_STREAM_VIDEO; // We want mpeg2 data if the user set -wtvmpeg2
|
||||
@@ -419,11 +438,40 @@ LLONG get_data(struct lib_ccx_ctx *ctx, struct wtv_chunked_buffer *cb, struct de
|
||||
video_streams[num_streams] = stream_id; // We keep a list of stream ids
|
||||
num_streams++; // Even though there should only be 1
|
||||
}
|
||||
// For MSTVCAPTION streams, check if it's teletext by examining the format GUID
|
||||
// The teletext GUID appears at offset 0x4C from the start of the chunk data
|
||||
if (!memcmp(stream_type, WTV_STREAM_MSTVCAPTION, 16) && read_size >= 0x4C + 16)
|
||||
{
|
||||
static unsigned char format_subtype[16];
|
||||
memcpy(&format_subtype, cb->buffer + 0x4C, 16); // Read format GUID at offset 0x4C
|
||||
dbg_print(CCX_DMT_PARSE, "MSTVCAPTION format_subtype=%02X%02X%02X%02X...\n",
|
||||
format_subtype[0], format_subtype[1], format_subtype[2], format_subtype[3]);
|
||||
// Check for teletext
|
||||
if (!memcmp(format_subtype, WTV_STREAM_TELETEXT, 16))
|
||||
{
|
||||
dbg_print(CCX_DMT_PARSE, "Found DVB Teletext stream, stream_id: 0x%X\n", stream_id);
|
||||
mprint("WTV: Found DVB Teletext stream (stream_id=0x%X)\n", stream_id);
|
||||
mprint(" Note: WTV teletext uses Microsoft VBI format which may not decode correctly.\n");
|
||||
teletext_streams[num_teletext_streams] = stream_id;
|
||||
num_teletext_streams++;
|
||||
has_teletext = 1;
|
||||
// Initialize teletext decoder context
|
||||
if (!dec_ctx->private_data)
|
||||
{
|
||||
dec_ctx->codec = CCX_CODEC_TELETEXT;
|
||||
dec_ctx->private_data = telxcc_init();
|
||||
if (!dec_ctx->private_data)
|
||||
{
|
||||
mprint("Error: Failed to initialize teletext decoder\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (memcmp(stream_type, WTV_STREAM_AUDIO, 16))
|
||||
alt_stream = stream_id;
|
||||
len -= 28;
|
||||
len -= read_size;
|
||||
}
|
||||
if (!memcmp(guid, WTV_TIMING, 16) && ((use_alt_stream < WTV_CC_TIMESTAMP_MAGIC_THRESH && check_stream_id(stream_id, video_streams, num_streams)) || (use_alt_stream == WTV_CC_TIMESTAMP_MAGIC_THRESH && stream_id == alt_stream)))
|
||||
if (!memcmp(guid, WTV_TIMING, 16) && ((use_alt_stream < WTV_CC_TIMESTAMP_MAGIC_THRESH && (check_stream_id(stream_id, video_streams, num_streams) || check_teletext_stream_id(stream_id, teletext_streams, num_teletext_streams))) || (use_alt_stream == WTV_CC_TIMESTAMP_MAGIC_THRESH && stream_id == alt_stream)))
|
||||
{
|
||||
int64_t time;
|
||||
// The WTV_TIMING GUID contains a timestamp for the given stream_id
|
||||
@@ -478,6 +526,54 @@ LLONG get_data(struct lib_ccx_ctx *ctx, struct wtv_chunked_buffer *cb, struct de
|
||||
}
|
||||
return bytesread;
|
||||
}
|
||||
// Handle DVB Teletext data
|
||||
// Note: WTV teletext format is Microsoft-specific and differs from DVB teletext.
|
||||
// The data is not in standard DVB teletext data unit format, so decoding support
|
||||
// is currently limited. The stream is detected and passed to the decoder which
|
||||
// will process what it can parse.
|
||||
if (!memcmp(guid, WTV_DATA, 16) && check_teletext_stream_id(stream_id, teletext_streams, num_teletext_streams) && dec_ctx->timing->current_pts != 0)
|
||||
{
|
||||
get_sized_buffer(ctx->demux_ctx, cb, len);
|
||||
if (cb->buffer == NULL)
|
||||
return CCX_EOF;
|
||||
|
||||
// WTV teletext data is raw VBI data, not PES-encapsulated.
|
||||
// Wrap it in a PES header for the teletext decoder.
|
||||
uint16_t pes_len = len + 8; // payload + header fields after length
|
||||
int64_t pts = dec_ctx->timing->current_pts;
|
||||
|
||||
// Add PES header (14 bytes)
|
||||
data->buffer[data->len++] = 0x00; // start code
|
||||
data->buffer[data->len++] = 0x00;
|
||||
data->buffer[data->len++] = 0x01;
|
||||
data->buffer[data->len++] = 0xBD; // Private Stream 1
|
||||
data->buffer[data->len++] = (pes_len >> 8) & 0xFF;
|
||||
data->buffer[data->len++] = pes_len & 0xFF;
|
||||
data->buffer[data->len++] = 0x80; // PES flags
|
||||
data->buffer[data->len++] = 0x80; // PTS present
|
||||
data->buffer[data->len++] = 0x05; // header data length
|
||||
|
||||
// Encode PTS (33-bit value in 5 bytes)
|
||||
data->buffer[data->len++] = (uint8_t)(0x21 | ((pts >> 29) & 0x0E));
|
||||
data->buffer[data->len++] = (uint8_t)((pts >> 22) & 0xFF);
|
||||
data->buffer[data->len++] = (uint8_t)(0x01 | ((pts >> 14) & 0xFE));
|
||||
data->buffer[data->len++] = (uint8_t)((pts >> 7) & 0xFF);
|
||||
data->buffer[data->len++] = (uint8_t)(0x01 | ((pts << 1) & 0xFE));
|
||||
|
||||
// Add teletext data
|
||||
memcpy(data->buffer + data->len, cb->buffer, len);
|
||||
data->len += len;
|
||||
bytesread += (int)(14 + len);
|
||||
data->codec = CCX_CODEC_TELETEXT;
|
||||
data->bufferdatatype = CCX_TELETEXT;
|
||||
frames_since_ref_time++;
|
||||
set_fts(dec_ctx->timing);
|
||||
if (pad > 0)
|
||||
{
|
||||
skip_sized_buffer(ctx->demux_ctx, cb, pad);
|
||||
}
|
||||
return bytesread;
|
||||
}
|
||||
if (len + pad > 0)
|
||||
{
|
||||
// skip any remaining data
|
||||
|
||||
84
src/rust/Cargo.lock
generated
84
src/rust/Cargo.lock
generated
@@ -129,6 +129,26 @@ dependencies = [
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.72.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash 2.1.1",
|
||||
"shlex",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -141,6 +161,12 @@ version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
||||
|
||||
[[package]]
|
||||
name = "by_address"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64fa3c856b712db6612c019f14756e64e4bcea13337a6b33b696333a9eaa2d06"
|
||||
|
||||
[[package]]
|
||||
name = "camino"
|
||||
version = "1.2.1"
|
||||
@@ -151,7 +177,7 @@ checksum = "276a59bf2b2c967788139340c9f0c5b12d7fd6630315c15c217e559de85d2609"
|
||||
name = "ccx_rust"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bindgen 0.64.0",
|
||||
"bindgen 0.72.1",
|
||||
"cfg-if",
|
||||
"clap",
|
||||
"encoding_rs",
|
||||
@@ -335,21 +361,18 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fast-srgb8"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd2e7510819d6fbf51a5545c8f922716ecfb14df168a3242f7d33e0239efe6a1"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "find-crate"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59a98bbaacea1c0eb6a0876280051b892eb73594fd90cf3b20e9c817029c57d2"
|
||||
dependencies = [
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.2"
|
||||
@@ -741,9 +764,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num-conv"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
||||
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
@@ -799,26 +822,26 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "palette"
|
||||
version = "0.6.1"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f9cd68f7112581033f157e56c77ac4a5538ec5836a2e39284e65bd7d7275e49"
|
||||
checksum = "4cbf71184cc5ecc2e4e1baccdb21026c20e5fc3dcf63028a086131b3ab00b6e6"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"num-traits",
|
||||
"fast-srgb8",
|
||||
"palette_derive",
|
||||
"phf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "palette_derive"
|
||||
version = "0.6.1"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05eedf46a8e7c27f74af0c9cfcdb004ceca158cb1b918c6f68f8d7a549b3e427"
|
||||
checksum = "f5030daf005bface118c096f510ffb781fc28f9ab6a32ab224d8631be6851d30"
|
||||
dependencies = [
|
||||
"find-crate",
|
||||
"by_address",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1377,30 +1400,30 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.44"
|
||||
version = "0.3.47"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
|
||||
checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
|
||||
dependencies = [
|
||||
"deranged",
|
||||
"itoa",
|
||||
"num-conv",
|
||||
"powerfmt",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"time-core",
|
||||
"time-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time-core"
|
||||
version = "0.1.6"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
|
||||
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
|
||||
|
||||
[[package]]
|
||||
name = "time-macros"
|
||||
version = "0.2.24"
|
||||
version = "0.2.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
|
||||
checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
|
||||
dependencies = [
|
||||
"num-conv",
|
||||
"time-core",
|
||||
@@ -1416,15 +1439,6 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.7.3"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user