mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2026-02-05 13:35:02 +00:00
Compare commits
358 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
270c89b7f8 | ||
|
|
032cd1c6b1 | ||
|
|
42e4e9a657 | ||
|
|
821e307333 | ||
|
|
ae81f3ba3d | ||
|
|
b190751b2c | ||
|
|
f1bb0f4dce | ||
|
|
f147ac27f8 | ||
|
|
2dfb44d7d4 | ||
|
|
580e721dfe | ||
|
|
d0a82447ff | ||
|
|
5c19c7b932 | ||
|
|
fd7271bae2 | ||
|
|
05c68349d5 | ||
|
|
09f21f64e4 | ||
|
|
c65fb0874e | ||
|
|
9db727d593 | ||
|
|
fe6dad83b7 | ||
|
|
d494286082 | ||
|
|
259e881483 | ||
|
|
197069d3b8 | ||
|
|
7a810d736d | ||
|
|
1413c948c4 | ||
|
|
bb5385913b | ||
|
|
f8981e8e1e | ||
|
|
a1871abf04 | ||
|
|
20b3773bb9 | ||
|
|
8786b4cf75 | ||
|
|
8632ecda5b | ||
|
|
475153a9dd | ||
|
|
df90009f73 | ||
|
|
2352ea21e3 | ||
|
|
dc041a35e8 | ||
|
|
e99ba1d177 | ||
|
|
298665faa4 | ||
|
|
735a01bf04 | ||
|
|
3618c23b5a | ||
|
|
b7c9da75dd | ||
|
|
449d55d5e5 | ||
|
|
60aa370899 | ||
|
|
3d18b38c32 | ||
|
|
2a6d27f9ff | ||
|
|
91d3512bcc | ||
|
|
74e64c0421 | ||
|
|
c175750ebe | ||
|
|
e7dc4d19f7 | ||
|
|
1fbb51056d | ||
|
|
5d9a8cc6f2 | ||
|
|
17abad79f2 | ||
|
|
707e1f01fe | ||
|
|
efc8b791e7 | ||
|
|
a856bbde10 | ||
|
|
9390b876fa | ||
|
|
ead0a4beed | ||
|
|
b2e9cb74c1 | ||
|
|
20b194aac4 | ||
|
|
2d9b480972 | ||
|
|
1447b021cb | ||
|
|
e0ac126cff | ||
|
|
b8019bdb35 | ||
|
|
9d921dec43 | ||
|
|
3ada2b5002 | ||
|
|
50ec9866db | ||
|
|
ce87d01fbd | ||
|
|
fecd24d08e | ||
|
|
482544c5bf | ||
|
|
84a7a1fb41 | ||
|
|
f198bcd2ec | ||
|
|
4b6016ca1c | ||
|
|
9c2ea47eda | ||
|
|
170b466a20 | ||
|
|
2bdcd20115 | ||
|
|
ab18d234d2 | ||
|
|
3ff02617b0 | ||
|
|
c7fad95e24 | ||
|
|
c018f1f43c | ||
|
|
98b50b2a35 | ||
|
|
46cee0893a | ||
|
|
42ad48ca7f | ||
|
|
ed26a595bd | ||
|
|
b1c2aabb22 | ||
|
|
bb2ae1e70f | ||
|
|
6464fa486e | ||
|
|
5aa747ab33 | ||
|
|
39adfa59b0 | ||
|
|
20287548cb | ||
|
|
b7b10419ec | ||
|
|
8fbfd68426 | ||
|
|
7159d0b6d0 | ||
|
|
c515578e37 | ||
|
|
e55b8eb764 | ||
|
|
0228fbcbfa | ||
|
|
0e190e0962 | ||
|
|
13f1b5ab53 | ||
|
|
b39f923c46 | ||
|
|
7e32d6a553 | ||
|
|
3bde3dceec | ||
|
|
d5201b1129 | ||
|
|
a199f4f8af | ||
|
|
eea049923d | ||
|
|
d999c3e0e0 | ||
|
|
aac90d5a5f | ||
|
|
618df184c6 | ||
|
|
5e6aab8972 | ||
|
|
a77c21c06c | ||
|
|
4252703431 | ||
|
|
1af2a29a3c | ||
|
|
8ab474c593 | ||
|
|
1c781c2a38 | ||
|
|
4d718378d5 | ||
|
|
1bd4cd5c0a | ||
|
|
067045ce92 | ||
|
|
2f2904041c | ||
|
|
d837c369e5 | ||
|
|
686ff69fdc | ||
|
|
126835d998 | ||
|
|
6e170cd812 | ||
|
|
fe921626e1 | ||
|
|
6578f0ff34 | ||
|
|
1911068e92 | ||
|
|
493495361d | ||
|
|
643857e98f | ||
|
|
05adb5f47e | ||
|
|
504877b928 | ||
|
|
64ee63a560 | ||
|
|
270c603bd2 | ||
|
|
6d356b4458 | ||
|
|
cfb10d4b91 | ||
|
|
ca2b708023 | ||
|
|
10ac5ca6ce | ||
|
|
333cfb3726 | ||
|
|
c609f66c02 | ||
|
|
91f254017b | ||
|
|
1f5d3df0ae | ||
|
|
e36d81c237 | ||
|
|
8d338dc362 | ||
|
|
c78e01d186 | ||
|
|
401ff6c105 | ||
|
|
83eb51ed6f | ||
|
|
bce0c92fdd | ||
|
|
ea4859fd54 | ||
|
|
8d7890c743 | ||
|
|
477307e438 | ||
|
|
4a4911bcec | ||
|
|
dc946168e7 | ||
|
|
3a60b1268b | ||
|
|
e3d1c56ad0 | ||
|
|
b5bc0e2616 | ||
|
|
600a9a0e75 | ||
|
|
694b61f862 | ||
|
|
86925727e0 | ||
|
|
1c7515681e | ||
|
|
2bcac83761 | ||
|
|
efc28d87d5 | ||
|
|
b4d8e0ffaf | ||
|
|
0b7b7fd031 | ||
|
|
90041554a3 | ||
|
|
6950a7661e | ||
|
|
41fb966f6f | ||
|
|
04ed95f8b5 | ||
|
|
ddf29672fd | ||
|
|
0890e06d84 | ||
|
|
8c33412888 | ||
|
|
f40294cc5c | ||
|
|
22d5d35158 | ||
|
|
51cae1c2f0 | ||
|
|
dfaebd5db8 | ||
|
|
cfa7d912ca | ||
|
|
ad971f0e72 | ||
|
|
8aadbfb5f2 | ||
|
|
44eb665cd8 | ||
|
|
1255b318ae | ||
|
|
1b0e66bc67 | ||
|
|
f5dc1cf467 | ||
|
|
aaf937a135 | ||
|
|
317c66f14e | ||
|
|
946c5859d4 | ||
|
|
7166e48698 | ||
|
|
d31ea87c03 | ||
|
|
028ce9d0b5 | ||
|
|
cc7a43b5e2 | ||
|
|
3e1424cda8 | ||
|
|
82109e6cd9 | ||
|
|
5dc8292dd2 | ||
|
|
a5b8bc8bf6 | ||
|
|
29158b2c38 | ||
|
|
ad2ee70743 | ||
|
|
562de8893b | ||
|
|
12adb5e92b | ||
|
|
203eb23030 | ||
|
|
774c3a0d3a | ||
|
|
07f1ddc3fe | ||
|
|
303bec8d5d | ||
|
|
e43a6b5ced | ||
|
|
64484af49e | ||
|
|
7526da884c | ||
|
|
3529bb29b4 | ||
|
|
925560f773 | ||
|
|
200eb1750a | ||
|
|
6dcdb4b2d8 | ||
|
|
a2d2c4f063 | ||
|
|
4ab6c83c27 | ||
|
|
e66a0183c3 | ||
|
|
a8ec28630a | ||
|
|
432d4237ec | ||
|
|
e9519c4a67 | ||
|
|
fef005ddaf | ||
|
|
546c776e57 | ||
|
|
daeed5df71 | ||
|
|
b56ab005a8 | ||
|
|
f1681ee929 | ||
|
|
031f463b5c | ||
|
|
b23866f5a8 | ||
|
|
2ec93c3d3d | ||
|
|
5564aa8a54 | ||
|
|
868fac5423 | ||
|
|
9ca26171d6 | ||
|
|
ead4cbb278 | ||
|
|
dfd7101f54 | ||
|
|
9659d3cf4c | ||
|
|
34c7cd6d2e | ||
|
|
7448a260c7 | ||
|
|
54236f840c | ||
|
|
f2aeef167b | ||
|
|
6a4a1c97ec | ||
|
|
f369959096 | ||
|
|
1c2bcb5088 | ||
|
|
da79ee44d9 | ||
|
|
26434a7f89 | ||
|
|
718eb1a37f | ||
|
|
ace6361bfb | ||
|
|
7041441d39 | ||
|
|
1589c31774 | ||
|
|
c96d3ff3f1 | ||
|
|
598a48e260 | ||
|
|
0cc3626261 | ||
|
|
e0e66bd0ba | ||
|
|
2642ca8805 | ||
|
|
a108302dc0 | ||
|
|
ce90b61923 | ||
|
|
18566f2213 | ||
|
|
125c5e8821 | ||
|
|
64ce4ac84f | ||
|
|
674b859284 | ||
|
|
9a761331f8 | ||
|
|
046ee71eda | ||
|
|
b5fc3e63c4 | ||
|
|
5eaf805d27 | ||
|
|
0ba941e8c0 | ||
|
|
a9413a2312 | ||
|
|
a2eb03cb73 | ||
|
|
06063f26a4 | ||
|
|
82daa7fb2b | ||
|
|
a71687e19f | ||
|
|
25162fe40a | ||
|
|
3365a715a6 | ||
|
|
26e0f64720 | ||
|
|
a1ed940c8b | ||
|
|
f5f4768503 | ||
|
|
e4374204bd | ||
|
|
7f55ae5c1d | ||
|
|
8bf1bc16de | ||
|
|
5352a8b877 | ||
|
|
fd155285d2 | ||
|
|
a6fd8d468a | ||
|
|
5b05ce5073 | ||
|
|
d28bc4e114 | ||
|
|
285e81f9a7 | ||
|
|
730156f33b | ||
|
|
152bbd308c | ||
|
|
8c586bccbd | ||
|
|
434cd3959a | ||
|
|
3cb0f61b0c | ||
|
|
a18eaa2c96 | ||
|
|
69b7f9f4c3 | ||
|
|
63dde6f3b2 | ||
|
|
8f64eeb54f | ||
|
|
02d91c4a03 | ||
|
|
463a4a85a1 | ||
|
|
ba2833b819 | ||
|
|
635a305c37 | ||
|
|
6fe612db3e | ||
|
|
2930c61420 | ||
|
|
173db88dcf | ||
|
|
29c3f4e684 | ||
|
|
d4a7b1d6ed | ||
|
|
9d14766b0d | ||
|
|
6f2a73d706 | ||
|
|
1fccb783f2 | ||
|
|
ec30a79be9 | ||
|
|
5beb4389f6 | ||
|
|
a6ccf29630 | ||
|
|
b6d7c7e778 | ||
|
|
117c2fce69 | ||
|
|
ffd6a34c30 | ||
|
|
70af627078 | ||
|
|
b0a5c069ed | ||
|
|
53ee63894c | ||
|
|
50ece42e0a | ||
|
|
3d00e718f6 | ||
|
|
021b788461 | ||
|
|
86e5d47141 | ||
|
|
5b36356456 | ||
|
|
ba04aedae1 | ||
|
|
5001df0d6c | ||
|
|
28506fee7b | ||
|
|
47d8aaddb9 | ||
|
|
1b2254f911 | ||
|
|
dc34b26afb | ||
|
|
c06102678e | ||
|
|
b0800a112c | ||
|
|
2b0d9ed427 | ||
|
|
fd4db0e7bf | ||
|
|
00d8c9cb0a | ||
|
|
7829c14c60 | ||
|
|
d3602ec938 | ||
|
|
f9b5e081a7 | ||
|
|
bdc3eaa81b | ||
|
|
2820042c1d | ||
|
|
d4d228125a | ||
|
|
43d5ba2f34 | ||
|
|
557774b202 | ||
|
|
4e0472bddf | ||
|
|
9a2fe6221e | ||
|
|
182b23a283 | ||
|
|
77f3fd35f4 | ||
|
|
14e6919f2e | ||
|
|
353a37010d | ||
|
|
921cbe0c57 | ||
|
|
f0523ceaa3 | ||
|
|
7284430fc6 | ||
|
|
68d0d4094e | ||
|
|
7075f6291d | ||
|
|
170d769476 | ||
|
|
1ff3457744 | ||
|
|
dc352a2202 | ||
|
|
c8750e42d1 | ||
|
|
20448bfeb2 | ||
|
|
807df0339e | ||
|
|
6642973c63 | ||
|
|
f08fd658e6 | ||
|
|
5ae3116a6c | ||
|
|
826afcd991 | ||
|
|
46af5ce9bb | ||
|
|
123b35ae69 | ||
|
|
f6e9d55838 | ||
|
|
6f7d3f6169 | ||
|
|
07cc78c2f1 | ||
|
|
affa34848c | ||
|
|
45ee03aecc | ||
|
|
c6e27ca809 | ||
|
|
857a3bc9c6 | ||
|
|
c2c589d6f6 | ||
|
|
e42bc2b9f9 | ||
|
|
bf9841a255 | ||
|
|
6ed09ea397 | ||
|
|
2b708c4a31 | ||
|
|
609a53f373 |
283
.github/workflows/build_deb.yml
vendored
Normal file
283
.github/workflows/build_deb.yml
vendored
Normal file
@@ -0,0 +1,283 @@
|
||||
name: Build Linux .deb Package
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_deb.yml'
|
||||
|
||||
jobs:
|
||||
build-deb:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: version
|
||||
run: |
|
||||
# Extract version from source or use tag
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
VERSION="${{ github.event.release.tag_name }}"
|
||||
VERSION="${VERSION#v}" # Remove 'v' prefix if present
|
||||
else
|
||||
# Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
|
||||
VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
|
||||
fi
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "Building version: $VERSION"
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libjpeg-dev \
|
||||
libfreetype-dev \
|
||||
libxml2-dev \
|
||||
libcurl4-gnutls-dev \
|
||||
libssl-dev \
|
||||
clang \
|
||||
libclang-dev \
|
||||
tesseract-ocr \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
patchelf
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Cache GPAC build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: cache-gpac
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/gpac-install
|
||||
key: gpac-abi-16.4-ubuntu24-deb
|
||||
|
||||
- name: Build GPAC
|
||||
if: steps.should_build.outputs.should_build == 'true' && steps.cache-gpac.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
|
||||
cd gpac
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make DESTDIR=$HOME/gpac-install install-lib
|
||||
|
||||
- name: Install GPAC to system
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo cp -r $HOME/gpac-install/usr/lib/* /usr/lib/
|
||||
sudo cp -r $HOME/gpac-install/usr/include/* /usr/include/
|
||||
sudo ldconfig
|
||||
|
||||
- name: Build CCExtractor
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
|
||||
else
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
|
||||
fi
|
||||
make -j$(nproc)
|
||||
|
||||
- name: Test build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: ./build/ccextractor --version
|
||||
|
||||
- name: Create .deb package structure
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
|
||||
fi
|
||||
|
||||
mkdir -p ${PKG_NAME}/DEBIAN
|
||||
mkdir -p ${PKG_NAME}/usr/bin
|
||||
mkdir -p ${PKG_NAME}/usr/lib/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/man/man1
|
||||
|
||||
# Copy binary
|
||||
cp build/ccextractor ${PKG_NAME}/usr/bin/
|
||||
|
||||
# Copy GPAC library
|
||||
cp $HOME/gpac-install/usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
|
||||
|
||||
# Set rpath so ccextractor finds bundled libgpac
|
||||
patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
|
||||
|
||||
# Copy documentation
|
||||
cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
|
||||
# Generate man page
|
||||
help2man --no-info --name="closed captions and teletext subtitle extractor" \
|
||||
./build/ccextractor > ${PKG_NAME}/usr/share/man/man1/ccextractor.1 2>/dev/null || true
|
||||
if [ -f ${PKG_NAME}/usr/share/man/man1/ccextractor.1 ]; then
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/man/man1/ccextractor.1
|
||||
fi
|
||||
|
||||
# Create control file
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
|
||||
else
|
||||
PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
|
||||
fi
|
||||
|
||||
INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
|
||||
|
||||
# Determine dependencies based on build variant (Ubuntu 24.04)
|
||||
if [ "$VARIANT" = "hardsubx" ]; then
|
||||
DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls, libavcodec60, libavformat60, libavutil58, libswscale7, libavdevice60, libswresample4, libavfilter9"
|
||||
else
|
||||
DEPENDS="libc6, libtesseract5, liblept5, libcurl3t64-gnutls"
|
||||
fi
|
||||
|
||||
cat > ${PKG_NAME}/DEBIAN/control << CTRL
|
||||
Package: ccextractor
|
||||
Version: ${VERSION}
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Installed-Size: ${INSTALLED_SIZE}
|
||||
Depends: ${DEPENDS}
|
||||
Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
|
||||
Homepage: https://www.ccextractor.org
|
||||
Description: ${PKG_DESCRIPTION}
|
||||
CCExtractor is a tool that extracts closed captions and teletext subtitles
|
||||
from video files and streams. It supports a wide variety of input formats
|
||||
including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
|
||||
.
|
||||
This package includes a bundled GPAC library for MP4 support.
|
||||
CTRL
|
||||
|
||||
# Remove leading spaces from control file
|
||||
sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
|
||||
|
||||
# Create postinst to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTINST
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postinst
|
||||
|
||||
# Create postrm to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTRM
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postrm
|
||||
|
||||
# Set permissions
|
||||
chmod 755 ${PKG_NAME}/usr/bin/ccextractor
|
||||
chmod 755 ${PKG_NAME}/usr/lib/ccextractor
|
||||
find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
|
||||
|
||||
# Build the .deb
|
||||
dpkg-deb --build --root-owner-group ${PKG_NAME}
|
||||
|
||||
echo "deb_name=${PKG_NAME}.deb" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Test .deb package
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_amd64"
|
||||
fi
|
||||
|
||||
# Install and test (apt handles dependencies automatically)
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ./${PKG_NAME}.deb
|
||||
ccextractor --version
|
||||
|
||||
- name: Get .deb filename
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: deb_name
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
echo "name=ccextractor_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "name=ccextractor-${VARIANT}_${VERSION}_amd64.deb" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Upload .deb artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.deb_name.outputs.name }}
|
||||
path: ${{ steps.deb_name.outputs.name }}
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.deb_name.outputs.name }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
275
.github/workflows/build_deb_debian13.yml
vendored
Normal file
275
.github/workflows/build_deb_debian13.yml
vendored
Normal file
@@ -0,0 +1,275 @@
|
||||
name: Build Debian 13 .deb Package
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_deb_debian13.yml'
|
||||
|
||||
jobs:
|
||||
build-deb:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: debian:trixie
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Install git and dependencies for checkout
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y git ca-certificates
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: version
|
||||
run: |
|
||||
# Extract version from source or use tag
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
VERSION="${{ github.event.release.tag_name }}"
|
||||
VERSION="${VERSION#v}" # Remove 'v' prefix if present
|
||||
else
|
||||
# Extract version from lib_ccx.h (e.g., #define VERSION "0.96.5")
|
||||
VERSION=$(grep -oP '#define VERSION "\K[^"]+' src/lib_ccx/lib_ccx.h || echo "0.96")
|
||||
fi
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "Building version: $VERSION"
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
cmake \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libjpeg-dev \
|
||||
libfreetype-dev \
|
||||
libxml2-dev \
|
||||
libcurl4-gnutls-dev \
|
||||
libssl-dev \
|
||||
clang \
|
||||
libclang-dev \
|
||||
tesseract-ocr \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
patchelf \
|
||||
curl
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build GPAC
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
git clone -b abi-16.4 --depth 1 https://github.com/gpac/gpac
|
||||
cd gpac
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make install-lib
|
||||
ldconfig
|
||||
|
||||
- name: Build CCExtractor
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
mkdir build && cd build
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON -DWITH_HARDSUBX=ON
|
||||
else
|
||||
cmake ../src -DCMAKE_BUILD_TYPE=Release -DWITH_OCR=ON
|
||||
fi
|
||||
make -j$(nproc)
|
||||
|
||||
- name: Test build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: ./build/ccextractor --version
|
||||
|
||||
- name: Create .deb package structure
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: create_deb
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
|
||||
fi
|
||||
|
||||
mkdir -p ${PKG_NAME}/DEBIAN
|
||||
mkdir -p ${PKG_NAME}/usr/bin
|
||||
mkdir -p ${PKG_NAME}/usr/lib/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/doc/ccextractor
|
||||
mkdir -p ${PKG_NAME}/usr/share/man/man1
|
||||
|
||||
# Copy binary
|
||||
cp build/ccextractor ${PKG_NAME}/usr/bin/
|
||||
|
||||
# Copy GPAC library
|
||||
cp /usr/lib/libgpac.so* ${PKG_NAME}/usr/lib/ccextractor/
|
||||
|
||||
# Set rpath so ccextractor finds bundled libgpac
|
||||
patchelf --set-rpath '/usr/lib/ccextractor:$ORIGIN/../lib/ccextractor' ${PKG_NAME}/usr/bin/ccextractor
|
||||
|
||||
# Copy documentation
|
||||
cp docs/CHANGES.TXT ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
cp LICENSE.txt ${PKG_NAME}/usr/share/doc/ccextractor/copyright
|
||||
gzip -9 -n ${PKG_NAME}/usr/share/doc/ccextractor/changelog
|
||||
|
||||
# Create control file
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_DESCRIPTION="CCExtractor - closed captions and teletext subtitle extractor"
|
||||
else
|
||||
PKG_DESCRIPTION="CCExtractor (with HardSubX) - closed captions and teletext subtitle extractor"
|
||||
fi
|
||||
|
||||
INSTALLED_SIZE=$(du -sk ${PKG_NAME}/usr | cut -f1)
|
||||
|
||||
# Determine dependencies based on build variant (Debian 13 Trixie)
|
||||
if [ "$VARIANT" = "hardsubx" ]; then
|
||||
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls, libavcodec61, libavformat61, libavutil59, libswscale8, libavdevice61, libswresample5, libavfilter10"
|
||||
else
|
||||
DEPENDS="libc6, libtesseract5, libleptonica6, libcurl3t64-gnutls"
|
||||
fi
|
||||
|
||||
cat > ${PKG_NAME}/DEBIAN/control << CTRL
|
||||
Package: ccextractor
|
||||
Version: ${VERSION}
|
||||
Section: utils
|
||||
Priority: optional
|
||||
Architecture: amd64
|
||||
Installed-Size: ${INSTALLED_SIZE}
|
||||
Depends: ${DEPENDS}
|
||||
Maintainer: CCExtractor Development Team <carlos@ccextractor.org>
|
||||
Homepage: https://www.ccextractor.org
|
||||
Description: ${PKG_DESCRIPTION}
|
||||
CCExtractor is a tool that extracts closed captions and teletext subtitles
|
||||
from video files and streams. It supports a wide variety of input formats
|
||||
including MPEG, H.264/AVC, H.265/HEVC, MP4, MKV, WTV, and transport streams.
|
||||
.
|
||||
This package includes a bundled GPAC library for MP4 support.
|
||||
Built for Debian 13 (Trixie).
|
||||
CTRL
|
||||
|
||||
# Remove leading spaces from control file
|
||||
sed -i 's/^ //' ${PKG_NAME}/DEBIAN/control
|
||||
|
||||
# Create postinst to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postinst << 'POSTINST'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTINST
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postinst
|
||||
|
||||
# Create postrm to update library cache
|
||||
cat > ${PKG_NAME}/DEBIAN/postrm << 'POSTRM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
ldconfig
|
||||
POSTRM
|
||||
chmod 755 ${PKG_NAME}/DEBIAN/postrm
|
||||
|
||||
# Set permissions
|
||||
chmod 755 ${PKG_NAME}/usr/bin/ccextractor
|
||||
chmod 755 ${PKG_NAME}/usr/lib/ccextractor
|
||||
find ${PKG_NAME}/usr/lib/ccextractor -name "*.so*" -exec chmod 644 {} \;
|
||||
|
||||
# Build the .deb
|
||||
dpkg-deb --build --root-owner-group ${PKG_NAME}
|
||||
|
||||
echo "deb_name=${PKG_NAME}.deb" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Test .deb package
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
PKG_NAME="ccextractor_${VERSION}_debian13_amd64"
|
||||
else
|
||||
PKG_NAME="ccextractor-${VARIANT}_${VERSION}_debian13_amd64"
|
||||
fi
|
||||
|
||||
# Install and test (apt handles dependencies automatically)
|
||||
apt-get update
|
||||
apt-get install -y ./${PKG_NAME}.deb
|
||||
ccextractor --version
|
||||
|
||||
- name: Get .deb filename
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: deb_name
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
VARIANT="${{ matrix.build_type }}"
|
||||
|
||||
if [ "$VARIANT" = "basic" ]; then
|
||||
echo "name=ccextractor_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "name=ccextractor-${VARIANT}_${VERSION}_debian13_amd64.deb" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Upload .deb artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.deb_name.outputs.name }}
|
||||
path: ${{ steps.deb_name.outputs.name }}
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.deb_name.outputs.name }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
4
.github/workflows/build_docker.yml
vendored
4
.github/workflows/build_docker.yml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
- 'docker/**'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- 'src/rust/**'
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
@@ -16,6 +18,8 @@ on:
|
||||
- 'docker/**'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- 'src/rust/**'
|
||||
|
||||
jobs:
|
||||
|
||||
4
.github/workflows/build_linux.yml
vendored
4
.github/workflows/build_linux.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
- '.github/workflows/build_linux.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'linux/**'
|
||||
- 'package_creators/**'
|
||||
@@ -17,6 +19,8 @@ on:
|
||||
- '.github/workflows/build_linux.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'linux/**'
|
||||
- 'package_creators/**'
|
||||
|
||||
154
.github/workflows/build_linux_systemlibs.yml
vendored
Normal file
154
.github/workflows/build_linux_systemlibs.yml
vendored
Normal file
@@ -0,0 +1,154 @@
|
||||
name: Build Linux (System Libs)
|
||||
|
||||
on:
|
||||
# Build on releases
|
||||
release:
|
||||
types: [published]
|
||||
# Allow manual trigger
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Build type (all, basic, hardsubx)'
|
||||
required: false
|
||||
default: 'all'
|
||||
# Build on pushes to workflow file for testing
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/build_linux_systemlibs.yml'
|
||||
- 'linux/build'
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
build-systemlibs:
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [basic, hardsubx]
|
||||
|
||||
steps:
|
||||
- name: Check if should build this variant
|
||||
id: should_build
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
INPUT_TYPE="${{ github.event.inputs.build_type }}"
|
||||
if [ "$INPUT_TYPE" = "all" ] || [ "$INPUT_TYPE" = "${{ matrix.build_type }}" ]; then
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "should_build=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "should_build=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout repository
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install base dependencies
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libfreetype-dev \
|
||||
libutf8proc-dev \
|
||||
libgpac-dev \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
tesseract-ocr-eng \
|
||||
clang \
|
||||
libclang-dev
|
||||
|
||||
- name: Install FFmpeg dependencies (HardSubX)
|
||||
if: steps.should_build.outputs.should_build == 'true' && matrix.build_type == 'hardsubx'
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
libavcodec-dev \
|
||||
libavformat-dev \
|
||||
libavutil-dev \
|
||||
libswscale-dev \
|
||||
libswresample-dev \
|
||||
libavfilter-dev \
|
||||
libavdevice-dev \
|
||||
libxcb1-dev \
|
||||
libxcb-shm0-dev \
|
||||
libx11-dev \
|
||||
liblzma-dev
|
||||
|
||||
- name: Install Rust toolchain
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Build with system libraries
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
cd linux
|
||||
if [ "${{ matrix.build_type }}" = "hardsubx" ]; then
|
||||
./build -system-libs -hardsubx
|
||||
else
|
||||
./build -system-libs
|
||||
fi
|
||||
|
||||
- name: Verify build
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
./linux/ccextractor --version
|
||||
echo "=== Library dependencies ==="
|
||||
ldd ./linux/ccextractor | grep -E 'freetype|png|utf8proc|tesseract|leptonica' || true
|
||||
|
||||
- name: Get output name
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
id: output_name
|
||||
run: |
|
||||
case "${{ matrix.build_type }}" in
|
||||
basic)
|
||||
echo "name=ccextractor-linux-systemlibs-x86_64" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
hardsubx)
|
||||
echo "name=ccextractor-linux-systemlibs-hardsubx-x86_64" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Package binary
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
run: |
|
||||
mkdir -p package
|
||||
cp linux/ccextractor package/
|
||||
# Create a simple README for the package
|
||||
cat > package/README.txt << 'EOF'
|
||||
CCExtractor - System Libraries Build
|
||||
=====================================
|
||||
|
||||
This build uses system libraries (dynamic linking).
|
||||
|
||||
Required system packages (Debian/Ubuntu):
|
||||
sudo apt install libgpac12 libtesseract5 libleptonica6 \
|
||||
libpng16-16 libfreetype6 libutf8proc3
|
||||
|
||||
For HardSubX builds, also install:
|
||||
sudo apt install libavcodec60 libavformat60 libswscale7 libavfilter9
|
||||
|
||||
Run with: ./ccextractor --help
|
||||
EOF
|
||||
tar -czvf ${{ steps.output_name.outputs.name }}.tar.gz -C package .
|
||||
|
||||
- name: Upload artifact
|
||||
if: steps.should_build.outputs.should_build == 'true'
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: ${{ steps.output_name.outputs.name }}
|
||||
path: ${{ steps.output_name.outputs.name }}.tar.gz
|
||||
|
||||
- name: Upload to Release
|
||||
if: steps.should_build.outputs.should_build == 'true' && github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: ${{ steps.output_name.outputs.name }}.tar.gz
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
4
.github/workflows/build_mac.yml
vendored
4
.github/workflows/build_mac.yml
vendored
@@ -7,6 +7,8 @@ on:
|
||||
- '.github/workflows/build_mac.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'mac/**'
|
||||
- 'package_creators/**'
|
||||
@@ -17,6 +19,8 @@ on:
|
||||
- '.github/workflows/build_mac.yml'
|
||||
- '**.c'
|
||||
- '**.h'
|
||||
- '**CMakeLists.txt'
|
||||
- '**.cmake'
|
||||
- '**Makefile**'
|
||||
- 'mac/**'
|
||||
- 'package_creators/**'
|
||||
|
||||
51
.github/workflows/build_snap.yml
vendored
Normal file
51
.github/workflows/build_snap.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: Build CCExtractor Snap
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
build_snap:
|
||||
name: Build Snap package
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install snapd
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y snapd
|
||||
|
||||
- name: Start snapd
|
||||
run: |
|
||||
sudo systemctl start snapd.socket
|
||||
sudo systemctl start snapd
|
||||
|
||||
- name: Install Snapcraft
|
||||
run: |
|
||||
sudo snap install core22
|
||||
sudo snap install snapcraft --classic
|
||||
|
||||
- name: Show Snapcraft version
|
||||
run: snapcraft --version
|
||||
|
||||
- name: Build snap
|
||||
run: sudo snapcraft --destructive-mode
|
||||
|
||||
- name: List generated snap
|
||||
run: ls -lh *.snap
|
||||
|
||||
- name: Upload snap as workflow artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Snap
|
||||
path: "*.snap"
|
||||
|
||||
- name: Upload snap to GitHub Release
|
||||
if: github.event_name == 'release'
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
files: "*.snap"
|
||||
129
.github/workflows/build_windows.yml
vendored
129
.github/workflows/build_windows.yml
vendored
@@ -3,7 +3,6 @@ name: Build CCExtractor on Windows
|
||||
env:
|
||||
RUSTFLAGS: -Ctarget-feature=+crt-static
|
||||
VCPKG_DEFAULT_TRIPLET: x64-windows-static
|
||||
VCPKG_DEFAULT_BINARY_CACHE: C:\vcpkg\.cache
|
||||
VCPKG_COMMIT: ab2977be50c702126336e5088f4836060733c899
|
||||
|
||||
on:
|
||||
@@ -13,6 +12,8 @@ on:
|
||||
- ".github/workflows/build_windows.yml"
|
||||
- "**.c"
|
||||
- "**.h"
|
||||
- "**CMakeLists.txt"
|
||||
- "**.cmake"
|
||||
- "windows/**"
|
||||
- "src/rust/**"
|
||||
pull_request:
|
||||
@@ -21,108 +22,118 @@ on:
|
||||
- ".github/workflows/build_windows.yml"
|
||||
- "**.c"
|
||||
- "**.h"
|
||||
- "**CMakeLists.txt"
|
||||
- "**.cmake"
|
||||
- "windows/**"
|
||||
- "src/rust/**"
|
||||
|
||||
jobs:
|
||||
build_release:
|
||||
build:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
with:
|
||||
msbuild-architecture: x64
|
||||
|
||||
# Install GPAC (fast, ~30s, not worth caching complexity)
|
||||
- name: Install gpac
|
||||
run: choco install gpac --version 2.4.0
|
||||
run: choco install gpac --version 2.4.0 --no-progress
|
||||
|
||||
# Use lukka/run-vcpkg for better caching
|
||||
- name: Setup vcpkg
|
||||
run: mkdir C:\vcpkg\.cache
|
||||
- name: Cache vcpkg
|
||||
id: cache
|
||||
uses: lukka/run-vcpkg@v11
|
||||
id: runvcpkg
|
||||
with:
|
||||
vcpkgGitCommitId: ${{ env.VCPKG_COMMIT }}
|
||||
vcpkgDirectory: ${{ github.workspace }}/vcpkg
|
||||
vcpkgJsonGlob: 'windows/vcpkg.json'
|
||||
|
||||
# Cache vcpkg installed packages separately for faster restores
|
||||
- name: Cache vcpkg installed packages
|
||||
id: vcpkg-installed-cache
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ${{ github.workspace }}/vcpkg/installed
|
||||
key: vcpkg-installed-${{ runner.os }}-${{ env.VCPKG_COMMIT }}-${{ hashFiles('windows/vcpkg.json') }}
|
||||
restore-keys: |
|
||||
vcpkg-installed-${{ runner.os }}-${{ env.VCPKG_COMMIT }}-
|
||||
|
||||
- name: Install vcpkg dependencies
|
||||
if: steps.vcpkg-installed-cache.outputs.cache-hit != 'true'
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
|
||||
# Cache Rust/Cargo artifacts
|
||||
- name: Cache Cargo registry
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
C:\vcpkg\.cache
|
||||
key: vcpkg-${{ runner.os }}-${{ env.VCPKG_COMMIT }}
|
||||
- name: Build vcpkg
|
||||
run: |
|
||||
git clone https://github.com/microsoft/vcpkg
|
||||
./vcpkg/bootstrap-vcpkg.bat
|
||||
- name: Install dependencies
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
- uses: actions-rs/toolchain@v1
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-registry-
|
||||
|
||||
# Cache Cargo build artifacts - rust.bat sets CARGO_TARGET_DIR to windows/
|
||||
# which results in artifacts at windows/x86_64-pc-windows-msvc/
|
||||
- name: Cache Cargo build artifacts
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
path: ${{ github.workspace }}/windows/x86_64-pc-windows-msvc
|
||||
key: ${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('src/rust/**/*.rs') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }}-
|
||||
${{ runner.os }}-cargo-build-
|
||||
|
||||
- name: Setup Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
|
||||
- name: Install Win 10 SDK
|
||||
uses: ilammy/msvc-dev-cmd@v1
|
||||
- name: build Release-Full
|
||||
|
||||
# Build Release-Full
|
||||
- name: Build Release-Full
|
||||
env:
|
||||
LIBCLANG_PATH: "C:\\Program Files\\LLVM\\lib"
|
||||
LLVM_CONFIG_PATH: "C:\\Program Files\\LLVM\\bin\\llvm-config"
|
||||
CARGO_TARGET_DIR: "..\\..\\windows"
|
||||
BINDGEN_EXTRA_CLANG_ARGS: -fmsc-version=0
|
||||
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
|
||||
run: msbuild ccextractor.sln /p:Configuration=Release-Full /p:Platform=x64
|
||||
working-directory: ./windows
|
||||
- name: Display version information
|
||||
|
||||
- name: Display Release version information
|
||||
run: ./ccextractorwinfull.exe --version
|
||||
working-directory: ./windows/x64/Release-Full
|
||||
- uses: actions/upload-artifact@v6
|
||||
|
||||
- name: Upload Release artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Windows Release build
|
||||
path: |
|
||||
./windows/x64/Release-Full/ccextractorwinfull.exe
|
||||
./windows/x64/Release-Full/*.dll
|
||||
build_debug:
|
||||
runs-on: windows-2022
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v6
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
with:
|
||||
msbuild-architecture: x64
|
||||
- name: Install gpac
|
||||
run: choco install gpac --version 2.4.0
|
||||
- name: Setup vcpkg
|
||||
run: mkdir C:\vcpkg\.cache
|
||||
- name: Cache vcpkg
|
||||
id: cache
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: |
|
||||
C:\vcpkg\.cache
|
||||
key: vcpkg-${{ runner.os }}-${{ env.VCPKG_COMMIT }}
|
||||
- name: Build vcpkg
|
||||
run: |
|
||||
git clone https://github.com/microsoft/vcpkg
|
||||
./vcpkg/bootstrap-vcpkg.bat
|
||||
- name: Install dependencies
|
||||
run: ${{ github.workspace }}/vcpkg/vcpkg.exe install --x-install-root ${{ github.workspace }}/vcpkg/installed/
|
||||
working-directory: windows
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Install Win 10 SDK
|
||||
uses: ilammy/msvc-dev-cmd@v1
|
||||
- name: build Debug-Full
|
||||
|
||||
# Build Debug-Full (reuses cached Cargo artifacts)
|
||||
- name: Build Debug-Full
|
||||
env:
|
||||
LIBCLANG_PATH: "C:\\Program Files\\LLVM\\lib"
|
||||
LLVM_CONFIG_PATH: "C:\\Program Files\\LLVM\\bin\\llvm-config"
|
||||
CARGO_TARGET_DIR: "..\\..\\windows"
|
||||
BINDGEN_EXTRA_CLANG_ARGS: -fmsc-version=0
|
||||
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
|
||||
run: msbuild ccextractor.sln /p:Configuration=Debug-Full /p:Platform=x64
|
||||
working-directory: ./windows
|
||||
- name: Display version information
|
||||
|
||||
- name: Display Debug version information
|
||||
continue-on-error: true
|
||||
run: ./ccextractorwinfull.exe --version
|
||||
working-directory: ./windows/x64/Debug-Full
|
||||
- uses: actions/upload-artifact@v6
|
||||
|
||||
- name: Upload Debug artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: CCExtractor Windows Debug build
|
||||
path: |
|
||||
|
||||
15
.github/workflows/homebrew.yml
vendored
Normal file
15
.github/workflows/homebrew.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
name: Bump Homebrew Formula
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
homebrew:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Update Homebrew formula
|
||||
uses: dawidd6/action-homebrew-bump-formula@v7
|
||||
with:
|
||||
token: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
|
||||
formula: ccextractor
|
||||
136
.github/workflows/publish_chocolatey.yml
vendored
Normal file
136
.github/workflows/publish_chocolatey.yml
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
# Publish to Chocolatey Community Repository
|
||||
#
|
||||
# PREREQUISITES:
|
||||
# 1. Create a Chocolatey account at https://community.chocolatey.org/account/Register
|
||||
# 2. Get your API key from https://community.chocolatey.org/account
|
||||
# 3. Add the API key as repository secret: CHOCOLATEY_API_KEY
|
||||
#
|
||||
# Reference: https://docs.chocolatey.org/en-us/create/create-packages-quick-start
|
||||
|
||||
name: Publish to Chocolatey
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [released]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_tag:
|
||||
description: 'Release tag to publish (e.g., v0.96.1)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Get version from tag
|
||||
id: version
|
||||
shell: bash
|
||||
run: |
|
||||
TAG="${{ github.event.inputs.release_tag || github.event.release.tag_name }}"
|
||||
# Strip 'v' prefix if present
|
||||
VERSION="${TAG#v}"
|
||||
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
||||
echo "tag=$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Download MSI from release
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$tag = "${{ steps.version.outputs.tag }}"
|
||||
$msiUrl = "https://github.com/CCExtractor/ccextractor/releases/download/$tag/CCExtractor.$version.msi"
|
||||
|
||||
Write-Host "Downloading MSI from: $msiUrl"
|
||||
Invoke-WebRequest -Uri $msiUrl -OutFile "CCExtractor.msi"
|
||||
|
||||
# Calculate SHA256 checksum
|
||||
$hash = (Get-FileHash -Path "CCExtractor.msi" -Algorithm SHA256).Hash
|
||||
Write-Host "SHA256: $hash"
|
||||
echo "MSI_CHECKSUM=$hash" >> $env:GITHUB_ENV
|
||||
|
||||
- name: Update nuspec version
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$nuspecPath = "packaging/chocolatey/ccextractor.nuspec"
|
||||
|
||||
$content = Get-Content $nuspecPath -Raw
|
||||
$content = $content -replace '<version>.*</version>', "<version>$version</version>"
|
||||
Set-Content -Path $nuspecPath -Value $content
|
||||
|
||||
Write-Host "Updated nuspec to version $version"
|
||||
|
||||
- name: Update install script
|
||||
shell: pwsh
|
||||
run: |
|
||||
$version = "${{ steps.version.outputs.version }}"
|
||||
$tag = "${{ steps.version.outputs.tag }}"
|
||||
$checksum = $env:MSI_CHECKSUM
|
||||
$installScript = "packaging/chocolatey/tools/chocolateyInstall.ps1"
|
||||
|
||||
$content = Get-Content $installScript -Raw
|
||||
|
||||
# Update URL
|
||||
$newUrl = "https://github.com/CCExtractor/ccextractor/releases/download/$tag/CCExtractor.$version.msi"
|
||||
$content = $content -replace "url64bit\s*=\s*'[^']*'", "url64bit = '$newUrl'"
|
||||
|
||||
# Update checksum
|
||||
$content = $content -replace "checksum64\s*=\s*'[^']*'", "checksum64 = '$checksum'"
|
||||
|
||||
Set-Content -Path $installScript -Value $content
|
||||
|
||||
Write-Host "Updated install script with URL and checksum"
|
||||
|
||||
- name: Build Chocolatey package
|
||||
shell: pwsh
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
choco pack ccextractor.nuspec
|
||||
|
||||
# List the generated package
|
||||
Get-ChildItem *.nupkg
|
||||
|
||||
- name: Test package locally
|
||||
shell: pwsh
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
$nupkg = Get-ChildItem *.nupkg | Select-Object -First 1
|
||||
Write-Host "Testing package: $($nupkg.Name)"
|
||||
|
||||
# Install from local package
|
||||
choco install ccextractor --source="'.;https://community.chocolatey.org/api/v2/'" --yes --force
|
||||
|
||||
# Verify installation
|
||||
$ccx = Get-Command ccextractor -ErrorAction SilentlyContinue
|
||||
if ($ccx) {
|
||||
Write-Host "CCExtractor found at: $($ccx.Source)"
|
||||
& ccextractor --version
|
||||
} else {
|
||||
Write-Host "CCExtractor not found in PATH, checking Program Files..."
|
||||
$exePath = Join-Path $env:ProgramFiles "CCExtractor\ccextractor.exe"
|
||||
if (Test-Path $exePath) {
|
||||
& $exePath --version
|
||||
}
|
||||
}
|
||||
|
||||
- name: Push to Chocolatey
|
||||
shell: pwsh
|
||||
env:
|
||||
CHOCOLATEY_API_KEY: ${{ secrets.CHOCOLATEY_API_KEY }}
|
||||
run: |
|
||||
cd packaging/chocolatey
|
||||
$nupkg = Get-ChildItem *.nupkg | Select-Object -First 1
|
||||
|
||||
Write-Host "Pushing $($nupkg.Name) to Chocolatey..."
|
||||
choco push $nupkg.Name --source="https://push.chocolatey.org/" --api-key="$env:CHOCOLATEY_API_KEY"
|
||||
|
||||
Write-Host "Package submitted to Chocolatey! It may take some time to be moderated and published."
|
||||
|
||||
- name: Upload package artifact
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: chocolatey-package
|
||||
path: packaging/chocolatey/*.nupkg
|
||||
38
.github/workflows/publish_winget.yml
vendored
Normal file
38
.github/workflows/publish_winget.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# Publish to Windows Package Manager (winget)
|
||||
#
|
||||
# PREREQUISITES:
|
||||
# 1. CCExtractor must already have ONE version in winget-pkgs before this works
|
||||
# - Submit the initial manifest manually from packaging/winget/
|
||||
# - PR to: https://github.com/microsoft/winget-pkgs
|
||||
#
|
||||
# 2. Create a fork of microsoft/winget-pkgs under the CCExtractor organization
|
||||
# - https://github.com/CCExtractor/winget-pkgs (needs to be created)
|
||||
#
|
||||
# 3. Create a GitHub Personal Access Token (classic) with 'public_repo' scope
|
||||
# - Add as repository secret: WINGET_TOKEN
|
||||
#
|
||||
# Reference: https://github.com/vedantmgoyal9/winget-releaser
|
||||
|
||||
name: Publish to WinGet
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [released]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_tag:
|
||||
description: 'Release tag to publish (e.g., v0.96.1)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Publish to WinGet
|
||||
uses: vedantmgoyal9/winget-releaser@v2
|
||||
with:
|
||||
identifier: CCExtractor.CCExtractor
|
||||
installers-regex: '\.msi$' # Only use the MSI installer
|
||||
token: ${{ secrets.WINGET_TOKEN }}
|
||||
release-tag: ${{ github.event.inputs.release_tag || github.event.release.tag_name }}
|
||||
39
.github/workflows/release.yml
vendored
39
.github/workflows/release.yml
vendored
@@ -26,7 +26,20 @@ jobs:
|
||||
# Extract version from tag, strip 'v' prefix and everything after first dash
|
||||
VERSION=${GITHUB_REF/refs\/tags\/v/}
|
||||
VERSION=${VERSION%%-*}
|
||||
echo ::set-output name=VERSION::$VERSION
|
||||
# Save display version for filenames (e.g., 0.96.1)
|
||||
echo ::set-output name=DISPLAY_VERSION::$VERSION
|
||||
# Count dots to determine version format
|
||||
DOTS="${VERSION//[^.]}"
|
||||
PART_COUNT=$((${#DOTS} + 1))
|
||||
# MSI requires 4-part version (major.minor.build.revision)
|
||||
if [ "$PART_COUNT" -eq 2 ]; then
|
||||
MSI_VERSION="${VERSION}.0.0"
|
||||
elif [ "$PART_COUNT" -eq 3 ]; then
|
||||
MSI_VERSION="${VERSION}.0"
|
||||
else
|
||||
MSI_VERSION="${VERSION}"
|
||||
fi
|
||||
echo ::set-output name=VERSION::$MSI_VERSION
|
||||
shell: bash
|
||||
- name: Setup MSBuild.exe
|
||||
uses: microsoft/setup-msbuild@v2.0.0
|
||||
@@ -68,6 +81,14 @@ jobs:
|
||||
- name: Copy files to directory for installer
|
||||
run: mkdir installer; cp ./x64/Release-Full/ccextractorwinfull.exe ./installer; cp ./x64/Release-Full/*.dll ./installer
|
||||
working-directory: ./windows
|
||||
- name: Download tessdata for OCR support
|
||||
run: |
|
||||
mkdir -p ./installer/tessdata
|
||||
# Download English traineddata from tessdata_fast (smaller, faster, good for most use cases)
|
||||
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata" -OutFile "./installer/tessdata/eng.traineddata"
|
||||
# Download OSD (Orientation and Script Detection) for automatic script detection
|
||||
Invoke-WebRequest -Uri "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata" -OutFile "./installer/tessdata/osd.traineddata"
|
||||
working-directory: ./windows
|
||||
- name: install WiX
|
||||
run: dotnet tool uninstall --global wix; dotnet tool install --global wix --version 6.0.2 && wix extension add -g WixToolset.UI.wixext/6.0.2
|
||||
- name: Make sure WiX works
|
||||
@@ -85,15 +106,15 @@ jobs:
|
||||
run: Get-ChildItem -Recurse ./installer
|
||||
working-directory: ./windows
|
||||
- name: Create portable zip
|
||||
run: Compress-Archive -Path ./installer/* -DestinationPath ./CCExtractor_win_portable.zip
|
||||
run: Compress-Archive -Path ./installer/* -DestinationPath ./CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}_win_portable.zip
|
||||
working-directory: ./windows
|
||||
- name: Build installer
|
||||
run: wix build -ext WixToolset.UI.wixext -d "AppVersion=${{ steps.get_version.outputs.VERSION }}.0.0" -o CCExtractor.msi installer.wxs CustomUI.wxs
|
||||
run: wix build -arch x64 -ext WixToolset.UI.wixext -d "AppVersion=${{ steps.get_version.outputs.VERSION }}" -o CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.msi installer.wxs CustomUI.wxs
|
||||
working-directory: ./windows
|
||||
- name: Upload as asset
|
||||
uses: AButler/upload-release-assets@v3.0
|
||||
with:
|
||||
files: './windows/CCExtractor.msi;./windows/CCExtractor_win_portable.zip'
|
||||
files: './windows/CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.msi;./windows/CCExtractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}_win_portable.zip'
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
create_linux_package:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -101,10 +122,16 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: ./ccextractor
|
||||
- name: Get the version
|
||||
id: get_version
|
||||
run: |
|
||||
VERSION=${GITHUB_REF/refs\/tags\/v/}
|
||||
VERSION=${VERSION%%-*}
|
||||
echo ::set-output name=DISPLAY_VERSION::$VERSION
|
||||
- name: Create .tar.gz without git and windows folders
|
||||
run: tar -pczf ./ccextractor_minimal.tar.gz --exclude "ccextractor/windows" --exclude "ccextractor/.git" ccextractor
|
||||
run: tar -pczf ./ccextractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.tar.gz --exclude "ccextractor/windows" --exclude "ccextractor/.git" ccextractor
|
||||
- name: Upload as asset
|
||||
uses: AButler/upload-release-assets@v3.0
|
||||
with:
|
||||
files: './ccextractor_minimal.tar.gz'
|
||||
files: './ccextractor.${{ steps.get_version.outputs.DISPLAY_VERSION }}.tar.gz'
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -17,6 +17,7 @@ CVS
|
||||
mac/ccextractor
|
||||
linux/ccextractor
|
||||
linux/depend
|
||||
linux/build_scan/
|
||||
windows/x86_64-pc-windows-msvc/**
|
||||
windows/Debug/**
|
||||
windows/Debug-OCR/**
|
||||
@@ -28,6 +29,7 @@ windows/Debug-Full/**
|
||||
windows/x64/**
|
||||
windows/ccextractor.VC.db
|
||||
build/
|
||||
build_*/
|
||||
|
||||
####
|
||||
# Python
|
||||
@@ -143,6 +145,9 @@ bazel*
|
||||
#Intellij IDEs
|
||||
.idea/
|
||||
|
||||
# Plans (local only)
|
||||
plans/
|
||||
|
||||
# Rust build and MakeFiles (and CMake files)
|
||||
src/rust/CMakeFiles/
|
||||
src/rust/CMakeCache.txt
|
||||
|
||||
@@ -4,7 +4,7 @@ MAINTAINER = Marc Espie <espie@openbsd.org>
|
||||
CATEGORIES = multimedia
|
||||
COMMENT = closed caption subtitles extractor
|
||||
HOMEPAGE = https://ccextractor.org
|
||||
V = 0.96
|
||||
V = 0.96.5
|
||||
DISTFILES = ccextractor.${V:S/.//}-src.zip
|
||||
MASTER_SITES = ${MASTER_SITE_SOURCEFORGE:=ccextractor/}
|
||||
DISTNAME = ccextractor-$V
|
||||
|
||||
48
README.md
48
README.md
@@ -2,7 +2,6 @@
|
||||
|
||||
# CCExtractor
|
||||
|
||||
<a href="https://travis-ci.org/CCExtractor/ccextractor"><img src="https://raw.githubusercontent.com/CCExtractor/ccextractor-org-media/master/static/macOS-build-badge-logo.png" width="20"></a> [](https://travis-ci.org/CCExtractor/ccextractor)
|
||||
[](https://sampleplatform.ccextractor.org/test/master/windows)
|
||||
[](https://sampleplatform.ccextractor.org/test/master/linux)
|
||||
[](https://sourceforge.net/projects/ccextractor/)
|
||||
@@ -29,6 +28,25 @@ The core functionality is written in C. Other languages used include C++ and Pyt
|
||||
|
||||
Downloads for precompiled binaries and source code can be found [on our website](https://ccextractor.org/public/general/downloads/).
|
||||
|
||||
|
||||
### Windows Package Managers
|
||||
|
||||
**WinGet:**
|
||||
```powershell
|
||||
winget install CCExtractor.CCExtractor
|
||||
```
|
||||
|
||||
**Chocolatey:**
|
||||
```powershell
|
||||
choco install ccextractor
|
||||
```
|
||||
|
||||
**Scoop:**
|
||||
```powershell
|
||||
scoop bucket add extras
|
||||
scoop install ccextractor
|
||||
```
|
||||
|
||||
Extracting subtitles is relatively simple. Just run the following command:
|
||||
|
||||
`ccextractor <input>`
|
||||
@@ -44,6 +62,34 @@ You can also find the list of parameters and their brief description by running
|
||||
|
||||
You can find sample files on [our website](https://ccextractor.org/public/general/tvsamples/) to test the software.
|
||||
|
||||
### Building from Source
|
||||
|
||||
- [Building on Windows using WSL](docs/build-wsl.md)
|
||||
|
||||
#### Linux (Autotools) build notes
|
||||
|
||||
CCExtractor also supports an autotools-based build system under the `linux/`
|
||||
directory.
|
||||
|
||||
Important notes:
|
||||
- The autotools workflow lives inside `linux/`. The `configure` script is
|
||||
generated there and should be run from that directory.
|
||||
- Typical build steps are:
|
||||
```
|
||||
cd linux
|
||||
./autogen.sh
|
||||
./configure
|
||||
make
|
||||
```
|
||||
- Rust support is enabled automatically if `cargo` and `rustc` are available
|
||||
on the system. In that case, Rust components are built and linked during
|
||||
`make`.
|
||||
- If you encounter unexpected build or linking issues, a clean rebuild
|
||||
(`make clean` or a fresh clone) is recommended, especially when Rust is
|
||||
involved.
|
||||
|
||||
This build flow has been tested on Linux and WSL.
|
||||
|
||||
## Compiling CCExtractor
|
||||
|
||||
To learn more about how to compile and build CCExtractor for your platform check the [compilation guide](https://github.com/CCExtractor/ccextractor/blob/master/docs/COMPILATION.MD).
|
||||
|
||||
@@ -1,9 +1,68 @@
|
||||
0.96.6 (unreleased)
|
||||
-------------------
|
||||
- New: Add Snap packaging support with Snapcraft configuration and GitHub Actions CI workflow.
|
||||
- Fix: Clear status line output on Linux/WSL to prevent text artifacts (#2017)
|
||||
- Fix: Prevent infinite loop on truncated MKV files
|
||||
- Fix: Various memory safety and stability fixes in demuxers (MP4, PS, MKV, DVB)
|
||||
- Fix: Delete empty output files instead of leaving 0-byte files (#1282)
|
||||
- Fix: --mkvlang now supports BCP 47 language tags (e.g., en-US, zh-Hans-CN) and multiple codes
|
||||
|
||||
0.96.5 (2026-01-05)
|
||||
-------------------
|
||||
- New: CCExtractor is available again via Homebrew on macOS and Linux.
|
||||
- New: Add support for raw CDP (Caption Distribution Packet) files (#1406)
|
||||
- New: Add --scc-accurate-timing option for bandwidth-aware SCC output (#1120)
|
||||
- Fix: MXF files containing CEA-708 captions not being detected/extracted (#1647)
|
||||
- Docs: Add Windows WSL build instructions
|
||||
- Fix: Security fixes (out-of-bounds read/write) in a few places in the legacy C code.
|
||||
|
||||
0.96.4 (2026-01-01)
|
||||
-------------------
|
||||
- New: Persistent CEA-708 decoder context - maintains state across multiple calls for proper subtitle continuity
|
||||
- New: OCR character blacklist options (--ocr-blacklist, --ocr-blacklist-file) for improved accuracy
|
||||
- New: OCR line-split option (--ocr-splitontimechange) for better subtitle segmentation
|
||||
- Fix: 32-bit build failures on i686 and armv7l architectures
|
||||
- Fix: Legacy command-line argument compatibility (-1, -2, -12, --sc, --svc)
|
||||
- Fix: Prevent heap buffer overflow in Teletext processing (security fix)
|
||||
- Fix: Prevent integer overflow leading to heap buffer overflow in Transport Stream handling (security fix)
|
||||
- Fix: Lazy OCR initialization - only initialize when first DVB subtitle is encountered
|
||||
- Build: Optimized Windows CI workflow for faster builds
|
||||
- Fix: Updated GUI to version 0.7.1. A blind attempt to fix a hang on startup on some Windows systems.
|
||||
|
||||
0.96.3 (2025-12-29)
|
||||
-------------------
|
||||
- New: VOBSUB subtitle extraction with OCR support for MP4 files
|
||||
- New: VOBSUB subtitle extraction support for MKV/Matroska files
|
||||
- New: Native SCC (Scenarist Closed Caption) input file support - CCExtractor can now read SCC files
|
||||
- New: Configurable frame rate (--scc-framerate) and styled PAC codes for SCC output
|
||||
- Fix: Apply --delay option to DVB/bitmap subtitles (previously only worked with text-based subtitles)
|
||||
- Fix: 200ms timing offset in MOV/MP4 caption extraction
|
||||
- Fix: utf8proc include path for system library builds
|
||||
- Fix: Use fixed-width integer types in MP4 bswap functions for better portability
|
||||
- Fix: Guard ocr_text access with ENABLE_OCR preprocessor check
|
||||
- Fix: Preserve FFmpeg libs when building with -system-libs -hardsubx
|
||||
- Build: Add vobsub_decoder to Windows and autoconf build systems
|
||||
- Build: Add winget and Chocolatey packaging workflows for Windows distribution
|
||||
- Docs: Add VOBSUB extraction documentation and subtile-ocr Dockerfile
|
||||
|
||||
0.96.2 (2025-12-26)
|
||||
-------------------
|
||||
- Fix: Resolve utf8proc header include path when building against system libraries on Linux.
|
||||
- Rebundle Windows version to include required runtime files to process hardcoded subtitles
|
||||
(hardsubx mode).
|
||||
- New: Add optional -system-libs flag to Linux build script for package manager compatibility
|
||||
|
||||
0.96.1 (2025-12-25)
|
||||
-------------------
|
||||
- Rebundle Windows version to include an updated GUI. No changes in CCExtractor itself.
|
||||
|
||||
0.96 (2025-12-23)
|
||||
-----------------
|
||||
- New: Multi-page teletext extraction support (#665)
|
||||
- Extract multiple teletext pages simultaneously with separate output files
|
||||
- Use --tpage multiple times (e.g., --tpage 100 --tpage 200)
|
||||
- Output files are named with page suffix (e.g., output_p100.srt, output_p200.srt)
|
||||
- Fix: SPUPNG subtitle offset calculation to center based on actual image dimensions
|
||||
|
||||
- New: Added --list-tracks (-L) option to list all tracks in media files without processing
|
||||
- New: Chinese, Korean, Japanese support - proper encoding and OCR.
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
# Installation
|
||||
|
||||
## Homebrew
|
||||
The easiest way to install CCExtractor on macOS and Linux is through Homebrew:
|
||||
|
||||
```bash
|
||||
brew install ccextractor
|
||||
```
|
||||
Note: If you don't have Homebrew installed, see [brew.sh](https://brew.sh/)
|
||||
for installation instructions.
|
||||
|
||||
---
|
||||
|
||||
# Compiling CCExtractor
|
||||
|
||||
You may compile CCExtractor across all major platforms using the `CMakeLists.txt` stored under the `ccextractor/src/` directory. Autoconf and custom build scripts are also available. See the platform-specific instructions in the sections below.
|
||||
|
||||
@@ -26,6 +26,14 @@ Running ccextractor without parameters shows the help screen. Usage is
|
||||
trivial - you just need to pass the input file and (optionally) some
|
||||
details about the input and output files.
|
||||
|
||||
Example:
|
||||
|
||||
ccextractor input_video.ts
|
||||
|
||||
This command extracts subtitles from the input video file and generates a subtitle output file
|
||||
(such as .srt) in the same directory.
|
||||
|
||||
|
||||
|
||||
## Languages
|
||||
Usually English captions are transmitted in line 21 field 1 data,
|
||||
|
||||
129
docs/VOBSUB.md
Normal file
129
docs/VOBSUB.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# VOBSUB Subtitle Extraction from MKV Files
|
||||
|
||||
CCExtractor supports extracting VOBSUB (S_VOBSUB) subtitles from Matroska (MKV) containers. VOBSUB is an image-based subtitle format originally from DVD video.
|
||||
|
||||
## Overview
|
||||
|
||||
VOBSUB subtitles consist of two files:
|
||||
- `.idx` - Index file containing metadata, palette, and timestamp/position entries
|
||||
- `.sub` - Binary file containing the actual subtitle bitmap data in MPEG Program Stream format
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```bash
|
||||
ccextractor movie.mkv
|
||||
```
|
||||
|
||||
This will extract all VOBSUB tracks and create paired `.idx` and `.sub` files:
|
||||
- `movie_eng.idx` + `movie_eng.sub` (first English track)
|
||||
- `movie_eng_1.idx` + `movie_eng_1.sub` (second English track, if present)
|
||||
- etc.
|
||||
|
||||
## Converting VOBSUB to SRT (Text)
|
||||
|
||||
Since VOBSUB subtitles are images, you need OCR (Optical Character Recognition) to convert them to text-based formats like SRT.
|
||||
|
||||
### Using subtile-ocr (Recommended)
|
||||
|
||||
[subtile-ocr](https://github.com/gwen-lg/subtile-ocr) is an actively maintained Rust tool that provides accurate OCR conversion.
|
||||
|
||||
#### Option 1: Docker (Easiest)
|
||||
|
||||
We provide a Dockerfile that builds subtile-ocr with all dependencies:
|
||||
|
||||
```bash
|
||||
# Build the Docker image (one-time)
|
||||
cd tools/vobsubocr
|
||||
docker build -t subtile-ocr .
|
||||
|
||||
# Extract VOBSUB from MKV
|
||||
ccextractor movie.mkv
|
||||
|
||||
# Convert to SRT using OCR
|
||||
docker run --rm -v "$(pwd)":/data subtile-ocr -l eng -o /data/movie_eng.srt /data/movie_eng.idx
|
||||
```
|
||||
|
||||
#### Option 2: Install subtile-ocr Natively
|
||||
|
||||
If you have Rust and Tesseract development libraries installed:
|
||||
|
||||
```bash
|
||||
# Install dependencies (Ubuntu/Debian)
|
||||
sudo apt-get install libleptonica-dev libtesseract-dev tesseract-ocr tesseract-ocr-eng
|
||||
|
||||
# Install subtile-ocr
|
||||
cargo install --git https://github.com/gwen-lg/subtile-ocr
|
||||
|
||||
# Convert
|
||||
subtile-ocr -l eng -o movie_eng.srt movie_eng.idx
|
||||
```
|
||||
|
||||
### subtile-ocr Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `-l, --lang <LANG>` | Tesseract language code (required). Examples: `eng`, `fra`, `deu`, `chi_sim` |
|
||||
| `-o, --output <FILE>` | Output SRT file (stdout if not specified) |
|
||||
| `-t, --threshold <0.0-1.0>` | Binarization threshold (default: 0.6) |
|
||||
| `-d, --dpi <DPI>` | Image DPI for OCR (default: 150) |
|
||||
| `--dump` | Save processed subtitle images as PNG files |
|
||||
|
||||
### Language Codes
|
||||
|
||||
Install additional Tesseract language packs as needed:
|
||||
|
||||
```bash
|
||||
# Examples
|
||||
sudo apt-get install tesseract-ocr-fra # French
|
||||
sudo apt-get install tesseract-ocr-deu # German
|
||||
sudo apt-get install tesseract-ocr-spa # Spanish
|
||||
sudo apt-get install tesseract-ocr-chi-sim # Simplified Chinese
|
||||
```
|
||||
|
||||
## Technical Details
|
||||
|
||||
### .idx File Format
|
||||
|
||||
The index file contains:
|
||||
1. Header with metadata (size, palette, alignment settings)
|
||||
2. Language identifier line
|
||||
3. Timestamp entries with file positions
|
||||
|
||||
Example:
|
||||
```
|
||||
# VobSub index file, v7 (do not modify this line!)
|
||||
size: 720x576
|
||||
palette: 000000, 828282, ...
|
||||
|
||||
id: eng, index: 0
|
||||
timestamp: 00:01:12:920, filepos: 000000000
|
||||
timestamp: 00:01:18:640, filepos: 000000800
|
||||
...
|
||||
```
|
||||
|
||||
### .sub File Format
|
||||
|
||||
The binary file contains MPEG Program Stream packets:
|
||||
- Each subtitle is wrapped in a PS Pack header (14 bytes) + PES header (15 bytes)
|
||||
- Subtitles are aligned to 2048-byte boundaries
|
||||
- Contains raw SPU (SubPicture Unit) bitmap data
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Empty output files
|
||||
- Ensure the MKV file actually contains VOBSUB tracks (check with `mediainfo` or `ffprobe`)
|
||||
- CCExtractor will report "No VOBSUB subtitles to write" if the track is empty
|
||||
|
||||
### OCR quality issues
|
||||
- Try adjusting the `-t` threshold parameter
|
||||
- Ensure the correct language pack is installed
|
||||
- Use `--dump` to inspect the processed images
|
||||
|
||||
### Docker permission issues
|
||||
- The output files may be owned by root; use `sudo chown` to fix ownership
|
||||
- Or run Docker with `--user $(id -u):$(id -g)`
|
||||
|
||||
## See Also
|
||||
|
||||
- [OCR.md](OCR.md) - General OCR support in CCExtractor
|
||||
- [subtile-ocr GitHub](https://github.com/gwen-lg/subtile-ocr) - OCR tool documentation
|
||||
137
docs/build-wsl.md
Normal file
137
docs/build-wsl.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# Building CCExtractor on Windows using WSL
|
||||
|
||||
This guide explains how to build CCExtractor on Windows using WSL (Ubuntu).
|
||||
It is based on a fresh setup and includes all required dependencies and
|
||||
fixes for common build issues encountered during compilation.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Windows 10 or Windows 11
|
||||
- WSL enabled
|
||||
- Ubuntu installed via Microsoft Store
|
||||
|
||||
---
|
||||
|
||||
## Install WSL and Ubuntu
|
||||
|
||||
From PowerShell (run as Administrator):
|
||||
|
||||
```powershell
|
||||
wsl --install -d Ubuntu
|
||||
```
|
||||
|
||||
Restart the system if prompted, then launch Ubuntu from the Start menu.
|
||||
|
||||
---
|
||||
|
||||
## Update system packages
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install basic build tools
|
||||
|
||||
```bash
|
||||
sudo apt install -y build-essential git pkg-config
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install Rust (required)
|
||||
|
||||
CCExtractor includes Rust components, so Rust and Cargo are required.
|
||||
|
||||
```bash
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
source ~/.cargo/env
|
||||
```
|
||||
|
||||
Verify installation:
|
||||
|
||||
```bash
|
||||
cargo --version
|
||||
rustc --version
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Install required libraries
|
||||
|
||||
```bash
|
||||
sudo apt install -y \
|
||||
libclang-dev clang \
|
||||
libtesseract-dev tesseract-ocr \
|
||||
libgpac-dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Clone the repository
|
||||
|
||||
```bash
|
||||
git clone https://github.com/CCExtractor/ccextractor.git
|
||||
cd ccextractor
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Build CCExtractor
|
||||
|
||||
```bash
|
||||
cd linux
|
||||
./build
|
||||
```
|
||||
|
||||
After a successful build, verify by running:
|
||||
|
||||
```bash
|
||||
./ccextractor
|
||||
```
|
||||
|
||||
You should see the help/usage output.
|
||||
|
||||
---
|
||||
|
||||
## Common build issues
|
||||
|
||||
### cargo: command not found
|
||||
|
||||
```bash
|
||||
source ~/.cargo/env
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Unable to find libclang
|
||||
|
||||
```bash
|
||||
sudo apt install libclang-dev clang
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### gpac/isomedia.h: No such file or directory
|
||||
|
||||
```bash
|
||||
sudo apt install libgpac-dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### please install tesseract development library
|
||||
|
||||
```bash
|
||||
sudo apt install libtesseract-dev tesseract-ocr
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- Compiler warnings during the build process are expected and do not indicate failure.
|
||||
- This guide was tested on Ubuntu (WSL) running on Windows 11.
|
||||
@@ -151,6 +151,8 @@ ccextractor_SOURCES = \
|
||||
../src/lib_ccx/list.h \
|
||||
../src/lib_ccx/matroska.c \
|
||||
../src/lib_ccx/matroska.h \
|
||||
../src/lib_ccx/vobsub_decoder.c \
|
||||
../src/lib_ccx/vobsub_decoder.h \
|
||||
../src/lib_ccx/mp4.c \
|
||||
../src/lib_ccx/myth.c \
|
||||
../src/lib_ccx/networking.c \
|
||||
|
||||
64
linux/build
64
linux/build
@@ -2,6 +2,7 @@
|
||||
|
||||
RUST_LIB="rust/release/libccx_rust.a"
|
||||
RUST_PROFILE="--release"
|
||||
USE_SYSTEM_LIBS=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-debug)
|
||||
@@ -23,6 +24,10 @@ while [[ $# -gt 0 ]]; do
|
||||
BLD_LINKER="$BLD_LINKER -lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lxcb-shm -lxcb -lX11 -llzma -lswresample"
|
||||
shift
|
||||
;;
|
||||
-system-libs)
|
||||
USE_SYSTEM_LIBS=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
exit 1
|
||||
@@ -30,7 +35,42 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
BLD_FLAGS="$BLD_FLAGS -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -DENABLE_OCR -DFT2_BUILD_LIBRARY -DGPAC_DISABLE_VTT -DGPAC_DISABLE_OD_DUMP -DGPAC_DISABLE_REMOTERY -DNO_GZIP"
|
||||
if [ "$USE_SYSTEM_LIBS" = true ]; then
|
||||
command -v pkg-config >/dev/null || {
|
||||
echo "Error: pkg-config is required for -system-libs mode"
|
||||
exit 1
|
||||
}
|
||||
|
||||
MISSING=""
|
||||
for lib in libpng zlib freetype2 libutf8proc; do
|
||||
if ! pkg-config --exists "$lib" 2>/dev/null; then
|
||||
MISSING="$MISSING $lib"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$MISSING" ]; then
|
||||
echo "Error: Missing required system libraries:$MISSING"
|
||||
echo ""
|
||||
echo "On Debian/Ubuntu: sudo apt install libpng-dev zlib1g-dev libfreetype-dev libutf8proc-dev"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for hdr in leptonica/allheaders.h tesseract/capi.h; do
|
||||
if ! echo "#include <$hdr>" | gcc -E - >/dev/null 2>&1; then
|
||||
echo "Error: Missing headers for <$hdr>"
|
||||
echo "On Debian/Ubuntu: sudo apt install libleptonica-dev libtesseract-dev"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
PKG_CFLAGS="$(pkg-config --cflags libpng zlib freetype2 libutf8proc)"
|
||||
PKG_LIBS="$(pkg-config --libs libpng zlib freetype2 libutf8proc)"
|
||||
fi
|
||||
|
||||
BLD_FLAGS="$BLD_FLAGS -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -DENABLE_OCR -DGPAC_DISABLE_VTT -DGPAC_DISABLE_OD_DUMP -DGPAC_DISABLE_REMOTERY -DNO_GZIP"
|
||||
if [ "$USE_SYSTEM_LIBS" != true ]; then
|
||||
BLD_FLAGS="$BLD_FLAGS -DFT2_BUILD_LIBRARY"
|
||||
fi
|
||||
bit_os=$(getconf LONG_BIT)
|
||||
if [ "$bit_os" == "64" ]
|
||||
then
|
||||
@@ -87,6 +127,24 @@ SRC_FREETYPE="../src/thirdparty/freetype/autofit/autofit.c
|
||||
BLD_SOURCES="../src/ccextractor.c $SRC_CCX $SRC_GPAC $SRC_ZLIB $SRC_LIBPNG $SRC_HASH $SRC_UTF8PROC $SRC_FREETYPE"
|
||||
BLD_LINKER="$BLD_LINKER -lm -zmuldefs -l tesseract -l leptonica -lpthread -ldl -lgpac"
|
||||
|
||||
if [ "$USE_SYSTEM_LIBS" = true ]; then
|
||||
LEPTONICA_CFLAGS="$(pkg-config --cflags --silence-errors lept)"
|
||||
TESSERACT_CFLAGS="$(pkg-config --cflags --silence-errors tesseract)"
|
||||
GPAC_CFLAGS="$(pkg-config --cflags --silence-errors gpac)"
|
||||
|
||||
BLD_INCLUDE="-I../src -I../src/lib_ccx -I../src/lib_ccx/zvbi -I../src/thirdparty/lib_hash \
|
||||
$PKG_CFLAGS $LEPTONICA_CFLAGS $TESSERACT_CFLAGS $GPAC_CFLAGS"
|
||||
|
||||
BLD_SOURCES="../src/ccextractor.c $SRC_CCX $SRC_HASH"
|
||||
# Preserve FFmpeg libraries if -hardsubx was specified
|
||||
FFMPEG_LIBS=""
|
||||
if [ "$HARDSUBX" = true ]; then
|
||||
FFMPEG_LIBS="-lswscale -lavutil -pthread -lavformat -lavcodec -lavfilter -lxcb-shm -lxcb -lX11 -llzma -lswresample"
|
||||
fi
|
||||
BLD_LINKER="$PKG_LIBS -ltesseract -lleptonica -lgpac -lpthread -ldl -lm $FFMPEG_LIBS"
|
||||
fi
|
||||
|
||||
|
||||
echo "Running pre-build script..."
|
||||
./pre-build.sh
|
||||
echo "Trying to compile..."
|
||||
@@ -149,3 +207,7 @@ if [[ "$out" != "" ]] ; then
|
||||
else
|
||||
echo "Compilation successful, no compiler messages."
|
||||
fi
|
||||
|
||||
if [ -d ./utf8proc_compat ]; then
|
||||
rm -rf ./utf8proc_compat
|
||||
fi
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ([2.71])
|
||||
AC_INIT([CCExtractor], [0.96], [carlos@ccextractor.org])
|
||||
AC_INIT([CCExtractor], [0.96.5], [carlos@ccextractor.org])
|
||||
AC_CONFIG_AUX_DIR([build-conf])
|
||||
AC_CONFIG_SRCDIR([../src/ccextractor.c])
|
||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||
|
||||
@@ -123,6 +123,8 @@ ccextractor_SOURCES = \
|
||||
../src/lib_ccx/list.h \
|
||||
../src/lib_ccx/matroska.c \
|
||||
../src/lib_ccx/matroska.h \
|
||||
../src/lib_ccx/vobsub_decoder.c \
|
||||
../src/lib_ccx/vobsub_decoder.h \
|
||||
../src/lib_ccx/mp4.c \
|
||||
../src/lib_ccx/myth.c \
|
||||
../src/lib_ccx/networking.c \
|
||||
|
||||
@@ -42,7 +42,16 @@ while [[ $# -gt 0 ]]; do
|
||||
esac
|
||||
done
|
||||
|
||||
BLD_FLAGS="-std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
|
||||
# Determine architecture based on cargo (to ensure consistency with Rust part)
|
||||
CARGO_ARCH=$(file $(which cargo) | grep -o 'x86_64\|arm64')
|
||||
if [[ "$CARGO_ARCH" == "x86_64" ]]; then
|
||||
echo "Detected Intel (x86_64) Cargo. Forcing x86_64 build to match Rust and libraries..."
|
||||
BLD_ARCH="-arch x86_64"
|
||||
else
|
||||
BLD_ARCH="-arch arm64"
|
||||
fi
|
||||
|
||||
BLD_FLAGS="$BLD_ARCH -std=gnu99 -Wno-write-strings -Wno-pointer-sign -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT -Dfopen64=fopen -Dopen64=open -Dlseek64=lseek"
|
||||
|
||||
# Add flags for bundled libraries (not needed when using system libs)
|
||||
if [[ "$USE_SYSTEM_LIBS" != "true" ]]; then
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ([2.71])
|
||||
AC_INIT([CCExtractor],[0.96],[carlos@ccextractor.org])
|
||||
AC_INIT([CCExtractor],[0.96.5],[carlos@ccextractor.org])
|
||||
AC_CONFIG_AUX_DIR([build-conf])
|
||||
AC_CONFIG_SRCDIR([../src/ccextractor.c])
|
||||
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
pkgname=ccextractor
|
||||
pkgver=0.96
|
||||
pkgver=0.96.5
|
||||
pkgrel=1
|
||||
pkgdesc="A closed captions and teletext subtitles extractor for video streams."
|
||||
arch=('i686' 'x86_64')
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Name: ccextractor
|
||||
Version: 0.96
|
||||
Version: 0.96.5
|
||||
Release: 1
|
||||
Summary: A closed captions and teletext subtitles extractor for video streams.
|
||||
Group: Applications/Internet
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
TYPE="debian" # can be one of 'slackware', 'debian', 'rpm'
|
||||
PROGRAM_NAME="ccextractor"
|
||||
VERSION="0.96"
|
||||
VERSION="0.96.5"
|
||||
RELEASE="1"
|
||||
LICENSE="GPL-2.0"
|
||||
MAINTAINER="carlos@ccextractor.org"
|
||||
|
||||
96
packaging/README.md
Normal file
96
packaging/README.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# CCExtractor Packaging
|
||||
|
||||
This directory contains packaging configurations for Windows package managers.
|
||||
|
||||
## Windows Package Manager (winget)
|
||||
|
||||
### Initial Setup (One-time)
|
||||
|
||||
1. **Calculate MSI hash** for the current release:
|
||||
```powershell
|
||||
certutil -hashfile CCExtractor.0.96.1.msi SHA256
|
||||
```
|
||||
|
||||
2. **Update the manifest files** in `winget/` with the SHA256 hash
|
||||
|
||||
3. **Fork microsoft/winget-pkgs** to the CCExtractor organization:
|
||||
- Go to https://github.com/microsoft/winget-pkgs
|
||||
- Fork to https://github.com/CCExtractor/winget-pkgs
|
||||
|
||||
4. **Submit initial manifest** via PR:
|
||||
- Clone your fork
|
||||
- Create directory: `manifests/c/CCExtractor/CCExtractor/0.96.1/`
|
||||
- Copy the three YAML files from `winget/`
|
||||
- Submit PR to microsoft/winget-pkgs
|
||||
|
||||
5. **Create GitHub token** for automation:
|
||||
- Go to GitHub Settings > Developer settings > Personal access tokens > Tokens (classic)
|
||||
- Create token with `public_repo` scope
|
||||
- Add as secret `WINGET_TOKEN` in CCExtractor/ccextractor repository
|
||||
|
||||
### Automated Updates
|
||||
|
||||
After the initial submission is merged, the `publish_winget.yml` workflow will automatically submit PRs for new releases.
|
||||
|
||||
## Chocolatey
|
||||
|
||||
### Initial Setup (One-time)
|
||||
|
||||
1. **Create Chocolatey account**:
|
||||
- Register at https://community.chocolatey.org/account/Register
|
||||
|
||||
2. **Get API key**:
|
||||
- Go to https://community.chocolatey.org/account
|
||||
- Copy your API key
|
||||
|
||||
3. **Add secret**:
|
||||
- Add `CHOCOLATEY_API_KEY` secret to CCExtractor/ccextractor repository
|
||||
|
||||
### Package Structure
|
||||
|
||||
```
|
||||
chocolatey/
|
||||
├── ccextractor.nuspec # Package metadata
|
||||
└── tools/
|
||||
├── chocolateyInstall.ps1 # Installation script
|
||||
└── chocolateyUninstall.ps1 # Uninstallation script
|
||||
```
|
||||
|
||||
### Manual Testing
|
||||
|
||||
```powershell
|
||||
cd packaging/chocolatey
|
||||
|
||||
# Update version and checksum in files first, then:
|
||||
choco pack ccextractor.nuspec
|
||||
|
||||
# Test locally
|
||||
choco install ccextractor --source="'.'" --yes --force
|
||||
|
||||
# Verify
|
||||
ccextractor --version
|
||||
```
|
||||
|
||||
### Automated Updates
|
||||
|
||||
The `publish_chocolatey.yml` workflow automatically:
|
||||
1. Downloads the MSI from the release
|
||||
2. Calculates the SHA256 checksum
|
||||
3. Updates the nuspec and install script
|
||||
4. Builds and tests the package
|
||||
5. Pushes to Chocolatey
|
||||
|
||||
Note: Chocolatey packages go through moderation before being publicly available.
|
||||
|
||||
## Workflow Triggers
|
||||
|
||||
Both workflows trigger on:
|
||||
- **Release published**: Automatic publishing when a new release is created
|
||||
- **Manual dispatch**: Can be triggered manually with a specific tag
|
||||
|
||||
## Secrets Required
|
||||
|
||||
| Secret | Purpose |
|
||||
|--------|---------|
|
||||
| `WINGET_TOKEN` | GitHub PAT with `public_repo` scope for winget PRs |
|
||||
| `CHOCOLATEY_API_KEY` | Chocolatey API key for package uploads |
|
||||
43
packaging/chocolatey/ccextractor.nuspec
Normal file
43
packaging/chocolatey/ccextractor.nuspec
Normal file
@@ -0,0 +1,43 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
|
||||
<metadata>
|
||||
<id>ccextractor</id>
|
||||
<version>0.96.5</version>
|
||||
<title>CCExtractor</title>
|
||||
<authors>CCExtractor Development Team</authors>
|
||||
<owners>CCExtractor</owners>
|
||||
<licenseUrl>https://github.com/CCExtractor/ccextractor/blob/master/LICENSE.txt</licenseUrl>
|
||||
<projectUrl>https://ccextractor.org</projectUrl>
|
||||
<iconUrl>https://raw.githubusercontent.com/CCExtractor/ccextractor/master/windows/CCX.ico</iconUrl>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>CCExtractor is a tool that analyzes video files and produces independent subtitle files from the closed captions data.
|
||||
|
||||
### Features
|
||||
- Extracts closed captions from various video formats (MPEG, H.264, MKV, MP4, etc.)
|
||||
- Supports multiple input sources including DVDs, DVRs, and live TV captures
|
||||
- Outputs to multiple formats (SRT, WebVTT, SAMI, transcript, etc.)
|
||||
- OCR support for bitmap-based subtitles (DVB, teletext)
|
||||
- Includes a graphical user interface
|
||||
|
||||
### Usage
|
||||
After installation, run `ccextractor` from the command line or use the GUI.
|
||||
|
||||
```
|
||||
ccextractor video.ts -o output.srt
|
||||
```
|
||||
|
||||
For more options: `ccextractor --help`
|
||||
</description>
|
||||
<summary>Extract closed captions and subtitles from video files</summary>
|
||||
<releaseNotes>https://github.com/CCExtractor/ccextractor/releases</releaseNotes>
|
||||
<copyright>Copyright (c) CCExtractor Development</copyright>
|
||||
<tags>subtitles closed-captions video extraction accessibility srt dvb teletext ocr media cli</tags>
|
||||
<projectSourceUrl>https://github.com/CCExtractor/ccextractor</projectSourceUrl>
|
||||
<packageSourceUrl>https://github.com/CCExtractor/ccextractor/tree/master/packaging/chocolatey</packageSourceUrl>
|
||||
<docsUrl>https://github.com/CCExtractor/ccextractor/wiki</docsUrl>
|
||||
<bugTrackerUrl>https://github.com/CCExtractor/ccextractor/issues</bugTrackerUrl>
|
||||
</metadata>
|
||||
<files>
|
||||
<file src="tools\**" target="tools" />
|
||||
</files>
|
||||
</package>
|
||||
24
packaging/chocolatey/tools/chocolateyInstall.ps1
Normal file
24
packaging/chocolatey/tools/chocolateyInstall.ps1
Normal file
@@ -0,0 +1,24 @@
|
||||
$ErrorActionPreference = 'Stop'
|
||||
|
||||
$packageName = 'ccextractor'
|
||||
$toolsDir = "$(Split-Path -parent $MyInvocation.MyCommand.Definition)"
|
||||
|
||||
# Package parameters
|
||||
$packageArgs = @{
|
||||
packageName = $packageName
|
||||
fileType = 'MSI'
|
||||
url64bit = 'https://github.com/CCExtractor/ccextractor/releases/download/v0.96.5/CCExtractor.0.96.5.msi'
|
||||
checksum64 = 'FFCAB0D766180AFC2832277397CDEC885D15270DECE33A9A51947B790F1F095B'
|
||||
checksumType64 = 'sha256'
|
||||
silentArgs = '/quiet /norestart'
|
||||
validExitCodes = @(0, 3010, 1641)
|
||||
}
|
||||
|
||||
Install-ChocolateyPackage @packageArgs
|
||||
|
||||
# Add to PATH if not already there
|
||||
$installPath = Join-Path $env:ProgramFiles 'CCExtractor'
|
||||
if (Test-Path $installPath) {
|
||||
Install-ChocolateyPath -PathToInstall $installPath -PathType 'Machine'
|
||||
Write-Host "CCExtractor installed to: $installPath"
|
||||
}
|
||||
23
packaging/chocolatey/tools/chocolateyUninstall.ps1
Normal file
23
packaging/chocolatey/tools/chocolateyUninstall.ps1
Normal file
@@ -0,0 +1,23 @@
|
||||
# Chocolatey uninstall script for the CCExtractor MSI package.
#
# Looks up the CCExtractor entry in the Windows uninstall registry and, when
# exactly one entry is found, asks Chocolatey to run a silent MSI uninstall.
$ErrorActionPreference = 'Stop'

$packageName = 'ccextractor'
$silentArgs = '/quiet /norestart'

# The wildcard lookup can yield zero, one, or several keys. Force an array so
# that .Count is reliable in every case.
[array]$regKeys = Get-UninstallRegistryKey -SoftwareName 'CCExtractor*'

if ($regKeys.Count -eq 1) {
    $regKey = $regKeys[0]

    $packageArgs = @{
        packageName = $packageName
        fileType = 'MSI'
        # For MSI uninstalls the registry key name is passed first in the
        # silent arguments and 'file' is left empty.
        # NOTE(review): this relies on PSChildName being the MSI product code,
        # as in the standard Chocolatey MSI uninstall template - confirm.
        silentArgs = "$($regKey.PSChildName) $silentArgs"
        file = ''
        validExitCodes = @(0, 3010, 1605, 1614, 1641)
    }

    Uninstall-ChocolateyPackage @packageArgs
} elseif ($regKeys.Count -eq 0) {
    Write-Warning "CCExtractor was not found in the registry. It may have been uninstalled already."
} else {
    # Several matches: do not guess which product to remove, and do not pass
    # more than one key name to the uninstaller.
    Write-Warning "$($regKeys.Count) registry entries match 'CCExtractor*'; skipping automatic uninstall."
    Write-Warning "Please uninstall the following entries manually:"
    $regKeys | ForEach-Object { Write-Warning "- $($_.DisplayName)" }
}
|
||||
21
packaging/winget/CCExtractor.CCExtractor.installer.yaml
Normal file
21
packaging/winget/CCExtractor.CCExtractor.installer.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.installer.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
Platform:
|
||||
- Windows.Desktop
|
||||
MinimumOSVersion: 10.0.0.0
|
||||
InstallModes:
|
||||
- interactive
|
||||
- silent
|
||||
- silentWithProgress
|
||||
InstallerSwitches:
|
||||
Silent: /quiet
|
||||
SilentWithProgress: /passive
|
||||
UpgradeBehavior: install
|
||||
Installers:
|
||||
- Architecture: x64
|
||||
InstallerType: msi
|
||||
InstallerUrl: https://github.com/CCExtractor/ccextractor/releases/download/v0.96.5/CCExtractor.0.96.5.msi
|
||||
InstallerSha256: FFCAB0D766180AFC2832277397CDEC885D15270DECE33A9A51947B790F1F095B
|
||||
ManifestType: installer
|
||||
ManifestVersion: 1.9.0
|
||||
39
packaging/winget/CCExtractor.CCExtractor.locale.en-US.yaml
Normal file
39
packaging/winget/CCExtractor.CCExtractor.locale.en-US.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.defaultLocale.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
PackageLocale: en-US
|
||||
Publisher: CCExtractor Development
|
||||
PublisherUrl: https://ccextractor.org
|
||||
PublisherSupportUrl: https://github.com/CCExtractor/ccextractor/issues
|
||||
Author: CCExtractor Development Team
|
||||
PackageName: CCExtractor
|
||||
PackageUrl: https://ccextractor.org
|
||||
License: GPL-2.0
|
||||
LicenseUrl: https://github.com/CCExtractor/ccextractor/blob/master/LICENSE.txt
|
||||
Copyright: Copyright (c) CCExtractor Development
|
||||
ShortDescription: A tool to extract subtitles from video files
|
||||
Description: |-
|
||||
CCExtractor is a tool that analyzes video files and produces independent subtitle files from the closed captions data.
|
||||
|
||||
Key features:
|
||||
- Extracts closed captions from various video formats (MPEG, H.264, MKV, MP4, etc.)
|
||||
- Supports multiple input sources including DVDs, DVRs, and live TV captures
|
||||
- Outputs to multiple formats (SRT, WebVTT, SAMI, transcript, etc.)
|
||||
- OCR support for bitmap-based subtitles (DVB, teletext)
|
||||
- Cross-platform (Windows, Linux, macOS)
|
||||
- Includes a GUI for easy operation
|
||||
Moniker: ccextractor
|
||||
Tags:
|
||||
- subtitles
|
||||
- closed-captions
|
||||
- video
|
||||
- extraction
|
||||
- accessibility
|
||||
- srt
|
||||
- dvb
|
||||
- teletext
|
||||
- ocr
|
||||
- media
|
||||
ReleaseNotesUrl: https://github.com/CCExtractor/ccextractor/releases
|
||||
ManifestType: defaultLocale
|
||||
ManifestVersion: 1.9.0
|
||||
6
packaging/winget/CCExtractor.CCExtractor.yaml
Normal file
6
packaging/winget/CCExtractor.CCExtractor.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# yaml-language-server: $schema=https://aka.ms/winget-manifest.version.1.9.0.schema.json
|
||||
PackageIdentifier: CCExtractor.CCExtractor
|
||||
PackageVersion: 0.96.5
|
||||
DefaultLocale: en-US
|
||||
ManifestType: version
|
||||
ManifestVersion: 1.9.0
|
||||
19
snap/local/run-ccextractor.sh
Executable file
19
snap/local/run-ccextractor.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/sh
# Wrapper that launches the snapped ccextractor binary with LD_LIBRARY_PATH
# pointing at the libraries bundled under $SNAP.
set -e

# Default fallback
LIB_TRIPLET="x86_64-linux-gnu"

# Detect multiarch directory if present (first match wins)
for d in "$SNAP/usr/lib/"*-linux-gnu; do
    if [ -d "$d" ]; then
        LIB_TRIPLET=$(basename "$d")
        break
    fi
done

# Build the search path. The last fixed entry deliberately has NO trailing ':'
# because the optional suffix below supplies its own separator. A trailing ':'
# or a '::' creates an empty path element, which the dynamic loader treats as
# the current working directory.
export LD_LIBRARY_PATH="$SNAP/usr/lib:\
$SNAP/usr/lib/$LIB_TRIPLET:\
$SNAP/usr/lib/$LIB_TRIPLET/blas:\
$SNAP/usr/lib/$LIB_TRIPLET/lapack:\
$SNAP/usr/lib/$LIB_TRIPLET/pulseaudio\
${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Drop the first argument before forwarding the rest.
# NOTE(review): presumably this is the command path that the snap
# command-chain passes as $1 - confirm against snapcraft.yaml.
shift
exec "$SNAP/usr/local/bin/ccextractor" "$@"
|
||||
104
snap/snapcraft.yaml
Normal file
104
snap/snapcraft.yaml
Normal file
@@ -0,0 +1,104 @@
|
||||
name: ccextractor
|
||||
base: core22
|
||||
version: '0.96.5'
|
||||
summary: Closed Caption Extractor
|
||||
description: |
|
||||
CCExtractor is a tool for extracting closed captions from video files.
|
||||
website: https://www.ccextractor.org
|
||||
source-code: https://github.com/CCExtractor/ccextractor
|
||||
confinement: classic
|
||||
|
||||
apps:
|
||||
ccextractor:
|
||||
command: usr/local/bin/ccextractor
|
||||
command-chain:
|
||||
- local/run-ccextractor.sh
|
||||
plugs:
|
||||
- home
|
||||
|
||||
parts:
|
||||
gpac:
|
||||
plugin: make
|
||||
source: https://github.com/gpac/gpac.git
|
||||
source-tag: abi-16.4
|
||||
build-packages:
|
||||
- build-essential
|
||||
- pkg-config
|
||||
- zlib1g-dev
|
||||
- libssl-dev
|
||||
- libfreetype6-dev
|
||||
- libjpeg-dev
|
||||
- libpng-dev
|
||||
override-build: |
|
||||
set -eux
|
||||
./configure --prefix=/usr
|
||||
make -j$(nproc)
|
||||
make DESTDIR=$SNAPCRAFT_PART_INSTALL install-lib
|
||||
sed -i "s|^prefix=.*|prefix=$SNAPCRAFT_STAGE/usr|" $SNAPCRAFT_PART_INSTALL/usr/lib/pkgconfig/gpac.pc
|
||||
stage:
|
||||
- usr/lib/libgpac*
|
||||
- usr/lib/pkgconfig/gpac.pc
|
||||
- usr/include/gpac
|
||||
|
||||
ccextractor:
|
||||
after: [gpac]
|
||||
plugin: cmake
|
||||
source: .
|
||||
source-subdir: src
|
||||
build-environment:
|
||||
- PKG_CONFIG_PATH: "$SNAPCRAFT_STAGE/usr/lib/pkgconfig:$PKG_CONFIG_PATH"
|
||||
build-snaps:
|
||||
- cmake/latest/stable
|
||||
- rustup/latest/stable
|
||||
build-packages:
|
||||
- build-essential
|
||||
- pkg-config
|
||||
- clang
|
||||
- llvm-dev
|
||||
- libclang-dev
|
||||
- libzvbi-dev
|
||||
- libtesseract-dev
|
||||
- libavcodec-dev
|
||||
- libavformat-dev
|
||||
- libavdevice-dev
|
||||
- libavfilter-dev
|
||||
- libswscale-dev
|
||||
- libx11-dev
|
||||
- libxcb1-dev
|
||||
- libxcb-shm0-dev
|
||||
- libpng-dev
|
||||
- zlib1g-dev
|
||||
- libblas3
|
||||
- liblapack3
|
||||
stage-packages:
|
||||
- libzvbi0
|
||||
- libfreetype6
|
||||
- libpng16-16
|
||||
- libprotobuf-c1
|
||||
- libutf8proc2
|
||||
- libgl1
|
||||
- libglu1-mesa
|
||||
- libavcodec58
|
||||
- libavformat58
|
||||
- libavutil56
|
||||
- libavdevice58
|
||||
- libavfilter7
|
||||
- libswscale5
|
||||
- libjpeg-turbo8
|
||||
- libvorbis0a
|
||||
- libtheora0
|
||||
- libxvidcore4
|
||||
- libfaad2
|
||||
- libmad0
|
||||
- liba52-0.7.4
|
||||
- libpulse0
|
||||
- pulseaudio-utils
|
||||
override-build: |
|
||||
set -eux
|
||||
rustup toolchain install stable
|
||||
rustup default stable
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
snapcraftctl build
|
||||
install -D -m 0755 \
|
||||
$SNAPCRAFT_PROJECT_DIR/snap/local/run-ccextractor.sh \
|
||||
$SNAPCRAFT_PART_INSTALL/local/run-ccextractor.sh
|
||||
@@ -9,7 +9,7 @@ option (WITH_HARDSUBX "Build with support for burned-in subtitles" OFF)
|
||||
|
||||
# Version number
|
||||
set (CCEXTRACTOR_VERSION_MAJOR 0)
|
||||
set (CCEXTRACTOR_VERSION_MINOR 89)
|
||||
set (CCEXTRACTOR_VERSION_MINOR 96)
|
||||
|
||||
# Get project directory
|
||||
get_filename_component(BASE_PROJ_DIR ../ ABSOLUTE)
|
||||
@@ -255,4 +255,13 @@ endif (PKG_CONFIG_FOUND)
|
||||
target_link_libraries (ccextractor ${EXTRA_LIBS})
|
||||
target_include_directories (ccextractor PUBLIC ${EXTRA_INCLUDES})
|
||||
|
||||
# ccx_rust (Rust) calls C functions from ccx (like decode_vbi).
|
||||
# Force the linker to pull these symbols from ccx before processing ccx_rust.
|
||||
if (NOT WIN32 AND NOT APPLE)
|
||||
target_link_options (ccextractor PRIVATE
|
||||
-Wl,--undefined=decode_vbi
|
||||
-Wl,--undefined=do_cb
|
||||
-Wl,--undefined=store_hdcc)
|
||||
endif()
|
||||
|
||||
install (TARGETS ccextractor DESTINATION bin)
|
||||
|
||||
@@ -202,6 +202,12 @@ int start_ccx()
|
||||
if (!ret)
|
||||
ret = tmp;
|
||||
break;
|
||||
case CCX_SM_SCC:
|
||||
mprint("\rAnalyzing data in SCC (Scenarist Closed Caption) mode\n");
|
||||
tmp = raw_loop(ctx);
|
||||
if (!ret)
|
||||
ret = tmp;
|
||||
break;
|
||||
case CCX_SM_RCWT:
|
||||
mprint("\rAnalyzing data in CCExtractor's binary format\n");
|
||||
tmp = rcwt_loop(ctx);
|
||||
@@ -429,6 +435,9 @@ int main(int argc, char *argv[])
|
||||
|
||||
int compile_ret = ccxr_parse_parameters(argc, argv);
|
||||
|
||||
// Update the Rust logger target after parsing so --quiet is respected
|
||||
ccxr_update_logger_target();
|
||||
|
||||
if (compile_ret == EXIT_NO_INPUT_FILES)
|
||||
{
|
||||
print_usage();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
cmake_policy (SET CMP0037 NEW)
|
||||
|
||||
if(MSVC)
|
||||
set (CMAKE_C_FLAGS "-W3 /wd4005 /wd4996")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -W3 /wd4005 /wd4996")
|
||||
else (MSVC)
|
||||
set (CMAKE_C_FLAGS "-Wall -Wno-pointer-sign -g -std=gnu99")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-pointer-sign -g -std=gnu99")
|
||||
endif(MSVC)
|
||||
|
||||
if(WIN32)
|
||||
|
||||
@@ -379,11 +379,10 @@ void sei_rbsp(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: This really really looks bad
|
||||
mprint("WARNING: Unexpected SEI unit length...trying to continue.");
|
||||
temp_debug = 1;
|
||||
mprint("\n Failed block (at sei_rbsp) was:\n");
|
||||
dump(CCX_DMT_GENERIC_NOTICES, (unsigned char *)seibuf, seiend - seibuf, 0, 0);
|
||||
// Unexpected SEI length - common with malformed streams, don't spam output
|
||||
dbg_print(CCX_DMT_VERBOSE, "WARNING: Unexpected SEI unit length (parsed to %p, expected %p)...trying to continue.\n",
|
||||
(void *)tbuf, (void *)(seiend - 1));
|
||||
dump(CCX_DMT_VERBOSE, (unsigned char *)seibuf, seiend - seibuf, 0, 0);
|
||||
|
||||
ctx->num_unexpected_sei_length++;
|
||||
}
|
||||
@@ -393,20 +392,24 @@ void sei_rbsp(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
unsigned char *sei_message(struct avc_ctx *ctx, unsigned char *seibuf, unsigned char *seiend)
|
||||
{
|
||||
int payload_type = 0;
|
||||
while (*seibuf == 0xff)
|
||||
while (seibuf < seiend && *seibuf == 0xff)
|
||||
{
|
||||
payload_type += 255;
|
||||
seibuf++;
|
||||
}
|
||||
if (seibuf >= seiend)
|
||||
return NULL;
|
||||
payload_type += *seibuf;
|
||||
seibuf++;
|
||||
|
||||
int payload_size = 0;
|
||||
while (*seibuf == 0xff)
|
||||
while (seibuf < seiend && *seibuf == 0xff)
|
||||
{
|
||||
payload_size += 255;
|
||||
seibuf++;
|
||||
}
|
||||
if (seibuf >= seiend)
|
||||
return NULL;
|
||||
payload_size += *seibuf;
|
||||
seibuf++;
|
||||
|
||||
@@ -904,10 +907,10 @@ void seq_parameter_set_rbsp(struct avc_ctx *ctx, unsigned char *seqbuf, unsigned
|
||||
dvprint("vcl_hrd_parameters_present_flag= %llX\n", tmp1);
|
||||
if (tmp)
|
||||
{
|
||||
// TODO.
|
||||
mprint("vcl_hrd. Not implemented for now. Hopefully not needed. Skipping rest of NAL\n");
|
||||
// VCL HRD parameters are for video buffering compliance, not needed for caption extraction.
|
||||
// Just skip and continue - this doesn't affect our ability to extract captions.
|
||||
mprint("Skipping VCL HRD parameters (not needed for caption extraction)\n");
|
||||
ctx->num_vcl_hrd++;
|
||||
// exit(1);
|
||||
}
|
||||
if (tmp || tmp1)
|
||||
{
|
||||
@@ -993,9 +996,9 @@ void slice_header(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, un
|
||||
|
||||
if (nal_unit_type == 5)
|
||||
{
|
||||
// idr_pic_id: Read to advance bitstream position; value not needed for caption extraction
|
||||
tmp = read_exp_golomb_unsigned(&q1);
|
||||
dvprint("idr_pic_id= % 4lld (%#llX)\n", tmp, tmp);
|
||||
// TODO
|
||||
}
|
||||
if (dec_ctx->avc_ctx->pic_order_cnt_type == 0)
|
||||
{
|
||||
|
||||
@@ -212,6 +212,7 @@ enum ccx_stream_mode_enum
|
||||
CCX_SM_GXF = 11,
|
||||
CCX_SM_MKV = 12,
|
||||
CCX_SM_MXF = 13,
|
||||
CCX_SM_SCC = 14, // Scenarist Closed Caption input
|
||||
|
||||
CCX_SM_AUTODETECT = 16
|
||||
};
|
||||
|
||||
@@ -74,6 +74,8 @@ void init_options(struct ccx_s_options *options)
|
||||
options->ocr_oem = -1; // By default, OEM mode depends on the tesseract version
|
||||
options->psm = 3; // Default PSM mode (3 is the default tesseract as well)
|
||||
options->ocr_quantmode = 0; // No quantization (better OCR accuracy for DVB subtitles)
|
||||
options->ocr_line_split = 0; // By default, don't split images into lines (pending testing)
|
||||
options->ocr_blacklist = 1; // By default, use character blacklist to prevent common OCR errors (| vs I, etc.)
|
||||
options->mkvlang = NULL; // By default, all the languages are extracted
|
||||
options->ignore_pts_jumps = 1;
|
||||
options->analyze_video_stream = 0;
|
||||
@@ -139,7 +141,9 @@ void init_options(struct ccx_s_options *options)
|
||||
options->enc_cfg.services_charsets = NULL;
|
||||
options->enc_cfg.all_services_charset = NULL;
|
||||
options->enc_cfg.with_semaphore = 0;
|
||||
options->enc_cfg.force_dropframe = 0; // Assume No Drop Frame for MCC Encode.
|
||||
options->enc_cfg.force_dropframe = 0; // Assume No Drop Frame for MCC Encode.
|
||||
options->enc_cfg.scc_framerate = 0; // Default: 29.97fps for SCC output
|
||||
options->enc_cfg.scc_accurate_timing = 0; // Default: off for backwards compatibility (issue #1120)
|
||||
options->enc_cfg.extract_only_708 = 0;
|
||||
|
||||
options->settings_dtvcc.enabled = 0;
|
||||
@@ -152,6 +156,8 @@ void init_options(struct ccx_s_options *options)
|
||||
options->settings_dtvcc.services_enabled, 0,
|
||||
CCX_DTVCC_MAX_SERVICES * sizeof(options->settings_dtvcc.services_enabled[0]));
|
||||
|
||||
options->scc_framerate = 0; // Default: 29.97fps
|
||||
|
||||
#ifdef WITH_LIBCURL
|
||||
options->curlposturl = NULL;
|
||||
#endif
|
||||
|
||||
@@ -75,6 +75,10 @@ struct encoder_cfg
|
||||
// MCC File
|
||||
int force_dropframe; // 1 if dropframe frame count should be used. defaults to no drop frame.
|
||||
|
||||
// SCC output framerate
|
||||
int scc_framerate; // SCC output framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
int scc_accurate_timing; // If 1, use bandwidth-aware timing for broadcast compliance (issue #1120)
|
||||
|
||||
// text -> png (text render)
|
||||
char *render_font; // The font used to render text if needed (e.g. teletext->spupng)
|
||||
char *render_font_italics;
|
||||
@@ -149,6 +153,8 @@ struct ccx_s_options // Options from user parameters
|
||||
int ocr_oem; // The Tesseract OEM mode, could be 0 (default), 1 or 2
|
||||
int psm; // The Tesseract PSM mode, could be between 0 and 13. 3 is tesseract default
|
||||
int ocr_quantmode; // How to quantize the bitmap before passing to to tesseract (0=no quantization at all, 1=CCExtractor's internal)
|
||||
int ocr_line_split; // If 1, split images into lines before OCR (uses PSM 7 for better accuracy)
|
||||
int ocr_blacklist; // If 1, use character blacklist to prevent common OCR errors (default: enabled)
|
||||
char *mkvlang; // The name of the language stream for MKV
|
||||
int analyze_video_stream; // If 1, the video stream will be processed even if we're using a different one for subtitles.
|
||||
|
||||
@@ -195,6 +201,7 @@ struct ccx_s_options // Options from user parameters
|
||||
int multiprogram;
|
||||
int out_interval;
|
||||
int segment_on_key_frames_only;
|
||||
int scc_framerate; // SCC input framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
#ifdef WITH_LIBCURL
|
||||
char *curlposturl;
|
||||
#endif
|
||||
|
||||
@@ -201,6 +201,9 @@ void delete_to_end_of_row(ccx_decoder_608_context *context)
|
||||
{
|
||||
if (context->mode != MODE_TEXT)
|
||||
{
|
||||
if (context->cursor_row >= CCX_DECODER_608_SCREEN_ROWS)
|
||||
return;
|
||||
|
||||
struct eia608_screen *use_buffer = get_writing_buffer(context);
|
||||
for (int i = context->cursor_column; i <= CCX_DECODER_608_SCREEN_WIDTH - 1; i++)
|
||||
{
|
||||
@@ -221,6 +224,10 @@ void write_char(const unsigned char c, ccx_decoder_608_context *context)
|
||||
/* printf ("\rWriting char [%c] at %s:%d:%d\n",c,
|
||||
use_buffer == &wb->data608->buffer1?"B1":"B2",
|
||||
wb->data608->cursor_row,wb->data608->cursor_column); */
|
||||
|
||||
if (context->cursor_row >= CCX_DECODER_608_SCREEN_ROWS || context->cursor_column >= CCX_DECODER_608_SCREEN_WIDTH)
|
||||
return;
|
||||
|
||||
use_buffer->characters[context->cursor_row][context->cursor_column] = c;
|
||||
use_buffer->colors[context->cursor_row][context->cursor_column] = context->current_color;
|
||||
use_buffer->fonts[context->cursor_row][context->cursor_column] = context->font;
|
||||
@@ -316,10 +323,20 @@ int write_cc_buffer(ccx_decoder_608_context *context, struct cc_subtitle *sub)
|
||||
|
||||
if (!data->empty && context->output_format != CCX_OF_NULL)
|
||||
{
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, (sub->nb_data + 1) * sizeof(*data));
|
||||
size_t new_size;
|
||||
|
||||
if (sub->nb_data + 1 > SIZE_MAX / sizeof(struct eia608_screen))
|
||||
{
|
||||
ccx_common_logging.log_ftn("Too many screens, cannot allocate more memory.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
new_size = (sub->nb_data + 1) * sizeof(struct eia608_screen);
|
||||
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, new_size);
|
||||
if (!new_data)
|
||||
{
|
||||
ccx_common_logging.log_ftn("No Memory left");
|
||||
ccx_common_logging.log_ftn("Out of memory while reallocating screen buffer\n");
|
||||
return 0;
|
||||
}
|
||||
sub->data = new_data;
|
||||
@@ -386,10 +403,20 @@ int write_cc_line(ccx_decoder_608_context *context, struct cc_subtitle *sub)
|
||||
|
||||
if (!data->empty)
|
||||
{
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, (sub->nb_data + 1) * sizeof(*data));
|
||||
size_t new_size;
|
||||
|
||||
if (sub->nb_data + 1 > SIZE_MAX / sizeof(struct eia608_screen))
|
||||
{
|
||||
ccx_common_logging.log_ftn("Too many screens, cannot allocate more memory.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
new_size = (sub->nb_data + 1) * sizeof(struct eia608_screen);
|
||||
|
||||
struct eia608_screen *new_data = (struct eia608_screen *)realloc(sub->data, new_size);
|
||||
if (!new_data)
|
||||
{
|
||||
ccx_common_logging.log_ftn("No Memory left");
|
||||
ccx_common_logging.log_ftn("Out of memory while reallocating screen buffer\n");
|
||||
return 0;
|
||||
}
|
||||
sub->data = new_data;
|
||||
|
||||
@@ -998,6 +998,14 @@ void dtvcc_handle_DFx_DefineWindow(dtvcc_service_decoder *decoder, int window_id
|
||||
int row_count = (data[4] & 0xf) + 1; // according to CEA-708-D
|
||||
int anchor_point = data[4] >> 4;
|
||||
int col_count = (data[5] & 0x3f) + 1; // according to CEA-708-D
|
||||
|
||||
if (row_count > CCX_DTVCC_MAX_ROWS || col_count > CCX_DTVCC_MAX_COLUMNS)
|
||||
{
|
||||
ccx_common_logging.log_ftn("[CEA-708] Invalid window size %dx%d (max %dx%d), rejecting window definition\n",
|
||||
row_count, col_count, CCX_DTVCC_MAX_ROWS, CCX_DTVCC_MAX_COLUMNS);
|
||||
return;
|
||||
}
|
||||
|
||||
int pen_style = data[6] & 0x7;
|
||||
int win_style = (data[6] >> 3) & 0x7;
|
||||
|
||||
@@ -1341,6 +1349,14 @@ void dtvcc_handle_SPL_SetPenLocation(dtvcc_service_decoder *decoder, unsigned ch
|
||||
}
|
||||
|
||||
dtvcc_window *window = &decoder->windows[decoder->current_window];
|
||||
if (row >= window->row_count || col >= window->col_count)
|
||||
{
|
||||
ccx_common_logging.log_ftn("[CEA-708] dtvcc_handle_SPL_SetPenLocation: "
|
||||
"Invalid pen location %d:%d for window size %dx%d, rejecting command\n",
|
||||
row, col, window->row_count, window->col_count);
|
||||
return;
|
||||
}
|
||||
|
||||
window->pen_row = row;
|
||||
window->pen_column = col;
|
||||
}
|
||||
@@ -1479,7 +1495,12 @@ int dtvcc_handle_C0(dtvcc_ctx *dtvcc,
|
||||
else if (c0 >= 0x18 && c0 <= 0x1F)
|
||||
{
|
||||
if (c0 == DTVCC_C0_P16) // PE16
|
||||
dtvcc_handle_C0_P16(decoder, data + 1);
|
||||
{
|
||||
if (data_length >= 3)
|
||||
dtvcc_handle_C0_P16(decoder, data + 1);
|
||||
else
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_handle_C0: Not enough data for P16\n");
|
||||
}
|
||||
len = 3;
|
||||
}
|
||||
if (len == -1)
|
||||
@@ -1633,6 +1654,9 @@ int dtvcc_handle_extended_char(dtvcc_service_decoder *decoder, unsigned char *da
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] In dtvcc_handle_extended_char, "
|
||||
"first data code: [%c], length: [%u]\n",
|
||||
data[0], data_length);
|
||||
if (data_length < 1)
|
||||
return 0;
|
||||
|
||||
unsigned char c = 0x20; // Default to space
|
||||
unsigned char code = data[0];
|
||||
if (/* data[i]>=0x00 && */ code <= 0x1F) // Comment to silence warning
|
||||
@@ -1701,8 +1725,17 @@ void dtvcc_process_service_block(dtvcc_ctx *dtvcc,
|
||||
}
|
||||
else // Use extended set
|
||||
{
|
||||
used = dtvcc_handle_extended_char(decoder, data + i + 1, data_length - 1);
|
||||
used++; // Since we had DTVCC_C0_EXT1
|
||||
if (i + 1 >= data_length)
|
||||
{
|
||||
used = 1; // skip EXT1
|
||||
}
|
||||
else
|
||||
{
|
||||
used = dtvcc_handle_extended_char(decoder,
|
||||
data + i + 1,
|
||||
data_length - i - 1) +
|
||||
1;
|
||||
}
|
||||
}
|
||||
i += used;
|
||||
}
|
||||
@@ -1754,6 +1787,12 @@ void dtvcc_process_current_packet(dtvcc_ctx *dtvcc, int len)
|
||||
|
||||
if (service_number == 7) // There is an extended header
|
||||
{
|
||||
if (pos + 1 >= dtvcc->current_packet + len)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_current_packet: "
|
||||
"Truncated extended header, stopping.\n");
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
service_number = (pos[0] & 0x3F); // 6 more significant bits
|
||||
// printf ("Extended header: Service number: [%d]\n",service_number);
|
||||
|
||||
@@ -224,7 +224,12 @@ int do_cb(struct lib_cc_decode *ctx, unsigned char *cc_block, struct cc_subtitle
|
||||
void dinit_cc_decode(struct lib_cc_decode **ctx)
|
||||
{
|
||||
struct lib_cc_decode *lctx = *ctx;
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_free(lctx->dtvcc_rust);
|
||||
lctx->dtvcc_rust = NULL;
|
||||
#else
|
||||
dtvcc_free(&lctx->dtvcc);
|
||||
#endif
|
||||
dinit_avc(&lctx->avc_ctx);
|
||||
ccx_decoder_608_dinit_library(&lctx->context_cc608_field_1);
|
||||
ccx_decoder_608_dinit_library(&lctx->context_cc608_field_2);
|
||||
@@ -294,10 +299,16 @@ struct lib_cc_decode *init_cc_decode(struct ccx_decoders_common_settings_t *sett
|
||||
ctx->no_rollup = setting->no_rollup;
|
||||
ctx->noscte20 = setting->noscte20;
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
ctx->dtvcc_rust = ccxr_dtvcc_init(setting->settings_dtvcc);
|
||||
ctx->dtvcc = NULL; // Not used when Rust is enabled
|
||||
#else
|
||||
ctx->dtvcc = dtvcc_init(setting->settings_dtvcc);
|
||||
if (!ctx->dtvcc)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In init_cc_decode: Out of memory initializing dtvcc.");
|
||||
ctx->dtvcc->is_active = setting->settings_dtvcc->enabled;
|
||||
ctx->dtvcc_rust = NULL;
|
||||
#endif
|
||||
|
||||
if (setting->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
@@ -477,6 +488,13 @@ void flush_cc_decode(struct lib_cc_decode *ctx, struct cc_subtitle *sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef DISABLE_RUST
|
||||
if (ccxr_dtvcc_is_active(ctx->dtvcc_rust))
|
||||
{
|
||||
ctx->current_field = 3;
|
||||
ccxr_flush_active_decoders(ctx->dtvcc_rust);
|
||||
}
|
||||
#else
|
||||
if (ctx->dtvcc->is_active)
|
||||
{
|
||||
for (int i = 0; i < CCX_DTVCC_MAX_SERVICES; i++)
|
||||
@@ -491,6 +509,7 @@ void flush_cc_decode(struct lib_cc_decode *ctx, struct cc_subtitle *sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
struct encoder_ctx *copy_encoder_context(struct encoder_ctx *ctx)
|
||||
{
|
||||
|
||||
@@ -32,4 +32,10 @@ struct cc_subtitle *copy_subtitle(struct cc_subtitle *sub);
|
||||
void free_encoder_context(struct encoder_ctx *ctx);
|
||||
void free_decoder_context(struct lib_cc_decode *ctx);
|
||||
void free_subtitle(struct cc_subtitle *sub);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI function to flush active CEA-708 service decoders
|
||||
extern void ccxr_flush_active_decoders(void *dtvcc_rust);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -724,16 +724,17 @@ static int parse_csi(ISDBSubContext *ctx, const uint8_t *buf, int len)
|
||||
// Copy buf in arg
|
||||
for (i = 0; *buf != 0x20; i++)
|
||||
{
|
||||
if (i >= (sizeof(arg)) + 1)
|
||||
if (i >= sizeof(arg) - 1)
|
||||
{
|
||||
isdb_log("UnExpected CSI %d >= %d", sizeof(arg) + 1, i);
|
||||
isdb_log("UnExpected CSI: too long");
|
||||
break;
|
||||
}
|
||||
arg[i] = *buf;
|
||||
buf++;
|
||||
}
|
||||
/* ignore terminating 0x20 character */
|
||||
arg[i] = *buf++;
|
||||
if (i < sizeof(arg))
|
||||
arg[i] = *buf++;
|
||||
|
||||
switch (*buf)
|
||||
{
|
||||
|
||||
@@ -208,6 +208,7 @@ struct lib_cc_decode
|
||||
int false_pict_header;
|
||||
|
||||
dtvcc_ctx *dtvcc;
|
||||
void *dtvcc_rust; // Persistent Rust CEA-708 decoder context
|
||||
int current_field;
|
||||
// Analyse/use the picture information
|
||||
int maxtref; // Use to remember the temporal reference number
|
||||
|
||||
@@ -285,6 +285,9 @@ static void ccx_demuxer_print_cfg(struct ccx_demuxer *ctx)
|
||||
case CCX_SM_MXF:
|
||||
mprint("MXF");
|
||||
break;
|
||||
case CCX_SM_SCC:
|
||||
mprint("SCC");
|
||||
break;
|
||||
#ifdef WTV_DEBUG
|
||||
case CCX_SM_HEX_DUMP:
|
||||
mprint("Hex");
|
||||
@@ -348,7 +351,6 @@ struct ccx_demuxer *init_demuxer(void *parent, struct demuxer_cfg *cfg)
|
||||
{
|
||||
ctx->pinfo[i].got_important_streams_min_pts[j] = UINT64_MAX;
|
||||
}
|
||||
ctx->pinfo[i].initialized_ocr = 0;
|
||||
ctx->pinfo[i].version = 0xFF; // Not real in a real stream since it's 5 bits. FF => Not initialized
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@ struct program_info
|
||||
{
|
||||
int pid;
|
||||
int program_number;
|
||||
int initialized_ocr; // Avoid initializing the OCR more than once
|
||||
uint8_t analysed_PMT_once : 1;
|
||||
uint8_t version;
|
||||
uint8_t saved_section[1021];
|
||||
|
||||
@@ -75,12 +75,15 @@ enum MXFLocalTag
|
||||
void update_tid_lut(struct MXFContext *ctx, uint32_t track_id, uint8_t *track_number, struct ccx_rational edit_rate)
|
||||
{
|
||||
int i;
|
||||
debug("update_tid_lut: track_id=%u (0x%x), track_number=%02X%02X%02X%02X, cap_track_id=%u\n",
|
||||
track_id, track_id, track_number[0], track_number[1], track_number[2], track_number[3], ctx->cap_track_id);
|
||||
// Update essence element key if we have track Id of caption
|
||||
if (ctx->cap_track_id == track_id)
|
||||
{
|
||||
memcpy(ctx->cap_essence_key, mxf_essence_element_key, 12);
|
||||
memcpy(ctx->cap_essence_key + 12, track_number, 4);
|
||||
ctx->edit_rate = edit_rate;
|
||||
debug("MXF: Found caption track, track_id=%u\n", track_id);
|
||||
}
|
||||
|
||||
for (i = 0; i < ctx->nb_tracks; i++)
|
||||
@@ -248,6 +251,7 @@ static int mxf_read_vanc_vbi_desc(struct ccx_demuxer *demux, uint64_t size)
|
||||
{
|
||||
case MXF_TAG_LTRACK_ID:
|
||||
ctx->cap_track_id = buffered_get_be32(demux);
|
||||
debug("MXF: VANC/VBI descriptor found, Linked Track ID = %u\n", ctx->cap_track_id);
|
||||
update_cap_essence_key(ctx, ctx->cap_track_id);
|
||||
break;
|
||||
default:
|
||||
@@ -304,6 +308,17 @@ static int mxf_read_cdp_data(struct ccx_demuxer *demux, int size, struct demuxer
|
||||
log("Incomplete CDP packet\n");
|
||||
|
||||
ret = buffered_read(demux, data->buffer + data->len, cc_count * 3);
|
||||
// Log first few bytes of cc_data for debugging
|
||||
if (cc_count > 0)
|
||||
{
|
||||
unsigned char *cc_ptr = data->buffer + data->len;
|
||||
debug("cc_data (first 6 triplets): ");
|
||||
for (int j = 0; j < (cc_count < 6 ? cc_count : 6); j++)
|
||||
{
|
||||
debug("%02X%02X%02X ", cc_ptr[j * 3], cc_ptr[j * 3 + 1], cc_ptr[j * 3 + 2]);
|
||||
}
|
||||
debug("\n");
|
||||
}
|
||||
data->len += cc_count * 3;
|
||||
demux->past += cc_count * 3;
|
||||
len += ret;
|
||||
@@ -361,7 +376,10 @@ static int mxf_read_vanc_data(struct ccx_demuxer *demux, uint64_t size, struct d
|
||||
// uint8_t count; /* Currently unused */
|
||||
|
||||
if (size < 19)
|
||||
{
|
||||
debug("VANC data too small: %" PRIu64 " < 19\n", size);
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = buffered_read(demux, vanc_header, 16);
|
||||
|
||||
@@ -370,31 +388,39 @@ static int mxf_read_vanc_data(struct ccx_demuxer *demux, uint64_t size, struct d
|
||||
return CCX_EOF;
|
||||
len += ret;
|
||||
|
||||
debug("VANC header: num_packets=%d, line=0x%02x%02x, wrap_type=0x%02x, sample_config=0x%02x\n",
|
||||
vanc_header[1], vanc_header[2], vanc_header[3], vanc_header[4], vanc_header[5]);
|
||||
|
||||
for (int i = 0; i < vanc_header[1]; i++)
|
||||
{
|
||||
DID = buffered_get_byte(demux);
|
||||
len++;
|
||||
debug("VANC packet %d: DID=0x%02x\n", i, DID);
|
||||
if (!(DID == 0x61 || DID == 0x80))
|
||||
{
|
||||
debug("DID 0x%02x not recognized as caption DID\n", DID);
|
||||
goto error;
|
||||
}
|
||||
|
||||
SDID = buffered_get_byte(demux);
|
||||
len++;
|
||||
debug("VANC packet %d: SDID=0x%02x\n", i, SDID);
|
||||
if (SDID == 0x01)
|
||||
debug("Caption Type 708\n");
|
||||
else if (SDID == 0x02)
|
||||
debug("Caption Type 608\n");
|
||||
|
||||
cdp_size = buffered_get_byte(demux);
|
||||
debug("VANC packet %d: cdp_size=%d\n", i, cdp_size);
|
||||
if (cdp_size + 19 > size)
|
||||
{
|
||||
debug("Incomplete cdp(%d) in anc data(%d)\n", cdp_size, size);
|
||||
log("Incomplete cdp(%d) in anc data(%" PRIu64 ")\n", cdp_size, size);
|
||||
goto error;
|
||||
}
|
||||
len++;
|
||||
|
||||
ret = mxf_read_cdp_data(demux, cdp_size, data);
|
||||
debug("mxf_read_cdp_data returned %d, data->len=%d\n", ret, data->len);
|
||||
len += ret;
|
||||
// len += (3 + count + 4);
|
||||
}
|
||||
@@ -411,15 +437,33 @@ static int mxf_read_essence_element(struct ccx_demuxer *demux, uint64_t size, st
|
||||
int ret;
|
||||
struct MXFContext *ctx = demux->private_data;
|
||||
|
||||
debug("mxf_read_essence_element: ctx->type=%d (ANC=%d, VBI=%d), size=%" PRIu64 "\n",
|
||||
ctx->type, MXF_CT_ANC, MXF_CT_VBI, size);
|
||||
|
||||
if (ctx->type == MXF_CT_ANC)
|
||||
{
|
||||
data->bufferdatatype = CCX_RAW_TYPE;
|
||||
ret = mxf_read_vanc_data(demux, size, data);
|
||||
data->pts = ctx->cap_count;
|
||||
debug("mxf_read_vanc_data returned %d, data->len=%d\n", ret, data->len);
|
||||
// Calculate PTS in 90kHz units from frame count and edit rate
|
||||
// edit_rate is frames per second (e.g., 25/1 for 25fps)
|
||||
// PTS = frame_count * 90000 / fps = frame_count * 90000 * edit_rate.den / edit_rate.num
|
||||
if (ctx->edit_rate.num > 0 && ctx->edit_rate.den > 0)
|
||||
{
|
||||
data->pts = (int64_t)ctx->cap_count * 90000 * ctx->edit_rate.den / ctx->edit_rate.num;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback to 25fps if edit_rate not set
|
||||
data->pts = (int64_t)ctx->cap_count * 90000 / 25;
|
||||
}
|
||||
debug("Frame %d, PTS=%" PRId64 " (edit_rate=%d/%d)\n",
|
||||
ctx->cap_count, data->pts, ctx->edit_rate.num, ctx->edit_rate.den);
|
||||
ctx->cap_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
debug("Skipping essence element (not ANC type)\n");
|
||||
ret = buffered_skip(demux, size);
|
||||
demux->past += ret;
|
||||
}
|
||||
@@ -514,6 +558,7 @@ static int read_packet(struct ccx_demuxer *demux, struct demuxer_data *data)
|
||||
KLVPacket klv;
|
||||
const MXFReadTableEntry *reader;
|
||||
struct MXFContext *ctx = demux->private_data;
|
||||
static int first_essence_logged = 0;
|
||||
while ((ret = klv_read_packet(&klv, demux)) == 0)
|
||||
{
|
||||
debug("Key %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X size %" PRIu64 "\n",
|
||||
@@ -523,8 +568,25 @@ static int read_packet(struct ccx_demuxer *demux, struct demuxer_data *data)
|
||||
klv.key[12], klv.key[13], klv.key[14], klv.key[15],
|
||||
klv.length);
|
||||
|
||||
// Check if this is an essence element key (first 12 bytes match)
|
||||
if (IS_KLV_KEY(klv.key, mxf_essence_element_key) && !first_essence_logged)
|
||||
{
|
||||
debug("MXF: First essence element key: %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X\n",
|
||||
klv.key[0], klv.key[1], klv.key[2], klv.key[3],
|
||||
klv.key[4], klv.key[5], klv.key[6], klv.key[7],
|
||||
klv.key[8], klv.key[9], klv.key[10], klv.key[11],
|
||||
klv.key[12], klv.key[13], klv.key[14], klv.key[15]);
|
||||
debug("MXF: cap_essence_key: %02X%02X%02X%02X%02X%02X%02X%02X.%02X%02X%02X%02X%02X%02X%02X%02X\n",
|
||||
ctx->cap_essence_key[0], ctx->cap_essence_key[1], ctx->cap_essence_key[2], ctx->cap_essence_key[3],
|
||||
ctx->cap_essence_key[4], ctx->cap_essence_key[5], ctx->cap_essence_key[6], ctx->cap_essence_key[7],
|
||||
ctx->cap_essence_key[8], ctx->cap_essence_key[9], ctx->cap_essence_key[10], ctx->cap_essence_key[11],
|
||||
ctx->cap_essence_key[12], ctx->cap_essence_key[13], ctx->cap_essence_key[14], ctx->cap_essence_key[15]);
|
||||
first_essence_logged = 1;
|
||||
}
|
||||
|
||||
if (IS_KLV_KEY(klv.key, ctx->cap_essence_key))
|
||||
{
|
||||
debug("MXF: Found ANC essence element, size=%" PRIu64 "\n", klv.length);
|
||||
mxf_read_essence_element(demux, klv.length, data);
|
||||
if (data->len > 0)
|
||||
break;
|
||||
@@ -566,8 +628,15 @@ int ccx_mxf_getmoredata(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
data->program_number = 1;
|
||||
data->stream_pid = 1;
|
||||
data->codec = CCX_CODEC_ATSC_CC;
|
||||
data->tb.num = 1001;
|
||||
data->tb.den = 30000;
|
||||
// PTS is already calculated in 90kHz units by mxf_read_essence_element
|
||||
data->tb.num = 1;
|
||||
data->tb.den = 90000;
|
||||
|
||||
// Enable CEA-708 (DTVCC) decoder for MXF files with VANC captions
|
||||
if (ctx->dec_global_setting && ctx->dec_global_setting->settings_dtvcc)
|
||||
{
|
||||
ctx->dec_global_setting->settings_dtvcc->enabled = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -576,6 +645,11 @@ int ccx_mxf_getmoredata(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
|
||||
ret = read_packet(ctx->demux_ctx, data);
|
||||
|
||||
// Ensure timebase is 90kHz since PTS is calculated in 90kHz units
|
||||
// CDP parsing may have set a frame-based timebase which would cause incorrect conversion
|
||||
data->tb.num = 1;
|
||||
data->tb.den = 90000;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: DTVCC Channel Packet Data\n");
|
||||
if (cc_valid && dtvcc->is_current_packet_header_parsed)
|
||||
{
|
||||
if (dtvcc->current_packet_length > 253)
|
||||
if (dtvcc->current_packet_length + 2 > CCX_DTVCC_MAX_PACKET_LENGTH)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: "
|
||||
"Warning: Legal packet size exceeded (1), data not added.\n");
|
||||
@@ -51,7 +51,7 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: DTVCC Channel Packet Start\n");
|
||||
if (cc_valid)
|
||||
{
|
||||
if (dtvcc->current_packet_length > CCX_DTVCC_MAX_PACKET_LENGTH - 1)
|
||||
if (dtvcc->current_packet_length + 2 > CCX_DTVCC_MAX_PACKET_LENGTH)
|
||||
{
|
||||
ccx_common_logging.debug_ftn(CCX_DMT_708, "[CEA-708] dtvcc_process_data: "
|
||||
"Warning: Legal packet size exceeded (2), data not added.\n");
|
||||
|
||||
@@ -10,4 +10,14 @@ void dtvcc_process_data(struct dtvcc_ctx *dtvcc,
|
||||
dtvcc_ctx *dtvcc_init(ccx_decoder_dtvcc_settings *opts);
|
||||
void dtvcc_free(dtvcc_ctx **);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI functions for persistent CEA-708 decoder
|
||||
extern void *ccxr_dtvcc_init(struct ccx_decoder_dtvcc_settings *settings_dtvcc);
|
||||
extern void ccxr_dtvcc_free(void *dtvcc_rust);
|
||||
extern void ccxr_dtvcc_process_data(void *dtvcc_rust, const unsigned char cc_valid,
|
||||
const unsigned char cc_type, const unsigned char data1, const unsigned char data2);
|
||||
extern int ccxr_dtvcc_is_active(void *dtvcc_rust);
|
||||
extern void ccxr_dtvcc_set_active(void *dtvcc_rust, int active);
|
||||
#endif
|
||||
|
||||
#endif // CCEXTRACTOR_CCX_DTVCC_H
|
||||
|
||||
@@ -775,6 +775,7 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
|
||||
return NULL;
|
||||
}
|
||||
ctx->in_fileformat = opt->in_format;
|
||||
ctx->is_pal = (opt->in_format == 2);
|
||||
|
||||
/** used in case of SUB_EOD_MARKER */
|
||||
ctx->prev_start = -1;
|
||||
@@ -840,6 +841,10 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
|
||||
ctx->segment_pending = 0;
|
||||
ctx->segment_last_key_frame = 0;
|
||||
ctx->nospupngocr = opt->nospupngocr;
|
||||
ctx->scc_framerate = opt->scc_framerate;
|
||||
ctx->scc_accurate_timing = opt->scc_accurate_timing;
|
||||
ctx->scc_last_transmission_end = 0;
|
||||
ctx->scc_last_display_end = 0;
|
||||
|
||||
// Initialize teletext multi-page output arrays (issue #665)
|
||||
ctx->tlt_out_count = 0;
|
||||
@@ -1045,6 +1050,28 @@ int encode_sub(struct encoder_ctx *context, struct cc_subtitle *sub)
|
||||
freep(&sub->data);
|
||||
break;
|
||||
case CC_BITMAP:;
|
||||
// Apply subs_delay to bitmap subtitles (DVB, DVD, etc.)
|
||||
// This is the same as what's done for CC_608 above
|
||||
sub->start_time += context->subs_delay;
|
||||
sub->end_time += context->subs_delay;
|
||||
|
||||
// After adding delay, if start/end time is lower than 0, skip this subtitle
|
||||
if (sub->start_time < 0 || sub->end_time <= 0)
|
||||
{
|
||||
// Free bitmap data to avoid memory leak
|
||||
if (sub->datatype == CC_DATATYPE_DVB)
|
||||
{
|
||||
struct cc_bitmap *bitmap_tmp = (struct cc_bitmap *)sub->data;
|
||||
if (bitmap_tmp)
|
||||
{
|
||||
freep(&bitmap_tmp->data0);
|
||||
freep(&bitmap_tmp->data1);
|
||||
}
|
||||
}
|
||||
freep(&sub->data);
|
||||
sub->nb_data = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
struct cc_bitmap *rect;
|
||||
|
||||
@@ -153,6 +153,14 @@ struct encoder_ctx
|
||||
unsigned int cdp_hdr_seq;
|
||||
int force_dropframe;
|
||||
|
||||
// SCC output framerate
|
||||
int scc_framerate; // SCC output framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
|
||||
// SCC accurate timing (issue #1120)
|
||||
int scc_accurate_timing; // If 1, use bandwidth-aware timing for broadcast compliance
|
||||
LLONG scc_last_transmission_end; // When last caption transmission ends (ms)
|
||||
LLONG scc_last_display_end; // When last caption display ends (ms)
|
||||
|
||||
int new_sentence; // Capitalize next letter?
|
||||
|
||||
int program_number;
|
||||
@@ -174,12 +182,12 @@ struct encoder_ctx
|
||||
|
||||
// OCR in SPUPNG
|
||||
int nospupngocr;
|
||||
int is_pal;
|
||||
|
||||
// Teletext multi-page output (issue #665)
|
||||
struct ccx_s_write *tlt_out[MAX_TLT_PAGES_EXTRACT]; // Output files per teletext page
|
||||
uint16_t tlt_out_pages[MAX_TLT_PAGES_EXTRACT]; // Page numbers for each output slot
|
||||
unsigned int tlt_srt_counter[MAX_TLT_PAGES_EXTRACT]; // SRT counter per page
|
||||
int tlt_out_count; // Number of teletext output files
|
||||
int tlt_out_count; // Number of teletext output files
|
||||
};
|
||||
|
||||
#define INITIAL_ENC_BUFFER_CAPACITY 2048
|
||||
|
||||
@@ -10,6 +10,171 @@ unsigned char odd_parity(const unsigned char byte)
|
||||
return byte | !(cc608_parity(byte) % 2) << 7;
|
||||
}
|
||||
|
||||
/**
|
||||
* SCC Accurate Timing Implementation (Issue #1120)
|
||||
*
|
||||
* EIA-608 bandwidth constraints:
|
||||
* - 2 bytes per frame at 29.97 FPS (or configured frame rate)
|
||||
* - Captions must be pre-loaded before display time
|
||||
* - Each control code takes 2 bytes (sent twice for reliability = 4 bytes total)
|
||||
* - Text characters take 1 byte each
|
||||
*/
|
||||
|
||||
// Get frame rate value from scc_framerate setting
|
||||
// 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
static float get_scc_fps_internal(int scc_framerate)
|
||||
{
|
||||
switch (scc_framerate)
|
||||
{
|
||||
case 1:
|
||||
return 24.0f;
|
||||
case 2:
|
||||
return 25.0f;
|
||||
case 3:
|
||||
return 30.0f;
|
||||
default:
|
||||
return 29.97f;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total bytes needed to transmit a caption
|
||||
*
|
||||
* Byte costs:
|
||||
* - Control code (RCL, EOC, ENM, EDM): 2 bytes x 2 (sent twice) = 4 bytes
|
||||
* - Preamble code: 2 bytes x 2 = 4 bytes
|
||||
* - Tab offset: 2 bytes x 2 = 4 bytes
|
||||
* - Mid-row code (color/style): 2 bytes x 2 = 4 bytes
|
||||
* - Text character: 1 byte each
|
||||
* - Padding: 1 byte if odd number of text bytes
|
||||
*/
|
||||
static unsigned int calculate_caption_bytes(const struct eia608_screen *data)
|
||||
{
|
||||
unsigned int total_bytes = 0;
|
||||
|
||||
// RCL (Resume Caption Loading): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
for (unsigned char row = 0; row < 15; ++row)
|
||||
{
|
||||
if (!data->row_used[row])
|
||||
continue;
|
||||
|
||||
int first, last;
|
||||
find_limit_characters(data->characters[row], &first, &last, CCX_DECODER_608_SCREEN_WIDTH);
|
||||
|
||||
if (first > last)
|
||||
continue;
|
||||
|
||||
// Assume we need at least one preamble per row: 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
// Count characters on this row
|
||||
unsigned int char_count = 0;
|
||||
enum font_bits prev_font = FONT_REGULAR;
|
||||
enum ccx_decoder_608_color_code prev_color = COL_WHITE;
|
||||
int prev_col = -1;
|
||||
|
||||
for (int col = first; col <= last; ++col)
|
||||
{
|
||||
// Check if we need position codes
|
||||
if (prev_col != col - 1 && prev_col != -1)
|
||||
{
|
||||
// Need preamble + possible tab offset: 4-8 bytes
|
||||
total_bytes += 4;
|
||||
if (col % 4 != 0)
|
||||
total_bytes += 4; // Tab offset
|
||||
}
|
||||
|
||||
// Check if we need mid-row style codes
|
||||
if (data->fonts[row][col] != prev_font || data->colors[row][col] != prev_color)
|
||||
{
|
||||
total_bytes += 4; // Mid-row code
|
||||
prev_font = data->fonts[row][col];
|
||||
prev_color = data->colors[row][col];
|
||||
}
|
||||
|
||||
// Text character
|
||||
char_count++;
|
||||
prev_col = col;
|
||||
}
|
||||
|
||||
// Add text bytes (1 per character, rounded up to even)
|
||||
total_bytes += char_count;
|
||||
if (char_count % 2 == 1)
|
||||
total_bytes++; // Padding
|
||||
}
|
||||
|
||||
// EOC (End of Caption): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
// ENM (Erase Non-displayed Memory): 4 bytes
|
||||
total_bytes += 4;
|
||||
|
||||
return total_bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the pre-roll start time for a caption
|
||||
*
|
||||
* @param display_time When the caption should appear on screen (ms)
|
||||
* @param total_bytes Total bytes to transmit
|
||||
* @param fps Frame rate
|
||||
* @return Time to begin loading the caption (ms)
|
||||
*/
|
||||
static LLONG calculate_preroll_time(LLONG display_time, unsigned int total_bytes, float fps)
|
||||
{
|
||||
// Calculate transmission time in milliseconds
|
||||
// 2 bytes per frame, so frames_needed = (total_bytes + 1) / 2
|
||||
float ms_per_frame = 1000.0f / fps;
|
||||
unsigned int frames_needed = (total_bytes + 1) / 2;
|
||||
LLONG transmission_time_ms = (LLONG)(frames_needed * ms_per_frame);
|
||||
|
||||
// Add 1 frame for EOC to be sent before display
|
||||
LLONG one_frame_ms = (LLONG)ms_per_frame;
|
||||
|
||||
LLONG preroll_start = display_time - transmission_time_ms - one_frame_ms;
|
||||
|
||||
// Don't go negative
|
||||
if (preroll_start < 0)
|
||||
preroll_start = 0;
|
||||
|
||||
return preroll_start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for collision with previous caption transmission and resolve it
|
||||
*
|
||||
* @param context Encoder context with timing state
|
||||
* @param preroll_start Proposed pre-roll start time (will be modified if collision)
|
||||
* @param display_time Caption display time (may be adjusted)
|
||||
* @param fps Frame rate
|
||||
* @return true if timing was adjusted due to collision
|
||||
*/
|
||||
static bool resolve_collision(struct encoder_ctx *context, LLONG *preroll_start,
|
||||
LLONG *display_time, float fps)
|
||||
{
|
||||
// Check if our preroll would start before previous caption finishes transmitting
|
||||
// This prevents bandwidth collision but allows visual overlap (like scc_tools)
|
||||
// Visual overlap is fine - the EOC command swaps buffers atomically
|
||||
if (context->scc_last_transmission_end > 0 &&
|
||||
*preroll_start < context->scc_last_transmission_end)
|
||||
{
|
||||
// Bandwidth collision detected - shift our caption forward
|
||||
// Add 1 frame buffer to ensure no overlap
|
||||
LLONG one_frame_ms = (LLONG)(1000.0f / fps);
|
||||
LLONG new_preroll = context->scc_last_transmission_end + one_frame_ms;
|
||||
LLONG shift = new_preroll - *preroll_start;
|
||||
|
||||
*preroll_start = new_preroll;
|
||||
*display_time += shift;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct control_code_info
|
||||
{
|
||||
unsigned int byte1_odd;
|
||||
@@ -484,14 +649,156 @@ void write_control_code(const int fd, const unsigned char channel, const enum co
|
||||
* @param row 0-14 (inclusive)
|
||||
* @param column 0-31 (inclusive)
|
||||
*
|
||||
* //TODO: Preamble code need to take into account font as well
|
||||
*
|
||||
* Returns an indent-based preamble code (positions cursor at column with white color)
|
||||
*/
|
||||
enum control_code get_preamble_code(const unsigned char row, const unsigned char column)
|
||||
{
|
||||
return PREAMBLE_CC_START + 1 + (row * 8) + (column / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get byte2 value for a styled PAC (color/font at column 0)
|
||||
* Returns 0x40-0x4F or 0x60-0x6F depending on the style
|
||||
*
|
||||
* @param color The color to use
|
||||
* @param font The font style to use
|
||||
* @param use_high_range If true, use 0x60-0x6F range instead of 0x40-0x4F
|
||||
*
|
||||
* PAC style encoding (byte2):
|
||||
* 0x40/0x60: white, regular 0x41/0x61: white, underline
|
||||
* 0x42/0x62: green, regular 0x43/0x63: green, underline
|
||||
* 0x44/0x64: blue, regular 0x45/0x65: blue, underline
|
||||
* 0x46/0x66: cyan, regular 0x47/0x67: cyan, underline
|
||||
* 0x48/0x68: red, regular 0x49/0x69: red, underline
|
||||
* 0x4a/0x6a: yellow, regular 0x4b/0x6b: yellow, underline
|
||||
* 0x4c/0x6c: magenta, regular 0x4d/0x6d: magenta, underline
|
||||
* 0x4e/0x6e: white, italics 0x4f/0x6f: white, italic underline
|
||||
*/
|
||||
static unsigned char get_styled_pac_byte2(enum ccx_decoder_608_color_code color, enum font_bits font, bool use_high_range)
|
||||
{
|
||||
unsigned char base = use_high_range ? 0x60 : 0x40;
|
||||
unsigned char style_offset;
|
||||
|
||||
// Handle italics specially - they're always white
|
||||
if (font == FONT_ITALICS)
|
||||
return base + 0x0e;
|
||||
if (font == FONT_UNDERLINED_ITALICS)
|
||||
return base + 0x0f;
|
||||
|
||||
// Map color to base offset (0, 2, 4, 6, 8, 10, 12)
|
||||
switch (color)
|
||||
{
|
||||
case COL_WHITE:
|
||||
style_offset = 0x00;
|
||||
break;
|
||||
case COL_GREEN:
|
||||
style_offset = 0x02;
|
||||
break;
|
||||
case COL_BLUE:
|
||||
style_offset = 0x04;
|
||||
break;
|
||||
case COL_CYAN:
|
||||
style_offset = 0x06;
|
||||
break;
|
||||
case COL_RED:
|
||||
style_offset = 0x08;
|
||||
break;
|
||||
case COL_YELLOW:
|
||||
style_offset = 0x0a;
|
||||
break;
|
||||
case COL_MAGENTA:
|
||||
style_offset = 0x0c;
|
||||
break;
|
||||
default:
|
||||
// For unsupported colors (black, transparent, userdefined), fall back to white
|
||||
style_offset = 0x00;
|
||||
break;
|
||||
}
|
||||
|
||||
// Add 1 for underlined
|
||||
if (font == FONT_UNDERLINED)
|
||||
style_offset += 1;
|
||||
|
||||
return base + style_offset;
|
||||
}
|
||||
|
||||
/**
 * Tell whether styled PACs for a row use the high byte2 range (0x60-0x6F)
 * rather than the low one (0x40-0x4F)
 * Rows that have byte2 in 0x70-0x7F range for indents use 0x60-0x6F for styles
 *
 * @param row Zero-based row index
 * @return true for the high range, false for the low range
 */
static bool row_uses_high_range(unsigned char row)
{
	// Per the preamble code table, caption rows 2, 4, 6, 8, 10, 13 and 15
	// (counted from 1) are the "high" rows; the cases below are the same
	// rows expressed as zero-based indices.
	switch (row)
	{
		case 1:
		case 3:
		case 5:
		case 7:
		case 9:
		case 12:
		case 14:
			return true;
		default:
			return false;
	}
}
|
||||
|
||||
/**
|
||||
* Write a styled PAC code (color/font at column 0) directly
|
||||
* This is more efficient than using indent PAC + mid-row code when at column 0
|
||||
*
|
||||
* @param fd File descriptor
|
||||
* @param channel Caption channel (1-4)
|
||||
* @param row Row number (0-14)
|
||||
* @param color Color to set
|
||||
* @param font Font style to set
|
||||
* @param disassemble If true, output assembly format
|
||||
* @param bytes_written Pointer to byte counter
|
||||
*/
|
||||
static void write_styled_preamble(const int fd, const unsigned char channel, const unsigned char row,
|
||||
enum ccx_decoder_608_color_code color, enum font_bits font,
|
||||
const bool disassemble, unsigned int *bytes_written)
|
||||
{
|
||||
// Get the preamble code for column 0 to obtain byte1
|
||||
enum control_code base_preamble = get_preamble_code(row, 0);
|
||||
unsigned char byte1 = odd_parity(get_first_byte(channel, base_preamble));
|
||||
|
||||
// Get styled byte2
|
||||
bool use_high_range = row_uses_high_range(row);
|
||||
unsigned char byte2 = odd_parity(get_styled_pac_byte2(color, font, use_high_range));
|
||||
|
||||
check_padding(fd, disassemble, bytes_written);
|
||||
|
||||
if (disassemble)
|
||||
{
|
||||
// Output assembly format like {0100Gr} for row 1, green
|
||||
const char *color_names[] = {"Wh", "Gr", "Bl", "Cy", "R", "Y", "Ma", "Wh", "Bk", "Wh"};
|
||||
const char *font_suffix = "";
|
||||
if (font == FONT_UNDERLINED)
|
||||
font_suffix = "U";
|
||||
else if (font == FONT_ITALICS)
|
||||
font_suffix = "I";
|
||||
else if (font == FONT_UNDERLINED_ITALICS)
|
||||
font_suffix = "IU";
|
||||
|
||||
fdprintf(fd, "{%02d00%s%s}", row + 1, color_names[color], font_suffix);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (*bytes_written % 2 == 0)
|
||||
write_wrapped(fd, " ", 1);
|
||||
fdprintf(fd, "%02x%02x", byte1, byte2);
|
||||
}
|
||||
*bytes_written += 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a styled PAC can be used (when color/font differs from white/regular and column is 0)
|
||||
*/
|
||||
static bool can_use_styled_pac(enum ccx_decoder_608_color_code color, enum font_bits font, unsigned char column)
|
||||
{
|
||||
// Styled PACs can only be used at column 0
|
||||
if (column != 0)
|
||||
return false;
|
||||
|
||||
// If style is already white/regular, no need for styled PAC
|
||||
if (color == COL_WHITE && font == FONT_REGULAR)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum control_code get_tab_offset_code(const unsigned char column)
|
||||
{
|
||||
int offset = column % 4;
|
||||
@@ -519,6 +826,23 @@ enum control_code get_font_code(enum font_bits font, enum ccx_decoder_608_color_
|
||||
}
|
||||
}
|
||||
|
||||
// Look up the frame rate selected by the scc_framerate setting
// Index: 0=29.97 (default), 1=24, 2=25, 3=30
static float get_scc_fps(int scc_framerate)
{
	static const float rates[] = {29.97f, 24.0f, 25.0f, 30.0f};

	// Anything that is not one of the explicit choices 1..3 uses the default
	if (scc_framerate < 1 || scc_framerate > 3)
		return rates[0];

	return rates[scc_framerate];
}
|
||||
|
||||
void add_timestamp(const struct encoder_ctx *context, LLONG time, const bool disassemble)
|
||||
{
|
||||
write_wrapped(context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
|
||||
@@ -528,9 +852,15 @@ void add_timestamp(const struct encoder_ctx *context, LLONG time, const bool dis
|
||||
unsigned hour, minute, second, milli;
|
||||
millis_to_time(time, &hour, &minute, &second, &milli);
|
||||
|
||||
// SMPTE format
|
||||
float frame = milli * 29.97 / 1000;
|
||||
fdprintf(context->out->fh, "%02u:%02u:%02u:%02.f\t", hour, minute, second, frame);
|
||||
// SMPTE format - use configurable frame rate (issue #1191)
|
||||
float fps = get_scc_fps(context->scc_framerate);
|
||||
// Calculate frame number from milliseconds, ensuring it stays in valid range 0 to fps-1
|
||||
// Use floor to avoid rounding up to fps (e.g., 29.97 -> 30 is invalid)
|
||||
int max_frames = (int)fps;
|
||||
int frame = (int)(milli * fps / 1000.0f);
|
||||
if (frame >= max_frames)
|
||||
frame = max_frames - 1; // Cap at max valid frame (e.g., 29 for 29.97fps)
|
||||
fdprintf(context->out->fh, "%02u:%02u:%02u:%02d\t", hour, minute, second, frame);
|
||||
}
|
||||
|
||||
void clear_screen(const struct encoder_ctx *context, LLONG end_time, const unsigned char channel, const bool disassemble)
|
||||
@@ -550,8 +880,51 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
unsigned char current_row = UINT8_MAX;
|
||||
unsigned char current_column = UINT8_MAX;
|
||||
|
||||
// 1. Load the caption
|
||||
add_timestamp(context, data->start_time, disassemble);
|
||||
// Timing variables for accurate timing mode (issue #1120)
|
||||
LLONG actual_start_time = data->start_time; // When caption should display
|
||||
LLONG actual_end_time = data->end_time; // When caption should clear
|
||||
LLONG preroll_start = data->start_time; // When to start loading (default: same as display)
|
||||
float fps = get_scc_fps_internal(context->scc_framerate);
|
||||
bool use_separate_display_time = false; // Whether to write EOC at separate timestamp
|
||||
|
||||
// If accurate timing is enabled, calculate pre-roll and handle collisions
|
||||
if (context->scc_accurate_timing)
|
||||
{
|
||||
// Calculate total bytes needed for this caption
|
||||
unsigned int total_bytes = calculate_caption_bytes(data);
|
||||
|
||||
// Calculate when we need to start loading
|
||||
preroll_start = calculate_preroll_time(actual_start_time, total_bytes, fps);
|
||||
|
||||
// Check for collisions with previous caption and resolve
|
||||
if (resolve_collision(context, &preroll_start, &actual_start_time, fps))
|
||||
{
|
||||
// Timing was adjusted due to collision
|
||||
// Also adjust end time by the same amount
|
||||
LLONG shift = actual_start_time - data->start_time;
|
||||
actual_end_time = data->end_time + shift;
|
||||
}
|
||||
|
||||
// Update timing state for next caption
|
||||
float ms_per_frame = 1000.0f / fps;
|
||||
unsigned int frames_needed = (total_bytes + 1) / 2;
|
||||
LLONG transmission_time_ms = (LLONG)(frames_needed * ms_per_frame);
|
||||
context->scc_last_transmission_end = preroll_start + transmission_time_ms;
|
||||
context->scc_last_display_end = actual_end_time;
|
||||
|
||||
// Enable separate display timing (like scc_tools)
|
||||
use_separate_display_time = true;
|
||||
|
||||
// 1. Load the caption at pre-roll time
|
||||
add_timestamp(context, preroll_start, disassemble);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Legacy mode: use original timing
|
||||
// 1. Load the caption
|
||||
add_timestamp(context, data->start_time, disassemble);
|
||||
}
|
||||
|
||||
write_control_code(context->out->fh, data->channel, RCL, disassemble, &bytes_written);
|
||||
for (uint8_t row = 0; row < 15; ++row)
|
||||
{
|
||||
@@ -578,6 +951,23 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
{
|
||||
if (switch_font || switch_color)
|
||||
{
|
||||
// Optimization (issue #1191): Use styled PAC when at column 0 with non-default style
|
||||
// This avoids needing a separate mid-row code
|
||||
if (column == 0 && can_use_styled_pac(data->colors[row][column], data->fonts[row][column], 0))
|
||||
{
|
||||
write_styled_preamble(context->out->fh, data->channel, row,
|
||||
data->colors[row][column], data->fonts[row][column],
|
||||
disassemble, &bytes_written);
|
||||
current_row = row;
|
||||
current_column = 0;
|
||||
current_font = data->fonts[row][column];
|
||||
current_color = data->colors[row][column];
|
||||
// Write the character and continue
|
||||
write_character(context->out->fh, data->characters[row][column], disassemble, &bytes_written);
|
||||
++current_column;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (data->characters[row][column] == ' ')
|
||||
{
|
||||
// The MID-ROW code is going to move the cursor to the
|
||||
@@ -617,12 +1007,26 @@ int write_cc_buffer_as_scenarist(const struct eia608_screen *data, struct encode
|
||||
check_padding(context->out->fh, disassemble, &bytes_written);
|
||||
}
|
||||
|
||||
// 2. Show the caption
|
||||
// 2. Show the caption (EOC = End of Caption, makes it visible)
|
||||
if (use_separate_display_time)
|
||||
{
|
||||
// For accurate timing: write display command at actual display time
|
||||
// This matches scc_tools behavior where load and display are separate
|
||||
add_timestamp(context, actual_start_time, disassemble);
|
||||
}
|
||||
write_control_code(context->out->fh, data->channel, EOC, disassemble, &bytes_written);
|
||||
write_control_code(context->out->fh, data->channel, ENM, disassemble, &bytes_written);
|
||||
|
||||
// 3. Clear the caption
|
||||
clear_screen(context, data->end_time, data->channel, disassemble);
|
||||
// 3. Clear the caption at the end time
|
||||
// In accurate timing mode, skip clear - the next caption's EOC will handle the transition
|
||||
// This matches scc_tools behavior which doesn't write EDM between consecutive captions
|
||||
if (!use_separate_display_time)
|
||||
{
|
||||
// Legacy mode: always write clear
|
||||
clear_screen(context, actual_end_time, data->channel, disassemble);
|
||||
}
|
||||
// In accurate timing mode, scc_last_display_end is still tracked for reference
|
||||
// but we don't write the clear command to avoid out-of-order timestamps
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -251,6 +251,9 @@ void set_spupng_offset(void *ctx, int x, int y)
|
||||
sp->xOffset = x;
|
||||
sp->yOffset = y;
|
||||
}
|
||||
|
||||
// Forward declaration for calculate_spupng_offsets
|
||||
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx);
|
||||
int save_spupng(const char *filename, uint8_t *bitmap, int w, int h,
|
||||
png_color *palette, png_byte *alpha, int nb_color)
|
||||
{
|
||||
@@ -384,7 +387,7 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
struct cc_bitmap *rect;
|
||||
png_color *palette = NULL;
|
||||
png_byte *alpha = NULL;
|
||||
int wrote_opentag = 1;
|
||||
int wrote_opentag = 0; // Track if we actually wrote the tag
|
||||
|
||||
x_pos = -1;
|
||||
y_pos = -1;
|
||||
@@ -395,13 +398,11 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
return 0;
|
||||
|
||||
inc_spupng_fileindex(sp);
|
||||
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
|
||||
|
||||
if (sub->nb_data == 0 && (sub->flags & SUB_EOD_MARKER))
|
||||
{
|
||||
context->prev_start = -1;
|
||||
if (wrote_opentag)
|
||||
write_sputag_close(sp);
|
||||
// No subtitle data, skip writing
|
||||
return 0;
|
||||
}
|
||||
rect = sub->data;
|
||||
@@ -440,7 +441,13 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
}
|
||||
}
|
||||
filename = get_spupng_filename(sp);
|
||||
set_spupng_offset(sp, x_pos, y_pos);
|
||||
|
||||
// Set image dimensions for offset calculation
|
||||
sp->img_w = width;
|
||||
sp->img_h = height;
|
||||
|
||||
// Calculate centered offsets based on screen size (PAL/NTSC)
|
||||
calculate_spupng_offsets(sp, context);
|
||||
if (sub->flags & SUB_EOD_MARKER)
|
||||
context->prev_start = sub->start_time;
|
||||
pbuf = (uint8_t *)malloc(width * height);
|
||||
@@ -475,6 +482,15 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
|
||||
/* TODO do rectangle wise, one color table should not be used for all rectangles */
|
||||
mapclut_paletee(palette, alpha, (uint32_t *)rect[0].data1, rect[0].nb_colors);
|
||||
|
||||
// Save PNG file first
|
||||
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
|
||||
freep(&pbuf);
|
||||
|
||||
// Write XML tag with calculated centered offsets
|
||||
write_sputag_open(sp, sub->start_time, sub->end_time - 1);
|
||||
wrote_opentag = 1; // Mark that we wrote the tag
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (!context->nospupngocr)
|
||||
{
|
||||
@@ -487,8 +503,6 @@ int write_cc_bitmap_as_spupng(struct cc_subtitle *sub, struct encoder_ctx *conte
|
||||
}
|
||||
}
|
||||
#endif
|
||||
save_spupng(filename, pbuf, width, height, palette, alpha, rect[0].nb_colors);
|
||||
freep(&pbuf);
|
||||
|
||||
end:
|
||||
if (wrote_opentag)
|
||||
@@ -991,6 +1005,8 @@ int spupng_export_string2png(struct spupng_t *sp, char *str, FILE *output)
|
||||
*/
|
||||
|
||||
// Save image
|
||||
sp->img_w = canvas_width;
|
||||
sp->img_h = canvas_height;
|
||||
write_image(buffer, output, canvas_width, canvas_height);
|
||||
free(tmp);
|
||||
free(buffer);
|
||||
@@ -1081,6 +1097,28 @@ int eia608_to_str(struct encoder_ctx *context, struct eia608_screen *data, char
|
||||
|
||||
// string needs to be in UTF-8 encoding.
|
||||
// This function will take care of encoding.
|
||||
static void calculate_spupng_offsets(struct spupng_t *sp, struct encoder_ctx *ctx)
|
||||
{
|
||||
int screen_w = 720;
|
||||
int screen_h;
|
||||
|
||||
/* Teletext is always PAL */
|
||||
if (ctx->in_fileformat == 2 || ctx->is_pal)
|
||||
{
|
||||
screen_h = 576;
|
||||
}
|
||||
else
|
||||
{
|
||||
screen_h = 480;
|
||||
}
|
||||
|
||||
sp->xOffset = (screen_w - sp->img_w) / 2;
|
||||
sp->yOffset = (screen_h - sp->img_h) / 2;
|
||||
|
||||
// SPU / DVD requires even yOffset (interlacing)
|
||||
if (sp->yOffset & 1)
|
||||
sp->yOffset++;
|
||||
}
|
||||
int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLONG end_time,
|
||||
struct encoder_ctx *context)
|
||||
{
|
||||
@@ -1099,6 +1137,7 @@ int spupng_write_string(struct spupng_t *sp, char *string, LLONG start_time, LLO
|
||||
}
|
||||
// free(string_utf32);
|
||||
fclose(sp->fppng);
|
||||
calculate_spupng_offsets(sp, context);
|
||||
write_sputag_open(sp, start_time, end_time);
|
||||
write_spucomment(sp, string);
|
||||
write_sputag_close(sp);
|
||||
|
||||
@@ -39,6 +39,8 @@ struct spupng_t
|
||||
int fileIndex;
|
||||
int xOffset;
|
||||
int yOffset;
|
||||
int img_w;
|
||||
int img_h;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -182,6 +182,7 @@ typedef struct DVBSubContext
|
||||
LLONG time_out;
|
||||
#ifdef ENABLE_OCR
|
||||
void *ocr_ctx;
|
||||
int ocr_initialized; // Flag to track if OCR has been lazily initialized
|
||||
#endif
|
||||
DVBSubRegion *region_list;
|
||||
DVBSubCLUT *clut_list;
|
||||
@@ -418,7 +419,7 @@ static void delete_regions(DVBSubContext *ctx)
|
||||
* @return DVB context kept as void* for abstraction
|
||||
*
|
||||
*/
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr)
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg)
|
||||
{
|
||||
int i, r, g, b, a = 0;
|
||||
DVBSubContext *ctx = (DVBSubContext *)malloc(sizeof(DVBSubContext));
|
||||
@@ -442,8 +443,11 @@ void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr)
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (!initialized_ocr)
|
||||
ctx->ocr_ctx = init_ocr(ctx->lang_index);
|
||||
// Lazy OCR initialization: don't init here, wait until a bitmap actually needs OCR
|
||||
// This avoids ~10 second Tesseract startup overhead for files that have DVB streams
|
||||
// but don't actually produce any bitmap subtitles (e.g., files with CEA-608 captions)
|
||||
ctx->ocr_ctx = NULL;
|
||||
ctx->ocr_initialized = 0;
|
||||
#endif
|
||||
ctx->version = -1;
|
||||
|
||||
@@ -1702,7 +1706,13 @@ static int write_dvb_sub(struct lib_cc_decode *dec_ctx, struct cc_subtitle *sub)
|
||||
// Perform OCR
|
||||
#ifdef ENABLE_OCR
|
||||
char *ocr_str = NULL;
|
||||
if (ctx->ocr_ctx)
|
||||
// Lazy OCR initialization: only init when we actually have a bitmap to process
|
||||
if (!ctx->ocr_initialized)
|
||||
{
|
||||
ctx->ocr_ctx = init_ocr(ctx->lang_index);
|
||||
ctx->ocr_initialized = 1; // Mark as initialized even if init_ocr returns NULL
|
||||
}
|
||||
if (ctx->ocr_ctx && region)
|
||||
{
|
||||
int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, region->bgcolor, dec_ctx->ocr_quantmode);
|
||||
if (ret >= 0)
|
||||
|
||||
@@ -42,7 +42,7 @@ extern "C"
|
||||
* @return DVB context kept as void* for abstraction
|
||||
*
|
||||
*/
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg, int initialized_ocr);
|
||||
void *dvbsub_init_decoder(struct dvb_config *cfg);
|
||||
|
||||
int dvbsub_close_decoder(void **dvb_ctx);
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
{
|
||||
if ((ud_header[1] & 0x7F) == 0x01)
|
||||
{
|
||||
unsigned char cc_data[3 * 31 + 1]; // Maximum cc_count is 31
|
||||
unsigned char cc_data[3 * 32]; // Increased for safety margin, 31 is max count
|
||||
|
||||
dec_ctx->stat_scte20ccheaders++;
|
||||
read_bytes(ustream, 2); // "03 01"
|
||||
@@ -370,6 +370,7 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
dbg_print(CCX_DMT_PARSE, "%s", debug_608_to_ASC(dishdata, 0));
|
||||
dbg_print(CCX_DMT_PARSE, "%s:\n", debug_608_to_ASC(dishdata + 3, 0));
|
||||
|
||||
dishdata[cc_count * 3] = 0xFF; // Ensure termination for store_hdcc
|
||||
store_hdcc(enc_ctx, dec_ctx, dishdata, cc_count, dec_ctx->timing->current_tref, dec_ctx->timing->fts_now, sub);
|
||||
|
||||
// Ignore 4 (0x020A, followed by two unknown) bytes.
|
||||
@@ -484,7 +485,10 @@ int user_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, struct
|
||||
mprint("MPEG:VBI: only support Luma line\n");
|
||||
|
||||
if (udatalen < 720)
|
||||
mprint("MPEG:VBI: Minimum 720 bytes in luma line required\n");
|
||||
{
|
||||
mprint("MPEG:VBI: Minimum 720 bytes in luma line required, skipping truncated packet.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
decode_vbi(dec_ctx, field, ustream->pos, 720, sub);
|
||||
dbg_print(CCX_DMT_VERBOSE, "GXF (vbi line %d) user data:\n", line_nb);
|
||||
|
||||
@@ -66,6 +66,7 @@ void prepare_for_new_file(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
// Init per file variables
|
||||
ctx->last_reported_progress = -1;
|
||||
ctx->min_global_timestamp_offset = -1; // -1 means not yet initialized
|
||||
ctx->stat_numuserheaders = 0;
|
||||
ctx->stat_dvdccheaders = 0;
|
||||
ctx->stat_scte20ccheaders = 0;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "ccx_gxf.h"
|
||||
#include "dvd_subtitle_decoder.h"
|
||||
#include "ccx_demuxer_mxf.h"
|
||||
#include "ccx_dtvcc.h"
|
||||
|
||||
int end_of_file = 0; // End of file?
|
||||
|
||||
@@ -75,7 +76,7 @@ int ps_get_more_data(struct lib_ccx_ctx *ctx, struct demuxer_data **ppdata)
|
||||
if (!ctx->demux_ctx->strangeheader)
|
||||
{
|
||||
mprint("\nNot a recognized header. Searching for next header.\n");
|
||||
dump(CCX_DMT_GENERIC_NOTICES, nextheader, 6, 0, 0);
|
||||
dump(CCX_DMT_PARSE, nextheader, 6, 0, 0);
|
||||
// Only print the message once per loop / unrecognized header
|
||||
ctx->demux_ctx->strangeheader = 1;
|
||||
}
|
||||
@@ -566,6 +567,104 @@ static size_t process_raw_for_mcc(struct encoder_ctx *enc_ctx, struct lib_cc_dec
|
||||
}
|
||||
|
||||
// Raw file process
|
||||
// Parse raw CDP (Caption Distribution Packet) data
|
||||
// Returns number of bytes processed
|
||||
static size_t process_raw_cdp(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx,
|
||||
struct cc_subtitle *sub, unsigned char *buffer, size_t len)
|
||||
{
|
||||
size_t pos = 0;
|
||||
int cdp_count = 0;
|
||||
|
||||
while (pos + 10 < len) // Minimum CDP size
|
||||
{
|
||||
// Check for CDP identifier
|
||||
if (buffer[pos] != 0x96 || buffer[pos + 1] != 0x69)
|
||||
{
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned char cdp_length = buffer[pos + 2];
|
||||
if (pos + cdp_length > len)
|
||||
break; // Incomplete CDP packet
|
||||
|
||||
unsigned char framerate_byte = buffer[pos + 3];
|
||||
int framerate_code = framerate_byte >> 4;
|
||||
|
||||
// Skip to find cc_data section (0x72)
|
||||
size_t cdp_pos = pos + 4; // After identifier, length, framerate
|
||||
int cc_count = 0;
|
||||
unsigned char *cc_data = NULL;
|
||||
|
||||
// Skip header sequence counter (2 bytes)
|
||||
cdp_pos += 2;
|
||||
|
||||
// Look for cc_data section (0x72) within CDP
|
||||
while (cdp_pos < pos + cdp_length - 4)
|
||||
{
|
||||
if (buffer[cdp_pos] == 0x72) // cc_data section
|
||||
{
|
||||
cc_count = buffer[cdp_pos + 1] & 0x1F;
|
||||
cc_data = buffer + cdp_pos + 2;
|
||||
break;
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x71) // time code section
|
||||
{
|
||||
cdp_pos += 5; // Skip time code section
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x73) // service info section
|
||||
{
|
||||
break; // Past cc_data
|
||||
}
|
||||
else if (buffer[cdp_pos] == 0x74) // footer
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
cdp_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
if (cc_count > 0 && cc_data != NULL)
|
||||
{
|
||||
// Calculate PTS based on CDP frame count and frame rate
|
||||
static const int fps_table[] = {0, 24, 24, 25, 30, 30, 50, 60, 60};
|
||||
int fps = (framerate_code < 9) ? fps_table[framerate_code] : 30;
|
||||
LLONG pts = (LLONG)cdp_count * 90000 / fps;
|
||||
|
||||
// Set timing if not already set
|
||||
if (dec_ctx->timing->pts_set == 0)
|
||||
{
|
||||
dec_ctx->timing->min_pts = pts;
|
||||
dec_ctx->timing->pts_set = 2;
|
||||
dec_ctx->timing->sync_pts = pts;
|
||||
}
|
||||
set_current_pts(dec_ctx->timing, pts);
|
||||
set_fts(dec_ctx->timing);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Enable DTVCC decoder for CEA-708 captions
|
||||
if (dec_ctx->dtvcc_rust)
|
||||
{
|
||||
int is_active = ccxr_dtvcc_is_active(dec_ctx->dtvcc_rust);
|
||||
if (!is_active)
|
||||
{
|
||||
ccxr_dtvcc_set_active(dec_ctx->dtvcc_rust, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Process cc_data triplets through process_cc_data for 708 support
|
||||
process_cc_data(enc_ctx, dec_ctx, cc_data, cc_count, sub);
|
||||
cdp_count++;
|
||||
}
|
||||
|
||||
pos += cdp_length;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
LLONG ret;
|
||||
@@ -575,6 +674,8 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
struct lib_cc_decode *dec_ctx = NULL;
|
||||
int caps = 0;
|
||||
int is_dvdraw = 0; // Flag to track if this is DVD raw format
|
||||
int is_scc = 0; // Flag to track if this is SCC format
|
||||
int is_cdp = 0; // Flag to track if this is raw CDP format
|
||||
int is_mcc_output = 0; // Flag for MCC output format
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
@@ -607,13 +708,28 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
break;
|
||||
|
||||
// Check if this is DVD raw format using Rust detection
|
||||
if (!is_dvdraw && ccxr_is_dvdraw_header(data->buffer, (unsigned int)data->len))
|
||||
if (!is_dvdraw && !is_scc && ccxr_is_dvdraw_header(data->buffer, (unsigned int)data->len))
|
||||
{
|
||||
is_dvdraw = 1;
|
||||
mprint("Detected McPoodle's DVD raw format\n");
|
||||
}
|
||||
|
||||
if (is_mcc_output && !is_dvdraw)
|
||||
// Check if this is SCC format using Rust detection
|
||||
if (!is_scc && !is_dvdraw && ccxr_is_scc_file(data->buffer, (unsigned int)data->len))
|
||||
{
|
||||
is_scc = 1;
|
||||
mprint("Detected SCC (Scenarist Closed Caption) format\n");
|
||||
}
|
||||
|
||||
// Check if this is raw CDP format (starts with 0x9669)
|
||||
if (!is_cdp && !is_scc && !is_dvdraw && data->len >= 2 &&
|
||||
data->buffer[0] == 0x96 && data->buffer[1] == 0x69)
|
||||
{
|
||||
is_cdp = 1;
|
||||
mprint("Detected raw CDP (Caption Distribution Packet) format\n");
|
||||
}
|
||||
|
||||
if (is_mcc_output && !is_dvdraw && !is_scc && !is_cdp)
|
||||
{
|
||||
// For MCC output, encode raw data directly without decoding
|
||||
// This preserves the original CEA-608 byte pairs in CDP format
|
||||
@@ -626,6 +742,18 @@ int raw_loop(struct lib_ccx_ctx *ctx)
|
||||
// Use Rust implementation - handles timing internally
|
||||
ret = ccxr_process_dvdraw(dec_ctx, dec_sub, data->buffer, (unsigned int)data->len);
|
||||
}
|
||||
else if (is_scc)
|
||||
{
|
||||
// Use Rust SCC implementation - handles timing internally via SMPTE timecodes
|
||||
ret = ccxr_process_scc(dec_ctx, dec_sub, data->buffer, (unsigned int)data->len, ccx_options.scc_framerate);
|
||||
}
|
||||
else if (is_cdp)
|
||||
{
|
||||
// Process raw CDP packets (e.g., from SDI VANC capture)
|
||||
ret = process_raw_cdp(enc_ctx, dec_ctx, dec_sub, data->buffer, data->len);
|
||||
if (ret > 0)
|
||||
caps = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = process_raw(dec_ctx, dec_sub, data->buffer, data->len);
|
||||
@@ -796,10 +924,6 @@ int process_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, str
|
||||
got = data_node->len;
|
||||
}
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_PRIVATE_MPEG2_CC)
|
||||
{
|
||||
got = data_node->len; // Do nothing. Still don't know how to process it
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_RAW) // Raw two byte 608 data from DVR-MS/ASF
|
||||
{
|
||||
// The asf_get_more_data() loop sets current_pts when possible
|
||||
@@ -852,7 +976,34 @@ int process_data(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, str
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_RAW_TYPE)
|
||||
{
|
||||
got = process_raw_with_field(dec_ctx, dec_sub, data_node->buffer, data_node->len);
|
||||
// CCX_RAW_TYPE contains cc_data triplets (cc_type + 2 data bytes each)
|
||||
// Used by MXF and GXF demuxers
|
||||
|
||||
// Initialize timing if not set (use caption PTS as reference)
|
||||
if (dec_ctx->timing->pts_set == 0 && data_node->pts != CCX_NOPTS)
|
||||
{
|
||||
dec_ctx->timing->min_pts = data_node->pts;
|
||||
dec_ctx->timing->pts_set = 2; // MinPtsSet
|
||||
dec_ctx->timing->sync_pts = data_node->pts;
|
||||
set_fts(dec_ctx->timing);
|
||||
}
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Enable DTVCC decoder for CEA-708 captions from MXF/GXF
|
||||
if (dec_ctx->dtvcc_rust)
|
||||
{
|
||||
int is_active = ccxr_dtvcc_is_active(dec_ctx->dtvcc_rust);
|
||||
if (!is_active)
|
||||
{
|
||||
ccxr_dtvcc_set_active(dec_ctx->dtvcc_rust, 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Use process_cc_data to properly invoke DTVCC decoder for 708 captions
|
||||
int cc_count = data_node->len / 3;
|
||||
process_cc_data(enc_ctx, dec_ctx, data_node->buffer, cc_count, dec_sub);
|
||||
got = data_node->len;
|
||||
}
|
||||
else if (data_node->bufferdatatype == CCX_ISDB_SUBTITLE)
|
||||
{
|
||||
@@ -1041,7 +1192,11 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx *ctx,
|
||||
cinfo = get_cinfo(ctx->demux_ctx, pid);
|
||||
*enc_ctx = update_encoder_list_cinfo(ctx, cinfo);
|
||||
*dec_ctx = update_decoder_list_cinfo(ctx, cinfo);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder((*dec_ctx)->dtvcc_rust, *enc_ctx);
|
||||
#else
|
||||
(*dec_ctx)->dtvcc->encoder = (void *)(*enc_ctx);
|
||||
#endif
|
||||
|
||||
if ((*dec_ctx)->timing->min_pts == 0x01FFFFFFFFLL) // if we didn't set the min_pts of the program
|
||||
{
|
||||
@@ -1265,7 +1420,11 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
|
||||
enc_ctx = update_encoder_list_cinfo(ctx, cinfo);
|
||||
dec_ctx = update_decoder_list_cinfo(ctx, cinfo);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx; // WARN: otherwise cea-708 will not work
|
||||
#endif
|
||||
|
||||
if (dec_ctx->timing->min_pts == 0x01FFFFFFFFLL) // if we didn't set the min_pts of the program
|
||||
{
|
||||
@@ -1349,7 +1508,24 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
}
|
||||
if (ctx->live_stream)
|
||||
{
|
||||
int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
|
||||
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
|
||||
if (!t && ctx->demux_ctx->global_timestamp_inited)
|
||||
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// Handle multi-program TS timing
|
||||
if (ctx->demux_ctx->global_timestamp_inited)
|
||||
{
|
||||
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
|
||||
ctx->min_global_timestamp_offset = offset;
|
||||
// Only use timestamps from the program with the lowest base
|
||||
if (offset - ctx->min_global_timestamp_offset < 60000)
|
||||
t = offset - ctx->min_global_timestamp_offset;
|
||||
else
|
||||
t = ctx->min_global_timestamp_offset > 0 ? 0 : t;
|
||||
if (t < 0)
|
||||
t = 0;
|
||||
}
|
||||
int cur_sec = (int)(t / 1000);
|
||||
int th = cur_sec / 10;
|
||||
if (ctx->last_reported_progress != th)
|
||||
{
|
||||
@@ -1367,6 +1543,28 @@ int general_loop(struct lib_ccx_ctx *ctx)
|
||||
LLONG t = get_fts(dec_ctx->timing, dec_ctx->current_field);
|
||||
if (!t && ctx->demux_ctx->global_timestamp_inited)
|
||||
t = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// For multi-program TS files, different programs can have different
|
||||
// PCR bases (e.g., one at 25h, another at 23h). This causes the
|
||||
// global_timestamp to jump between different bases, resulting in
|
||||
// wildly different offset values. Track the minimum offset seen
|
||||
// and only display times from the program with the lowest base.
|
||||
if (ctx->demux_ctx->global_timestamp_inited)
|
||||
{
|
||||
LLONG offset = ctx->demux_ctx->global_timestamp - ctx->demux_ctx->min_global_timestamp;
|
||||
// Track minimum offset (this is the PCR base of the program
|
||||
// with the lowest timestamp, which represents true file time)
|
||||
if (ctx->min_global_timestamp_offset < 0 || offset < ctx->min_global_timestamp_offset)
|
||||
ctx->min_global_timestamp_offset = offset;
|
||||
// Only use timestamps from the program with the lowest base.
|
||||
// If current offset is significantly larger than minimum (by > 60s),
|
||||
// it's from a program with a higher PCR base - use minimum instead.
|
||||
if (offset - ctx->min_global_timestamp_offset < 60000)
|
||||
t = offset - ctx->min_global_timestamp_offset;
|
||||
else
|
||||
t = ctx->min_global_timestamp_offset > 0 ? 0 : t; // fallback to minimum-based time
|
||||
if (t < 0)
|
||||
t = 0;
|
||||
}
|
||||
int cur_sec = (int)(t / 1000);
|
||||
activity_progress(progress, cur_sec / 60, cur_sec % 60);
|
||||
ctx->last_reported_progress = progress;
|
||||
@@ -1475,7 +1673,11 @@ int rcwt_loop(struct lib_ccx_ctx *ctx)
|
||||
}
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx; // WARN: otherwise cea-708 will not work
|
||||
#endif
|
||||
if (parsebuf[6] == 0 && parsebuf[7] == 2)
|
||||
{
|
||||
dec_ctx->codec = CCX_CODEC_TELETEXT;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef CCX_CCEXTRACTOR_H
|
||||
#define CCX_CCEXTRACTOR_H
|
||||
|
||||
#define VERSION "0.96"
|
||||
#define VERSION "0.96.5"
|
||||
|
||||
// Load common includes and constants for library usage
|
||||
#include "ccx_common_platform.h"
|
||||
@@ -43,7 +43,7 @@ struct file_report
|
||||
};
|
||||
|
||||
// Stuff for telxcc.c
|
||||
#define MAX_TLT_PAGES_EXTRACT 8 // Maximum number of teletext pages to extract simultaneously
|
||||
#define MAX_TLT_PAGES_EXTRACT 8 // Maximum number of teletext pages to extract simultaneously
|
||||
|
||||
struct ccx_s_teletext_config
|
||||
{
|
||||
@@ -55,11 +55,11 @@ struct ccx_s_teletext_config
|
||||
uint8_t nonempty : 1; // produce at least one (dummy) frame
|
||||
// uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format
|
||||
// uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too
|
||||
uint16_t user_page; // Page selected by user (legacy, first page)
|
||||
uint16_t user_page; // Page selected by user (legacy, first page)
|
||||
// Multi-page teletext extraction (issue #665)
|
||||
uint16_t user_pages[MAX_TLT_PAGES_EXTRACT]; // Pages selected by user for extraction
|
||||
int num_user_pages; // Number of pages to extract (0 = auto-detect single page)
|
||||
int extract_all_pages; // If 1, extract all detected subtitle pages
|
||||
uint16_t user_pages[MAX_TLT_PAGES_EXTRACT]; // Pages selected by user for extraction
|
||||
int num_user_pages; // Number of pages to extract (0 = auto-detect single page)
|
||||
int extract_all_pages; // If 1, extract all detected subtitle pages
|
||||
int dolevdist; // 0=Don't attempt to correct errors
|
||||
int levdistmincnt, levdistmaxpct; // Means 2 fails or less is "the same", 10% or less is also "the same"
|
||||
struct ccx_boundary_time extraction_start, extraction_end; // Segment we actually process
|
||||
@@ -90,6 +90,7 @@ struct lib_ccx_ctx
|
||||
LLONG total_past; // Only in binary concat mode
|
||||
|
||||
int last_reported_progress;
|
||||
LLONG min_global_timestamp_offset; // Track minimum (global - min) for multi-program TS
|
||||
|
||||
/* Stats */
|
||||
int stat_numuserheaders;
|
||||
@@ -160,6 +161,7 @@ struct lib_ccx_ctx *init_libraries(struct ccx_s_options *opt);
|
||||
void dinit_libraries(struct lib_ccx_ctx **ctx);
|
||||
|
||||
extern void ccxr_init_basic_logger();
|
||||
extern void ccxr_update_logger_target();
|
||||
|
||||
// ccextractor.c
|
||||
void print_end_msg(void);
|
||||
@@ -183,6 +185,10 @@ size_t process_raw(struct lib_cc_decode *ctx, struct cc_subtitle *sub, unsigned
|
||||
unsigned int ccxr_process_dvdraw(struct lib_cc_decode *ctx, struct cc_subtitle *sub, const unsigned char *buffer, unsigned int len);
|
||||
int ccxr_is_dvdraw_header(const unsigned char *buffer, unsigned int len);
|
||||
|
||||
// Rust FFI: SCC (Scenarist Closed Caption) format processing (see src/rust/src/demuxer/scc.rs)
|
||||
unsigned int ccxr_process_scc(struct lib_cc_decode *ctx, struct cc_subtitle *sub, const unsigned char *buffer, unsigned int len, int framerate);
|
||||
int ccxr_is_scc_file(const unsigned char *buffer, unsigned int len);
|
||||
|
||||
int general_loop(struct lib_ccx_ctx *ctx);
|
||||
void process_hex(struct lib_ccx_ctx *ctx, char *filename);
|
||||
int rcwt_loop(struct lib_ccx_ctx *ctx);
|
||||
@@ -337,4 +343,9 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx *ctx,
|
||||
void segment_output_file(struct lib_ccx_ctx *ctx, struct lib_cc_decode *dec_ctx);
|
||||
int decode_vbi(struct lib_cc_decode *dec_ctx, uint8_t field, unsigned char *buffer, size_t len, struct cc_subtitle *sub);
|
||||
|
||||
#ifndef DISABLE_RUST
|
||||
// Rust FFI function to set encoder on persistent CEA-708 decoder
|
||||
void ccxr_dtvcc_set_encoder(void *dtvcc_rust, struct encoder_ctx *encoder);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <limits.h>
|
||||
#include <assert.h>
|
||||
#include "dvb_subtitle_decoder.h"
|
||||
#include "vobsub_decoder.h"
|
||||
|
||||
void skip_bytes(FILE *file, ULLONG n)
|
||||
{
|
||||
@@ -121,6 +122,8 @@ void parse_ebml(FILE *file)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -185,6 +188,8 @@ void parse_segment_info(FILE *file)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -483,6 +488,8 @@ void parse_segment_cluster_block_group(struct matroska_ctx *mkv_ctx, ULLONG clus
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -611,6 +618,8 @@ void parse_segment_cluster(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -733,14 +742,24 @@ int process_avc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fram
|
||||
{
|
||||
uint32_t nal_length;
|
||||
|
||||
nal_length = bswap32(*(long *)&frame.data[i]);
|
||||
if (i + nal_unit_size > frame.len)
|
||||
break;
|
||||
|
||||
nal_length =
|
||||
((uint32_t)frame.data[i] << 24) |
|
||||
((uint32_t)frame.data[i + 1] << 16) |
|
||||
((uint32_t)frame.data[i + 2] << 8) |
|
||||
(uint32_t)frame.data[i + 3];
|
||||
|
||||
i += nal_unit_size;
|
||||
|
||||
if (nal_length > frame.len - i)
|
||||
break;
|
||||
|
||||
if (nal_length > 0)
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
|
||||
i += nal_length;
|
||||
} // outer for
|
||||
assert(i == frame.len);
|
||||
|
||||
mkv_ctx->current_second = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
|
||||
|
||||
@@ -768,11 +787,22 @@ int process_hevc_frame_mkv(struct matroska_ctx *mkv_ctx, struct matroska_avc_fra
|
||||
{
|
||||
uint32_t nal_length;
|
||||
|
||||
nal_length = bswap32(*(long *)&frame.data[i]);
|
||||
if (i + nal_unit_size > frame.len)
|
||||
break;
|
||||
|
||||
nal_length =
|
||||
((uint32_t)frame.data[i] << 24) |
|
||||
((uint32_t)frame.data[i + 1] << 16) |
|
||||
((uint32_t)frame.data[i + 2] << 8) |
|
||||
(uint32_t)frame.data[i + 3];
|
||||
|
||||
i += nal_unit_size;
|
||||
|
||||
if (nal_length > frame.len - i)
|
||||
break;
|
||||
|
||||
if (nal_length > 0)
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&(frame.data[i]), nal_length, &mkv_ctx->dec_sub);
|
||||
do_NAL(enc_ctx, dec_ctx, (unsigned char *)&frame.data[i], nal_length, &mkv_ctx->dec_sub);
|
||||
i += nal_length;
|
||||
}
|
||||
|
||||
@@ -844,6 +874,8 @@ void parse_segment_track_entry(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -1172,7 +1204,7 @@ void parse_private_codec_data(struct matroska_ctx *mkv_ctx, char *codec_id_strin
|
||||
memset((void *)&cnf, 0, sizeof(struct dvb_config));
|
||||
|
||||
parse_dvb_description(&cnf, codec_data, 8);
|
||||
dec_ctx->private_data = dvbsub_init_decoder(&cnf, 0);
|
||||
dec_ctx->private_data = dvbsub_init_decoder(&cnf);
|
||||
|
||||
free(codec_data);
|
||||
}
|
||||
@@ -1196,6 +1228,8 @@ void parse_segment_tracks(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -1240,6 +1274,8 @@ void parse_segment(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
switch (code)
|
||||
{
|
||||
@@ -1334,11 +1370,362 @@ char *ass_ssa_sentence_erase_read_order(char *text)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate PS Pack header
|
||||
* The PS Pack header is 14 bytes:
|
||||
* - 4 bytes: start code (00 00 01 ba)
|
||||
* - 6 bytes: SCR (System Clock Reference) in MPEG-2 format
|
||||
* - 3 bytes: mux rate
|
||||
* - 1 byte: stuffing length (0)
|
||||
*/
|
||||
static void generate_ps_pack_header(unsigned char *buf, ULLONG pts_90khz)
|
||||
{
|
||||
// PS Pack start code
|
||||
buf[0] = 0x00;
|
||||
buf[1] = 0x00;
|
||||
buf[2] = 0x01;
|
||||
buf[3] = 0xBA;
|
||||
|
||||
// SCR (System Clock Reference) - use PTS as SCR base, SCR extension = 0
|
||||
// MPEG-2 format: 01 SCR[32:30] 1 SCR[29:15] 1 SCR[14:0] 1 SCR_ext[8:0] 1
|
||||
ULLONG scr = pts_90khz;
|
||||
ULLONG scr_base = scr;
|
||||
int scr_ext = 0;
|
||||
|
||||
buf[4] = 0x44 | ((scr_base >> 27) & 0x38) | ((scr_base >> 28) & 0x03);
|
||||
buf[5] = (scr_base >> 20) & 0xFF;
|
||||
buf[6] = 0x04 | ((scr_base >> 12) & 0xF8) | ((scr_base >> 13) & 0x03);
|
||||
buf[7] = (scr_base >> 5) & 0xFF;
|
||||
buf[8] = 0x04 | ((scr_base << 3) & 0xF8) | ((scr_ext >> 7) & 0x03);
|
||||
buf[9] = ((scr_ext << 1) & 0xFE) | 0x01;
|
||||
|
||||
// Mux rate (10080 = standard DVD rate)
|
||||
int mux_rate = 10080;
|
||||
buf[10] = (mux_rate >> 14) & 0xFF;
|
||||
buf[11] = (mux_rate >> 6) & 0xFF;
|
||||
buf[12] = ((mux_rate << 2) & 0xFC) | 0x03;
|
||||
|
||||
// Stuffing length = 0, with marker bits
|
||||
buf[13] = 0xF8;
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate PES header for private stream 1
|
||||
* Returns the total header size (variable based on PTS)
|
||||
*/
|
||||
static int generate_pes_header(unsigned char *buf, ULLONG pts_90khz, int payload_size, int stream_id)
|
||||
{
|
||||
// PES start code for private stream 1
|
||||
buf[0] = 0x00;
|
||||
buf[1] = 0x00;
|
||||
buf[2] = 0x01;
|
||||
buf[3] = 0xBD; // Private stream 1
|
||||
|
||||
// PES packet length = header data (3 + 5 for PTS) + 1 (substream ID) + payload
|
||||
int pes_header_data_len = 5; // PTS only
|
||||
int pes_packet_len = 3 + pes_header_data_len + 1 + payload_size;
|
||||
buf[4] = (pes_packet_len >> 8) & 0xFF;
|
||||
buf[5] = pes_packet_len & 0xFF;
|
||||
|
||||
// PES flags: MPEG-2, original
|
||||
buf[6] = 0x81;
|
||||
// PTS_DTS_flags = 10 (PTS only)
|
||||
buf[7] = 0x80;
|
||||
// PES header data length
|
||||
buf[8] = pes_header_data_len;
|
||||
|
||||
// PTS (5 bytes): '0010' | PTS[32:30] | '1' | PTS[29:15] | '1' | PTS[14:0] | '1'
|
||||
buf[9] = 0x21 | ((pts_90khz >> 29) & 0x0E);
|
||||
buf[10] = (pts_90khz >> 22) & 0xFF;
|
||||
buf[11] = 0x01 | ((pts_90khz >> 14) & 0xFE);
|
||||
buf[12] = (pts_90khz >> 7) & 0xFF;
|
||||
buf[13] = 0x01 | ((pts_90khz << 1) & 0xFE);
|
||||
|
||||
// Substream ID (0x20 = first VOBSUB stream)
|
||||
buf[14] = 0x20 + stream_id;
|
||||
|
||||
return 15; // Total PES header size
|
||||
}
|
||||
|
||||
/* VOBSUB support: Generate timestamp string for .idx file
|
||||
* Format: HH:MM:SS:mmm (where mmm is milliseconds)
|
||||
*/
|
||||
static void generate_vobsub_timestamp(char *buf, size_t bufsize, ULLONG milliseconds)
|
||||
{
|
||||
ULLONG ms = milliseconds % 1000;
|
||||
milliseconds /= 1000;
|
||||
ULLONG seconds = milliseconds % 60;
|
||||
milliseconds /= 60;
|
||||
ULLONG minutes = milliseconds % 60;
|
||||
milliseconds /= 60;
|
||||
ULLONG hours = milliseconds;
|
||||
|
||||
snprintf(buf, bufsize, "%02" LLU_M ":%02" LLU_M ":%02" LLU_M ":%03" LLU_M,
|
||||
hours, minutes, seconds, ms);
|
||||
}
|
||||
|
||||
/* Check if output format is text-based (requires OCR for bitmap subtitles) */
|
||||
static int is_text_output_format(enum ccx_output_format format)
|
||||
{
|
||||
return (format == CCX_OF_SRT || format == CCX_OF_SSA ||
|
||||
format == CCX_OF_WEBVTT || format == CCX_OF_TRANSCRIPT ||
|
||||
format == CCX_OF_SAMI || format == CCX_OF_SMPTETT);
|
||||
}
|
||||
|
||||
/* VOBSUB support: Process VOBSUB track with OCR and output text format */
|
||||
static void process_vobsub_track_ocr(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
if (track->sentence_count == 0)
|
||||
{
|
||||
mprint("\nNo VOBSUB subtitles to process");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if OCR is available */
|
||||
if (!vobsub_ocr_available())
|
||||
{
|
||||
fatal(EXIT_NOT_CLASSIFIED,
|
||||
"VOBSUB to text conversion requires OCR support.\n"
|
||||
"Please rebuild CCExtractor with -DWITH_OCR=ON or use raw output (--out=idx)");
|
||||
}
|
||||
|
||||
/* Initialize VOBSUB decoder */
|
||||
struct vobsub_ctx *vob_ctx = init_vobsub_decoder();
|
||||
if (!vob_ctx)
|
||||
{
|
||||
fatal(EXIT_NOT_CLASSIFIED,
|
||||
"VOBSUB to text conversion requires OCR, but initialization failed.\n"
|
||||
"Please ensure Tesseract is installed with language data.");
|
||||
}
|
||||
|
||||
/* Parse palette from track header (CodecPrivate) */
|
||||
if (track->header)
|
||||
{
|
||||
vobsub_parse_palette(vob_ctx, track->header);
|
||||
}
|
||||
|
||||
mprint("\nProcessing VOBSUB track with OCR (%d subtitles)", track->sentence_count);
|
||||
|
||||
/* Get encoder context for output */
|
||||
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
|
||||
|
||||
/* Process each subtitle */
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
mkv_ctx->sentence_count++;
|
||||
|
||||
/* Calculate end time (use next subtitle start if not specified) */
|
||||
ULLONG end_time = sentence->time_end;
|
||||
if (end_time == 0 && i + 1 < track->sentence_count)
|
||||
{
|
||||
end_time = track->sentences[i + 1]->time_start - 1;
|
||||
}
|
||||
else if (end_time == 0)
|
||||
{
|
||||
end_time = sentence->time_start + 5000; /* Default 5 second duration */
|
||||
}
|
||||
|
||||
/* Decode SPU and run OCR */
|
||||
struct cc_subtitle sub;
|
||||
memset(&sub, 0, sizeof(sub));
|
||||
|
||||
int ret = vobsub_decode_spu(vob_ctx,
|
||||
(unsigned char *)sentence->text,
|
||||
sentence->text_size,
|
||||
sentence->time_start,
|
||||
end_time,
|
||||
&sub);
|
||||
|
||||
if (ret == 0 && sub.got_output)
|
||||
{
|
||||
/* Encode the subtitle to output format */
|
||||
encode_sub(enc_ctx, &sub);
|
||||
|
||||
/* Free subtitle data */
|
||||
if (sub.data)
|
||||
{
|
||||
struct cc_bitmap *rect = (struct cc_bitmap *)sub.data;
|
||||
for (int j = 0; j < sub.nb_data; j++)
|
||||
{
|
||||
if (rect[j].data0)
|
||||
free(rect[j].data0);
|
||||
if (rect[j].data1)
|
||||
free(rect[j].data1);
|
||||
#ifdef ENABLE_OCR
|
||||
if (rect[j].ocr_text)
|
||||
free(rect[j].ocr_text);
|
||||
#endif
|
||||
}
|
||||
free(sub.data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Progress indicator */
|
||||
if ((i + 1) % 50 == 0 || i + 1 == track->sentence_count)
|
||||
{
|
||||
mprint("\rProcessing VOBSUB: %d/%d subtitles", i + 1, track->sentence_count);
|
||||
}
|
||||
}
|
||||
|
||||
delete_vobsub_decoder(&vob_ctx);
|
||||
mprint("\nVOBSUB OCR processing complete");
|
||||
}
|
||||
|
||||
/* VOBSUB support: Save VOBSUB track to .idx and .sub files */
|
||||
#define VOBSUB_BLOCK_SIZE 2048
|
||||
static void save_vobsub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
if (track->sentence_count == 0)
|
||||
{
|
||||
mprint("\nNo VOBSUB subtitles to write");
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate base filename (without extension)
|
||||
const char *lang_to_use = track->lang_ietf ? track->lang_ietf : track->lang;
|
||||
const char *basename = get_basename(mkv_ctx->filename);
|
||||
size_t needed = strlen(basename) + strlen(lang_to_use) + 32;
|
||||
char *base_filename = malloc(needed);
|
||||
if (base_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
|
||||
if (track->lang_index == 0)
|
||||
snprintf(base_filename, needed, "%s_%s", basename, lang_to_use);
|
||||
else
|
||||
snprintf(base_filename, needed, "%s_%s_" LLD, basename, lang_to_use, track->lang_index);
|
||||
|
||||
// Create .sub filename
|
||||
char *sub_filename = malloc(needed + 5);
|
||||
if (sub_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(sub_filename, needed + 5, "%s.sub", base_filename);
|
||||
|
||||
// Create .idx filename
|
||||
char *idx_filename = malloc(needed + 5);
|
||||
if (idx_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(idx_filename, needed + 5, "%s.idx", base_filename);
|
||||
|
||||
mprint("\nOutput files: %s, %s", idx_filename, sub_filename);
|
||||
|
||||
// Open .sub file
|
||||
int sub_desc;
|
||||
#ifdef WIN32
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (sub_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .sub file");
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Open .idx file
|
||||
int idx_desc;
|
||||
#ifdef WIN32
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (idx_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .idx file");
|
||||
close(sub_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write .idx header (from CodecPrivate)
|
||||
if (track->header != NULL)
|
||||
write_wrapped(idx_desc, track->header, strlen(track->header));
|
||||
|
||||
// Add language identifier line
|
||||
char lang_line[128];
|
||||
snprintf(lang_line, sizeof(lang_line), "\nid: %s, index: 0\n", lang_to_use);
|
||||
write_wrapped(idx_desc, lang_line, strlen(lang_line));
|
||||
|
||||
// Buffer for PS/PES headers and padding
|
||||
unsigned char header_buf[32];
|
||||
unsigned char zero_buf[VOBSUB_BLOCK_SIZE];
|
||||
memset(zero_buf, 0, VOBSUB_BLOCK_SIZE);
|
||||
|
||||
ULLONG file_pos = 0;
|
||||
|
||||
// Write each subtitle
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
mkv_ctx->sentence_count++;
|
||||
|
||||
// Convert timestamp to 90kHz PTS
|
||||
ULLONG pts_90khz = sentence->time_start * 90;
|
||||
|
||||
// Write timestamp entry to .idx
|
||||
char timestamp[32];
|
||||
generate_vobsub_timestamp(timestamp, sizeof(timestamp), sentence->time_start);
|
||||
char idx_entry[128];
|
||||
snprintf(idx_entry, sizeof(idx_entry), "timestamp: %s, filepos: %09" LLX_M "\n",
|
||||
timestamp, file_pos);
|
||||
write_wrapped(idx_desc, idx_entry, strlen(idx_entry));
|
||||
|
||||
// Generate PS Pack header (14 bytes)
|
||||
generate_ps_pack_header(header_buf, pts_90khz);
|
||||
write_wrapped(sub_desc, (char *)header_buf, 14);
|
||||
|
||||
// Generate PES header (15 bytes)
|
||||
int pes_header_len = generate_pes_header(header_buf, pts_90khz, sentence->text_size, 0);
|
||||
write_wrapped(sub_desc, (char *)header_buf, pes_header_len);
|
||||
|
||||
// Write SPU data
|
||||
write_wrapped(sub_desc, sentence->text, sentence->text_size);
|
||||
|
||||
// Calculate bytes written and pad to block boundary
|
||||
ULLONG bytes_written = 14 + pes_header_len + sentence->text_size;
|
||||
ULLONG padding_needed = VOBSUB_BLOCK_SIZE - (bytes_written % VOBSUB_BLOCK_SIZE);
|
||||
if (padding_needed < VOBSUB_BLOCK_SIZE)
|
||||
{
|
||||
write_wrapped(sub_desc, (char *)zero_buf, padding_needed);
|
||||
bytes_written += padding_needed;
|
||||
}
|
||||
|
||||
file_pos += bytes_written;
|
||||
}
|
||||
|
||||
close(sub_desc);
|
||||
close(idx_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
}
|
||||
|
||||
void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
char *filename;
|
||||
int desc;
|
||||
|
||||
// VOBSUB tracks need special handling
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
// Check if user wants text output (SRT, SSA, WebVTT, etc.)
|
||||
if (ccx_options.write_format_rewritten &&
|
||||
is_text_output_format(ccx_options.enc_cfg.write_format))
|
||||
{
|
||||
// Use OCR to convert VOBSUB to text
|
||||
process_vobsub_track_ocr(mkv_ctx, track);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Output raw idx/sub files
|
||||
save_vobsub_track(mkv_ctx, track);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (mkv_ctx->ctx->cc_to_stdout == CCX_TRUE)
|
||||
{
|
||||
desc = 1; // file descriptor of stdout
|
||||
@@ -1358,11 +1745,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
if (track->header != NULL)
|
||||
write_wrapped(desc, track->header, strlen(track->header));
|
||||
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
mprint("\nError: VOBSUB not supported");
|
||||
}
|
||||
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
@@ -1497,10 +1879,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
free(timestamp_start);
|
||||
free(timestamp_end);
|
||||
}
|
||||
else if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
// TODO: Add support for VOBSUB
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1572,6 +1950,9 @@ void matroska_parse(struct matroska_ctx *mkv_ctx)
|
||||
{
|
||||
code <<= 8;
|
||||
code += mkv_read_byte(file);
|
||||
// Check for EOF after reading - feof() is only set after a failed read
|
||||
if (feof(file))
|
||||
break;
|
||||
code_len++;
|
||||
|
||||
switch (code)
|
||||
@@ -1623,8 +2004,13 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
|
||||
{
|
||||
if (ccx_options.write_format_rewritten)
|
||||
{
|
||||
mprint(MATROSKA_WARNING "You are using --out=<format>, but Matroska parser extract subtitles in a recorded format\n");
|
||||
mprint("--out=<format> will be ignored\n");
|
||||
/* Note: For VOBSUB tracks, text output formats (SRT, SSA, etc.) are
|
||||
* supported via OCR. For other subtitle types, the native format is used. */
|
||||
if (!is_text_output_format(ccx_options.enc_cfg.write_format))
|
||||
{
|
||||
mprint(MATROSKA_WARNING "You are using --out=<format>, but Matroska parser extracts subtitles in their recorded format\n");
|
||||
mprint("--out=<format> will be ignored for non-VOBSUB tracks\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Don't need generated input file
|
||||
|
||||
@@ -5,26 +5,31 @@
|
||||
#if (defined(WIN32) || defined(_WIN32_WCE)) && (defined(__MINGW32__) || !defined(__GNUC__))
|
||||
#define LLD_M "I64d"
|
||||
#define LLU_M "I64u"
|
||||
#define LLX_M "I64x"
|
||||
#define LLD "%I64d"
|
||||
#define LLU "%I64u"
|
||||
#elif defined(__SYMBIAN32__)
|
||||
#define LLD_M "d"
|
||||
#define LLU_M "u"
|
||||
#define LLX_M "x"
|
||||
#define LLD "%d"
|
||||
#define LLU "%u"
|
||||
#elif defined(__DARWIN__) || defined(__APPLE__)
|
||||
#define LLD_M "lld"
|
||||
#define LLU_M "llu"
|
||||
#define LLX_M "llx"
|
||||
#define LLD "%lld"
|
||||
#define LLU "%llu"
|
||||
#elif defined(_LP64) /* Unix 64 bits */
|
||||
#define LLD_M "ld"
|
||||
#define LLU_M "lu"
|
||||
#define LLX_M "lx"
|
||||
#define LLD "%ld"
|
||||
#define LLU "%lu"
|
||||
#else /* Unix 32 bits */
|
||||
#define LLD_M "lld"
|
||||
#define LLU_M "llu"
|
||||
#define LLX_M "llx"
|
||||
#define LLD "%lld"
|
||||
#define LLU "%llu"
|
||||
#endif
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "ccx_mp4.h"
|
||||
#include "activity.h"
|
||||
#include "ccx_dtvcc.h"
|
||||
#include "vobsub_decoder.h"
|
||||
|
||||
#define MEDIA_TYPE(type, subtype) (((u64)(type) << 32) + (subtype))
|
||||
|
||||
@@ -25,15 +26,22 @@
|
||||
#define GF_ISOM_SUBTYPE_HVC1 GF_4CC('h', 'v', 'c', '1')
|
||||
#endif
|
||||
|
||||
static short bswap16(short v)
|
||||
// VOBSUB subtype (mp4s or MPEG)
|
||||
#ifndef GF_ISOM_SUBTYPE_MPEG4
|
||||
#define GF_ISOM_SUBTYPE_MPEG4 GF_4CC('M', 'P', 'E', 'G')
|
||||
#endif
|
||||
|
||||
static int16_t bswap16(int16_t v)
|
||||
{
|
||||
return ((v >> 8) & 0x00FF) | ((v << 8) & 0xFF00);
|
||||
}
|
||||
|
||||
static long bswap32(long v)
|
||||
static int32_t bswap32(int32_t v)
|
||||
{
|
||||
// For 0x12345678 returns 78563412
|
||||
long swapped = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) | ((v & 0xFF000000) >> 24);
|
||||
// Use int32_t instead of long for consistent behavior across platforms
|
||||
// (long is 4 bytes on Windows x64 but 8 bytes on Linux x64)
|
||||
int32_t swapped = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) | ((v & 0xFF000000) >> 24);
|
||||
return swapped;
|
||||
}
|
||||
static struct
|
||||
@@ -76,10 +84,10 @@ static int process_avc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_AVCConf
|
||||
nal_length = s->data[i];
|
||||
break;
|
||||
case 2:
|
||||
nal_length = bswap16(*(short *)&s->data[i]);
|
||||
nal_length = bswap16(*(int16_t *)&s->data[i]);
|
||||
break;
|
||||
case 4:
|
||||
nal_length = bswap32(*(long *)&s->data[i]);
|
||||
nal_length = bswap32(*(int32_t *)&s->data[i]);
|
||||
break;
|
||||
}
|
||||
const u32 previous_index = i;
|
||||
@@ -145,10 +153,10 @@ static int process_hevc_sample(struct lib_ccx_ctx *ctx, u32 timescale, GF_HEVCCo
|
||||
nal_length = s->data[i];
|
||||
break;
|
||||
case 2:
|
||||
nal_length = bswap16(*(short *)&s->data[i]);
|
||||
nal_length = bswap16(*(int16_t *)&s->data[i]);
|
||||
break;
|
||||
case 4:
|
||||
nal_length = bswap32(*(long *)&s->data[i]);
|
||||
nal_length = bswap32(*(int32_t *)&s->data[i]);
|
||||
break;
|
||||
default:
|
||||
mprint("Unexpected nal_unit_size %u in HEVC config\n", c->nal_unit_size);
|
||||
@@ -202,6 +210,13 @@ static int process_xdvb_track(struct lib_ccx_ctx *ctx, const char *basename, GF_
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
enc_ctx = update_encoder_list(ctx);
|
||||
|
||||
// Set buffer data type to CCX_PES for MP4/MOV MPEG-2 tracks.
|
||||
// This ensures cb_field counters are not incremented in do_cb(),
|
||||
// which is correct because container formats associate captions
|
||||
// with the frame's PTS directly.
|
||||
dec_ctx->in_bufferdatatype = CCX_PES;
|
||||
|
||||
if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
|
||||
{
|
||||
return 0;
|
||||
@@ -249,6 +264,12 @@ static int process_avc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_I
|
||||
|
||||
dec_ctx = update_decoder_list(ctx);
|
||||
|
||||
// Set buffer data type to CCX_H264 for MP4/MOV AVC tracks.
|
||||
// This ensures cb_field counters are not incremented in do_cb(),
|
||||
// which is correct because container formats associate captions
|
||||
// with the frame's PTS directly.
|
||||
dec_ctx->in_bufferdatatype = CCX_H264;
|
||||
|
||||
if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
|
||||
{
|
||||
return 0;
|
||||
@@ -326,6 +347,12 @@ static int process_hevc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_
|
||||
// Enable HEVC mode
|
||||
dec_ctx->avc_ctx->is_hevc = 1;
|
||||
|
||||
// Set buffer data type to CCX_H264 for MP4/MOV HEVC tracks.
|
||||
// This ensures cb_field counters are not incremented in do_cb(),
|
||||
// which is correct because container formats associate captions
|
||||
// with the frame's PTS directly.
|
||||
dec_ctx->in_bufferdatatype = CCX_H264;
|
||||
|
||||
if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
|
||||
{
|
||||
return 0;
|
||||
@@ -391,6 +418,144 @@ static int process_hevc_track(struct lib_ccx_ctx *ctx, const char *basename, GF_
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
 * Decodes every sample of one VOBSUB (subp:MPEG) track of an MP4 file,
 * runs OCR on it and hands the recognized subtitles to the encoder.
 *
 * For each sample the start time is taken from its DTS + CTS offset and the
 * end time from the following sample; when no usable end time is available a
 * default duration of 5 seconds is used.
 *
 * Calls fatal() when OCR support is not available or the VOBSUB decoder
 * cannot be created.
 *
 * @param ctx    Library context (decoder/encoder lists, progress reporting)
 * @param f      Open ISO media file
 * @param track  1-based track number inside f
 * @param sub    Receives got_output = 1 as soon as one subtitle was encoded
 * @return 0; a track without samples or with a zero timescale is skipped
 */
static int process_vobsub_track(struct lib_ccx_ctx *ctx, GF_ISOFile *f, u32 track, struct cc_subtitle *sub)
{
	u32 timescale, i, sample_count;
	int status = 0;
	struct lib_cc_decode *dec_ctx = NULL;
	struct encoder_ctx *enc_ctx = NULL;
	struct vobsub_ctx *vob_ctx = NULL;

	dec_ctx = update_decoder_list(ctx);
	enc_ctx = update_encoder_list(ctx);

	if ((sample_count = gf_isom_get_sample_count(f, track)) < 1)
	{
		return 0;
	}

	timescale = gf_isom_get_media_timescale(f, track);
	if (timescale == 0)
	{
		/* Every timestamp below is divided by the timescale; a zero value
		 * would be an integer division by zero. */
		mprint("VOBSUB track %u reports a timescale of 0, skipping it\n", track);
		return 0;
	}

	/* Check if OCR is available */
	if (!vobsub_ocr_available())
	{
		fatal(EXIT_NOT_CLASSIFIED,
		      "VOBSUB to text conversion requires OCR support.\n"
		      "Please rebuild CCExtractor with -DWITH_OCR=ON");
	}

	/* Initialize VOBSUB decoder */
	vob_ctx = init_vobsub_decoder();
	if (!vob_ctx)
	{
		fatal(EXIT_NOT_CLASSIFIED,
		      "VOBSUB decoder initialization failed.\n"
		      "Please ensure Tesseract is installed with language data.");
	}

	/* Try to get decoder config for palette info */
	GF_GenericSampleDescription *gdesc = gf_isom_get_generic_sample_description(f, track, 1);
	if (gdesc)
	{
		if (gdesc->extension_buf && gdesc->extension_buf_size > 0)
		{
			/* The extension buffer may contain an idx-like header with palette.
			 * Copy it so it can be NUL-terminated before being parsed as text. */
			char *header = malloc(gdesc->extension_buf_size + 1);
			if (header)
			{
				memcpy(header, gdesc->extension_buf, gdesc->extension_buf_size);
				header[gdesc->extension_buf_size] = '\0';
				vobsub_parse_palette(vob_ctx, header);
				free(header);
			}
		}
		/* The description owns a separately allocated extension buffer;
		 * releasing only the struct would leak it. Released with free() to
		 * match how the struct itself has always been released here. */
		free(gdesc->extension_buf);
		free(gdesc);
	}

	mprint("Processing VOBSUB track (%u samples)\n", sample_count);

	for (i = 0; i < sample_count; i++)
	{
		u32 sdi;
		GF_ISOSample *s = gf_isom_get_sample(f, track, i + 1, &sdi);

		if (s != NULL)
		{
			s32 signed_cts = (s32)s->CTS_Offset;
			/* Presentation time of this sample in track timescale units */
			u64 cts = s->DTS + signed_cts;
			LLONG start_time_ms = (LLONG)(cts * 1000) / timescale;

			/* Calculate end time from next sample if available */
			LLONG end_time_ms = 0;
			if (i + 1 < sample_count)
			{
				u32 next_sdi;
				GF_ISOSample *next_s = gf_isom_get_sample(f, track, i + 2, &next_sdi);
				if (next_s)
				{
					s32 next_signed_cts = (s32)next_s->CTS_Offset;
					end_time_ms = (LLONG)((next_s->DTS + next_signed_cts) * 1000) / timescale;
					gf_isom_sample_del(&next_s);
				}
			}
			/* No next sample, or one that does not start after this sample:
			 * fall back to a fixed duration instead of emitting an empty or
			 * negative display interval. */
			if (end_time_ms == 0 || end_time_ms <= start_time_ms)
				end_time_ms = start_time_ms + 5000; /* Default 5 second duration */

			set_current_pts(dec_ctx->timing, cts * MPEG_CLOCK_FREQ / timescale);
			set_fts(dec_ctx->timing);

			/* Decode SPU and run OCR */
			struct cc_subtitle vob_sub;
			memset(&vob_sub, 0, sizeof(vob_sub));

			int ret = vobsub_decode_spu(vob_ctx,
						    (unsigned char *)s->data, s->dataLength,
						    start_time_ms, end_time_ms,
						    &vob_sub);

			if (ret == 0 && vob_sub.got_output)
			{
				/* Encode the subtitle to output format */
				encode_sub(enc_ctx, &vob_sub);
				sub->got_output = 1;

				/* Free subtitle data (free(NULL) is a no-op) */
				if (vob_sub.data)
				{
					struct cc_bitmap *rect = (struct cc_bitmap *)vob_sub.data;
					for (int j = 0; j < vob_sub.nb_data; j++)
					{
						free(rect[j].data0);
						free(rect[j].data1);
#ifdef ENABLE_OCR
						free(rect[j].ocr_text);
#endif
					}
					free(vob_sub.data);
				}
			}

			gf_isom_sample_del(&s);
		}

		/* Widen before multiplying so the percentage cannot wrap in 32 bits */
		int progress = (int)(((u64)i * 100) / sample_count);
		if (ctx->last_reported_progress != progress)
		{
			int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
			activity_progress(progress, cur_sec / 60, cur_sec % 60);
			ctx->last_reported_progress = progress;
		}
	}

	int cur_sec = (int)(get_fts(dec_ctx->timing, dec_ctx->current_field) / 1000);
	activity_progress(100, cur_sec / 60, cur_sec % 60);

	delete_vobsub_decoder(&vob_ctx);
	mprint("VOBSUB processing complete\n");

	return status;
}
|
||||
|
||||
static char *format_duration(u64 dur, u32 timescale, char *szDur, size_t szDur_size)
|
||||
{
|
||||
u32 h, m, s, ms;
|
||||
@@ -584,7 +749,11 @@ static int process_clcp(struct lib_ccx_ctx *ctx, struct encoder_ctx *enc_ctx,
|
||||
dbg_print(CCX_DMT_PARSE, "MP4-708: atom skipped (cc_type < 2)\n");
|
||||
continue;
|
||||
}
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_process_data(dec_ctx->dtvcc_rust, cc_valid, cc_type, temp[2], temp[3]);
|
||||
#else
|
||||
dtvcc_process_data(dec_ctx->dtvcc, (unsigned char *)temp);
|
||||
#endif
|
||||
cb_708++;
|
||||
}
|
||||
if (ctx->write_format == CCX_OF_MCC)
|
||||
@@ -722,10 +891,19 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
if (enc_ctx)
|
||||
enc_ctx->timing = dec_ctx->timing;
|
||||
|
||||
// WARN: otherwise cea-708 will not work
|
||||
// WARN: otherwise cea-708 will not work
|
||||
#ifndef DISABLE_RUST
|
||||
ccxr_dtvcc_set_encoder(dec_ctx->dtvcc_rust, enc_ctx);
|
||||
#else
|
||||
dec_ctx->dtvcc->encoder = (void *)enc_ctx;
|
||||
#endif
|
||||
|
||||
memset(&dec_sub, 0, sizeof(dec_sub));
|
||||
if (file == NULL)
|
||||
{
|
||||
mprint("Error: NULL file path provided to processmp4\n");
|
||||
return -1;
|
||||
}
|
||||
mprint("Opening \'%s\': ", file);
|
||||
#ifdef MP4_DEBUG
|
||||
gf_log_set_tool_level(GF_LOG_CONTAINER, GF_LOG_DEBUG);
|
||||
@@ -745,6 +923,7 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
avc_track_count = 0;
|
||||
hevc_track_count = 0;
|
||||
cc_track_count = 0;
|
||||
u32 vobsub_track_count = 0;
|
||||
|
||||
for (i = 0; i < track_count; i++)
|
||||
{
|
||||
@@ -760,9 +939,11 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
avc_track_count++;
|
||||
if (type == GF_ISOM_MEDIA_VISUAL && (subtype == GF_ISOM_SUBTYPE_HEV1 || subtype == GF_ISOM_SUBTYPE_HVC1))
|
||||
hevc_track_count++;
|
||||
if (type == GF_ISOM_MEDIA_SUBPIC && subtype == GF_ISOM_SUBTYPE_MPEG4)
|
||||
vobsub_track_count++;
|
||||
}
|
||||
|
||||
mprint("MP4: found %u tracks: %u avc, %u hevc and %u cc\n", track_count, avc_track_count, hevc_track_count, cc_track_count);
|
||||
mprint("MP4: found %u tracks: %u avc, %u hevc, %u cc, %u vobsub\n", track_count, avc_track_count, hevc_track_count, cc_track_count, vobsub_track_count);
|
||||
|
||||
for (i = 0; i < track_count; i++)
|
||||
{
|
||||
@@ -880,6 +1061,24 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
}
|
||||
break;
|
||||
|
||||
case MEDIA_TYPE(GF_ISOM_MEDIA_SUBPIC, GF_ISOM_SUBTYPE_MPEG4): // subp:MPEG (VOBSUB)
|
||||
// If there are multiple VOBSUB tracks, change fd for different tracks
|
||||
if (vobsub_track_count > 1)
|
||||
{
|
||||
switch_output_file(ctx, enc_ctx, i);
|
||||
}
|
||||
if (process_vobsub_track(ctx, f, i + 1, &dec_sub) != 0)
|
||||
{
|
||||
mprint("Error on process_vobsub_track()\n");
|
||||
free(dec_ctx->xds_ctx);
|
||||
return -3;
|
||||
}
|
||||
if (dec_sub.got_output)
|
||||
{
|
||||
mp4_ret = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (type != GF_ISOM_MEDIA_CLOSED_CAPTION && type != GF_ISOM_MEDIA_SUBT && type != GF_ISOM_MEDIA_TEXT)
|
||||
break; // ignore non cc track
|
||||
@@ -1019,9 +1218,14 @@ int processmp4(struct lib_ccx_ctx *ctx, struct ccx_s_mp4Cfg *cfg, char *file)
|
||||
mprint("Found no HEVC track(s). ");
|
||||
|
||||
if (cc_track_count)
|
||||
mprint("Found %d CC track(s).\n", cc_track_count);
|
||||
mprint("Found %d CC track(s). ", cc_track_count);
|
||||
else
|
||||
mprint("Found no dedicated CC track(s).\n");
|
||||
mprint("Found no dedicated CC track(s). ");
|
||||
|
||||
if (vobsub_track_count)
|
||||
mprint("Found %d VOBSUB track(s).\n", vobsub_track_count);
|
||||
else
|
||||
mprint("\n");
|
||||
|
||||
ctx->freport.mp4_cc_track_cnt = cc_track_count;
|
||||
|
||||
|
||||
@@ -103,7 +103,8 @@ int set_nonblocking(int fd);
|
||||
void connect_to_srv(const char *addr, const char *port, const char *cc_desc, const char *pwd)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_connect_to_srv(addr, port, cc_desc, pwd);
|
||||
(void)ccxr_connect_to_srv(addr, port, cc_desc, pwd);
|
||||
return;
|
||||
#endif
|
||||
if (NULL == addr)
|
||||
{
|
||||
@@ -137,7 +138,8 @@ void connect_to_srv(const char *addr, const char *port, const char *cc_desc, con
|
||||
void net_send_header(const unsigned char *data, size_t len)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_send_header(data, len);
|
||||
(void)ccxr_net_send_header(data, len);
|
||||
return;
|
||||
#endif
|
||||
assert(srv_sd > 0);
|
||||
|
||||
@@ -188,7 +190,8 @@ int net_send_cc(const unsigned char *data, int len, void *private_data, struct c
|
||||
void net_check_conn()
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_check_conn();
|
||||
ccxr_net_check_conn();
|
||||
return;
|
||||
#endif
|
||||
time_t now;
|
||||
static time_t last_ping = 0;
|
||||
@@ -252,7 +255,8 @@ void net_send_epg(
|
||||
const char *category)
|
||||
{
|
||||
#ifndef DISABLE_RUST
|
||||
return ccxr_net_send_epg(start, stop, title, desc, lang, category);
|
||||
(void)ccxr_net_send_epg(start, stop, title, desc, lang, category);
|
||||
return;
|
||||
#endif
|
||||
size_t st;
|
||||
size_t sp;
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
#include <dirent.h>
|
||||
#include "ccx_encoders_helpers.h"
|
||||
#include "ccx_encoders_spupng.h"
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach-o/dyld.h>
|
||||
#endif
|
||||
#include "ocr.h"
|
||||
|
||||
struct ocrCtx
|
||||
@@ -100,6 +105,68 @@ void delete_ocr(void **arg)
|
||||
freep(arg);
|
||||
}
|
||||
|
||||
/**
 * get_executable_directory
 *
 * Looks up the directory that holds the running executable. The lookup is
 * attempted only once; the outcome (including a failure) is remembered and
 * reused by later calls.
 *
 * The path is cut at the last directory separator, so the returned string
 * carries no trailing separator.
 *
 * Returns a pointer to a static buffer, or NULL when the directory could not
 * be determined (or the platform is not one of Windows, Linux, macOS).
 */
static const char *get_executable_directory(void)
{
	static char exe_dir[1024] = {0};
	static int initialized = 0;

	if (!initialized)
	{
		// Flag the attempt first so that a failed lookup is never retried
		initialized = 1;

#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
		int have_path;

#ifdef _WIN32
		const int separator = '\\';
		char exe_path[MAX_PATH];
		DWORD len = GetModuleFileNameA(NULL, exe_path, MAX_PATH);
		// 0 means failure, MAX_PATH means the path did not fit
		have_path = (len != 0 && len < MAX_PATH);
#elif defined(__linux__)
		const int separator = '/';
		char exe_path[1024];
		ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1);
		have_path = (len > 0);
		if (have_path)
			exe_path[len] = '\0'; // readlink() does not terminate the string
#else
		const int separator = '/';
		char exe_path[1024];
		uint32_t size = sizeof(exe_path);
		have_path = (_NSGetExecutablePath(exe_path, &size) == 0);
#endif

		if (have_path)
		{
			// Drop the file name: keep everything before the last separator
			char *cut = strrchr(exe_path, separator);
			if (cut != NULL)
			{
				*cut = '\0';
				strncpy(exe_dir, exe_path, sizeof(exe_dir) - 1);
				exe_dir[sizeof(exe_dir) - 1] = '\0';
			}
		}
#endif
	}

	return exe_dir[0] ? exe_dir : NULL;
}
|
||||
|
||||
/**
|
||||
* probe_tessdata_location
|
||||
*
|
||||
@@ -107,8 +174,10 @@ void delete_ocr(void **arg)
|
||||
*
|
||||
* Priority of Tesseract traineddata file search paths:-
|
||||
* 1. tessdata in TESSDATA_PREFIX, if it is specified. Overrides others
|
||||
* 2. tessdata in current working directory
|
||||
* 3. tessdata in /usr/share
|
||||
* 2. tessdata in executable directory (for bundled tessdata)
|
||||
* 3. tessdata in current working directory
|
||||
* 4. tessdata in system locations (/usr/share, etc.)
|
||||
* 5. tessdata in default Tesseract install location (Windows)
|
||||
*/
|
||||
char *probe_tessdata_location(const char *lang)
|
||||
{
|
||||
@@ -116,6 +185,7 @@ char *probe_tessdata_location(const char *lang)
|
||||
|
||||
const char *paths[] = {
|
||||
getenv("TESSDATA_PREFIX"),
|
||||
get_executable_directory(),
|
||||
"./",
|
||||
"/usr/share/",
|
||||
"/usr/local/share/",
|
||||
@@ -211,6 +281,13 @@ void *init_ocr(int lang_index)
|
||||
// set PSM mode
|
||||
TessBaseAPISetPageSegMode(ctx->api, ccx_options.psm);
|
||||
|
||||
// Set character blacklist to prevent common OCR errors (e.g. | vs I)
|
||||
// These characters are rarely used in subtitles but often misrecognized
|
||||
if (ccx_options.ocr_blacklist)
|
||||
{
|
||||
TessBaseAPISetVariable(ctx->api, "tessedit_char_blacklist", "|\\`_~");
|
||||
}
|
||||
|
||||
free(pars_vec);
|
||||
free(pars_values);
|
||||
|
||||
@@ -281,6 +358,176 @@ BOX *ignore_alpha_at_edge(png_byte *alpha, unsigned char *indata, int w, int h,
|
||||
return cropWindow;
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure to hold the vertical boundaries of a detected text line.
|
||||
*/
|
||||
struct line_bounds
|
||||
{
|
||||
int start_y; // Top row of line (inclusive)
|
||||
int end_y; // Bottom row of line (inclusive)
|
||||
};
|
||||
|
||||
/**
|
||||
* Detects horizontal text line boundaries in a bitmap by finding rows of
|
||||
* fully transparent pixels that separate lines of text.
|
||||
*
|
||||
* @param alpha Palette alpha values (indexed by pixel value)
|
||||
* @param indata Bitmap pixel data (palette indices, w*h bytes)
|
||||
* @param w Image width
|
||||
* @param h Image height
|
||||
* @param lines Output: allocated array of line boundaries (caller must free)
|
||||
* @param num_lines Output: number of lines found
|
||||
* @param min_gap Minimum consecutive transparent rows to count as line separator
|
||||
* @return 0 on success, -1 on failure
|
||||
*/
|
||||
static int detect_text_lines(png_byte *alpha, unsigned char *indata,
|
||||
int w, int h,
|
||||
struct line_bounds **lines, int *num_lines,
|
||||
int min_gap)
|
||||
{
|
||||
if (!alpha || !indata || !lines || !num_lines || w <= 0 || h <= 0)
|
||||
return -1;
|
||||
|
||||
*lines = NULL;
|
||||
*num_lines = 0;
|
||||
|
||||
// Allocate array to track which rows have visible content
|
||||
int *row_has_content = (int *)malloc(h * sizeof(int));
|
||||
if (!row_has_content)
|
||||
return -1;
|
||||
|
||||
// Scan each row to determine if it has any visible (non-transparent) pixels
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
row_has_content[i] = 0;
|
||||
for (int j = 0; j < w; j++)
|
||||
{
|
||||
int index = indata[i * w + j];
|
||||
if (alpha[index] != 0)
|
||||
{
|
||||
row_has_content[i] = 1;
|
||||
break; // Found visible pixel, no need to check rest of row
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Count lines by finding runs of content rows separated by gaps
|
||||
int max_lines = (h / 2) + 1; // Conservative upper bound
|
||||
struct line_bounds *temp_lines = (struct line_bounds *)malloc(max_lines * sizeof(struct line_bounds));
|
||||
if (!temp_lines)
|
||||
{
|
||||
free(row_has_content);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int line_count = 0;
|
||||
int in_line = 0;
|
||||
int line_start = 0;
|
||||
int gap_count = 0;
|
||||
|
||||
for (int i = 0; i < h; i++)
|
||||
{
|
||||
if (row_has_content[i])
|
||||
{
|
||||
if (!in_line)
|
||||
{
|
||||
// Start of a new line
|
||||
line_start = i;
|
||||
in_line = 1;
|
||||
}
|
||||
gap_count = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (in_line)
|
||||
{
|
||||
gap_count++;
|
||||
if (gap_count >= min_gap)
|
||||
{
|
||||
// End of line found (gap is large enough)
|
||||
if (line_count < max_lines)
|
||||
{
|
||||
temp_lines[line_count].start_y = line_start;
|
||||
temp_lines[line_count].end_y = i - gap_count;
|
||||
line_count++;
|
||||
}
|
||||
in_line = 0;
|
||||
gap_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle last line if we ended while still in a line
|
||||
if (in_line && line_count < max_lines)
|
||||
{
|
||||
temp_lines[line_count].start_y = line_start;
|
||||
// Find the last row with content
|
||||
int last_content = h - 1;
|
||||
while (last_content > line_start && !row_has_content[last_content])
|
||||
last_content--;
|
||||
temp_lines[line_count].end_y = last_content;
|
||||
line_count++;
|
||||
}
|
||||
|
||||
free(row_has_content);
|
||||
|
||||
if (line_count == 0)
|
||||
{
|
||||
free(temp_lines);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Shrink allocation to actual size
|
||||
*lines = (struct line_bounds *)realloc(temp_lines, line_count * sizeof(struct line_bounds));
|
||||
if (!*lines)
|
||||
{
|
||||
*lines = temp_lines; // Keep original if realloc fails
|
||||
}
|
||||
*num_lines = line_count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs OCR on a single text line image using PSM 7 (single line mode).
|
||||
*
|
||||
* @param ctx OCR context (contains Tesseract API)
|
||||
* @param line_pix Pre-processed PIX for single line (grayscale, inverted)
|
||||
* @return Recognized text (caller must free with free()), or NULL on failure
|
||||
*/
|
||||
static char *ocr_single_line(struct ocrCtx *ctx, PIX *line_pix)
|
||||
{
|
||||
if (!ctx || !ctx->api || !line_pix)
|
||||
return NULL;
|
||||
|
||||
// Save current PSM
|
||||
int saved_psm = TessBaseAPIGetPageSegMode(ctx->api);
|
||||
|
||||
// Set PSM 7 for single line recognition
|
||||
TessBaseAPISetPageSegMode(ctx->api, 7); // PSM_SINGLE_LINE
|
||||
|
||||
// Perform OCR
|
||||
TessBaseAPISetImage2(ctx->api, line_pix);
|
||||
BOOL ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
|
||||
char *text = NULL;
|
||||
if (!ret)
|
||||
{
|
||||
char *tess_text = TessBaseAPIGetUTF8Text(ctx->api);
|
||||
if (tess_text)
|
||||
{
|
||||
text = strdup(tess_text);
|
||||
TessDeleteText(tess_text);
|
||||
}
|
||||
}
|
||||
|
||||
// Restore original PSM
|
||||
TessBaseAPISetPageSegMode(ctx->api, saved_psm);
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
void debug_tesseract(struct ocrCtx *ctx, char *dump_path)
|
||||
{
|
||||
#ifdef OCR_DEBUG
|
||||
@@ -327,6 +574,8 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
unsigned int *data, *ppixel;
|
||||
BOOL tess_ret = FALSE;
|
||||
struct ocrCtx *ctx = arg;
|
||||
char *combined_text = NULL; // Used by line-split mode
|
||||
size_t combined_len = 0; // Used by line-split mode
|
||||
pix = pixCreate(w, h, 32);
|
||||
color_pix = pixCreate(w, h, 32);
|
||||
if (pix == NULL || color_pix == NULL)
|
||||
@@ -406,6 +655,98 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Line splitting mode: detect lines and OCR each separately with PSM 7
|
||||
if (ccx_options.ocr_line_split && h > 30)
|
||||
{
|
||||
struct line_bounds *lines = NULL;
|
||||
int num_lines = 0;
|
||||
|
||||
// Use min_gap of 3 rows to detect line boundaries
|
||||
if (detect_text_lines(alpha, indata, w, h, &lines, &num_lines, 3) == 0 && num_lines > 1)
|
||||
{
|
||||
// Multiple lines detected - process each separately with PSM 7
|
||||
// (combined_text and combined_len are declared at function scope)
|
||||
|
||||
for (int line_idx = 0; line_idx < num_lines; line_idx++)
|
||||
{
|
||||
int line_h = lines[line_idx].end_y - lines[line_idx].start_y + 1;
|
||||
if (line_h <= 0)
|
||||
continue;
|
||||
|
||||
// Extract line region from the grayscale image
|
||||
BOX *line_box = boxCreate(0, lines[line_idx].start_y,
|
||||
pixGetWidth(cpix_gs), line_h);
|
||||
PIX *line_pix_raw = pixClipRectangle(cpix_gs, line_box, NULL);
|
||||
boxDestroy(&line_box);
|
||||
|
||||
if (line_pix_raw)
|
||||
{
|
||||
// Add white padding around the line (helps Tesseract with edge characters)
|
||||
// The image is inverted (dark text on light bg), so add white (255) border
|
||||
int padding = 10;
|
||||
PIX *line_pix = pixAddBorderGeneral(line_pix_raw, padding, padding, padding, padding, 255);
|
||||
pixDestroy(&line_pix_raw);
|
||||
if (!line_pix)
|
||||
continue;
|
||||
char *line_text = ocr_single_line(ctx, line_pix);
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
if (line_text)
|
||||
{
|
||||
// Trim trailing whitespace from line
|
||||
size_t line_len = strlen(line_text);
|
||||
while (line_len > 0 && (line_text[line_len - 1] == '\n' ||
|
||||
line_text[line_len - 1] == '\r' ||
|
||||
line_text[line_len - 1] == ' '))
|
||||
{
|
||||
line_text[--line_len] = '\0';
|
||||
}
|
||||
|
||||
if (line_len > 0)
|
||||
{
|
||||
// Append to combined result
|
||||
size_t new_len = combined_len + line_len + 2; // +1 for newline, +1 for null
|
||||
char *new_combined = (char *)realloc(combined_text, new_len);
|
||||
if (new_combined)
|
||||
{
|
||||
combined_text = new_combined;
|
||||
if (combined_len > 0)
|
||||
{
|
||||
combined_text[combined_len++] = '\n';
|
||||
}
|
||||
strcpy(combined_text + combined_len, line_text);
|
||||
combined_len += line_len;
|
||||
}
|
||||
}
|
||||
free(line_text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(lines);
|
||||
|
||||
if (combined_text && combined_len > 0)
|
||||
{
|
||||
// Successfully processed lines - skip whole-image OCR
|
||||
// but continue to color detection below
|
||||
goto line_split_color_detection;
|
||||
}
|
||||
|
||||
// If we got here, line splitting didn't produce results
|
||||
// Fall through to whole-image OCR
|
||||
if (combined_text)
|
||||
free(combined_text);
|
||||
combined_text = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Line detection failed or only 1 line - fall through to whole-image OCR
|
||||
if (lines)
|
||||
free(lines);
|
||||
}
|
||||
}
|
||||
|
||||
// Standard whole-image OCR path
|
||||
TessBaseAPISetImage2(ctx->api, cpix_gs);
|
||||
tess_ret = TessBaseAPIRecognize(ctx->api, NULL);
|
||||
debug_tesseract(ctx, "./temp/");
|
||||
@@ -448,6 +789,14 @@ char *ocr_bitmap(void *arg, png_color *palette, png_byte *alpha, unsigned char *
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In ocr_bitmap: Out of memory allocating text_out.");
|
||||
}
|
||||
|
||||
// Jump target for line-split mode: use combined_text and continue with color detection
|
||||
if (0)
|
||||
{
|
||||
line_split_color_detection:
|
||||
text_out = combined_text;
|
||||
combined_text = NULL; // Transfer ownership
|
||||
}
|
||||
|
||||
// Begin color detection
|
||||
// Using tlt_config.nofontcolor or ccx_options.nofontcolor (true when "--no-fontcolor" parameter used) to skip color detection if not required
|
||||
// This is also skipped if --no-spupngocr is set since the OCR output won't be used anyway
|
||||
|
||||
@@ -14,7 +14,19 @@ void dinit_write(struct ccx_s_write *wb)
|
||||
return;
|
||||
}
|
||||
if (wb->fh > 0)
|
||||
{
|
||||
// Check if the file is empty before closing
|
||||
off_t file_size = lseek(wb->fh, 0, SEEK_END);
|
||||
close(wb->fh);
|
||||
|
||||
// Delete empty output files to avoid generating useless 0-byte files
|
||||
// This commonly happens with -12 option when one field has no captions
|
||||
if (file_size == 0 && wb->filename != NULL)
|
||||
{
|
||||
unlink(wb->filename);
|
||||
mprint("Deleted empty output file: %s\n", wb->filename);
|
||||
}
|
||||
}
|
||||
freep(&wb->filename);
|
||||
freep(&wb->original_filename);
|
||||
if (wb->with_semaphore && wb->semaphore_filename)
|
||||
|
||||
@@ -13,8 +13,11 @@
|
||||
#include "../lib_hash/sha2.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#if __has_include(<utf8proc.h>)
|
||||
#include <utf8proc.h>
|
||||
#else
|
||||
#include <utf8proc/utf8proc.h>
|
||||
|
||||
#endif
|
||||
#ifdef ENABLE_OCR
|
||||
#include <tesseract/capi.h>
|
||||
#include <leptonica/allheaders.h>
|
||||
@@ -398,6 +401,13 @@ void print_usage(void)
|
||||
mprint(" 12 Sparse text with OSD.\n");
|
||||
mprint(" 13 Raw line. Treat the image as a single text line,\n");
|
||||
mprint(" bypassing hacks that are Tesseract-specific.\n");
|
||||
mprint(" --ocr-line-split: Split subtitle images into lines before OCR.\n");
|
||||
mprint(" Uses PSM 7 (single text line mode) for each line,\n");
|
||||
mprint(" which can improve accuracy for multi-line bitmap subtitles\n");
|
||||
mprint(" (VOBSUB, DVD, DVB).\n");
|
||||
mprint(" --no-ocr-blacklist: Disable the OCR character blacklist. By default,\n");
|
||||
mprint(" CCExtractor blacklists characters like |, \\, `, _, ~\n");
|
||||
mprint(" that are commonly misrecognized (e.g. 'I' as '|').\n");
|
||||
mprint(" --mkvlang: For MKV subtitles, select which language's caption\n");
|
||||
mprint(" stream will be processed. e.g. 'eng' for English.\n");
|
||||
mprint(" Language codes can be either the 3 letters bibliographic\n");
|
||||
|
||||
@@ -78,6 +78,30 @@ void detect_stream_type(struct ccx_demuxer *ctx)
|
||||
ctx->startbytes[7] == 0xf8)
|
||||
ctx->stream_mode = CCX_SM_MCPOODLESRAW;
|
||||
}
|
||||
// Check for SCC (Scenarist Closed Caption) text format
|
||||
// SCC files start with "Scenarist_SCC V1.0" (18 bytes), optionally with UTF-8 BOM (3 bytes)
|
||||
if (ctx->stream_mode == CCX_SM_ELEMENTARY_OR_NOT_FOUND)
|
||||
{
|
||||
unsigned char *check_buf = ctx->startbytes;
|
||||
int check_pos = 0;
|
||||
|
||||
// Skip UTF-8 BOM if present
|
||||
if (ctx->startbytes_avail >= 3 &&
|
||||
ctx->startbytes[0] == 0xEF &&
|
||||
ctx->startbytes[1] == 0xBB &&
|
||||
ctx->startbytes[2] == 0xBF)
|
||||
{
|
||||
check_buf += 3;
|
||||
check_pos = 3;
|
||||
}
|
||||
|
||||
if (ctx->startbytes_avail >= check_pos + 18 &&
|
||||
memcmp(check_buf, "Scenarist_SCC V1.0", 18) == 0)
|
||||
{
|
||||
ctx->stream_mode = CCX_SM_SCC;
|
||||
mprint("Detected SCC (Scenarist Closed Caption) format\n");
|
||||
}
|
||||
}
|
||||
#ifdef WTV_DEBUG
|
||||
if (ctx->stream_mode == CCX_SM_ELEMENTARY_OR_NOT_FOUND && ctx->startbytes_avail >= 6)
|
||||
{
|
||||
|
||||
@@ -434,10 +434,21 @@ void remap_g0_charset(uint8_t c)
|
||||
{
|
||||
if (c != primary_charset.current)
|
||||
{
|
||||
if (c >= 56)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset ID 0x%1x.%1x is out of bounds\n", (c >> 3), (c & 0x7));
|
||||
return;
|
||||
}
|
||||
uint8_t m = G0_LATIN_NATIONAL_SUBSETS_MAP[c];
|
||||
if (m == 0xff)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset ID 0x%1x.%1x is not implemented\n", (c >> 3), (c & 0x7));
|
||||
return;
|
||||
}
|
||||
else if (m >= 14)
|
||||
{
|
||||
fprintf(stderr, "- G0 Latin National Subset index %d is out of bounds\n", m);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1392,7 +1403,7 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
uint8_t pes_ext_flag;
|
||||
// extension
|
||||
uint32_t t = 0;
|
||||
uint16_t i;
|
||||
uint32_t i;
|
||||
struct TeletextCtx *ctx = dec_ctx->private_data;
|
||||
ctx->sentence_cap = sentence_cap;
|
||||
|
||||
@@ -1468,6 +1479,9 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
if (pes_packet_length > size)
|
||||
pes_packet_length = size;
|
||||
|
||||
if (size < 9)
|
||||
return CCX_OK;
|
||||
|
||||
// optional PES header marker bits (10.. ....)
|
||||
if ((buffer[6] & 0xc0) == 0x80)
|
||||
{
|
||||
@@ -1480,8 +1494,16 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
{
|
||||
if ((optional_pes_header_included == YES) && ((buffer[7] & 0x80) > 0))
|
||||
{
|
||||
ctx->using_pts = YES;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS available\n");
|
||||
if (size < 14)
|
||||
{
|
||||
ctx->using_pts = NO;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS signaled but packet too short, using TS PCR\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->using_pts = YES;
|
||||
dbg_print(CCX_DMT_TELETEXT, "- PID 0xbd PTS available\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1554,11 +1576,17 @@ int tlt_process_pes_packet(struct lib_cc_decode *dec_ctx, uint8_t *buffer, uint1
|
||||
if (optional_pes_header_included == YES)
|
||||
i += 3 + optional_pes_header_length;
|
||||
|
||||
while (i <= pes_packet_length - 6)
|
||||
while (i + 2 <= pes_packet_length)
|
||||
{
|
||||
uint8_t data_unit_id = buffer[i++];
|
||||
uint8_t data_unit_len = buffer[i++];
|
||||
|
||||
if (i + data_unit_len > pes_packet_length)
|
||||
{
|
||||
dbg_print(CCX_DMT_TELETEXT, "- Teletext data unit length %u exceeds PES packet length, stopping.\n", data_unit_len);
|
||||
break;
|
||||
}
|
||||
|
||||
if ((data_unit_id == DATA_UNIT_EBU_TELETEXT_NONSUBTITLE) || (data_unit_id == DATA_UNIT_EBU_TELETEXT_SUBTITLE))
|
||||
{
|
||||
// teletext payload has always size 44 bytes
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "dvb_subtitle_decoder.h"
|
||||
#include "ccx_decoders_isdb.h"
|
||||
#include "file_buffer.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef DEBUG_SAVE_TS_PACKETS
|
||||
#include <sys/types.h>
|
||||
@@ -153,12 +154,11 @@ enum ccx_bufferdata_type get_buffer_type(struct cap_info *cinfo)
|
||||
{
|
||||
return CCX_TELETEXT;
|
||||
}
|
||||
else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
return CCX_PRIVATE_MPEG2_CC;
|
||||
}
|
||||
else if (cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2 && cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
else if ((cinfo->stream == CCX_STREAM_TYPE_PRIVATE_MPEG2 ||
|
||||
cinfo->stream == CCX_STREAM_TYPE_PRIVATE_USER_MPEG2) &&
|
||||
cinfo->codec == CCX_CODEC_ATSC_CC)
|
||||
{
|
||||
// ATSC CC can be in either private stream type - process both as PES
|
||||
return CCX_PES;
|
||||
}
|
||||
else
|
||||
@@ -567,17 +567,15 @@ int copy_capbuf_demux_data(struct ccx_demuxer *ctx, struct demuxer_data **data,
|
||||
if (!cinfo->capbuf || !cinfo->capbuflen)
|
||||
return -1;
|
||||
|
||||
if (ptr->bufferdatatype == CCX_PRIVATE_MPEG2_CC)
|
||||
{
|
||||
dump(CCX_DMT_GENERIC_NOTICES, cinfo->capbuf, cinfo->capbuflen, 0, 1);
|
||||
// Bogus data, so we return something
|
||||
ptr->buffer[ptr->len++] = 0xFA;
|
||||
ptr->buffer[ptr->len++] = 0x80;
|
||||
ptr->buffer[ptr->len++] = 0x80;
|
||||
return CCX_OK;
|
||||
}
|
||||
if (cinfo->codec == CCX_CODEC_TELETEXT)
|
||||
{
|
||||
if (cinfo->capbuflen > BUFSIZE - ptr->len)
|
||||
{
|
||||
fatal(CCX_COMMON_EXIT_BUG_BUG,
|
||||
"Teletext packet (%" PRId64 ") larger than remaining buffer (%" PRId64 ").\n",
|
||||
cinfo->capbuflen, (int64_t)(BUFSIZE - ptr->len));
|
||||
}
|
||||
|
||||
memcpy(ptr->buffer + ptr->len, cinfo->capbuf, cinfo->capbuflen);
|
||||
ptr->len += cinfo->capbuflen;
|
||||
return CCX_OK;
|
||||
@@ -672,7 +670,6 @@ void cinfo_cremation(struct ccx_demuxer *ctx, struct demuxer_data **data)
|
||||
|
||||
int copy_payload_to_capbuf(struct cap_info *cinfo, struct ts_payload *payload)
|
||||
{
|
||||
int newcapbuflen;
|
||||
|
||||
if (cinfo->ignore == CCX_TRUE &&
|
||||
((cinfo->stream != CCX_STREAM_TYPE_VIDEO_MPEG2 &&
|
||||
@@ -698,17 +695,22 @@ int copy_payload_to_capbuf(struct cap_info *cinfo, struct ts_payload *payload)
|
||||
}
|
||||
|
||||
// copy payload to capbuf
|
||||
newcapbuflen = cinfo->capbuflen + payload->length;
|
||||
if (newcapbuflen > cinfo->capbufsize)
|
||||
if (payload->length > INT64_MAX - cinfo->capbuflen)
|
||||
{
|
||||
unsigned char *new_capbuf = (unsigned char *)realloc(cinfo->capbuf, newcapbuflen);
|
||||
mprint("Error: capbuf size overflow\n");
|
||||
return -1;
|
||||
}
|
||||
int64_t newcapbuflen = (int64_t)cinfo->capbuflen + payload->length;
|
||||
if (newcapbuflen > (int64_t)cinfo->capbufsize)
|
||||
{
|
||||
unsigned char *new_capbuf = (unsigned char *)realloc(cinfo->capbuf, (size_t)newcapbuflen);
|
||||
if (!new_capbuf)
|
||||
return -1;
|
||||
cinfo->capbuf = new_capbuf;
|
||||
cinfo->capbufsize = newcapbuflen;
|
||||
cinfo->capbufsize = newcapbuflen; // Note: capbufsize is int in struct cap_info
|
||||
}
|
||||
memcpy(cinfo->capbuf + cinfo->capbuflen, payload->start, payload->length);
|
||||
cinfo->capbuflen = newcapbuflen;
|
||||
cinfo->capbuflen = newcapbuflen; // Note: capbuflen is int in struct cap_info
|
||||
|
||||
return CCX_OK;
|
||||
}
|
||||
|
||||
@@ -50,8 +50,8 @@ struct EPG_rating
|
||||
struct EPG_event
|
||||
{
|
||||
uint32_t id;
|
||||
char start_time_string[21]; //"YYYYMMDDHHMMSS +0000" = 20 chars
|
||||
char end_time_string[21];
|
||||
char start_time_string[74]; // "YYYYMMDDHHMMSS +0000" = 20 chars, 74 to silence compiler warning
|
||||
char end_time_string[74];
|
||||
uint8_t running_status;
|
||||
uint8_t free_ca_mode;
|
||||
char ISO_639_language_code[4];
|
||||
|
||||
@@ -173,7 +173,7 @@ static void *init_private_data(enum ccx_code_type codec)
|
||||
case CCX_CODEC_TELETEXT:
|
||||
return telxcc_init();
|
||||
case CCX_CODEC_DVB:
|
||||
return dvbsub_init_decoder(NULL, 0);
|
||||
return dvbsub_init_decoder(NULL);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -399,9 +399,7 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
ret = parse_dvb_description(&cnf, es_info, desc_len);
|
||||
if (ret < 0)
|
||||
break;
|
||||
ptr = dvbsub_init_decoder(&cnf, pinfo->initialized_ocr);
|
||||
if (!pinfo->initialized_ocr)
|
||||
pinfo->initialized_ocr = 1;
|
||||
ptr = dvbsub_init_decoder(&cnf);
|
||||
if (ptr == NULL)
|
||||
break;
|
||||
update_capinfo(ctx, elementary_PID, stream_type, CCX_CODEC_DVB, program_number, ptr);
|
||||
@@ -413,9 +411,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
{
|
||||
// if this any generally used video stream tyoe get clashed with ATSC/SCTE standard
|
||||
// then this code can go in some atsc flag
|
||||
// Validate ES_info_length against buffer bounds to prevent heap overflow
|
||||
if (i + 5 + ES_info_length > len)
|
||||
break;
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) > es_info; es_info += desc_len)
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
int nb_service;
|
||||
int is_608;
|
||||
@@ -439,9 +446,18 @@ int parse_PMT(struct ccx_demuxer *ctx, unsigned char *buf, int len, struct progr
|
||||
|
||||
if (IS_FEASIBLE(ctx->codec, ctx->nocodec, CCX_CODEC_TELETEXT) && ES_info_length && stream_type == CCX_STREAM_TYPE_PRIVATE_MPEG2) // MPEG-2 Packetized Elementary Stream packets containing private data
|
||||
{
|
||||
// Validate ES_info_length against buffer bounds
|
||||
if (i + 5 + ES_info_length > len)
|
||||
continue;
|
||||
|
||||
unsigned char *es_info = buf + i + 5;
|
||||
for (desc_len = 0; (buf + i + 5 + ES_info_length) - es_info; es_info += desc_len)
|
||||
unsigned char *es_info_end = buf + i + 5 + ES_info_length;
|
||||
for (desc_len = 0; es_info_end > es_info; es_info += desc_len)
|
||||
{
|
||||
// Need at least 2 bytes for descriptor_tag and desc_len
|
||||
if (es_info + 2 > es_info_end)
|
||||
break;
|
||||
|
||||
enum ccx_mpeg_descriptor descriptor_tag = (enum ccx_mpeg_descriptor)(*es_info++);
|
||||
desc_len = (*es_info++);
|
||||
if (!IS_VALID_TELETEXT_DESC(descriptor_tag))
|
||||
@@ -576,6 +592,15 @@ void ts_buffer_psi_packet(struct ccx_demuxer *ctx)
|
||||
else if (ccounter == ctx->PID_buffers[pid]->prev_ccounter + 1 || (ctx->PID_buffers[pid]->prev_ccounter == 0x0f && ccounter == 0))
|
||||
{
|
||||
ctx->PID_buffers[pid]->prev_ccounter = ccounter;
|
||||
// Check for integer overflow and reasonable size limit (1MB)
|
||||
if (ctx->PID_buffers[pid]->buffer_length > 1024 * 1024 ||
|
||||
payload_length > 1024 * 1024 ||
|
||||
ctx->PID_buffers[pid]->buffer_length + payload_length > 1024 * 1024)
|
||||
{
|
||||
dbg_print(CCX_DMT_GENERIC_NOTICES, "\rWarning: PSI buffer for PID %u exceeded reasonable limit (1MB), discarding.\n", pid);
|
||||
return;
|
||||
}
|
||||
|
||||
void *tmp = realloc(ctx->PID_buffers[pid]->buffer, ctx->PID_buffers[pid]->buffer_length + payload_length);
|
||||
if (tmp == NULL)
|
||||
{
|
||||
@@ -614,6 +639,10 @@ int parse_PAT(struct ccx_demuxer *ctx)
|
||||
payload_start = ctx->PID_buffers[0]->buffer + pointer_field + 1;
|
||||
payload_length = ctx->PID_buffers[0]->buffer_length - (pointer_field + 1);
|
||||
|
||||
// Need at least 8 bytes to read header fields
|
||||
if (payload_length < 8)
|
||||
return 0;
|
||||
|
||||
section_number = payload_start[6];
|
||||
last_section_number = payload_start[7];
|
||||
|
||||
|
||||
@@ -87,13 +87,11 @@ void EPG_ATSC_decode_ETT_text(uint8_t *offset, uint32_t length, struct EPG_event
|
||||
|
||||
for (j = 0; j < number_segments && offset < offset_end; j++)
|
||||
{
|
||||
uint8_t compression_type, mode, number_bytes;
|
||||
uint8_t number_bytes;
|
||||
|
||||
if (offset + 3 > offset_end)
|
||||
return;
|
||||
|
||||
compression_type = offset[0];
|
||||
mode = offset[1];
|
||||
number_bytes = offset[2];
|
||||
offset += 3;
|
||||
|
||||
@@ -127,7 +125,7 @@ void EPG_ATSC_calc_time(char *output, uint32_t time)
|
||||
timeinfo.tm_hour = 0;
|
||||
timeinfo.tm_isdst = -1;
|
||||
mktime(&timeinfo);
|
||||
snprintf(output, 21, "%02d%02d%02d%02d%02d%02d +0000", timeinfo.tm_year + 1900, timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min, timeinfo.tm_sec);
|
||||
snprintf(output, 74, "%02d%02d%02d%02d%02d%02d +0000", timeinfo.tm_year + 1900, timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min, timeinfo.tm_sec);
|
||||
}
|
||||
|
||||
// Fills event.start_time_string in XMLTV format with passed DVB time
|
||||
|
||||
@@ -179,16 +179,21 @@ void mprint(const char *fmt, ...)
|
||||
if (!ccx_options.messages_target)
|
||||
return;
|
||||
va_start(args, fmt);
|
||||
if (ccx_options.messages_target == CCX_MESSAGES_STDOUT)
|
||||
|
||||
FILE *target = (ccx_options.messages_target == CCX_MESSAGES_STDOUT) ? stdout : stderr;
|
||||
|
||||
if (fmt[0] == '\r')
|
||||
{
|
||||
vfprintf(stdout, fmt, args);
|
||||
fflush(stdout);
|
||||
}
|
||||
else
|
||||
{
|
||||
vfprintf(stderr, fmt, args);
|
||||
fflush(stderr);
|
||||
#ifndef _WIN32
|
||||
fprintf(target, "\r\033[K"); // Clear the line first
|
||||
fmt++; // Skip the '\r' so only the clean text gets printed next
|
||||
#endif
|
||||
}
|
||||
// Windows (legacy console) does not support ANSI sequences; fallback to standard \r; and vfprintf below handles it the old-fashioned way.
|
||||
|
||||
vfprintf(target, fmt, args);
|
||||
fflush(target);
|
||||
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
||||
517
src/lib_ccx/vobsub_decoder.c
Normal file
517
src/lib_ccx/vobsub_decoder.c
Normal file
@@ -0,0 +1,517 @@
|
||||
/**
|
||||
* VOBSUB decoder with OCR support
|
||||
*
|
||||
* Decodes VOBSUB (DVD bitmap) subtitles from MKV, MP4, or standalone idx/sub files
|
||||
* and optionally performs OCR to convert to text.
|
||||
*
|
||||
* SPU (SubPicture Unit) format:
|
||||
* - 2 bytes: total SPU size
|
||||
* - 2 bytes: offset to control sequence
|
||||
* - RLE-encoded pixel data (interlaced)
|
||||
* - Control sequence with timing, colors, coordinates
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lib_ccx.h"
|
||||
#include "vobsub_decoder.h"
|
||||
#include "ccx_common_common.h"
|
||||
#include "ccx_decoders_structs.h"
|
||||
#include "ccx_common_constants.h"
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
#include "ocr.h"
|
||||
#endif
|
||||
|
||||
#define RGBA(r, g, b, a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
|
||||
|
||||
/*
 * Values collected from one SPU control area.
 *
 * start_time / stop_time hold (date << 10) / 90, where date is the 16-bit
 * delay field of a control sequence. That expression can reach 745642, so
 * the fields must be wider than 16 bits or long delays silently wrap.
 */
struct vobsub_ctrl_seq
{
	uint8_t color[4];      /* Color indices */
	uint8_t alpha[4];      /* Alpha values */
	uint16_t coord[4];     /* x1, x2, y1, y2 */
	uint16_t pixoffset[2]; /* Offset to 1st and 2nd graphic line */
	uint32_t start_time;   /* Delay of the "start display" command */
	uint32_t stop_time;    /* Delay of the "stop display" command */
};
|
||||
|
||||
struct vobsub_ctx
|
||||
{
|
||||
uint32_t palette[16]; /* RGBA palette from idx header */
|
||||
int palette_parsed; /* 1 if palette has been parsed */
|
||||
struct vobsub_ctrl_seq ctrl;
|
||||
unsigned char *bitmap; /* Decoded bitmap */
|
||||
#ifdef ENABLE_OCR
|
||||
void *ocr_ctx; /* OCR context */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
 * Pop the next 4-bit nibble of the RLE stream.
 *
 * *nextbyte is a two-nibble look-ahead window: its upper nibble is returned
 * and a fresh nibble from buffer is shifted in at the bottom. *m selects
 * which half of buffer[*pos] is fetched (0: advance *pos first and take the
 * upper half, otherwise take the lower half) and is toggled on every call.
 * No bounds checking is done here; the caller must keep *pos inside buffer.
 */
static int vobsub_get_bits(unsigned char *buffer, uint8_t *nextbyte, int *pos, int *m)
{
	const int popped = *nextbyte >> 4;
	int incoming;

	if (*m == 0)
		++*pos;

	if (*m)
		incoming = buffer[*pos] & 0x0f;
	else
		incoming = buffer[*pos] >> 4;

	*nextbyte = (uint8_t)((*nextbyte << 4) | incoming);
	*m = (*m + 1) % 2;
	return popped;
}
|
||||
|
||||
/*
 * Read one variable-length RLE code (1 to 4 nibbles).
 *
 * Nibbles are appended while the accumulated value is still below the
 * threshold for the current width (4, 0x10, 0x40). The low two bits of the
 * finished code are stored in *color; the remaining bits are returned as the
 * run length (0 is possible and is interpreted by the caller).
 */
static int vobsub_rle_decode(unsigned char *buffer, int *color, uint8_t *nextbyte, int *pos, int *m)
{
	uint16_t code = vobsub_get_bits(buffer, nextbyte, pos, m);
	int threshold;

	for (threshold = 4; threshold <= 0x40 && code < threshold; threshold <<= 2)
		code = (code << 4) | vobsub_get_bits(buffer, nextbyte, pos, m);

	*color = code & 0x3;
	return code >> 2;
}
|
||||
|
||||
/* Decode bitmap from RLE-encoded SPU data */
|
||||
static void vobsub_get_bitmap(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size)
|
||||
{
|
||||
int w, h, x, lineno;
|
||||
int pos, color, m;
|
||||
int len;
|
||||
uint8_t nextbyte;
|
||||
unsigned char *buffp;
|
||||
|
||||
w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
|
||||
h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
|
||||
|
||||
if (w <= 0 || h <= 0 || w > 4096 || h > 4096)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid dimensions w=%d h=%d\n", w, h);
|
||||
return;
|
||||
}
|
||||
|
||||
pos = ctx->ctrl.pixoffset[0];
|
||||
if (pos >= (int)buf_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Pixel offset out of bounds\n");
|
||||
return;
|
||||
}
|
||||
|
||||
m = 0;
|
||||
nextbyte = buffer[pos];
|
||||
|
||||
ctx->bitmap = malloc(w * h);
|
||||
if (!ctx->bitmap)
|
||||
return;
|
||||
memset(ctx->bitmap, 0, w * h);
|
||||
|
||||
buffp = ctx->bitmap;
|
||||
x = 0;
|
||||
lineno = 0;
|
||||
|
||||
/* Decode first field (odd lines in interlaced) */
|
||||
while (lineno < (h + 1) / 2 && pos < (int)buf_size)
|
||||
{
|
||||
len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
|
||||
if (len > (w - x) || len == 0)
|
||||
len = w - x;
|
||||
|
||||
memset(buffp + x, color, len);
|
||||
x += len;
|
||||
if (x >= w)
|
||||
{
|
||||
x = 0;
|
||||
++lineno;
|
||||
buffp += (2 * w); /* Skip 1 line due to interlacing */
|
||||
if ((m == 1))
|
||||
{
|
||||
vobsub_get_bits(buffer, &nextbyte, &pos, &m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Decode second field (even lines) */
|
||||
if (pos > ctx->ctrl.pixoffset[1])
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Error creating bitmap - overlapping fields\n");
|
||||
return;
|
||||
}
|
||||
|
||||
pos = ctx->ctrl.pixoffset[1];
|
||||
if (pos >= (int)buf_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Second field offset out of bounds\n");
|
||||
return;
|
||||
}
|
||||
|
||||
buffp = ctx->bitmap + w;
|
||||
x = 0;
|
||||
lineno = 0;
|
||||
m = 0;
|
||||
nextbyte = buffer[pos];
|
||||
|
||||
while (lineno < h / 2 && pos < (int)buf_size)
|
||||
{
|
||||
len = vobsub_rle_decode(buffer, &color, &nextbyte, &pos, &m);
|
||||
if (len > (w - x) || len == 0)
|
||||
len = w - x;
|
||||
|
||||
memset(buffp + x, color, len);
|
||||
x += len;
|
||||
if (x >= w)
|
||||
{
|
||||
x = 0;
|
||||
++lineno;
|
||||
buffp += (2 * w);
|
||||
if ((m == 1))
|
||||
{
|
||||
vobsub_get_bits(buffer, &nextbyte, &pos, &m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse control sequence from SPU data */
|
||||
static void vobsub_decode_control(struct vobsub_ctx *ctx, unsigned char *buffer, size_t buf_size, uint16_t ctrl_offset)
|
||||
{
|
||||
int pos = ctrl_offset;
|
||||
int pack_end = 0;
|
||||
uint16_t date, next_ctrl;
|
||||
|
||||
memset(&ctx->ctrl, 0, sizeof(ctx->ctrl));
|
||||
|
||||
while (pos + 4 <= (int)buf_size && pack_end == 0)
|
||||
{
|
||||
date = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
next_ctrl = (buffer[pos + 2] << 8) | buffer[pos + 3];
|
||||
if (next_ctrl == pos)
|
||||
pack_end = 1;
|
||||
pos += 4;
|
||||
|
||||
int seq_end = 0;
|
||||
while (seq_end == 0 && pos < (int)buf_size)
|
||||
{
|
||||
int command = buffer[pos++];
|
||||
switch (command)
|
||||
{
|
||||
case 0x01: /* Start display */
|
||||
ctx->ctrl.start_time = (date << 10) / 90;
|
||||
break;
|
||||
case 0x02: /* Stop display */
|
||||
ctx->ctrl.stop_time = (date << 10) / 90;
|
||||
break;
|
||||
case 0x03: /* SET_COLOR */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.color[3] = (buffer[pos] & 0xf0) >> 4;
|
||||
ctx->ctrl.color[2] = buffer[pos] & 0x0f;
|
||||
ctx->ctrl.color[1] = (buffer[pos + 1] & 0xf0) >> 4;
|
||||
ctx->ctrl.color[0] = buffer[pos + 1] & 0x0f;
|
||||
pos += 2;
|
||||
break;
|
||||
case 0x04: /* SET_CONTR (alpha) */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.alpha[3] = (buffer[pos] & 0xf0) >> 4;
|
||||
ctx->ctrl.alpha[2] = buffer[pos] & 0x0f;
|
||||
ctx->ctrl.alpha[1] = (buffer[pos + 1] & 0xf0) >> 4;
|
||||
ctx->ctrl.alpha[0] = buffer[pos + 1] & 0x0f;
|
||||
pos += 2;
|
||||
break;
|
||||
case 0x05: /* SET_DAREA (coordinates) */
|
||||
if (pos + 6 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.coord[0] = ((buffer[pos] << 8) | (buffer[pos + 1] & 0xf0)) >> 4;
|
||||
ctx->ctrl.coord[1] = ((buffer[pos + 1] & 0x0f) << 8) | buffer[pos + 2];
|
||||
ctx->ctrl.coord[2] = ((buffer[pos + 3] << 8) | (buffer[pos + 4] & 0xf0)) >> 4;
|
||||
ctx->ctrl.coord[3] = ((buffer[pos + 4] & 0x0f) << 8) | buffer[pos + 5];
|
||||
pos += 6;
|
||||
break;
|
||||
case 0x06: /* SET_DSPXA (pixel offset) */
|
||||
if (pos + 4 > (int)buf_size)
|
||||
break;
|
||||
ctx->ctrl.pixoffset[0] = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
ctx->ctrl.pixoffset[1] = (buffer[pos + 2] << 8) | buffer[pos + 3];
|
||||
pos += 4;
|
||||
break;
|
||||
case 0x07: /* Extended command */
|
||||
if (pos + 2 > (int)buf_size)
|
||||
break;
|
||||
{
|
||||
uint16_t skip = (buffer[pos] << 8) | buffer[pos + 1];
|
||||
pos += skip;
|
||||
}
|
||||
break;
|
||||
case 0xff: /* End of control sequence */
|
||||
seq_end = 1;
|
||||
break;
|
||||
default:
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Unknown control command 0x%02x\n", command);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate RGBA palette from color/alpha indices using parsed palette */
|
||||
static void vobsub_generate_rgba_palette(struct vobsub_ctx *ctx, uint32_t *rgba_palette)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
if (ctx->ctrl.alpha[i] == 0)
|
||||
{
|
||||
rgba_palette[i] = 0; /* Fully transparent */
|
||||
}
|
||||
else if (ctx->palette_parsed)
|
||||
{
|
||||
/* Use parsed palette from idx header */
|
||||
uint32_t color = ctx->palette[ctx->ctrl.color[i] & 0x0f];
|
||||
uint8_t r = (color >> 16) & 0xff;
|
||||
uint8_t g = (color >> 8) & 0xff;
|
||||
uint8_t b = color & 0xff;
|
||||
uint8_t a = ctx->ctrl.alpha[i] * 17; /* Scale 0-15 to 0-255 */
|
||||
rgba_palette[i] = RGBA(r, g, b, a);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Fallback: guess palette (grayscale levels) */
|
||||
static const uint8_t level_map[4][4] = {
|
||||
{0xff},
|
||||
{0x00, 0xff},
|
||||
{0x00, 0x80, 0xff},
|
||||
{0x00, 0x55, 0xaa, 0xff},
|
||||
};
|
||||
|
||||
/* Count opaque colors */
|
||||
int nb_opaque = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (ctx->ctrl.alpha[j] != 0)
|
||||
nb_opaque++;
|
||||
|
||||
if (nb_opaque == 0)
|
||||
nb_opaque = 1;
|
||||
if (nb_opaque > 4)
|
||||
nb_opaque = 4;
|
||||
|
||||
int level = level_map[nb_opaque - 1][i < nb_opaque ? i : nb_opaque - 1];
|
||||
uint8_t a = ctx->ctrl.alpha[i] * 17;
|
||||
rgba_palette[i] = RGBA(level, level, level, a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct vobsub_ctx *init_vobsub_decoder(void)
|
||||
{
|
||||
struct vobsub_ctx *ctx = malloc(sizeof(struct vobsub_ctx));
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
|
||||
memset(ctx, 0, sizeof(struct vobsub_ctx));
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
ctx->ocr_ctx = init_ocr(1); /* 1 = default language index (English) */
|
||||
if (!ctx->ocr_ctx)
|
||||
{
|
||||
mprint("VOBSUB: Warning - OCR initialization failed\n");
|
||||
/* Continue anyway - OCR will just not work */
|
||||
}
|
||||
#endif
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header)
|
||||
{
|
||||
if (!ctx || !idx_header)
|
||||
return -1;
|
||||
|
||||
/* Find "palette:" line */
|
||||
const char *palette_line = strstr(idx_header, "palette:");
|
||||
if (!palette_line)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: No palette line found in idx header\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
palette_line += 8; /* Skip "palette:" */
|
||||
|
||||
/* Skip whitespace */
|
||||
while (*palette_line == ' ' || *palette_line == '\t')
|
||||
palette_line++;
|
||||
|
||||
/* Parse 16 hex RGB colors */
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
unsigned int color;
|
||||
if (sscanf(palette_line, "%x", &color) != 1)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to parse palette color %d\n", i);
|
||||
break;
|
||||
}
|
||||
ctx->palette[i] = color;
|
||||
|
||||
/* Skip to next color (past comma and whitespace) */
|
||||
while (*palette_line && *palette_line != ',' && *palette_line != '\n')
|
||||
palette_line++;
|
||||
if (*palette_line == ',')
|
||||
palette_line++;
|
||||
while (*palette_line == ' ' || *palette_line == '\t')
|
||||
palette_line++;
|
||||
}
|
||||
|
||||
ctx->palette_parsed = 1;
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Parsed palette from idx header\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int vobsub_decode_spu(struct vobsub_ctx *ctx,
|
||||
unsigned char *spu_data, size_t spu_size,
|
||||
long long start_time, long long end_time,
|
||||
struct cc_subtitle *sub)
|
||||
{
|
||||
if (!ctx || !spu_data || spu_size < 4 || !sub)
|
||||
return -1;
|
||||
|
||||
/* Parse SPU header */
|
||||
uint16_t size_spu = (spu_data[0] << 8) | spu_data[1];
|
||||
uint16_t ctrl_offset = (spu_data[2] << 8) | spu_data[3];
|
||||
|
||||
if (ctrl_offset > spu_size || size_spu > spu_size)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid SPU header (size=%u, ctrl=%u, buf=%zu)\n",
|
||||
size_spu, ctrl_offset, spu_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Parse control sequence */
|
||||
vobsub_decode_control(ctx, spu_data, spu_size, ctrl_offset);
|
||||
|
||||
/* Free any previous bitmap */
|
||||
if (ctx->bitmap)
|
||||
{
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
}
|
||||
|
||||
/* Decode bitmap */
|
||||
vobsub_get_bitmap(ctx, spu_data, spu_size);
|
||||
if (!ctx->bitmap)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Failed to decode bitmap\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Build cc_subtitle structure */
|
||||
int w = (ctx->ctrl.coord[1] - ctx->ctrl.coord[0]) + 1;
|
||||
int h = (ctx->ctrl.coord[3] - ctx->ctrl.coord[2]) + 1;
|
||||
|
||||
if (w <= 0 || h <= 0)
|
||||
{
|
||||
dbg_print(CCX_DMT_VERBOSE, "VOBSUB: Invalid bitmap dimensions\n");
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
sub->type = CC_BITMAP;
|
||||
sub->nb_data = 1;
|
||||
sub->got_output = 1;
|
||||
|
||||
struct cc_bitmap *rect = malloc(sizeof(struct cc_bitmap));
|
||||
if (!rect)
|
||||
{
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memset(rect, 0, sizeof(struct cc_bitmap));
|
||||
|
||||
sub->data = rect;
|
||||
sub->datatype = CC_DATATYPE_GENERIC;
|
||||
sub->start_time = start_time;
|
||||
sub->end_time = end_time > 0 ? end_time : start_time + ctx->ctrl.stop_time;
|
||||
|
||||
/* Copy bitmap data */
|
||||
rect->data0 = malloc(w * h);
|
||||
if (!rect->data0)
|
||||
{
|
||||
free(rect);
|
||||
sub->data = NULL;
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memcpy(rect->data0, ctx->bitmap, w * h);
|
||||
|
||||
/* Generate RGBA palette */
|
||||
rect->data1 = malloc(1024); /* Space for 256 colors */
|
||||
if (!rect->data1)
|
||||
{
|
||||
free(rect->data0);
|
||||
free(rect);
|
||||
sub->data = NULL;
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
return -1;
|
||||
}
|
||||
memset(rect->data1, 0, 1024);
|
||||
vobsub_generate_rgba_palette(ctx, (uint32_t *)rect->data1);
|
||||
|
||||
rect->nb_colors = 4;
|
||||
rect->x = ctx->ctrl.coord[0];
|
||||
rect->y = ctx->ctrl.coord[2];
|
||||
rect->w = w;
|
||||
rect->h = h;
|
||||
rect->linesize0 = w;
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
/* Run OCR if available */
|
||||
if (ctx->ocr_ctx)
|
||||
{
|
||||
char *ocr_str = NULL;
|
||||
int ret = ocr_rect(ctx->ocr_ctx, rect, &ocr_str, 0, 1); /* quantmode=1 */
|
||||
if (ret >= 0 && ocr_str)
|
||||
{
|
||||
rect->ocr_text = ocr_str;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
free(ctx->bitmap);
|
||||
ctx->bitmap = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Report whether this build was compiled with OCR support (1) or not (0). */
int vobsub_ocr_available(void)
{
	int available = 0;

#ifdef ENABLE_OCR
	available = 1;
#endif

	return available;
}
|
||||
|
||||
void delete_vobsub_decoder(struct vobsub_ctx **ctx)
|
||||
{
|
||||
if (!ctx || !*ctx)
|
||||
return;
|
||||
|
||||
struct vobsub_ctx *c = *ctx;
|
||||
|
||||
#ifdef ENABLE_OCR
|
||||
if (c->ocr_ctx)
|
||||
delete_ocr(&c->ocr_ctx);
|
||||
#endif
|
||||
|
||||
if (c->bitmap)
|
||||
free(c->bitmap);
|
||||
|
||||
free(c);
|
||||
*ctx = NULL;
|
||||
}
|
||||
53
src/lib_ccx/vobsub_decoder.h
Normal file
53
src/lib_ccx/vobsub_decoder.h
Normal file
@@ -0,0 +1,53 @@
|
||||
#ifndef VOBSUB_DECODER_H
|
||||
#define VOBSUB_DECODER_H
|
||||
|
||||
#include "ccx_decoders_structs.h"
|
||||
|
||||
/**
|
||||
* VOBSUB decoder context - opaque structure
|
||||
*/
|
||||
struct vobsub_ctx;
|
||||
|
||||
/**
|
||||
* Initialize VOBSUB decoder context
|
||||
* @return Pointer to context, or NULL on failure
|
||||
*/
|
||||
struct vobsub_ctx *init_vobsub_decoder(void);
|
||||
|
||||
/**
|
||||
* Parse palette from idx header string (e.g., from MKV CodecPrivate)
|
||||
* Looks for "palette:" line and parses 16 hex RGB colors
|
||||
* @param ctx VOBSUB decoder context
|
||||
* @param idx_header The idx header string containing palette info
|
||||
* @return 0 on success, -1 on failure
|
||||
*/
|
||||
int vobsub_parse_palette(struct vobsub_ctx *ctx, const char *idx_header);
|
||||
|
||||
/**
|
||||
* Decode single SPU packet and optionally perform OCR
|
||||
* @param ctx VOBSUB decoder context
|
||||
* @param spu_data Raw SPU data (starting with 2-byte size)
|
||||
* @param spu_size Size of SPU data
|
||||
* @param start_time Start time in milliseconds
|
||||
* @param end_time End time in milliseconds (0 if unknown)
|
||||
* @param sub Output subtitle structure
|
||||
* @return 0 on success, -1 on error
|
||||
*/
|
||||
int vobsub_decode_spu(struct vobsub_ctx *ctx,
|
||||
unsigned char *spu_data, size_t spu_size,
|
||||
long long start_time, long long end_time,
|
||||
struct cc_subtitle *sub);
|
||||
|
||||
/**
|
||||
* Check if VOBSUB OCR is available (compiled with OCR support)
|
||||
* @return 1 if OCR available, 0 otherwise
|
||||
*/
|
||||
int vobsub_ocr_available(void);
|
||||
|
||||
/**
|
||||
* Free VOBSUB decoder context and resources
|
||||
* @param ctx Pointer to context pointer (will be set to NULL)
|
||||
*/
|
||||
void delete_vobsub_decoder(struct vobsub_ctx **ctx);
|
||||
|
||||
#endif /* VOBSUB_DECODER_H */
|
||||
44
src/rust/Cargo.lock
generated
44
src/rust/Cargo.lock
generated
@@ -161,6 +161,12 @@ version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
||||
|
||||
[[package]]
|
||||
name = "by_address"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64fa3c856b712db6612c019f14756e64e4bcea13337a6b33b696333a9eaa2d06"
|
||||
|
||||
[[package]]
|
||||
name = "camino"
|
||||
version = "1.2.1"
|
||||
@@ -355,21 +361,18 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fast-srgb8"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd2e7510819d6fbf51a5545c8f922716ecfb14df168a3242f7d33e0239efe6a1"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "find-crate"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59a98bbaacea1c0eb6a0876280051b892eb73594fd90cf3b20e9c817029c57d2"
|
||||
dependencies = [
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.2"
|
||||
@@ -819,26 +822,26 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "palette"
|
||||
version = "0.6.1"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f9cd68f7112581033f157e56c77ac4a5538ec5836a2e39284e65bd7d7275e49"
|
||||
checksum = "4cbf71184cc5ecc2e4e1baccdb21026c20e5fc3dcf63028a086131b3ab00b6e6"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"num-traits",
|
||||
"fast-srgb8",
|
||||
"palette_derive",
|
||||
"phf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "palette_derive"
|
||||
version = "0.6.1"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05eedf46a8e7c27f74af0c9cfcdb004ceca158cb1b918c6f68f8d7a549b3e427"
|
||||
checksum = "f5030daf005bface118c096f510ffb781fc28f9ab6a32ab224d8631be6851d30"
|
||||
dependencies = [
|
||||
"find-crate",
|
||||
"by_address",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1436,15 +1439,6 @@ dependencies = [
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.7.3"
|
||||
|
||||
@@ -13,7 +13,7 @@ crate-type = ["staticlib"]
|
||||
[dependencies]
|
||||
log = "0.4.26"
|
||||
env_logger = "0.8.4"
|
||||
palette = "0.6.1"
|
||||
palette = "0.7"
|
||||
tesseract-sys = { version = "0.5.15", optional = true, default-features = false }
|
||||
leptonica-sys = { version = "= 0.4.6", optional = true, default-features = false }
|
||||
clap = { version = "4.5.31", features = ["derive"] }
|
||||
|
||||
@@ -84,7 +84,12 @@ fn main() {
|
||||
{
|
||||
builder = builder.clang_arg("-DENABLE_HARDSUBX");
|
||||
|
||||
// Add FFmpeg include paths for Mac
|
||||
// Check FFMPEG_INCLUDE_DIR environment variable (works on all platforms)
|
||||
if let Ok(ffmpeg_include) = env::var("FFMPEG_INCLUDE_DIR") {
|
||||
builder = builder.clang_arg(format!("-I{}", ffmpeg_include));
|
||||
}
|
||||
|
||||
// Add FFmpeg include paths for Mac (Homebrew)
|
||||
if cfg!(target_os = "macos") {
|
||||
// Try common Homebrew paths
|
||||
if std::path::Path::new("/opt/homebrew/include").exists() {
|
||||
@@ -98,22 +103,23 @@ fn main() {
|
||||
if std::path::Path::new(cellar_ffmpeg).exists() {
|
||||
// Find the FFmpeg version directory
|
||||
if let Ok(entries) = std::fs::read_dir(cellar_ffmpeg) {
|
||||
for entry in entries {
|
||||
if let Ok(entry) = entry {
|
||||
let include_path = entry.path().join("include");
|
||||
if include_path.exists() {
|
||||
builder =
|
||||
builder.clang_arg(format!("-I{}", include_path.display()));
|
||||
break;
|
||||
}
|
||||
for entry in entries.flatten() {
|
||||
let include_path = entry.path().join("include");
|
||||
if include_path.exists() {
|
||||
builder = builder.clang_arg(format!("-I{}", include_path.display()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also check environment variable
|
||||
if let Ok(ffmpeg_include) = env::var("FFMPEG_INCLUDE_DIR") {
|
||||
builder = builder.clang_arg(format!("-I{}", ffmpeg_include));
|
||||
// On Linux, try pkg-config to find FFmpeg include paths
|
||||
if cfg!(target_os = "linux") {
|
||||
if let Ok(lib) = pkg_config::Config::new().probe("libavcodec") {
|
||||
for path in lib.include_paths {
|
||||
builder = builder.clang_arg(format!("-I{}", path.display()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,7 +147,11 @@ pub const CCX_DECODER_608_SCREEN_WIDTH: usize = 32;
|
||||
pub const ONEPASS: usize = 120; // Bytes we can always look ahead without going out of limits
|
||||
pub const BUFSIZE: usize = 2048 * 1024 + ONEPASS; // 2 Mb plus the safety pass
|
||||
pub const MAX_CLOSED_CAPTION_DATA_PER_PICTURE: usize = 32;
|
||||
pub const EIA_708_BUFFER_LENGTH: usize = 2048; // TODO: Find out what the real limit is
|
||||
/// CEA-708 Service Input Buffer size.
|
||||
/// Specification minimum is 128 bytes per service, but we use 2048 bytes
|
||||
/// (16x the minimum) to provide a safety margin for buffer management.
|
||||
/// Reference: CEA-708-E Section 8.4.3 - Service Input Buffers
|
||||
pub const EIA_708_BUFFER_LENGTH: usize = 2048;
|
||||
pub const TS_PACKET_PAYLOAD_LENGTH: usize = 184; // From specs
|
||||
pub const SUBLINESIZE: usize = 2048; // Max. length of a .srt line - TODO: Get rid of this
|
||||
pub const STARTBYTESLENGTH: usize = 1024 * 1024;
|
||||
@@ -278,6 +282,7 @@ pub enum StreamMode {
|
||||
Gxf = 11,
|
||||
Mkv = 12,
|
||||
Mxf = 13,
|
||||
Scc = 14, // Scenarist Closed Caption input
|
||||
Autodetect = 16,
|
||||
}
|
||||
#[derive(Debug, Eq, Clone, Copy)]
|
||||
|
||||
385
src/rust/lib_ccxr/src/common/mkv_lang.rs
Normal file
385
src/rust/lib_ccxr/src/common/mkv_lang.rs
Normal file
@@ -0,0 +1,385 @@
|
||||
//! MKV language filtering support.
|
||||
//!
|
||||
//! Matroska files support two language code formats:
|
||||
//! - ISO 639-2 (3-letter bibliographic codes): "eng", "fre", "chi"
|
||||
//! - BCP 47 / IETF language tags: "en-US", "fr-CA", "zh-Hans"
|
||||
//!
|
||||
//! This module provides [`MkvLangFilter`] for parsing and matching language codes.
|
||||
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// A filter for matching MKV track languages.
|
||||
///
|
||||
/// Supports comma-separated lists of language codes in either:
|
||||
/// - ISO 639-2 format (3-letter codes like "eng", "fre")
|
||||
/// - BCP 47 format (tags like "en-US", "fr-CA", "zh-Hans")
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use lib_ccxr::common::MkvLangFilter;
|
||||
///
|
||||
/// // Single language
|
||||
/// let filter: MkvLangFilter = "eng".parse().unwrap();
|
||||
/// assert!(filter.matches("eng", None));
|
||||
///
|
||||
/// // Multiple languages
|
||||
/// let filter: MkvLangFilter = "eng,fre,chi".parse().unwrap();
|
||||
/// assert!(filter.matches("fre", None));
|
||||
///
|
||||
/// // BCP 47 matching
|
||||
/// let filter: MkvLangFilter = "en-US,fr-CA".parse().unwrap();
|
||||
/// assert!(filter.matches("eng", Some("en-US")));
|
||||
/// ```
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct MkvLangFilter {
|
||||
/// The original input string (used for C FFI)
|
||||
raw: String,
|
||||
/// Parsed and validated language codes
|
||||
codes: Vec<LanguageCode>,
|
||||
}
|
||||
|
||||
/// One language code: an ISO 639-2 code or a BCP 47 tag.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LanguageCode {
    /// Lowercased (normalized) form of the code.
    code: String,
}
|
||||
|
||||
/// Error reported when a language code (or a whole filter) is rejected.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidLanguageCode {
    /// The code that was rejected.
    pub code: String,
    /// Short explanation of why it was rejected.
    pub reason: &'static str,
}
|
||||
|
||||
impl fmt::Display for InvalidLanguageCode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "invalid language code '{}': {}", self.code, self.reason)
|
||||
}
|
||||
}
|
||||
|
||||
// Lets `InvalidLanguageCode` be used wherever a `std::error::Error` is
// expected; no trait methods are overridden.
impl std::error::Error for InvalidLanguageCode {}
|
||||
|
||||
impl LanguageCode {
|
||||
/// Validates and creates a new language code.
|
||||
///
|
||||
/// Accepts:
|
||||
/// - ISO 639-2 codes: 3 ASCII letters (e.g., "eng", "fre")
|
||||
/// - BCP 47 tags: primary language with optional subtags separated by hyphens
|
||||
/// (e.g., "en-US", "fr-CA", "zh-Hans-CN")
|
||||
///
|
||||
/// # BCP 47 Structure
|
||||
/// - Primary language: 2-3 letters
|
||||
/// - Script (optional): 4 letters (e.g., "Hans", "Latn")
|
||||
/// - Region (optional): 2 letters or 3 digits (e.g., "US", "419")
|
||||
/// - Variant (optional): 5-8 alphanumeric characters
|
||||
pub fn new(code: &str) -> Result<Self, InvalidLanguageCode> {
|
||||
let code = code.trim();
|
||||
|
||||
if code.is_empty() {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "empty language code",
|
||||
});
|
||||
}
|
||||
|
||||
// Check for valid characters (alphanumeric and hyphens only)
|
||||
if !code.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "must contain only ASCII letters, digits, and hyphens",
|
||||
});
|
||||
}
|
||||
|
||||
// Cannot start or end with hyphen
|
||||
if code.starts_with('-') || code.ends_with('-') {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "cannot start or end with hyphen",
|
||||
});
|
||||
}
|
||||
|
||||
// Cannot have consecutive hyphens
|
||||
if code.contains("--") {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "cannot have consecutive hyphens",
|
||||
});
|
||||
}
|
||||
|
||||
// Validate subtag structure
|
||||
let subtags: Vec<&str> = code.split('-').collect();
|
||||
|
||||
// First subtag must be the primary language (2-3 letters)
|
||||
let primary = subtags[0];
|
||||
if primary.len() < 2 || primary.len() > 3 {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "primary language subtag must be 2-3 letters",
|
||||
});
|
||||
}
|
||||
if !primary.chars().all(|c| c.is_ascii_alphabetic()) {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "primary language subtag must contain only letters",
|
||||
});
|
||||
}
|
||||
|
||||
// Validate subsequent subtags
|
||||
for subtag in subtags.iter().skip(1) {
|
||||
if subtag.is_empty() {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "empty subtag",
|
||||
});
|
||||
}
|
||||
|
||||
let len = subtag.len();
|
||||
let all_alpha = subtag.chars().all(|c| c.is_ascii_alphabetic());
|
||||
let all_digit = subtag.chars().all(|c| c.is_ascii_digit());
|
||||
let all_alnum = subtag.chars().all(|c| c.is_ascii_alphanumeric());
|
||||
|
||||
// Valid subtag types:
|
||||
// - Script: 4 letters (e.g., "Hans")
|
||||
// - Region: 2 letters or 3 digits (e.g., "US", "419")
|
||||
// - Variant: 5-8 alphanumeric, or 4 starting with digit
|
||||
// - Extension: single letter followed by more subtags
|
||||
// - Private use: 'x' followed by 1-8 char subtags
|
||||
let valid = match len {
|
||||
1 => subtag.chars().all(|c| c.is_ascii_alphanumeric()), // Extension singleton
|
||||
2 => all_alpha, // Region (2 letters)
|
||||
3 => all_alpha || all_digit, // 3 letters or 3 digits
|
||||
4 => all_alpha || (subtag.chars().next().unwrap().is_ascii_digit() && all_alnum), // Script or variant starting with digit
|
||||
5..=8 => all_alnum, // Variant
|
||||
_ => false,
|
||||
};
|
||||
|
||||
if !valid {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: code.to_string(),
|
||||
reason: "invalid subtag format",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
code: code.to_lowercase(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the normalized (lowercase) code.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.code
|
||||
}
|
||||
|
||||
/// Checks if this code matches a track's language.
|
||||
///
|
||||
/// Matching rules:
|
||||
/// 1. Exact match (case-insensitive)
|
||||
/// 2. Prefix match for BCP 47 (e.g., "en" matches "en-US")
|
||||
pub fn matches(&self, iso639: &str, bcp47: Option<&str>) -> bool {
|
||||
let iso639_lower = iso639.to_lowercase();
|
||||
let bcp47_lower = bcp47.map(|s| s.to_lowercase());
|
||||
|
||||
// Exact match on ISO 639-2
|
||||
if self.code == iso639_lower {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Exact match on BCP 47
|
||||
if let Some(ref bcp) = bcp47_lower {
|
||||
if self.code == *bcp {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Prefix match: "en" matches "en-US", "eng" matches track with bcp47 "en-US"
|
||||
// The filter code could be a prefix of the track's BCP 47 tag
|
||||
if let Some(ref bcp) = bcp47_lower {
|
||||
if bcp.starts_with(&self.code) && bcp[self.code.len()..].starts_with('-') {
|
||||
return true;
|
||||
}
|
||||
// Or the track's BCP 47 could be a prefix of the filter
|
||||
if self.code.starts_with(bcp.as_str()) && self.code[bcp.len()..].starts_with('-') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for LanguageCode {
|
||||
type Err = InvalidLanguageCode;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Self::new(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for LanguageCode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.code)
|
||||
}
|
||||
}
|
||||
|
||||
impl MkvLangFilter {
|
||||
/// Creates a new filter from a comma-separated list of language codes.
|
||||
pub fn new(input: &str) -> Result<Self, InvalidLanguageCode> {
|
||||
let input = input.trim();
|
||||
if input.is_empty() {
|
||||
return Err(InvalidLanguageCode {
|
||||
code: String::new(),
|
||||
reason: "empty language filter",
|
||||
});
|
||||
}
|
||||
|
||||
let codes: Result<Vec<LanguageCode>, _> = input.split(',').map(LanguageCode::new).collect();
|
||||
|
||||
Ok(Self {
|
||||
raw: input.to_string(),
|
||||
codes: codes?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the raw input string (for C FFI compatibility).
|
||||
pub fn as_raw_str(&self) -> &str {
|
||||
&self.raw
|
||||
}
|
||||
|
||||
/// Returns the parsed language codes.
|
||||
pub fn codes(&self) -> &[LanguageCode] {
|
||||
&self.codes
|
||||
}
|
||||
|
||||
/// Checks if any of the filter's codes match a track's language.
|
||||
///
|
||||
/// # Arguments
|
||||
/// - `iso639`: The track's ISO 639-2 language code (e.g., "eng")
|
||||
/// - `bcp47`: The track's BCP 47 language tag, if available (e.g., "en-US")
|
||||
pub fn matches(&self, iso639: &str, bcp47: Option<&str>) -> bool {
|
||||
self.codes.iter().any(|code| code.matches(iso639, bcp47))
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for MkvLangFilter {
|
||||
type Err = InvalidLanguageCode;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Self::new(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MkvLangFilter {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.raw)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `LanguageCode::new` accepts every given input.
    fn assert_accepted(inputs: &[&str]) {
        for &input in inputs {
            assert!(LanguageCode::new(input).is_ok());
        }
    }

    /// Asserts that `LanguageCode::new` rejects every given input.
    fn assert_rejected(inputs: &[&str]) {
        for &input in inputs {
            assert!(LanguageCode::new(input).is_err());
        }
    }

    #[test]
    fn test_iso639_codes() {
        // 3-letter codes; the upper-case entry checks case insensitivity.
        assert_accepted(&["eng", "fre", "chi", "ENG"]);

        // 2-letter codes (ISO 639-1 style, valid in BCP 47).
        assert_accepted(&["en", "fr"]);
    }

    #[test]
    fn test_bcp47_codes() {
        // Language + region
        assert_accepted(&["en-US", "fr-CA", "pt-BR"]);

        // Language + script
        assert_accepted(&["zh-Hans", "zh-Hant", "sr-Latn"]);

        // Language + script + region
        assert_accepted(&["zh-Hans-CN", "zh-Hant-TW"]);

        // UN M.49 numeric region code (Latin America)
        assert_accepted(&["es-419"]);
    }

    #[test]
    fn test_invalid_codes() {
        // Too short
        assert_rejected(&["a"]);

        // Invalid characters: underscore, space, non-ASCII
        assert_rejected(&["en_US", "en US", "ça"]);

        // Invalid structure: leading, trailing and double hyphen
        assert_rejected(&["-en", "en-", "en--US"]);

        // Empty
        assert_rejected(&[""]);
    }

    #[test]
    fn test_filter_multiple_codes() {
        let filter = MkvLangFilter::new("eng,fre,chi").unwrap();
        assert_eq!(filter.codes().len(), 3);

        for &listed in &["eng", "fre", "chi"] {
            assert!(filter.matches(listed, None));
        }
        assert!(!filter.matches("spa", None));
    }

    #[test]
    fn test_filter_bcp47_matching() {
        let filter = MkvLangFilter::new("en-US,fr-CA").unwrap();

        // (track ISO 639-2 code, track BCP 47 tag, expected result)
        let expectations = [
            // Exact BCP 47 match
            ("eng", Some("en-US"), true),
            ("fre", Some("fr-CA"), true),
            // No match
            ("eng", Some("en-GB"), false),
            ("eng", None, false),
        ];
        for &(iso639, bcp47, expected) in &expectations {
            assert_eq!(filter.matches(iso639, bcp47), expected);
        }
    }

    #[test]
    fn test_filter_mixed_formats() {
        let filter = MkvLangFilter::new("eng,fr-CA,zh-Hans").unwrap();

        let tracks = [
            ("eng", None),
            ("fre", Some("fr-CA")),
            ("chi", Some("zh-Hans")),
        ];
        for &(iso639, bcp47) in &tracks {
            assert!(filter.matches(iso639, bcp47));
        }
    }

    #[test]
    fn test_case_insensitivity() {
        let filter = MkvLangFilter::new("ENG,FR-CA").unwrap();

        let tracks = [
            ("eng", None),
            ("ENG", None),
            ("fre", Some("fr-ca")),
            ("FRE", Some("FR-CA")),
        ];
        for &(iso639, bcp47) in &tracks {
            assert!(filter.matches(iso639, bcp47));
        }
    }

    #[test]
    fn test_raw_string_preserved() {
        let filter = MkvLangFilter::new("eng,fre").unwrap();
        assert_eq!(filter.as_raw_str(), "eng,fre");
    }
}
|
||||
@@ -18,8 +18,10 @@
|
||||
|
||||
mod bitstream;
|
||||
mod constants;
|
||||
mod mkv_lang;
|
||||
mod options;
|
||||
|
||||
pub use bitstream::*;
|
||||
pub use constants::*;
|
||||
pub use mkv_lang::*;
|
||||
pub use options::*;
|
||||
|
||||
@@ -462,8 +462,13 @@ pub struct Options {
|
||||
/// (0 = no quantization at all, 1 = CCExtractor's internal,
|
||||
/// 2 = reduce distinct color count in image for faster results.)
|
||||
pub ocr_quantmode: u8,
|
||||
/// The name of the language stream for MKV
|
||||
pub mkvlang: Option<Language>,
|
||||
/// If true, split images into lines before OCR (uses PSM 7 for better accuracy)
|
||||
pub ocr_line_split: bool,
|
||||
/// If true, use character blacklist to prevent common OCR errors (e.g. | vs I)
|
||||
pub ocr_blacklist: bool,
|
||||
/// Language filter for MKV subtitle tracks.
|
||||
/// Accepts comma-separated ISO 639-2 codes (e.g., "eng,fre") or BCP 47 tags (e.g., "en-US,fr-CA").
|
||||
pub mkvlang: Option<super::MkvLangFilter>,
|
||||
/// If true, the video stream will be processed even if we're using a different one for subtitles.
|
||||
pub analyze_video_stream: bool,
|
||||
|
||||
@@ -517,6 +522,10 @@ pub struct Options {
|
||||
pub multiprogram: bool,
|
||||
pub out_interval: i32,
|
||||
pub segment_on_key_frames_only: bool,
|
||||
/// SCC input framerate: 0=29.97 (default), 1=24, 2=25, 3=30
|
||||
pub scc_framerate: i32,
|
||||
/// SCC accurate timing (issue #1120): if true, use bandwidth-aware timing for broadcast compliance
|
||||
pub scc_accurate_timing: bool,
|
||||
pub debug_mask: DebugMessageMask,
|
||||
|
||||
#[cfg(feature = "with_libcurl")]
|
||||
@@ -582,6 +591,8 @@ impl Default for Options {
|
||||
ocr_oem: -1,
|
||||
psm: 3,
|
||||
ocr_quantmode: 0, // No quantization - better OCR accuracy for DVB subtitles
|
||||
ocr_line_split: false, // Don't split images into lines by default
|
||||
ocr_blacklist: true, // Use character blacklist by default to prevent | vs I errors
|
||||
mkvlang: Default::default(),
|
||||
analyze_video_stream: Default::default(),
|
||||
hardsubx_ocr_mode: Default::default(),
|
||||
@@ -618,6 +629,8 @@ impl Default for Options {
|
||||
multiprogram: Default::default(),
|
||||
out_interval: -1,
|
||||
segment_on_key_frames_only: Default::default(),
|
||||
scc_framerate: 0, // 0 = 29.97fps (default)
|
||||
scc_accurate_timing: false, // Off by default for backwards compatibility (issue #1120)
|
||||
debug_mask: DebugMessageMask::new(
|
||||
DebugMessageFlag::GENERIC_NOTICE,
|
||||
DebugMessageFlag::VERBOSE,
|
||||
|
||||
@@ -82,7 +82,6 @@ impl<'a> SendTarget<'a> {
|
||||
"Unable to connect, address passed is null\n"
|
||||
);
|
||||
}
|
||||
info!("Target address: {}\n", config.target_addr); // TODO remove this
|
||||
info!("Target port: {}\n", config.port.unwrap_or(DEFAULT_TCP_PORT));
|
||||
let tcp_stream = TcpStream::connect((
|
||||
config.target_addr,
|
||||
|
||||
@@ -1154,10 +1154,9 @@ impl<'a> TeletextContext<'a> {
|
||||
}
|
||||
|
||||
if v >= 0x20 {
|
||||
let u = char::from_u32(v as u32).unwrap();
|
||||
let u = char::from_u32(v as u32).unwrap_or(char::REPLACEMENT_CHARACTER);
|
||||
self.page_buffer_cur.get_or_insert("".into()).push(u);
|
||||
if logger().expect("could not access logger").is_gui_mode() {
|
||||
// For now we just handle the easy stuff
|
||||
eprint!("{u}");
|
||||
}
|
||||
}
|
||||
@@ -1225,13 +1224,15 @@ impl<'a> TeletextContext<'a> {
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
ans = Some(Subtitle::new_text(
|
||||
self.page_buffer_cur.take().unwrap().into(),
|
||||
self.page_buffer.show_timestamp,
|
||||
self.page_buffer.hide_timestamp + Timestamp::from_millis(1),
|
||||
None,
|
||||
"TLT".into(),
|
||||
));
|
||||
if let Some(cur) = self.page_buffer_cur.take() {
|
||||
ans = Some(Subtitle::new_text(
|
||||
cur.into(),
|
||||
self.page_buffer.show_timestamp,
|
||||
self.page_buffer.hide_timestamp + Timestamp::from_millis(1),
|
||||
None,
|
||||
"TLT".into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1251,34 +1252,43 @@ impl<'a> TeletextContext<'a> {
|
||||
capitalization_list: &[String],
|
||||
) {
|
||||
// variable names conform to ETS 300 706, chapter 7.1.2
|
||||
let address = (decode_hamming_8_4(packet.address[1]).unwrap() << 4)
|
||||
| decode_hamming_8_4(packet.address[0]).unwrap();
|
||||
let Some(addr1) = decode_hamming_8_4(packet.address[1]) else {
|
||||
return;
|
||||
};
|
||||
let Some(addr0) = decode_hamming_8_4(packet.address[0]) else {
|
||||
return;
|
||||
};
|
||||
let address = (addr1 << 4) | addr0;
|
||||
let mut m = address & 0x7;
|
||||
if m == 0 {
|
||||
m = 8;
|
||||
}
|
||||
let y = (address >> 3) & 0x1f;
|
||||
let designation_code = if y > 25 {
|
||||
decode_hamming_8_4(packet.data[0]).unwrap()
|
||||
decode_hamming_8_4(packet.data[0]).unwrap_or(0x00)
|
||||
} else {
|
||||
0x00
|
||||
};
|
||||
|
||||
if y == 0 {
|
||||
// CC map
|
||||
let i = (decode_hamming_8_4(packet.data[1]).unwrap() << 4)
|
||||
| decode_hamming_8_4(packet.data[0]).unwrap();
|
||||
let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap() & 0x08) >> 3;
|
||||
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0);
|
||||
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0);
|
||||
let i = (h1 << 4) | h0;
|
||||
|
||||
let flag_subtitle = (decode_hamming_8_4(packet.data[5]).unwrap_or(0) & 0x08) >> 3;
|
||||
self.cc_map[i as usize] |= flag_subtitle << (m - 1);
|
||||
|
||||
let flag_subtitle = flag_subtitle != 0;
|
||||
|
||||
if flag_subtitle && (i < 0xff) {
|
||||
let mut thisp = ((m as u32) << 8)
|
||||
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u32) << 4)
|
||||
| (decode_hamming_8_4(packet.data[0]).unwrap() as u32);
|
||||
let t1 = format!("{thisp:x}"); // Example: 1928 -> 788
|
||||
thisp = t1.parse().unwrap();
|
||||
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u32;
|
||||
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u32;
|
||||
let mut thisp = ((m as u32) << 8) | (h1 << 4) | h0;
|
||||
|
||||
let t1 = format!("{thisp:x}");
|
||||
// Fallback to original value if parsing fails to avoid panics on malformed BCD
|
||||
thisp = t1.parse().unwrap_or(thisp);
|
||||
if !self.seen_sub_page[thisp as usize] {
|
||||
self.seen_sub_page[thisp as usize] = true;
|
||||
info!(
|
||||
@@ -1288,36 +1298,28 @@ impl<'a> TeletextContext<'a> {
|
||||
}
|
||||
}
|
||||
if (self.config.page.get() == 0.into()) && flag_subtitle && (i < 0xff) {
|
||||
self.config.page.replace(
|
||||
(((m as u16) << 8)
|
||||
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4)
|
||||
| (decode_hamming_8_4(packet.data[0]).unwrap() as u16))
|
||||
.into(),
|
||||
);
|
||||
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u16;
|
||||
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u16;
|
||||
|
||||
self.config
|
||||
.page
|
||||
.replace((((m as u16) << 8) | (h1 << 4) | h0).into());
|
||||
info!("- No teletext page specified, first received suitable page is {}, not guaranteed\n", self.config.page.get());
|
||||
}
|
||||
|
||||
// Page number and control bits
|
||||
let page_number: TeletextPageNumber = (((m as u16) << 8)
|
||||
| ((decode_hamming_8_4(packet.data[1]).unwrap() as u16) << 4)
|
||||
| (decode_hamming_8_4(packet.data[0]).unwrap() as u16))
|
||||
.into();
|
||||
let charset = ((decode_hamming_8_4(packet.data[7]).unwrap() & 0x08)
|
||||
| (decode_hamming_8_4(packet.data[7]).unwrap() & 0x04)
|
||||
| (decode_hamming_8_4(packet.data[7]).unwrap() & 0x02))
|
||||
>> 1;
|
||||
// let flag_suppress_header = decode_hamming_8_4(packet.data[6]).unwrap() & 0x01;
|
||||
// let flag_inhibit_display = (decode_hamming_8_4(packet.data[6]).unwrap() & 0x08) >> 3;
|
||||
let h1 = decode_hamming_8_4(packet.data[1]).unwrap_or(0) as u16;
|
||||
let h0 = decode_hamming_8_4(packet.data[0]).unwrap_or(0) as u16;
|
||||
let page_number: TeletextPageNumber = (((m as u16) << 8) | (h1 << 4) | h0).into();
|
||||
|
||||
let c7 = decode_hamming_8_4(packet.data[7]).unwrap_or(0);
|
||||
let charset = (c7 & 0x08 | c7 & 0x04 | c7 & 0x02) >> 1;
|
||||
// ETS 300 706, chapter 9.3.1.3:
|
||||
// When set to '1' the service is designated to be in Serial mode and the transmission of a page is terminated
|
||||
// by the next page header with a different page number.
|
||||
// When set to '0' the service is designated to be in Parallel mode and the transmission of a page is terminated
|
||||
// by the next page header with a different page number but the same magazine number.
|
||||
// The same setting shall be used for all page headers in the service.
|
||||
// ETS 300 706, chapter 7.2.1: Page is terminated by and excludes the next page header packet
|
||||
// having the same magazine address in parallel transmission mode, or any magazine address in serial transmission mode.
|
||||
self.transmission_mode = if decode_hamming_8_4(packet.data[7]).unwrap() & 0x01 == 0 {
|
||||
self.transmission_mode = if c7 & 0x01 == 0 {
|
||||
TransmissionMode::Parallel
|
||||
} else {
|
||||
TransmissionMode::Serial
|
||||
@@ -1353,19 +1355,17 @@ impl<'a> TeletextContext<'a> {
|
||||
|
||||
// Now we have the beginning of page transmission; if there is page_buffer pending, process it
|
||||
if self.page_buffer.tainted {
|
||||
// Convert telx to UCS-2 before processing
|
||||
for yt in 1..=23 {
|
||||
for it in 0..40 {
|
||||
if self.page_buffer.text[yt][it] != 0x00
|
||||
&& !self.page_buffer.g2_char_present[yt][it]
|
||||
{
|
||||
self.page_buffer.text[yt][it] = self
|
||||
.g0_charset
|
||||
.ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap());
|
||||
if let Ok(c) = self.page_buffer.text[yt][it].try_into() {
|
||||
self.page_buffer.text[yt][it] = self.g0_charset.ucs2_char(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// it would be nice, if subtitle hides on previous video frame, so we contract 40 ms (1 frame @25 fps)
|
||||
self.page_buffer.hide_timestamp = timestamp - Timestamp::from_millis(40);
|
||||
if self.page_buffer.hide_timestamp > timestamp {
|
||||
self.page_buffer.hide_timestamp = Timestamp::from_millis(0);
|
||||
@@ -1544,12 +1544,14 @@ impl<'a> TeletextContext<'a> {
|
||||
info!("- Programme Identification Data = ");
|
||||
for i in 20..40 {
|
||||
let c = self.g0_charset.ucs2_char(packet.data[i]);
|
||||
// strip any control codes from PID, eg. TVP station
|
||||
if c < 0x20 {
|
||||
continue;
|
||||
}
|
||||
|
||||
info!("{}", char::from_u32(c as u32).unwrap());
|
||||
info!(
|
||||
"{}",
|
||||
char::from_u32(c as u32).unwrap_or(char::REPLACEMENT_CHARACTER)
|
||||
);
|
||||
}
|
||||
info!("\n");
|
||||
|
||||
@@ -1580,7 +1582,7 @@ impl<'a> TeletextContext<'a> {
|
||||
|
||||
info!(
|
||||
"- Universal Time Co-ordinated = {}\n",
|
||||
t0.to_ctime().unwrap()
|
||||
t0.to_ctime().as_deref().unwrap_or("unknown")
|
||||
);
|
||||
|
||||
debug!(msg_type = DebugMessageFlag::TELETEXT; "- Transmission mode = {:?}\n", self.transmission_mode);
|
||||
@@ -1589,8 +1591,13 @@ impl<'a> TeletextContext<'a> {
|
||||
&& matches!(self.config.date_format, TimestampFormat::Date { .. })
|
||||
&& !self.config.noautotimeref
|
||||
{
|
||||
info!("- Broadcast Service Data Packet received, resetting UTC referential value to {}\n", t0.to_ctime().unwrap());
|
||||
*UTC_REFVALUE.write().unwrap() = t as u64;
|
||||
info!(
|
||||
"- Broadcast Service Data Packet received, resetting UTC referential value to {}\n",
|
||||
t0.to_ctime().as_deref().unwrap_or("unknown")
|
||||
);
|
||||
if let Ok(mut lock) = UTC_REFVALUE.write() {
|
||||
*lock = t as u64;
|
||||
}
|
||||
self.states.pts_initialized = false;
|
||||
}
|
||||
|
||||
@@ -1610,15 +1617,14 @@ impl<'a> TeletextContext<'a> {
|
||||
if let Some(subtitles) = subtitles {
|
||||
// output any pending close caption
|
||||
if self.page_buffer.tainted {
|
||||
// Convert telx to UCS-2 before processing
|
||||
for yt in 1..=23 {
|
||||
for it in 0..40 {
|
||||
if self.page_buffer.text[yt][it] != 0x00
|
||||
&& !self.page_buffer.g2_char_present[yt][it]
|
||||
{
|
||||
self.page_buffer.text[yt][it] = self
|
||||
.g0_charset
|
||||
.ucs2_char(self.page_buffer.text[yt][it].try_into().unwrap());
|
||||
if let Ok(c) = self.page_buffer.text[yt][it].try_into() {
|
||||
self.page_buffer.text[yt][it] = self.g0_charset.ucs2_char(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,9 +225,6 @@ impl Timestamp {
|
||||
let m = millis / 60000 - 60 * h;
|
||||
let s = millis / 1000 - 3600 * h - 60 * m;
|
||||
let u = millis - 3600000 * h - 60000 * m - 1000 * s;
|
||||
if h > 24 {
|
||||
println!("{h}")
|
||||
}
|
||||
Ok((h.try_into()?, m as u8, s as u8, u as u16))
|
||||
}
|
||||
|
||||
|
||||
@@ -269,6 +269,11 @@ impl<'a> CCExtractorLogger {
|
||||
self.target
|
||||
}
|
||||
|
||||
/// Sets the target for logging messages.
|
||||
pub fn set_target(&mut self, target: OutputTarget) {
|
||||
self.target = target;
|
||||
}
|
||||
|
||||
/// Check if the messages are intercepted by GUI.
|
||||
pub fn is_gui_mode(&self) -> bool {
|
||||
self.gui_mode
|
||||
@@ -276,8 +281,16 @@ impl<'a> CCExtractorLogger {
|
||||
|
||||
fn print(&self, args: &Arguments<'a>) {
|
||||
match &self.target {
|
||||
OutputTarget::Stdout => print!("{args}"),
|
||||
OutputTarget::Stderr => eprint!("{args}"),
|
||||
OutputTarget::Stdout => {
|
||||
print!("{args}");
|
||||
// Flush stdout to ensure output appears immediately, especially when
|
||||
// mixing with C code that also writes to stdout
|
||||
let _ = std::io::Write::flush(&mut std::io::stdout());
|
||||
}
|
||||
OutputTarget::Stderr => {
|
||||
eprint!("{args}");
|
||||
let _ = std::io::Write::flush(&mut std::io::stderr());
|
||||
}
|
||||
OutputTarget::Quiet => {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ const BURNEDIN_SUBTITLE_EXTRACTION: &str = "Burned-in subtitle extraction";
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(name = "CCExtractor")]
|
||||
#[command(author = "Carlos Fernandez Sanz, Volker Quetschke.")]
|
||||
#[command(version = "1.0")]
|
||||
#[command(version = "0.96.5")]
|
||||
#[command(about = "Teletext portions taken from Petr Kutalek's telxcc
|
||||
--------------------------------------------------------------------------
|
||||
Originally based on McPoodle's tools. Check his page for lots of information
|
||||
@@ -227,7 +227,7 @@ pub struct Args {
|
||||
/// "all[EUC-KR]") and it will encode specified charset to
|
||||
/// UTF-8 using iconv. See iconv documentation to check if
|
||||
/// required encoding/charset is supported.
|
||||
#[arg(long="service", value_name="services", verbatim_doc_comment, help_heading=OPTION_AFFECT_PROCESSED)]
|
||||
#[arg(long="service", alias="svc", value_name="services", verbatim_doc_comment, help_heading=OPTION_AFFECT_PROCESSED)]
|
||||
pub cea708services: Option<String>,
|
||||
/// With the exception of McPoodle's raw format, which is just the closed
|
||||
/// caption data with no other info, CCExtractor can usually detect the
|
||||
@@ -290,6 +290,18 @@ pub struct Args {
|
||||
/// DVD Recorder)
|
||||
#[arg(long="90090", verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
pub mpeg90090: bool,
|
||||
/// Set the frame rate for SCC (Scenarist Closed Caption) input files.
|
||||
/// Valid values: 29.97 (default), 24, 25, 30
|
||||
/// Example: --scc-framerate 25
|
||||
#[arg(long="scc-framerate", verbatim_doc_comment, value_name="fps", help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
pub scc_framerate: Option<String>,
|
||||
/// Enable bandwidth-aware timing for SCC output (issue #1120).
|
||||
/// When enabled, captions are pre-loaded ahead of their display time
|
||||
/// based on the EIA-608 transmission bandwidth (2 bytes/frame).
|
||||
/// This ensures YouTube and broadcast compliance by preventing
|
||||
/// caption collisions. Use this for professional SCC output.
|
||||
#[arg(long="scc-accurate-timing", verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
pub scc_accurate_timing: bool,
|
||||
/// By default, ccextractor will process input files in
|
||||
/// sequence as if they were all one large file (i.e.
|
||||
/// split by a generic, non video-aware tool. If you
|
||||
@@ -390,10 +402,10 @@ pub struct Args {
|
||||
/// reference to the received data. Use this parameter if
|
||||
/// you prefer your own reference. Note: Current this only
|
||||
/// affects Teletext in timed transcript with --datets.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
#[arg(long, alias="noautotimeref", verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
pub no_autotimeref: bool,
|
||||
/// Ignore SCTE-20 data if present.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
#[arg(long, alias="noscte20", verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
pub no_scte20: bool,
|
||||
/// Create a separate file for CSS instead of inline.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OPTIONS_AFFECTING_INPUT_FILES)]
|
||||
@@ -448,7 +460,7 @@ pub struct Args {
|
||||
/// Do not append a BOM (Byte Order Mark) to output
|
||||
/// files. Note that this may break files when using
|
||||
/// Windows. This is the default in non-Windows builds.
|
||||
#[arg(long, verbatim_doc_comment, conflicts_with="bom", help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
#[arg(long, alias="nobom", verbatim_doc_comment, conflicts_with="bom", help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
pub no_bom: bool,
|
||||
/// Encode subtitles in Unicode instead of Latin-1.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
@@ -481,7 +493,7 @@ pub struct Args {
|
||||
pub defaultcolor: Option<String>,
|
||||
/// Sentence capitalization. Use if you hate
|
||||
/// ALL CAPS in subtitles.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
#[arg(long, alias="sc", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
pub sentencecap: bool,
|
||||
/// Add the contents of 'file' to the list of words
|
||||
/// that must be capitalized. For example, if file
|
||||
@@ -625,6 +637,18 @@ pub struct Args {
|
||||
/// bypassing hacks that are Tesseract-specific.
|
||||
#[arg(long, verbatim_doc_comment, value_name="mode", help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
pub psm: Option<u8>,
|
||||
/// Split subtitle images into lines before OCR.
|
||||
/// Uses PSM 7 (single text line mode) for each line,
|
||||
/// which can improve accuracy for multi-line bitmap subtitles
|
||||
/// (VOBSUB, DVD, DVB).
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
pub ocr_line_split: bool,
|
||||
/// Disable the OCR character blacklist.
|
||||
/// By default, CCExtractor blacklists characters like |, \, `, _
|
||||
/// that are commonly misrecognized (e.g. 'I' as '|').
|
||||
/// Use this flag to disable the blacklist.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_OUTPUT_FILES)]
|
||||
pub no_ocr_blacklist: bool,
|
||||
/// For MKV subtitles, select which language's caption
|
||||
/// stream will be processed. e.g. 'eng' for English.
|
||||
/// Language codes can be either the 3 letters bibliographic
|
||||
@@ -677,7 +701,7 @@ pub struct Args {
|
||||
/// If you hate the repeated lines caused by the roll-up
|
||||
/// emulation, you can have ccextractor write only one
|
||||
/// line at a time, getting rid of these repeated lines.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_BUFFERING)]
|
||||
#[arg(long, alias="noru", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_BUFFERING)]
|
||||
pub no_rollup: bool,
|
||||
/// roll-up captions can consist of 2, 3 or 4 visible
|
||||
/// lines at any time (the number of lines is part of
|
||||
@@ -806,10 +830,10 @@ pub struct Args {
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
pub parsedebug: bool,
|
||||
/// Print Program Association Table dump.
|
||||
#[arg(long="parsePAT", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
#[arg(long="parsePAT", alias="pat", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
pub parse_pat: bool,
|
||||
/// Print Program Map Table dump.
|
||||
#[arg(long="parsePMT", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
#[arg(long="parsePMT", alias="pmt", verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
pub parse_pmt: bool,
|
||||
/// Hex-dump defective TS packets.
|
||||
#[arg(long, verbatim_doc_comment, help_heading=OUTPUT_AFFECTING_DEBUG_DATA)]
|
||||
@@ -844,7 +868,7 @@ pub struct Args {
|
||||
/// for video streams that have both teletext packets
|
||||
/// and CEA-608/708 packets (if teletext is processed
|
||||
/// then CEA-608/708 processing is disabled).
|
||||
#[arg(long, verbatim_doc_comment, conflicts_with="teletext", help_heading=TELETEXT_OPTIONS)]
|
||||
#[arg(long, alias="noteletext", verbatim_doc_comment, conflicts_with="teletext", help_heading=TELETEXT_OPTIONS)]
|
||||
pub no_teletext: bool,
|
||||
/// Use the passed format to customize the (Timed) Transcript
|
||||
/// output. The format must be like this: 1100100 (7 digits).
|
||||
@@ -973,6 +997,8 @@ pub enum InFormat {
|
||||
Mkv,
|
||||
/// Material Exchange Format (MXF).
|
||||
Mxf,
|
||||
/// Scenarist Closed Caption (SCC).
|
||||
Scc,
|
||||
#[cfg(feature = "wtv_debug")]
|
||||
// For WTV Debug mode only
|
||||
Hex,
|
||||
|
||||
@@ -8,7 +8,9 @@ use crate::{anchor_hdcc, current_fps, process_hdcc, store_hdcc, MPEG_CLOCK_FREQ}
|
||||
use lib_ccxr::common::AvcNalType;
|
||||
use lib_ccxr::util::log::DebugMessageFlag;
|
||||
use lib_ccxr::{debug, info};
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
use std::os::raw::c_void;
|
||||
use std::slice;
|
||||
@@ -453,7 +455,7 @@ pub fn hex_dump(data: &[u8]) {
|
||||
|
||||
// Print hex bytes
|
||||
for byte in chunk {
|
||||
print!("{:02X} ", byte);
|
||||
print!("{byte:02X} ");
|
||||
}
|
||||
|
||||
// Pad if less than 16 bytes
|
||||
|
||||
@@ -21,6 +21,19 @@ pub unsafe extern "C" fn ccxr_process_avc(
|
||||
return 0;
|
||||
}
|
||||
|
||||
// In report-only mode (-out=report), enc_ctx is NULL because no encoder is created.
|
||||
// Skip AVC processing in this case since we can't output captions without an encoder.
|
||||
// Return the full buffer length to indicate we've "consumed" the data.
|
||||
if enc_ctx.is_null() {
|
||||
return avcbuflen;
|
||||
}
|
||||
|
||||
// dec_ctx and sub should never be NULL in normal operation, but check defensively
|
||||
if dec_ctx.is_null() || sub.is_null() {
|
||||
info!("Warning: dec_ctx or sub is NULL in ccxr_process_avc");
|
||||
return avcbuflen;
|
||||
}
|
||||
|
||||
// Create a safe slice from the raw pointer
|
||||
let avc_slice = std::slice::from_raw_parts_mut(avcbuf, avcbuflen);
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let mut payload_type = 0;
|
||||
let mut payload_type: u32 = 0;
|
||||
while seibuf_idx < seibuf.len() && seibuf[seibuf_idx] == 0xff {
|
||||
payload_type += 255;
|
||||
seibuf_idx += 1;
|
||||
@@ -60,10 +60,10 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
|
||||
return seibuf_idx;
|
||||
}
|
||||
|
||||
payload_type += seibuf[seibuf_idx] as i32;
|
||||
payload_type += seibuf[seibuf_idx] as u32;
|
||||
seibuf_idx += 1;
|
||||
|
||||
let mut payload_size = 0;
|
||||
let mut payload_size: u32 = 0;
|
||||
while seibuf_idx < seibuf.len() && seibuf[seibuf_idx] == 0xff {
|
||||
payload_size += 255;
|
||||
seibuf_idx += 1;
|
||||
@@ -73,7 +73,7 @@ pub fn sei_message(ctx: &mut AvcContextRust, seibuf: &[u8]) -> usize {
|
||||
return seibuf_idx;
|
||||
}
|
||||
|
||||
payload_size += seibuf[seibuf_idx] as i32;
|
||||
payload_size += seibuf[seibuf_idx] as u32;
|
||||
seibuf_idx += 1;
|
||||
|
||||
let mut broken = false;
|
||||
@@ -226,12 +226,10 @@ pub fn user_data_registered_itu_t_t35(ctx: &mut AvcContextRust, userbuf: &[u8])
|
||||
}
|
||||
|
||||
// Save the data and process once we know the sequence number
|
||||
if ((ctx.cc_count as usize + local_cc_count) * 3) + 1 > ctx.cc_databufsize {
|
||||
let required_size = ((ctx.cc_count as usize + local_cc_count) * 3) + 1;
|
||||
if required_size > ctx.cc_data.len() {
|
||||
let new_size = ((ctx.cc_count as usize + local_cc_count) * 6) + 1;
|
||||
unsafe {
|
||||
ctx.cc_data.set_len(new_size);
|
||||
}
|
||||
ctx.cc_data.reserve(new_size);
|
||||
ctx.cc_data.resize(new_size, 0);
|
||||
ctx.cc_databufsize = new_size;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ use lib_ccxr::common::DtvccServiceCharset;
|
||||
use lib_ccxr::common::EncoderConfig;
|
||||
use lib_ccxr::common::EncodersTranscriptFormat;
|
||||
use lib_ccxr::common::Language;
|
||||
use lib_ccxr::common::MkvLangFilter;
|
||||
use lib_ccxr::common::Options;
|
||||
use lib_ccxr::common::OutputFormat;
|
||||
use lib_ccxr::common::SelectCodec;
|
||||
@@ -181,9 +182,11 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
|
||||
(*ccx_s_options).ocr_oem = options.ocr_oem as _;
|
||||
(*ccx_s_options).psm = options.psm as _;
|
||||
(*ccx_s_options).ocr_quantmode = options.ocr_quantmode as _;
|
||||
if let Some(mkvlang) = options.mkvlang {
|
||||
(*ccx_s_options).ocr_line_split = options.ocr_line_split as _;
|
||||
(*ccx_s_options).ocr_blacklist = options.ocr_blacklist as _;
|
||||
if let Some(ref mkvlang) = options.mkvlang {
|
||||
(*ccx_s_options).mkvlang =
|
||||
replace_rust_c_string((*ccx_s_options).mkvlang, mkvlang.to_ctype().as_str());
|
||||
replace_rust_c_string((*ccx_s_options).mkvlang, mkvlang.as_raw_str());
|
||||
}
|
||||
(*ccx_s_options).analyze_video_stream = options.analyze_video_stream as _;
|
||||
(*ccx_s_options).hardsubx_ocr_mode = options.hardsubx_ocr_mode.to_ctype();
|
||||
@@ -209,11 +212,9 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
|
||||
replace_rust_c_string((*ccx_s_options).udpaddr, &options.udpaddr.clone().unwrap());
|
||||
}
|
||||
(*ccx_s_options).udpport = options.udpport as _;
|
||||
if options.tcpport.is_some() {
|
||||
(*ccx_s_options).tcpport = replace_rust_c_string(
|
||||
(*ccx_s_options).tcpport,
|
||||
&options.tcpport.unwrap().to_string(),
|
||||
);
|
||||
if let Some(tcpport) = options.tcpport {
|
||||
(*ccx_s_options).tcpport =
|
||||
replace_rust_c_string((*ccx_s_options).tcpport, &tcpport.to_string());
|
||||
}
|
||||
if options.tcp_password.is_some() {
|
||||
(*ccx_s_options).tcp_password = replace_rust_c_string(
|
||||
@@ -233,11 +234,9 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
|
||||
&options.srv_addr.clone().unwrap(),
|
||||
);
|
||||
}
|
||||
if options.srv_port.is_some() {
|
||||
(*ccx_s_options).srv_port = replace_rust_c_string(
|
||||
(*ccx_s_options).srv_port,
|
||||
&options.srv_port.unwrap().to_string(),
|
||||
);
|
||||
if let Some(srv_port) = options.srv_port {
|
||||
(*ccx_s_options).srv_port =
|
||||
replace_rust_c_string((*ccx_s_options).srv_port, &srv_port.to_string());
|
||||
}
|
||||
(*ccx_s_options).noautotimeref = options.noautotimeref as _;
|
||||
(*ccx_s_options).input_source = options.input_source as _;
|
||||
@@ -251,15 +250,12 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
|
||||
// Subsequent calls from ccxr_demuxer_open/close should NOT modify inputfile because
|
||||
// C code holds references to those strings throughout processing.
|
||||
// Freeing them would cause use-after-free and double-free errors.
|
||||
if options.inputfile.is_some() && (*ccx_s_options).inputfile.is_null() {
|
||||
(*ccx_s_options).inputfile = string_to_c_chars(options.inputfile.clone().unwrap());
|
||||
(*ccx_s_options).num_input_files = options
|
||||
.inputfile
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.filter(|s| !s.is_empty())
|
||||
.count() as _;
|
||||
if let Some(ref inputfile) = options.inputfile {
|
||||
if (*ccx_s_options).inputfile.is_null() {
|
||||
(*ccx_s_options).inputfile = string_to_c_chars(inputfile.clone());
|
||||
(*ccx_s_options).num_input_files =
|
||||
inputfile.iter().filter(|s| !s.is_empty()).count() as _;
|
||||
}
|
||||
}
|
||||
(*ccx_s_options).demux_cfg = options.demux_cfg.to_ctype();
|
||||
// Only set enc_cfg on the first call (when output_filename is null).
|
||||
@@ -275,6 +271,10 @@ pub unsafe fn copy_from_rust(ccx_s_options: *mut ccx_s_options, options: Options
|
||||
(*ccx_s_options).multiprogram = options.multiprogram as _;
|
||||
(*ccx_s_options).out_interval = options.out_interval;
|
||||
(*ccx_s_options).segment_on_key_frames_only = options.segment_on_key_frames_only as _;
|
||||
(*ccx_s_options).scc_framerate = options.scc_framerate;
|
||||
// Also copy to enc_cfg so the encoder uses the same frame rate for SCC output
|
||||
(*ccx_s_options).enc_cfg.scc_framerate = options.scc_framerate;
|
||||
(*ccx_s_options).enc_cfg.scc_accurate_timing = options.scc_accurate_timing.into();
|
||||
#[cfg(feature = "with_libcurl")]
|
||||
{
|
||||
if options.curlposturl.is_some() {
|
||||
@@ -416,13 +416,13 @@ pub unsafe fn copy_to_rust(ccx_s_options: *const ccx_s_options) -> Options {
|
||||
options.ocr_oem = (*ccx_s_options).ocr_oem as i8;
|
||||
options.psm = (*ccx_s_options).psm;
|
||||
options.ocr_quantmode = (*ccx_s_options).ocr_quantmode as u8;
|
||||
options.ocr_line_split = (*ccx_s_options).ocr_line_split != 0;
|
||||
options.ocr_blacklist = (*ccx_s_options).ocr_blacklist != 0;
|
||||
|
||||
// Handle mkvlang (C string to Option<Language>)
|
||||
// Handle mkvlang (C string to Option<MkvLangFilter>)
|
||||
if !(*ccx_s_options).mkvlang.is_null() {
|
||||
options.mkvlang = Some(
|
||||
Language::from_str(&c_char_to_string((*ccx_s_options).mkvlang))
|
||||
.expect("Invalid language"),
|
||||
)
|
||||
let lang_str = c_char_to_string((*ccx_s_options).mkvlang);
|
||||
options.mkvlang = MkvLangFilter::new(&lang_str).ok();
|
||||
}
|
||||
|
||||
options.analyze_video_stream = (*ccx_s_options).analyze_video_stream != 0;
|
||||
@@ -531,6 +531,8 @@ pub unsafe fn copy_to_rust(ccx_s_options: *const ccx_s_options) -> Options {
|
||||
options.multiprogram = (*ccx_s_options).multiprogram != 0;
|
||||
options.out_interval = (*ccx_s_options).out_interval;
|
||||
options.segment_on_key_frames_only = (*ccx_s_options).segment_on_key_frames_only != 0;
|
||||
options.scc_framerate = (*ccx_s_options).scc_framerate;
|
||||
options.scc_accurate_timing = (*ccx_s_options).enc_cfg.scc_accurate_timing != 0;
|
||||
|
||||
// Handle optional features with conditional compilation
|
||||
#[cfg(feature = "with_libcurl")]
|
||||
@@ -873,6 +875,7 @@ impl CType<u32> for StreamMode {
|
||||
StreamMode::Gxf => ccx_stream_mode_enum_CCX_SM_GXF as _,
|
||||
StreamMode::Mkv => ccx_stream_mode_enum_CCX_SM_MKV as _,
|
||||
StreamMode::Mxf => ccx_stream_mode_enum_CCX_SM_MXF as _,
|
||||
StreamMode::Scc => ccx_stream_mode_enum_CCX_SM_SCC as _,
|
||||
StreamMode::Autodetect => ccx_stream_mode_enum_CCX_SM_AUTODETECT as _,
|
||||
_ => ccx_stream_mode_enum_CCX_SM_ELEMENTARY_OR_NOT_FOUND as _,
|
||||
}
|
||||
@@ -972,6 +975,8 @@ impl CType<encoder_cfg> for EncoderConfig {
|
||||
null_pointer()
|
||||
},
|
||||
extract_only_708: self.extract_only_708 as _,
|
||||
scc_framerate: 0, // Will be set from ccx_options.scc_framerate in copy_to_c
|
||||
scc_accurate_timing: 0, // Will be set from ccx_options.scc_accurate_timing in copy_to_c
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1064,7 +1069,6 @@ impl CType<program_info> for ProgramInfo {
|
||||
program_info {
|
||||
pid: self.pid,
|
||||
program_number: self.program_number,
|
||||
initialized_ocr: self.initialized_ocr as c_int,
|
||||
_bitfield_align_1: [],
|
||||
_bitfield_1: bf1,
|
||||
version: self.version,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user