mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2026-02-04 05:44:53 +00:00
feat(matroska): Add VOBSUB subtitle extraction support for MKV files
Previously, CCExtractor would only print "Error: VOBSUB not supported" when encountering VOBSUB (S_VOBSUB) subtitle tracks in Matroska files. This left users without any usable output. This commit adds full VOBSUB extraction support: - Generate proper .idx index files with timestamps and file positions - Generate proper .sub files with PS-wrapped SPU data - Correct PS Pack header with SCR derived from timestamps - Correct PES header with PTS for each subtitle - 2048-byte block alignment (standard VOBSUB format) The output is compatible with VLC, FFmpeg, and other players that support VobSub subtitle format. Tested with sample from issue #1371 - output validates correctly with FFprobe and produces identical subtitle data to mkvextract. Fixes #1371 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1334,11 +1334,245 @@ char *ass_ssa_sentence_erase_read_order(char *text)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* VOBSUB support: build an MPEG-2 Program Stream pack header.
 *
 * Exactly 14 bytes are written to buf:
 *   bytes 0-3    pack start code 00 00 01 BA
 *   bytes 4-9    SCR: '01' base[32:30] '1' base[29:15] '1' base[14:0] '1'
 *                ext[8:0] '1'
 *   bytes 10-12  mux rate (22 bits) followed by two marker bits
 *   byte  13     five reserved bits (all set) + 3-bit stuffing length (0)
 *
 * buf        destination; the caller must provide at least 14 bytes
 * pts_90khz  value used directly as the SCR base; the SCR extension is
 *            always written as 0
 */
static void generate_ps_pack_header(unsigned char *buf, ULLONG pts_90khz)
{
	const int ext = 0;	   // SCR extension
	const int rate = 10080;	   // Mux rate (10080 = standard DVD rate)
	unsigned char *out = buf;

	// Pack start code
	*out++ = 0x00;
	*out++ = 0x00;
	*out++ = 0x01;
	*out++ = 0xBA;

	// SCR base and extension, interleaved with marker bits.
	// Each field is isolated first and then shifted into place.
	*out++ = 0x44 | (((pts_90khz >> 30) & 0x07) << 3) | ((pts_90khz >> 28) & 0x03); // '01' base[32:30] '1' base[29:28]
	*out++ = (pts_90khz >> 20) & 0xFF;						   // base[27:20]
	*out++ = 0x04 | (((pts_90khz >> 15) & 0x1F) << 3) | ((pts_90khz >> 13) & 0x03); // base[19:15] '1' base[14:13]
	*out++ = (pts_90khz >> 5) & 0xFF;						   // base[12:5]
	*out++ = 0x04 | ((pts_90khz & 0x1F) << 3) | ((ext >> 7) & 0x03);		   // base[4:0] '1' ext[8:7]
	*out++ = ((ext << 1) & 0xFE) | 0x01;						   // ext[6:0] '1'

	// Mux rate: 22 bits, then two marker bits
	*out++ = (rate >> 14) & 0xFF;
	*out++ = (rate >> 6) & 0xFF;
	*out++ = ((rate << 2) & 0xFC) | 0x03;

	// Reserved bits set, stuffing length = 0
	*out = 0xF8;
}
|
||||
|
||||
/* VOBSUB support: build the PES header for a private stream 1 packet.
 *
 * Exactly 15 bytes are written to buf:
 *   bytes 0-3    start code 00 00 01 BD (private stream 1)
 *   bytes 4-5    PES packet length, big-endian
 *   bytes 6-7    flag bytes 0x81 and 0x80 (PTS present, no DTS)
 *   byte  8      PES header data length (5, PTS only)
 *   bytes 9-13   PTS: '0010' PTS[32:30] '1' PTS[29:15] '1' PTS[14:0] '1'
 *   byte  14     substream ID, 0x20 + stream_id
 *
 * buf           destination; the caller must provide at least 15 bytes
 * pts_90khz     presentation timestamp to encode
 * payload_size  number of payload bytes that will follow this header
 * stream_id     offset added to the base substream ID 0x20
 *
 * Returns the number of header bytes written, which is always 15.
 *
 * Only the low 16 bits of the computed packet length are stored, so a
 * payload_size above 65526 cannot be represented in bytes 4-5.
 */
static int generate_pes_header(unsigned char *buf, ULLONG pts_90khz, int payload_size, int stream_id)
{
	const int pts_field_len = 5; // PTS only
	// Bytes counted by the length field: 3 (flags + header data length)
	// + PTS field + 1 (substream ID) + payload
	const int packet_len = 3 + pts_field_len + 1 + payload_size;
	int pos = 0;

	// Start code for private stream 1
	buf[pos++] = 0x00;
	buf[pos++] = 0x00;
	buf[pos++] = 0x01;
	buf[pos++] = 0xBD;

	// PES packet length
	buf[pos++] = (packet_len >> 8) & 0xFF;
	buf[pos++] = packet_len & 0xFF;

	buf[pos++] = 0x81;	    // PES flags: MPEG-2, original
	buf[pos++] = 0x80;	    // PTS_DTS_flags = 10 (PTS only)
	buf[pos++] = pts_field_len; // PES header data length

	// PTS, split into 3 + 15 + 15 bits with a marker bit after each group
	buf[pos++] = 0x21 | (((pts_90khz >> 30) & 0x07) << 1); // '0010' PTS[32:30] '1'
	buf[pos++] = (pts_90khz >> 22) & 0xFF;		       // PTS[29:22]
	buf[pos++] = 0x01 | (((pts_90khz >> 15) & 0x7F) << 1); // PTS[21:15] '1'
	buf[pos++] = (pts_90khz >> 7) & 0xFF;		       // PTS[14:7]
	buf[pos++] = 0x01 | ((pts_90khz & 0x7F) << 1);	       // PTS[6:0] '1'

	// Substream ID (0x20 = first VOBSUB stream)
	buf[pos++] = 0x20 + stream_id;

	return pos; // 15
}
|
||||
|
||||
/* VOBSUB support: format a millisecond count as an .idx timestamp.
 *
 * The result has the form HH:MM:SS:mmm, where mmm is the millisecond part.
 * Hours are printed with at least two digits and are not wrapped, so values
 * of 100 hours or more produce a longer hours field.
 *
 * buf           destination for the NUL-terminated string
 * bufsize       capacity of buf in bytes; passed straight to snprintf
 * milliseconds  time value to format
 */
static void generate_vobsub_timestamp(char *buf, size_t bufsize, ULLONG milliseconds)
{
	const ULLONG total_seconds = milliseconds / 1000;
	const ULLONG total_minutes = total_seconds / 60;

	const ULLONG ms_part = milliseconds % 1000;
	const ULLONG sec_part = total_seconds % 60;
	const ULLONG min_part = total_minutes % 60;
	const ULLONG hour_part = total_minutes / 60;

	snprintf(buf, bufsize, "%02" LLU_M ":%02" LLU_M ":%02" LLU_M ":%03" LLU_M,
		 hour_part, min_part, sec_part, ms_part);
}
|
||||
|
||||
/* VOBSUB support: Save VOBSUB track to .idx and .sub files */
|
||||
static void save_vobsub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
if (track->sentence_count == 0)
|
||||
{
|
||||
mprint("\nNo VOBSUB subtitles to write");
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate base filename (without extension)
|
||||
const char *lang_to_use = track->lang_ietf ? track->lang_ietf : track->lang;
|
||||
const char *basename = get_basename(mkv_ctx->filename);
|
||||
size_t needed = strlen(basename) + strlen(lang_to_use) + 32;
|
||||
char *base_filename = malloc(needed);
|
||||
if (base_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
|
||||
if (track->lang_index == 0)
|
||||
snprintf(base_filename, needed, "%s_%s", basename, lang_to_use);
|
||||
else
|
||||
snprintf(base_filename, needed, "%s_%s_" LLD, basename, lang_to_use, track->lang_index);
|
||||
|
||||
// Create .sub filename
|
||||
char *sub_filename = malloc(needed + 5);
|
||||
if (sub_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(sub_filename, needed + 5, "%s.sub", base_filename);
|
||||
|
||||
// Create .idx filename
|
||||
char *idx_filename = malloc(needed + 5);
|
||||
if (idx_filename == NULL)
|
||||
fatal(EXIT_NOT_ENOUGH_MEMORY, "In save_vobsub_track: Out of memory.");
|
||||
snprintf(idx_filename, needed + 5, "%s.idx", base_filename);
|
||||
|
||||
mprint("\nOutput files: %s, %s", idx_filename, sub_filename);
|
||||
|
||||
// Open .sub file
|
||||
int sub_desc;
|
||||
#ifdef WIN32
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
sub_desc = open(sub_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (sub_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .sub file");
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Open .idx file
|
||||
int idx_desc;
|
||||
#ifdef WIN32
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
|
||||
#else
|
||||
idx_desc = open(idx_filename, O_WRONLY | O_CREAT | O_TRUNC, S_IWUSR | S_IRUSR);
|
||||
#endif
|
||||
if (idx_desc < 0)
|
||||
{
|
||||
mprint("\nError: Cannot create .idx file");
|
||||
close(sub_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write .idx header (from CodecPrivate)
|
||||
if (track->header != NULL)
|
||||
write_wrapped(idx_desc, track->header, strlen(track->header));
|
||||
|
||||
// Add language identifier line
|
||||
char lang_line[128];
|
||||
snprintf(lang_line, sizeof(lang_line), "\nid: %s, index: 0\n", lang_to_use);
|
||||
write_wrapped(idx_desc, lang_line, strlen(lang_line));
|
||||
|
||||
// Block size for alignment (2048 bytes = 0x800)
|
||||
const int VOBSUB_BLOCK_SIZE = 2048;
|
||||
|
||||
// Buffer for PS/PES headers and padding
|
||||
unsigned char header_buf[32];
|
||||
unsigned char zero_buf[VOBSUB_BLOCK_SIZE];
|
||||
memset(zero_buf, 0, VOBSUB_BLOCK_SIZE);
|
||||
|
||||
ULLONG file_pos = 0;
|
||||
|
||||
// Write each subtitle
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
mkv_ctx->sentence_count++;
|
||||
|
||||
// Convert timestamp to 90kHz PTS
|
||||
ULLONG pts_90khz = sentence->time_start * 90;
|
||||
|
||||
// Write timestamp entry to .idx
|
||||
char timestamp[32];
|
||||
generate_vobsub_timestamp(timestamp, sizeof(timestamp), sentence->time_start);
|
||||
char idx_entry[128];
|
||||
snprintf(idx_entry, sizeof(idx_entry), "timestamp: %s, filepos: %09" LLX_M "\n",
|
||||
timestamp, file_pos);
|
||||
write_wrapped(idx_desc, idx_entry, strlen(idx_entry));
|
||||
|
||||
// Generate PS Pack header (14 bytes)
|
||||
generate_ps_pack_header(header_buf, pts_90khz);
|
||||
write_wrapped(sub_desc, (char *)header_buf, 14);
|
||||
|
||||
// Generate PES header (15 bytes)
|
||||
int pes_header_len = generate_pes_header(header_buf, pts_90khz, sentence->text_size, 0);
|
||||
write_wrapped(sub_desc, (char *)header_buf, pes_header_len);
|
||||
|
||||
// Write SPU data
|
||||
write_wrapped(sub_desc, sentence->text, sentence->text_size);
|
||||
|
||||
// Calculate bytes written and pad to block boundary
|
||||
ULLONG bytes_written = 14 + pes_header_len + sentence->text_size;
|
||||
ULLONG padding_needed = VOBSUB_BLOCK_SIZE - (bytes_written % VOBSUB_BLOCK_SIZE);
|
||||
if (padding_needed < VOBSUB_BLOCK_SIZE)
|
||||
{
|
||||
write_wrapped(sub_desc, (char *)zero_buf, padding_needed);
|
||||
bytes_written += padding_needed;
|
||||
}
|
||||
|
||||
file_pos += bytes_written;
|
||||
}
|
||||
|
||||
close(sub_desc);
|
||||
close(idx_desc);
|
||||
free(base_filename);
|
||||
free(sub_filename);
|
||||
free(idx_filename);
|
||||
}
|
||||
|
||||
void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *track)
|
||||
{
|
||||
char *filename;
|
||||
int desc;
|
||||
|
||||
// VOBSUB tracks need special handling - separate .idx and .sub files
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
save_vobsub_track(mkv_ctx, track);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mkv_ctx->ctx->cc_to_stdout == CCX_TRUE)
|
||||
{
|
||||
desc = 1; // file descriptor of stdout
|
||||
@@ -1358,11 +1592,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
if (track->header != NULL)
|
||||
write_wrapped(desc, track->header, strlen(track->header));
|
||||
|
||||
if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
mprint("\nError: VOBSUB not supported");
|
||||
}
|
||||
|
||||
for (int i = 0; i < track->sentence_count; i++)
|
||||
{
|
||||
struct matroska_sub_sentence *sentence = track->sentences[i];
|
||||
@@ -1497,10 +1726,6 @@ void save_sub_track(struct matroska_ctx *mkv_ctx, struct matroska_sub_track *tra
|
||||
free(timestamp_start);
|
||||
free(timestamp_end);
|
||||
}
|
||||
else if (track->codec_id == MATROSKA_TRACK_SUBTITLE_CODEC_ID_VOBSUB)
|
||||
{
|
||||
// TODO: Add support for VOBSUB
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,26 +5,31 @@
|
||||
/* printf conversion suffixes for the project's 64-bit integer types.
 *
 * The *_M macros hold only the length modifier and conversion letter, without
 * the leading '%', so that flags and widths can be placed in front of them
 * (for example "%02" LLU_M or "%09" LLX_M). LLD and LLU are the complete
 * signed and unsigned decimal conversions.
 */
#if (defined(WIN32) || defined(_WIN32_WCE)) && (defined(__MINGW32__) || !defined(__GNUC__))
/* Windows: MinGW or a non-GNU compiler */
#define LLD_M "I64d"
#define LLU_M "I64u"
#define LLX_M "I64x"
#elif defined(__SYMBIAN32__)
#define LLD_M "d"
#define LLU_M "u"
#define LLX_M "x"
#elif defined(_LP64) && !(defined(__DARWIN__) || defined(__APPLE__))
/* Unix 64 bits, other than Darwin/Apple */
#define LLD_M "ld"
#define LLU_M "lu"
#define LLX_M "lx"
#else
/* Darwin/Apple and Unix 32 bits */
#define LLD_M "lld"
#define LLU_M "llu"
#define LLX_M "llx"
#endif

/* Complete conversions, built from the suffixes by string literal concatenation */
#define LLD "%" LLD_M
#define LLU "%" LLU_M
|
||||
|
||||
Reference in New Issue
Block a user