mirror of
https://github.com/CCExtractor/ccextractor.git
synced 2026-02-04 05:44:53 +00:00
Break incoming subs into sentences (through a buffer), and remove duplicates
This commit is contained in:
59
tests/Makefile
Normal file
59
tests/Makefile
Normal file
@@ -0,0 +1,59 @@
|
||||
# Build and run the libcheck-based unit tests.
#
#   make            - clean, rebuild the test runner and run it
#   DEBUG=1 make    - same, with -DDEBUG added to the compiler flags
#   make clean      - remove the runner and every build artefact
SHELL = /bin/sh

CC=gcc
# SYS := $(shell gcc -dumpmachine)
CFLAGS=-O0 -std=gnu99 -D ENABLE_OCR -g -ggdb -rdynamic
#-Q -da -v

# enable COVERAGE
# CFLAGS+=-fprofile-arcs -ftest-coverage

# add debug flag
ifdef DEBUG
CFLAGS+=-DDEBUG
endif

#ALL_FLAGS = -Wno-write-strings -D_FILE_OFFSET_BITS=64 -DVERSION_FILE_PRESENT
LDFLAGS=-lm -g

CFLAGS+=$(shell pkg-config --cflags check)
LDFLAGS+=$(shell pkg-config --libs check)

# TODO: need to rewrite this. Need new way to load sources for testing
SRC=$(wildcard ../src/lib_ccx/ccx_encoders_splitbysentence.c)
OBJS=

SRC_SUITE=$(wildcard *_suite.c)
OBJ_SUITE=$(patsubst %_suite.c, %_suite.o, $(SRC_SUITE))

OBJS+=$(OBJ_SUITE)

# `all` runs `clean` and then `test`; with `make -j` the two prerequisites
# could otherwise run concurrently and delete objects while they are built.
.NOTPARALLEL:

.PHONY: all
all: clean test

# explicit output name : -o $@
%.o: %.c
	$(CC) -c $(ALL_FLAGS) $(CFLAGS) $<

# runtest.o is built by the pattern rule above. Listing it and $(SRC) as
# prerequisites makes the runner relink whenever either of them changes.
runtest: runtest.o $(OBJS) $(SRC)
	@echo "+----------------------------------------------+"
	@echo "| BUILD TESTS |"
	@echo "+----------------------------------------------+"
	$(CC) $^ $(ALL_FLAGS) $(CFLAGS) $(LDFLAGS) -o $@

.PHONY: test
test: runtest
	@echo "+----------------------------------------------+"
	@echo "| START TESTS |"
	@echo "+----------------------------------------------+"
	./runtest

# `rm -f` stays silent and succeeds when there is nothing to delete.
.PHONY: clean
clean:
	rm -f runtest
	rm -f *.o
# coverage info
	rm -f *.gcda
	rm -f *.gcno
# debug info
	rm -f *.c.*
|
||||
43
tests/README.md
Normal file
43
tests/README.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# UNIT TESTING
|
||||
|
||||
This folder contains an archetype and several unit tests for CCExtractor.
|
||||
|
||||
## RUN TESTS
|
||||
|
||||
```shell
|
||||
cd tests
|
||||
make
|
||||
```
|
||||
|
||||
This will build and run all test suites.
|
||||
|
||||
If you want MORE output:
|
||||
|
||||
```shell
|
||||
DEBUG=1 make
|
||||
```
|
||||
|
||||
Where `DEBUG` is just an environment variable.
|
||||
|
||||
## DEBUGGING
|
||||
|
||||
If tests fail after your changes, you can debug them (almost all flags needed for this are already set in `tests/Makefile`).
|
||||
|
||||
Run:
|
||||
|
||||
```shell
|
||||
# build test runner
|
||||
make
|
||||
# load the test runner into the debugger:
|
||||
gdb ./runtest
|
||||
|
||||
# run under debugger:
|
||||
(gdb) run
|
||||
|
||||
# on segfault:
|
||||
(gdb) where
|
||||
```
|
||||
|
||||
## DEPENDENCIES
|
||||
|
||||
Tests are built around this library: [**libcheck**](https://github.com/libcheck/check), here is [**documentation**](https://libcheck.github.io/check/)
|
||||
305
tests/ccx_encoders_splitbysentence_suite.c
Normal file
305
tests/ccx_encoders_splitbysentence_suite.c
Normal file
@@ -0,0 +1,305 @@
|
||||
#include <stdlib.h>
#include <string.h>

#include <check.h>
#include "ccx_encoders_splitbysentence_suite.h"
|
||||
|
||||
// -------------------------------------
|
||||
// MOCKS
|
||||
// -------------------------------------
|
||||
typedef int64_t LLONG;
|
||||
#include "../src/lib_ccx/ccx_encoders_common.h"
|
||||
|
||||
// -------------------------------------
|
||||
// Private SBS-functions (for testing only)
|
||||
// -------------------------------------
|
||||
// Hand-written declaration of the function exercised by the append_string
// tests below; no definition of it appears in this file.
// NOTE(review): the signature must match the real definition - confirm
// against the source file that is linked into the test runner.
struct cc_subtitle * sbs_append_string(unsigned char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context);
|
||||
|
||||
// -------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------
|
||||
/**
 * Build a heap-allocated CC_BITMAP subtitle whose `data` is a private copy
 * of `str`.
 *
 * The paraof_ocrtext() mock in this file returns `data` unchanged, so the
 * text stands in for the result of OCR-ing a bitmap subtitle.
 *
 * @param str        NUL-terminated text to copy into the subtitle.
 * @param time_from  Stored in `start_time`.
 * @param time_trim  Stored in `end_time`.
 * @return The new subtitle; every field not set here is zero. The process is
 *         aborted if memory cannot be allocated.
 */
struct cc_subtitle * helper_create_sub(char * str, LLONG time_from, LLONG time_trim)
{
	// calloc instead of malloc: fields this helper does not set (list links
	// and the like) must not hold indeterminate values.
	struct cc_subtitle * sub = calloc(1, sizeof *sub);
	if (sub == NULL)
		abort();

	sub->data = strdup(str);
	if (sub->data == NULL)
		abort();

	sub->type = CC_BITMAP;
	// Honour the caller's timestamps; they used to be hard-coded to 1 and 100.
	sub->start_time = time_from;
	sub->end_time = time_trim;
	sub->nb_data = strlen(sub->data);

	return sub;
}
|
||||
|
||||
/**
 * Call sbs_append_string() on a temporary, writable copy of `str`.
 *
 * Tests pass string literals, which must not be handed to a function taking
 * a non-const buffer; the copy is released as soon as the call returns.
 *
 * @param str        NUL-terminated text to append to the sentence buffer.
 * @param time_from  Forwarded unchanged to sbs_append_string().
 * @param time_trim  Forwarded unchanged to sbs_append_string().
 * @param context    Encoder context forwarded to sbs_append_string().
 * @return Whatever sbs_append_string() returned. The process is aborted if
 *         the temporary copy cannot be allocated.
 */
struct cc_subtitle * helper_sbs_append_string(char * str, LLONG time_from, LLONG time_trim, struct encoder_ctx * context)
{
	char * copy = strdup(str);
	if (copy == NULL)
		abort();

	// sbs_append_string() is declared with `unsigned char *`; convert
	// explicitly rather than relying on an implicit pointer conversion.
	struct cc_subtitle * sub = sbs_append_string((unsigned char *)copy, time_from, time_trim, context);

	free(copy);
	return sub;
}
|
||||
|
||||
// -------------------------------------
|
||||
// MOCKS
|
||||
// -------------------------------------
|
||||
// Encoder context shared by all tests: allocated in setup(), released in
// teardown(), and passed to the functions under test.
struct encoder_ctx * context;
|
||||
|
||||
/**
 * Test double for freep(): intentionally does nothing.
 *
 * @param obj  Ignored; it is neither released nor modified.
 */
void freep(void * obj)
{
	(void)obj;
}
|
||||
/**
 * Test double for fatal(): intentionally does nothing and returns to the
 * caller.
 *
 * @param x    Ignored.
 * @param obj  Ignored.
 */
void fatal(int x, void * obj)
{
	(void)x;
	(void)obj;
}
|
||||
|
||||
unsigned char * paraof_ocrtext(void * sub) {
|
||||
// this is OCR -> text converter.
|
||||
// now, in our test cases, we will pass TEXT instead of OCR.
|
||||
// and will return passed text as result
|
||||
|
||||
return ((struct cc_subtitle *)sub)->data;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// TEST preparations
|
||||
// -------------------------------------
|
||||
void setup(void)
|
||||
{
|
||||
context = (struct encoder_ctx *)malloc(sizeof(struct encoder_ctx));
|
||||
context->sbs_buffer = NULL;
|
||||
context->sbs_capacity = 0;
|
||||
}
|
||||
|
||||
void teardown(void)
|
||||
{
|
||||
free(context);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
// TESTS
|
||||
// -------------------------------------
|
||||
START_TEST(test_sbs_one_simple_sentence)
|
||||
{
|
||||
struct cc_subtitle * sub = helper_create_sub("Simple sentence.", 1, 100);
|
||||
struct cc_subtitle * out = reformat_cc_bitmap_through_sentence_buffer(sub, context);
|
||||
|
||||
ck_assert_ptr_ne(out, NULL);
|
||||
ck_assert_str_eq(out->data, "Simple sentence.");
|
||||
ck_assert_ptr_eq(out->next, NULL);
|
||||
ck_assert_ptr_eq(out->prev, NULL);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
START_TEST(test_sbs_two_sentences_with_rep)
|
||||
{
|
||||
struct cc_subtitle * sub1 = helper_create_sub("asdf", 1, 100);
|
||||
struct cc_subtitle * out1 = reformat_cc_bitmap_through_sentence_buffer(sub1, context);
|
||||
ck_assert_ptr_eq(out1, NULL);
|
||||
|
||||
// second sub:
|
||||
struct cc_subtitle * sub2 = helper_create_sub("asdf Hello.", 101, 200);
|
||||
struct cc_subtitle * out2 = reformat_cc_bitmap_through_sentence_buffer(sub2, context);
|
||||
|
||||
ck_assert_ptr_ne(out2, NULL);
|
||||
ck_assert_str_eq(out2->data, "asdf Hello.");
|
||||
ck_assert_ptr_eq(out2->next, NULL);
|
||||
ck_assert_ptr_eq(out2->prev, NULL);}
|
||||
END_TEST
|
||||
|
||||
|
||||
START_TEST(test_sbs_append_string_two_separate)
|
||||
{
|
||||
unsigned char * test_strings[] = {
|
||||
"First string.",
|
||||
"Second string."
|
||||
};
|
||||
struct cc_subtitle * sub;
|
||||
unsigned char * str;
|
||||
|
||||
// first string
|
||||
str = strdup(test_strings[0]);
|
||||
sub = NULL;
|
||||
sub = sbs_append_string(str, 1, 20, context);
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, test_strings[0]);
|
||||
ck_assert_int_eq(sub->start_time, 1);
|
||||
ck_assert_int_eq(sub->end_time, 20);
|
||||
|
||||
// second string:
|
||||
str = strdup(test_strings[1]);
|
||||
sub = NULL;
|
||||
sub = sbs_append_string(str, 21, 40, context);
|
||||
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, test_strings[1]);
|
||||
ck_assert_int_eq(sub->start_time, 21);
|
||||
ck_assert_int_eq(sub->end_time, 40);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_sbs_append_string_two_with_broken_sentence)
|
||||
{
|
||||
// important !!
|
||||
// summary len == 32
|
||||
char * test_strings[] = {
|
||||
"First string",
|
||||
" ends here, deabbea."
|
||||
};
|
||||
struct cc_subtitle * sub;
|
||||
char * str;
|
||||
|
||||
// first string
|
||||
str = strdup(test_strings[0]);
|
||||
sub = sbs_append_string(str, 1, 3, context);
|
||||
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// second string:
|
||||
str = strdup(test_strings[1]);
|
||||
sub = sbs_append_string(str, 4, 5, context);
|
||||
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, "First string ends here, deabbea.");
|
||||
ck_assert_int_eq(sub->start_time, 1);
|
||||
ck_assert_int_eq(sub->end_time, 5);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_sbs_append_string_two_intersecting)
|
||||
{
|
||||
char * test_strings[] = {
|
||||
"First string",
|
||||
"First string ends here."
|
||||
};
|
||||
struct cc_subtitle * sub;
|
||||
char * str;
|
||||
|
||||
// first string
|
||||
str = strdup(test_strings[0]);
|
||||
sub = sbs_append_string(str, 1, 20, context);
|
||||
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
free(sub);
|
||||
|
||||
// second string:
|
||||
str = strdup(test_strings[1]);
|
||||
//printf("second string: [%s]\n", str);
|
||||
sub = sbs_append_string(str, 21, 40, context);
|
||||
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, "First string ends here.");
|
||||
ck_assert_int_eq(sub->start_time, 1);
|
||||
ck_assert_int_eq(sub->end_time, 40);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
START_TEST(test_sbs_append_string_real_data_1)
|
||||
{
|
||||
struct cc_subtitle * sub;
|
||||
|
||||
// 1
|
||||
sub = helper_sbs_append_string("Oleon",
|
||||
1, 0, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 2
|
||||
sub = helper_sbs_append_string("Oleon costs.",
|
||||
1, 189, context);
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, "Oleon costs.");
|
||||
|
||||
// 3
|
||||
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||
Didn't",
|
||||
190, 889, context);
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, "buried in the annex, 95 Oleon costs.");
|
||||
ck_assert_int_eq(sub->start_time, 190); // = <sub start>
|
||||
ck_assert_int_eq(sub->end_time, 783); // = <sub start> + <available time,889-190=699 > * <sentence alphanum, 28> / <sub alphanum, 33>
|
||||
ck_assert_ptr_eq(sub->next, NULL);
|
||||
|
||||
// 4
|
||||
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||
Didn't want",
|
||||
890, 1129, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 5
|
||||
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||
Didn't want to",
|
||||
1130, 1359, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 6
|
||||
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||
Didn't want to acknowledge",
|
||||
1360, 2059, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 7
|
||||
sub = helper_sbs_append_string("buried in the annex, 95 Oleon costs.\n\
|
||||
Didn't want to acknowledge the",
|
||||
2060, 2299, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 9
|
||||
sub = helper_sbs_append_string("Didn't want to acknowledge the\n\
|
||||
pressures on hospitals, schools and",
|
||||
2300, 5019, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 13
|
||||
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||
infrastructure.",
|
||||
5020, 5159, context);
|
||||
ck_assert_ptr_ne(sub, NULL);
|
||||
ck_assert_str_eq(sub->data, "Didn't want to acknowledge the pressures on hospitals, schools and infrastructure.");
|
||||
ck_assert_int_eq(sub->start_time, 784);
|
||||
ck_assert_int_eq(sub->end_time, 5159);
|
||||
ck_assert_ptr_eq(sub->next, NULL);
|
||||
|
||||
// 14
|
||||
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||
infrastructure. If",
|
||||
5160, 5529, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// 16
|
||||
sub = helper_sbs_append_string("pressures on hospitals, schools and\n\
|
||||
infrastructure. If we go",
|
||||
5530, 6559, context);
|
||||
ck_assert_ptr_eq(sub, NULL);
|
||||
|
||||
// ck_assert_int_eq(sub->start_time, 1);
|
||||
// ck_assert_int_eq(sub->end_time, 40);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
Suite * ccx_encoders_splitbysentence_suite(void)
|
||||
{
|
||||
Suite *s;
|
||||
TCase *tc_core;
|
||||
|
||||
s = suite_create("Sentence Buffer");
|
||||
|
||||
/* Overall tests */
|
||||
tc_core = tcase_create("SB: Overall");
|
||||
|
||||
tcase_add_checked_fixture(tc_core, setup, teardown);
|
||||
tcase_add_test(tc_core, test_sbs_one_simple_sentence);
|
||||
tcase_add_test(tc_core, test_sbs_two_sentences_with_rep);
|
||||
suite_add_tcase(s, tc_core);
|
||||
|
||||
/**/
|
||||
TCase *tc_append_string;
|
||||
tc_append_string = tcase_create("SB: append_string");
|
||||
tcase_add_checked_fixture(tc_append_string, setup, teardown);
|
||||
|
||||
tcase_add_test(tc_append_string, test_sbs_append_string_two_separate);
|
||||
tcase_add_test(tc_append_string, test_sbs_append_string_two_with_broken_sentence);
|
||||
tcase_add_test(tc_append_string, test_sbs_append_string_two_intersecting);
|
||||
tcase_add_test(tc_append_string, test_sbs_append_string_real_data_1);
|
||||
|
||||
suite_add_tcase(s, tc_append_string);
|
||||
|
||||
return s;
|
||||
}
|
||||
4
tests/ccx_encoders_splitbysentence_suite.h
Normal file
4
tests/ccx_encoders_splitbysentence_suite.h
Normal file
@@ -0,0 +1,4 @@
|
||||
// -------------------------------------
|
||||
// SUITE
|
||||
// -------------------------------------
|
||||
Suite * ccx_encoders_splitbysentence_suite(void);
|
||||
21
tests/runtest.c
Normal file
21
tests/runtest.c
Normal file
@@ -0,0 +1,21 @@
|
||||
#include <check.h>
|
||||
|
||||
// TESTS:
|
||||
#include "ccx_encoders_splitbysentence_suite.h"
|
||||
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int number_failed;
|
||||
Suite *s;
|
||||
SRunner *sr;
|
||||
|
||||
s = ccx_encoders_splitbysentence_suite();
|
||||
sr = srunner_create(s);
|
||||
srunner_set_fork_status(sr, CK_NOFORK);
|
||||
|
||||
srunner_run_all(sr, CK_NORMAL);
|
||||
number_failed = srunner_ntests_failed(sr);
|
||||
srunner_free(sr);
|
||||
return (number_failed == 0) ? 0 : 1;
|
||||
}
|
||||
Reference in New Issue
Block a user