1 #include <check.h>
2 #include "ccx_encoders_splitbysentence_suite.h"
3
4 // -------------------------------------
5 // MOCKS
6 // -------------------------------------
7 typedef int64_t LLONG;
8 #include "../src/lib_ccx/ccx_encoders_common.h"
9
10 //#define ENABLE_OCR
11
12
13 // -------------------------------------
14 // Private SBS-functions (for testing only)
15 // -------------------------------------
16 void sbs_reset_context();
17 struct cc_subtitle * sbs_append_string(unsigned char * str, LLONG time_from, LLONG time_trim, void * sbs_context);
18
19 // -------------------------------------
20 // Helpers
21 // -------------------------------------
22
/**
 * Read the next record from a sample file and feed it to the sentence buffer.
 *
 * A record is one line: two integers (handed to sbs_append_string() as
 * time_from and time_trim) followed by the subtitle text. Inside the text the
 * two-character sequence backslash + 'n' stands for a real newline character.
 * Leading whitespace and trailing newlines are removed from the text.
 *
 * @param fd      sample file opened for reading; NULL is treated like EOF
 * @param context unused; kept so existing callers compile unchanged
 * @return the result of sbs_append_string() for the record, or NULL when no
 *         complete record could be read
 */
struct cc_subtitle * helper_sbs_append_sub_from_file(FILE * fd, struct encoder_ctx * context) {
	char localbuf[2000];
	long long time_from = 0;
	long long time_trim = 0;

	(void)context;

	if (fd == NULL || feof(fd)) {
		return NULL;
	}

	// Both numbers are required. Scanning into long long keeps the conversion
	// specifier correct whatever the width of long or LLONG is.
	if (fscanf(fd, "%lld %lld", &time_from, &time_trim) != 2) {
		return NULL;
	}

	// The remainder of the line is the text; without it the record is incomplete.
	if (fgets(localbuf, sizeof localbuf, fd) == NULL) {
		return NULL;
	}

	// skip leading spaces
	char * str = localbuf;
	while (isspace((unsigned char)*str)) {
		str++;
	}

	// replace LITERAL "\n" with a newline char, compacting the text in place
	size_t len = strlen(str);
	size_t out = 0;
	for (size_t in = 0; in < len; in++) {
		if (str[in] == '\\' && in + 1 < len && str[in + 1] == 'n') {
			str[out++] = '\n';
			in++; // the 'n' was consumed together with the backslash
		} else {
			str[out++] = str[in];
		}
	}
	// The text may have shrunk: terminate it at its new end.
	str[out] = '\0';

	// remove trailing newlines
	while (out > 0 && str[out - 1] == '\n') {
		str[--out] = '\0';
	}

	return sbs_append_string((unsigned char *)str, time_from, time_trim, sbs_init_context());
}
69
/**
 * Build a heap-allocated CC_BITMAP subtitle that carries a copy of str.
 *
 * The text is stored in the first data slot of a single cc_bitmap, which is
 * where the paraof_ocrtext() mock in this file looks for it. All fields not
 * set explicitly are zero-filled.
 *
 * @param str       text to attach; must not be NULL
 * @param time_from stored as the subtitle's start_time
 * @param time_trim stored as the subtitle's end_time
 * @return the new subtitle (owned by the caller), or NULL if str is NULL or
 *         an allocation fails
 */
struct cc_subtitle * helper_create_sub(char * str, LLONG time_from, LLONG time_trim) {
	if (str == NULL) {
		return NULL;
	}

	struct cc_subtitle * sub = calloc(1, sizeof *sub);
	struct cc_bitmap * rect = calloc(1, sizeof *rect);
	if (sub == NULL || rect == NULL) {
		free(rect);
		free(sub);
		return NULL;
	}

	rect->data[0] = strdup(str);
	rect->data[1] = NULL;
	if (rect->data[0] == NULL) {
		free(rect);
		free(sub);
		return NULL;
	}

	sub->type = CC_BITMAP;
	// Use the caller's timing instead of fixed values so that consecutive
	// subtitles created by a test do not all cover the same interval.
	sub->start_time = time_from;
	sub->end_time = time_trim;

	sub->data = rect;
	sub->nb_data = 1;

	return sub;
}
86
87 // -------------------------------------
88 // MOCKS
89 // -------------------------------------
/* Encoder context shared by the tests: allocated in setup(), released in teardown(). */
struct encoder_ctx *context;
91
paraof_ocrtext(void * sub)92 unsigned char * paraof_ocrtext(void * sub) {
93 // this is OCR -> text converter.
94 // now, in our test cases, we will pass TEXT instead of OCR.
95 // and will return passed text as result
96 struct cc_bitmap* rect;
97
98 rect = ((struct cc_subtitle *)sub)->data;
99 #ifdef ENABLE_OCR
100 return strdup(rect->data[0]);
101 #else
102 return NULL;
103 #endif
104
105 }
106
107 // -------------------------------------
108 // TEST preparations
109 // -------------------------------------
setup(void)110 void setup(void)
111 {
112 context = (struct encoder_ctx *)malloc(sizeof(struct encoder_ctx));
113 sbs_reset_context();
114 }
115
teardown(void)116 void teardown(void)
117 {
118 free(context);
119 }
120
121 // -------------------------------------
122 // TESTS
123 // -------------------------------------
START_TEST(test_sbs_one_simple_sentence)124 START_TEST(test_sbs_one_simple_sentence)
125 {
126 printf(
127 "=====================\n\
128 test_sbs_one_simple_sentence\n\
129 =====================\n"
130 );
131
132 struct cc_subtitle * sub = helper_create_sub("Simple sentence.", 1, 100);
133 struct cc_subtitle * out = reformat_cc_bitmap_through_sentence_buffer(sub, context);
134
135 ck_assert_ptr_ne(out, NULL);
136 ck_assert_str_eq(out->data, "Simple sentence.");
137 ck_assert_ptr_eq(out->next, NULL);
138 ck_assert_ptr_eq(out->prev, NULL);
139 }
140 END_TEST
141
142
START_TEST(test_sbs_two_sentences_with_rep)143 START_TEST(test_sbs_two_sentences_with_rep)
144 {
145 printf(
146 "=====================\n\
147 test_sbs_two_sentences_with_rep\n\
148 =====================\n"
149 );
150 struct cc_subtitle * sub1 = helper_create_sub("asdf", 1, 100);
151 struct cc_subtitle * out1 = reformat_cc_bitmap_through_sentence_buffer(sub1, context);
152 ck_assert_ptr_eq(out1, NULL);
153
154 // second sub:
155 struct cc_subtitle * sub2 = helper_create_sub("asdf Hello.", 101, 200);
156 struct cc_subtitle * out2 = reformat_cc_bitmap_through_sentence_buffer(sub2, context);
157
158 ck_assert_ptr_ne(out2, NULL);
159 ck_assert_str_eq(out2->data, "asdf Hello.");
160 ck_assert_ptr_eq(out2->next, NULL);
161 ck_assert_ptr_eq(out2->prev, NULL);}
162 END_TEST
163
164
START_TEST(test_sbs_append_string_two_separate)165 START_TEST(test_sbs_append_string_two_separate)
166 {
167 printf(
168 "=====================\n\
169 test_sbs_append_string_two_separate\n\
170 =====================\n"
171 );
172 unsigned char * test_strings[] = {
173 "First string.",
174 "Second string."
175 };
176 struct cc_subtitle * sub;
177 unsigned char * str;
178
179 // first string
180 str = strdup(test_strings[0]);
181 sub = NULL;
182 sub = sbs_append_string(str, 1, 20, sbs_init_context());
183 ck_assert_ptr_ne(sub, NULL);
184 ck_assert_str_eq(sub->data, test_strings[0]);
185 ck_assert_int_eq(sub->start_time, 1);
186 ck_assert_int_eq(sub->end_time, 20);
187
188 // second string:
189 str = strdup(test_strings[1]);
190 sub = NULL;
191 sub = sbs_append_string(str, 21, 40, sbs_init_context());
192
193 ck_assert_ptr_ne(sub, NULL);
194 ck_assert_str_eq(sub->data, test_strings[1]);
195 ck_assert_int_eq(sub->start_time, 21);
196 ck_assert_int_eq(sub->end_time, 40);
197 }
198 END_TEST
199
START_TEST(test_sbs_append_string_two_with_broken_sentence)200 START_TEST(test_sbs_append_string_two_with_broken_sentence)
201 {
202 char * test_strings[] = {
203 "First string",
204 " ends here, deabbea."
205 };
206 struct cc_subtitle * sub;
207 char * str;
208
209 // first string
210 str = strdup(test_strings[0]);
211 sub = sbs_append_string(str, 1, 3, sbs_init_context());
212
213 ck_assert_ptr_eq(sub, NULL);
214
215 // second string:
216 str = strdup(test_strings[1]);
217 sub = sbs_append_string(str, 4, 5, sbs_init_context());
218
219 ck_assert_ptr_ne(sub, NULL);
220 ck_assert_str_eq(sub->data, "First string ends here, deabbea.");
221 ck_assert_int_eq(sub->start_time, 1);
222 ck_assert_int_eq(sub->end_time, 5);
223 }
224 END_TEST
225
START_TEST(test_sbs_append_string_two_intersecting)226 START_TEST(test_sbs_append_string_two_intersecting)
227 {
228 char * test_strings[] = {
229 "First string",
230 "First string ends here."
231 };
232 struct cc_subtitle * sub;
233 char * str;
234
235 // first string
236 str = strdup(test_strings[0]);
237 sub = sbs_append_string(str, 1, 20, sbs_init_context());
238
239 ck_assert_ptr_eq(sub, NULL);
240 free(sub);
241
242 // second string:
243 str = strdup(test_strings[1]);
244 sub = sbs_append_string(str, 21, 40, sbs_init_context());
245
246 ck_assert_ptr_ne(sub, NULL);
247 ck_assert_str_eq(sub->data, "First string ends here.");
248 ck_assert_int_eq(sub->start_time, 1);
249 ck_assert_int_eq(sub->end_time, 40);
250 }
251 END_TEST
252
253
254
START_TEST(test_sbs_append_string_00)255 START_TEST(test_sbs_append_string_00)
256 {
257 FILE * fsample;
258 int skip;
259
260 fsample = fopen("samples/sbs_append_string_00", "r");
261 struct cc_subtitle * sub;
262
263 skip = 2;
264 while (skip-- > 0) {
265 sub = helper_sbs_append_sub_from_file(fsample, context);
266 ck_assert_ptr_eq(sub, NULL);
267 }
268
269 sub = helper_sbs_append_sub_from_file(fsample, context);
270 ck_assert_ptr_ne(sub, NULL);
271 ck_assert_str_eq(sub->data, "in all these different environments, just \
272 doing what turkeys want to do‘ Let's get the sport now with an update from \
273 Sky Sports News HQ.");
274 ck_assert_int_eq(sub->start_time, 1);
275 ck_assert_int_eq(sub->end_time, 3467);
276
277 sub = helper_sbs_append_sub_from_file(fsample, context);
278 ck_assert_ptr_eq(sub, NULL);
279
280 sub = helper_sbs_append_sub_from_file(fsample, context);
281 ck_assert_ptr_ne(sub, NULL);
282 ck_assert_str_eq(sub->data, "Malky Mackay believes he deserves a second \
283 chance after being appointed the Scottish FA's new Performance Director.");
284 ck_assert_int_eq(sub->start_time, 3468);
285 ck_assert_int_eq(sub->end_time, 16361);
286
287 skip = 10;
288 while (skip-- > 0) {
289 sub = helper_sbs_append_sub_from_file(fsample, context);
290 ck_assert_ptr_eq(sub, NULL);
291 }
292
293 sub = helper_sbs_append_sub_from_file(fsample, context);
294 ck_assert_ptr_ne(sub, NULL);
295 // TODO : It is too hard to fix this error automatically (hard for me)
296 // May be someone knows, how to implement this checker, and then next
297 // assertion could be uncommented
298 // maxkoryukov/ccextractor#3
299 /*
300 ck_assert_str_eq(sub->data, "Mackay was sacked by Cardiff in 2013 after it \
301 emerged he sent racist There has been some opposition When I said at the time, \
302 I deeply to his appointment but he's asked regret.");
303 */
304 ck_assert_int_eq(sub->start_time, 16362);
305 ck_assert_int_eq(sub->end_time, 38924);
306
307
308 skip = 19;
309 while (skip-- > 0) {
310
311 // if (sub != NULL) {
312 // printf("%d :> [%s]\n", skip, sub->data);
313 // }
314 sub = helper_sbs_append_sub_from_file(fsample, context);
315 // TODO : this subs should give an empty response
316 // But the algorithm is not smart enough
317 // maxkoryukov/ccextractor#3
318 /*
319 ck_assert_ptr_eq(sub, NULL);
320 */
321 }
322
323 sub = helper_sbs_append_sub_from_file(fsample, context);
324 ck_assert_ptr_ne(sub, NULL);
325 // TODO : It is too hard to fix this error automatically (hard for me)
326 // May be someone knows, how to implement this checker, and then next
327 // assertion could be uncommented
328 // maxkoryukov/ccextractor#3
329 /*
330 ck_assert_str_eq(sub->data, "It was said in I am in support to \
331 shoot support to shoot —— I spoke to the two individuals that were \
332 involved."
333 );
334 ck_assert_int_eq(sub->start_time, 38925);
335 */
336 ck_assert_int_eq(sub->end_time, 47406);
337
338 fclose(fsample);
339 }
340 END_TEST
341
342
START_TEST(test_sbs_append_string_01)343 START_TEST(test_sbs_append_string_01)
344 {
345 FILE * fsample;
346 int skip;
347
348 fsample = fopen("samples/sbs_append_string_01", "r");
349 struct cc_subtitle * sub;
350
351 sub = helper_sbs_append_sub_from_file(fsample, context);
352 ck_assert_ptr_eq(sub, NULL);
353
354 // 2
355 sub = helper_sbs_append_sub_from_file(fsample, context);
356 ck_assert_ptr_ne(sub, NULL);
357 ck_assert_str_eq(sub->data, "Oleon costs.");
358
359 // 3
360 sub = helper_sbs_append_sub_from_file(fsample, context);
361 ck_assert_ptr_ne(sub, NULL);
362 ck_assert_str_eq(sub->data, "buried in the annex, 95 Oleon costs.");
363 ck_assert_int_eq(sub->start_time, 190); // = <sub start>
364 ck_assert_int_eq(sub->end_time, 783); // = <sub start> + <available time,889-190=699 > * <sentence alphanum, 28> / <sub alphanum, 33>
365 ck_assert_ptr_eq(sub->next, NULL);
366
367 skip = 5;
368 while (skip--) {
369 sub = helper_sbs_append_sub_from_file(fsample, context);
370 ck_assert_ptr_eq(sub, NULL);
371 }
372
373 // 13
374 sub = helper_sbs_append_sub_from_file(fsample, context);
375 ck_assert_ptr_ne(sub, NULL);
376 ck_assert_str_eq(sub->data, "Didn't want to acknowledge the pressures on hospitals, schools and infrastructure.");
377 ck_assert_int_eq(sub->start_time, 784);
378 ck_assert_int_eq(sub->end_time, 5159);
379 ck_assert_ptr_eq(sub->next, NULL);
380
381 skip = 20;
382 while (skip--) {
383 sub = helper_sbs_append_sub_from_file(fsample, context);
384 ck_assert_ptr_eq(sub, NULL);
385 }
386
387 sub = helper_sbs_append_sub_from_file(fsample, context);
388 ck_assert_ptr_ne(sub, NULL);
389 ck_assert_int_eq(sub->start_time, 5160);
390 ck_assert_int_eq(sub->end_time, 16100);
391 ck_assert_ptr_eq(sub->next, NULL);
392 ck_assert_str_eq(sub->data, "If we go to the Australian size system, we can have the migrants who will contribute and not drain the economy.");
393
394 fclose(fsample);
395 }
396 END_TEST
397
398
ccx_encoders_splitbysentence_suite(void)399 Suite * ccx_encoders_splitbysentence_suite(void)
400 {
401 Suite *s;
402 TCase *tc_core;
403
404 s = suite_create("Sentence Buffer");
405
406 /* Overall tests */
407 tc_core = tcase_create("SB: Overall: ");
408
409 tcase_add_checked_fixture(tc_core, setup, teardown);
410 tcase_add_test(tc_core, test_sbs_one_simple_sentence);
411 tcase_add_test(tc_core, test_sbs_two_sentences_with_rep);
412 suite_add_tcase(s, tc_core);
413
414 /**/
415 TCase *tc_append_string;
416 tc_append_string = tcase_create("SB: append_string: ");
417 tcase_add_checked_fixture(tc_append_string, setup, teardown);
418
419 tcase_add_test(tc_append_string, test_sbs_append_string_two_separate);
420 tcase_add_test(tc_append_string, test_sbs_append_string_two_with_broken_sentence);
421 tcase_add_test(tc_append_string, test_sbs_append_string_two_intersecting);
422 tcase_add_test(tc_append_string, test_sbs_append_string_00);
423 tcase_add_test(tc_append_string, test_sbs_append_string_01);
424
425 suite_add_tcase(s, tc_append_string);
426
427 return s;
428 }
429