1 #include <tesseract/capi.h>
2 #include <leptonica/allheaders.h>
3 #include <arcan_shmif.h>
4 #include "util/utf8.c"
5 
6 /*
7  * Same code as in select- in terminal. Ought to be moved to a shared
8  * shmif-support lib that also covers 3D setup and handle extraction.
9  */
push_multipart(struct arcan_shmif_cont * out,char * msg,size_t len)10 static void push_multipart(struct arcan_shmif_cont* out,
11 	char* msg, size_t len)
12 {
13 	arcan_event msgev = {
14 		.category = EVENT_EXTERNAL,
15 		.ext.kind = ARCAN_EVENT(MESSAGE)
16 	};
17 
18 	uint32_t state = 0, codepoint = 0;
19 	char* outs = msg;
20 	size_t maxlen = sizeof(msgev.ext.message.data) - 1;
21 
22 /* utf8- point aligned against block size */
23 	while (len > maxlen){
24 		size_t i, lastok = 0;
25 		state = 0;
26 		for (i = 0; i <= maxlen - 1; i++){
27 			if (UTF8_ACCEPT == utf8_decode(&state, &codepoint, (uint8_t)(msg[i])))
28 				lastok = i;
29 
30 			if (i != lastok){
31 				if (0 == i)
32 					return;
33 			}
34 		}
35 
36 		memcpy(msgev.ext.message.data, outs, lastok);
37 		msgev.ext.message.data[lastok] = '\0';
38 		len -= lastok;
39 		outs += lastok;
40 		if (len)
41 			msgev.ext.message.multipart = 1;
42 		else
43 			msgev.ext.message.multipart = 0;
44 
45 		arcan_shmif_enqueue(out, &msgev);
46 	}
47 
48 /* flush remaining */
49 	if (len){
50 		snprintf((char*)msgev.ext.message.data, maxlen, "%s", outs);
51 		msgev.ext.message.multipart = 0;
52 		arcan_shmif_enqueue(out, &msgev);
53 	}
54 }
55 
ocr_serv_run(struct arg_arr * args,struct arcan_shmif_cont cont)56 void ocr_serv_run(struct arg_arr* args, struct arcan_shmif_cont cont)
57 {
58 	TessBaseAPI* handle = TessBaseAPICreate();
59 	PIX* img;
60 
61 	const char* lang = "eng";
62 	arg_lookup(args, "lang", 0, &lang);
63 	if (TessBaseAPIInit3(handle, NULL, lang)){
64 		LOG("encode-ocr: Couldn't initialize tesseract with lang (%s)\n", lang);
65 		return;
66 	}
67 
68 /*
69  * There are many little details missing here, e.g.  control over segmentation
70  * / grouping (receiving input) and somehow alerting when the OCR failed to
71  * yield anything.
72  */
73 	arcan_event ev;
74 	while(arcan_shmif_wait(&cont, &ev)){
75 		if (ev.category == EVENT_TARGET){
76 			switch (ev.tgt.kind){
77 			case TARGET_COMMAND_STEPFRAME:{
78 				TessBaseAPISetImage(handle, (const unsigned char*) cont.vidp,
79 					cont.w, cont.h, sizeof(shmif_pixel), cont.stride);
80 				char* text = TessBaseAPIGetUTF8Text(handle);
81 				size_t len;
82 				if (!text || (len = strlen(text)) == 0)
83 					continue;
84 
85 				push_multipart(&cont, text, len);
86 				TessDeleteText(text);
87 			}
88 			break;
89 			case TARGET_COMMAND_EXIT:
90 				goto out;
91 			default:
92 			break;
93 			}
94 		}
95 	}
96 out:
97 	TessBaseAPIEnd(handle);
98 	TessBaseAPIDelete(handle);
99 }
100