1 #include <tesseract/capi.h>
2 #include <leptonica/allheaders.h>
3 #include <arcan_shmif.h>
4 #include "util/utf8.c"
5
6 /*
7 * Same code as in select- in terminal. Ought to be moved to a shared
8 * shmif-support lib that also covers 3D setup and handle extraction.
9 */
push_multipart(struct arcan_shmif_cont * out,char * msg,size_t len)10 static void push_multipart(struct arcan_shmif_cont* out,
11 char* msg, size_t len)
12 {
13 arcan_event msgev = {
14 .category = EVENT_EXTERNAL,
15 .ext.kind = ARCAN_EVENT(MESSAGE)
16 };
17
18 uint32_t state = 0, codepoint = 0;
19 char* outs = msg;
20 size_t maxlen = sizeof(msgev.ext.message.data) - 1;
21
22 /* utf8- point aligned against block size */
23 while (len > maxlen){
24 size_t i, lastok = 0;
25 state = 0;
26 for (i = 0; i <= maxlen - 1; i++){
27 if (UTF8_ACCEPT == utf8_decode(&state, &codepoint, (uint8_t)(msg[i])))
28 lastok = i;
29
30 if (i != lastok){
31 if (0 == i)
32 return;
33 }
34 }
35
36 memcpy(msgev.ext.message.data, outs, lastok);
37 msgev.ext.message.data[lastok] = '\0';
38 len -= lastok;
39 outs += lastok;
40 if (len)
41 msgev.ext.message.multipart = 1;
42 else
43 msgev.ext.message.multipart = 0;
44
45 arcan_shmif_enqueue(out, &msgev);
46 }
47
48 /* flush remaining */
49 if (len){
50 snprintf((char*)msgev.ext.message.data, maxlen, "%s", outs);
51 msgev.ext.message.multipart = 0;
52 arcan_shmif_enqueue(out, &msgev);
53 }
54 }
55
ocr_serv_run(struct arg_arr * args,struct arcan_shmif_cont cont)56 void ocr_serv_run(struct arg_arr* args, struct arcan_shmif_cont cont)
57 {
58 TessBaseAPI* handle = TessBaseAPICreate();
59 PIX* img;
60
61 const char* lang = "eng";
62 arg_lookup(args, "lang", 0, &lang);
63 if (TessBaseAPIInit3(handle, NULL, lang)){
64 LOG("encode-ocr: Couldn't initialize tesseract with lang (%s)\n", lang);
65 return;
66 }
67
68 /*
69 * There are many little details missing here, e.g. control over segmentation
70 * / grouping (receiving input) and somehow alerting when the OCR failed to
71 * yield anything.
72 */
73 arcan_event ev;
74 while(arcan_shmif_wait(&cont, &ev)){
75 if (ev.category == EVENT_TARGET){
76 switch (ev.tgt.kind){
77 case TARGET_COMMAND_STEPFRAME:{
78 TessBaseAPISetImage(handle, (const unsigned char*) cont.vidp,
79 cont.w, cont.h, sizeof(shmif_pixel), cont.stride);
80 char* text = TessBaseAPIGetUTF8Text(handle);
81 size_t len;
82 if (!text || (len = strlen(text)) == 0)
83 continue;
84
85 push_multipart(&cont, text, len);
86 TessDeleteText(text);
87 }
88 break;
89 case TARGET_COMMAND_EXIT:
90 goto out;
91 default:
92 break;
93 }
94 }
95 }
96 out:
97 TessBaseAPIEnd(handle);
98 TessBaseAPIDelete(handle);
99 }
100