1<?php
2// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
3//
4// All Rights Reserved. See copyright.txt for details and a complete list of authors.
5// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
6// $Id$
7
/**
 * Build the preference definitions for the OCR feature.
 *
 * The language options are taken from the 'ocr' library's getTesseractLangs()
 * and mapped to names through the 'language' library, with the 'osd' entry
 * always listed first. The default binary paths come from the 'ocr' library's
 * whereIsExecutable(); when a lookup yields nothing (or throws) the bare
 * binary name is used as the default instead.
 *
 * @return array Preference definitions keyed by preference name.
 */
function prefs_ocr_list()
{
	$langLib = TikiLib::lib('language');
	$ocr = TikiLib::lib('ocr');

	$ocrLangs = $langLib->findLanguageNames($ocr->getTesseractLangs());
	// Place the default (OSD) at the top: drop it from wherever it currently
	// sits, then prepend it (array union keeps the left-hand keys first).
	unset($ocrLangs['osd']);
	$ocrLangs = $langLib->findLanguageNames(['osd']) + $ocrLangs;

	// Resolve each binary on its own so that a failure to locate one of them
	// does not throw away a path that was successfully found for the other.
	$locateBinary = function ($binary) use ($ocr) {
		try {
			return $ocr->whereIsExecutable($binary) ?: $binary;
		} catch (Exception $e) {
			// Best effort only: fall back to the bare name.
			return $binary;
		}
	};

	$tesseractPath = $locateBinary('tesseract');
	$pdfimagesPath = $locateBinary('pdfimages');

	return [
		'ocr_enable' => [
			'name' => tra('OCR Files'),
			'type' => 'flag',
			'default' => 'n',
			'description' => tra('Extract and index text from supported file types.'),
			'keywords' => 'ocr optical character recognition',
			'dependencies' => ['feature_file_galleries'],
			'packages_required' => [
				'thiagoalessio/tesseract_ocr' => 'thiagoalessio\TesseractOCR\TesseractOCR',
				'media-alchemyst/media-alchemyst' => 'MediaAlchemyst\Alchemyst',
			],
		],
		'ocr_every_file' => [
			'name' => tra('OCR Every File'),
			'type' => 'flag',
			'description' => tra('Attempt to OCR every supported file.'),
			'default' => 'n',
		],
		'ocr_file_level' => [
			'name' => tra('Allow file level OCR languages'),
			'type' => 'flag',
			'description' => tra('Allow users to change the default languages that will be used to OCR a file.'),
			'default' => 'y',
		],
		'ocr_limit_languages' => [
			'name' => tra('OCR limit languages'),
			'description' => tra('Limit the number of languages one can select from this list.'),
			'filter' => 'text',
			'type' => 'multilist',
			'options' => $ocrLangs,
			'dependencies' => ['ocr_file_level'],
			'default' => [''],
		],
		'ocr_tesseract_path' => [
			'name' => tra('tesseract path'),
			'description' => tra('Path to the location of the binary. Defaults to the $PATH location.'),
			// Hints are shown to the user, so they go through tra() like the other labels.
			'hint' => tra('If blank, the $PATH will be used, but will likely fail with scheduler.'),
			'type' => 'text',
			'size' => '256',
			'filter' => 'text',
			'default' => $tesseractPath,
		],
		'ocr_pdfimages_path' => [
			'name' => tra('pdfimages path'),
			'description' => tra('Path to the location of the binary. Defaults to the $PATH location.'),
			// Hints are shown to the user, so they go through tra() like the other labels.
			'hint' => tra('If blank, the $PATH will be used, but will likely fail with scheduler.'),
			'type' => 'text',
			'size' => '256',
			'filter' => 'text',
			'default' => $pdfimagesPath,
		],
	];
}