<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

/**
 * Build the preference definitions for the OCR feature.
 *
 * The language options are built from the list reported by the OCR library,
 * with the 'osd' entry moved to the top. The default binary paths are looked
 * up through the OCR library; each binary is resolved independently, so a
 * failed lookup for one does not discard the path found for the other.
 *
 * @return array Preference definitions keyed by preference name.
 */
function prefs_ocr_list()
{
    $langLib = TikiLib::lib('language');
    $ocr = TikiLib::lib('ocr');

    $ocrLangs = $langLib->findLanguageNames($ocr->getTesseractLangs());
    // Place the default (OSD) at the top: drop it from wherever it sits and
    // prepend it again (array union keeps the left-hand keys first).
    unset($ocrLangs['osd']);
    $ocrLangs = $langLib->findLanguageNames(['osd']) + $ocrLangs;

    // Resolve a binary's location, falling back to its bare name when the
    // lookup returns nothing or throws.
    $locateBinary = static function ($binary) use ($ocr) {
        try {
            return $ocr->whereIsExecutable($binary) ?: $binary;
        } catch (Exception $e) {
            return $binary;
        }
    };

    $tesseractPath = $locateBinary('tesseract');
    $pdfimagesPath = $locateBinary('pdfimages');

    return [
        'ocr_enable' => [
            'name' => tra('OCR Files'),
            'type' => 'flag',
            'default' => 'n',
            'description' => tra('Extract and index text from supported file types.'),
            'keywords' => 'ocr optical character recognition',
            'dependencies' => ['feature_file_galleries'],
            'packages_required' => [
                'thiagoalessio/tesseract_ocr' => 'thiagoalessio\TesseractOCR\TesseractOCR',
                'media-alchemyst/media-alchemyst' => 'MediaAlchemyst\Alchemyst',
            ],
        ],
        'ocr_every_file' => [
            'name' => tra('OCR Every File'),
            'type' => 'flag',
            'description' => tra('Attempt to OCR every supported file.'),
            'default' => 'n',
        ],
        'ocr_file_level' => [
            'name' => tra('Allow file level OCR languages'),
            'type' => 'flag',
            'description' => tra('Allow users to change the default languages that will be used to OCR a file.'),
            'default' => 'y',
        ],
        'ocr_limit_languages' => [
            'name' => tra('OCR limit languages'),
            'description' => tra('Limit the number of languages one can select from this list.'),
            'filter' => 'text',
            'type' => 'multilist',
            'options' => $ocrLangs,
            'dependencies' => ['ocr_file_level'],
            'default' => [''],
        ],
        'ocr_tesseract_path' => [
            'name' => tra('tesseract path'),
            'description' => tra('Path to the location of the binary. Defaults to the $PATH location.'),
            // Wrapped in tra() like the other user-facing strings in this file.
            'hint' => tra('If blank, the $PATH will be used, but will likely fail with scheduler.'),
            'type' => 'text',
            'size' => '256',
            'filter' => 'text',
            'default' => $tesseractPath,
        ],
        'ocr_pdfimages_path' => [
            'name' => tra('pdfimages path'),
            'description' => tra('Path to the location of the binary. Defaults to the $PATH location.'),
            // Wrapped in tra() like the other user-facing strings in this file.
            'hint' => tra('If blank, the $PATH will be used, but will likely fail with scheduler.'),
            'type' => 'text',
            'size' => '256',
            'filter' => 'text',
            'default' => $pdfimagesPath,
        ],
    ];
}