<?php
/**
 * Cantonese (粵語) specific code.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Language
 */

/**
 * Language class for Cantonese (粵語).
 *
 * Overrides the word-break, word-segmentation and search-normalization
 * hooks of the base Language class.
 *
 * @ingroup Language
 */
class LanguageYue extends Language {

	/**
	 * Report whether this language has word breaks.
	 *
	 * @return bool Always false for this language class.
	 */
	public function hasWordBreaks() {
		return false;
	}

	/**
	 * Segment a string into "words".
	 *
	 * Proper word segmentation is not implemented yet; as a stop-gap every
	 * multi-byte character is treated as a word of its own.
	 * @todo FIXME: Only do this for Han characters...
	 *
	 * @param string $string Text to segment
	 * @return string Result of self::insertSpace() applied to the text
	 */
	public function segmentByWord( $string ) {
		// One byte in 0xC0-0xFF followed by any number of bytes in 0x80-0xBF,
		// i.e. the byte layout of a multi-byte UTF-8 sequence.
		$multiByteChar = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";

		return self::insertSpace( $string, $multiByteChar );
	}

	/**
	 * Normalize a string for use by the search code.
	 *
	 * The text is first run through self::convertDoubleWidth() (for
	 * double-width roman characters), then trimmed, and finally handed to
	 * the parent implementation.
	 *
	 * @param string $string Text to normalize
	 * @return string Normalized text
	 */
	public function normalizeForSearch( $string ) {
		// Order matters: width conversion, then trim, then the generic
		// normalization inherited from the parent class.
		$narrowed = trim( self::convertDoubleWidth( $string ) );

		return parent::normalizeForSearch( $narrowed );
	}
}