<?php
/**
 * Japanese (日本語) specific code.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Language
 */

/**
 * Japanese (日本語)
 *
 * Language-specific overrides for Japanese text: segmentation of text into
 * same-script runs and handling of emphasis.
 *
 * @ingroup Language
 */
class LanguageJa extends Language {

	/**
	 * Segment text at runs of hiragana, katakana or kanji.
	 *
	 * Builds a single regular expression whose one capture group matches a
	 * maximal run of hiragana, of katakana, or of kanji, and passes it together
	 * with the input to Language::insertSpace(). Presumably that helper
	 * separates each matched run with spaces (see the parent class to confirm).
	 *
	 * The patterns work on raw UTF-8 bytes (the regex has no "u" modifier);
	 * each sub-pattern matches exactly one encoded character of its script.
	 * Punctuation in U+3000-U+303F is not stripped here.
	 *
	 * @param string $string Text to segment
	 * @return string Whatever Language::insertSpace() returns for the input
	 */
	public function segmentByWord( $string ) {
		// One hiragana character: U+3040-U+309F
		$hiragana = '(?:\xe3(?:\x81[\x80-\xbf]|\x82[\x80-\x9f]))';

		// One katakana character: U+30A0-U+30FF
		$katakana = '(?:\xe3(?:\x82[\xa0-\xbf]|\x83[\x80-\xbf]))';

		// One character in U+3200-U+9999, i.e. the UTF-8 byte sequences
		// \xe3\x88\x80 through \xe9\xa6\x99, split by lead/second byte.
		$kanji = '(?:\xe3[\x88-\xbf][\x80-\xbf]'
			. '|[\xe4-\xe8][\x80-\xbf]{2}'
			. '|\xe9[\x80-\xa5][\x80-\xbf]'
			. '|\xe9\xa6[\x80-\x99])';

		// A run is one or more consecutive characters of the same script.
		$runPattern = '/(' . $hiragana . '+|' . $katakana . '+|' . $kanji . '+)/';

		return self::insertSpace( $string, $runPattern );
	}

	/**
	 * Return the text unchanged instead of marking it up as emphasized.
	 *
	 * Italics do not suit Japanese script, and most browsers render `<em>`
	 * as italic, so no emphasis markup is added.
	 *
	 * @param string $text Text that would otherwise be emphasized
	 * @return string The input text, unmodified
	 */
	public function emphasize( $text ) {
		return $text;
	}
}