<?php
/**
 * Copyright 2016-2017 Horde LLC (http://www.horde.org/)
 *
 * See the enclosed file COPYING for license information (LGPL). If you
 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 *
 * @author   Jan Schneider <jan@horde.org>
 * @category Horde
 * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package  Text_Filter
 */

/**
 * Takes HTML and removes any MS Office formatting quirks.
 *
 * The clean-up happens entirely in postProcess(): empty "<o:p>" markers are
 * stripped from the raw markup, and <p> elements carrying the "MsoNormal"
 * class are unwrapped or converted to <div> elements.
 *
 * @author   Jan Schneider <jan@horde.org>
 * @category Horde
 * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package  Text_Filter
 */
class Horde_Text_Filter_Msoffice extends Horde_Text_Filter_Base
{
    /**
     * Filter parameters.
     *
     * 'charset' is the character set used both to parse the incoming HTML
     * and to serialize the cleaned document.
     *
     * @var array
     */
    protected $_params = array(
        'charset' => 'UTF-8',
    );

    /**
     * Executes any code necessary after applying the filter patterns.
     *
     * Every <p> element whose class attribute contains "MsoNormal" is
     * handled in one of three ways:
     *  - other classes remain after removing "MsoNormal": the paragraph is
     *    replaced by a <div> that carries the remaining classes and all of
     *    the paragraph's children;
     *  - no other classes, but the paragraph has text: the children are
     *    moved in front of the paragraph, followed by a <br>;
     *  - no other classes and no text: the paragraph is simply dropped.
     *
     * @param string $text  The text after the filtering.
     *
     * @return string  The modified text. If the HTML cannot be loaded into a
     *                 DOM, the text is returned with only the "<o:p>"
     *                 markers removed.
     */
    public function postProcess($text)
    {
        // We cannot find those elements via DOM because HTML doesn't know
        // about namespaces.
        // NOTE(review): the whitespace inside this literal may have been
        // altered in transit (MS Office commonly emits an &nbsp; entity
        // here) -- confirm against the upstream source.
        $text = str_replace('<o:p> </o:p>', '', $text);

        try {
            $dom = new Horde_Domhtml($text, $this->_params['charset']);
        } catch (Exception $e) {
            // Best effort: hand back the partially cleaned markup.
            return $text;
        }

        // Replace all <p> elements of class "MsoNormal" with <br> elements,
        // unless they contain other classes. Then replace with <div> elements.
        // NOTE(review): the current node is removed while $dom is being
        // iterated; presumably the Horde_Domhtml iterator tolerates this --
        // confirm.
        foreach ($dom as $child) {
            if (!($child instanceof DOMElement) ||
                Horde_String::lower($child->tagName) != 'p' ||
                !($css = $child->getAttribute('class')) ||
                strpos($css, 'MsoNormal') === false) {
                continue;
            }

            $css = trim(str_replace('MsoNormal', '', $css));
            if (strlen($css)) {
                // Other classes remain: keep them on a <div> wrapper.
                $div = $dom->dom->createElement('div');
                $div->setAttribute('class', $css);
                // appendChild() detaches the node from $child, so drain the
                // paragraph from the front instead of iterating the live
                // childNodes list, which would skip the remaining children.
                while ($child->hasChildNodes()) {
                    $div->appendChild($child->firstChild);
                }
                $child->parentNode->insertBefore($div, $child);
            } elseif (strlen(preg_replace('/^\s*(.*)\s*$/u', '$1', $child->textContent))) {
                // Plain MsoNormal paragraph with content: unwrap it and
                // terminate the line with a <br>.
                while ($child->hasChildNodes()) {
                    $tomove = $child->removeChild($child->firstChild);
                    $child->parentNode->insertBefore($tomove, $child);
                }
                $child->parentNode->insertBefore(
                    $dom->dom->createElement('br'), $child
                );
            }

            // In every case the original paragraph itself goes away.
            $child->parentNode->removeChild($child);
        }

        return $dom->returnHtml(array('charset' => $this->_params['charset']));
    }
}