1 /*******************************************************************************
2  * Copyright (c) 2005, 2019 IBM Corporation and others.
3  *
4  * This program and the accompanying materials
5  * are made available under the terms of the Eclipse Public License 2.0
6  * which accompanies this distribution, and is available at
7  * https://www.eclipse.org/legal/epl-2.0/
8  *
9  * SPDX-License-Identifier: EPL-2.0
10  *
11  * Contributors:
12  *     IBM Corporation - initial API and implementation
13  *******************************************************************************/
14 package org.eclipse.help.internal.search;
15 
16 import java.io.InputStream;
17 
18 import org.eclipse.core.runtime.IPath;
19 import org.eclipse.core.runtime.Path;
20 import org.eclipse.core.runtime.Platform;
21 import org.eclipse.help.internal.xhtml.DynamicXHTMLProcessor;
22 import org.eclipse.help.search.SearchParticipantXML;
23 import org.xml.sax.Attributes;
24 
25 /**
26  * The search participant responsible for indexing XHTML documents.
27  */
28 public class XHTMLSearchParticipant extends SearchParticipantXML {
29 
30 	private static final String KEYWORDS = "keywords"; //$NON-NLS-1$
31 	private static final String META_TAG = "meta"; //$NON-NLS-1$
32 	private static final String DESCRIPTION = "description"; //$NON-NLS-1$
33 	private static final String NAME_ATTRIBUTE = "name"; //$NON-NLS-1$
34 	private static final String CONTENT_ATTRIBUTE = "content"; //$NON-NLS-1$
35 	private String title;
36 	private String summary;
37 	private boolean hasDescriptionMetaTag = false;
38 
39 	@Override
handleEndElement(String name, IParsedXMLContent data)40 	protected void handleEndElement(String name, IParsedXMLContent data) {
41 	}
42 
43 	@Override
handleStartElement(String name, Attributes attributes, IParsedXMLContent data)44 	protected void handleStartElement(String name, Attributes attributes, IParsedXMLContent data) {
45 		title = null;
46 		if (META_TAG.equalsIgnoreCase(name)) {
47 			String nameAttribute = attributes.getValue(NAME_ATTRIBUTE);
48 			if (DESCRIPTION.equalsIgnoreCase(nameAttribute)) {
49 				String descriptionAttribute = attributes.getValue(CONTENT_ATTRIBUTE);
50 				if (descriptionAttribute != null) {
51 					hasDescriptionMetaTag = true;
52 					data.addToSummary(descriptionAttribute);
53 					data.addText(" "); //$NON-NLS-1$
54 					data.addText(descriptionAttribute);
55 					data.addText(" "); //$NON-NLS-1$
56 				}
57 			} else if (KEYWORDS.equalsIgnoreCase(nameAttribute)) {
58 				String keywordsAttribute = attributes.getValue(CONTENT_ATTRIBUTE);
59 				if (keywordsAttribute != null) {
60 					data.addText(" "); //$NON-NLS-1$
61 					data.addText(keywordsAttribute);
62 					data.addText(" "); //$NON-NLS-1$
63 				}
64 			}
65 		}
66 	}
67 
68 	@Override
handleStartDocument(IParsedXMLContent data)69 	protected void handleStartDocument(IParsedXMLContent data) {
70 		hasDescriptionMetaTag = false;
71 	}
72 
73 	@Override
handleText(String text, IParsedXMLContent data)74 	protected void handleText(String text, IParsedXMLContent data) {
75 		String stackPath = getElementStackPath();
76 		IPath path = new Path(stackPath);
77 		if (path.segment(1).equalsIgnoreCase("body") &&  //$NON-NLS-1$
78 			!isSkipped(path.segment(path.segmentCount() -1))) {
79 			data.addText(text);
80 			if (!hasDescriptionMetaTag) {
81 				data.addToSummary(text);
82 			}
83 		} else if (path.segment(1).equalsIgnoreCase("head")) { //$NON-NLS-1$
84 			if (path.segment(path.segmentCount() -1).equalsIgnoreCase("title")) { //$NON-NLS-1$
85 				if (title == null) {
86 					title = text;
87 				} else {
88 					title = title + text;
89 				}
90 				data.setTitle(title);
91 			}
92 		}
93 	}
94 
isSkipped(String tag)95 	private boolean isSkipped(String tag) {
96 		return tag.equals("script"); //$NON-NLS-1$
97 	}
98 
99 	@Override
preprocess(InputStream in, String name, String locale)100 	protected InputStream preprocess(InputStream in, String name, String locale) {
101 		try {
102 			return DynamicXHTMLProcessor.process(name, in, locale, false);
103 		}
104 		catch (Throwable t) {
105 			String msg = "An error occured while pre-processing help XHTML document \"" + name + "\" for search indexing"; //$NON-NLS-1$ //$NON-NLS-2$
106 			Platform.getLog(getClass()).error(msg, t);
107 			return in;
108 		}
109 	}
110 
getSummary()111 	public String getSummary() {
112 		return summary;
113 	}
114 }