1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include "filterdetect.hxx"
21 #include <osl/diagnose.h>
22 #include <com/sun/star/io/XInputStream.hpp>
23 #include <com/sun/star/uno/XComponentContext.hpp>
24 #include <com/sun/star/document/XExtendedFilterDetection.hpp>
25 #include <com/sun/star/container/XNameAccess.hpp>
26 #include <com/sun/star/beans/PropertyState.hpp>
27 #include <cppuhelper/supportsservice.hxx>
28 #include <ucbhelper/content.hxx>
29 #include <unotools/ucbstreamhelper.hxx>
30 #include <svl/inettype.hxx>
31 #include <memory>
32
33 using namespace com::sun::star::container;
34 using namespace com::sun::star::uno;
35 using namespace com::sun::star::beans;
36
37 namespace {
38
/** Decides whether a type's "doctype:" clipboard format occurs in the sniffed text.

    @param clipBoardFormat  ClipboardFormat property of the candidate type
    @param resultString     text read from the start of the document
    @param checkType        name of the candidate type

    @return checkType if clipBoardFormat starts with "doctype:" and the text
            following that prefix is found in resultString; an empty string
            otherwise
 */
OUString supportedByType( const OUString& clipBoardFormat, const OUString& resultString, const OUString& checkType)
{
    // Formats without the "doctype:" prefix never match here.
    if ( !clipBoardFormat.match("doctype:") )
        return OUString();

    // 8 == length of the "doctype:" prefix; the remainder is the search text.
    const OUString sNeedle = clipBoardFormat.copy(8);
    if ( resultString.indexOf(sNeedle) < 0 )
        return OUString();

    return checkType;
}
52
IsMediaTypeXML(const OUString & mediaType)53 bool IsMediaTypeXML( const OUString& mediaType )
54 {
55 if (!mediaType.isEmpty())
56 {
57 OUString sType, sSubType;
58 if (INetContentTypes::parse(mediaType, sType, sSubType)
59 && sType == "application")
60 {
61 // RFC 3023: application/xml; don't detect text/xml
62 if (sSubType == "xml")
63 return true;
64 // Registered media types: application/XXXX+xml
65 if (sSubType.endsWith("+xml"))
66 return true;
67 }
68 }
69 return false;
70 }
71
72 }
73
detect(css::uno::Sequence<css::beans::PropertyValue> & aArguments)74 OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments )
75 {
76 OUString sTypeName;
77 OUString sUrl;
78 Sequence<PropertyValue > lProps ;
79
80 css::uno::Reference< css::io::XInputStream > xInStream;
81 const PropertyValue * pValue = aArguments.getConstArray();
82 sal_Int32 nLength;
83 OUString resultString;
84
85 nLength = aArguments.getLength();
86 sal_Int32 location=nLength;
87 for (sal_Int32 i = 0 ; i < nLength; i++)
88 {
89 if ( pValue[i].Name == "TypeName" )
90 {
91 location=i;
92 }
93 else if ( pValue[i].Name == "URL" )
94 {
95 pValue[i].Value >>= sUrl;
96 }
97 else if ( pValue[i].Name == "InputStream" )
98 {
99 pValue[i].Value >>= xInStream ;
100 }
101 }
102 try
103 {
104 if (!xInStream.is())
105 {
106 ::ucbhelper::Content aContent(
107 sUrl, Reference< css::ucb::XCommandEnvironment >(),
108 mxCtx);
109 xInStream = aContent.openStream();
110 if (!xInStream.is())
111 {
112 return sTypeName;
113 }
114 }
115
116 std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) );
117 pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
118 sal_uInt64 const nUniPos = pInStream->Tell();
119
120 const sal_uInt16 nSize = 4000;
121 bool bTryUtf16 = false;
122
123 if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness
124 {
125 sal_uInt16 nHeader = 0;
126 pInStream->ReadUInt16( nHeader );
127 if ( nHeader == 0x003C )
128 bTryUtf16 = true;
129 else if ( nHeader == 0x3C00 )
130 {
131 bTryUtf16 = true;
132 pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE );
133 }
134 pInStream->Seek( STREAM_SEEK_TO_BEGIN );
135 }
136
137 if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
138 {
139 OString const str(read_uInt8s_ToOString(*pInStream, nSize));
140 resultString = OUString(str.getStr(), str.getLength(),
141 RTL_TEXTENCODING_ASCII_US,
142 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
143 }
144 else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
145 resultString = read_uInt16s_ToOUString( *pInStream, nSize );
146
147 if ( !resultString.startsWith( "<?xml" ) )
148 {
149 // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
150 // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
151 OUString sMediaType;
152 try
153 {
154 ::ucbhelper::Content aContent(
155 sUrl, Reference< css::ucb::XCommandEnvironment >(),
156 mxCtx);
157 aContent.getPropertyValue("MediaType") >>= sMediaType;
158 if (sMediaType.isEmpty())
159 {
160 aContent.getPropertyValue("Content-Type") >>= sMediaType;
161 }
162 }
163 catch (...) {}
164
165 if (!IsMediaTypeXML(sMediaType))
166 {
167 // This is not an XML stream. It makes no sense to try to detect
168 // a non-XML file type here.
169 return OUString();
170 }
171 }
172
173 // test typedetect code
174 Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx), UNO_QUERY);
175 Sequence < OUString > myTypes= xTypeCont->getElementNames();
176 nLength = myTypes.getLength();
177
178 sal_Int32 new_nlength=0;
179 sal_Int32 i = 0 ;
180 while ((i < nLength) && (sTypeName.isEmpty()))
181 {
182 Any elem = xTypeCont->getByName(myTypes[i]);
183 elem >>=lProps;
184 new_nlength = lProps.getLength();
185 sal_Int32 j =0;
186 while (j < new_nlength && (sTypeName.isEmpty()))
187 {
188 OUString tmpStr;
189 lProps[j].Value >>=tmpStr;
190 if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() )
191 {
192 sTypeName = supportedByType(tmpStr,resultString, myTypes[i]);
193 }
194 j++;
195 }
196 i++;
197 }
198 }
199 catch (const Exception &)
200 {
201 OSL_FAIL( "An Exception occurred while opening File stream" );
202 }
203
204 if (!sTypeName.isEmpty())
205 {
206 if (location == aArguments.getLength())
207 {
208 aArguments.realloc(nLength+1);
209 aArguments[location].Name = "TypeName";
210 }
211 aArguments[location].Value <<=sTypeName;
212 }
213
214 return sTypeName;
215 }
216
217 // XInitialization
initialize(const Sequence<Any> &)218 void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ )
219 {
220 }
221
FilterDetect_getImplementationName()222 OUString FilterDetect_getImplementationName ()
223 {
224 return "com.sun.star.comp.filters.XMLFilterDetect";
225 }
226
FilterDetect_getSupportedServiceNames()227 Sequence< OUString > FilterDetect_getSupportedServiceNames()
228 {
229 Sequence < OUString > aRet { "com.sun.star.document.ExtendedTypeDetection" };
230 return aRet;
231 }
232
/** Factory function: creates a new FilterDetect bound to the given context.

    @param context  component context handed to the FilterDetect constructor

    @return the new object as an XInterface reference
 */
Reference< XInterface > FilterDetect_createInstance( const Reference< XComponentContext > & context)
{
    // Go through OWeakObject to pick an unambiguous XInterface base.
    Reference< XInterface > xInstance(
        static_cast< cppu::OWeakObject * >( new FilterDetect( context ) ) );
    return xInstance;
}
237
238 // XServiceInfo
getImplementationName()239 OUString SAL_CALL FilterDetect::getImplementationName( )
240 {
241 return FilterDetect_getImplementationName();
242 }
243
supportsService(const OUString & rServiceName)244 sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
245 {
246 return cppu::supportsService( this, rServiceName );
247 }
248
getSupportedServiceNames()249 Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames( )
250 {
251 return FilterDetect_getSupportedServiceNames();
252 }
253
254 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
255