1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include "filterdetect.hxx"
21 #include <osl/diagnose.h>
22 #include <com/sun/star/io/XInputStream.hpp>
23 #include <com/sun/star/uno/XComponentContext.hpp>
24 #include <com/sun/star/document/XExtendedFilterDetection.hpp>
25 #include <com/sun/star/container/XNameAccess.hpp>
26 #include <com/sun/star/beans/PropertyState.hpp>
27 #include <cppuhelper/supportsservice.hxx>
28 #include <ucbhelper/content.hxx>
29 #include <unotools/ucbstreamhelper.hxx>
30 #include <svl/inettype.hxx>
31 #include <memory>
32 
33 using namespace com::sun::star::container;
34 using namespace com::sun::star::uno;
35 using namespace com::sun::star::beans;
36 
37 namespace {
38 
/**
 * Decide whether a type's clipboard format matches the sampled document text.
 *
 * @param clipBoardFormat  ClipboardFormat value of the candidate type; only
 *                         values beginning with "doctype:" are considered.
 * @param resultString     Text sampled from the document being detected.
 * @param checkType        Name of the candidate type.
 * @return checkType when the text following "doctype:" occurs anywhere in
 *         resultString, otherwise an empty string.
 */
OUString supportedByType( const OUString& clipBoardFormat,  const OUString& resultString, const OUString& checkType)
{
    // Formats without the "doctype:" prefix never match.
    if ( !clipBoardFormat.match("doctype:") )
        return OUString();

    // Skip the 8 characters of "doctype:" to get the marker to search for.
    const OUString sMarker = clipBoardFormat.copy(8);
    if ( resultString.indexOf(sMarker) < 0 )
        return OUString();

    return checkType;
}
52 
IsMediaTypeXML(const OUString & mediaType)53 bool IsMediaTypeXML( const OUString& mediaType )
54 {
55     if (!mediaType.isEmpty())
56     {
57         OUString sType, sSubType;
58         if (INetContentTypes::parse(mediaType, sType, sSubType)
59             && sType == "application")
60         {
61             // RFC 3023: application/xml; don't detect text/xml
62             if (sSubType == "xml")
63                 return true;
64             // Registered media types: application/XXXX+xml
65             if (sSubType.endsWith("+xml"))
66                 return true;
67         }
68     }
69     return false;
70 }
71 
72 }
73 
detect(css::uno::Sequence<css::beans::PropertyValue> & aArguments)74 OUString SAL_CALL FilterDetect::detect( css::uno::Sequence< css::beans::PropertyValue >& aArguments )
75 {
76     OUString sTypeName;
77     OUString sUrl;
78     Sequence<PropertyValue > lProps ;
79 
80     css::uno::Reference< css::io::XInputStream > xInStream;
81     const PropertyValue * pValue = aArguments.getConstArray();
82     sal_Int32 nLength;
83     OUString resultString;
84 
85     nLength = aArguments.getLength();
86     sal_Int32 location=nLength;
87     for (sal_Int32 i = 0 ; i < nLength; i++)
88     {
89         if ( pValue[i].Name == "TypeName" )
90         {
91             location=i;
92         }
93         else if ( pValue[i].Name == "URL" )
94         {
95             pValue[i].Value >>= sUrl;
96         }
97         else if ( pValue[i].Name == "InputStream" )
98         {
99             pValue[i].Value >>= xInStream ;
100         }
101     }
102     try
103     {
104         if (!xInStream.is())
105         {
106             ::ucbhelper::Content aContent(
107                 sUrl, Reference< css::ucb::XCommandEnvironment >(),
108                 mxCtx);
109             xInStream = aContent.openStream();
110             if (!xInStream.is())
111             {
112                 return sTypeName;
113             }
114         }
115 
116         std::unique_ptr< SvStream > pInStream( ::utl::UcbStreamHelper::CreateStream( xInStream ) );
117         pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
118         sal_uInt64 const nUniPos = pInStream->Tell();
119 
120         const sal_uInt16 nSize = 4000;
121         bool  bTryUtf16 = false;
122 
123         if ( nUniPos == 0 ) // No BOM detected, try to guess UTF-16 endianness
124         {
125             sal_uInt16 nHeader = 0;
126             pInStream->ReadUInt16( nHeader );
127             if ( nHeader == 0x003C )
128                 bTryUtf16 = true;
129             else if ( nHeader == 0x3C00 )
130             {
131                 bTryUtf16 = true;
132                 pInStream->SetEndian( pInStream->GetEndian() == SvStreamEndian::LITTLE ? SvStreamEndian::BIG : SvStreamEndian::LITTLE );
133             }
134             pInStream->Seek( STREAM_SEEK_TO_BEGIN );
135         }
136 
137         if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode
138         {
139             OString const str(read_uInt8s_ToOString(*pInStream, nSize));
140             resultString = OUString(str.getStr(), str.getLength(),
141                 RTL_TEXTENCODING_ASCII_US,
142                 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT);
143         }
144         else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16
145             resultString = read_uInt16s_ToOUString( *pInStream, nSize );
146 
147         if ( !resultString.startsWith( "<?xml" ) )
148         {
149             // Check the content type; XML declaration is optional in XML files according to XML 1.0 ch.2.8
150             // (see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd)
151             OUString sMediaType;
152             try
153             {
154                 ::ucbhelper::Content aContent(
155                     sUrl, Reference< css::ucb::XCommandEnvironment >(),
156                     mxCtx);
157                 aContent.getPropertyValue("MediaType") >>= sMediaType;
158                 if (sMediaType.isEmpty())
159                 {
160                     aContent.getPropertyValue("Content-Type") >>= sMediaType;
161                 }
162             }
163             catch (...) {}
164 
165             if (!IsMediaTypeXML(sMediaType))
166             {
167                 // This is not an XML stream.  It makes no sense to try to detect
168                 // a non-XML file type here.
169                 return OUString();
170             }
171         }
172 
173         // test typedetect code
174         Reference <XNameAccess> xTypeCont(mxCtx->getServiceManager()->createInstanceWithContext("com.sun.star.document.TypeDetection", mxCtx), UNO_QUERY);
175         Sequence < OUString > myTypes= xTypeCont->getElementNames();
176         nLength = myTypes.getLength();
177 
178         sal_Int32 new_nlength=0;
179         sal_Int32 i = 0 ;
180         while ((i < nLength) && (sTypeName.isEmpty()))
181         {
182             Any elem = xTypeCont->getByName(myTypes[i]);
183             elem >>=lProps;
184             new_nlength = lProps.getLength();
185             sal_Int32 j =0;
186             while (j < new_nlength && (sTypeName.isEmpty()))
187             {
188                 OUString tmpStr;
189                 lProps[j].Value >>=tmpStr;
190                 if ( lProps[j].Name == "ClipboardFormat" && !tmpStr.isEmpty() )
191                 {
192                     sTypeName = supportedByType(tmpStr,resultString, myTypes[i]);
193                 }
194                 j++;
195             }
196             i++;
197         }
198     }
199     catch (const Exception &)
200     {
201         OSL_FAIL( "An Exception occurred while opening File stream" );
202     }
203 
204     if (!sTypeName.isEmpty())
205     {
206         if (location == aArguments.getLength())
207         {
208             aArguments.realloc(nLength+1);
209             aArguments[location].Name = "TypeName";
210         }
211         aArguments[location].Value <<=sTypeName;
212     }
213 
214     return sTypeName;
215 }
216 
217 // XInitialization
initialize(const Sequence<Any> &)218 void SAL_CALL FilterDetect::initialize( const Sequence< Any >& /*aArguments*/ )
219 {
220 }
221 
FilterDetect_getImplementationName()222 OUString FilterDetect_getImplementationName ()
223 {
224     return "com.sun.star.comp.filters.XMLFilterDetect";
225 }
226 
FilterDetect_getSupportedServiceNames()227 Sequence< OUString > FilterDetect_getSupportedServiceNames()
228 {
229     Sequence < OUString > aRet { "com.sun.star.document.ExtendedTypeDetection" };
230     return aRet;
231 }
232 
/**
 * Factory function: create a new FilterDetect bound to the given context.
 *
 * @param context  Component context passed on to the FilterDetect constructor.
 * @return The new instance as an XInterface reference.
 */
Reference< XInterface > FilterDetect_createInstance( const Reference< XComponentContext > & context)
{
    // Go through OWeakObject to select the XInterface base used for the reference.
    cppu::OWeakObject * const pDetector
        = static_cast< cppu::OWeakObject * >( new FilterDetect( context ) );
    return Reference< XInterface >( pDetector );
}
237 
238 // XServiceInfo
getImplementationName()239 OUString SAL_CALL FilterDetect::getImplementationName(  )
240 {
241     return FilterDetect_getImplementationName();
242 }
243 
supportsService(const OUString & rServiceName)244 sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
245 {
246     return cppu::supportsService( this, rServiceName );
247 }
248 
getSupportedServiceNames()249 Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames(  )
250 {
251     return FilterDetect_getSupportedServiceNames();
252 }
253 
254 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
255