1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <HtmlReader.hxx>
21 #include <connectivity/dbconversion.hxx>
22 #include <connectivity/dbtools.hxx>
23 #include <toolkit/helper/vclunohelper.hxx>
24 #include <tools/stream.hxx>
25 #include <tools/tenccvt.hxx>
26 #include <comphelper/string.hxx>
27 #include <strings.hrc>
28 #include <stringconstants.hxx>
29 #include <sfx2/sfxhtml.hxx>
30 #include <osl/diagnose.h>
31 #include <core_resource.hxx>
32 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
33 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
34 #include <com/sun/star/sdbcx/XAppend.hpp>
35 #include <com/sun/star/sdbc/DataType.hpp>
36 #include <com/sun/star/sdbc/ColumnValue.hpp>
37 #include <com/sun/star/awt/FontDescriptor.hpp>
38 #include <com/sun/star/awt/FontWeight.hpp>
39 #include <com/sun/star/awt/FontStrikeout.hpp>
40 #include <com/sun/star/awt/FontSlant.hpp>
41 #include <com/sun/star/awt/FontUnderline.hpp>
42 #include <com/sun/star/util/NumberFormat.hpp>
43 #include <com/sun/star/util/XNumberFormatTypes.hpp>
44 #include <svtools/htmltokn.h>
45 #include <svtools/htmlkywd.hxx>
46 #include <tools/color.hxx>
47 #include <WCopyTable.hxx>
48 #include <WExtendPages.hxx>
49 #include <WNameMatch.hxx>
50 #include <WColumnSelect.hxx>
51 #include <QEnumTypes.hxx>
52 #include <WCPage.hxx>
53 #include <rtl/tencinfo.h>
54 #include <UITools.hxx>
55 #include <vcl/svapp.hxx>
56 #include <vcl/settings.hxx>
57 
58 using namespace dbaui;
59 using namespace ::com::sun::star::uno;
60 using namespace ::com::sun::star::beans;
61 using namespace ::com::sun::star::container;
62 using namespace ::com::sun::star::sdbc;
63 using namespace ::com::sun::star::sdbcx;
64 using namespace ::com::sun::star::awt;
65 
66 #define DBAUI_HTML_FONTSIZES    8       // like export, HTML-Options
67 
68 // OHTMLReader
OHTMLReader(SvStream & rIn,const SharedConnection & _rxConnection,const Reference<css::util::XNumberFormatter> & _rxNumberF,const css::uno::Reference<css::uno::XComponentContext> & _rxContext)69 OHTMLReader::OHTMLReader(SvStream& rIn,const SharedConnection& _rxConnection,
70                         const Reference< css::util::XNumberFormatter >& _rxNumberF,
71                         const css::uno::Reference< css::uno::XComponentContext >& _rxContext)
72     : HTMLParser(rIn)
73     , ODatabaseExport( _rxConnection, _rxNumberF, _rxContext, rIn )
74     , m_nTableCount(0)
75     , m_nColumnWidth(87)
76 {
77     SetSrcEncoding( GetExtendedCompatibilityTextEncoding(  RTL_TEXTENCODING_ISO_8859_1 ) );
78     // If the file starts with a BOM, switch to UCS2.
79     SetSwitchToUCS2( true );
80 }
81 
OHTMLReader(SvStream & rIn,sal_Int32 nRows,const TPositions & _rColumnPositions,const Reference<css::util::XNumberFormatter> & _rxNumberF,const css::uno::Reference<css::uno::XComponentContext> & _rxContext,const TColumnVector * pList,const OTypeInfoMap * _pInfoMap,bool _bAutoIncrementEnabled)82 OHTMLReader::OHTMLReader(SvStream& rIn,
83                          sal_Int32 nRows,
84                          const TPositions &_rColumnPositions,
85                          const Reference< css::util::XNumberFormatter >& _rxNumberF,
86                          const css::uno::Reference< css::uno::XComponentContext >& _rxContext,
87                          const TColumnVector* pList,
88                          const OTypeInfoMap* _pInfoMap,
89                          bool _bAutoIncrementEnabled)
90     : HTMLParser(rIn)
91     , ODatabaseExport( nRows, _rColumnPositions, _rxNumberF, _rxContext, pList, _pInfoMap, _bAutoIncrementEnabled, rIn )
92     , m_nTableCount(0)
93     , m_nColumnWidth(87)
94 {
95     SetSrcEncoding( GetExtendedCompatibilityTextEncoding(  RTL_TEXTENCODING_ISO_8859_1 ) );
96     // If the file starts with a BOM, switch to UCS2.
97     SetSwitchToUCS2( true );
98 }
99 
~OHTMLReader()100 OHTMLReader::~OHTMLReader()
101 {
102 }
103 
CallParser()104 SvParserState OHTMLReader::CallParser()
105 {
106     rInput.Seek(STREAM_SEEK_TO_BEGIN);
107     rInput.ResetError();
108     SvParserState  eParseState = HTMLParser::CallParser();
109     SetColumnTypes(m_pColumnList,m_pInfoMap);
110     return m_bFoundTable ? eParseState : SvParserState::Error;
111 }
112 
113 #if defined _MSC_VER
114 #pragma warning(disable: 4702) // unreachable code, bug in MSVC2015
115 #endif
NextToken(HtmlTokenId nToken)116 void OHTMLReader::NextToken( HtmlTokenId nToken )
117 {
118     if(m_bError || !m_nRows) // if there is an error or no more rows to check, return immediately
119         return;
120     if ( nToken ==  HtmlTokenId::META )
121         setTextEncoding();
122 
123     if(m_xConnection.is())    // names, which CTOR was called and hence, if a table should be created
124     {
125         switch(nToken)
126         {
127             case HtmlTokenId::TABLE_ON:
128                 ++m_nTableCount;
129                 {   // can also be TD or TH, if there was no TABLE before
130                     const HTMLOptions& rHtmlOptions = GetOptions();
131                     for (const auto & rOption : rHtmlOptions)
132                     {
133                         if( rOption.GetToken() == HtmlOptionId::WIDTH )
134                         {   // percentage: of document width respectively outer cell
135                             m_nColumnWidth = GetWidthPixel( rOption );
136                         }
137                     }
138                 }
139                 [[fallthrough]];
140             case HtmlTokenId::THEAD_ON:
141             case HtmlTokenId::TBODY_ON:
142                 {
143                     sal_uInt64 const nTell = rInput.Tell(); // perhaps alters position of the stream
144                     if ( !m_xTable.is() )
145                     {// use first line as header
146                         m_bError = !CreateTable(nToken);
147                         if ( m_bAppendFirstLine )
148                             rInput.Seek(nTell);
149                     }
150                 }
151                 break;
152             case HtmlTokenId::TABLE_OFF:
153                 if(!--m_nTableCount)
154                 {
155                     m_xTable = nullptr;
156                 }
157                 break;
158             case HtmlTokenId::TABLEROW_ON:
159                 if ( !m_pUpdateHelper.get() )
160                     m_bError = true;
161                 break;
162             case HtmlTokenId::TEXTTOKEN:
163             case HtmlTokenId::SINGLECHAR:
164                 if ( m_bInTbl ) //&& !m_bSDNum ) // important, as otherwise we also get the names of the fonts
165                     m_sTextToken += aToken;
166                 break;
167             case HtmlTokenId::PARABREAK_OFF:
168                 m_sCurrent += m_sTextToken;
169                 break;
170             case HtmlTokenId::PARABREAK_ON:
171                 m_sTextToken.clear();
172                 break;
173             case HtmlTokenId::TABLEDATA_ON:
174                 fetchOptions();
175                 break;
176             case HtmlTokenId::TABLEDATA_OFF:
177                 {
178                     if ( !m_sCurrent.isEmpty() )
179                         m_sTextToken = m_sCurrent;
180                     try
181                     {
182                         insertValueIntoColumn();
183                     }
184                     catch(SQLException& e)
185                     // handling update failure
186                     {
187                         showErrorDialog(e);
188                     }
189                     m_sCurrent.clear();
190                     m_nColumnPos++;
191                     eraseTokens();
192                     m_bInTbl = false;
193                 }
194                 break;
195             case HtmlTokenId::TABLEROW_OFF:
196                 if ( !m_pUpdateHelper.get() )
197                 {
198                     m_bError = true;
199                     break;
200                 }
201                 try
202                 {
203                     m_nRowCount++;
204                     if (m_bIsAutoIncrement) // if bSetAutoIncrement then I have to set the autoincrement
205                         m_pUpdateHelper->updateInt(1,m_nRowCount);
206                     m_pUpdateHelper->insertRow();
207                 }
208                 catch(SQLException& e)
209                 // handling update failure
210                 {
211                     showErrorDialog(e);
212                 }
213                 m_nColumnPos = 0;
214                 break;
215             default: break;
216         }
217     }
218     else // branch only valid for type checking
219     {
220         switch(nToken)
221         {
222             case HtmlTokenId::THEAD_ON:
223             case HtmlTokenId::TBODY_ON:
224                 // The head of the column is not included
225                 if(m_bHead)
226                 {
227                     do
228                     {}
229                     while(GetNextToken() != HtmlTokenId::TABLEROW_OFF);
230                     m_bHead = false;
231                 }
232                 break;
233             case HtmlTokenId::TABLEDATA_ON:
234             case HtmlTokenId::TABLEHEADER_ON:
235                 fetchOptions();
236                 break;
237             case HtmlTokenId::TEXTTOKEN:
238             case HtmlTokenId::SINGLECHAR:
239                 if ( m_bInTbl ) // && !m_bSDNum ) // important, as otherwise we also get the names of the fonts
240                     m_sTextToken += aToken;
241                 break;
242             case HtmlTokenId::PARABREAK_OFF:
243                 m_sCurrent += m_sTextToken;
244                 break;
245             case HtmlTokenId::PARABREAK_ON:
246                 m_sTextToken.clear();
247                 break;
248             case HtmlTokenId::TABLEDATA_OFF:
249                 if ( !m_sCurrent.isEmpty() )
250                     m_sTextToken = m_sCurrent;
251                 adjustFormat();
252                 m_nColumnPos++;
253                 m_bInTbl = false;
254                 m_sCurrent.clear();
255                 break;
256             case HtmlTokenId::TABLEROW_OFF:
257                 if ( !m_sCurrent.isEmpty() )
258                     m_sTextToken = m_sCurrent;
259                 adjustFormat();
260                 m_nColumnPos = 0;
261                 m_nRows--;
262                 m_sCurrent.clear();
263                 break;
264             default: break;
265         }
266     }
267 }
268 
fetchOptions()269 void OHTMLReader::fetchOptions()
270 {
271     m_bInTbl = true;
272     const HTMLOptions& options = GetOptions();
273     for (const auto & rOption : options)
274     {
275         switch( rOption.GetToken() )
276         {
277             case HtmlOptionId::SDNUM:
278                 m_sNumToken = rOption.GetString();
279             break;
280             default: break;
281         }
282     }
283 }
284 
TableDataOn(SvxCellHorJustify & eVal)285 void OHTMLReader::TableDataOn(SvxCellHorJustify& eVal)
286 {
287     const HTMLOptions& rHtmlOptions = GetOptions();
288     for (const auto & rOption : rHtmlOptions)
289     {
290         switch( rOption.GetToken() )
291         {
292             case HtmlOptionId::ALIGN:
293             {
294                 const OUString& rOptVal = rOption.GetString();
295                 if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ))
296                     eVal = SvxCellHorJustify::Right;
297                 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ))
298                     eVal = SvxCellHorJustify::Center;
299                 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ))
300                     eVal = SvxCellHorJustify::Left;
301                 else
302                     eVal = SvxCellHorJustify::Standard;
303             }
304             break;
305             default: break;
306         }
307     }
308 }
309 
TableFontOn(FontDescriptor & _rFont,Color & _rTextColor)310 void OHTMLReader::TableFontOn(FontDescriptor& _rFont, Color &_rTextColor)
311 {
312     const HTMLOptions& rHtmlOptions = GetOptions();
313     for (const auto & rOption : rHtmlOptions)
314     {
315         switch( rOption.GetToken() )
316         {
317         case HtmlOptionId::COLOR:
318             {
319                 Color aColor;
320                 rOption.GetColor( aColor );
321                 _rTextColor = aColor.GetRGBColor();
322             }
323             break;
324         case HtmlOptionId::FACE :
325             {
326                 const OUString& rFace = rOption.GetString();
327                 OUStringBuffer aFontName;
328                 sal_Int32 nPos = 0;
329                 while( nPos != -1 )
330                 {
331                     // list of fonts, VCL: semicolon as separator, HTML: comma
332                     OUString aFName = rFace.getToken( 0, ',', nPos );
333                     aFName = comphelper::string::strip(aFName, ' ');
334                     if( !aFontName.isEmpty() )
335                         aFontName.append(";");
336                     aFontName.append(aFName);
337                 }
338                 if ( !aFontName.isEmpty() )
339                     _rFont.Name = aFontName.makeStringAndClear();
340             }
341             break;
342         case HtmlOptionId::SIZE :
343             {
344                 sal_Int16 nSize = static_cast<sal_Int16>(rOption.GetNumber());
345                 if ( nSize == 0 )
346                     nSize = 1;
347                 else if ( nSize < DBAUI_HTML_FONTSIZES )
348                     nSize = DBAUI_HTML_FONTSIZES;
349 
350                 _rFont.Height = nSize;
351             }
352             break;
353         default: break;
354         }
355     }
356 }
357 
GetWidthPixel(const HTMLOption & rOption)358 sal_Int16 OHTMLReader::GetWidthPixel( const HTMLOption& rOption )
359 {
360     const OUString& rOptVal = rOption.GetString();
361     if ( rOptVal.indexOf('%') != -1 )
362     {   // percentage
363         OSL_ENSURE( m_nColumnWidth, "WIDTH Option: m_nColumnWidth==0 and Width%" );
364         return static_cast<sal_Int16>((rOption.GetNumber() * m_nColumnWidth) / 100);
365     }
366     else
367     {
368         if ( rOptVal.indexOf('*') != -1 )
369         {   // relative to what?!?
370 //TODO: collect ColArray of all relevant values and then MakeCol
371             return 0;
372         }
373         else
374             return static_cast<sal_Int16>(rOption.GetNumber());  // pixel
375     }
376 }
377 
CreateTable(HtmlTokenId nToken)378 bool OHTMLReader::CreateTable(HtmlTokenId nToken)
379 {
380     OUString aTempName(DBA_RES(STR_TBL_TITLE));
381     aTempName = aTempName.getToken(0,' ');
382     aTempName = ::dbtools::createUniqueName(m_xTables, aTempName);
383 
384     bool bCaption = false;
385     bool bTableHeader = false;
386     OUString aColumnName;
387     SvxCellHorJustify eVal;
388 
389     OUString aTableName;
390     FontDescriptor aFont = VCLUnoHelper::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
391     Color nTextColor;
392     do
393     {
394         switch (nToken)
395         {
396             case HtmlTokenId::TEXTTOKEN:
397             case HtmlTokenId::SINGLECHAR:
398                 if(bTableHeader)
399                     aColumnName += aToken;
400                 if(bCaption)
401                     aTableName += aToken;
402                 break;
403             case HtmlTokenId::PARABREAK_OFF:
404                 m_sCurrent += aColumnName;
405                 break;
406             case HtmlTokenId::PARABREAK_ON:
407                 m_sTextToken.clear();
408                 break;
409             case HtmlTokenId::TABLEDATA_ON:
410             case HtmlTokenId::TABLEHEADER_ON:
411                 TableDataOn(eVal);
412                 bTableHeader = true;
413                 break;
414             case HtmlTokenId::TABLEDATA_OFF:
415             case HtmlTokenId::TABLEHEADER_OFF:
416                 {
417                     aColumnName = comphelper::string::strip(aColumnName, ' ' );
418                     if (aColumnName.isEmpty() || m_bAppendFirstLine )
419                         aColumnName = DBA_RES(STR_COLUMN_NAME);
420                     else if ( !m_sCurrent.isEmpty() )
421                         aColumnName = m_sCurrent;
422 
423                     aColumnName = comphelper::string::strip(aColumnName, ' ');
424                     CreateDefaultColumn(aColumnName);
425                     aColumnName.clear();
426                     m_sCurrent.clear();
427 
428                     eVal = SvxCellHorJustify::Standard;
429                     bTableHeader = false;
430                 }
431                 break;
432 
433             case HtmlTokenId::TITLE_ON:
434             case HtmlTokenId::CAPTION_ON:
435                 bCaption = true;
436                 break;
437             case HtmlTokenId::TITLE_OFF:
438             case HtmlTokenId::CAPTION_OFF:
439                 aTableName = comphelper::string::strip(aTableName, ' ');
440                 if(aTableName.isEmpty())
441                     aTableName = ::dbtools::createUniqueName(m_xTables, aTableName);
442                 else
443                     aTableName = aTempName;
444                 bCaption = false;
445                 break;
446             case HtmlTokenId::FONT_ON:
447                 TableFontOn(aFont,nTextColor);
448                 break;
449             case HtmlTokenId::BOLD_ON:
450                 aFont.Weight = css::awt::FontWeight::BOLD;
451                 break;
452             case HtmlTokenId::ITALIC_ON:
453                 aFont.Slant = css::awt::FontSlant_ITALIC;
454                 break;
455             case HtmlTokenId::UNDERLINE_ON:
456                 aFont.Underline = css::awt::FontUnderline::SINGLE;
457                 break;
458             case HtmlTokenId::STRIKE_ON:
459                 aFont.Strikeout = css::awt::FontStrikeout::SINGLE;
460                 break;
461             default: break;
462         }
463         nToken = GetNextToken();
464     }
465     while (nToken != HtmlTokenId::TABLEROW_OFF);
466 
467     if ( !m_sCurrent.isEmpty() )
468         aColumnName = m_sCurrent;
469     aColumnName = comphelper::string::strip(aColumnName, ' ');
470     if(!aColumnName.isEmpty())
471         CreateDefaultColumn(aColumnName);
472 
473     if ( m_vDestVector.empty() )
474         return false;
475 
476     if(aTableName.isEmpty())
477         aTableName = aTempName;
478 
479     m_bInTbl        = false;
480     m_bFoundTable   = true;
481 
482     if ( isCheckEnabled() )
483         return true;
484 
485     return !executeWizard(aTableName,makeAny(nTextColor),aFont) && m_xTable.is();
486 }
487 
setTextEncoding()488 void OHTMLReader::setTextEncoding()
489 {
490     ParseMetaOptions(nullptr, nullptr);
491 }
492 
getTypeSelectionPageFactory()493 TypeSelectionPageFactory OHTMLReader::getTypeSelectionPageFactory()
494 {
495     return &OWizHTMLExtend::Create;
496 }
497 
498 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
499