1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <HtmlReader.hxx>
21 #include <connectivity/dbconversion.hxx>
22 #include <connectivity/dbtools.hxx>
23 #include <toolkit/helper/vclunohelper.hxx>
24 #include <tools/stream.hxx>
25 #include <tools/tenccvt.hxx>
26 #include <comphelper/string.hxx>
27 #include <strings.hrc>
28 #include <stringconstants.hxx>
29 #include <sfx2/sfxhtml.hxx>
30 #include <osl/diagnose.h>
31 #include <core_resource.hxx>
32 #include <com/sun/star/sdbcx/XDataDescriptorFactory.hpp>
33 #include <com/sun/star/sdbcx/XColumnsSupplier.hpp>
34 #include <com/sun/star/sdbcx/XAppend.hpp>
35 #include <com/sun/star/sdbc/DataType.hpp>
36 #include <com/sun/star/sdbc/ColumnValue.hpp>
37 #include <com/sun/star/awt/FontDescriptor.hpp>
38 #include <com/sun/star/awt/FontWeight.hpp>
39 #include <com/sun/star/awt/FontStrikeout.hpp>
40 #include <com/sun/star/awt/FontSlant.hpp>
41 #include <com/sun/star/awt/FontUnderline.hpp>
42 #include <com/sun/star/util/NumberFormat.hpp>
43 #include <com/sun/star/util/XNumberFormatTypes.hpp>
44 #include <svtools/htmltokn.h>
45 #include <svtools/htmlkywd.hxx>
46 #include <tools/color.hxx>
47 #include <WCopyTable.hxx>
48 #include <WExtendPages.hxx>
49 #include <WNameMatch.hxx>
50 #include <WColumnSelect.hxx>
51 #include <QEnumTypes.hxx>
52 #include <WCPage.hxx>
53 #include <rtl/tencinfo.h>
54 #include <UITools.hxx>
55 #include <vcl/svapp.hxx>
56 #include <vcl/settings.hxx>
57
58 using namespace dbaui;
59 using namespace ::com::sun::star::uno;
60 using namespace ::com::sun::star::beans;
61 using namespace ::com::sun::star::container;
62 using namespace ::com::sun::star::sdbc;
63 using namespace ::com::sun::star::sdbcx;
64 using namespace ::com::sun::star::awt;
65
66 #define DBAUI_HTML_FONTSIZES 8 // like export, HTML-Options
67
68 // OHTMLReader
OHTMLReader(SvStream & rIn,const SharedConnection & _rxConnection,const Reference<css::util::XNumberFormatter> & _rxNumberF,const css::uno::Reference<css::uno::XComponentContext> & _rxContext)69 OHTMLReader::OHTMLReader(SvStream& rIn,const SharedConnection& _rxConnection,
70 const Reference< css::util::XNumberFormatter >& _rxNumberF,
71 const css::uno::Reference< css::uno::XComponentContext >& _rxContext)
72 : HTMLParser(rIn)
73 , ODatabaseExport( _rxConnection, _rxNumberF, _rxContext, rIn )
74 , m_nTableCount(0)
75 , m_nColumnWidth(87)
76 {
77 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
78 // If the file starts with a BOM, switch to UCS2.
79 SetSwitchToUCS2( true );
80 }
81
OHTMLReader(SvStream & rIn,sal_Int32 nRows,const TPositions & _rColumnPositions,const Reference<css::util::XNumberFormatter> & _rxNumberF,const css::uno::Reference<css::uno::XComponentContext> & _rxContext,const TColumnVector * pList,const OTypeInfoMap * _pInfoMap,bool _bAutoIncrementEnabled)82 OHTMLReader::OHTMLReader(SvStream& rIn,
83 sal_Int32 nRows,
84 const TPositions &_rColumnPositions,
85 const Reference< css::util::XNumberFormatter >& _rxNumberF,
86 const css::uno::Reference< css::uno::XComponentContext >& _rxContext,
87 const TColumnVector* pList,
88 const OTypeInfoMap* _pInfoMap,
89 bool _bAutoIncrementEnabled)
90 : HTMLParser(rIn)
91 , ODatabaseExport( nRows, _rColumnPositions, _rxNumberF, _rxContext, pList, _pInfoMap, _bAutoIncrementEnabled, rIn )
92 , m_nTableCount(0)
93 , m_nColumnWidth(87)
94 {
95 SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
96 // If the file starts with a BOM, switch to UCS2.
97 SetSwitchToUCS2( true );
98 }
99
~OHTMLReader()100 OHTMLReader::~OHTMLReader()
101 {
102 }
103
CallParser()104 SvParserState OHTMLReader::CallParser()
105 {
106 rInput.Seek(STREAM_SEEK_TO_BEGIN);
107 rInput.ResetError();
108 SvParserState eParseState = HTMLParser::CallParser();
109 SetColumnTypes(m_pColumnList,m_pInfoMap);
110 return m_bFoundTable ? eParseState : SvParserState::Error;
111 }
112
113 #if defined _MSC_VER
114 #pragma warning(disable: 4702) // unreachable code, bug in MSVC2015
115 #endif
NextToken(HtmlTokenId nToken)116 void OHTMLReader::NextToken( HtmlTokenId nToken )
117 {
118 if(m_bError || !m_nRows) // if there is an error or no more rows to check, return immediately
119 return;
120 if ( nToken == HtmlTokenId::META )
121 setTextEncoding();
122
123 if(m_xConnection.is()) // names, which CTOR was called and hence, if a table should be created
124 {
125 switch(nToken)
126 {
127 case HtmlTokenId::TABLE_ON:
128 ++m_nTableCount;
129 { // can also be TD or TH, if there was no TABLE before
130 const HTMLOptions& rHtmlOptions = GetOptions();
131 for (const auto & rOption : rHtmlOptions)
132 {
133 if( rOption.GetToken() == HtmlOptionId::WIDTH )
134 { // percentage: of document width respectively outer cell
135 m_nColumnWidth = GetWidthPixel( rOption );
136 }
137 }
138 }
139 [[fallthrough]];
140 case HtmlTokenId::THEAD_ON:
141 case HtmlTokenId::TBODY_ON:
142 {
143 sal_uInt64 const nTell = rInput.Tell(); // perhaps alters position of the stream
144 if ( !m_xTable.is() )
145 {// use first line as header
146 m_bError = !CreateTable(nToken);
147 if ( m_bAppendFirstLine )
148 rInput.Seek(nTell);
149 }
150 }
151 break;
152 case HtmlTokenId::TABLE_OFF:
153 if(!--m_nTableCount)
154 {
155 m_xTable = nullptr;
156 }
157 break;
158 case HtmlTokenId::TABLEROW_ON:
159 if ( !m_pUpdateHelper.get() )
160 m_bError = true;
161 break;
162 case HtmlTokenId::TEXTTOKEN:
163 case HtmlTokenId::SINGLECHAR:
164 if ( m_bInTbl ) //&& !m_bSDNum ) // important, as otherwise we also get the names of the fonts
165 m_sTextToken += aToken;
166 break;
167 case HtmlTokenId::PARABREAK_OFF:
168 m_sCurrent += m_sTextToken;
169 break;
170 case HtmlTokenId::PARABREAK_ON:
171 m_sTextToken.clear();
172 break;
173 case HtmlTokenId::TABLEDATA_ON:
174 fetchOptions();
175 break;
176 case HtmlTokenId::TABLEDATA_OFF:
177 {
178 if ( !m_sCurrent.isEmpty() )
179 m_sTextToken = m_sCurrent;
180 try
181 {
182 insertValueIntoColumn();
183 }
184 catch(SQLException& e)
185 // handling update failure
186 {
187 showErrorDialog(e);
188 }
189 m_sCurrent.clear();
190 m_nColumnPos++;
191 eraseTokens();
192 m_bInTbl = false;
193 }
194 break;
195 case HtmlTokenId::TABLEROW_OFF:
196 if ( !m_pUpdateHelper.get() )
197 {
198 m_bError = true;
199 break;
200 }
201 try
202 {
203 m_nRowCount++;
204 if (m_bIsAutoIncrement) // if bSetAutoIncrement then I have to set the autoincrement
205 m_pUpdateHelper->updateInt(1,m_nRowCount);
206 m_pUpdateHelper->insertRow();
207 }
208 catch(SQLException& e)
209 // handling update failure
210 {
211 showErrorDialog(e);
212 }
213 m_nColumnPos = 0;
214 break;
215 default: break;
216 }
217 }
218 else // branch only valid for type checking
219 {
220 switch(nToken)
221 {
222 case HtmlTokenId::THEAD_ON:
223 case HtmlTokenId::TBODY_ON:
224 // The head of the column is not included
225 if(m_bHead)
226 {
227 do
228 {}
229 while(GetNextToken() != HtmlTokenId::TABLEROW_OFF);
230 m_bHead = false;
231 }
232 break;
233 case HtmlTokenId::TABLEDATA_ON:
234 case HtmlTokenId::TABLEHEADER_ON:
235 fetchOptions();
236 break;
237 case HtmlTokenId::TEXTTOKEN:
238 case HtmlTokenId::SINGLECHAR:
239 if ( m_bInTbl ) // && !m_bSDNum ) // important, as otherwise we also get the names of the fonts
240 m_sTextToken += aToken;
241 break;
242 case HtmlTokenId::PARABREAK_OFF:
243 m_sCurrent += m_sTextToken;
244 break;
245 case HtmlTokenId::PARABREAK_ON:
246 m_sTextToken.clear();
247 break;
248 case HtmlTokenId::TABLEDATA_OFF:
249 if ( !m_sCurrent.isEmpty() )
250 m_sTextToken = m_sCurrent;
251 adjustFormat();
252 m_nColumnPos++;
253 m_bInTbl = false;
254 m_sCurrent.clear();
255 break;
256 case HtmlTokenId::TABLEROW_OFF:
257 if ( !m_sCurrent.isEmpty() )
258 m_sTextToken = m_sCurrent;
259 adjustFormat();
260 m_nColumnPos = 0;
261 m_nRows--;
262 m_sCurrent.clear();
263 break;
264 default: break;
265 }
266 }
267 }
268
fetchOptions()269 void OHTMLReader::fetchOptions()
270 {
271 m_bInTbl = true;
272 const HTMLOptions& options = GetOptions();
273 for (const auto & rOption : options)
274 {
275 switch( rOption.GetToken() )
276 {
277 case HtmlOptionId::SDNUM:
278 m_sNumToken = rOption.GetString();
279 break;
280 default: break;
281 }
282 }
283 }
284
TableDataOn(SvxCellHorJustify & eVal)285 void OHTMLReader::TableDataOn(SvxCellHorJustify& eVal)
286 {
287 const HTMLOptions& rHtmlOptions = GetOptions();
288 for (const auto & rOption : rHtmlOptions)
289 {
290 switch( rOption.GetToken() )
291 {
292 case HtmlOptionId::ALIGN:
293 {
294 const OUString& rOptVal = rOption.GetString();
295 if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_right ))
296 eVal = SvxCellHorJustify::Right;
297 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_center ))
298 eVal = SvxCellHorJustify::Center;
299 else if (rOptVal.equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_AL_left ))
300 eVal = SvxCellHorJustify::Left;
301 else
302 eVal = SvxCellHorJustify::Standard;
303 }
304 break;
305 default: break;
306 }
307 }
308 }
309
TableFontOn(FontDescriptor & _rFont,Color & _rTextColor)310 void OHTMLReader::TableFontOn(FontDescriptor& _rFont, Color &_rTextColor)
311 {
312 const HTMLOptions& rHtmlOptions = GetOptions();
313 for (const auto & rOption : rHtmlOptions)
314 {
315 switch( rOption.GetToken() )
316 {
317 case HtmlOptionId::COLOR:
318 {
319 Color aColor;
320 rOption.GetColor( aColor );
321 _rTextColor = aColor.GetRGBColor();
322 }
323 break;
324 case HtmlOptionId::FACE :
325 {
326 const OUString& rFace = rOption.GetString();
327 OUStringBuffer aFontName;
328 sal_Int32 nPos = 0;
329 while( nPos != -1 )
330 {
331 // list of fonts, VCL: semicolon as separator, HTML: comma
332 OUString aFName = rFace.getToken( 0, ',', nPos );
333 aFName = comphelper::string::strip(aFName, ' ');
334 if( !aFontName.isEmpty() )
335 aFontName.append(";");
336 aFontName.append(aFName);
337 }
338 if ( !aFontName.isEmpty() )
339 _rFont.Name = aFontName.makeStringAndClear();
340 }
341 break;
342 case HtmlOptionId::SIZE :
343 {
344 sal_Int16 nSize = static_cast<sal_Int16>(rOption.GetNumber());
345 if ( nSize == 0 )
346 nSize = 1;
347 else if ( nSize < DBAUI_HTML_FONTSIZES )
348 nSize = DBAUI_HTML_FONTSIZES;
349
350 _rFont.Height = nSize;
351 }
352 break;
353 default: break;
354 }
355 }
356 }
357
GetWidthPixel(const HTMLOption & rOption)358 sal_Int16 OHTMLReader::GetWidthPixel( const HTMLOption& rOption )
359 {
360 const OUString& rOptVal = rOption.GetString();
361 if ( rOptVal.indexOf('%') != -1 )
362 { // percentage
363 OSL_ENSURE( m_nColumnWidth, "WIDTH Option: m_nColumnWidth==0 and Width%" );
364 return static_cast<sal_Int16>((rOption.GetNumber() * m_nColumnWidth) / 100);
365 }
366 else
367 {
368 if ( rOptVal.indexOf('*') != -1 )
369 { // relative to what?!?
370 //TODO: collect ColArray of all relevant values and then MakeCol
371 return 0;
372 }
373 else
374 return static_cast<sal_Int16>(rOption.GetNumber()); // pixel
375 }
376 }
377
CreateTable(HtmlTokenId nToken)378 bool OHTMLReader::CreateTable(HtmlTokenId nToken)
379 {
380 OUString aTempName(DBA_RES(STR_TBL_TITLE));
381 aTempName = aTempName.getToken(0,' ');
382 aTempName = ::dbtools::createUniqueName(m_xTables, aTempName);
383
384 bool bCaption = false;
385 bool bTableHeader = false;
386 OUString aColumnName;
387 SvxCellHorJustify eVal;
388
389 OUString aTableName;
390 FontDescriptor aFont = VCLUnoHelper::CreateFontDescriptor(Application::GetSettings().GetStyleSettings().GetAppFont());
391 Color nTextColor;
392 do
393 {
394 switch (nToken)
395 {
396 case HtmlTokenId::TEXTTOKEN:
397 case HtmlTokenId::SINGLECHAR:
398 if(bTableHeader)
399 aColumnName += aToken;
400 if(bCaption)
401 aTableName += aToken;
402 break;
403 case HtmlTokenId::PARABREAK_OFF:
404 m_sCurrent += aColumnName;
405 break;
406 case HtmlTokenId::PARABREAK_ON:
407 m_sTextToken.clear();
408 break;
409 case HtmlTokenId::TABLEDATA_ON:
410 case HtmlTokenId::TABLEHEADER_ON:
411 TableDataOn(eVal);
412 bTableHeader = true;
413 break;
414 case HtmlTokenId::TABLEDATA_OFF:
415 case HtmlTokenId::TABLEHEADER_OFF:
416 {
417 aColumnName = comphelper::string::strip(aColumnName, ' ' );
418 if (aColumnName.isEmpty() || m_bAppendFirstLine )
419 aColumnName = DBA_RES(STR_COLUMN_NAME);
420 else if ( !m_sCurrent.isEmpty() )
421 aColumnName = m_sCurrent;
422
423 aColumnName = comphelper::string::strip(aColumnName, ' ');
424 CreateDefaultColumn(aColumnName);
425 aColumnName.clear();
426 m_sCurrent.clear();
427
428 eVal = SvxCellHorJustify::Standard;
429 bTableHeader = false;
430 }
431 break;
432
433 case HtmlTokenId::TITLE_ON:
434 case HtmlTokenId::CAPTION_ON:
435 bCaption = true;
436 break;
437 case HtmlTokenId::TITLE_OFF:
438 case HtmlTokenId::CAPTION_OFF:
439 aTableName = comphelper::string::strip(aTableName, ' ');
440 if(aTableName.isEmpty())
441 aTableName = ::dbtools::createUniqueName(m_xTables, aTableName);
442 else
443 aTableName = aTempName;
444 bCaption = false;
445 break;
446 case HtmlTokenId::FONT_ON:
447 TableFontOn(aFont,nTextColor);
448 break;
449 case HtmlTokenId::BOLD_ON:
450 aFont.Weight = css::awt::FontWeight::BOLD;
451 break;
452 case HtmlTokenId::ITALIC_ON:
453 aFont.Slant = css::awt::FontSlant_ITALIC;
454 break;
455 case HtmlTokenId::UNDERLINE_ON:
456 aFont.Underline = css::awt::FontUnderline::SINGLE;
457 break;
458 case HtmlTokenId::STRIKE_ON:
459 aFont.Strikeout = css::awt::FontStrikeout::SINGLE;
460 break;
461 default: break;
462 }
463 nToken = GetNextToken();
464 }
465 while (nToken != HtmlTokenId::TABLEROW_OFF);
466
467 if ( !m_sCurrent.isEmpty() )
468 aColumnName = m_sCurrent;
469 aColumnName = comphelper::string::strip(aColumnName, ' ');
470 if(!aColumnName.isEmpty())
471 CreateDefaultColumn(aColumnName);
472
473 if ( m_vDestVector.empty() )
474 return false;
475
476 if(aTableName.isEmpty())
477 aTableName = aTempName;
478
479 m_bInTbl = false;
480 m_bFoundTable = true;
481
482 if ( isCheckEnabled() )
483 return true;
484
485 return !executeWizard(aTableName,makeAny(nTextColor),aFont) && m_xTable.is();
486 }
487
setTextEncoding()488 void OHTMLReader::setTextEncoding()
489 {
490 ParseMetaOptions(nullptr, nullptr);
491 }
492
getTypeSelectionPageFactory()493 TypeSelectionPageFactory OHTMLReader::getTypeSelectionPageFactory()
494 {
495 return &OWizHTMLExtend::Create;
496 }
497
498 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
499