1 /////////////////////////////////////////////////////////////////////////////
2 // Name:        src/html/htmltag.cpp
3 // Purpose:     wxHtmlTag class (represents single tag)
4 // Author:      Vaclav Slavik
5 // RCS-ID:      $Id: htmltag.cpp 53433 2008-05-03 00:40:29Z VZ $
6 // Copyright:   (c) 1999 Vaclav Slavik
7 // Licence:     wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9 
10 #include "wx/wxprec.h"
11 
12 #ifdef __BORLANDC__
13     #pragma hdrstop
14 #endif
15 
16 #if wxUSE_HTML
17 
18 #include "wx/html/htmltag.h"
19 
20 #ifndef WXPRECOMP
21     #include "wx/colour.h"
22 #endif
23 
24 #include "wx/html/htmlpars.h"
25 #include <stdio.h> // for vsscanf
26 #include <stdarg.h>
27 
28 
29 //-----------------------------------------------------------------------------
30 // wxHtmlTagsCache
31 //-----------------------------------------------------------------------------
32 
33 struct wxHtmlCacheItem
34 {
35     // this is "pos" value passed to wxHtmlTag's constructor.
36     // it is position of '<' character of the tag
37     int Key;
38 
39     // end positions for the tag:
40     // end1 is '<' of ending tag,
41     // end2 is '>' or both are
42     // -1 if there is no ending tag for this one...
43     // or -2 if this is ending tag  </...>
44     int End1, End2;
45 
46     // name of this tag
47     wxChar *Name;
48 };
49 
50 
// wxWidgets run-time type information for wxHtmlTagsCache (base: wxObject)
IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)

// Growth step of the cache array: room for this many more wxHtmlCacheItem
// entries is allocated whenever the number of entries reaches a multiple of
// this value.
#define CACHE_INCREMENT  64
54 
55 bool wxIsCDATAElement(const wxChar *tag)
56 {
57     return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
58            (wxStrcmp(tag, _T("STYLE")) == 0);
59 }
60 
wxHtmlTagsCache(const wxString & source)61 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
62 {
63     const wxChar *src = source.c_str();
64     int lng = source.length();
65     wxChar tagBuffer[256];
66 
67     m_Cache = NULL;
68     m_CacheSize = 0;
69     m_CachePos = 0;
70 
71     int pos = 0;
72     while (pos < lng)
73     {
74         if (src[pos] == wxT('<'))   // tag found:
75         {
76             if (m_CacheSize % CACHE_INCREMENT == 0)
77                 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
78             int tg = m_CacheSize++;
79             int stpos = pos++;
80             m_Cache[tg].Key = stpos;
81 
82             int i;
83             for ( i = 0;
84                   pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
85                   src[pos] != wxT('>') && !wxIsspace(src[pos]);
86                   i++, pos++ )
87             {
88                 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
89             }
90             tagBuffer[i] = _T('\0');
91 
92             m_Cache[tg].Name = new wxChar[i+1];
93             memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
94 
95             while (pos < lng && src[pos] != wxT('>')) pos++;
96 
97             if (src[stpos+1] == wxT('/')) // ending tag:
98             {
99                 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
100                 // find matching begin tag:
101                 for (i = tg; i >= 0; i--)
102                     if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
103                     {
104                         m_Cache[i].End1 = stpos;
105                         m_Cache[i].End2 = pos + 1;
106                         break;
107                     }
108             }
109             else
110             {
111                 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
112 
113                 if (wxIsCDATAElement(tagBuffer))
114                 {
115                     // store the orig pos in case we are missing the closing
116                     // tag (see below)
117                     wxInt32 old_pos = pos;
118                     bool foundCloseTag = false;
119 
120                     // find next matching tag
121                     int tag_len = wxStrlen(tagBuffer);
122                     while (pos < lng)
123                     {
124                         // find the ending tag
125                         while (pos + 1 < lng &&
126                                (src[pos] != '<' || src[pos+1] != '/'))
127                             ++pos;
128                         if (src[pos] == '<')
129                             ++pos;
130 
131                         // see if it matches
132                         int match_pos = 0;
133                         while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
134                             // cast to wxChar needed to suppress warning in
135                             // Unicode build
136                             if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
137                                 ++match_pos;
138                             }
139                             else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
140                                 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
141                                 // need to skip over these
142                             }
143                             else {
144                                 match_pos = 0;
145                             }
146                             ++pos;
147                         }
148 
149                         // found a match
150                         if (match_pos == tag_len)
151                         {
152                             pos = pos - tag_len - 3;
153                             foundCloseTag = true;
154                             break;
155                         }
156                         else // keep looking for the closing tag
157                         {
158                             ++pos;
159                         }
160                     }
161                     if (!foundCloseTag)
162                     {
163                         // we didn't find closing tag; this means the markup
164                         // is incorrect and the best thing we can do is to
165                         // ignore the unclosed tag and continue parsing as if
166                         // it didn't exist:
167                         pos = old_pos;
168                     }
169                 }
170             }
171         }
172 
173         pos++;
174     }
175 
176     // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
177     for (int i = 0; i < m_CacheSize; i++)
178     {
179         delete[] m_Cache[i].Name;
180         m_Cache[i].Name = NULL;
181     }
182 }
183 
QueryTag(int at,int * end1,int * end2)184 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
185 {
186     if (m_Cache == NULL) return;
187     if (m_Cache[m_CachePos].Key != at)
188     {
189         int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
190         do
191         {
192             if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
193             {
194                 // something is very wrong with HTML, give up by returning an
195                 // impossibly large value which is going to be ignored by the
196                 // caller
197                 *end1 =
198                 *end2 = INT_MAX;
199                 return;
200             }
201 
202             m_CachePos += delta;
203         }
204         while (m_Cache[m_CachePos].Key != at);
205     }
206     *end1 = m_Cache[m_CachePos].End1;
207     *end2 = m_Cache[m_CachePos].End2;
208 }
209 
210 
211 
212 
213 //-----------------------------------------------------------------------------
214 // wxHtmlTag
215 //-----------------------------------------------------------------------------
216 
// wxWidgets run-time type information for wxHtmlTag (base: wxObject)
IMPLEMENT_CLASS(wxHtmlTag,wxObject)
218 
219 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
220                      const wxString& source, int pos, int end_pos,
221                      wxHtmlTagsCache *cache,
222                      wxHtmlEntitiesParser *entParser) : wxObject()
223 {
224     /* Setup DOM relations */
225 
226     m_Next = NULL;
227     m_FirstChild = m_LastChild = NULL;
228     m_Parent = parent;
229     if (parent)
230     {
231         m_Prev = m_Parent->m_LastChild;
232         if (m_Prev == NULL)
233             m_Parent->m_FirstChild = this;
234         else
235             m_Prev->m_Next = this;
236         m_Parent->m_LastChild = this;
237     }
238     else
239         m_Prev = NULL;
240 
241     /* Find parameters and their values: */
242 
243     int i;
244     wxChar c;
245 
246     // fill-in name, params and begin pos:
247     i = pos+1;
248 
249     // find tag's name and convert it to uppercase:
250     while ((i < end_pos) &&
251            ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
252              c != wxT('\n') && c != wxT('\t') &&
253              c != wxT('>')))
254     {
255         if ((c >= wxT('a')) && (c <= wxT('z')))
256             c -= (wxT('a') - wxT('A'));
257         m_Name << c;
258     }
259 
260     // if the tag has parameters, read them and "normalize" them,
261     // i.e. convert to uppercase, replace whitespaces by spaces and
262     // remove whitespaces around '=':
263     if (source[i-1] != wxT('>'))
264     {
265         #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
266                              c == wxT('\n') || c == wxT('\t'))
267         wxString pname, pvalue;
268         wxChar quote;
269         enum
270         {
271             ST_BEFORE_NAME = 1,
272             ST_NAME,
273             ST_BEFORE_EQ,
274             ST_BEFORE_VALUE,
275             ST_VALUE
276         } state;
277 
278         quote = 0;
279         state = ST_BEFORE_NAME;
280         while (i < end_pos)
281         {
282             c = source[i++];
283 
284             if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
285             {
286                 if (state == ST_BEFORE_EQ || state == ST_NAME)
287                 {
288                     m_ParamNames.Add(pname);
289                     m_ParamValues.Add(wxEmptyString);
290                 }
291                 else if (state == ST_VALUE && quote == 0)
292                 {
293                     m_ParamNames.Add(pname);
294                     if (entParser)
295                         m_ParamValues.Add(entParser->Parse(pvalue));
296                     else
297                         m_ParamValues.Add(pvalue);
298                 }
299                 break;
300             }
301             switch (state)
302             {
303                 case ST_BEFORE_NAME:
304                     if (!IS_WHITE(c))
305                     {
306                         pname = c;
307                         state = ST_NAME;
308                     }
309                     break;
310                 case ST_NAME:
311                     if (IS_WHITE(c))
312                         state = ST_BEFORE_EQ;
313                     else if (c == wxT('='))
314                         state = ST_BEFORE_VALUE;
315                     else
316                         pname << c;
317                     break;
318                 case ST_BEFORE_EQ:
319                     if (c == wxT('='))
320                         state = ST_BEFORE_VALUE;
321                     else if (!IS_WHITE(c))
322                     {
323                         m_ParamNames.Add(pname);
324                         m_ParamValues.Add(wxEmptyString);
325                         pname = c;
326                         state = ST_NAME;
327                     }
328                     break;
329                 case ST_BEFORE_VALUE:
330                     if (!IS_WHITE(c))
331                     {
332                         if (c == wxT('"') || c == wxT('\''))
333                             quote = c, pvalue = wxEmptyString;
334                         else
335                             quote = 0, pvalue = c;
336                         state = ST_VALUE;
337                     }
338                     break;
339                 case ST_VALUE:
340                     if ((quote != 0 && c == quote) ||
341                         (quote == 0 && IS_WHITE(c)))
342                     {
343                         m_ParamNames.Add(pname);
344                         if (quote == 0)
345                         {
346                             // VS: backward compatibility, no real reason,
347                             //     but wxHTML code relies on this... :(
348                             pvalue.MakeUpper();
349                         }
350                         if (entParser)
351                             m_ParamValues.Add(entParser->Parse(pvalue));
352                         else
353                             m_ParamValues.Add(pvalue);
354                         state = ST_BEFORE_NAME;
355                     }
356                     else
357                         pvalue << c;
358                     break;
359             }
360         }
361 
362         #undef IS_WHITE
363     }
364     m_Begin = i;
365 
366     cache->QueryTag(pos, &m_End1, &m_End2);
367     if (m_End1 > end_pos) m_End1 = end_pos;
368     if (m_End2 > end_pos) m_End2 = end_pos;
369 }
370 
~wxHtmlTag()371 wxHtmlTag::~wxHtmlTag()
372 {
373     wxHtmlTag *t1, *t2;
374     t1 = m_FirstChild;
375     while (t1)
376     {
377         t2 = t1->GetNextSibling();
378         delete t1;
379         t1 = t2;
380     }
381 }
382 
HasParam(const wxString & par) const383 bool wxHtmlTag::HasParam(const wxString& par) const
384 {
385     return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
386 }
387 
// Returns the value of parameter "par", or an empty string if the tag has no
// such parameter.
//
// par:         parameter name
// with_commas: if true, the value is returned enclosed in double quotes
wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
{
    const int where = m_ParamNames.Index(par, false);
    if ( where == wxNOT_FOUND )
        return wxEmptyString;

    if ( !with_commas )
        return m_ParamValues[where];

    // VS: backward compatibility, seems to be never used by wxHTML...
    wxString quoted;
    quoted << wxT('"') << m_ParamValues[where] << wxT('"');
    return quoted;
}
403 
ScanParam(const wxString & par,const wxChar * format,void * param) const404 int wxHtmlTag::ScanParam(const wxString& par,
405                          const wxChar *format,
406                          void *param) const
407 {
408     wxString parval = GetParam(par);
409     return wxSscanf(parval, format, param);
410 }
411 
GetParamAsColour(const wxString & par,wxColour * clr) const412 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
413 {
414     wxCHECK_MSG( clr, false, _T("invalid colour argument") );
415 
416     wxString str = GetParam(par);
417 
418     // handle colours defined in HTML 4.0 first:
419     if (str.length() > 1 && str[0] != _T('#'))
420     {
421         #define HTML_COLOUR(name, r, g, b)              \
422             if (str.IsSameAs(wxT(name), false))         \
423                 { clr->Set(r, g, b); return true; }
424         HTML_COLOUR("black",   0x00,0x00,0x00)
425         HTML_COLOUR("silver",  0xC0,0xC0,0xC0)
426         HTML_COLOUR("gray",    0x80,0x80,0x80)
427         HTML_COLOUR("white",   0xFF,0xFF,0xFF)
428         HTML_COLOUR("maroon",  0x80,0x00,0x00)
429         HTML_COLOUR("red",     0xFF,0x00,0x00)
430         HTML_COLOUR("purple",  0x80,0x00,0x80)
431         HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
432         HTML_COLOUR("green",   0x00,0x80,0x00)
433         HTML_COLOUR("lime",    0x00,0xFF,0x00)
434         HTML_COLOUR("olive",   0x80,0x80,0x00)
435         HTML_COLOUR("yellow",  0xFF,0xFF,0x00)
436         HTML_COLOUR("navy",    0x00,0x00,0x80)
437         HTML_COLOUR("blue",    0x00,0x00,0xFF)
438         HTML_COLOUR("teal",    0x00,0x80,0x80)
439         HTML_COLOUR("aqua",    0x00,0xFF,0xFF)
440         #undef HTML_COLOUR
441     }
442 
443     // then try to parse #rrggbb representations or set from other well
444     // known names (note that this doesn't strictly conform to HTML spec,
445     // but it doesn't do real harm -- but it *must* be done after the standard
446     // colors are handled above):
447     if (clr->Set(str))
448         return true;
449 
450     return false;
451 }
452 
GetParamAsInt(const wxString & par,int * clr) const453 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
454 {
455     if ( !HasParam(par) )
456         return false;
457 
458     long i;
459     if ( !GetParam(par).ToLong(&i) )
460         return false;
461 
462     *clr = (int)i;
463     return true;
464 }
465 
GetAllParams() const466 wxString wxHtmlTag::GetAllParams() const
467 {
468     // VS: this function is for backward compatibility only,
469     //     never used by wxHTML
470     wxString s;
471     size_t cnt = m_ParamNames.GetCount();
472     for (size_t i = 0; i < cnt; i++)
473     {
474         s << m_ParamNames[i];
475         s << wxT('=');
476         if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
477             s << wxT('\'') << m_ParamValues[i] << wxT('\'');
478         else
479             s << wxT('"') << m_ParamValues[i] << wxT('"');
480     }
481     return s;
482 }
483 
GetFirstSibling() const484 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
485 {
486     if (m_Parent)
487         return m_Parent->m_FirstChild;
488     else
489     {
490         wxHtmlTag *cur = (wxHtmlTag*)this;
491         while (cur->m_Prev)
492             cur = cur->m_Prev;
493         return cur;
494     }
495 }
496 
GetLastSibling() const497 wxHtmlTag *wxHtmlTag::GetLastSibling() const
498 {
499     if (m_Parent)
500         return m_Parent->m_LastChild;
501     else
502     {
503         wxHtmlTag *cur = (wxHtmlTag*)this;
504         while (cur->m_Next)
505             cur = cur->m_Next;
506         return cur;
507     }
508 }
509 
GetNextTag() const510 wxHtmlTag *wxHtmlTag::GetNextTag() const
511 {
512     if (m_FirstChild) return m_FirstChild;
513     if (m_Next) return m_Next;
514     wxHtmlTag *cur = m_Parent;
515     if (!cur) return NULL;
516     while (cur->m_Parent && !cur->m_Next)
517         cur = cur->m_Parent;
518     return cur->m_Next;
519 }
520 
521 #endif
522