1/*
2 * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17package gofpdf
18
19import (
20	"regexp"
21	"strings"
22)
23
// HTMLBasicSegmentType defines a segment of literal text in which the current
// attributes do not vary, or an open tag or a close tag.
type HTMLBasicSegmentType struct {
	Cat  byte              // 'O' open tag, 'C' close tag, 'T' text
	Str  string            // Literal text unchanged, tag names are lower case
	Attr map[string]string // Open tags only (nil otherwise), keys are lower case
}

// htmlBasicTagRe matches the shortest run that starts with '<' and ends with
// the next '>'. It is compiled once instead of on every tokenizer call.
var htmlBasicTagRe = regexp.MustCompile(`(?U)<.*>`)

// htmlBasicAttrRe matches one key=value attribute. The value is captured by
// exactly one of three groups: double-quoted, single-quoted, or a bare run
// (optionally preceded by a stray, unterminated quote). Values must be
// non-empty, so an attribute such as href="" is not reported.
var htmlBasicAttrRe = regexp.MustCompile(
	`([^\s=]+)\s*=\s*(?:"([^"]+)"|'([^']+)'|["']?([^\s"']+))`)

// HTMLBasicTokenize splits htmlStr into a list of text, open tag and close tag
// segments. Newlines are converted to spaces and carriage returns are dropped
// before scanning. Tags are located with regular expressions rather than a
// real HTML parser, so only simple, well-formed markup is handled reliably;
// for example a '>' inside an attribute value ends the tag early.
//
// Input that contains no tag at all, including the empty string, yields a
// single text segment. Tag names and attribute keys are lower-cased, attribute
// values are returned without their surrounding quotes and may contain
// spaces, and a slash directly after the tag name (as in <br/>) is not part of
// the name.
func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) {
	// This routine is adapted from http://www.fpdf.org/
	list = make([]HTMLBasicSegmentType, 0, 16)
	htmlStr = strings.Replace(htmlStr, "\n", " ", -1)
	htmlStr = strings.Replace(htmlStr, "\r", "", -1)
	tagSpans := htmlBasicTagRe.FindAllStringIndex(htmlStr, -1)
	if tagSpans == nil {
		// No markup: the whole input, even when empty, is one text segment.
		list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr})
		return list
	}
	pos := 0
	for _, span := range tagSpans {
		if pos < span[0] {
			// Literal text between the previous tag and this one.
			list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr[pos:span[0]]})
		}
		// Strip the enclosing '<' and '>' before interpreting the tag.
		list = append(list, htmlBasicParseTag(htmlStr[span[0]+1:span[1]-1]))
		pos = span[1]
	}
	if pos < len(htmlStr) {
		// Trailing text after the last tag.
		list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr[pos:]})
	}
	return list
}

// htmlBasicParseTag converts the text found between '<' and '>' into an open
// or close tag segment.
func htmlBasicParseTag(inner string) HTMLBasicSegmentType {
	if strings.HasPrefix(inner, "/") {
		return HTMLBasicSegmentType{
			Cat: 'C',
			Str: strings.ToLower(strings.TrimSpace(inner[1:])),
		}
	}
	// The tag name runs up to the first blank or tab. Leading whitespace is
	// deliberately not skipped, so "< b>" keeps an empty name instead of
	// being mistaken for a bold tag.
	name, rest := inner, ""
	if k := strings.IndexAny(inner, " \t"); k >= 0 {
		name, rest = inner[:k], inner[k:]
	}
	seg := HTMLBasicSegmentType{
		Cat:  'O',
		Str:  strings.ToLower(strings.TrimSuffix(name, "/")),
		Attr: make(map[string]string),
	}
	// Attributes are matched against the whole remainder rather than against
	// space-separated pieces, so quoted values may contain spaces.
	for _, m := range htmlBasicAttrRe.FindAllStringSubmatch(rest, -1) {
		// Only one of the three value groups takes part in a match; the
		// others are empty, so concatenation selects the captured value.
		seg.Attr[strings.ToLower(m[1])] = m[2] + m[3] + m[4]
	}
	return seg
}
93
// HTMLBasicType is used for rendering a very basic subset of HTML: hyperlinks,
// bold, italic and underscore attributes, line breaks and center, right and
// left alignment. In the Link structure, the ClrR, ClrG and ClrB fields (0
// through 255) define the color of hyperlinks. The Bold, Italic and Underscore
// values define the hyperlink style. Instances are created with HTMLBasicNew,
// which binds them to a PDF document and fills in default link settings.
type HTMLBasicType struct {
	// pdf is the document that Write renders into.
	pdf  *Fpdf
	// Link holds the text color and font style applied to hyperlink text.
	Link struct {
		ClrR, ClrG, ClrB         int
		Bold, Italic, Underscore bool
	}
}
106
107// HTMLBasicNew returns an instance that facilitates writing basic HTML in the
108// specified PDF file.
109func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) {
110	html.pdf = f
111	html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128
112	html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true
113	return
114}
115
116// Write prints text from the current position using the currently selected
117// font. See HTMLBasicNew() to create a receiver that is associated with the
118// PDF document instance. The text can be encoded with a basic subset of HTML
119// that includes hyperlinks and tags for italic (I), bold (B), underscore
120// (U) and center (CENTER) attributes. When the right margin is reached a line
121// break occurs and text continues from the left margin. Upon method exit, the
122// current position is left at the end of the text.
123//
124// lineHt indicates the line height in the unit of measure specified in New().
125func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) {
126	var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int
127	var textR, textG, textB = html.pdf.GetTextColor()
128	var hrefStr string
129	if html.Link.Bold {
130		linkBold = 1
131	}
132	if html.Link.Italic {
133		linkItalic = 1
134	}
135	if html.Link.Underscore {
136		linkUnderscore = 1
137	}
138	setStyle := func(boldAdj, italicAdj, underscoreAdj int) {
139		styleStr := ""
140		boldLvl += boldAdj
141		if boldLvl > 0 {
142			styleStr += "B"
143		}
144		italicLvl += italicAdj
145		if italicLvl > 0 {
146			styleStr += "I"
147		}
148		underscoreLvl += underscoreAdj
149		if underscoreLvl > 0 {
150			styleStr += "U"
151		}
152		html.pdf.SetFont("", styleStr, 0)
153	}
154	putLink := func(urlStr, txtStr string) {
155		// Put a hyperlink
156		html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB)
157		setStyle(linkBold, linkItalic, linkUnderscore)
158		html.pdf.WriteLinkString(lineHt, txtStr, urlStr)
159		setStyle(-linkBold, -linkItalic, -linkUnderscore)
160		html.pdf.SetTextColor(textR, textG, textB)
161	}
162	list := HTMLBasicTokenize(htmlStr)
163	var ok bool
164	alignStr := "L"
165	for _, el := range list {
166		switch el.Cat {
167		case 'T':
168			if len(hrefStr) > 0 {
169				putLink(hrefStr, el.Str)
170				hrefStr = ""
171			} else {
172				if alignStr == "C" || alignStr == "R" {
173					html.pdf.WriteAligned(0, lineHt, el.Str, alignStr)
174				} else {
175					html.pdf.Write(lineHt, el.Str)
176				}
177			}
178		case 'O':
179			switch el.Str {
180			case "b":
181				setStyle(1, 0, 0)
182			case "i":
183				setStyle(0, 1, 0)
184			case "u":
185				setStyle(0, 0, 1)
186			case "br":
187				html.pdf.Ln(lineHt)
188			case "center":
189				html.pdf.Ln(lineHt)
190				alignStr = "C"
191			case "right":
192				html.pdf.Ln(lineHt)
193				alignStr = "R"
194			case "left":
195				html.pdf.Ln(lineHt)
196				alignStr = "L"
197			case "a":
198				hrefStr, ok = el.Attr["href"]
199				if !ok {
200					hrefStr = ""
201				}
202			}
203		case 'C':
204			switch el.Str {
205			case "b":
206				setStyle(-1, 0, 0)
207			case "i":
208				setStyle(0, -1, 0)
209			case "u":
210				setStyle(0, 0, -1)
211			case "center":
212				html.pdf.Ln(lineHt)
213				alignStr = "L"
214			case "right":
215				html.pdf.Ln(lineHt)
216				alignStr = "L"
217			}
218		}
219	}
220}
221