1// Copyright 2015 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:generate go run gen.go gen_trieval.go gen_ranges.go 6 7// Package bidi contains functionality for bidirectional text support. 8// 9// See https://www.unicode.org/reports/tr9. 10// 11// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways 12// and without notice. 13package bidi // import "golang.org/x/text/unicode/bidi" 14 15// TODO 16// - Transformer for reordering? 17// - Transformer (validator, really) for Bidi Rule. 18 19import ( 20 "bytes" 21) 22 23// This API tries to avoid dealing with embedding levels for now. Under the hood 24// these will be computed, but the question is to which extent the user should 25// know they exist. We should at some point allow the user to specify an 26// embedding hierarchy, though. 27 28// A Direction indicates the overall flow of text. 29type Direction int 30 31const ( 32 // LeftToRight indicates the text contains no right-to-left characters and 33 // that either there are some left-to-right characters or the option 34 // DefaultDirection(LeftToRight) was passed. 35 LeftToRight Direction = iota 36 37 // RightToLeft indicates the text contains no left-to-right characters and 38 // that either there are some right-to-left characters or the option 39 // DefaultDirection(RightToLeft) was passed. 40 RightToLeft 41 42 // Mixed indicates text contains both left-to-right and right-to-left 43 // characters. 44 Mixed 45 46 // Neutral means that text contains no left-to-right and right-to-left 47 // characters and that no default direction has been set. 48 Neutral 49) 50 51type options struct { 52 defaultDirection Direction 53} 54 55// An Option is an option for Bidi processing. 56type Option func(*options) 57 58// ICU allows the user to define embedding levels. This may be used, for example, 59// to use hierarchical structure of markup languages to define embeddings. 60// The following option may be a way to expose this functionality in this API. 61// // LevelFunc sets a function that associates nesting levels with the given text. 62// // The levels function will be called with monotonically increasing values for p. 63// func LevelFunc(levels func(p int) int) Option { 64// panic("unimplemented") 65// } 66 67// DefaultDirection sets the default direction for a Paragraph. The direction is 68// overridden if the text contains directional characters. 69func DefaultDirection(d Direction) Option { 70 return func(opts *options) { 71 opts.defaultDirection = d 72 } 73} 74 75// A Paragraph holds a single Paragraph for Bidi processing. 76type Paragraph struct { 77 p []byte 78 o Ordering 79 opts []Option 80 types []Class 81 pairTypes []bracketType 82 pairValues []rune 83 runes []rune 84 options options 85} 86 87// Initialize the p.pairTypes, p.pairValues and p.types from the input previously 88// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph 89// separator (bidi class B). 90// 91// The function p.Order() needs these values to be set, so this preparation could be postponed. 92// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph 93// separator, the whole input needs to be processed anyway and should not be done twice. 94// 95// The function has the same return values as SetBytes() / SetString() 96func (p *Paragraph) prepareInput() (n int, err error) { 97 p.runes = bytes.Runes(p.p) 98 bytecount := 0 99 // clear slices from previous SetString or SetBytes 100 p.pairTypes = nil 101 p.pairValues = nil 102 p.types = nil 103 104 for _, r := range p.runes { 105 props, i := LookupRune(r) 106 bytecount += i 107 cls := props.Class() 108 if cls == B { 109 return bytecount, nil 110 } 111 p.types = append(p.types, cls) 112 if props.IsOpeningBracket() { 113 p.pairTypes = append(p.pairTypes, bpOpen) 114 p.pairValues = append(p.pairValues, r) 115 } else if props.IsBracket() { 116 // this must be a closing bracket, 117 // since IsOpeningBracket is not true 118 p.pairTypes = append(p.pairTypes, bpClose) 119 p.pairValues = append(p.pairValues, r) 120 } else { 121 p.pairTypes = append(p.pairTypes, bpNone) 122 p.pairValues = append(p.pairValues, 0) 123 } 124 } 125 return bytecount, nil 126} 127 128// SetBytes configures p for the given paragraph text. It replaces text 129// previously set by SetBytes or SetString. If b contains a paragraph separator 130// it will only process the first paragraph and report the number of bytes 131// consumed from b including this separator. Error may be non-nil if options are 132// given. 133func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { 134 p.p = b 135 p.opts = opts 136 return p.prepareInput() 137} 138 139// SetString configures s for the given paragraph text. It replaces text 140// previously set by SetBytes or SetString. If s contains a paragraph separator 141// it will only process the first paragraph and report the number of bytes 142// consumed from s including this separator. Error may be non-nil if options are 143// given. 144func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { 145 p.p = []byte(s) 146 p.opts = opts 147 return p.prepareInput() 148} 149 150// IsLeftToRight reports whether the principle direction of rendering for this 151// paragraphs is left-to-right. If this returns false, the principle direction 152// of rendering is right-to-left. 153func (p *Paragraph) IsLeftToRight() bool { 154 return p.Direction() == LeftToRight 155} 156 157// Direction returns the direction of the text of this paragraph. 158// 159// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. 160func (p *Paragraph) Direction() Direction { 161 return p.o.Direction() 162} 163 164// TODO: what happens if the position is > len(input)? This should return an error. 165 166// RunAt reports the Run at the given position of the input text. 167// 168// This method can be used for computing line breaks on paragraphs. 169func (p *Paragraph) RunAt(pos int) Run { 170 c := 0 171 runNumber := 0 172 for i, r := range p.o.runes { 173 c += len(r) 174 if pos < c { 175 runNumber = i 176 } 177 } 178 return p.o.Run(runNumber) 179} 180 181func calculateOrdering(levels []level, runes []rune) Ordering { 182 var curDir Direction 183 184 prevDir := Neutral 185 prevI := 0 186 187 o := Ordering{} 188 // lvl = 0,2,4,...: left to right 189 // lvl = 1,3,5,...: right to left 190 for i, lvl := range levels { 191 if lvl%2 == 0 { 192 curDir = LeftToRight 193 } else { 194 curDir = RightToLeft 195 } 196 if curDir != prevDir { 197 if i > 0 { 198 o.runes = append(o.runes, runes[prevI:i]) 199 o.directions = append(o.directions, prevDir) 200 o.startpos = append(o.startpos, prevI) 201 } 202 prevI = i 203 prevDir = curDir 204 } 205 } 206 o.runes = append(o.runes, runes[prevI:]) 207 o.directions = append(o.directions, prevDir) 208 o.startpos = append(o.startpos, prevI) 209 return o 210} 211 212// Order computes the visual ordering of all the runs in a Paragraph. 213func (p *Paragraph) Order() (Ordering, error) { 214 if len(p.types) == 0 { 215 return Ordering{}, nil 216 } 217 218 for _, fn := range p.opts { 219 fn(&p.options) 220 } 221 lvl := level(-1) 222 if p.options.defaultDirection == RightToLeft { 223 lvl = 1 224 } 225 para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) 226 if err != nil { 227 return Ordering{}, err 228 } 229 230 levels := para.getLevels([]int{len(p.types)}) 231 232 p.o = calculateOrdering(levels, p.runes) 233 return p.o, nil 234} 235 236// Line computes the visual ordering of runs for a single line starting and 237// ending at the given positions in the original text. 238func (p *Paragraph) Line(start, end int) (Ordering, error) { 239 lineTypes := p.types[start:end] 240 para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) 241 if err != nil { 242 return Ordering{}, err 243 } 244 levels := para.getLevels([]int{len(lineTypes)}) 245 o := calculateOrdering(levels, p.runes[start:end]) 246 return o, nil 247} 248 249// An Ordering holds the computed visual order of runs of a Paragraph. Calling 250// SetBytes or SetString on the originating Paragraph invalidates an Ordering. 251// The methods of an Ordering should only be called by one goroutine at a time. 252type Ordering struct { 253 runes [][]rune 254 directions []Direction 255 startpos []int 256} 257 258// Direction reports the directionality of the runs. 259// 260// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. 261func (o *Ordering) Direction() Direction { 262 return o.directions[0] 263} 264 265// NumRuns returns the number of runs. 266func (o *Ordering) NumRuns() int { 267 return len(o.runes) 268} 269 270// Run returns the ith run within the ordering. 271func (o *Ordering) Run(i int) Run { 272 r := Run{ 273 runes: o.runes[i], 274 direction: o.directions[i], 275 startpos: o.startpos[i], 276 } 277 return r 278} 279 280// TODO: perhaps with options. 281// // Reorder creates a reader that reads the runes in visual order per character. 282// // Modifiers remain after the runes they modify. 283// func (l *Runs) Reorder() io.Reader { 284// panic("unimplemented") 285// } 286 287// A Run is a continuous sequence of characters of a single direction. 288type Run struct { 289 runes []rune 290 direction Direction 291 startpos int 292} 293 294// String returns the text of the run in its original order. 295func (r *Run) String() string { 296 return string(r.runes) 297} 298 299// Bytes returns the text of the run in its original order. 300func (r *Run) Bytes() []byte { 301 return []byte(r.String()) 302} 303 304// TODO: methods for 305// - Display order 306// - headers and footers 307// - bracket replacement. 308 309// Direction reports the direction of the run. 310func (r *Run) Direction() Direction { 311 return r.direction 312} 313 314// Pos returns the position of the Run within the text passed to SetBytes or SetString of the 315// originating Paragraph value. 316func (r *Run) Pos() (start, end int) { 317 return r.startpos, r.startpos + len(r.runes) - 1 318} 319 320// AppendReverse reverses the order of characters of in, appends them to out, 321// and returns the result. Modifiers will still follow the runes they modify. 322// Brackets are replaced with their counterparts. 323func AppendReverse(out, in []byte) []byte { 324 ret := make([]byte, len(in)+len(out)) 325 copy(ret, out) 326 inRunes := bytes.Runes(in) 327 328 for i, r := range inRunes { 329 prop, _ := LookupRune(r) 330 if prop.IsBracket() { 331 inRunes[i] = prop.reverseBracket(r) 332 } 333 } 334 335 for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { 336 inRunes[i], inRunes[j] = inRunes[j], inRunes[i] 337 } 338 copy(ret[len(out):], string(inRunes)) 339 340 return ret 341} 342 343// ReverseString reverses the order of characters in s and returns a new string. 344// Modifiers will still follow the runes they modify. Brackets are replaced with 345// their counterparts. 346func ReverseString(s string) string { 347 input := []rune(s) 348 li := len(input) 349 ret := make([]rune, li) 350 for i, r := range input { 351 prop, _ := LookupRune(r) 352 if prop.IsBracket() { 353 ret[li-i-1] = prop.reverseBracket(r) 354 } else { 355 ret[li-i-1] = r 356 } 357 } 358 return string(ret) 359} 360