1// Copyright 2014 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:generate go run gen.go gen_trieval.go 6 7// Package cases provides general and language-specific case mappers. 8package cases // import "golang.org/x/text/cases" 9 10import ( 11 "golang.org/x/text/language" 12 "golang.org/x/text/transform" 13) 14 15// References: 16// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18. 17// - https://www.unicode.org/reports/tr29/ 18// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt 19// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt 20// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt 21// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt 22// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt 23// - http://userguide.icu-project.org/transforms/casemappings 24 25// TODO: 26// - Case folding 27// - Wide and Narrow? 28// - Segmenter option for title casing. 29// - ASCII fast paths 30// - Encode Soft-Dotted property within trie somehow. 31 32// A Caser transforms given input to a certain case. It implements 33// transform.Transformer. 34// 35// A Caser may be stateful and should therefore not be shared between 36// goroutines. 37type Caser struct { 38 t transform.SpanningTransformer 39} 40 41// Bytes returns a new byte slice with the result of converting b to the case 42// form implemented by c. 43func (c Caser) Bytes(b []byte) []byte { 44 b, _, _ = transform.Bytes(c.t, b) 45 return b 46} 47 48// String returns a string with the result of transforming s to the case form 49// implemented by c. 50func (c Caser) String(s string) string { 51 s, _, _ = transform.String(c.t, s) 52 return s 53} 54 55// Reset resets the Caser to be reused for new input after a previous call to 56// Transform. 57func (c Caser) Reset() { c.t.Reset() } 58 59// Transform implements the transform.Transformer interface and transforms the 60// given input to the case form implemented by c. 61func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 62 return c.t.Transform(dst, src, atEOF) 63} 64 65// Span implements the transform.SpanningTransformer interface. 66func (c Caser) Span(src []byte, atEOF bool) (n int, err error) { 67 return c.t.Span(src, atEOF) 68} 69 70// Upper returns a Caser for language-specific uppercasing. 71func Upper(t language.Tag, opts ...Option) Caser { 72 return Caser{makeUpper(t, getOpts(opts...))} 73} 74 75// Lower returns a Caser for language-specific lowercasing. 76func Lower(t language.Tag, opts ...Option) Caser { 77 return Caser{makeLower(t, getOpts(opts...))} 78} 79 80// Title returns a Caser for language-specific title casing. It uses an 81// approximation of the default Unicode Word Break algorithm. 82func Title(t language.Tag, opts ...Option) Caser { 83 return Caser{makeTitle(t, getOpts(opts...))} 84} 85 86// Fold returns a Caser that implements Unicode case folding. The returned Caser 87// is stateless and safe to use concurrently by multiple goroutines. 88// 89// Case folding does not normalize the input and may not preserve a normal form. 90// Use the collate or search package for more convenient and linguistically 91// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons 92// where security aspects are a concern. 93func Fold(opts ...Option) Caser { 94 return Caser{makeFold(getOpts(opts...))} 95} 96 97// An Option is used to modify the behavior of a Caser. 98type Option func(o options) options 99 100// TODO: consider these options to take a boolean as well, like FinalSigma. 101// The advantage of using this approach is that other providers of a lower-case 102// algorithm could set different defaults by prefixing a user-provided slice 103// of options with their own. This is handy, for instance, for the precis 104// package which would override the default to not handle the Greek final sigma. 105 106var ( 107 // NoLower disables the lowercasing of non-leading letters for a title 108 // caser. 109 NoLower Option = noLower 110 111 // Compact omits mappings in case folding for characters that would grow the 112 // input. (Unimplemented.) 113 Compact Option = compact 114) 115 116// TODO: option to preserve a normal form, if applicable? 117 118type options struct { 119 noLower bool 120 simple bool 121 122 // TODO: segmenter, max ignorable, alternative versions, etc. 123 124 ignoreFinalSigma bool 125} 126 127func getOpts(o ...Option) (res options) { 128 for _, f := range o { 129 res = f(res) 130 } 131 return 132} 133 134func noLower(o options) options { 135 o.noLower = true 136 return o 137} 138 139func compact(o options) options { 140 o.simple = true 141 return o 142} 143 144// HandleFinalSigma specifies whether the special handling of Greek final sigma 145// should be enabled. Unicode prescribes handling the Greek final sigma for all 146// locales, but standards like IDNA and PRECIS override this default. 147func HandleFinalSigma(enable bool) Option { 148 if enable { 149 return handleFinalSigma 150 } 151 return ignoreFinalSigma 152} 153 154func ignoreFinalSigma(o options) options { 155 o.ignoreFinalSigma = true 156 return o 157} 158 159func handleFinalSigma(o options) options { 160 o.ignoreFinalSigma = false 161 return o 162} 163