1/* 2Copyright 2015 Google LLC 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package bigtable 18 19import ( 20 "fmt" 21 "strings" 22 "time" 23 24 btpb "google.golang.org/genproto/googleapis/bigtable/v2" 25) 26 27// A Filter represents a row filter. 28type Filter interface { 29 String() string 30 proto() *btpb.RowFilter 31} 32 33// ChainFilters returns a filter that applies a sequence of filters. 34func ChainFilters(sub ...Filter) Filter { return chainFilter{sub} } 35 36type chainFilter struct { 37 sub []Filter 38} 39 40func (cf chainFilter) String() string { 41 var ss []string 42 for _, sf := range cf.sub { 43 ss = append(ss, sf.String()) 44 } 45 return "(" + strings.Join(ss, " | ") + ")" 46} 47 48func (cf chainFilter) proto() *btpb.RowFilter { 49 chain := &btpb.RowFilter_Chain{} 50 for _, sf := range cf.sub { 51 chain.Filters = append(chain.Filters, sf.proto()) 52 } 53 return &btpb.RowFilter{ 54 Filter: &btpb.RowFilter_Chain_{Chain: chain}, 55 } 56} 57 58// InterleaveFilters returns a filter that applies a set of filters in parallel 59// and interleaves the results. 60func InterleaveFilters(sub ...Filter) Filter { return interleaveFilter{sub} } 61 62type interleaveFilter struct { 63 sub []Filter 64} 65 66func (ilf interleaveFilter) String() string { 67 var ss []string 68 for _, sf := range ilf.sub { 69 ss = append(ss, sf.String()) 70 } 71 return "(" + strings.Join(ss, " + ") + ")" 72} 73 74func (ilf interleaveFilter) proto() *btpb.RowFilter { 75 inter := &btpb.RowFilter_Interleave{} 76 for _, sf := range ilf.sub { 77 inter.Filters = append(inter.Filters, sf.proto()) 78 } 79 return &btpb.RowFilter{ 80 Filter: &btpb.RowFilter_Interleave_{Interleave: inter}, 81 } 82} 83 84// RowKeyFilter returns a filter that matches cells from rows whose 85// key matches the provided RE2 pattern. 86// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 87func RowKeyFilter(pattern string) Filter { return rowKeyFilter(pattern) } 88 89type rowKeyFilter string 90 91func (rkf rowKeyFilter) String() string { return fmt.Sprintf("row(%s)", string(rkf)) } 92 93func (rkf rowKeyFilter) proto() *btpb.RowFilter { 94 return &btpb.RowFilter{Filter: &btpb.RowFilter_RowKeyRegexFilter{RowKeyRegexFilter: []byte(rkf)}} 95} 96 97// FamilyFilter returns a filter that matches cells whose family name 98// matches the provided RE2 pattern. 99// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 100func FamilyFilter(pattern string) Filter { return familyFilter(pattern) } 101 102type familyFilter string 103 104func (ff familyFilter) String() string { return fmt.Sprintf("col(%s:)", string(ff)) } 105 106func (ff familyFilter) proto() *btpb.RowFilter { 107 return &btpb.RowFilter{Filter: &btpb.RowFilter_FamilyNameRegexFilter{FamilyNameRegexFilter: string(ff)}} 108} 109 110// ColumnFilter returns a filter that matches cells whose column name 111// matches the provided RE2 pattern. 112// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 113func ColumnFilter(pattern string) Filter { return columnFilter(pattern) } 114 115type columnFilter string 116 117func (cf columnFilter) String() string { return fmt.Sprintf("col(.*:%s)", string(cf)) } 118 119func (cf columnFilter) proto() *btpb.RowFilter { 120 return &btpb.RowFilter{Filter: &btpb.RowFilter_ColumnQualifierRegexFilter{ColumnQualifierRegexFilter: []byte(cf)}} 121} 122 123// ValueFilter returns a filter that matches cells whose value 124// matches the provided RE2 pattern. 125// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 126func ValueFilter(pattern string) Filter { return valueFilter(pattern) } 127 128type valueFilter string 129 130func (vf valueFilter) String() string { return fmt.Sprintf("value_match(%s)", string(vf)) } 131 132func (vf valueFilter) proto() *btpb.RowFilter { 133 return &btpb.RowFilter{Filter: &btpb.RowFilter_ValueRegexFilter{ValueRegexFilter: []byte(vf)}} 134} 135 136// LatestNFilter returns a filter that matches the most recent N cells in each column. 137func LatestNFilter(n int) Filter { return latestNFilter(n) } 138 139type latestNFilter int32 140 141func (lnf latestNFilter) String() string { return fmt.Sprintf("col(*,%d)", lnf) } 142 143func (lnf latestNFilter) proto() *btpb.RowFilter { 144 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerColumnLimitFilter{CellsPerColumnLimitFilter: int32(lnf)}} 145} 146 147// StripValueFilter returns a filter that replaces each value with the empty string. 148func StripValueFilter() Filter { return stripValueFilter{} } 149 150type stripValueFilter struct{} 151 152func (stripValueFilter) String() string { return "strip_value()" } 153func (stripValueFilter) proto() *btpb.RowFilter { 154 return &btpb.RowFilter{Filter: &btpb.RowFilter_StripValueTransformer{StripValueTransformer: true}} 155} 156 157// TimestampRangeFilter returns a filter that matches any cells whose timestamp is within the given time bounds. A zero 158// time means no bound. 159// The timestamp will be truncated to millisecond granularity. 160func TimestampRangeFilter(startTime time.Time, endTime time.Time) Filter { 161 trf := timestampRangeFilter{} 162 if !startTime.IsZero() { 163 trf.startTime = Time(startTime) 164 } 165 if !endTime.IsZero() { 166 trf.endTime = Time(endTime) 167 } 168 return trf 169} 170 171// TimestampRangeFilterMicros returns a filter that matches any cells whose timestamp is within the given time bounds, 172// specified in units of microseconds since 1 January 1970. A zero value for the end time is interpreted as no bound. 173// The timestamp will be truncated to millisecond granularity. 174func TimestampRangeFilterMicros(startTime Timestamp, endTime Timestamp) Filter { 175 return timestampRangeFilter{startTime, endTime} 176} 177 178type timestampRangeFilter struct { 179 startTime Timestamp 180 endTime Timestamp 181} 182 183func (trf timestampRangeFilter) String() string { 184 return fmt.Sprintf("timestamp_range(%v,%v)", trf.startTime, trf.endTime) 185} 186 187func (trf timestampRangeFilter) proto() *btpb.RowFilter { 188 return &btpb.RowFilter{ 189 Filter: &btpb.RowFilter_TimestampRangeFilter{TimestampRangeFilter: &btpb.TimestampRange{ 190 StartTimestampMicros: int64(trf.startTime.TruncateToMilliseconds()), 191 EndTimestampMicros: int64(trf.endTime.TruncateToMilliseconds()), 192 }, 193 }} 194} 195 196// ColumnRangeFilter returns a filter that matches a contiguous range of columns within a single 197// family, as specified by an inclusive start qualifier and exclusive end qualifier. 198func ColumnRangeFilter(family, start, end string) Filter { 199 return columnRangeFilter{family, start, end} 200} 201 202type columnRangeFilter struct { 203 family string 204 start string 205 end string 206} 207 208func (crf columnRangeFilter) String() string { 209 return fmt.Sprintf("columnRangeFilter(%s,%s,%s)", crf.family, crf.start, crf.end) 210} 211 212func (crf columnRangeFilter) proto() *btpb.RowFilter { 213 r := &btpb.ColumnRange{FamilyName: crf.family} 214 if crf.start != "" { 215 r.StartQualifier = &btpb.ColumnRange_StartQualifierClosed{StartQualifierClosed: []byte(crf.start)} 216 } 217 if crf.end != "" { 218 r.EndQualifier = &btpb.ColumnRange_EndQualifierOpen{EndQualifierOpen: []byte(crf.end)} 219 } 220 return &btpb.RowFilter{Filter: &btpb.RowFilter_ColumnRangeFilter{ColumnRangeFilter: r}} 221} 222 223// ValueRangeFilter returns a filter that matches cells with values that fall within 224// the given range, as specified by an inclusive start value and exclusive end value. 225func ValueRangeFilter(start, end []byte) Filter { 226 return valueRangeFilter{start, end} 227} 228 229type valueRangeFilter struct { 230 start []byte 231 end []byte 232} 233 234func (vrf valueRangeFilter) String() string { 235 return fmt.Sprintf("valueRangeFilter(%s,%s)", vrf.start, vrf.end) 236} 237 238func (vrf valueRangeFilter) proto() *btpb.RowFilter { 239 r := &btpb.ValueRange{} 240 if vrf.start != nil { 241 r.StartValue = &btpb.ValueRange_StartValueClosed{StartValueClosed: vrf.start} 242 } 243 if vrf.end != nil { 244 r.EndValue = &btpb.ValueRange_EndValueOpen{EndValueOpen: vrf.end} 245 } 246 return &btpb.RowFilter{Filter: &btpb.RowFilter_ValueRangeFilter{ValueRangeFilter: r}} 247} 248 249// ConditionFilter returns a filter that evaluates to one of two possible filters depending 250// on whether or not the given predicate filter matches at least one cell. 251// If the matched filter is nil then no results will be returned. 252// IMPORTANT NOTE: The predicate filter does not execute atomically with the 253// true and false filters, which may lead to inconsistent or unexpected 254// results. Additionally, condition filters have poor performance, especially 255// when filters are set for the false condition. 256func ConditionFilter(predicateFilter, trueFilter, falseFilter Filter) Filter { 257 return conditionFilter{predicateFilter, trueFilter, falseFilter} 258} 259 260type conditionFilter struct { 261 predicateFilter Filter 262 trueFilter Filter 263 falseFilter Filter 264} 265 266func (cf conditionFilter) String() string { 267 return fmt.Sprintf("conditionFilter(%s,%s,%s)", cf.predicateFilter, cf.trueFilter, cf.falseFilter) 268} 269 270func (cf conditionFilter) proto() *btpb.RowFilter { 271 var tf *btpb.RowFilter 272 var ff *btpb.RowFilter 273 if cf.trueFilter != nil { 274 tf = cf.trueFilter.proto() 275 } 276 if cf.falseFilter != nil { 277 ff = cf.falseFilter.proto() 278 } 279 return &btpb.RowFilter{ 280 Filter: &btpb.RowFilter_Condition_{Condition: &btpb.RowFilter_Condition{ 281 PredicateFilter: cf.predicateFilter.proto(), 282 TrueFilter: tf, 283 FalseFilter: ff, 284 }}} 285} 286 287// CellsPerRowOffsetFilter returns a filter that skips the first N cells of each row, matching all subsequent cells. 288func CellsPerRowOffsetFilter(n int) Filter { 289 return cellsPerRowOffsetFilter(n) 290} 291 292type cellsPerRowOffsetFilter int32 293 294func (cof cellsPerRowOffsetFilter) String() string { 295 return fmt.Sprintf("cells_per_row_offset(%d)", cof) 296} 297 298func (cof cellsPerRowOffsetFilter) proto() *btpb.RowFilter { 299 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerRowOffsetFilter{CellsPerRowOffsetFilter: int32(cof)}} 300} 301 302// CellsPerRowLimitFilter returns a filter that matches only the first N cells of each row. 303func CellsPerRowLimitFilter(n int) Filter { 304 return cellsPerRowLimitFilter(n) 305} 306 307type cellsPerRowLimitFilter int32 308 309func (clf cellsPerRowLimitFilter) String() string { 310 return fmt.Sprintf("cells_per_row_limit(%d)", clf) 311} 312 313func (clf cellsPerRowLimitFilter) proto() *btpb.RowFilter { 314 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerRowLimitFilter{CellsPerRowLimitFilter: int32(clf)}} 315} 316 317// RowSampleFilter returns a filter that matches a row with a probability of p (must be in the interval (0, 1)). 318func RowSampleFilter(p float64) Filter { 319 return rowSampleFilter(p) 320} 321 322type rowSampleFilter float64 323 324func (rsf rowSampleFilter) String() string { 325 return fmt.Sprintf("filter(%f)", rsf) 326} 327 328func (rsf rowSampleFilter) proto() *btpb.RowFilter { 329 return &btpb.RowFilter{Filter: &btpb.RowFilter_RowSampleFilter{RowSampleFilter: float64(rsf)}} 330} 331