1/* 2Copyright 2015 Google LLC 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package bigtable 18 19import ( 20 "fmt" 21 "strings" 22 "time" 23 24 btpb "google.golang.org/genproto/googleapis/bigtable/v2" 25) 26 27// A Filter represents a row filter. 28type Filter interface { 29 String() string 30 proto() *btpb.RowFilter 31} 32 33// ChainFilters returns a filter that applies a sequence of filters. 34func ChainFilters(sub ...Filter) Filter { return chainFilter{sub} } 35 36type chainFilter struct { 37 sub []Filter 38} 39 40func (cf chainFilter) String() string { 41 var ss []string 42 for _, sf := range cf.sub { 43 ss = append(ss, sf.String()) 44 } 45 return "(" + strings.Join(ss, " | ") + ")" 46} 47 48func (cf chainFilter) proto() *btpb.RowFilter { 49 chain := &btpb.RowFilter_Chain{} 50 for _, sf := range cf.sub { 51 chain.Filters = append(chain.Filters, sf.proto()) 52 } 53 return &btpb.RowFilter{ 54 Filter: &btpb.RowFilter_Chain_{Chain: chain}, 55 } 56} 57 58// InterleaveFilters returns a filter that applies a set of filters in parallel 59// and interleaves the results. 60func InterleaveFilters(sub ...Filter) Filter { return interleaveFilter{sub} } 61 62type interleaveFilter struct { 63 sub []Filter 64} 65 66func (ilf interleaveFilter) String() string { 67 var ss []string 68 for _, sf := range ilf.sub { 69 ss = append(ss, sf.String()) 70 } 71 return "(" + strings.Join(ss, " + ") + ")" 72} 73 74func (ilf interleaveFilter) proto() *btpb.RowFilter { 75 inter := &btpb.RowFilter_Interleave{} 76 for _, sf := range ilf.sub { 77 inter.Filters = append(inter.Filters, sf.proto()) 78 } 79 return &btpb.RowFilter{ 80 Filter: &btpb.RowFilter_Interleave_{Interleave: inter}, 81 } 82} 83 84// RowKeyFilter returns a filter that matches cells from rows whose 85// key matches the provided RE2 pattern. 86// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 87func RowKeyFilter(pattern string) Filter { return rowKeyFilter(pattern) } 88 89type rowKeyFilter string 90 91func (rkf rowKeyFilter) String() string { return fmt.Sprintf("row(%s)", string(rkf)) } 92 93func (rkf rowKeyFilter) proto() *btpb.RowFilter { 94 return &btpb.RowFilter{Filter: &btpb.RowFilter_RowKeyRegexFilter{RowKeyRegexFilter: []byte(rkf)}} 95} 96 97// FamilyFilter returns a filter that matches cells whose family name 98// matches the provided RE2 pattern. 99// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 100func FamilyFilter(pattern string) Filter { return familyFilter(pattern) } 101 102type familyFilter string 103 104func (ff familyFilter) String() string { return fmt.Sprintf("col(%s:)", string(ff)) } 105 106func (ff familyFilter) proto() *btpb.RowFilter { 107 return &btpb.RowFilter{Filter: &btpb.RowFilter_FamilyNameRegexFilter{FamilyNameRegexFilter: string(ff)}} 108} 109 110// ColumnFilter returns a filter that matches cells whose column name 111// matches the provided RE2 pattern. 112// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 113func ColumnFilter(pattern string) Filter { return columnFilter(pattern) } 114 115type columnFilter string 116 117func (cf columnFilter) String() string { return fmt.Sprintf("col(.*:%s)", string(cf)) } 118 119func (cf columnFilter) proto() *btpb.RowFilter { 120 return &btpb.RowFilter{Filter: &btpb.RowFilter_ColumnQualifierRegexFilter{ColumnQualifierRegexFilter: []byte(cf)}} 121} 122 123// ValueFilter returns a filter that matches cells whose value 124// matches the provided RE2 pattern. 125// See https://github.com/google/re2/wiki/Syntax for the accepted syntax. 126func ValueFilter(pattern string) Filter { return valueFilter(pattern) } 127 128type valueFilter string 129 130func (vf valueFilter) String() string { return fmt.Sprintf("value_match(%s)", string(vf)) } 131 132func (vf valueFilter) proto() *btpb.RowFilter { 133 return &btpb.RowFilter{Filter: &btpb.RowFilter_ValueRegexFilter{ValueRegexFilter: []byte(vf)}} 134} 135 136// LatestNFilter returns a filter that matches the most recent N cells in each column. 137func LatestNFilter(n int) Filter { return latestNFilter(n) } 138 139type latestNFilter int32 140 141func (lnf latestNFilter) String() string { return fmt.Sprintf("col(*,%d)", lnf) } 142 143func (lnf latestNFilter) proto() *btpb.RowFilter { 144 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerColumnLimitFilter{CellsPerColumnLimitFilter: int32(lnf)}} 145} 146 147// LabelFilter returns a filter that applies the 148// given label to all cells in the output row. 149func LabelFilter(label string) Filter { return labelFilter(label) } 150 151type labelFilter string 152 153func (lf labelFilter) String() string { return fmt.Sprintf("apply_label(%s)", string(lf)) } 154 155func (lf labelFilter) proto() *btpb.RowFilter { 156 return &btpb.RowFilter{Filter: &btpb.RowFilter_ApplyLabelTransformer{ApplyLabelTransformer: string(lf)}} 157} 158 159// StripValueFilter returns a filter that replaces each value with the empty string. 160func StripValueFilter() Filter { return stripValueFilter{} } 161 162type stripValueFilter struct{} 163 164func (stripValueFilter) String() string { return "strip_value()" } 165func (stripValueFilter) proto() *btpb.RowFilter { 166 return &btpb.RowFilter{Filter: &btpb.RowFilter_StripValueTransformer{StripValueTransformer: true}} 167} 168 169// TimestampRangeFilter returns a filter that matches any cells whose timestamp is within the given time bounds. A zero 170// time means no bound. 171// The timestamp will be truncated to millisecond granularity. 172func TimestampRangeFilter(startTime time.Time, endTime time.Time) Filter { 173 trf := timestampRangeFilter{} 174 if !startTime.IsZero() { 175 trf.startTime = Time(startTime) 176 } 177 if !endTime.IsZero() { 178 trf.endTime = Time(endTime) 179 } 180 return trf 181} 182 183// TimestampRangeFilterMicros returns a filter that matches any cells whose timestamp is within the given time bounds, 184// specified in units of microseconds since 1 January 1970. A zero value for the end time is interpreted as no bound. 185// The timestamp will be truncated to millisecond granularity. 186func TimestampRangeFilterMicros(startTime Timestamp, endTime Timestamp) Filter { 187 return timestampRangeFilter{startTime, endTime} 188} 189 190type timestampRangeFilter struct { 191 startTime Timestamp 192 endTime Timestamp 193} 194 195func (trf timestampRangeFilter) String() string { 196 return fmt.Sprintf("timestamp_range(%v,%v)", trf.startTime, trf.endTime) 197} 198 199func (trf timestampRangeFilter) proto() *btpb.RowFilter { 200 return &btpb.RowFilter{ 201 Filter: &btpb.RowFilter_TimestampRangeFilter{TimestampRangeFilter: &btpb.TimestampRange{ 202 StartTimestampMicros: int64(trf.startTime.TruncateToMilliseconds()), 203 EndTimestampMicros: int64(trf.endTime.TruncateToMilliseconds()), 204 }, 205 }} 206} 207 208// ColumnRangeFilter returns a filter that matches a contiguous range of columns within a single 209// family, as specified by an inclusive start qualifier and exclusive end qualifier. 210func ColumnRangeFilter(family, start, end string) Filter { 211 return columnRangeFilter{family, start, end} 212} 213 214type columnRangeFilter struct { 215 family string 216 start string 217 end string 218} 219 220func (crf columnRangeFilter) String() string { 221 return fmt.Sprintf("columnRangeFilter(%s,%s,%s)", crf.family, crf.start, crf.end) 222} 223 224func (crf columnRangeFilter) proto() *btpb.RowFilter { 225 r := &btpb.ColumnRange{FamilyName: crf.family} 226 if crf.start != "" { 227 r.StartQualifier = &btpb.ColumnRange_StartQualifierClosed{StartQualifierClosed: []byte(crf.start)} 228 } 229 if crf.end != "" { 230 r.EndQualifier = &btpb.ColumnRange_EndQualifierOpen{EndQualifierOpen: []byte(crf.end)} 231 } 232 return &btpb.RowFilter{Filter: &btpb.RowFilter_ColumnRangeFilter{ColumnRangeFilter: r}} 233} 234 235// ValueRangeFilter returns a filter that matches cells with values that fall within 236// the given range, as specified by an inclusive start value and exclusive end value. 237func ValueRangeFilter(start, end []byte) Filter { 238 return valueRangeFilter{start, end} 239} 240 241type valueRangeFilter struct { 242 start []byte 243 end []byte 244} 245 246func (vrf valueRangeFilter) String() string { 247 return fmt.Sprintf("valueRangeFilter(%s,%s)", vrf.start, vrf.end) 248} 249 250func (vrf valueRangeFilter) proto() *btpb.RowFilter { 251 r := &btpb.ValueRange{} 252 if vrf.start != nil { 253 r.StartValue = &btpb.ValueRange_StartValueClosed{StartValueClosed: vrf.start} 254 } 255 if vrf.end != nil { 256 r.EndValue = &btpb.ValueRange_EndValueOpen{EndValueOpen: vrf.end} 257 } 258 return &btpb.RowFilter{Filter: &btpb.RowFilter_ValueRangeFilter{ValueRangeFilter: r}} 259} 260 261// ConditionFilter returns a filter that evaluates to one of two possible filters depending 262// on whether or not the given predicate filter matches at least one cell. 263// If the matched filter is nil then no results will be returned. 264// IMPORTANT NOTE: The predicate filter does not execute atomically with the 265// true and false filters, which may lead to inconsistent or unexpected 266// results. Additionally, condition filters have poor performance, especially 267// when filters are set for the false condition. 268func ConditionFilter(predicateFilter, trueFilter, falseFilter Filter) Filter { 269 return conditionFilter{predicateFilter, trueFilter, falseFilter} 270} 271 272type conditionFilter struct { 273 predicateFilter Filter 274 trueFilter Filter 275 falseFilter Filter 276} 277 278func (cf conditionFilter) String() string { 279 return fmt.Sprintf("conditionFilter(%s,%s,%s)", cf.predicateFilter, cf.trueFilter, cf.falseFilter) 280} 281 282func (cf conditionFilter) proto() *btpb.RowFilter { 283 var tf *btpb.RowFilter 284 var ff *btpb.RowFilter 285 if cf.trueFilter != nil { 286 tf = cf.trueFilter.proto() 287 } 288 if cf.falseFilter != nil { 289 ff = cf.falseFilter.proto() 290 } 291 return &btpb.RowFilter{ 292 Filter: &btpb.RowFilter_Condition_{Condition: &btpb.RowFilter_Condition{ 293 PredicateFilter: cf.predicateFilter.proto(), 294 TrueFilter: tf, 295 FalseFilter: ff, 296 }}} 297} 298 299// CellsPerRowOffsetFilter returns a filter that skips the first N cells of each row, matching all subsequent cells. 300func CellsPerRowOffsetFilter(n int) Filter { 301 return cellsPerRowOffsetFilter(n) 302} 303 304type cellsPerRowOffsetFilter int32 305 306func (cof cellsPerRowOffsetFilter) String() string { 307 return fmt.Sprintf("cells_per_row_offset(%d)", cof) 308} 309 310func (cof cellsPerRowOffsetFilter) proto() *btpb.RowFilter { 311 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerRowOffsetFilter{CellsPerRowOffsetFilter: int32(cof)}} 312} 313 314// CellsPerRowLimitFilter returns a filter that matches only the first N cells of each row. 315func CellsPerRowLimitFilter(n int) Filter { 316 return cellsPerRowLimitFilter(n) 317} 318 319type cellsPerRowLimitFilter int32 320 321func (clf cellsPerRowLimitFilter) String() string { 322 return fmt.Sprintf("cells_per_row_limit(%d)", clf) 323} 324 325func (clf cellsPerRowLimitFilter) proto() *btpb.RowFilter { 326 return &btpb.RowFilter{Filter: &btpb.RowFilter_CellsPerRowLimitFilter{CellsPerRowLimitFilter: int32(clf)}} 327} 328 329// RowSampleFilter returns a filter that matches a row with a probability of p (must be in the interval (0, 1)). 330func RowSampleFilter(p float64) Filter { 331 return rowSampleFilter(p) 332} 333 334type rowSampleFilter float64 335 336func (rsf rowSampleFilter) String() string { 337 return fmt.Sprintf("filter(%f)", rsf) 338} 339 340func (rsf rowSampleFilter) proto() *btpb.RowFilter { 341 return &btpb.RowFilter{Filter: &btpb.RowFilter_RowSampleFilter{RowSampleFilter: float64(rsf)}} 342} 343 344// PassAllFilter returns a filter that matches everything. 345func PassAllFilter() Filter { return passAllFilter{} } 346 347type passAllFilter struct{} 348 349func (paf passAllFilter) String() string { return "passAllFilter()" } 350 351func (paf passAllFilter) proto() *btpb.RowFilter { 352 return &btpb.RowFilter{Filter: &btpb.RowFilter_PassAllFilter{PassAllFilter: true}} 353} 354 355// BlockAllFilter returns a filter that matches nothing. 356func BlockAllFilter() Filter { return blockAllFilter{} } 357 358type blockAllFilter struct{} 359 360func (baf blockAllFilter) String() string { return "blockAllFilter()" } 361 362func (baf blockAllFilter) proto() *btpb.RowFilter { 363 return &btpb.RowFilter{Filter: &btpb.RowFilter_BlockAllFilter{BlockAllFilter: true}} 364} 365