legacy-libs/google-proto-files/google/bigtable/v1/bigtable_data.proto

   1 // Copyright 2018 Google Inc.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //     http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 syntax = "proto3";
  16
  17 package google.bigtable.v1;
  18
  19 option go_package = "google.golang.org/genproto/googleapis/bigtable/v1;bigtable";
  20 option java_multiple_files = true;
  21 option java_outer_classname = "BigtableDataProto";
  22 option java_package = "com.google.bigtable.v1";
  23
  24 // Specifies the complete (requested) contents of a single row of a table.
  25 // Rows which exceed 256MiB in size cannot be read in full.
  26 message Row {
  27   // The unique key which identifies this row within its table. This is the same
  28   // key that's used to identify the row in, for example, a MutateRowRequest.
  29   // May contain any non-empty byte string up to 4KiB in length.
  30   bytes key = 1;
  31
  32   // The row's families. May be empty, but only if the entire row is empty.
  33   // The mutual ordering of column families is not specified.
  34   repeated Family families = 2;
  35 }
  36
  37 // Specifies (some of) the contents of a single row/column family of a table.
  38 message Family {
  39   // The unique key which identifies this family within its row. This is the
  40   // same key that's used to identify the family in, for example, a RowFilter
  41   // which sets its "family_name_regex_filter" field.
  42   // Must match [-_.a-zA-Z0-9]+, except that AggregatingRowProcessors may
  43   // produce cells in a sentinel family with an empty name.
  44   // Must be no greater than 64 characters in length.
  45   string name = 1;
  46
  47   // Non-empty list of columns, sorted in order of increasing "qualifier".
  48   repeated Column columns = 2;
  49 }
  50
  51 // Specifies (some of) the contents of a single row/column of a table.
  52 message Column {
  53   // The unique key which identifies this column within its family. This is the
  54   // same key that's used to identify the column in, for example, a RowFilter
  55   // which sets its "column_qualifier_regex_filter" field.
  56   // May contain any byte string, including the empty string, up to 16KiB in
  57   // length.
  58   bytes qualifier = 1;
  59
  60   // Non-empty list of cells, sorted by decreasing "timestamp_micros".
  61   repeated Cell cells = 2;
  62 }
  63
  64 // Specifies (some of) the contents of a single row/column/timestamp of a table.
  65 message Cell {
  66   // The cell's stored timestamp, which also uniquely identifies it within
  67   // its column.
  68   // Values are always expressed in microseconds, but individual tables may set
  69   // a coarser "granularity" to further restrict the allowed values. For
  70   // example, a table that specifies millisecond granularity will only allow
  71   // values of "timestamp_micros" that are multiples of 1000.
  72   int64 timestamp_micros = 1;
  73
  74   // The value stored in the cell.
  75   // May contain any byte string, including the empty string, up to 100MiB in
  76   // length.
  77   bytes value = 2;
  78
  79   // Labels applied to the cell by a [RowFilter][google.bigtable.v1.RowFilter].
  80   repeated string labels = 3;
  81 }
  82
  83 // Specifies a contiguous range of rows: [start_key, end_key).
  84 message RowRange {
  85   // Inclusive lower bound. If left empty, interpreted as the empty string.
  86   bytes start_key = 2;
  87
  88   // Exclusive upper bound. If left empty, interpreted as infinity.
  89   bytes end_key = 3;
  90 }
  91
  92 // Specifies a non-contiguous set of rows.
  93 message RowSet {
  94   // Keys of individual rows included in the set.
  95   repeated bytes row_keys = 1;
  96
  97   // Contiguous row ranges included in the set.
  98   repeated RowRange row_ranges = 2;
  99 }
 100
 101 // Specifies a contiguous range of columns within a single column family.
 102 // The range spans from <column_family>:<start_qualifier> to
 103 // <column_family>:<end_qualifier>, where both bounds can be either inclusive or
 104 // exclusive.
 105 message ColumnRange {
 106   // The name of the column family within which this range falls.
 107   string family_name = 1;
 108
 109   // The column qualifier at which to start the range (within 'family_name').
 110   // If neither field is set, interpreted as the empty string, inclusive.
 111   oneof start_qualifier {
 112     // Used when giving an inclusive lower bound for the range.
 113     bytes start_qualifier_inclusive = 2;
 114
 115     // Used when giving an exclusive lower bound for the range.
 116     bytes start_qualifier_exclusive = 3;
 117   }
 118
 119   // The column qualifier at which to end the range (within 'family_name').
 120   // If neither field is set, interpreted as the infinite string, exclusive.
 121   oneof end_qualifier {
 122     // Used when giving an inclusive upper bound for the range.
 123     bytes end_qualifier_inclusive = 4;
 124
 125     // Used when giving an exclusive upper bound for the range.
 126     bytes end_qualifier_exclusive = 5;
 127   }
 128 }
 129
 130 // Specifies a contiguous range of microsecond timestamps.
 131 message TimestampRange {
 132   // Inclusive lower bound. If left empty, interpreted as 0.
 133   int64 start_timestamp_micros = 1;
 134
 135   // Exclusive upper bound. If left empty, interpreted as infinity.
 136   int64 end_timestamp_micros = 2;
 137 }
 138
 139 // Specifies a contiguous range of raw byte values (inclusive/exclusive bounds).
 140 message ValueRange {
 141   // The value at which to start the range.
 142   // If neither field is set, interpreted as the empty string, inclusive.
 143   oneof start_value {
 144     // Used when giving an inclusive lower bound for the range.
 145     bytes start_value_inclusive = 1;
 146
 147     // Used when giving an exclusive lower bound for the range.
 148     bytes start_value_exclusive = 2;
 149   }
 150
 151   // The value at which to end the range.
 152   // If neither field is set, interpreted as the infinite string, exclusive.
 153   oneof end_value {
 154     // Used when giving an inclusive upper bound for the range.
 155     bytes end_value_inclusive = 3;
 156
 157     // Used when giving an exclusive upper bound for the range.
 158     bytes end_value_exclusive = 4;
 159   }
 160 }
 161
 162 // Takes a row as input and produces an alternate view of the row based on
 163 // specified rules. For example, a RowFilter might trim down a row to include
 164 // just the cells from columns matching a given regular expression, or might
 165 // return all the cells of a row but not their values. More complicated filters
 166 // can be composed out of these components to express requests such as, "within
 167 // every column of a particular family, give just the two most recent cells
 168 // which are older than timestamp X."
 169 //
 170 // There are two broad categories of RowFilters (true filters and transformers),
 171 // as well as two ways to compose simple filters into more complex ones
 172 // (chains and interleaves). They work as follows:
 173 //
 174 // * True filters alter the input row by excluding some of its cells wholesale
 175 // from the output row. An example of a true filter is the "value_regex_filter",
 176 // which excludes cells whose values don't match the specified pattern. All
 177 // regex true filters use RE2 syntax (https://github.com/google/re2/wiki/Syntax)
 178 // in raw byte mode (RE2::Latin1), and are evaluated as full matches. An
 179 // important point to keep in mind is that RE2(.) is equivalent by default to
 180 // RE2([^\n]), meaning that it does not match newlines. When attempting to match
 181 // an arbitrary byte, you should therefore use the escape sequence '\C', which
 182 // may need to be further escaped as '\\C' in your client language.
 183 //
 184 // * Transformers alter the input row by changing the values of some of its
 185 // cells in the output, without excluding them completely. Currently, the only
 186 // supported transformer is the "strip_value_transformer", which replaces every
 187 // cell's value with the empty string.
 188 //
 189 // * Chains and interleaves are described in more detail in the
 190 // RowFilter.Chain and RowFilter.Interleave documentation.
 191 //
 192 // The total serialized size of a RowFilter message must not
 193 // exceed 4096 bytes, and RowFilters may not be nested within each other
 194 // (in Chains or Interleaves) to a depth of more than 20.
 195 message RowFilter {
 196   // A RowFilter which sends rows through several RowFilters in sequence.
 197   message Chain {
 198     // The elements of "filters" are chained together to process the input row:
 199     // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
 200     // The full chain is executed atomically.
 201     repeated RowFilter filters = 1;
 202   }
 203
 204   // A RowFilter which sends each row to each of several component
 205   // RowFilters and interleaves the results.
 206   message Interleave {
 207     // The elements of "filters" all process a copy of the input row, and the
 208     // results are pooled, sorted, and combined into a single output row.
 209     // If multiple cells are produced with the same column and timestamp,
 210     // they will all appear in the output row in an unspecified mutual order.
 211     // Consider the following example, with three filters:
 212     //
 213     //                              input row
 214     //                                  |
 215     //        -----------------------------------------------------
 216     //        |                         |                         |
 217     //       f(0)                      f(1)                      f(2)
 218     //        |                         |                         |
 219     // 1: foo,bar,10,x             foo,bar,10,z              far,bar,7,a
 220     // 2: foo,blah,11,z            far,blah,5,x              far,blah,5,x
 221     //        |                         |                         |
 222     //        -----------------------------------------------------
 223     //                                  |
 224     // 1:                        foo,bar,10,z     // could have switched with #2
 225     // 2:                        foo,bar,10,x     // could have switched with #1
 226     // 3:                        foo,blah,11,z
 227     // 4:                        far,bar,7,a
 228     // 5:                        far,blah,5,x     // identical to #6
 229     // 6:                        far,blah,5,x     // identical to #5
 230     // All interleaved filters are executed atomically.
 231     repeated RowFilter filters = 1;
 232   }
 233
 234   // A RowFilter which evaluates one of two possible RowFilters, depending on
 235   // whether or not a predicate RowFilter outputs any cells from the input row.
 236   //
 237   // IMPORTANT NOTE: The predicate filter does not execute atomically with the
 238   // true and false filters, which may lead to inconsistent or unexpected
 239   // results. Additionally, Condition filters have poor performance, especially
 240   // when filters are set for the false condition.
 241   message Condition {
 242     // If "predicate_filter" outputs any cells, then "true_filter" will be
 243     // evaluated on the input row. Otherwise, "false_filter" will be evaluated.
 244     RowFilter predicate_filter = 1;
 245
 246     // The filter to apply to the input row if "predicate_filter" returns any
 247     // results. If not provided, no results will be returned in the true case.
 248     RowFilter true_filter = 2;
 249
 250     // The filter to apply to the input row if "predicate_filter" does not
 251     // return any results. If not provided, no results will be returned in the
 252     // false case.
 253     RowFilter false_filter = 3;
 254   }
 255
 256   // Which of the possible RowFilter types to apply. If none are set, this
 257   // RowFilter returns all cells in the input row.
 258   oneof filter {
 259     // Applies several RowFilters to the data in sequence, progressively
 260     // narrowing the results.
 261     Chain chain = 1;
 262
 263     // Applies several RowFilters to the data in parallel and combines the
 264     // results.
 265     Interleave interleave = 2;
 266
 267     // Applies one of two possible RowFilters to the data based on the output of
 268     // a predicate RowFilter.
 269     Condition condition = 3;
 270
 271     // ADVANCED USE ONLY.
 272     // Hook for introspection into the RowFilter. Outputs all cells directly to
 273     // the output of the read rather than to any parent filter. Consider the
 274     // following example:
 275     //
 276     // Chain(
 277     //   FamilyRegex("A"),
 278     //   Interleave(
 279     //     All(),
 280     //     Chain(Label("foo"), Sink())
 281     //   ),
 282     //   QualifierRegex("B")
 283     // )
 284     //
 285     //                         A,A,1,w
 286     //                         A,B,2,x
 287     //                         B,B,4,z
 288     //                            |
 289     //                     FamilyRegex("A")
 290     //                            |
 291     //                         A,A,1,w
 292     //                         A,B,2,x
 293     //                            |
 294     //               +------------+-------------+
 295     //               |                          |
 296     //             All()                    Label(foo)
 297     //               |                          |
 298     //            A,A,1,w              A,A,1,w,labels:[foo]
 299     //            A,B,2,x              A,B,2,x,labels:[foo]
 300     //               |                          |
 301     //               |                        Sink() --------------+
 302     //               |                          |                  |
 303     //               +------------+      x------+          A,A,1,w,labels:[foo]
 304     //                            |                        A,B,2,x,labels:[foo]
 305     //                         A,A,1,w                             |
 306     //                         A,B,2,x                             |
 307     //                            |                                |
 308     //                    QualifierRegex("B")                      |
 309     //                            |                                |
 310     //                         A,B,2,x                             |
 311     //                            |                                |
 312     //                            +--------------------------------+
 313     //                            |
 314     //                         A,A,1,w,labels:[foo]
 315     //                         A,B,2,x,labels:[foo]  // could be switched
 316     //                         A,B,2,x               // could be switched
 317     //
 318     // Despite being excluded by the qualifier filter, a copy of every cell
 319     // that reaches the sink is present in the final result.
 320     //
 321     // As with an [Interleave][google.bigtable.v1.RowFilter.Interleave],
 322     // duplicate cells are possible, and appear in an unspecified mutual order.
 323     // In this case we have a duplicate with column "A:B" and timestamp 2,
 324     // because one copy passed through the all filter while the other was
 325     // passed through the label and sink. Note that one copy has label "foo",
 326     // while the other does not.
 327     //
 328     // Cannot be used within the `predicate_filter`, `true_filter`, or
 329     // `false_filter` of a [Condition][google.bigtable.v1.RowFilter.Condition].
 330     bool sink = 16;
 331
 332     // Matches all cells, regardless of input. Functionally equivalent to
 333     // leaving `filter` unset, but included for completeness.
 334     bool pass_all_filter = 17;
 335
 336     // Does not match any cells, regardless of input. Useful for temporarily
 337     // disabling just part of a filter.
 338     bool block_all_filter = 18;
 339
 340     // Matches only cells from rows whose keys satisfy the given RE2 regex. In
 341     // other words, passes through the entire row when the key matches, and
 342     // otherwise produces an empty row.
 343     // Note that, since row keys can contain arbitrary bytes, the '\C' escape
 344     // sequence must be used if a true wildcard is desired. The '.' character
 345     // will not match the newline character '\n', which may be present in a
 346     // binary key.
 347     bytes row_key_regex_filter = 4;
 348
 349     // Matches all cells from a row with probability p, and matches no cells
 350     // from the row with probability 1-p.
 351     double row_sample_filter = 14;
 352
 353     // Matches only cells from columns whose families satisfy the given RE2
 354     // regex. For technical reasons, the regex must not contain the ':'
 355     // character, even if it is not being used as a literal.
 356     // Note that, since column families cannot contain the newline character
 357     // '\n', it is sufficient to use '.' as a full wildcard when matching
 358     // column family names.
 359     string family_name_regex_filter = 5;
 360
 361     // Matches only cells from columns whose qualifiers satisfy the given RE2
 362     // regex.
 363     // Note that, since column qualifiers can contain arbitrary bytes, the '\C'
 364     // escape sequence must be used if a true wildcard is desired. The '.'
 365     // character will not match the newline character '\n', which may be
 366     // present in a binary qualifier.
 367     bytes column_qualifier_regex_filter = 6;
 368
 369     // Matches only cells from columns within the given range.
 370     ColumnRange column_range_filter = 7;
 371
 372     // Matches only cells with timestamps within the given range.
 373     TimestampRange timestamp_range_filter = 8;
 374
 375     // Matches only cells with values that satisfy the given regular expression.
 376     // Note that, since cell values can contain arbitrary bytes, the '\C' escape
 377     // sequence must be used if a true wildcard is desired. The '.' character
 378     // will not match the newline character '\n', which may be present in a
 379     // binary value.
 380     bytes value_regex_filter = 9;
 381
 382     // Matches only cells with values that fall within the given range.
 383     ValueRange value_range_filter = 15;
 384
 385     // Skips the first N cells of each row, matching all subsequent cells.
 386     // If duplicate cells are present, as is possible when using an Interleave,
 387     // each copy of the cell is counted separately.
 388     int32 cells_per_row_offset_filter = 10;
 389
 390     // Matches only the first N cells of each row.
 391     // If duplicate cells are present, as is possible when using an Interleave,
 392     // each copy of the cell is counted separately.
 393     int32 cells_per_row_limit_filter = 11;
 394
 395     // Matches only the most recent N cells within each column. For example,
 396     // if N=2, this filter would match column "foo:bar" at timestamps 10 and 9,
 397     // skip all earlier cells in "foo:bar", and then begin matching again in
 398     // column "foo:bar2".
 399     // If duplicate cells are present, as is possible when using an Interleave,
 400     // each copy of the cell is counted separately.
 401     int32 cells_per_column_limit_filter = 12;
 402
 403     // Replaces each cell's value with the empty string.
 404     bool strip_value_transformer = 13;
 405
 406     // Applies the given label to all cells in the output row. This allows
 407     // the client to determine which results were produced from which part of
 408     // the filter.
 409     //
 410     // Values must be at most 15 characters in length, and match the RE2
 411     // pattern [a-z0-9\\-]+
 412     //
 413     // Due to a technical limitation, it is not currently possible to apply
 414     // multiple labels to a cell. As a result, a Chain may have no more than
 415     // one sub-filter which contains an apply_label_transformer. It is okay for
 416     // an Interleave to contain multiple apply_label_transformers, as they will
 417     // be applied to separate copies of the input. This may be relaxed in the
 418     // future.
 419     string apply_label_transformer = 19;
 420   }
 421 }
 422
 423 // Specifies a particular change to be made to the contents of a row.
 424 message Mutation {
 425   // A Mutation which sets the value of the specified cell.
 426   message SetCell {
 427     // The name of the family into which new data should be written.
 428     // Must match [-_.a-zA-Z0-9]+
 429     string family_name = 1;
 430
 431     // The qualifier of the column into which new data should be written.
 432     // Can be any byte string, including the empty string.
 433     bytes column_qualifier = 2;
 434
 435     // The timestamp of the cell into which new data should be written.
 436     // Use -1 for current Bigtable server time.
 437     // Otherwise, the client should set this value itself, noting that the
 438     // default value is a timestamp of zero if the field is left unspecified.
 439     // Values must match the "granularity" of the table (e.g. micros, millis).
 440     int64 timestamp_micros = 3;
 441
 442     // The value to be written into the specified cell.
 443     bytes value = 4;
 444   }
 445
 446   // A Mutation which deletes cells from the specified column, optionally
 447   // restricting the deletions to a given timestamp range.
 448   message DeleteFromColumn {
 449     // The name of the family from which cells should be deleted.
 450     // Must match [-_.a-zA-Z0-9]+
 451     string family_name = 1;
 452
 453     // The qualifier of the column from which cells should be deleted.
 454     // Can be any byte string, including the empty string.
 455     bytes column_qualifier = 2;
 456
 457     // The range of timestamps within which cells should be deleted.
 458     TimestampRange time_range = 3;
 459   }
 460
 461   // A Mutation which deletes all cells from the specified column family.
 462   message DeleteFromFamily {
 463     // The name of the family from which cells should be deleted.
 464     // Must match [-_.a-zA-Z0-9]+
 465     string family_name = 1;
 466   }
 467
 468   // A Mutation which deletes all cells from the containing row.
 469   message DeleteFromRow {}
 470
 471   // Which of the possible Mutation types to apply.
 472   oneof mutation {
 473     // Sets a cell's value.
 474     SetCell set_cell = 1;
 475
 476     // Deletes cells from a column.
 477     DeleteFromColumn delete_from_column = 2;
 478
 479     // Deletes cells from a column family.
 480     DeleteFromFamily delete_from_family = 3;
 481
 482     // Deletes cells from the entire row.
 483     DeleteFromRow delete_from_row = 4;
 484   }
 485 }
 486
 487 // Specifies an atomic read/modify/write operation on the latest value of the
 488 // specified column.
 489 message ReadModifyWriteRule {
 490   // The name of the family to which the read/modify/write should be applied.
 491   // Must match [-_.a-zA-Z0-9]+
 492   string family_name = 1;
 493
 494   // The qualifier of the column to which the read/modify/write should be
 495   // applied.
 496   // Can be any byte string, including the empty string.
 497   bytes column_qualifier = 2;
 498
 499   // The rule used to determine the column's new latest value from its current
 500   // latest value. At most one of the fields below can be set.
 501   oneof rule {
 502     // Rule specifying that "append_value" be appended to the existing value.
 503     // If the targeted cell is unset, it will be treated as containing the
 504     // empty string.
 505     bytes append_value = 3;
 506
 507     // Rule specifying that "increment_amount" be added to the existing value.
 508     // If the targeted cell is unset, it will be treated as containing a zero.
 509     // Otherwise, the targeted cell must contain an 8-byte value (interpreted
 510     // as a 64-bit big-endian signed integer), or the entire request will fail.
 511     int64 increment_amount = 4;
 512   }
 513 }