1 // Copyright 2016 Google Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
17 package google.genomics.v1;
19 import "google/api/annotations.proto";
21 option cc_enable_arenas = true;
22 option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
23 option java_multiple_files = true;
24 option java_outer_classname = "ReferencesProto";
25 option java_package = "com.google.genomics.v1";
27 service ReferenceServiceV1 {
28 // Searches for reference sets which match the given criteria.
30 // For the definitions of references and other genomics resources, see
31 // [Fundamentals of Google
32 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
35 // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
36 rpc SearchReferenceSets(SearchReferenceSetsRequest)
37 returns (SearchReferenceSetsResponse) {
38 option (google.api.http) = {
39 post: "/v1/referencesets/search"
44 // Gets a reference set.
46 // For the definitions of references and other genomics resources, see
47 // [Fundamentals of Google
48 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
51 // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
52 rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
53 option (google.api.http) = {
54 get: "/v1/referencesets/{reference_set_id}"
58 // Searches for references which match the given criteria.
60 // For the definitions of references and other genomics resources, see
61 // [Fundamentals of Google
62 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
65 // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
66 rpc SearchReferences(SearchReferencesRequest)
67 returns (SearchReferencesResponse) {
68 option (google.api.http) = {
69 post: "/v1/references/search"
76 // For the definitions of references and other genomics resources, see
77 // [Fundamentals of Google
78 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
81 // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
82 rpc GetReference(GetReferenceRequest) returns (Reference) {
83 option (google.api.http) = {
84 get: "/v1/references/{reference_id}"
88 // Lists the bases in a reference, optionally restricted to a range.
90 // For the definitions of references and other genomics resources, see
91 // [Fundamentals of Google
92 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
95 // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
96 rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
97 option (google.api.http) = {
98 get: "/v1/references/{reference_id}/bases"
103 // A reference is a canonical assembled DNA sequence, intended to act as a
104 // reference coordinate space for other genomic annotations. A single reference
105 // might represent the human chromosome 1 or mitochondrial DNA, for instance. A
106 // reference belongs to one or more reference sets.
108 // For more genomics resource definitions, see [Fundamentals of Google
109 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
111 // The server-generated reference ID, unique across all references.
114 // The length of this reference's sequence.
117 // MD5 of the upper-case sequence excluding all whitespace characters (this
118 // is equivalent to SQ:M5 in SAM). This value is represented in lower case
119 // hexadecimal format.
120 string md5checksum = 3;
122 // The name of this reference, for example `22`.
125 // The URI from which the sequence was obtained. Typically specifies a FASTA
127 string source_uri = 5;
129 // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
130 // with a version number, for example `GCF_000001405.26`.
131 repeated string source_accessions = 6;
133 // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
134 int32 ncbi_taxon_id = 7;
137 // A reference set is a set of references which typically comprise a reference
138 // assembly for a species, such as `GRCh38` which is representative
139 // of the human genome. A reference set defines a common coordinate space for
140 // comparing reference-aligned experimental data. A reference set contains 1 or
143 // For more genomics resource definitions, see [Fundamentals of Google
144 // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
145 message ReferenceSet {
146 // The server-generated reference set ID, unique across all reference sets.
149 // The IDs of the reference objects that are part of this set.
150 // `Reference.md5checksum` must be unique within this set.
151 repeated string reference_ids = 2;
153 // Order-independent MD5 checksum which identifies this reference set. The
154 // checksum is computed by sorting all lower case hexadecimal string
155 // `reference.md5checksum` (for all references in this set) in
156 // ascending lexicographic order, concatenating, and taking the MD5 of that
157 // value. The resulting value is represented in lower case hexadecimal format.
158 string md5checksum = 3;
160 // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
161 // indicating the species which this reference set is intended to model. Note
162 // that contained references may specify a different `ncbiTaxonId`, as
163 // assemblies may contain reference sequences which do not belong to the
164 // modeled species, for example EBV in a human reference genome.
165 int32 ncbi_taxon_id = 4;
167 // Free text description of this reference set.
168 string description = 5;
170 // Public ID of this reference set, such as `GRCh37`.
171 string assembly_id = 6;
173 // The URI from which the references were obtained.
174 string source_uri = 7;
176 // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
177 // with a version number, for example `NC_000001.11`.
178 repeated string source_accessions = 8;
181 message SearchReferenceSetsRequest {
182 // If present, return reference sets for which the
183 // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
184 repeated string md5checksums = 1;
186 // If present, return reference sets for which a prefix of any of
187 // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
188 // matches any of these strings. Accession numbers typically have a main
189 // number and a version, for example `NC_000001.11`.
190 repeated string accessions = 2;
192 // If present, return reference sets for which a substring of their
193 // `assemblyId` matches this string (case insensitive).
194 string assembly_id = 3;
196 // The continuation token, which is used to page through large result sets.
197 // To get the next page of results, set this parameter to the value of
198 // `nextPageToken` from the previous response.
199 string page_token = 4;
201 // The maximum number of results to return in a single page. If unspecified,
202 // defaults to 1024. The maximum value is 4096.
206 message SearchReferenceSetsResponse {
207 // The matching reference sets.
208 repeated ReferenceSet reference_sets = 1;
210 // The continuation token, which is used to page through large result sets.
211 // Provide this value in a subsequent request to return the next page of
212 // results. This field will be empty if there aren't any additional results.
213 string next_page_token = 2;
216 message GetReferenceSetRequest {
217 // The ID of the reference set.
218 string reference_set_id = 1;
221 message SearchReferencesRequest {
222 // If present, return references for which the
223 // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
224 repeated string md5checksums = 1;
226 // If present, return references for which a prefix of any of
227 // [sourceAccessions][google.genomics.v1.Reference.source_accessions] matches
228 // any of these strings. Accession numbers typically have a main number and a
229 // version, for example `GCF_000001405.26`.
230 repeated string accessions = 2;
232 // If present, return only references which belong to this reference set.
233 string reference_set_id = 3;
235 // The continuation token, which is used to page through large result sets.
236 // To get the next page of results, set this parameter to the value of
237 // `nextPageToken` from the previous response.
238 string page_token = 4;
240 // The maximum number of results to return in a single page. If unspecified,
241 // defaults to 1024. The maximum value is 4096.
245 message SearchReferencesResponse {
246 // The matching references.
247 repeated Reference references = 1;
249 // The continuation token, which is used to page through large result sets.
250 // Provide this value in a subsequent request to return the next page of
251 // results. This field will be empty if there aren't any additional results.
252 string next_page_token = 2;
255 message GetReferenceRequest {
256 // The ID of the reference.
257 string reference_id = 1;
260 message ListBasesRequest {
261 // The ID of the reference.
262 string reference_id = 1;
264 // The start position (0-based) of this query. Defaults to 0.
267 // The end position (0-based, exclusive) of this query. Defaults to the length
268 // of this reference.
271 // The continuation token, which is used to page through large result sets.
272 // To get the next page of results, set this parameter to the value of
273 // `nextPageToken` from the previous response.
274 string page_token = 4;
276 // The maximum number of bases to return in a single page. If unspecified,
277 // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
282 message ListBasesResponse {
283 // The offset position (0-based) of the given `sequence` from the
284 // start of this `Reference`. This value will differ for each page
285 // in a paginated request.
288 // A substring of the bases that make up this reference.
291 // The continuation token, which is used to page through large result sets.
292 // Provide this value in a subsequent request to return the next page of
293 // results. This field will be empty if there aren't any additional results.
294 string next_page_token = 3;