Firebase Update

This commit is contained in:
Lukas Nowy
2018-12-22 23:30:39 +01:00
parent befb44764d
commit acffe619b3
11523 changed files with 1614327 additions and 930246 deletions

View File

@ -0,0 +1,672 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
// All definitions in this file live in the google.genomics.v1 package.
package google.genomics.v1;
// annotations.proto supplies the (google.api.http) option used on the RPCs;
// the remaining imports supply Empty, FieldMask, ListValue, Int32Value and
// google.rpc.Status, all of which are referenced by the definitions below.
import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";
// Code-generation options for the C++, Go and Java targets.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.genomics.v1";
// Stores genomic reference annotations, variant annotations included, and
// serves positional retrieval over them.
service AnnotationServiceV1 {
  // Creates a new annotation set. The caller needs WRITE permission on the
  // associated dataset.
  //
  // Required fields:
  //
  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // Any other field may optionally be supplied, unless it is documented as
  // server-generated (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest)
      returns (AnnotationSet) {
    option (google.api.http) = {
      post: "/v1/annotationsets"
      body: "annotation_set"
    };
  }

  // Gets an annotation set. The caller needs READ permission on the
  // associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      get: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Updates an annotation set. The update has to honor every mutability
  // restriction and other invariant described on the annotation set
  // resource. The caller needs WRITE permission on the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest)
      returns (AnnotationSet) {
    option (google.api.http) = {
      put: "/v1/annotationsets/{annotation_set_id}"
      body: "annotation_set"
    };
  }

  // Deletes an annotation set. The caller needs WRITE permission on the
  // associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Searches for annotation sets matching the given criteria. The order of
  // the returned annotation sets is unspecified but consistent: two queries
  // for the same content (regardless of page size) yield annotation sets in
  // the same order across their respective streams of paginated responses.
  // The caller needs READ permission on the queried datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
      returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = {
      post: "/v1/annotationsets/search"
      body: "*"
    };
  }

  // Creates a new annotation. The caller needs WRITE permission on the
  // associated annotation set.
  //
  // Required fields:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // For annotations of type TRANSCRIPT, these fields of
  // [transcript][google.genomics.v1.Annotation.transcript] must also be
  // provided:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // Any other field may optionally be supplied, unless it is documented as
  // server-generated (for example, the `id` field). The annotated range must
  // be no longer than 100Mbp (mega base pairs). See the
  // [Annotation resource][google.genomics.v1.Annotation] for additional
  // restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      post: "/v1/annotations"
      body: "annotation"
    };
  }

  // Atomically creates one or more new annotations. Every annotation must
  // belong to the same annotation set, and the caller needs WRITE permission
  // on that annotation set. For optimal performance, batch positionally
  // adjacent annotations together.
  //
  // When the request has a systemic issue, such as an attempt to write to an
  // inaccessible annotation set, the entire RPC fails accordingly. For
  // lesser data issues, an error is isolated to the corresponding batch
  // entry in the response when possible, and the remaining well formed
  // annotations are created normally.
  //
  // The requirements for each individual annotation resource are described
  // on
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
      returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations:batchCreate"
      body: "*"
    };
  }

  // Gets an annotation. The caller needs READ permission on the associated
  // annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      get: "/v1/annotations/{annotation_id}"
    };
  }

  // Updates an annotation. The caller needs WRITE permission on the
  // associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      put: "/v1/annotations/{annotation_id}"
      body: "annotation"
    };
  }

  // Deletes an annotation. The caller needs WRITE permission on the
  // associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotations/{annotation_id}"
    };
  }

  // Searches for annotations matching the given criteria. Results are
  // ordered by genomic coordinate (by reference sequence, then position).
  // Annotations with equivalent genomic coordinates come back in an
  // unspecified but consistent order: two queries for the same content
  // (regardless of page size) yield annotations in the same order across
  // their respective streams of paginated responses. The caller needs READ
  // permission on the queried annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest)
      returns (SearchAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations/search"
      body: "*"
    };
  }
}
// A logical grouping of annotations that share consistent type information
// and provenance. Examples of annotation sets include 'all genes from
// refseq', and 'all variant annotations from ClinVar'.
message AnnotationSet {
  // Server-generated ID of the annotation set, unique across all annotation
  // sets.
  string id = 1;

  // ID of the dataset this annotation set belongs to.
  string dataset_id = 2;

  // ID of the reference set that defines the coordinate space for the
  // annotations in this set.
  string reference_set_id = 3;

  // Display name of this annotation set.
  string name = 4;

  // Source URI describing the file this annotation set was generated from,
  // if any.
  string source_uri = 5;

  // Type of the annotations contained in this set.
  AnnotationType type = 6;

  // A map of additional information about this annotation set. This must be
  // of the form map<string, string[]> (string key mapping to a list of
  // string values).
  map<string, google.protobuf.ListValue> info = 17;
}
// Describes a region of reference genome. An annotation's value may be one
// of several canonical types, supplemented by arbitrary info tags. An
// annotation is not inherently tied to a specific sample or individual
// (though a client could choose to use annotations in this way). Example
// canonical annotation types are `GENE` and `VARIANT`.
message Annotation {
  // Server-generated ID of the annotation, unique across all annotations.
  string id = 1;

  // ID of the annotation set this annotation belongs to.
  string annotation_set_id = 2;

  // Display name of this annotation.
  string name = 3;

  // ID of the Google Genomics reference associated with this range.
  string reference_id = 4;

  // Display name corresponding to the reference specified by `referenceId`,
  // for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;

  // Start position of the range on the reference, 0-based inclusive.
  int64 start = 6;

  // End position of the range on the reference, 0-based exclusive.
  int64 end = 7;

  // Whether this range refers to the reverse strand rather than the forward
  // strand. Regardless of this field, the start/end position of the range
  // always refer to the forward strand.
  bool reverse_strand = 8;

  // Data type of this annotation. Must match the type of the containing
  // annotation set.
  AnnotationType type = 9;

  oneof value {
    // A variant annotation, describing the effect of a variant on the
    // genome, the coding sequence, and/or higher level consequences at the
    // organism level e.g. pathogenicity. Only set for annotations of type
    // `VARIANT`.
    VariantAnnotation variant = 10;

    // A transcript value, asserting that a particular region of the
    // reference genome may be transcribed as RNA. An alternative splicing
    // pattern would be represented as a separate transcript object. Only set
    // for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }

  // A map of additional information about this annotation. This must be of
  // the form map<string, string[]> (string key mapping to a list of string
  // values).
  map<string, google.protobuf.ListValue> info = 12;
}
// The value carried by annotations of type `VARIANT`: the variant's type,
// its effect on the coding sequence, and its associated clinical data.
message VariantAnnotation {
  // A condition associated with a variant; it describes the way a variant
  // influences human health.
  message ClinicalCondition {
    // Names for the condition.
    repeated string names = 1;

    // External IDs for this condition.
    repeated ExternalId external_ids = 2;

    // The MedGen concept id associated with this gene.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    // NOTE(review): this field belongs to ClinicalCondition, so "gene" may
    // be a slip for "condition" - confirm.
    string concept_id = 3;

    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }

  // The kinds of variant an annotation can describe.
  enum Type {
    TYPE_UNSPECIFIED = 0;

    // Use `TYPE_OTHER` when no other Type will suffice. Further explanation
    // of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;

    // An insertion.
    INSERTION = 2;

    // A deletion.
    DELETION = 3;

    // A block substitution of two or more nucleotides.
    SUBSTITUTION = 4;

    // A single nucleotide polymorphism.
    SNP = 5;

    // A large structural variant, including chromosomal fusions,
    // inversions, etc.
    STRUCTURAL = 6;

    // A variation in copy number.
    CNV = 7;
  }

  // The effects a variant can have on the coding sequence.
  enum Effect {
    EFFECT_UNSPECIFIED = 0;

    // Use `EFFECT_OTHER` when no other Effect will suffice.
    EFFECT_OTHER = 1;

    // A mutation in which the insertion or deletion of nucleotides resulted
    // in a frameshift change.
    FRAMESHIFT = 2;

    // A mutation in which a multiple of three nucleotides has been inserted
    // or deleted, resulting in no change to the reading frame of the coding
    // sequence.
    FRAME_PRESERVING_INDEL = 3;

    // A single nucleotide polymorphism mutation that results in no amino
    // acid change.
    SYNONYMOUS_SNP = 4;

    // A single nucleotide polymorphism mutation that results in an amino
    // acid change.
    NONSYNONYMOUS_SNP = 5;

    // A mutation that leads to the creation of a stop codon at the variant
    // site. Frameshift mutations creating downstream stop codons do not
    // count as `STOP_GAIN`.
    STOP_GAIN = 6;

    // A mutation that eliminates a stop codon at the variant site.
    STOP_LOSS = 7;

    // The variant is found in a splice site for the associated transcript,
    // and alters the normal splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }

  // Clinical significance values for a variant.
  enum ClinicalSignificance {
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

    // Use `CLINICAL_SIGNIFICANCE_OTHER` when no other clinical significance
    // value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;

    UNCERTAIN = 2;

    BENIGN = 3;

    LIKELY_BENIGN = 4;

    LIKELY_PATHOGENIC = 5;

    PATHOGENIC = 6;

    DRUG_RESPONSE = 7;

    HISTOCOMPATIBILITY = 8;

    CONFERS_SENSITIVITY = 9;

    RISK_FACTOR = 10;

    ASSOCIATION = 11;

    PROTECTIVE = 12;

    // Use `MULTIPLE_REPORTED` when multiple clinical significances are
    // reported for a variant. The original clinical significance values may
    // be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }

  // The variant type; adapted from ClinVar's list of variant types.
  Type type = 1;

  // The variant's effect on the coding sequence.
  Effect effect = 2;

  // The alternate allele for this variant. When multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;

  // Google annotation ID of the gene affected by this variant. This should
  // be provided when the variant is created.
  string gene_id = 4;

  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;

  // The conditions associated with this variant. A condition describes the
  // way a variant influences human health.
  repeated ClinicalCondition conditions = 6;

  // The clinical significance of this variant. Adapted from the ClinVar
  // controlled vocabulary for clinical significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
}
// Asserts that a particular region of the reference genome may be
// transcribed as RNA.
message Transcript {
  // One exon of a transcript.
  message Exon {
    // Start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. This is relative to the reference start, and *not*
    // the containing annotation start.
    int64 start = 1;

    // End position of the exon on this annotation's reference sequence,
    // 0-based exclusive. This is relative to the reference start, and *not*
    // the containing annotation start.
    int64 end = 2;

    // Frame of this exon. Holds a value of 0, 1, or 2, indicating the offset
    // of the first coding base of the exon within the reading frame of the
    // coding DNA sequence, if any. This field depends on the strandedness of
    // this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, the offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, the offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. When a
    // transcript is created, the frame must be populated for all or none of
    // the coding exons.
    google.protobuf.Int32Value frame = 3;
  }

  // The range of a transcript's coding sequence.
  message CodingSequence {
    // Start of the coding sequence on this annotation's reference sequence,
    // 0-based inclusive. This position is relative to the reference start,
    // and *not* the containing annotation start.
    int64 start = 1;

    // End of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. This position is relative to the reference start,
    // and *not* the containing annotation start.
    int64 end = 2;
  }

  // Annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;

  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that make up
  // this transcript. Leave this field unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. They are not modeled explicitly here, but intron
  // ranges can be deduced: all regions of this transcript that are not exons
  // are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence]
  // correspond to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;

  // Range of the coding sequence for this transcript, if any. To determine
  // the exact ranges of coding sequence, intersect this range with those of
  // the [exons][google.genomics.v1.Transcript.exons], if any. If there are
  // any [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must
  // start and end within them.
  //
  // In some cases the reference genome will not exactly match the observed
  // mRNA transcript, e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not
  // necessarily match the expected reference reading frame, and coding exon
  // reference bases cannot necessarily be concatenated to produce the
  // original transcript mRNA.
  CodingSequence coding_sequence = 3;
}
// An identifier assigned by an external data source.
message ExternalId {
  // Name of the source of this data.
  string source_name = 1;

  // The id used by the source of this data.
  string id = 2;
}
// Request message for the CreateAnnotationSet RPC.
message CreateAnnotationSetRequest {
  // The annotation set to create.
  AnnotationSet annotation_set = 1;
}
// Request message for the GetAnnotationSet RPC.
message GetAnnotationSetRequest {
  // ID of the annotation set to retrieve.
  string annotation_set_id = 1;
}
// Request message for the UpdateAnnotationSet RPC.
message UpdateAnnotationSetRequest {
  // ID of the annotation set to update.
  string annotation_set_id = 1;

  // The new annotation set.
  AnnotationSet annotation_set = 2;

  // Optional mask specifying which fields to update. The mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. When unspecified, all
  // mutable fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for the DeleteAnnotationSet RPC.
message DeleteAnnotationSetRequest {
  // ID of the annotation set to delete.
  string annotation_set_id = 1;
}
// Request message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsRequest {
  // Required. IDs of the datasets to search within. The caller must have
  // `READ` access to these datasets.
  repeated string dataset_ids = 1;

  // When specified, only annotation sets associated with the given reference
  // set are returned.
  string reference_set_id = 2;

  // Only return annotation sets for which a substring of the name matches
  // this string (case insensitive).
  string name = 3;

  // When specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this parameter to the value of `nextPageToken`
  // from the previous response.
  string page_token = 5;

  // Maximum number of results to return in a single page. Defaults to 128
  // when unspecified. The maximum value is 1024.
  int32 page_size = 6;
}
// Response message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;

  // Continuation token used to page through large result sets. Provide this
  // value in a subsequent request to return the next page of results. This
  // field is empty when there are no additional results.
  string next_page_token = 2;
}
// Request message for the CreateAnnotation RPC.
message CreateAnnotationRequest {
  // The annotation to create.
  Annotation annotation = 1;
}
// Request message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsRequest {
  // The annotations to create. At most 4096 can be specified in a single
  // request.
  repeated Annotation annotations = 1;

  // A unique request ID that lets the server detect duplicated requests.
  // When provided, duplicated requests result in the same response; when not
  // provided, duplicated requests may result in duplicated data. For a given
  // annotation set, callers should not reuse `request_id`s when writing
  // different batches of annotations - behavior in this case is undefined.
  // A common approach is to use a UUID. For batch jobs where worker crashes
  // are a possibility, consider using some unique variant of a worker or run
  // ID.
  string request_id = 2;
}
// Response message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsResponse {
  // The outcome for a single annotation of the batch.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;

    // The created annotation, if creation succeeded.
    Annotation annotation = 2;
  }

  // The per-annotation result entries, ordered consistently with the
  // original request.
  repeated Entry entries = 1;
}
// Request message for the GetAnnotation RPC.
message GetAnnotationRequest {
  // ID of the annotation to retrieve.
  string annotation_id = 1;
}
// Request message for the UpdateAnnotation RPC.
message UpdateAnnotationRequest {
  // ID of the annotation to update.
  string annotation_id = 1;

  // The new annotation.
  Annotation annotation = 2;

  // Optional mask specifying which fields to update. The mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. When unspecified, all
  // mutable fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for the DeleteAnnotation RPC.
message DeleteAnnotationRequest {
  // ID of the annotation to delete.
  string annotation_id = 1;
}
// Request message for the SearchAnnotations RPC.
message SearchAnnotationsRequest {
  // Required. IDs of the annotation sets to search within. The caller must
  // have `READ` access to these annotation sets. All queried annotation sets
  // must have the same type.
  repeated string annotation_set_ids = 1;

  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // ID of the reference to query.
    string reference_id = 2;

    // Name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }

  // Start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id]
  // or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;

  // End position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id]
  // or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this parameter to the value of `nextPageToken`
  // from the previous response.
  string page_token = 6;

  // Maximum number of results to return in a single page. Defaults to 256
  // when unspecified. The maximum value is 2048.
  int32 page_size = 7;
}
// Response message for the SearchAnnotations RPC.
message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;

  // Continuation token used to page through large result sets. Provide this
  // value in a subsequent request to return the next page of results. This
  // field is empty when there are no additional results.
  string next_page_token = 2;
}
// The type of an annotation or annotation set. If `type` is not specified
// when an [Annotation][google.genomics.v1.Annotation] or
// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, it will be
// set to `GENERIC`.
enum AnnotationType {
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // Use `GENERIC` when no other annotation type will suffice. It represents
  // an untyped annotation of the reference genome.
  GENERIC = 1;

  // A `VARIANT` annotation type.
  VARIANT = 2;

  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site, and the end is typically the end of the
  // gene's last exon.
  GENE = 3;

  // A `TRANSCRIPT` annotation type represents the assertion that a
  // particular region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
}

View File

@ -0,0 +1,99 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
// All definitions in this file live in the google.genomics.v1 package.
package google.genomics.v1;
// NOTE(review): no google.api option is used by the definitions visible in
// this file; confirm whether this import is still needed.
import "google/api/annotations.proto";
// Code-generation options for the C++, Go and Java targets.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "CigarProto";
option java_package = "com.google.genomics.v1";
// One CIGAR operation.
message CigarUnit {
  // The different types of CIGAR alignment operations that exist. Used
  // wherever CIGAR alignments are used.
  enum Operation {
    OPERATION_UNSPECIFIED = 0;

    // An alignment match: the sequence can be aligned to the reference
    // without evidence of an INDEL. Unlike the `SEQUENCE_MATCH` and
    // `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` operator does not
    // indicate whether the reference and read sequences are an exact match.
    // Equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // The insert operator: the read contains evidence of bases being
    // inserted into the reference. Equivalent to SAM's `I`.
    INSERT = 2;

    // The delete operator: the read contains evidence of bases being deleted
    // from the reference. Equivalent to SAM's `D`.
    DELETE = 3;

    // The skip operator: this read skips a long segment of the reference,
    // but the bases have not been deleted. Commonly used when working with
    // RNA-seq data, where reads may skip long segments of the reference
    // between exons. Equivalent to SAM's `N`.
    SKIP = 4;

    // The soft clip operator: bases at the start/end of a read have not been
    // considered during alignment. This may occur if the majority of a read
    // maps, except for low quality bases at the start/end of a read.
    // Equivalent to SAM's `S`. Bases that are soft clipped will still be
    // stored in the read.
    CLIP_SOFT = 5;

    // The hard clip operator: bases at the start/end of a read have been
    // omitted from this alignment. This may occur if this linear alignment
    // is part of a chimeric alignment, or if the read has been trimmed (for
    // example, during error correction or to trim poly-A tails for RNA-seq).
    // Equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // The pad operator: there is padding in an alignment. Equivalent to
    // SAM's `P`.
    PAD = 7;

    // This portion of the aligned sequence exactly matches the reference.
    // Equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // This portion of the aligned sequence is an alignment match to the
    // reference, but a sequence mismatch. This can indicate a SNP or a read
    // error. Equivalent to SAM's `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The type of this CIGAR operation.
  Operation operation = 1;

  // Required. The number of genomic bases that the operation runs for.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) and
  // deletions (`DELETE`). Filling this field replaces SAM's MD tag. When the
  // relevant information is not available, this field is unset.
  string reference_sequence = 3;
}

View File

@ -0,0 +1,212 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
// All definitions in this file live in the google.genomics.v1 package.
package google.genomics.v1;
// annotations.proto supplies the (google.api.http) option, the IAM imports
// supply the policy request/response types, and empty.proto supplies Empty;
// all three are used by the service below. field_mask.proto and
// timestamp.proto are presumably used by messages defined later in the file
// - TODO confirm.
import "google/api/annotations.proto";
import "google/iam/v1/iam_policy.proto";
import "google/iam/v1/policy.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
// Code-generation options for the C++, Go and Java targets.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "DatasetsProto";
option java_package = "com.google.genomics.v1";
// This service manages datasets, which are collections of genomic data.
service DatasetServiceV1 {
// Lists datasets within a project.
//
// For the definitions of datasets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
option (google.api.http) = { get: "/v1/datasets" };
}
// Creates a new dataset.
//
// For the definitions of datasets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
option (google.api.http) = { post: "/v1/datasets" body: "dataset" };
}
// Gets a dataset by ID.
//
// For the definitions of datasets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc GetDataset(GetDatasetRequest) returns (Dataset) {
option (google.api.http) = { get: "/v1/datasets/{dataset_id}" };
}
// Updates a dataset.
//
// For the definitions of datasets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// This method supports patch semantics.
rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
option (google.api.http) = { patch: "/v1/datasets/{dataset_id}" body: "dataset" };
}
// Deletes a dataset and all of its contents (all read group sets,
// reference sets, variant sets, call sets, annotation sets, etc.)
// This is reversible (up to one week after the deletion) via
// the
// [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
// operation.
//
// For the definitions of datasets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
option (google.api.http) = { delete: "/v1/datasets/{dataset_id}" };
}
// Undeletes a dataset by restoring a dataset which was deleted via this API.
//
// Datasets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// This operation is only possible for a week after the deletion occurred.
rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
  // REST binding: POST /v1/datasets/{dataset_id}:undelete, with the whole
  // request as the HTTP body.
  option (google.api.http) = {
    post: "/v1/datasets/{dataset_id}:undelete"
    body: "*"
  };
}
// Sets the access control policy on the specified dataset. Any existing
// policy is replaced.
//
// Datasets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
// Policy</a> for more information.
rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest) returns (google.iam.v1.Policy) {
  // REST binding: POST /v1/{resource=datasets/*}:setIamPolicy, with the whole
  // request as the HTTP body.
  option (google.api.http) = {
    post: "/v1/{resource=datasets/*}:setIamPolicy"
    body: "*"
  };
}
// Gets the access control policy for the dataset. The result is empty if the
// policy or resource does not exist.
//
// See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
// Policy</a> for more information.
//
// Datasets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest) returns (google.iam.v1.Policy) {
  // REST binding: POST /v1/{resource=datasets/*}:getIamPolicy, with the whole
  // request as the HTTP body.
  option (google.api.http) = {
    post: "/v1/{resource=datasets/*}:getIamPolicy"
    body: "*"
  };
}
// Returns permissions that a caller has on the specified resource.
// See <a href="/iam/docs/managing-policies#testing_permissions">Testing
// Permissions</a> for more information.
//
// Datasets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest) returns (google.iam.v1.TestIamPermissionsResponse) {
  // REST binding: POST /v1/{resource=datasets/*}:testIamPermissions, with the
  // whole request as the HTTP body.
  option (google.api.http) = {
    post: "/v1/{resource=datasets/*}:testIamPermissions"
    body: "*"
  };
}
}
// A Dataset is a collection of genomic data.
//
// More genomics resource definitions are given in [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Dataset {
  // Server-generated ID of the dataset; unique across all datasets.
  string id = 1;

  // ID of the Google Cloud project that this dataset belongs to.
  string project_id = 2;

  // Name of the dataset.
  string name = 3;

  // The time this dataset was created, carried as a
  // `google.protobuf.Timestamp` (seconds and nanoseconds since the epoch).
  google.protobuf.Timestamp create_time = 4;
}
// The dataset list request.
message ListDatasetsRequest {
  // Required. ID of the Google Cloud project whose datasets are listed.
  string project_id = 1;

  // Upper bound on the number of results returned in a single page. Defaults
  // to 50 when unspecified. The maximum value is 1024.
  int32 page_size = 2;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value of the
  // previous response.
  string page_token = 3;
}
// The dataset list response.
message ListDatasetsResponse {
  // The datasets that matched the request.
  repeated Dataset datasets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// The dataset create request.
message CreateDatasetRequest {
  // The dataset to create. `projectId` and `name` must be set on it.
  Dataset dataset = 1;
}
// The dataset update request.
message UpdateDatasetRequest {
  // ID of the dataset to update.
  string dataset_id = 1;

  // The new dataset data.
  Dataset dataset = 2;

  // Optional mask selecting which fields to update. At this time
  // [name][google.genomics.v1.Dataset.name] is the only mutable field, so
  // "name" is the only acceptable value. When unspecified, all mutable
  // fields are updated.
  google.protobuf.FieldMask update_mask = 3;
}
// The dataset delete request.
message DeleteDatasetRequest {
  // ID of the dataset to delete.
  string dataset_id = 1;
}
// The dataset undelete request.
message UndeleteDatasetRequest {
  // ID of the dataset to undelete.
  string dataset_id = 1;
}
// The dataset get request.
message GetDatasetRequest {
  // ID of the dataset to fetch.
  string dataset_id = 1;
}

View File

@ -0,0 +1,77 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/protobuf/any.proto";
import "google/protobuf/timestamp.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "OperationsProto";
option java_package = "com.google.genomics.v1";
// Metadata describing an [Operation][google.longrunning.Operation].
message OperationMetadata {
  // The Google Cloud Project in which the job is scoped.
  string project_id = 1;

  // The time at which the job was submitted to the Genomics service.
  google.protobuf.Timestamp create_time = 2;

  // The time at which the job began to run.
  google.protobuf.Timestamp start_time = 3;

  // The time at which the job stopped running.
  google.protobuf.Timestamp end_time = 4;

  // The original request that started the operation. Note that this will be
  // in the current version of the API. If the operation was started with the
  // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
  // will be returned.
  google.protobuf.Any request = 5;

  // Optional event messages that were generated during the job's execution.
  // This also contains any warnings that were generated during import
  // or export.
  repeated OperationEvent events = 6;

  // Deprecated: use `labels` instead. Optionally provided by the caller when
  // submitting the request that creates the operation.
  //
  // The field option below makes the deprecation visible in generated code;
  // it does not change the wire format.
  string client_id = 7 [deprecated = true];

  // Runtime metadata on this Operation.
  google.protobuf.Any runtime_metadata = 8;

  // Optionally provided by the caller when submitting the request that
  // creates the operation.
  map<string, string> labels = 9;
}
// An event that occurred during an [Operation][google.longrunning.Operation].
message OperationEvent {
  // Optional. When the event started.
  google.protobuf.Timestamp start_time = 1;

  // Optional. When the event finished. An event may have a start time without
  // a finish time; if a finish time is set, a start time must be set too.
  google.protobuf.Timestamp end_time = 2;

  // Required. Description of the event.
  string description = 3;
}

View File

@ -0,0 +1,42 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "PositionProto";
option java_package = "com.google.genomics.v1";
// An abstraction for referring to a genomic position, in relation to some
// already known reference. For now, a genomic position is represented as a
// reference name, a base number on that reference (0-based), and a
// determination of forward or reverse strand.
message Position {
  // Name of the reference in whatever reference set is being used.
  string reference_name = 1;

  // 0-based offset from the start of the forward strand for that reference.
  int64 position = 2;

  // True when this position is on the reverse strand rather than the forward
  // strand.
  bool reverse_strand = 3;
}

View File

@ -0,0 +1,39 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "RangeProto";
option java_package = "com.google.genomics.v1";
// A 0-based half-open genomic coordinate range for search requests.
message Range {
  // Name of the reference sequence, for example `chr1`, `1`, or `chrX`.
  string reference_name = 1;

  // Start position of the range on the reference, 0-based inclusive.
  int64 start = 2;

  // End position of the range on the reference, 0-based exclusive.
  int64 end = 3;
}

View File

@ -0,0 +1,221 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/genomics/v1/cigar.proto";
import "google/genomics/v1/position.proto";
import "google/protobuf/struct.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadAlignmentProto";
option java_package = "com.google.genomics.v1";
// A linear alignment can be represented by one CIGAR string. Describes the
// mapped position and local alignment of the read to the reference.
message LinearAlignment {
  // Position of this alignment.
  Position position = 1;

  // Mapping quality of this alignment: how likely it is that the read maps
  // to this position as opposed to other locations.
  //
  // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
  // the nearest integer.
  int32 mapping_quality = 2;

  // Local alignment of this sequence (alignment matches, indels, etc)
  // against the reference.
  repeated CigarUnit cigar = 3;
}
// A read alignment describes a linear alignment of a string of DNA to a
// [reference sequence][google.genomics.v1.Reference], in addition to metadata
// about the fragment (the molecule of DNA sequenced) and the read (the bases
// which were read by the sequencer). A read is equivalent to a line in a SAM
// file. A read belongs to exactly one read group and exactly one
// [read group set][google.genomics.v1.ReadGroupSet].
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// ### Reverse-stranded reads
//
// Mapped reads (reads having a non-null `alignment`) can be aligned to either
// the forward or the reverse strand of their associated reference. Strandedness
// of a mapped read is encoded by `alignment.position.reverseStrand`.
//
// If we consider the reference to be a forward-stranded coordinate space of
// `[0, reference.length)` with `0` as the left-most position and
// `reference.length` as the right-most position, reads are always aligned left
// to right. That is, `alignment.position.position` always refers to the
// left-most reference coordinate and `alignment.cigar` describes the alignment
// of this read to the reference from left to right. All per-base fields such as
// `alignedSequence` and `alignedQuality` share this same left-to-right
// orientation; this is true of reads which are aligned to either strand. For
// reverse-stranded reads, this means that `alignedSequence` is the reverse
// complement of the bases that were originally reported by the sequencing
// machine.
//
// ### Generating a reference-aligned sequence string
//
// When interacting with mapped reads, it's often useful to produce a string
// representing the local alignment of the read to reference. The following
// pseudocode demonstrates one way of doing this:
//
// out = ""
// offset = 0
// for c in read.alignment.cigar {
// switch c.operation {
// case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
// out += read.alignedSequence[offset:offset+c.operationLength]
// offset += c.operationLength
// break
// case "CLIP_SOFT", "INSERT":
// offset += c.operationLength
// break
// case "PAD":
// out += repeat("*", c.operationLength)
// break
// case "DELETE":
// out += repeat("-", c.operationLength)
// break
// case "SKIP":
// out += repeat(" ", c.operationLength)
// break
// case "CLIP_HARD":
// break
// }
// }
// return out
//
// ### Converting to SAM's CIGAR string
//
// The following pseudocode generates a SAM CIGAR string from the
// `cigar` field. Note that this is a lossy conversion
// (`cigar.referenceSequence` is lost).
//
// cigarMap = {
// "ALIGNMENT_MATCH": "M",
// "INSERT": "I",
// "DELETE": "D",
// "SKIP": "N",
// "CLIP_SOFT": "S",
// "CLIP_HARD": "H",
// "PAD": "P",
// "SEQUENCE_MATCH": "=",
// "SEQUENCE_MISMATCH": "X",
// }
// cigarStr = ""
// for c in read.alignment.cigar {
// cigarStr += c.operationLength + cigarMap[c.operation]
// }
// return cigarStr
message Read {
  // Server-generated read ID, unique across all reads. Not the same as
  // `fragmentName`.
  string id = 1;

  // ID of the read group this read belongs to. A read belongs to exactly
  // one read group. This is a server-generated ID which is distinct from
  // SAM's RG tag (for that value, see
  // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
  string read_group_id = 2;

  // ID of the read group set this read belongs to. A read belongs to
  // exactly one read group set.
  string read_group_set_id = 3;

  // The fragment name. Equivalent to QNAME (query template name) in SAM.
  string fragment_name = 4;

  // True when the orientation and the distance between reads from the
  // fragment are consistent with the sequencing protocol (SAM flag 0x2).
  bool proper_placement = 5;

  // True when the fragment is a PCR or optical duplicate (SAM flag 0x400).
  bool duplicate_fragment = 6;

  // Observed length of the fragment, equivalent to TLEN in SAM.
  int32 fragment_length = 7;

  // The read number in sequencing. 0-based and less than numberReads. This
  // field replaces SAM flag 0x40 and 0x80.
  int32 read_number = 8;

  // Number of reads in the fragment (extension to SAM flag 0x1).
  int32 number_reads = 9;

  // True when this read did not pass filters, such as platform or vendor
  // quality controls (SAM flag 0x200).
  bool failed_vendor_quality_checks = 10;

  // The linear alignment for this alignment record. Null for unmapped reads.
  LinearAlignment alignment = 11;

  // True when this alignment is secondary. Equivalent to SAM flag 0x100.
  // A secondary alignment represents an alternative to the primary alignment
  // for this read. Aligners may return secondary alignments if a read can
  // map ambiguously to multiple coordinates in the genome. By convention,
  // each read has one and only one alignment where both `secondaryAlignment`
  // and `supplementaryAlignment` are false.
  bool secondary_alignment = 12;

  // True when this alignment is supplementary. Equivalent to SAM flag 0x800.
  // Supplementary alignments are used in the representation of a chimeric
  // alignment. In a chimeric alignment, a read is split into multiple
  // linear alignments that map to different reference contigs. The first
  // linear alignment in the read will be designated as the representative
  // alignment; the remaining linear alignments will be designated as
  // supplementary alignments. These alignments may have different mapping
  // quality scores. In each linear alignment in a chimeric alignment, the
  // read will be hard clipped. The `alignedSequence` and `alignedQuality`
  // fields in the alignment record will only represent the bases for its
  // respective linear alignment.
  bool supplementary_alignment = 13;

  // The bases of the read sequence contained in this alignment record,
  // **without CIGAR operations applied** (equivalent to SEQ in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the
  // CIGAR for this read will begin/end with a hard clip operator that will
  // indicate the length of the excised sequence.
  string aligned_sequence = 14;

  // The quality of the read sequence contained in this alignment record
  // (equivalent to QUAL in SAM).
  // `alignedSequence` and `alignedQuality` may be shorter than the full read
  // sequence and quality. This will occur if the alignment is part of a
  // chimeric alignment, or if the read was trimmed. When this occurs, the
  // CIGAR for this read will begin/end with a hard clip operator that will
  // indicate the length of the excised sequence.
  repeated int32 aligned_quality = 15;

  // The mapping of the primary alignment of the
  // `(readNumber+1)%numberReads` read in the fragment. It replaces
  // mate position and mate strand in SAM.
  Position next_mate_position = 16;

  // Additional read alignment information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}

View File

@ -0,0 +1,106 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/protobuf/struct.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadGroupProto";
option java_package = "com.google.genomics.v1";
// A read group is all the data that's processed the same way by the sequencer.
message ReadGroup {
  // Details of the experiment used to generate a read group.
  message Experiment {
    // A client-supplied library identifier; a library is a collection of DNA
    // fragments which have been prepared for sequencing from a sample. This
    // field is important for quality control as error or bias can be
    // introduced during sample preparation.
    string library_id = 1;

    // The platform unit used as part of this experiment, for example
    // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to
    // the @RG PU field in the SAM spec.
    string platform_unit = 2;

    // The sequencing center used as part of this experiment.
    string sequencing_center = 3;

    // The instrument model used as part of this experiment. This maps to
    // sequencing technology in the SAM spec.
    string instrument_model = 4;
  }

  // A program that was used to generate a read group.
  message Program {
    // The command line used to run this program.
    string command_line = 1;

    // The user specified locally unique ID of the program. Used along with
    // `prevProgramId` to define an ordering between programs.
    string id = 2;

    // The display name of the program. This is typically the colloquial name
    // of the tool used, for example 'bwa' or 'picard'.
    string name = 3;

    // The ID of the program run before this one.
    string prev_program_id = 4;

    // The version of the program run.
    string version = 5;
  }

  // Server-generated read group ID, unique for all read groups.
  // Note: This is different than the @RG ID field in the SAM spec. For that
  // value, see [name][google.genomics.v1.ReadGroup.name].
  string id = 1;

  // The dataset to which this read group belongs.
  string dataset_id = 2;

  // The read group name. This corresponds to the @RG ID field in the SAM
  // spec.
  string name = 3;

  // A free-form text description of this read group.
  string description = 4;

  // A client-supplied sample identifier for the reads in this read group.
  string sample_id = 5;

  // The experiment used to generate this read group.
  Experiment experiment = 6;

  // The predicted insert size of this read group. The insert size is the
  // length of the sequenced DNA fragment from end-to-end, not including the
  // adapters.
  int32 predicted_insert_size = 7;

  // NOTE(review): field numbers 8 and 9 are not used in this message;
  // presumably they belonged to retired fields -- confirm before assigning.

  // The programs used to generate this read group. Programs are always
  // identical for all read groups within a read group set. For this reason,
  // only the first read group in a returned set will have this field
  // populated.
  repeated Program programs = 10;

  // The reference set the reads in this read group are aligned to.
  string reference_set_id = 11;

  // Additional read group information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}

View File

@ -0,0 +1,64 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/genomics/v1/readgroup.proto";
import "google/protobuf/struct.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadGroupSetProto";
option java_package = "com.google.genomics.v1";
// A read group set is a logical collection of read groups, which are
// collections of reads produced by a sequencer. A read group set typically
// models reads corresponding to one sample, sequenced one way, and aligned one
// way.
//
// * A read group set belongs to one dataset.
// * A read group belongs to one read group set.
// * A read belongs to one read group.
//
// More genomics resource definitions are given in [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ReadGroupSet {
  // Server-generated read group set ID, unique for all read group sets.
  string id = 1;

  // The dataset to which this read group set belongs.
  string dataset_id = 2;

  // The reference set to which the reads in this read group set are aligned.
  string reference_set_id = 3;

  // The read group set name. By default this is initialized to the sample
  // name of the sequenced data contained in this set.
  string name = 4;

  // The filename of the original source file for this read group set, if
  // any.
  string filename = 5;

  // The read groups in this set. There are typically 1-10 read groups in a
  // read group set.
  repeated ReadGroup read_groups = 6;

  // Additional read group set information.
  map<string, google.protobuf.ListValue> info = 7;
}

View File

@ -0,0 +1,468 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/genomics/v1/range.proto";
import "google/genomics/v1/readalignment.proto";
import "google/genomics/v1/readgroupset.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadsProto";
option java_package = "com.google.genomics.v1";
// Service exposing a server-streaming search over reads.
service StreamingReadService {
  // Returns a stream of all the reads matching the search request, ordered
  // by reference name, position, and ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    // REST binding: POST /v1/reads:stream, with the whole request as the
    // HTTP body.
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}
// The Readstore. A data store for DNA sequencing Reads.
service ReadServiceV1 {
  // Creates read group sets by asynchronously importing the provided
  // information.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags will be converted to strings - tag types are not preserved
  // - Comments (`@CO`) in the input file header will not be preserved
  // - Original header order of references (`@SQ`) will not be preserved
  // - Any reverse stranded unmapped reads will be reverse complemented, and
  // their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
  // - Unmapped reads will be stripped of positional information (reference
  // name and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Exports a read group set to a BAM file in Google Cloud Storage.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Note that currently there may be some differences between exported BAM
  // files and the original BAM file at the time of import. See
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets]
  // for caveats.
  rpc ExportReadGroupSet(ExportReadGroupSetRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Searches for read group sets matching the criteria.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest) returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Updates a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Deletes a read group set.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Gets a read group set by ID.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists fixed width coverage buckets for a read group set, each of which
  // corresponds to a range of a reference sequence. Each bucket summarizes
  // coverage information across its corresponding genomic range.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest) returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Gets a list of reads for one or more read group sets.
  //
  // Read group sets and the other genomics resources are defined in
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference
  // sequence & position defined over the reference sequences to which the
  // requested read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content
  // (regardless of page size) yield reads in the same order across their
  // respective streams of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}
// The read group set search request.
message SearchReadGroupSetsRequest {
  // Restricts this query to read group sets within the given datasets. At
  // least one ID must be provided.
  repeated string dataset_ids = 1;

  // Only read group sets for which a substring of the name matches this
  // string are returned.
  string name = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the `nextPageToken` value of the
  // previous response.
  string page_token = 2;

  // Upper bound on the number of results returned in a single page. Defaults
  // to 256 when unspecified. The maximum value is 1024.
  int32 page_size = 4;
}
// Response message for searching read group sets.
message SearchReadGroupSetsResponse {
  // The read group sets that matched the search.
  repeated ReadGroupSet read_group_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for importing read group sets.
message ImportReadGroupSetsRequest {
  // Strategies describing how imported read groups are partitioned into read
  // group sets.
  enum PartitionStrategy {
    // No partition strategy was specified.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // Allocates one read group set per file per sample. In most cases this
    // yields one read group set per file. This is the default behavior.
    //
    // For BAM files, read groups are considered to share a sample if they
    // have identical sample names. Furthermore, all reads for each file
    // which do not belong to a read group, if any, will be grouped into a
    // single read group set per-file.
    PER_FILE_PER_SAMPLE = 1;

    // Places all read groups from all imported files into a single read
    // group set. Requires that the headers for all imported files are
    // equivalent. All reads which do not belong to a read group, if any,
    // will be grouped into a separate read group set.
    MERGE_ALL = 2;
  }

  // Required. The ID of the dataset these read group sets will belong to.
  // The caller must have WRITE permissions to this dataset.
  string dataset_id = 1;

  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of
  // those found in the imported file headers. If no reference set id is
  // provided, a best effort is made to associate with a matching reference
  // set.
  string reference_set_id = 4;

  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Those URIs can include wildcards (*), but do not add or remove
  // matching files before import has completed.
  //
  // Note that Google Cloud Storage object listing is only eventually
  // consistent: files added may not be immediately visible to everyone.
  // Thus, if using a wildcard it is preferable not to start the import
  // immediately after the files are created.
  repeated string source_uris = 2;

  // Describes how read groups are partitioned into read group sets.
  PartitionStrategy partition_strategy = 5;
}
// Response message for importing read group sets.
message ImportReadGroupSetsResponse {
  // The IDs of the read group sets that were created by the import.
  repeated string read_group_set_ids = 1;
}
// Request message for exporting a read group set.
message ExportReadGroupSetRequest {
  // Required. The Google Cloud project ID that owns this export. The caller
  // must have WRITE access to this project.
  string project_id = 1;

  // Required. A Google Cloud Storage URI for the exported BAM file. The
  // currently authenticated user must have write access to the new file. An
  // error will be returned if the URI already contains data.
  string export_uri = 2;

  // Required. The ID of the read group set to export. The caller must have
  // READ access to this read group set.
  string read_group_set_id = 3;

  // The reference names to export. When not specified, all reference
  // sequences, including unmapped reads, are exported. Use `*` to export
  // only unmapped reads.
  repeated string reference_names = 4;
}
// Request message for updating a read group set.
message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;

  // The new read group set data. See `updateMask` for details on the
  // mutability of fields.
  ReadGroupSet read_group_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for deleting a read group set.
message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;
}
// Request message for retrieving a read group set.
message GetReadGroupSetRequest {
  // The ID of the read group set.
  string read_group_set_id = 1;
}
// Request message for listing the coverage buckets of a read group set.
message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // Optional. The name of the reference to query, within the reference set
  // associated with this query.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive.
  // Defaults to 0. If specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // unset or 0, defaults to the length of the reference. If specified,
  // `referenceName` must also be specified.
  int64 end = 5;

  // The desired width of each reported coverage bucket in base pairs. The
  // value is rounded down to the nearest precomputed bucket width, which is
  // returned as `bucketWidth` in the response. Defaults to infinity (each
  // bucket spans an entire reference sequence) or the length of the target
  // range, if specified. The smallest precomputed `bucketWidth` is currently
  // 2048 base pairs; this is subject to change.
  int64 target_bucket_width = 6;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the value of `nextPageToken` from the
  // previous response.
  string page_token = 7;

  // Maximum number of results to return in a single page. Defaults to 1024
  // if unspecified. The maximum value is 2048.
  int32 page_size = 8;
}
// A range of the reference sequence over which read coverage has been
// precomputed.
message CoverageBucket {
  // The genomic coordinate range that this bucket spans.
  Range range = 1;

  // The average number of reads aligned to each individual reference base in
  // this bucket.
  float mean_coverage = 2;
}
// Response message for listing coverage buckets.
message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Buckets at the end of
  // a reference sequence may be shorter. This value is omitted if the bucket
  // width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list is sparse: a bucket with 0 overlapping
  // reads is not returned. A bucket never crosses more than one reference
  // sequence. Each bucket has width `bucketWidth`, unless its end is the end
  // of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 3;
}
// Request message for searching reads.
message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of
  // reference sequences; this defines the genomic coordinates for the query.
  // Must specify one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // The IDs of the read groups within which to search for reads. All
  // specified read groups must belong to the same read group sets. Must
  // specify one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set
  // to `*`, only unmapped reads are returned. If unspecified, all reads
  // (mapped and unmapped) are returned.
  string reference_name = 7;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the value of `nextPageToken` from the
  // previous response.
  string page_token = 3;

  // Maximum number of results to return in a single page. Defaults to 256 if
  // unspecified. The maximum value is 2048.
  int32 page_size = 4;
}
// Response message for searching reads.
message SearchReadsResponse {
  // The matching alignments, sorted by mapped genomic coordinate, if any,
  // ascending in position within the same reference. Unmapped reads, which
  // have no position, are returned contiguously and are sorted in ascending
  // lexicographic order by fragment name.
  repeated Read alignments = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for streaming reads.
message StreamReadsRequest {
  // Required. The Google Cloud project ID which will be billed for this
  // access. The caller must have WRITE access to this project.
  string project_id = 1;

  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set
  // to `*`, only unmapped reads are returned.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;

  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ
  // only in their shard parameter. A shard may yield 0 results; this is
  // especially likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker
  // is assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded
  // queries `[0, totalShards)` is equal to those returned by a single
  // unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of
  // 10 `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}
// Response message for streaming reads.
message StreamReadsResponse {
  // The reads (alignments) carried by this response message.
  repeated Read alignments = 1;
}

View File

@ -0,0 +1,282 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReferencesProto";
option java_package = "com.google.genomics.v1";
// Service for searching and retrieving reference sets, references, and the
// bases of a reference.
service ReferenceServiceV1 {
  // Searches for reference sets which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
  rpc SearchReferenceSets(SearchReferenceSetsRequest)
      returns (SearchReferenceSetsResponse) {
    option (google.api.http) = {
      post: "/v1/referencesets/search"
      body: "*"
    };
  }

  // Gets a reference set.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
  rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
    option (google.api.http) = {
      get: "/v1/referencesets/{reference_set_id}"
    };
  }

  // Searches for references which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
  rpc SearchReferences(SearchReferencesRequest)
      returns (SearchReferencesResponse) {
    option (google.api.http) = {
      post: "/v1/references/search"
      body: "*"
    };
  }

  // Gets a reference.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
  rpc GetReference(GetReferenceRequest) returns (Reference) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}"
    };
  }

  // Lists the bases in a reference, optionally restricted to a range.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
  rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}/bases"
    };
  }
}
// A reference is a canonical assembled DNA sequence, intended to act as a
// reference coordinate space for other genomic annotations. For instance, a
// single reference might represent the human chromosome 1 or mitochondrial
// DNA. A reference belongs to one or more reference sets.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Reference {
  // The server-generated reference ID, unique across all references.
  string id = 1;

  // The length of this reference's sequence.
  int64 length = 2;

  // MD5 of the upper-case sequence excluding all whitespace characters
  // (equivalent to SQ:M5 in SAM). The value is represented in lower case
  // hexadecimal format.
  string md5checksum = 3;

  // The name of this reference, for example `22`.
  string name = 4;

  // The URI from which the sequence was obtained. Typically specifies a
  // FASTA format file.
  string source_uri = 5;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
  // ideally with a version number, for example `GCF_000001405.26`.
  repeated string source_accessions = 6;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for
  // human.
  int32 ncbi_taxon_id = 7;
}
// A reference set is a set of references which typically comprise a
// reference assembly for a species, such as `GRCh38` which is representative
// of the human genome. A reference set defines a common coordinate space for
// comparing reference-aligned experimental data. A reference set contains 1
// or more references.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ReferenceSet {
  // The server-generated reference set ID, unique across all reference sets.
  string id = 1;

  // The IDs of the reference objects that are part of this set.
  // `Reference.md5checksum` must be unique within this set.
  repeated string reference_ids = 2;

  // Order-independent MD5 checksum which identifies this reference set. The
  // checksum is computed by sorting all lower case hexadecimal string
  // `reference.md5checksum` (for all references in this set) in ascending
  // lexicographic order, concatenating, and taking the MD5 of that value.
  // The resulting value is represented in lower case hexadecimal format.
  string md5checksum = 3;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for
  // human) indicating the species which this reference set is intended to
  // model. Note that contained references may specify a different
  // `ncbiTaxonId`, as assemblies may contain reference sequences which do
  // not belong to the modeled species, for example EBV in a human reference
  // genome.
  int32 ncbi_taxon_id = 4;

  // Free text description of this reference set.
  string description = 5;

  // Public id of this reference set, such as `GRCh37`.
  string assembly_id = 6;

  // The URI from which the references were obtained.
  string source_uri = 7;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ),
  // ideally with a version number, for example `NC_000001.11`.
  repeated string source_accessions = 8;
}
// Request message for searching reference sets.
message SearchReferenceSetsRequest {
  // If present, return reference sets for which the
  // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches
  // exactly.
  repeated string md5checksums = 1;

  // If present, return reference sets for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
  // match any of these strings. Accession numbers typically have a main
  // number and a version, for example `NC_000001.11`.
  repeated string accessions = 2;

  // If present, return reference sets for which a substring of their
  // `assemblyId` matches this string (case insensitive).
  string assembly_id = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the value of `nextPageToken` from the
  // previous response.
  string page_token = 4;

  // Maximum number of results to return in a single page. Defaults to 1024
  // if unspecified. The maximum value is 4096.
  int32 page_size = 5;
}
// Response message for searching reference sets.
message SearchReferenceSetsResponse {
  // The matching reference sets.
  repeated ReferenceSet reference_sets = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for retrieving a reference set.
message GetReferenceSetRequest {
  // The ID of the reference set.
  string reference_set_id = 1;
}
// Request message for searching references.
message SearchReferencesRequest {
  // If present, return references for which the
  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
  repeated string md5checksums = 1;

  // If present, return references for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
  // any of these strings. Accession numbers typically have a main number and
  // a version, for example `GCF_000001405.26`.
  repeated string accessions = 2;

  // If present, return only references which belong to this reference set.
  string reference_set_id = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the value of `nextPageToken` from the
  // previous response.
  string page_token = 4;

  // Maximum number of results to return in a single page. Defaults to 1024
  // if unspecified. The maximum value is 4096.
  int32 page_size = 5;
}
// Response message for searching references.
message SearchReferencesResponse {
  // The matching references.
  repeated Reference references = 1;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 2;
}
// Request message for retrieving a reference.
message GetReferenceRequest {
  // The ID of the reference.
  string reference_id = 1;
}
// Request message for listing the bases of a reference.
message ListBasesRequest {
  // The ID of the reference.
  string reference_id = 1;

  // The start position (0-based) of this query. Defaults to 0.
  int64 start = 2;

  // The end position (0-based, exclusive) of this query. Defaults to the
  // length of this reference.
  int64 end = 3;

  // Continuation token used to page through large result sets. To fetch the
  // next page of results, set this to the value of `nextPageToken` from the
  // previous response.
  string page_token = 4;

  // Maximum number of bases to return in a single page. Defaults to 200Kbp
  // (kilo base pairs) if unspecified. The maximum value is 10Mbp (mega base
  // pairs).
  int32 page_size = 5;
}
// Response message for listing the bases of a reference.
message ListBasesResponse {
  // The offset position (0-based) of the given `sequence` from the start of
  // this `Reference`. This value will differ for each page in a paginated
  // request.
  int64 offset = 1;

  // A substring of the bases that make up this reference.
  string sequence = 2;

  // Continuation token used to page through large result sets. Pass this
  // value in a subsequent request to obtain the next page of results. Empty
  // when there are no additional results.
  string next_page_token = 3;
}

View File

@ -0,0 +1,903 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1;
import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "VariantsProto";
option java_package = "com.google.genomics.v1";
// Service exposing server-streaming access to variants.
service StreamingVariantService {
  // Returns a stream of all the variants matching the search request,
  // ordered by reference name, position, and ID.
  rpc StreamVariants(StreamVariantsRequest)
      returns (stream StreamVariantsResponse) {
    option (google.api.http) = {
      post: "/v1/variants:stream"
      body: "*"
    };
  }
}
service VariantServiceV1 {
// Creates variant data by asynchronously importing the provided information.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// The variants for import will be merged with any existing variant that
// matches its reference sequence, start, end, reference bases, and
// alternative bases. If no such variant exists, a new one will be created.
//
// When variants are merged, the call information from the new variant
// is added to the existing variant, and Variant info fields are merged
// as specified in
// [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
// As a special case, for single-sample VCF files, QUAL and FILTER fields will
// be moved to the call level; these are sometimes interpreted in a
// call-specific context.
// Imported VCF headers are appended to the metadata already in a variant set.
rpc ImportVariants(ImportVariantsRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { post: "/v1/variants:import" body: "*" };
}
// Creates a new variant set.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// The provided variant set must have a valid `datasetId` set - all other
// fields are optional. Note that the `id` field will be ignored, as this is
// assigned by the server.
rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
option (google.api.http) = { post: "/v1/variantsets" body: "variant_set" };
}
// Exports variant set data to an external destination.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc ExportVariantSet(ExportVariantSetRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { post: "/v1/variantsets/{variant_set_id}:export" body: "*" };
}
// Gets a variant set by ID.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
option (google.api.http) = { get: "/v1/variantsets/{variant_set_id}" };
}
// Returns a list of all variant sets matching search criteria.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Implements
// [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
rpc SearchVariantSets(SearchVariantSetsRequest) returns (SearchVariantSetsResponse) {
option (google.api.http) = { post: "/v1/variantsets/search" body: "*" };
}
// Deletes a variant set including all variants, call sets, and calls within.
// This is not reversible.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc DeleteVariantSet(DeleteVariantSetRequest) returns (google.protobuf.Empty) {
option (google.api.http) = { delete: "/v1/variantsets/{variant_set_id}" };
}
// Updates a variant set using patch semantics.
//
// For the definitions of variant sets and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
option (google.api.http) = { patch: "/v1/variantsets/{variant_set_id}" body: "variant_set" };
}
// Gets a list of variants matching the criteria.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Implements
// [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
option (google.api.http) = { post: "/v1/variants/search" body: "*" };
}
// Creates a new variant.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc CreateVariant(CreateVariantRequest) returns (Variant) {
option (google.api.http) = { post: "/v1/variants" body: "variant" };
}
// Updates a variant.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// This method supports patch semantics. Returns the modified variant without
// its calls.
rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
option (google.api.http) = { patch: "/v1/variants/{variant_id}" body: "variant" };
}
// Deletes a variant.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
option (google.api.http) = { delete: "/v1/variants/{variant_id}" };
}
// Gets a variant by ID.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc GetVariant(GetVariantRequest) returns (Variant) {
option (google.api.http) = { get: "/v1/variants/{variant_id}" };
}
// Merges the given variants with existing variants.
//
// For the definitions of variants and other genomics resources, see
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Each variant will be
// merged with an existing variant that matches its reference sequence,
// start, end, reference bases, and alternative bases. If no such variant
// exists, a new one will be created.
//
// When variants are merged, the call information from the new variant
// is added to the existing variant. Variant info fields are merged as
// specified in the
// [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
// field of the MergeVariantsRequest.
//
// Please exercise caution when using this method! It is easy to introduce
// mistakes in existing variants and difficult to back out of them. For
// example, suppose you were trying to merge a new variant with an existing
// one and both variants contain calls that belong to callsets with the same
// callset ID.
//
// // Existing variant - irrelevant fields trimmed for clarity
// {
// "variantSetId": "10473108253681171589",
// "referenceName": "1",
// "start": "10582",
// "referenceBases": "G",
// "alternateBases": [
// "A"
// ],
// "calls": [
// {
// "callSetId": "10473108253681171589-0",
// "callSetName": "CALLSET0",
// "genotype": [
// 0,
// 1
// ]
// }
// ]
// }
//
// // New variant with conflicting call information
// {
// "variantSetId": "10473108253681171589",
// "referenceName": "1",
// "start": "10582",
// "referenceBases": "G",
// "alternateBases": [
// "A"
// ],
// "calls": [
// {
// "callSetId": "10473108253681171589-0",
// "callSetName": "CALLSET0",
// "genotype": [
// 1,
// 1
// ]
// }
// ]
// }
//
// The resulting merged variant would overwrite the existing calls with those
// from the new variant:
//
// {
// "variantSetId": "10473108253681171589",
// "referenceName": "1",
// "start": "10582",
// "referenceBases": "G",
// "alternateBases": [
// "A"
// ],
// "calls": [
// {
// "callSetId": "10473108253681171589-0",
// "callSetName": "CALLSET0",
// "genotype": [
// 1,
// 1
// ]
// }
// ]
// }
//
// This may be the desired outcome, but it is up to the user to determine
// if that is indeed the case.
rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
  option (google.api.http) = {
    post: "/v1/variants:merge"
    body: "*"
  };
}
// Returns the list of call sets that match the given criteria.
//
// Call sets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Implements
// [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
  option (google.api.http) = {
    post: "/v1/callsets/search"
    body: "*"
  };
}
// Creates a new call set.
//
// Call sets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
  option (google.api.http) = {
    post: "/v1/callsets"
    body: "call_set"
  };
}
// Updates a call set. This method supports patch semantics.
//
// Call sets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
  option (google.api.http) = {
    patch: "/v1/callsets/{call_set_id}"
    body: "call_set"
  };
}
// Deletes a call set.
//
// Call sets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
  option (google.api.http) = {
    delete: "/v1/callsets/{call_set_id}"
  };
}
// Gets a call set by ID.
//
// Call sets and the other genomics resources are defined in
// [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
  option (google.api.http) = {
    get: "/v1/callsets/{call_set_id}"
  };
}
}
// Metadata describes a single piece of variant call metadata. Each entry
// carries a top-level key plus either a single string value (`value`) or a
// list of key-value pairs (`info`). `value` and `info` are mutually
// exclusive.
message VariantSetMetadata {
  // The kinds of data a metadata entry can describe; see the `type` field.
  enum Type {
    TYPE_UNSPECIFIED = 0;
    INTEGER = 1;
    FLOAT = 2;
    FLAG = 3;
    CHARACTER = 4;
    STRING = 5;
  }

  // The top-level key.
  string key = 1;

  // The value field, used for simple metadata.
  string value = 2;

  // A user-provided ID field; this API does not enforce it. Two or more
  // pieces of structured metadata whose id and key fields are identical are
  // considered equivalent.
  string id = 4;

  // The type of data. Possible types include: Integer, Float, Flag,
  // Character, and String.
  Type type = 5;

  // How many values can be included in a field described by this metadata.
  string number = 8;

  // A textual description of this metadata.
  string description = 7;

  // The remaining structured metadata key-value pairs. This must be of the
  // form map<string, string[]> (string key mapping to a list of string
  // values).
  map<string, google.protobuf.ListValue> info = 3;
}
// A variant set is a collection of call sets and variants, together with
// summary statistics of those contents. A variant set belongs to a dataset.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message VariantSet {
  // The dataset to which this variant set belongs.
  string dataset_id = 1;

  // The server-generated variant set ID, unique across all variant sets.
  string id = 2;

  // The reference set to which the variant set is mapped. The reference set
  // describes the alignment provenance of the variant set, whereas the
  // `referenceBounds` describe the shape of the actual variant data. The
  // reference names of the reference set are a superset of those found in
  // the `referenceBounds`.
  //
  // For example, for a variant set that is mapped to the GRCh38 reference set
  // and contains a single variant on reference 'X', `referenceBounds` would
  // contain only an entry for 'X', while the associated reference set
  // enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
  string reference_set_id = 6;

  // A list of all references used by the variants in a variant set, each
  // with its associated coordinate upper bound.
  repeated ReferenceBound reference_bounds = 5;

  // The metadata associated with this variant set.
  repeated VariantSetMetadata metadata = 4;

  // User-specified, mutable name.
  string name = 7;

  // A textual description of this variant set.
  string description = 8;
}
// A variant represents a change in DNA sequence relative to a reference
// sequence; for example, a SNP or an insertion. Variants belong to a
// variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
//
// Each of the calls on a variant represents a determination of genotype with
// respect to that variant. For example, a call might assign probability of
// 0.32 to the occurrence of a SNP named rs1234 in a sample named NA12345. A
// call belongs to a call set, which contains related calls typically from
// one sample.
message Variant {
  // The ID of the variant set this variant belongs to.
  string variant_set_id = 15;

  // The server-generated variant ID, unique across all variants.
  string id = 2;

  // Names for the variant, for example a RefSNP ID.
  repeated string names = 3;

  // The date this variant was created, in milliseconds from the epoch.
  int64 created = 12;

  // The reference on which this variant occurs (such as `chr20` or `X`).
  string reference_name = 14;

  // The position at which this variant occurs (0-based). This corresponds to
  // the first base of the string of reference bases.
  int64 start = 16;

  // The end position (0-based) of this variant. This corresponds to the
  // first base after the last base in the reference allele, so the length of
  // the reference allele is (end - start). This is useful for variants that
  // don't explicitly give alternate bases, for example large deletions.
  int64 end = 13;

  // The reference bases for this variant. They start at the given position.
  string reference_bases = 6;

  // The bases that appear instead of the reference bases.
  repeated string alternate_bases = 7;

  // A measure of how likely this variant is to be real. A higher value is
  // better.
  double quality = 8;

  // A list of filters (normally quality filters) this variant has failed.
  // `PASS` indicates this variant has passed all filters.
  repeated string filter = 9;

  // A map of additional variant information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 10;

  // The variant calls for this particular variant. Each one represents the
  // determination of genotype with respect to this variant.
  repeated VariantCall calls = 11;
}
// A call represents the determination of genotype with respect to a
// particular variant. It may include associated information such as quality
// and phasing. For example, a call might assign a probability of 0.32 to the
// occurrence of a SNP named rs1234 in a call set with the name NA12345.
message VariantCall {
  // The ID of the call set this variant call belongs to.
  string call_set_id = 8;

  // The name of the call set this variant call belongs to.
  string call_set_name = 9;

  // The genotype of this variant call. Each value represents either the
  // value of the `referenceBases` field or a 1-based index into
  // `alternateBases`. If a variant had a `referenceBases` value of `T` and an
  // `alternateBases` value of `["A", "C"]`, and the `genotype` was `[2, 1]`,
  // that would mean the call represented the heterozygous value `CA` for
  // this variant. If the `genotype` was instead `[0, 1]`, the represented
  // value would be `TA`. Ordering of the genotype values is important if the
  // `phaseset` is present. If a genotype is not called (that is, a `.` is
  // present in the GT string) -1 is returned.
  repeated int32 genotype = 7;

  // If this field is present, this variant call's genotype ordering implies
  // the phase of the bases and is consistent with any other variant calls in
  // the same reference sequence which have the same phaseset value. When
  // importing data from VCF, if the genotype data was phased but no phase
  // set was specified this field will be set to `*`.
  string phaseset = 5;

  // The genotype likelihoods for this variant call. Each array entry
  // represents how likely a specific genotype is for this call. The value
  // ordering is defined by the GL tag in the VCF spec. If Phred-scaled
  // genotype likelihood scores (PL) are available and log10(P) genotype
  // likelihood scores (GL) are not, PL scores are converted to GL scores. If
  // both are available, PL scores are stored in `info`.
  repeated double genotype_likelihood = 6;

  // A map of additional variant call information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 2;
}
// A call set is a collection of variant calls, typically for one sample. It
// belongs to a variant set.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message CallSet {
  // The server-generated call set ID, unique across all call sets.
  string id = 1;

  // The call set name.
  string name = 2;

  // The sample ID this call set corresponds to.
  string sample_id = 7;

  // The IDs of the variant sets this call set belongs to. This field must
  // have exactly length one, as a call set belongs to a single variant set.
  // It is repeated only for compatibility with the
  // [GA4GH 0.5.1
  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
  repeated string variant_set_ids = 6;

  // The date this call set was created in milliseconds from the epoch.
  int64 created = 5;

  // A map of additional call set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 4;
}
// ReferenceBound records an upper bound for the starting coordinate of
// variants in a particular reference.
message ReferenceBound {
  // The name of the reference associated with this reference bound.
  string reference_name = 1;

  // An inclusive upper bound on the starting coordinate of any variant in
  // the reference sequence.
  int64 upper_bound = 2;
}
// The variant data import request.
message ImportVariantsRequest {
  // The file formats from which variant data can be imported.
  enum Format {
    FORMAT_UNSPECIFIED = 0;

    // VCF (Variant Call Format). The VCF files may be gzip compressed. gVCF
    // is also supported.
    FORMAT_VCF = 1;

    // Complete Genomics masterVarBeta format. The masterVarBeta files may be
    // bzip2 compressed.
    FORMAT_COMPLETE_GENOMICS = 2;
  }

  // Required. The variant set to which variant data should be imported.
  string variant_set_id = 1;

  // A list of URIs referencing variant files in Google Cloud Storage. URIs
  // can include wildcards [as described
  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
  // Note that recursive wildcards ('**') are not supported.
  repeated string source_uris = 2;

  // The format of the variant data being imported. If unspecified, defaults
  // to `VCF`.
  Format format = 3;

  // Convert reference names to the canonical representation.
  // hg19 haplotypes (those reference names containing "_hap") are not
  // modified in any way. All other reference names are modified according to
  // the following rules:
  // The reference name is capitalized.
  // The "chr" prefix is dropped for all autosomes and sex chromosomes.
  // For example "chr17" becomes "17" and "chrX" becomes "X".
  // All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
  bool normalize_reference_names = 5;

  // A mapping between info field keys and the InfoMergeOperations to be
  // performed on them. This is plumbed down to the MergeVariantRequests
  // generated by the resulting import job.
  map<string, InfoMergeOperation> info_merge_config = 6;
}
// The variant data import response.
message ImportVariantsResponse {
  // The IDs of the call sets that were created during the import.
  repeated string call_set_ids = 1;
}
// The CreateVariantSet request.
message CreateVariantSetRequest {
  // Required. The variant set to be created. It must have a valid
  // `datasetId`.
  VariantSet variant_set = 1;
}
// The variant data export request.
message ExportVariantSetRequest {
  // The destinations to which variant data can be exported.
  enum Format {
    FORMAT_UNSPECIFIED = 0;

    // Export the data to Google BigQuery.
    FORMAT_BIGQUERY = 1;
  }

  // Required. The ID of the variant set that contains variant data which
  // should be exported. The caller must have READ access to this variant
  // set.
  string variant_set_id = 1;

  // If provided, only variant call information from the specified call sets
  // will be exported. By default all variant calls are exported.
  repeated string call_set_ids = 2;

  // Required. The Google Cloud project ID that owns the destination BigQuery
  // dataset. The caller must have WRITE access to this project. This project
  // will also own the resulting export job.
  string project_id = 3;

  // The format for the exported data.
  Format format = 4;

  // Required. The BigQuery dataset to export data to. This dataset must
  // already exist. Note that this is distinct from the Genomics concept of
  // "dataset".
  string bigquery_dataset = 5;

  // Required. The BigQuery table to export data to. If the table doesn't
  // exist, it will be created. If it already exists, it will be overwritten.
  string bigquery_table = 6;
}
// The get variant set request.
message GetVariantSetRequest {
  // Required. The ID of the variant set.
  string variant_set_id = 1;
}
// The search variant sets request.
message SearchVariantSetsRequest {
  // Exactly one dataset ID must be provided here. Only variant sets which
  // belong to this dataset will be returned.
  repeated string dataset_ids = 1;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. If
  // unspecified, defaults to 1024.
  int32 page_size = 3;
}
// The search variant sets response.
message SearchVariantSetsResponse {
  // The variant sets belonging to the requested dataset.
  repeated VariantSet variant_sets = 1;

  // The continuation token, used to page through large result sets. Provide
  // this value in a subsequent request to return the next page of results.
  // This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
// The delete variant set request.
message DeleteVariantSetRequest {
  // The ID of the variant set that is to be deleted.
  string variant_set_id = 1;
}
// The update variant set request.
message UpdateVariantSetRequest {
  // The ID of the variant set to be updated (must already exist).
  string variant_set_id = 1;

  // The new variant set data. Documented as: only the variant_set.metadata
  // will be considered for update.
  // NOTE(review): `update_mask` below also lists `name` and `description` as
  // supported fields -- confirm which statement is current.
  VariantSet variant_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [metadata][google.genomics.v1.VariantSet.metadata].
  // * [name][google.genomics.v1.VariantSet.name].
  // * [description][google.genomics.v1.VariantSet.description].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 5;
}
// The variant search request.
message SearchVariantsRequest {
  // No more than one variant set ID may be provided. Only variants from this
  // variant set will be returned. If omitted, a call set id must be included
  // in the request.
  repeated string variant_set_ids = 1;

  // Only return variants which have exactly this name.
  string variant_name = 2;

  // Only return variant calls which belong to call sets with these ids.
  // Leaving this blank returns all variant calls. If a variant has no calls
  // belonging to any of these call sets, it won't be returned at all.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which overlapping
  // variants should be returned. If unspecified, defaults to 0.
  int64 start = 5;

  // The end of the window, 0-based exclusive. If unspecified or 0, defaults
  // to the length of the reference.
  int64 end = 6;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of variants to return in a single page. If
  // unspecified, defaults to 5000. The maximum value is 10000.
  int32 page_size = 8;

  // The maximum number of calls to return in a single page. Note that this
  // limit may be exceeded in the event that a matching variant contains more
  // calls than the requested maximum. If unspecified, defaults to 5000. The
  // maximum value is 10000.
  int32 max_calls = 9;
}
// The variant search response.
message SearchVariantsResponse {
  // The list of matching Variants.
  repeated Variant variants = 1;

  // The continuation token, used to page through large result sets. Provide
  // this value in a subsequent request to return the next page of results.
  // This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
// The create variant request.
message CreateVariantRequest {
  // The variant to be created.
  Variant variant = 1;
}
// The update variant request.
message UpdateVariantRequest {
  // The ID of the variant to be updated.
  string variant_id = 1;

  // The new variant data.
  Variant variant = 2;

  // An optional mask specifying which fields to update. At this time,
  // mutable fields are [names][google.genomics.v1.Variant.names] and
  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names"
  // and "info". If unspecified, all mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}
// The delete variant request.
message DeleteVariantRequest {
  // The ID of the variant that is to be deleted.
  string variant_id = 1;
}
// The get variant request.
message GetVariantRequest {
  // The ID of the variant.
  string variant_id = 1;
}
// The merge variants request.
message MergeVariantsRequest {
  // The destination variant set.
  string variant_set_id = 1;

  // The variants to be merged with existing variants.
  repeated Variant variants = 2;

  // A mapping between info field keys and the InfoMergeOperations to be
  // performed on them.
  map<string, InfoMergeOperation> info_merge_config = 3;
}
// The call set search request.
message SearchCallSetsRequest {
  // Restrict the query to call sets within the given variant sets. At least
  // one ID must be provided.
  repeated string variant_set_ids = 1;

  // Only return call sets for which a substring of the name matches this
  // string.
  string name = 2;

  // The continuation token, used to page through large result sets. To get
  // the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If
  // unspecified, defaults to 1024.
  int32 page_size = 4;
}
// The call set search response.
message SearchCallSetsResponse {
  // The list of matching call sets.
  repeated CallSet call_sets = 1;

  // The continuation token, used to page through large result sets. Provide
  // this value in a subsequent request to return the next page of results.
  // This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
// The create call set request.
message CreateCallSetRequest {
  // The call set to be created.
  CallSet call_set = 1;
}
// The update call set request.
message UpdateCallSetRequest {
  // The ID of the call set to be updated.
  string call_set_id = 1;

  // The new call set data.
  CallSet call_set = 2;

  // An optional mask specifying which fields to update. At this time, the
  // only mutable field is [name][google.genomics.v1.CallSet.name]. The only
  // acceptable value is "name". If unspecified, all mutable fields will be
  // updated.
  google.protobuf.FieldMask update_mask = 3;
}
// The delete call set request.
message DeleteCallSetRequest {
  // The ID of the call set that is to be deleted.
  string call_set_id = 1;
}
// The get call set request.
message GetCallSetRequest {
  // The ID of the call set.
  string call_set_id = 1;
}
// The stream variants request.
message StreamVariantsRequest {
  // Required. The Google Cloud project ID which will be billed for this
  // access. The caller must have WRITE access to this project.
  string project_id = 1;

  // The variant set ID from which to stream variants.
  string variant_set_id = 2;

  // Only return variant calls which belong to call sets with these IDs.
  // Leaving this blank returns all variant calls.
  repeated string call_set_ids = 3;

  // Required. Only return variants in this reference sequence.
  string reference_name = 4;

  // The beginning of the window (0-based, inclusive) for which overlapping
  // variants should be returned.
  int64 start = 5;

  // The end of the window (0-based, exclusive) for which overlapping
  // variants should be returned.
  int64 end = 6;
}
// The stream variants response.
message StreamVariantsResponse {
  // The variants carried by this response.
  repeated Variant variants = 1;
}
// Operations to be performed during import on Variant info fields. One
// operation is set for each info field in the info_merge_config map of
// ImportVariantsRequest, which is plumbed down to the MergeVariantRequests
// generated by the import job.
enum InfoMergeOperation {
  INFO_MERGE_OPERATION_UNSPECIFIED = 0;

  // By default, Variant info fields are persisted if the Variant doesn't
  // already exist in the variantset. If the Variant is equivalent to a
  // Variant already in the variantset, the incoming Variant's info field is
  // ignored in favor of that of the already persisted Variant.
  IGNORE_NEW = 1;

  // This operation removes an info field from the incoming Variant and
  // persists this info field in each of the incoming Variant's Calls.
  MOVE_TO_CALLS = 2;
}

View File

@ -0,0 +1,614 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.genomics.v1alpha2;
import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/code.proto";
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics";
option java_multiple_files = true;
option java_outer_classname = "PipelinesProto";
option java_package = "com.google.genomics.v1a";
// A service for running genomics pipelines.
service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has every field other than `pipelineId` populated, and returns the same
  // pipeline with `pipelineId` populated. This id can be used to run the
  // pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. If `pipelineId` is specified in the request, then run a
  // saved pipeline. If `ephemeralPipeline` is specified, then run that
  // pipeline once without saving a copy.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Retrieves a pipeline based on ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes a pipeline based on ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Should only be called by VMs
  // created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest) returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets status of a given operation. Any new timestamps (as determined by
  // description) are appended to TimestampEvents. Should only be called by
  // VMs created by the Pipelines Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}
// Describes a Compute Engine resource that is being managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message ComputeEngine {
  // The instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}
// Runtime metadata that will be populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information that is specific to Google Compute Engine.
  ComputeEngine compute_engine = 1;
}
// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined as
// a docker image and a command to run within that image. Each pipeline is
// run on a Google Compute Engine VM. A pipeline can be created with the
// `create` method and then later run with the `run` method, or a pipeline
// can be defined and run all at once with the `run` method.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must
  // have WRITE access.
  string project_id = 1;

  // Required. A user specified pipeline name that does not have to be
  // unique. This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id that is generated by the service when CreatePipeline
  // is called. It cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}
// The request to create a pipeline. The pipeline field here should not have
// `pipelineId` populated, since the server populates it.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
}
// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline
  // documentation. All input parameters that do not have default values must
  // be specified. If parameters with defaults are specified here, the
  // defaults will be overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values must be
  // specified. If parameters with defaults are specified here, the defaults
  // will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // This field is deprecated. Use `labels` instead. Client-specified
  // pipeline operation identifier.
  //
  // Marked with the `deprecated` option so that generated code carries the
  // deprecation too; the field number and wire format are unchanged.
  string client_id = 5 [deprecated = true];

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results to
  // the user.
  LoggingOptions logging = 7;

  // How long to keep the VM up after a failure (for example docker command
  // failed, copying input or output files failed, etc). While the VM is up,
  // one can ssh into the VM to debug. Default is 0; maximum allowed value is
  // 1 day.
  google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;

  // Labels to apply to this pipeline run. Labels will also be applied to
  // compute resources (VM, disks) created by this pipeline run. When listing
  // operations, operations can be [filtered by labels]
  // [google.longrunning.ListOperationsRequest.filter].
  // Label keys may not be empty; label values may be empty. Non-empty labels
  // must be 1-63 characters long, and comply with [RFC1035]
  // (https://www.ietf.org/rfc/rfc1035.txt).
  // Specifically, the name must be 1-63 characters long and match the regular
  // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first
  // character must be a lowercase letter, and all following characters must be
  // a dash, lowercase letter, or digit, except the last character, which cannot
  // be a dash.
  map<string, string> labels = 9;
}
// The request to run a pipeline. If `pipelineId` is specified, it refers to
// a saved pipeline created with CreatePipeline and set as the `pipelineId`
// of the returned Pipeline object. If `ephemeralPipeline` is specified, that
// pipeline is run once with the given args and not saved. It is an error to
// specify both `pipelineId` and `ephemeralPipeline`. `pipelineArgs` must be
// specified.
message RunPipelineRequest {
  // The pipeline to run: either a saved one or an ephemeral one.
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}
// A request to get a saved pipeline by id.
message GetPipelineRequest {
  // The ID of the pipeline. Caller must have READ access to the project in
  // which this pipeline is defined.
  string pipeline_id = 1;
}
// A request to list pipelines in a given project. Pipelines can be filtered
// by name using `namePrefix`: all pipelines with names that begin with
// `namePrefix` will be returned. Uses standard pagination: `pageSize`
// indicates how many pipelines to return, and `pageToken` comes from a
// previous ListPipelinesResponse to indicate offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller must
  // have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be returned. If
  // unspecified, all pipelines in the project, up to `pageSize`, will be
  // returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and max is 2048.
  int32 page_size = 3;

  // Token to use to indicate where to start getting results. If unspecified,
  // returns the first page of results.
  string page_token = 4;
}
// Response message for ListPipelines, holding at most `pageSize` pipelines.
//
// When the response holds `pageSize` pipelines and more pipelines exist,
// `nextPageToken` is populated; it should be passed as the `pageToken`
// argument of a subsequent ListPipelines request.
message ListPipelinesResponse {
  // The pipelines that matched the request.
  repeated Pipeline pipelines = 1;

  // Token to use to get the next page of results.
  string next_page_token = 2;
}
// Request message for deleting a saved pipeline by its ID.
message DeletePipelineRequest {
  // ID of the pipeline to delete. The caller must have WRITE access to the
  // project in which this pipeline is defined.
  string pipeline_id = 1;
}
// Request to get controller configuration. Intended only for VMs created by
// the Pipelines Service, not for end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  // NOTE(review): this field carries no documentation in this file.
  // Presumably a token used to validate the calling VM; confirm with the
  // service owners.
  uint64 validation_token = 2;
}
// Holds the information that the controller fetches from the server in
// order to run. Intended only for VMs created by the Pipelines Service, not
// for end users.
message ControllerConfig {
  // Wraps a list of strings so that the list can serve as a map value (map
  // values cannot themselves be `repeated`); used by `gcs_sources` and
  // `gcs_sinks`.
  message RepeatedString {
    // The wrapped string values.
    repeated string values = 1;
  }

  // NOTE(review): none of the fields below is documented in this file. The
  // remarks on them are inferred from names and types only and must be
  // confirmed before being relied upon.

  // Presumably the Docker image to run.
  string image = 1;

  // Presumably the command to execute.
  string cmd = 2;

  // Presumably the Google Cloud Storage path for logs.
  string gcs_log_path = 3;

  // Presumably the machine type of the VM.
  string machine_type = 4;

  // String-to-string map; key/value semantics are not documented here.
  map<string, string> vars = 5;

  // String-to-string map; key/value semantics are not documented here.
  map<string, string> disks = 6;

  // Map from a string key to a list of strings; presumably Google Cloud
  // Storage locations to copy from.
  map<string, RepeatedString> gcs_sources = 7;

  // Map from a string key to a list of strings; presumably Google Cloud
  // Storage locations to copy to.
  map<string, RepeatedString> gcs_sinks = 8;
}
// Records one major event in job execution together with the time at which
// it occurred.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time at which this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}
// Request to set operation status. Intended only for VMs created by the
// Pipelines Service, not for end users.
message SetOperationStatusRequest {
  // NOTE(review): none of the fields below is documented in this file. The
  // remarks on them are inferred from names and types only and must be
  // confirmed before being relied upon.

  // Presumably the ID of the operation whose status is being set.
  string operation_id = 1;

  // Events, each a description plus a timestamp, reported with this request.
  repeated TimestampEvent timestamp_events = 2;

  // Presumably the error code describing the outcome of the operation.
  google.rpc.Code error_code = 3;

  // Presumably a message accompanying `error_code`.
  string error_message = 4;

  // Purpose not documented here; presumably a token used to validate the
  // calling VM.
  uint64 validation_token = 5;
}
// Describes a Google Cloud Service Account.
message ServiceAccount {
  // The service account's email address. Defaults to `default`, which uses
  // the compute service account associated with the project.
  string email = 1;

  // Scopes to be enabled for this service account on the VM. These scopes
  // are included automatically:
  //
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/logging.write
  // * https://www.googleapis.com/auth/monitoring.write
  repeated string scopes = 2;
}
// Logging options for the pipeline run.
message LoggingOptions {
  // Google Cloud Storage location to which the pipeline logs are copied.
  // Two forms are accepted:
  //
  // * A fully qualified directory path: logs are output into that
  //   directory, with a unique identifier as the filename.
  // * A fully specified path, which must end in `.log`: that path is used
  //   as given, and the user must ensure that logs are not overwritten.
  //
  // Stdout and stderr logs from the run are also generated and output as
  // `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}
// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD. See
      // https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // NOTE(review): field numbers 5 and 7 are not used by any field visible
    // in this message. If they belonged to removed fields they should be
    // declared `reserved`; confirm before reusing them.

    // Deprecated. Disks created by the Pipelines API will be deleted at the
    // end of the pipeline run, regardless of what this field is set to.
    //
    // The `deprecated` option makes the documented deprecation visible to
    // code generators and tooling; it does not affect the wire format.
    bool auto_delete = 6 [deprecated = true];

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // Whether to use preemptible VMs. Defaults to `false`. In order to use
  // this, must be true for both create time and run time. Cannot be true at
  // run time if false at create time.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;

  // Whether to withhold an external IP from the instance, so that the VM
  // only has a private IP. This is an experimental feature that may go away.
  // Defaults to false.
  // Corresponds to `--no_address` flag for [gcloud compute instances create]
  // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create).
  // In order to use this, must be true for both create time and run time.
  // Cannot be true at run time if false at create time. If you need to ssh
  // into a private IP VM for debugging, you can ssh to a public VM and then
  // ssh into the private VM's Internal IP. If noAddress is set, this pipeline
  // run may only load docker images from Google Container Registry and not
  // Docker Hub.
  // ** Note: To use this option, your project must be in Google Access for
  // Private IPs Early Access Program.**
  bool no_address = 7;
}
// Parameters facilitate setting and delivering data into the
// pipeline's execution environment. They are defined at create time,
// with optional defaults, and can be overridden at run time.
//
// If `localCopy` is unset, then the parameter specifies a string that
// is passed as-is into the pipeline, as the value of the environment
// variable with the given name. A default value can be optionally
// specified at create time. The default can be overridden at run time
// using the inputs map. If no default is given, a value must be
// supplied at runtime.
//
// If `localCopy` is defined, then the parameter specifies a data
// source or sink, both in Google Cloud Storage and on the Docker container
// where the pipeline computation is run. The [service account associated with
// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
// default the project's Compute Engine service account) must have access to the
// Google Cloud Storage paths.
//
// At run time, the Google Cloud Storage paths can be overridden if a default
// was provided at create time, or must be set otherwise. The pipeline runner
// should add a key/value pair to either the inputs or outputs map. The
// indicated data copies will be carried out before/after pipeline execution,
// just as if the corresponding arguments were provided to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// We create a disk named `pd1`, mount it on the host VM, and map
// `/mnt/pd1` to `/mnt/disk` in the docker container. At
// runtime, an entry for `input_file` would be required in the inputs
// map, such as:
//
// ```
// inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// This would generate the following gsutil call:
//
// ```
// gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
// <thead>
// <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
// </thead>
// <tbody>
// <tr><td>file</td><td>file</td></tr>
// <tr><td>glob</td><td>directory</td></tr>
// </tbody>
// </table>
//
// For outputs, the direction of the copy is reversed:
//
// ```
// gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
// <thead>
// <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
// </thead>
// <tbody>
// <tr><td>file</td><td>file</td></tr>
// <tr>
// <td>file</td>
// <td>directory - directory must already exist</td>
// </tr>
// <tr>
// <td>glob</td>
// <td>directory - directory will be created if it doesn't exist</td></tr>
// </tbody>
// </table>
//
// One restriction, due to Docker limitations, is that for outputs that are
// found on the boot disk, the local path cannot be a glob and must be a file.
message PipelineParameter {
  // Describes how a remote file is copied to and from the VM.
  message LocalCopy {
    // Required. Path within the user's docker container that this input is
    // localized to and from, relative to the mount point of the specified
    // disk. For example: `file.txt`.
    string path = 1;

    // Required. Name of the disk on which this parameter is located: either
    // the name of one of the disks specified in the Resources field, or
    // "boot", which represents the Docker instance's boot disk and has a
    // mount point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter. The pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // NOTE(review): field numbers 3 and 4 are not used by any field visible in
  // this message; confirm their history before reusing them.

  // Default value for this parameter; it can be overridden at runtime. When
  // `localCopy` is present, this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // When present, marks this parameter for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be, while the
  // value given to this parameter (either at runtime or using
  // `defaultValue`) must be the remote path where the file should be.
  LocalCopy local_copy = 6;
}
// The Docker executor specification.
message DockerExecutor {
  // Required. Name of the image, from either Docker Hub or Google Container
  // Registry. Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command, or newline-delimited script, to run. The command
  // string is executed within a bash shell.
  //
  // When the command exits with a non-zero exit code, output parameter
  // de-localization is skipped and the pipeline operation's
  // [`error`][google.longrunning.Operation.error] field is populated.
  //
  // Maximum command string length is 16384.
  string cmd = 2;
}