// Mirror of https://github.com/titanscouting/tra-analysis.git
// (synced 2025-01-10 07:15:55 +00:00; 469 lines, 20 KiB, Protocol Buffer).
// Copyright 2016 Google Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
syntax = "proto3";
|
|
|
|
package google.genomics.v1;
|
|
|
|
import "google/api/annotations.proto";
|
|
import "google/genomics/v1/range.proto";
|
|
import "google/genomics/v1/readalignment.proto";
|
|
import "google/genomics/v1/readgroupset.proto";
|
|
import "google/longrunning/operations.proto";
|
|
import "google/protobuf/empty.proto";
|
|
import "google/protobuf/field_mask.proto";
|
|
|
|
option cc_enable_arenas = true;
|
|
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
|
|
option java_multiple_files = true;
|
|
option java_outer_classname = "ReadsProto";
|
|
option java_package = "com.google.genomics.v1";
|
|
|
|
|
|
// Streaming access to reads. Exposes a single server-streaming RPC that
// returns the reads matching a request.
service StreamingReadService {
  // Returns a stream of all the reads matching the search request, ordered
  // by reference name, position, and ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}
|
|
|
|
// The Readstore. A data store for DNA sequencing Reads.
service ReadServiceV1 {
  // Creates read group sets by asynchronously importing the provided
  // information.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags will be converted to strings - tag types are not preserved
  // - Comments (`@CO`) in the input file header will not be preserved
  // - Original header order of references (`@SQ`) will not be preserved
  // - Any reverse stranded unmapped reads will be reverse complemented, and
  // their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
  // - Unmapped reads will be stripped of positional information (reference name
  // and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Exports a read group set to a BAM file in Google Cloud Storage.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Note that currently there may be some differences between exported BAM
  // files and the original BAM file at the time of import. See
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets]
  // for caveats.
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Searches for read group sets matching the criteria.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Updates a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Deletes a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Gets a read group set by ID.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists fixed width coverage buckets for a read group set, each of which
  // corresponds to a range of a reference sequence. Each bucket summarizes
  // coverage information across its corresponding genomic range.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Gets a list of reads for one or more read group sets.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference sequence
  // & position defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content (regardless
  // of page size) yield reads in the same order across their respective streams
  // of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}
|
|
|
|
// The read group set search request.
message SearchReadGroupSetsRequest {
  // Restricts this query to read group sets within the given datasets. At least
  // one ID must be provided.
  repeated string dataset_ids = 1;

  // Only return read group sets for which a substring of the name matches this
  // string.
  string name = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 1024.
  int32 page_size = 4;
}
|
|
|
|
// The read group set search response.
message SearchReadGroupSetsResponse {
  // The list of matching read group sets.
  repeated ReadGroupSet read_group_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
|
|
|
|
// The read group set import request.
message ImportReadGroupSetsRequest {
  // Describes how read groups from the imported files are partitioned into
  // read group sets.
  enum PartitionStrategy {
    // No strategy was specified. PER_FILE_PER_SAMPLE is documented as the
    // default behavior.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // In most cases, this strategy yields one read group set per file. This is
    // the default behavior.
    //
    // Allocate one read group set per file per sample. For BAM files, read
    // groups are considered to share a sample if they have identical sample
    // names. Furthermore, all reads for each file which do not belong to a read
    // group, if any, will be grouped into a single read group set per-file.
    PER_FILE_PER_SAMPLE = 1;

    // Includes all read groups in all imported files into a single read group
    // set. Requires that the headers for all imported files are equivalent. All
    // reads which do not belong to a read group, if any, will be grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }

  // Required. The ID of the dataset these read group sets will belong to. The
  // caller must have WRITE permissions to this dataset.
  string dataset_id = 1;

  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of those
  // found in the imported file headers. If no reference set id is provided, a
  // best effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Those URIs can include wildcards (*), but do not add or remove
  // matching files before import has completed.
  //
  // Note that Google Cloud Storage object listing is only eventually
  // consistent: files added may not be immediately visible to
  // everyone. Thus, if using a wildcard it is preferable not to start
  // the import immediately after the files are created.
  repeated string source_uris = 2;

  // The partition strategy describes how read groups are partitioned into read
  // group sets.
  PartitionStrategy partition_strategy = 5;
}
|
|
|
|
// The read group set import response.
message ImportReadGroupSetsResponse {
  // IDs of the read group sets that were created.
  repeated string read_group_set_ids = 1;
}
|
|
|
|
// The read group set export request.
message ExportReadGroupSetRequest {
  // Required. The Google Cloud project ID that owns this
  // export. The caller must have WRITE access to this project.
  string project_id = 1;

  // Required. A Google Cloud Storage URI for the exported BAM file.
  // The currently authenticated user must have write access to the new file.
  // An error will be returned if the URI already contains data.
  string export_uri = 2;

  // Required. The ID of the read group set to export. The caller must have
  // READ access to this read group set.
  string read_group_set_id = 3;

  // The reference names to export. If this is not specified, all reference
  // sequences, including unmapped reads, are exported.
  // Use `*` to export only unmapped reads.
  repeated string reference_names = 4;
}
|
|
|
|
// The read group set update request.
message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;

  // The new read group set data. See `updateMask` for details on mutability of
  // fields.
  ReadGroupSet read_group_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}
|
|
|
|
// The read group set delete request.
message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;
}
|
|
|
|
// The read group set get request.
message GetReadGroupSetRequest {
  // The ID of the read group set.
  string read_group_set_id = 1;
}
|
|
|
|
// The coverage bucket list request.
message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // The name of the reference to query, within the reference set associated
  // with this query. Optional.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified. If unset or 0, defaults
  // to the length of the reference.
  int64 end = 5;

  // The desired width of each reported coverage bucket in base pairs. This
  // will be rounded down to the nearest precomputed bucket width; the value
  // of which is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or the length
  // of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 2048.
  int32 page_size = 8;
}
|
|
|
|
// A bucket over which read coverage has been precomputed. A bucket corresponds
// to a specific range of the reference sequence.
message CoverageBucket {
  // The genomic coordinate range spanned by this bucket.
  Range range = 1;

  // The average number of reads which are aligned to each individual
  // reference base in this bucket.
  float mean_coverage = 2;
}
|
|
|
|
// The coverage bucket list response.
message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Note that buckets at the
  // end of a reference sequence may be shorter. This value is omitted if the
  // bucket width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list of buckets is sparse; a bucket with 0
  // overlapping reads is not returned. A bucket never crosses more than one
  // reference sequence. Each bucket has width `bucketWidth`, unless
  // its end is the end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
}
|
|
|
|
// The read search request.
message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of reference
  // sequences; this defines the genomic coordinates for the query. Must specify
  // one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // The IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
  // and unmapped) are returned.
  string reference_name = 7;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 4;
}
|
|
|
|
// The read search response.
message SearchReadsResponse {
  // The list of matching alignments sorted by mapped genomic coordinate,
  // if any, ascending in position within the same reference. Unmapped reads,
  // which have no position, are returned contiguously and are sorted in
  // ascending lexicographic order by fragment name.
  repeated Read alignments = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}
|
|
|
|
// The stream reads request.
message StreamReadsRequest {
  // The Google Cloud project ID which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;

  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;

  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`. If set to `*`, only unmapped reads are
  // returned.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;

  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results; this is especially
  // likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker is
  // assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded queries
  // `[0, totalShards)` is equal to those returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}
|
|
|
|
// The stream reads response. One message of the response stream returned by
// `StreamReads`.
message StreamReadsResponse {
  // The reads carried by this message of the stream.
  repeated Read alignments = 1;
}
|