diff --git a/Cargo.lock b/Cargo.lock index 2dad7585b..2f6590377 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2154,6 +2154,7 @@ dependencies = [ "serde_repr", "serde_stacker", "serde_yaml", + "smart-default", "strsim", "strum 0.27.2", "strum_macros 0.27.2", @@ -3268,6 +3269,17 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smart-default" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eb01866308440fc64d6c44d9e86c5cc17adfe33c4d6eed55da9145044d0ffc1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.112", +] + [[package]] name = "socket2" version = "0.5.10" diff --git a/Cargo.toml b/Cargo.toml index d73c802ee..bd790b297 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,7 @@ serde_json = { version = "=1.0.148", features = ["preserve_order", "indexmap", " serde_repr = "=0.1.20" serde_stacker = { version = "=0.1.14" } serde_yaml = "=0.9.34" +smart-default = "=0.7.1" strsim = "=0.11.1" strum = "=0.27.2" strum_macros = "=0.27.2" diff --git a/docs/user/input-files/05-pathogen-config.md b/docs/user/input-files/05-pathogen-config.md index 7f6a89817..81e2e7546 100644 --- a/docs/user/input-files/05-pathogen-config.md +++ b/docs/user/input-files/05-pathogen-config.md @@ -220,6 +220,63 @@ In addition, a "default" value can be specified for amino acid mutations that ar If the score is only relevant for specific clades, you can specify which clades are to be ignored. +#### Nucleotide mutation pattern detection (`mutationPatterns`) + +Nextclade can detect named groups of private nucleotide substitutions. This is useful for reporting mutation patterns such as RNA editing signatures separately from the generic SNP cluster QC rule. + +Pattern detection is configured with `mutationPatterns.patterns`. 
Each pattern has an `id`, a display `name`, an optional `description`, a list of `events` (an empty list matches all private nucleotide substitutions), and optional clustering parameters. The only supported event type is currently `nucSubstitution`. + ```json + "mutationPatterns": { + "patterns": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": ["A"], + "qry": ["G"] + }, + { + "type": "nucSubstitution", + "ref": ["T"], + "qry": ["C"] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 3 + } + }, + { + "id": "apobec", + "name": "APOBEC-like cytosine deamination", + "description": "APOBEC-like cytosine deamination observed as G>A in a reference motif", + "events": [ + { + "type": "nucSubstitution", + "ref": ["G"], + "qry": ["A"], + "motifs": ["[CT]G[ACT]"] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 3 + } + } + ] + } +``` + +The `ref` and `qry` arrays use Nextclade nucleotide symbols, including IUPAC ambiguity codes such as `N`, `R`, and `Y`. A substitution matches an event when its reference nucleotide matches one of the `ref` symbols and its query nucleotide matches one of the `qry` symbols. + +The `motifs` array contains regular expressions matched against the reference sequence. A motif qualifies a substitution when the regex match interval contains the substituted reference position. Motifs are regular expressions over the reference letters, so use regex character classes such as `[CT]` instead of IUPAC ambiguity symbols when matching multiple reference letters inside a motif. + +The optional `cluster` object reports clusters among the mutations matched by that pattern; if it is omitted, no clusters are reported for that pattern. It does not replace `qc.snpClusters`: `qc.snpClusters` remains the generic global SNP cluster QC rule over all private nucleotide substitutions. + #### Amino acid motif detection (`aaMotifs`) Nextclade can detect and report specific motifs in translated amino acid sequences.
This feature is currently being used to highlight changes in glycosylation or cleavage sites, but the feature itself is generic. diff --git a/docs/user/output-files/04-results-tsv.md b/docs/user/output-files/04-results-tsv.md index ca4b1b8d6..802625c16 100644 --- a/docs/user/output-files/04-results-tsv.md +++ b/docs/user/output-files/04-results-tsv.md @@ -100,6 +100,15 @@ Every row in tabular output corresponds to 1 input sequence. The meaning of colu | qc.stopCodons.totalStopCodons | Total number of detected stop codons in "Stop codons" QC rule | non-negative integer | 2 | | qc.stopCodons.score | Score for "Stop codons" QC rule | float | 0.5 | | qc.stopCodons.status | Status for "Stop codons" QC rule | string: `good | mediocre |bad` | bad | +| mutationPatterns.id | Mutation pattern identifier, or multiple identifiers separated by `\|` | string | adar | +| mutationPatterns.name | Mutation pattern display name, or multiple names separated by `\|` | string | ADAR-like RNA editing | +| mutationPatterns.description | Mutation pattern description, or multiple descriptions separated by `\|` | string | ADAR-mediated A-to-I editing | +| mutationPatterns.counts.matches | Total number of events matched by mutation patterns | non-negative integer | 14 | +| mutationPatterns.counts.clustered | Total number of matched events that occur in mutation pattern clusters | non-negative integer | 9 | +| mutationPatterns.counts.clusters | Total number of mutation pattern clusters | non-negative integer | 2 | +| mutationPatterns.eventTypeCounts | Matched event type counts for mutation patterns | comma separated list of strings | nucSubstitution:A>G:5 | +| mutationPatterns.clusters | Matched mutation pattern cluster ranges and event counts | comma separated list of strings | 3003-3011:5 | +| mutationPatterns.clusterEvents | Events in matched mutation pattern clusters | comma separated list of strings | 3003-3011:nucSubstitution:A3003G | | isReverseComplement | Whether query sequences were 
transformed using reverse complement operation before alignment | boolean | false | | errors | List of errors during processing | comma separated list of strings | | | warnings | List of warnings during processing | comma separated list of strings | | @@ -123,4 +132,3 @@ The table can contain additional columns for every clade-like attribute defined >
> > See descriptions of individual outputs and [Errors and warnings](./errors-and-warnings.md) section for more details. - diff --git a/packages/nextclade-cli/src/cli/nextclade_loop.rs b/packages/nextclade-cli/src/cli/nextclade_loop.rs index 2c28d1cae..e37b72c9c 100644 --- a/packages/nextclade-cli/src/cli/nextclade_loop.rs +++ b/packages/nextclade-cli/src/cli/nextclade_loop.rs @@ -138,6 +138,7 @@ pub fn nextclade_run(mut run_args: NextcladeRunArgs) -> Result<(), Report> { clade_node_attr_key_descs, phenotype_attr_descs, aa_motif_keys, + mutation_pattern_keys, ref_nodes, .. } = nextclade.get_initial_data(); @@ -148,6 +149,7 @@ pub fn nextclade_run(mut run_args: NextcladeRunArgs) -> Result<(), Report> { &phenotype_attr_descs, &ref_nodes, &aa_motif_keys, + &mutation_pattern_keys, &csv_column_config, &run_args.outputs, &nextclade.params, diff --git a/packages/nextclade-cli/src/cli/nextclade_ordered_writer.rs b/packages/nextclade-cli/src/cli/nextclade_ordered_writer.rs index 2d57270fe..1204a59b4 100644 --- a/packages/nextclade-cli/src/cli/nextclade_ordered_writer.rs +++ b/packages/nextclade-cli/src/cli/nextclade_ordered_writer.rs @@ -44,6 +44,7 @@ impl NextcladeOrderedWriter { phenotype_attr_key_desc: &[PhenotypeAttrDesc], ref_nodes: &AuspiceRefNodesDesc, aa_motifs_keys: &[String], + mutation_pattern_keys: &[String], csv_column_config: &CsvColumnConfig, output_params: &NextcladeRunOutputArgs, params: &NextcladeInputParams, @@ -73,6 +74,7 @@ impl NextcladeOrderedWriter { &phenotype_attr_keys, ref_nodes, aa_motifs_keys, + mutation_pattern_keys, csv_column_config, ) })?; @@ -85,6 +87,7 @@ impl NextcladeOrderedWriter { &phenotype_attr_keys, ref_nodes, aa_motifs_keys, + mutation_pattern_keys, csv_column_config, ) })?; diff --git a/packages/nextclade-schemas/input-pathogen-json.schema.json b/packages/nextclade-schemas/input-pathogen-json.schema.json index 0d3382078..81b4a85b0 100644 --- a/packages/nextclade-schemas/input-pathogen-json.schema.json +++ 
b/packages/nextclade-schemas/input-pathogen-json.schema.json @@ -53,9 +53,9 @@ }, "snpClusters": { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 }, "frameShifts": { "enabled": true, @@ -85,6 +85,62 @@ "scoreWeight": 75.0 } }, + "mutationPatterns": { + "patterns": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 100, + "cutoff": 5 + } + }, + { + "id": "apobec", + "name": "APOBEC-like cytosine deamination", + "description": "APOBEC-like cytosine deamination observed as C>T and complementary G>A", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "C", + "G" + ], + "qry": [ + "T", + "A" + ], + "motifs": [ + "TC[AT]", + "[AT]GA" + ] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 4 + } + } + ] + }, "phenotypeData": [ { "name": "receptor_binding", @@ -219,6 +275,17 @@ } ] }, + "mutationPatterns": { + "description": "Mutation pattern analysis configuration. When present, detects private mutation patterns such as enzyme-associated clustered substitution signatures.", + "anyOf": [ + { + "$ref": "#/definitions/MutationPatternsConfig" + }, + { + "type": "null" + } + ] + }, "generalParams": { "description": "General analysis parameters (e.g. 
includeReference, inOrder, replaceUnknown).", "anyOf": [ @@ -507,9 +574,9 @@ }, "snpClusters": { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 }, "frameShifts": { "enabled": true, @@ -590,9 +657,9 @@ "description": "Configuration for the \"SNP clusters\" (C) rule", "default": { "enabled": false, + "scoreWeight": 0.0, "windowSize": 0, - "clusterCutOff": 0, - "scoreWeight": 0.0 + "clusterCutOff": 0 }, "allOf": [ { @@ -754,9 +821,9 @@ "examples": [ { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 } ], "type": "object", @@ -765,25 +832,25 @@ "default": false, "type": "boolean" }, + "scoreWeight": { + "description": "QC score added per detected cluster", + "default": 0.0, + "type": "number", + "format": "double" + }, "windowSize": { - "description": "Size of the sliding window (in nucleotides) used to count private mutations", + "description": "Size of the sliding nucleotide window for global SNP cluster detection", "default": 0, "type": "integer", "format": "uint", "minimum": 0.0 }, "clusterCutOff": { - "description": "Number of private mutations within a window above which the window is flagged as a cluster", + "description": "Number of substitutions within a window required to count as a global SNP cluster", "default": 0, "type": "integer", "format": "uint", "minimum": 0.0 - }, - "scoreWeight": { - "description": "QC score added per detected cluster", - "default": 0.0, - "type": "number", - "format": "double" } } }, @@ -942,6 +1009,272 @@ } } }, + "MutationPatternsConfig": { + "description": "Configuration for mutation pattern analysis. Detects private mutations matching biologically meaningful mutation type and reference-context rules. 
Pattern-specific clustering is independent from global `qc.snpClusters`.", + "examples": [ + { + "patterns": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 100, + "cutoff": 5 + } + }, + { + "id": "apobec", + "name": "APOBEC-like cytosine deamination", + "description": "APOBEC-like cytosine deamination observed as C>T and complementary G>A", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "C", + "G" + ], + "qry": [ + "T", + "A" + ], + "motifs": [ + "TC[AT]", + "[AT]GA" + ] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 4 + } + } + ] + } + ], + "type": "object", + "properties": { + "patterns": { + "description": "Mutation patterns evaluated independently for every analyzed sequence.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternConfig" + } + } + } + }, + "MutationPatternConfig": { + "description": "Named mutation pattern: event filters, optional clustering, and display metadata.\n\nDataset authors can define multiple patterns to separate biologically different mutation processes, such as ADAR-like A-to-I editing and APOBEC-like cytosine deamination.", + "examples": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 100, + "cutoff": 5 + } + } + ], + "type": "object", + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "description": "Stable machine-readable identifier. 
This value appears in JSON and TSV output.", + "type": "string" + }, + "name": { + "description": "Human-readable name shown in Nextclade Web tooltips and reports.", + "type": "string" + }, + "description": { + "description": "Optional explanatory text shown together with the pattern result.", + "type": [ + "string", + "null" + ] + }, + "events": { + "description": "Event filters included in this pattern. If empty, the pattern matches all private nucleotide substitutions.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEvent" + } + }, + "cluster": { + "description": "Optional pattern-local clustering rule. If omitted, Nextclade reports matches and type counts, but no clusters for this pattern.", + "anyOf": [ + { + "$ref": "#/definitions/MutationPatternClusterConfig" + }, + { + "type": "null" + } + ] + } + } + }, + "MutationPatternEvent": { + "description": "Mutation event filter in mutation pattern analysis.\n\nEach event selects one class of private mutations to include in a pattern. 
More event variants can be added without changing the surrounding `mutationPatterns.patterns[]` structure.", + "examples": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "oneOf": [ + { + "description": "Nucleotide substitution event, selected by reference nucleotide, query nucleotide, and optional reference motifs.", + "examples": [ + { + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "type": "object", + "required": [ + "qry", + "ref", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "ref": { + "description": "Reference nucleotides to match at the mutated position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "qry": { + "description": "Query nucleotides to match at the mutated position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "motifs": { + "description": "Regular expressions matched against the reference sequence. A mutation matches a motif when the regex match spans the mutated position. Motifs are matched against literal reference letters, so express IUPAC ambiguity codes as regex character classes, for example `TC[AT]` for a TCW motif.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + ] + }, + "Nuc": { + "description": "A nucleotide", + "type": "string", + "enum": [ + "T", + "A", + "W", + "C", + "Y", + "M", + "H", + "G", + "K", + "R", + "D", + "S", + "B", + "V", + "N", + "-" + ] + }, + "MutationPatternClusterConfig": { + "description": "Clustering rule applied to events matching one mutation pattern.\n\nClustering is pattern-local. It decides which matched events are reported as dense clusters in this pattern.
It does not change the global `qc.snpClusters` rule unless the QC rule itself is configured separately.", + "examples": [ + { + "windowSize": 100, + "cutoff": 5 + } + ], + "type": "object", + "properties": { + "windowSize": { + "description": "Sliding nucleotide window size. A cluster is detected when `cutoff` matched events fall within this many reference nucleotides.", + "default": 100, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "cutoff": { + "description": "Minimum number of matched events in one sliding window required to report a cluster.", + "default": 5, + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, "NextcladeGeneralParamsOptional": { "type": "object", "properties": { diff --git a/packages/nextclade-schemas/input-pathogen-json.schema.yaml b/packages/nextclade-schemas/input-pathogen-json.schema.yaml index 1c94e542a..29edc8b19 100644 --- a/packages/nextclade-schemas/input-pathogen-json.schema.yaml +++ b/packages/nextclade-schemas/input-pathogen-json.schema.yaml @@ -41,9 +41,9 @@ examples: cutoff: 15.0 snpClusters: enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 frameShifts: enabled: true ignoredFrameShifts: @@ -60,6 +60,42 @@ examples: - cdsName: ORF3a codon: 238 scoreWeight: 75.0 + mutationPatterns: + patterns: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + - id: apobec + name: APOBEC-like cytosine deamination + description: APOBEC-like cytosine deamination observed as C>T and complementary G>A + events: + - type: nucSubstitution + ref: + - C + - G + qry: + - T + - A + motifs: + - TC[AT] + - '[AT]GA' + cluster: + windowSize: 50 + cutoff: 4 phenotypeData: - name: receptor_binding nameFriendly: Receptor Binding @@ -139,6 +175,11 @@ properties: anyOf: 
- $ref: '#/definitions/QcConfig' - type: 'null' + mutationPatterns: + description: Mutation pattern analysis configuration. When present, detects private mutation patterns such as enzyme-associated clustered substitution signatures. + anyOf: + - $ref: '#/definitions/MutationPatternsConfig' + - type: 'null' generalParams: description: General analysis parameters (e.g. includeReference, inOrder, replaceUnknown). anyOf: @@ -324,9 +365,9 @@ definitions: cutoff: 15.0 snpClusters: enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 frameShifts: enabled: true ignoredFrameShifts: @@ -378,9 +419,9 @@ definitions: description: Configuration for the "SNP clusters" (C) rule default: enabled: false + scoreWeight: 0.0 windowSize: 0 clusterCutOff: 0 - scoreWeight: 0.0 allOf: - $ref: '#/definitions/QcRulesConfigSnpClusters' frameShifts: @@ -495,31 +536,31 @@ definitions: description: Configuration for QC rule "SNP clusters" examples: - enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 type: object properties: enabled: default: false type: boolean + scoreWeight: + description: QC score added per detected cluster + default: 0.0 + type: number + format: double windowSize: - description: Size of the sliding window (in nucleotides) used to count private mutations + description: Size of the sliding nucleotide window for global SNP cluster detection default: 0 type: integer format: uint minimum: 0.0 clusterCutOff: - description: Number of private mutations within a window above which the window is flagged as a cluster + description: Number of substitutions within a window required to count as a global SNP cluster default: 0 type: integer format: uint minimum: 0.0 - scoreWeight: - description: QC score added per detected cluster - default: 0.0 - type: number - format: double QcRulesConfigFrameShifts: description: Configuration for QC rule "frame shifts" examples: @@ -625,6 +666,195 @@ definitions: type: integer 
format: uint minimum: 0.0 + MutationPatternsConfig: + description: Configuration for mutation pattern analysis. Detects private mutations matching biologically meaningful mutation type and reference-context rules. Pattern-specific clustering is independent from global `qc.snpClusters`. + examples: + - patterns: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + - id: apobec + name: APOBEC-like cytosine deamination + description: APOBEC-like cytosine deamination observed as C>T and complementary G>A + events: + - type: nucSubstitution + ref: + - C + - G + qry: + - T + - A + motifs: + - TC[AT] + - '[AT]GA' + cluster: + windowSize: 50 + cutoff: 4 + type: object + properties: + patterns: + description: Mutation patterns evaluated independently for every analyzed sequence. + type: array + items: + $ref: '#/definitions/MutationPatternConfig' + MutationPatternConfig: + description: |- + Named mutation pattern: event filters, optional clustering, and display metadata. + + Dataset authors can define multiple patterns to separate biologically different mutation processes, such as ADAR-like A-to-I editing and APOBEC-like cytosine deamination. + examples: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + type: object + required: + - id + - name + properties: + id: + description: Stable machine-readable identifier. This value appears in JSON and TSV output. + type: string + name: + description: Human-readable name shown in Nextclade Web tooltips and reports. 
+ type: string + description: + description: Optional explanatory text shown together with the pattern result. + type: + - string + - 'null' + events: + description: Event filters included in this pattern. If empty, the pattern matches all private nucleotide substitutions. + type: array + items: + $ref: '#/definitions/MutationPatternEvent' + cluster: + description: Optional pattern-local clustering rule. If omitted, Nextclade reports matches and type counts, but no clusters for this pattern. + anyOf: + - $ref: '#/definitions/MutationPatternClusterConfig' + - type: 'null' + MutationPatternEvent: + description: |- + Mutation event filter in mutation pattern analysis. + + Each event selects one class of private mutations to include in a pattern. More event variants can be added without changing the surrounding `mutationPatterns.patterns[]` structure. + examples: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + oneOf: + - description: Nucleotide substitution event, selected by reference nucleotide, query nucleotide, and optional reference motifs. + examples: + - ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + type: object + required: + - qry + - ref + - type + properties: + type: + type: string + enum: + - nucSubstitution + ref: + description: Reference nucleotides to match at the mutated position. + type: array + items: + $ref: '#/definitions/Nuc' + qry: + description: Query nucleotides to match at the mutated position. + type: array + items: + $ref: '#/definitions/Nuc' + motifs: + description: Regular expressions matched against the reference sequence. A mutation matches a motif when the regex match spans the mutated position. Motifs are matched against literal reference letters, so express IUPAC ambiguity codes as regex character classes, for example `TC[AT]` for a TCW motif.
+ type: array + items: + type: string + Nuc: + description: A nucleotide + type: string + enum: + - T + - A + - W + - C + - Y + - M + - H + - G + - K + - R + - D + - S + - B + - V + - N + - '-' + MutationPatternClusterConfig: + description: |- + Clustering rule applied to events matching one mutation pattern. + + Clustering is pattern-local. It decides which matched events are reported as dense clusters in this pattern. It does not change the global `qc.snpClusters` rule unless the QC rule itself is configured separately. + examples: + - windowSize: 100 + cutoff: 5 + type: object + properties: + windowSize: + description: Sliding nucleotide window size. A cluster is detected when `cutoff` matched events fall within this many reference nucleotides. + default: 100 + type: integer + format: uint + minimum: 0.0 + cutoff: + description: Minimum number of matched events in one sliding window required to report a cluster. + default: 5 + type: integer + format: uint + minimum: 0.0 NextcladeGeneralParamsOptional: type: object properties: diff --git a/packages/nextclade-schemas/nextclade-auspice-extensions.schema.json b/packages/nextclade-schemas/nextclade-auspice-extensions.schema.json index 139417c43..f0b216a27 100644 --- a/packages/nextclade-schemas/nextclade-auspice-extensions.schema.json +++ b/packages/nextclade-schemas/nextclade-auspice-extensions.schema.json @@ -387,9 +387,9 @@ }, "snpClusters": { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 }, "frameShifts": { "enabled": true, @@ -419,6 +419,62 @@ "scoreWeight": 75.0 } }, + "mutationPatterns": { + "patterns": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 
100, + "cutoff": 5 + } + }, + { + "id": "apobec", + "name": "APOBEC-like cytosine deamination", + "description": "APOBEC-like cytosine deamination observed as C>T and complementary G>A", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "C", + "G" + ], + "qry": [ + "T", + "A" + ], + "motifs": [ + "TC[AT]", + "[AT]GA" + ] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 4 + } + } + ] + }, "phenotypeData": [ { "name": "receptor_binding", @@ -553,6 +609,17 @@ } ] }, + "mutationPatterns": { + "description": "Mutation pattern analysis configuration. When present, detects private mutation patterns such as enzyme-associated clustered substitution signatures.", + "anyOf": [ + { + "$ref": "#/definitions/MutationPatternsConfig" + }, + { + "type": "null" + } + ] + }, "generalParams": { "description": "General analysis parameters (e.g. includeReference, inOrder, replaceUnknown).", "anyOf": [ @@ -841,9 +908,9 @@ }, "snpClusters": { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 }, "frameShifts": { "enabled": true, @@ -924,9 +991,9 @@ "description": "Configuration for the \"SNP clusters\" (C) rule", "default": { "enabled": false, + "scoreWeight": 0.0, "windowSize": 0, - "clusterCutOff": 0, - "scoreWeight": 0.0 + "clusterCutOff": 0 }, "allOf": [ { @@ -1088,9 +1155,9 @@ "examples": [ { "enabled": true, + "scoreWeight": 50.0, "windowSize": 100, - "clusterCutOff": 5, - "scoreWeight": 50.0 + "clusterCutOff": 5 } ], "type": "object", @@ -1099,25 +1166,25 @@ "default": false, "type": "boolean" }, + "scoreWeight": { + "description": "QC score added per detected cluster", + "default": 0.0, + "type": "number", + "format": "double" + }, "windowSize": { - "description": "Size of the sliding window (in nucleotides) used to count private mutations", + "description": "Size of the sliding nucleotide window for global SNP cluster detection", "default": 0, "type": "integer", "format": "uint", "minimum": 
0.0 }, "clusterCutOff": { - "description": "Number of private mutations within a window above which the window is flagged as a cluster", + "description": "Number of substitutions within a window required to count as a global SNP cluster", "default": 0, "type": "integer", "format": "uint", "minimum": 0.0 - }, - "scoreWeight": { - "description": "QC score added per detected cluster", - "default": 0.0, - "type": "number", - "format": "double" } } }, @@ -1256,6 +1323,272 @@ } } }, + "MutationPatternsConfig": { + "description": "Configuration for mutation pattern analysis. Detects private mutations matching biologically meaningful mutation type and reference-context rules. Pattern-specific clustering is independent from global `qc.snpClusters`.", + "examples": [ + { + "patterns": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 100, + "cutoff": 5 + } + }, + { + "id": "apobec", + "name": "APOBEC-like cytosine deamination", + "description": "APOBEC-like cytosine deamination observed as C>T and complementary G>A", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "C", + "G" + ], + "qry": [ + "T", + "A" + ], + "motifs": [ + "TC[AT]", + "[AT]GA" + ] + } + ], + "cluster": { + "windowSize": 50, + "cutoff": 4 + } + } + ] + } + ], + "type": "object", + "properties": { + "patterns": { + "description": "Mutation patterns evaluated independently for every analyzed sequence.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternConfig" + } + } + } + }, + "MutationPatternConfig": { + "description": "Named mutation pattern: event filters, optional clustering, and display metadata.\n\nDataset authors can define multiple patterns to separate biologically different mutation 
processes, such as ADAR-like A-to-I editing and APOBEC-like cytosine deamination.", + "examples": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C", + "events": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "cluster": { + "windowSize": 100, + "cutoff": 5 + } + } + ], + "type": "object", + "required": [ + "id", + "name" + ], + "properties": { + "id": { + "description": "Stable machine-readable identifier. This value appears in JSON and TSV output.", + "type": "string" + }, + "name": { + "description": "Human-readable name shown in Nextclade Web tooltips and reports.", + "type": "string" + }, + "description": { + "description": "Optional explanatory text shown together with the pattern result.", + "type": [ + "string", + "null" + ] + }, + "events": { + "description": "Event filters included in this pattern. If empty, the pattern matches all private nucleotide substitutions.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEvent" + } + }, + "cluster": { + "description": "Optional pattern-local clustering rule. If omitted, Nextclade reports matches and type counts, but no clusters for this pattern.", + "anyOf": [ + { + "$ref": "#/definitions/MutationPatternClusterConfig" + }, + { + "type": "null" + } + ] + } + } + }, + "MutationPatternEvent": { + "description": "Mutation event filter in mutation pattern analysis.\n\nEach event selects one class of private mutations to include in a pattern. 
More event variants can be added without changing the surrounding `mutationPatterns.patterns[]` structure.", + "examples": [ + { + "type": "nucSubstitution", + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "oneOf": [ + { + "description": "Nucleotide substitution event, selected by reference nucleotide, query nucleotide, and optional reference motifs.", + "examples": [ + { + "ref": [ + "A", + "T" + ], + "qry": [ + "G", + "C" + ], + "motifs": [ + "A[ACGT]G", + "T[ACGT]C" + ] + } + ], + "type": "object", + "required": [ + "qry", + "ref", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "ref": { + "description": "Reference nucleotides to match at the mutated position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "qry": { + "description": "Query nucleotides to match at the mutated position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "motifs": { + "description": "Regular expressions matched against the reference sequence. A mutation matches a motif when the regex match spans the mutated position. Motifs are matched against literal reference letters, so express IUPAC ambiguity codes as regex character classes, for example `TC[AT]` for a TCW motif.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + ] + }, + "Nuc": { + "description": "A nucleotide", + "type": "string", + "enum": [ + "T", + "A", + "W", + "C", + "Y", + "M", + "H", + "G", + "K", + "R", + "D", + "S", + "B", + "V", + "N", + "-" + ] + }, + "MutationPatternClusterConfig": { + "description": "Clustering rule applied to events matching one mutation pattern.\n\nClustering is pattern-local. It decides which matched events are reported as dense clusters in this pattern.
It does not change the global `qc.snpClusters` rule, which is configured separately.", + "examples": [ + { + "windowSize": 100, + "cutoff": 5 + } + ], + "type": "object", + "properties": { + "windowSize": { + "description": "Sliding nucleotide window size. A cluster is detected when `cutoff` matched events fall within this many reference nucleotides.", + "default": 100, + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "cutoff": { + "description": "Minimum number of matched events in one sliding window required to report a cluster.", + "default": 5, + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, "NextcladeGeneralParamsOptional": { "type": "object", "properties": { diff --git a/packages/nextclade-schemas/nextclade-auspice-extensions.schema.yaml b/packages/nextclade-schemas/nextclade-auspice-extensions.schema.yaml index ffdbb66b2..904d97de8 100644 --- a/packages/nextclade-schemas/nextclade-auspice-extensions.schema.yaml +++ b/packages/nextclade-schemas/nextclade-auspice-extensions.schema.yaml @@ -261,9 +261,9 @@ definitions: cutoff: 15.0 snpClusters: enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 frameShifts: enabled: true ignoredFrameShifts: @@ -280,6 +280,42 @@ definitions: - cdsName: ORF3a codon: 238 scoreWeight: 75.0 + mutationPatterns: + patterns: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + - id: apobec + name: APOBEC-like cytosine deamination + description: APOBEC-like cytosine deamination observed as C>T and complementary G>A + events: + - type: nucSubstitution + ref: + - C + - G + qry: + - T + - A + motifs: + - TC[AT] + - '[AT]GA' + cluster: + windowSize: 50 + cutoff: 4 phenotypeData: - name: receptor_binding nameFriendly: Receptor 
Binding @@ -359,6 +395,11 @@ definitions: anyOf: - $ref: '#/definitions/QcConfig' - type: 'null' + mutationPatterns: + description: Mutation pattern analysis configuration. When present, detects private mutation patterns such as enzyme-associated clustered substitution signatures. + anyOf: + - $ref: '#/definitions/MutationPatternsConfig' + - type: 'null' generalParams: description: General analysis parameters (e.g. includeReference, inOrder, replaceUnknown). anyOf: @@ -543,9 +584,9 @@ definitions: cutoff: 15.0 snpClusters: enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 frameShifts: enabled: true ignoredFrameShifts: @@ -597,9 +638,9 @@ definitions: description: Configuration for the "SNP clusters" (C) rule default: enabled: false + scoreWeight: 0.0 windowSize: 0 clusterCutOff: 0 - scoreWeight: 0.0 allOf: - $ref: '#/definitions/QcRulesConfigSnpClusters' frameShifts: @@ -714,31 +755,31 @@ definitions: description: Configuration for QC rule "SNP clusters" examples: - enabled: true + scoreWeight: 50.0 windowSize: 100 clusterCutOff: 5 - scoreWeight: 50.0 type: object properties: enabled: default: false type: boolean + scoreWeight: + description: QC score added per detected cluster + default: 0.0 + type: number + format: double windowSize: - description: Size of the sliding window (in nucleotides) used to count private mutations + description: Size of the sliding nucleotide window for global SNP cluster detection default: 0 type: integer format: uint minimum: 0.0 clusterCutOff: - description: Number of private mutations within a window above which the window is flagged as a cluster + description: Number of substitutions within a window required to count as a global SNP cluster default: 0 type: integer format: uint minimum: 0.0 - scoreWeight: - description: QC score added per detected cluster - default: 0.0 - type: number - format: double QcRulesConfigFrameShifts: description: Configuration for QC rule "frame shifts" examples: @@ 
-827,6 +868,195 @@ definitions: type: integer format: uint minimum: 0.0 + MutationPatternsConfig: + description: Configuration for mutation pattern analysis. Detects private mutations matching biologically meaningful mutation type and reference-context rules. Pattern-specific clustering is independent from global `qc.snpClusters`. + examples: + - patterns: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + - id: apobec + name: APOBEC-like cytosine deamination + description: APOBEC-like cytosine deamination observed as C>T and complementary G>A + events: + - type: nucSubstitution + ref: + - C + - G + qry: + - T + - A + motifs: + - TC[AT] + - '[AT]GA' + cluster: + windowSize: 50 + cutoff: 4 + type: object + properties: + patterns: + description: Mutation patterns evaluated independently for every analyzed sequence. + type: array + items: + $ref: '#/definitions/MutationPatternConfig' + MutationPatternConfig: + description: |- + Named mutation pattern: event filters, optional clustering, and display metadata. + + Dataset authors can define multiple patterns to separate biologically different mutation processes, such as ADAR-like A-to-I editing and APOBEC-like cytosine deamination. + examples: + - id: adar + name: ADAR-like RNA editing + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + events: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + cluster: + windowSize: 100 + cutoff: 5 + type: object + required: + - id + - name + properties: + id: + description: Stable machine-readable identifier. This value appears in JSON and TSV output. + type: string + name: + description: Human-readable name shown in Nextclade Web tooltips and reports. 
+ type: string + description: + description: Optional explanatory text shown together with the pattern result. + type: + - string + - 'null' + events: + description: Event filters included in this pattern. If empty, the pattern matches all private nucleotide substitutions. + type: array + items: + $ref: '#/definitions/MutationPatternEvent' + cluster: + description: Optional pattern-local clustering rule. If omitted, Nextclade reports matches and type counts, but no clusters for this pattern. + anyOf: + - $ref: '#/definitions/MutationPatternClusterConfig' + - type: 'null' + MutationPatternEvent: + description: |- + Mutation event filter in mutation pattern analysis. + + Each event selects one class of private mutations to include in a pattern. More event variants can be added without changing the surrounding `mutationPatterns.patterns[]` structure. + examples: + - type: nucSubstitution + ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + oneOf: + - description: Nucleotide substitution event, selected by reference nucleotide, query nucleotide, and optional reference motifs. + examples: + - ref: + - A + - T + qry: + - G + - C + motifs: + - A[ACGT]G + - T[ACGT]C + type: object + required: + - qry + - ref + - type + properties: + type: + type: string + enum: + - nucSubstitution + ref: + description: Reference nucleotides to match at the mutated position. + type: array + items: + $ref: '#/definitions/Nuc' + qry: + description: Query nucleotides to match at the mutated position. + type: array + items: + $ref: '#/definitions/Nuc' + motifs: + description: Regular expressions matched against the reference sequence. A mutation matches a motif when the regex match spans the mutated position. Motifs are regular expressions over the reference letters, so write IUPAC ambiguity codes as regex character classes, for example `TC[AT]` for a TCW motif. 
+ type: array + items: + type: string + Nuc: + description: A nucleotide + type: string + enum: + - T + - A + - W + - C + - Y + - M + - H + - G + - K + - R + - D + - S + - B + - V + - N + - '-' + MutationPatternClusterConfig: + description: |- + Clustering rule applied to events matching one mutation pattern. + + Clustering is pattern-local. It decides which matched events are reported as dense clusters in this pattern. It does not change the global `qc.snpClusters` rule, which is configured separately. + examples: + - windowSize: 100 + cutoff: 5 + type: object + properties: + windowSize: + description: Sliding nucleotide window size. A cluster is detected when `cutoff` matched events fall within this many reference nucleotides. + default: 100 + type: integer + format: uint + minimum: 0.0 + cutoff: + description: Minimum number of matched events in one sliding window required to report a cluster. + default: 5 + type: integer + format: uint + minimum: 0.0 NextcladeGeneralParamsOptional: type: object properties: diff --git a/packages/nextclade-schemas/output-json.schema.json b/packages/nextclade-schemas/output-json.schema.json index 2c4e8fe7e..507a3a019 100644 --- a/packages/nextclade-schemas/output-json.schema.json +++ b/packages/nextclade-schemas/output-json.schema.json @@ -399,6 +399,7 @@ "lenUnaligned", "missing", "missingCdses", + "mutationPatterns", "nearestNodeId", "nearestNodeName", "nonACGTNs", @@ -700,6 +701,14 @@ "$ref": "#/definitions/PrivateAaMutations" } }, + "mutationPatterns": { + "description": "Per-type mutation statistics, local reference context, and detected mutation clusters", + "allOf": [ + { + "$ref": "#/definitions/MutationPatternsResults" + } + ] + }, "cladeFounderInfo": { "description": "Mutations relative to the clade founder node", "anyOf": [ @@ -1664,6 +1673,633 @@ } } }, + "MutationPatternsResults": { + "description": "Mutation pattern analysis output. 
Contains per-pattern results.", + "examples": [ + { + "results": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "matches": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ], + "clusters": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "counts": { + "matches": 2, + "clustered": 2, + "clusters": 1 + }, + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C" + } + ] + } + ], + "type": "object", + "properties": { + "results": { + "description": "Results for each configured mutation pattern. 
Empty when mutation pattern analysis is not configured and no global SNP cluster compatibility output is needed.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternResults" + } + } + } + }, + "MutationPatternResults": { + "description": "Results for a single mutation pattern.", + "examples": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "matches": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ], + "clusters": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "counts": { + "matches": 2, + "clustered": 2, + "clusters": 1 + }, + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C" + } + ], + "type": "object", + "required": [ + "clusters", + "counts", + "eventTypeCounts", + "id", + "matches", + "name" + ], + "properties": { + "id": { + "description": "Stable machine-readable pattern identifier copied from pathogen.json.", + "type": "string" + }, + 
"name": { + "description": "Human-readable pattern name copied from pathogen.json.", + "type": "string" + }, + "matches": { + "description": "All private mutation events matching this pattern.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventMatch" + } + }, + "eventTypeCounts": { + "description": "Counts of matched event types across all `matches`.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventTypeCount" + } + }, + "clusters": { + "description": "Pattern-local clusters detected among `matches`.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternCluster" + } + }, + "counts": { + "description": "Summary counts for matches and clusters.", + "allOf": [ + { + "$ref": "#/definitions/MutationPatternCounts" + } + ] + }, + "description": { + "description": "Optional explanatory text copied from pathogen.json.", + "type": [ + "string", + "null" + ] + } + } + }, + "MutationPatternEventMatch": { + "description": "Matched mutation pattern event.", + "examples": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + } + ], + "oneOf": [ + { + "description": "Matched nucleotide substitution event.", + "examples": [ + { + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + } + ], + "type": "object", + "required": [ + "motifMatches", + "pos", + "qryNuc", + "refContext", + "refNuc", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "motifMatches": { + "description": "Motif matches that spanned the substituted position. 
Empty when the pattern event had no motif restriction.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternMotifMatch" + } + }, + "refContext": { + "description": "Reference nucleotide context around the substituted position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "pos": { + "description": "0-based position in the reference sequence", + "allOf": [ + { + "$ref": "#/definitions/Position" + } + ] + }, + "refNuc": { + "description": "Nucleotide in the reference at this position", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "qryNuc": { + "description": "Nucleotide in the query at this position", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + } + } + } + ] + }, + "MutationPatternMotifMatch": { + "description": "Reference motif match that overlapped a mutation pattern event.", + "examples": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ], + "type": "object", + "required": [ + "end", + "motif", + "start" + ], + "properties": { + "motif": { + "description": "Regular expression from the pattern configuration that matched the reference sequence.", + "type": "string" + }, + "start": { + "description": "0-based first reference position included in the motif match.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "end": { + "description": "0-based position after the end of the motif match.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, + "MutationPatternEventTypeCount": { + "description": "Count of matched mutation pattern events of one event type.", + "examples": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 8 + } + ], + "oneOf": [ + { + "description": "Count of nucleotide substitutions with the same reference and query nucleotide.", + "examples": [ + { + "refNuc": "A", + "qryNuc": "G", + "count": 8 + } + ], + "type": "object", + "required": [ + "count", + "qryNuc", + "refNuc", + "type" + ], + 
"properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "refNuc": { + "description": "Reference nucleotide at the substituted position.", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "qryNuc": { + "description": "Query nucleotide at the substituted position.", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "count": { + "description": "Number of matching substitutions with this reference and query nucleotide pair.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + } + ] + }, + "MutationPatternCluster": { + "description": "Cluster of mutation pattern events detected within one sliding nucleotide window.", + "examples": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "type": "object", + "required": [ + "count", + "end", + "eventTypeCounts", + "events", + "start" + ], + "properties": { + "start": { + "description": "0-based first reference position included in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "end": { + "description": "0-based last reference position included in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "count": { + "description": "Number of matched events in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "events": { + "description": "Matched events belonging to this cluster, sorted by reference 
position.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventMatch" + } + }, + "eventTypeCounts": { + "description": "Counts of event types within this cluster.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventTypeCount" + } + } + } + }, + "MutationPatternCounts": { + "description": "Summary counts for one mutation pattern.", + "examples": [ + { + "matches": 14, + "clustered": 14, + "clusters": 2 + } + ], + "type": "object", + "required": [ + "clustered", + "clusters", + "matches" + ], + "properties": { + "matches": { + "description": "Number of private mutation events matching the pattern.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "clustered": { + "description": "Number of matching events that belong to reported clusters.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "clusters": { + "description": "Number of clusters reported for this pattern.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, "CladeNodeAttrFounderInfo": { "description": "Information about a clade-like node attribute founder", "type": "object", diff --git a/packages/nextclade-schemas/output-json.schema.yaml b/packages/nextclade-schemas/output-json.schema.yaml index 9305c399d..a2b4f9b49 100644 --- a/packages/nextclade-schemas/output-json.schema.yaml +++ b/packages/nextclade-schemas/output-json.schema.yaml @@ -277,6 +277,7 @@ definitions: - lenUnaligned - missing - missingCdses + - mutationPatterns - nearestNodeId - nearestNodeName - nonACGTNs @@ -509,6 +510,10 @@ definitions: type: object additionalProperties: $ref: '#/definitions/PrivateAaMutations' + mutationPatterns: + description: Per-type mutation statistics, local reference context, and detected mutation clusters + allOf: + - $ref: '#/definitions/MutationPatternsResults' cladeFounderInfo: description: Mutations relative to the clade founder node anyOf: @@ -1178,6 +1183,423 @@ definitions: type: array items: type: 
string + MutationPatternsResults: + description: Mutation pattern analysis output. Contains per-pattern results. + examples: + - results: + - id: adar + name: ADAR-like RNA editing + matches: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + clusters: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + counts: + matches: 2 + clustered: 2 + clusters: 1 + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + type: object + properties: + results: + description: Results for each configured mutation pattern. Empty when mutation pattern analysis is not configured and no global SNP cluster compatibility output is needed. + type: array + items: + $ref: '#/definitions/MutationPatternResults' + MutationPatternResults: + description: Results for a single mutation pattern. 
+ examples: + - id: adar + name: ADAR-like RNA editing + matches: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + clusters: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + counts: + matches: 2 + clustered: 2 + clusters: 1 + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + type: object + required: + - clusters + - counts + - eventTypeCounts + - id + - matches + - name + properties: + id: + description: Stable machine-readable pattern identifier copied from pathogen.json. + type: string + name: + description: Human-readable pattern name copied from pathogen.json. + type: string + matches: + description: All private mutation events matching this pattern. + type: array + items: + $ref: '#/definitions/MutationPatternEventMatch' + eventTypeCounts: + description: Counts of matched event types across all `matches`. + type: array + items: + $ref: '#/definitions/MutationPatternEventTypeCount' + clusters: + description: Pattern-local clusters detected among `matches`. + type: array + items: + $ref: '#/definitions/MutationPatternCluster' + counts: + description: Summary counts for matches and clusters. 
+ allOf: + - $ref: '#/definitions/MutationPatternCounts' + description: + description: Optional explanatory text copied from pathogen.json. + type: + - string + - 'null' + MutationPatternEventMatch: + description: Matched mutation pattern event. + examples: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + oneOf: + - description: Matched nucleotide substitution event. + examples: + - pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + type: object + required: + - motifMatches + - pos + - qryNuc + - refContext + - refNuc + - type + properties: + type: + type: string + enum: + - nucSubstitution + motifMatches: + description: Motif matches that spanned the substituted position. Empty when the pattern event had no motif restriction. + type: array + items: + $ref: '#/definitions/MutationPatternMotifMatch' + refContext: + description: Reference nucleotide context around the substituted position. + type: array + items: + $ref: '#/definitions/Nuc' + pos: + description: 0-based position in the reference sequence + allOf: + - $ref: '#/definitions/Position' + refNuc: + description: Nucleotide in the reference at this position + allOf: + - $ref: '#/definitions/Nuc' + qryNuc: + description: Nucleotide in the query at this position + allOf: + - $ref: '#/definitions/Nuc' + MutationPatternMotifMatch: + description: Reference motif match that overlapped a mutation pattern event. + examples: + - motif: A[ACGT]G + start: 5002 + end: 5005 + type: object + required: + - end + - motif + - start + properties: + motif: + description: Regular expression from the pattern configuration that matched the reference sequence. + type: string + start: + description: 0-based first reference position included in the motif match. 
+ type: integer + format: uint + minimum: 0.0 + end: + description: 0-based position after the end of the motif match. + type: integer + format: uint + minimum: 0.0 + MutationPatternEventTypeCount: + description: Count of matched mutation pattern events of one event type. + examples: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 8 + oneOf: + - description: Count of nucleotide substitutions with the same reference and query nucleotide. + examples: + - refNuc: A + qryNuc: G + count: 8 + type: object + required: + - count + - qryNuc + - refNuc + - type + properties: + type: + type: string + enum: + - nucSubstitution + refNuc: + description: Reference nucleotide at the substituted position. + allOf: + - $ref: '#/definitions/Nuc' + qryNuc: + description: Query nucleotide at the substituted position. + allOf: + - $ref: '#/definitions/Nuc' + count: + description: Number of matching substitutions with this reference and query nucleotide pair. + type: integer + format: uint + minimum: 0.0 + MutationPatternCluster: + description: Cluster of mutation pattern events detected within one sliding nucleotide window. + examples: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + type: object + required: + - count + - end + - eventTypeCounts + - events + - start + properties: + start: + description: 0-based first reference position included in this cluster. + type: integer + format: uint + minimum: 0.0 + end: + description: 0-based last reference position included in this cluster. 
+ type: integer + format: uint + minimum: 0.0 + count: + description: Number of matched events in this cluster. + type: integer + format: uint + minimum: 0.0 + events: + description: Matched events belonging to this cluster, sorted by reference position. + type: array + items: + $ref: '#/definitions/MutationPatternEventMatch' + eventTypeCounts: + description: Counts of event types within this cluster. + type: array + items: + $ref: '#/definitions/MutationPatternEventTypeCount' + MutationPatternCounts: + description: Summary counts for one mutation pattern. + examples: + - matches: 14 + clustered: 14 + clusters: 2 + type: object + required: + - clustered + - clusters + - matches + properties: + matches: + description: Number of private mutation events matching the pattern. + type: integer + format: uint + minimum: 0.0 + clustered: + description: Number of matching events that belong to reported clusters. + type: integer + format: uint + minimum: 0.0 + clusters: + description: Number of clusters reported for this pattern. 
+ type: integer + format: uint + minimum: 0.0 CladeNodeAttrFounderInfo: description: Information about a clade-like node attribute founder type: object diff --git a/packages/nextclade-schemas/output-ndjson.schema.json b/packages/nextclade-schemas/output-ndjson.schema.json index fbbd7d7ee..d17f29142 100644 --- a/packages/nextclade-schemas/output-ndjson.schema.json +++ b/packages/nextclade-schemas/output-ndjson.schema.json @@ -30,6 +30,7 @@ "lenUnaligned", "missing", "missingCdses", + "mutationPatterns", "nearestNodeId", "nearestNodeName", "nonACGTNs", @@ -331,6 +332,14 @@ "$ref": "#/definitions/PrivateAaMutations" } }, + "mutationPatterns": { + "description": "Per-type mutation statistics, local reference context, and detected mutation clusters", + "allOf": [ + { + "$ref": "#/definitions/MutationPatternsResults" + } + ] + }, "cladeFounderInfo": { "description": "Mutations relative to the clade founder node", "anyOf": [ @@ -1295,6 +1304,633 @@ } } }, + "MutationPatternsResults": { + "description": "Mutation pattern analysis output. 
Contains per-pattern results.", + "examples": [ + { + "results": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "matches": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ], + "clusters": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "counts": { + "matches": 2, + "clustered": 2, + "clusters": 1 + }, + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C" + } + ] + } + ], + "type": "object", + "properties": { + "results": { + "description": "Results for each configured mutation pattern. 
Empty when mutation pattern analysis is not configured and no global SNP cluster compatibility output is needed.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternResults" + } + } + } + }, + "MutationPatternResults": { + "description": "Results for a single mutation pattern.", + "examples": [ + { + "id": "adar", + "name": "ADAR-like RNA editing", + "matches": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ], + "clusters": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "counts": { + "matches": 2, + "clustered": 2, + "clusters": 1 + }, + "description": "ADAR-mediated A-to-I editing observed as A>G and complementary T>C" + } + ], + "type": "object", + "required": [ + "clusters", + "counts", + "eventTypeCounts", + "id", + "matches", + "name" + ], + "properties": { + "id": { + "description": "Stable machine-readable pattern identifier copied from pathogen.json.", + "type": "string" + }, + 
"name": { + "description": "Human-readable pattern name copied from pathogen.json.", + "type": "string" + }, + "matches": { + "description": "All private mutation events matching this pattern.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventMatch" + } + }, + "eventTypeCounts": { + "description": "Counts of matched event types across all `matches`.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventTypeCount" + } + }, + "clusters": { + "description": "Pattern-local clusters detected among `matches`.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternCluster" + } + }, + "counts": { + "description": "Summary counts for matches and clusters.", + "allOf": [ + { + "$ref": "#/definitions/MutationPatternCounts" + } + ] + }, + "description": { + "description": "Optional explanatory text copied from pathogen.json.", + "type": [ + "string", + "null" + ] + } + } + }, + "MutationPatternEventMatch": { + "description": "Matched mutation pattern event.", + "examples": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + } + ], + "oneOf": [ + { + "description": "Matched nucleotide substitution event.", + "examples": [ + { + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + } + ], + "type": "object", + "required": [ + "motifMatches", + "pos", + "qryNuc", + "refContext", + "refNuc", + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "motifMatches": { + "description": "Motif matches that spanned the substituted position. 
Empty when the pattern event had no motif restriction.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternMotifMatch" + } + }, + "refContext": { + "description": "Reference nucleotide context around the substituted position.", + "type": "array", + "items": { + "$ref": "#/definitions/Nuc" + } + }, + "pos": { + "description": "0-based position in the reference sequence", + "allOf": [ + { + "$ref": "#/definitions/Position" + } + ] + }, + "refNuc": { + "description": "Nucleotide in the reference at this position", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "qryNuc": { + "description": "Nucleotide in the query at this position", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + } + } + } + ] + }, + "MutationPatternMotifMatch": { + "description": "Reference motif match that overlapped a mutation pattern event.", + "examples": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ], + "type": "object", + "required": [ + "end", + "motif", + "start" + ], + "properties": { + "motif": { + "description": "Regular expression from the pattern configuration that matched the reference sequence.", + "type": "string" + }, + "start": { + "description": "0-based first reference position included in the motif match.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "end": { + "description": "0-based position after the end of the motif match.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, + "MutationPatternEventTypeCount": { + "description": "Count of matched mutation pattern events of one event type.", + "examples": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 8 + } + ], + "oneOf": [ + { + "description": "Count of nucleotide substitutions with the same reference and query nucleotide.", + "examples": [ + { + "refNuc": "A", + "qryNuc": "G", + "count": 8 + } + ], + "type": "object", + "required": [ + "count", + "qryNuc", + "refNuc", + "type" + ], + 
"properties": { + "type": { + "type": "string", + "enum": [ + "nucSubstitution" + ] + }, + "refNuc": { + "description": "Reference nucleotide at the substituted position.", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "qryNuc": { + "description": "Query nucleotide at the substituted position.", + "allOf": [ + { + "$ref": "#/definitions/Nuc" + } + ] + }, + "count": { + "description": "Number of matching substitutions with this reference and query nucleotide pair.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + } + ] + }, + "MutationPatternCluster": { + "description": "Cluster of mutation pattern events detected within one sliding nucleotide window.", + "examples": [ + { + "start": 5003, + "end": 5033, + "count": 2, + "events": [ + { + "type": "nucSubstitution", + "pos": 5003, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5002, + "end": 5005 + } + ] + }, + { + "type": "nucSubstitution", + "pos": 5033, + "refNuc": "A", + "qryNuc": "G", + "refContext": [ + "A", + "A", + "G" + ], + "motifMatches": [ + { + "motif": "A[ACGT]G", + "start": 5032, + "end": 5035 + } + ] + } + ], + "eventTypeCounts": [ + { + "type": "nucSubstitution", + "refNuc": "A", + "qryNuc": "G", + "count": 2 + } + ] + } + ], + "type": "object", + "required": [ + "count", + "end", + "eventTypeCounts", + "events", + "start" + ], + "properties": { + "start": { + "description": "0-based first reference position included in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "end": { + "description": "0-based last reference position included in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "count": { + "description": "Number of matched events in this cluster.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "events": { + "description": "Matched events belonging to this cluster, sorted by reference 
position.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventMatch" + } + }, + "eventTypeCounts": { + "description": "Counts of event types within this cluster.", + "type": "array", + "items": { + "$ref": "#/definitions/MutationPatternEventTypeCount" + } + } + } + }, + "MutationPatternCounts": { + "description": "Summary counts for one mutation pattern.", + "examples": [ + { + "matches": 14, + "clustered": 14, + "clusters": 2 + } + ], + "type": "object", + "required": [ + "clustered", + "clusters", + "matches" + ], + "properties": { + "matches": { + "description": "Number of private mutation events matching the pattern.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "clustered": { + "description": "Number of matching events that belong to reported clusters.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + }, + "clusters": { + "description": "Number of clusters reported for this pattern.", + "type": "integer", + "format": "uint", + "minimum": 0.0 + } + } + }, "CladeNodeAttrFounderInfo": { "description": "Information about a clade-like node attribute founder", "type": "object", diff --git a/packages/nextclade-schemas/output-ndjson.schema.yaml b/packages/nextclade-schemas/output-ndjson.schema.yaml index b5d3ba19d..9e874e63b 100644 --- a/packages/nextclade-schemas/output-ndjson.schema.yaml +++ b/packages/nextclade-schemas/output-ndjson.schema.yaml @@ -29,6 +29,7 @@ required: - lenUnaligned - missing - missingCdses +- mutationPatterns - nearestNodeId - nearestNodeName - nonACGTNs @@ -261,6 +262,10 @@ properties: type: object additionalProperties: $ref: '#/definitions/PrivateAaMutations' + mutationPatterns: + description: Per-type mutation statistics, local reference context, and detected mutation clusters + allOf: + - $ref: '#/definitions/MutationPatternsResults' cladeFounderInfo: description: Mutations relative to the clade founder node anyOf: @@ -931,6 +936,423 @@ definitions: type: array items: type: string 
+ MutationPatternsResults: + description: Mutation pattern analysis output. Contains per-pattern results. + examples: + - results: + - id: adar + name: ADAR-like RNA editing + matches: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + clusters: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + counts: + matches: 2 + clustered: 2 + clusters: 1 + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + type: object + properties: + results: + description: Results for each configured mutation pattern. Empty when mutation pattern analysis is not configured and no global SNP cluster compatibility output is needed. + type: array + items: + $ref: '#/definitions/MutationPatternResults' + MutationPatternResults: + description: Results for a single mutation pattern. 
+ examples: + - id: adar + name: ADAR-like RNA editing + matches: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + clusters: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + counts: + matches: 2 + clustered: 2 + clusters: 1 + description: ADAR-mediated A-to-I editing observed as A>G and complementary T>C + type: object + required: + - clusters + - counts + - eventTypeCounts + - id + - matches + - name + properties: + id: + description: Stable machine-readable pattern identifier copied from pathogen.json. + type: string + name: + description: Human-readable pattern name copied from pathogen.json. + type: string + matches: + description: All private mutation events matching this pattern. + type: array + items: + $ref: '#/definitions/MutationPatternEventMatch' + eventTypeCounts: + description: Counts of matched event types across all `matches`. + type: array + items: + $ref: '#/definitions/MutationPatternEventTypeCount' + clusters: + description: Pattern-local clusters detected among `matches`. + type: array + items: + $ref: '#/definitions/MutationPatternCluster' + counts: + description: Summary counts for matches and clusters. 
+ allOf: + - $ref: '#/definitions/MutationPatternCounts' + description: + description: Optional explanatory text copied from pathogen.json. + type: + - string + - 'null' + MutationPatternEventMatch: + description: Matched mutation pattern event. + examples: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + oneOf: + - description: Matched nucleotide substitution event. + examples: + - pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + type: object + required: + - motifMatches + - pos + - qryNuc + - refContext + - refNuc + - type + properties: + type: + type: string + enum: + - nucSubstitution + motifMatches: + description: Motif matches that spanned the substituted position. Empty when the pattern event had no motif restriction. + type: array + items: + $ref: '#/definitions/MutationPatternMotifMatch' + refContext: + description: Reference nucleotide context around the substituted position. + type: array + items: + $ref: '#/definitions/Nuc' + pos: + description: 0-based position in the reference sequence + allOf: + - $ref: '#/definitions/Position' + refNuc: + description: Nucleotide in the reference at this position + allOf: + - $ref: '#/definitions/Nuc' + qryNuc: + description: Nucleotide in the query at this position + allOf: + - $ref: '#/definitions/Nuc' + MutationPatternMotifMatch: + description: Reference motif match that overlapped a mutation pattern event. + examples: + - motif: A[ACGT]G + start: 5002 + end: 5005 + type: object + required: + - end + - motif + - start + properties: + motif: + description: Regular expression from the pattern configuration that matched the reference sequence. + type: string + start: + description: 0-based first reference position included in the motif match. 
+ type: integer + format: uint + minimum: 0.0 + end: + description: 0-based position after the end of the motif match. + type: integer + format: uint + minimum: 0.0 + MutationPatternEventTypeCount: + description: Count of matched mutation pattern events of one event type. + examples: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 8 + oneOf: + - description: Count of nucleotide substitutions with the same reference and query nucleotide. + examples: + - refNuc: A + qryNuc: G + count: 8 + type: object + required: + - count + - qryNuc + - refNuc + - type + properties: + type: + type: string + enum: + - nucSubstitution + refNuc: + description: Reference nucleotide at the substituted position. + allOf: + - $ref: '#/definitions/Nuc' + qryNuc: + description: Query nucleotide at the substituted position. + allOf: + - $ref: '#/definitions/Nuc' + count: + description: Number of matching substitutions with this reference and query nucleotide pair. + type: integer + format: uint + minimum: 0.0 + MutationPatternCluster: + description: Cluster of mutation pattern events detected within one sliding nucleotide window. + examples: + - start: 5003 + end: 5033 + count: 2 + events: + - type: nucSubstitution + pos: 5003 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5002 + end: 5005 + - type: nucSubstitution + pos: 5033 + refNuc: A + qryNuc: G + refContext: + - A + - A + - G + motifMatches: + - motif: A[ACGT]G + start: 5032 + end: 5035 + eventTypeCounts: + - type: nucSubstitution + refNuc: A + qryNuc: G + count: 2 + type: object + required: + - count + - end + - eventTypeCounts + - events + - start + properties: + start: + description: 0-based first reference position included in this cluster. + type: integer + format: uint + minimum: 0.0 + end: + description: 0-based last reference position included in this cluster. 
+ type: integer + format: uint + minimum: 0.0 + count: + description: Number of matched events in this cluster. + type: integer + format: uint + minimum: 0.0 + events: + description: Matched events belonging to this cluster, sorted by reference position. + type: array + items: + $ref: '#/definitions/MutationPatternEventMatch' + eventTypeCounts: + description: Counts of event types within this cluster. + type: array + items: + $ref: '#/definitions/MutationPatternEventTypeCount' + MutationPatternCounts: + description: Summary counts for one mutation pattern. + examples: + - matches: 14 + clustered: 14 + clusters: 2 + type: object + required: + - clustered + - clusters + - matches + properties: + matches: + description: Number of private mutation events matching the pattern. + type: integer + format: uint + minimum: 0.0 + clustered: + description: Number of matching events that belong to reported clusters. + type: integer + format: uint + minimum: 0.0 + clusters: + description: Number of clusters reported for this pattern. + type: integer + format: uint + minimum: 0.0 CladeNodeAttrFounderInfo: description: Information about a clade-like node attribute founder type: object diff --git a/packages/nextclade-web/src/components/Export/ExportTabColumnConfig.tsx b/packages/nextclade-web/src/components/Export/ExportTabColumnConfig.tsx index 423455908..c3fcfcd3c 100644 --- a/packages/nextclade-web/src/components/Export/ExportTabColumnConfig.tsx +++ b/packages/nextclade-web/src/components/Export/ExportTabColumnConfig.tsx @@ -137,6 +137,36 @@ export function ExportTabColumnConfig({ setActiveTabId }: { setActiveTabId(id: s [onRelMutsColumnsStateChange, relMutsColumnsState, t], ) + const mutPatternsColumnsState = useMemo( + () => csvColumnConfig?.includeMutPatterns ?? false, + [csvColumnConfig?.includeMutPatterns], + ) + + const onMutPatternsColumnsStateChange = useCallback(() => { + setCsvColumnConfig((config) => + config + ? 
{ + ...config, + includeMutPatterns: !config.includeMutPatterns, + } + : undefined, + ) + }, [setCsvColumnConfig]) + + const mutPatterns = useMemo( + () => ( + + + + ), + [mutPatternsColumnsState, onMutPatternsColumnsStateChange, t], + ) + const dynamicColumnsState = useMemo(() => csvColumnConfig?.includeDynamic ?? false, [csvColumnConfig?.includeDynamic]) const onDynamicColumnsStateChange = useCallback(() => { @@ -179,6 +209,7 @@ export function ExportTabColumnConfig({ setActiveTabId }: { setActiveTabId(id: s {categories} + diff --git a/packages/nextclade-web/src/components/Results/ColumnMutations.tsx b/packages/nextclade-web/src/components/Results/ColumnMutations.tsx index 561b3df83..ac16154a8 100644 --- a/packages/nextclade-web/src/components/Results/ColumnMutations.tsx +++ b/packages/nextclade-web/src/components/Results/ColumnMutations.tsx @@ -1,16 +1,129 @@ import React, { useCallback, useMemo, useState } from 'react' import { useRecoilValue } from 'recoil' +import styled from 'styled-components' import { REF_NODE_CLADE_FOUNDER, REF_NODE_PARENT, REF_NODE_ROOT } from 'src/constants' import { findCladeNodeAttrFounderInfo, getAaMutations, getNucMutations } from 'src/helpers/relativeMuts' import { viewedDatasetNameAtom } from 'src/state/dataset.state' import { currentRefNodeNameAtom, refNodesAtom } from 'src/state/results.state' import type { ColumnCladeProps } from 'src/components/Results/ColumnClade' +import type { AnalysisResult } from 'src/types' import { getSafeId } from 'src/helpers/getSafeId' -import { TableSlim } from 'src/components/Common/TableSlim' import { Tooltip } from 'src/components/Results/Tooltip' import { ListOfNucMuts } from 'src/components/Results/ListOfNucMuts' import { ListOfAaMuts } from 'src/components/Results/ListOfAaMuts' +import { NucleotideMutationBadge } from 'src/components/Common/MutationBadge' import { useTranslationSafe } from 'src/helpers/useTranslationSafe' +import { TableSlim } from 'src/components/Common/TableSlim' +import 
type { MutationPatternEventMatch } from 'src/gen/_SchemaRoot' + +const PatternList = styled.div` + border-top: 1px solid #dee2e6; + margin-top: 0.5rem; + padding-top: 0.5rem; +` + +const PatternSection = styled.section` + border-top: 1px solid #343a40; + margin-top: 0.7rem; + padding-top: 0.6rem; + + &:first-child { + border-top: none; + margin-top: 0; + padding-top: 0; + } +` + +const PatternName = styled.div` + font-weight: 700; +` + +const PatternDescription = styled.div` + color: ${(props) => props.theme.gray600}; + font-size: 0.85em; + margin-bottom: 0.35rem; +` + +const ClusterCard = styled.div` + background: rgba(255, 140, 0, 0.06); + border: 1px solid rgba(255, 140, 0, 0.2); + border-radius: 3px; + padding: 0.25rem 0.4rem; + + &:not(:last-child) { + margin-bottom: 0.35rem; + } +` + +const ClusterTitle = styled.div` + font-weight: 700; + font-size: 0.85em; + margin-bottom: 2px; +` + +const ClusterBadgeGrid = styled.div` + display: flex; + flex-wrap: wrap; + gap: 2px; +` + +function MutationPatternsSection({ analysisResult }: { analysisResult: AnalysisResult }) { + const { t } = useTranslationSafe() + const { mutationPatterns, privateNucMutations } = analysisResult + + if (!mutationPatterns?.results?.length || privateNucMutations.privateSubstitutions.length === 0) { + return null + } + + const visiblePatterns = mutationPatterns.results.filter((pattern) => pattern.clusters.length > 0) + + if (visiblePatterns.length === 0) { + return null + } + + return ( + + {visiblePatterns.map((pattern) => ( + + {pattern.name || t('Mutation pattern')} + {pattern.description && {pattern.description}} + + {pattern.clusters.map((cluster) => ( + + + {t('{{start}}-{{end}} ({{count}} events)', { + start: cluster.start + 1, + end: cluster.end + 1, + count: cluster.count, + })} + + + {cluster.events.map((event) => ( + + ))} + + + ))} + + ))} + + ) +} + +function MutationPatternEventBadge({ event }: { event: MutationPatternEventMatch }) { + switch (event.type) { + case 
'nucSubstitution': + return + } +} + +function mutationPatternEventKey(event: MutationPatternEventMatch): string { + switch (event.type) { + case 'nucSubstitution': + return `${event.type}:${event.pos}:${event.refNuc}:${event.qryNuc}` + } + throw new Error(`Unknown mutation pattern event type: ${event.type}`) +} export function ColumnMutations({ analysisResult }: ColumnCladeProps) { const { t } = useTranslationSafe() @@ -117,6 +230,8 @@ export function ColumnMutations({ analysisResult }: ColumnCladeProps) { + + ) diff --git a/packages/nextclade-web/src/components/SequenceView/SequenceMarkerCluster.tsx b/packages/nextclade-web/src/components/SequenceView/SequenceMarkerCluster.tsx new file mode 100644 index 000000000..8a46587a3 --- /dev/null +++ b/packages/nextclade-web/src/components/SequenceView/SequenceMarkerCluster.tsx @@ -0,0 +1,130 @@ +import React, { SVGProps, useCallback, useMemo, useState } from 'react' +import { useRecoilValue } from 'recoil' +import styled from 'styled-components' + +import { useTranslationSafe as useTranslation } from 'src/helpers/useTranslationSafe' + +import { Tooltip } from 'src/components/Results/Tooltip' +import { NucleotideMutationBadge } from 'src/components/Common/MutationBadge' +import { getSafeId } from 'src/helpers/getSafeId' +import { + SeqMarkerHeightState, + getSeqMarkerDims, + seqMarkerClusterHeightStateAtom, +} from 'src/state/seqViewSettings.state' +import type { MutationPatternEventMatch } from 'src/gen/_SchemaRoot' + +const CLUSTER_FILL = 'rgba(255, 140, 0, 0.12)' +const CLUSTER_STROKE = '#e06000' +const CLUSTER_MIN_WIDTH_PX = 12 + +const ClusterBadgeGrid = styled.div` + display: flex; + flex-wrap: wrap; + gap: 2px; + margin-top: 4px; +` + +const ClusterDescription = styled.div` + margin-top: 4px; + font-size: 0.85em; + color: #666; +` + +interface ClusterProps { + start: number + end: number + count: number + events: MutationPatternEventMatch[] +} + +export interface SequenceMarkerClusterProps extends SVGProps { + 
index: number + seqName: string + cluster: ClusterProps + pixelsPerBase: number + description?: string +} + +function SequenceMarkerClusterUnmemoed({ + index, + seqName, + cluster, + pixelsPerBase, + description, + ...rest +}: SequenceMarkerClusterProps) { + const { t } = useTranslation() + const [showTooltip, setShowTooltip] = useState(false) + const onMouseEnter = useCallback(() => setShowTooltip(true), []) + const onMouseLeave = useCallback(() => setShowTooltip(false), []) + + const seqMarkerClusterHeightState = useRecoilValue(seqMarkerClusterHeightStateAtom) + const { y, height } = useMemo(() => getSeqMarkerDims(seqMarkerClusterHeightState), [seqMarkerClusterHeightState]) + + if (seqMarkerClusterHeightState === SeqMarkerHeightState.Off) { + return null + } + + const { start, end, count, events } = cluster + + const id = getSafeId('cluster-marker', { index, seqName, begin: start, end }) + + let width = (end - start + 1) * pixelsPerBase + width = Math.max(width, CLUSTER_MIN_WIDTH_PX) + const halfNuc = Math.max(pixelsPerBase, CLUSTER_MIN_WIDTH_PX) / 2 + const x = start * pixelsPerBase - halfNuc + + return ( + + + +
+ + {t('Mutation cluster: {{start}}-{{end}} ({{count}} events)', { + start: start + 1, + end: end + 1, + count, + })} + +
+ {events.length > 0 && ( + + {events.map((event) => ( + + ))} + + )} + {description && {description}} +
+
+ ) +} + +export const SequenceMarkerCluster = React.memo(SequenceMarkerClusterUnmemoed) + +function MutationPatternEventBadge({ event }: { event: MutationPatternEventMatch }) { + switch (event.type) { + case 'nucSubstitution': + return + } +} + +function mutationPatternEventKey(event: MutationPatternEventMatch): string { + switch (event.type) { + case 'nucSubstitution': + return `${event.type}:${event.pos}:${event.refNuc}:${event.qryNuc}` + } + throw new Error(`Unknown mutation pattern event type: ${event.type}`) +} diff --git a/packages/nextclade-web/src/components/SequenceView/SequenceViewAbsolute.tsx b/packages/nextclade-web/src/components/SequenceView/SequenceViewAbsolute.tsx index ea164a01f..b20c21a44 100644 --- a/packages/nextclade-web/src/components/SequenceView/SequenceViewAbsolute.tsx +++ b/packages/nextclade-web/src/components/SequenceView/SequenceViewAbsolute.tsx @@ -11,6 +11,7 @@ import { SequenceMarkerGap } from './SequenceMarkerGap' import { SequenceMarkerMissing } from './SequenceMarkerMissing' import { SequenceMarkerMutation } from './SequenceMarkerMutation' import { SequenceMarkerUnsequencedEnd, SequenceMarkerUnsequencedStart } from './SequenceMarkerUnsequenced' +import { SequenceMarkerCluster } from './SequenceMarkerCluster' import { SequenceMarkerFrameShift } from './SequenceMarkerFrameShift' import { SequenceMarkerInsertion } from './SequenceMarkerInsertion' import { SequenceViewCoverageWrapper, SequenceViewCoverageText, SequenceViewSVG } from './SequenceViewStyles' @@ -32,6 +33,7 @@ export function SequenceViewAbsolute({ sequence, width }: SequenceViewAbsolutePr insertions, nucToAaMuts, nonACGTNs, + mutationPatterns, } = sequence const { t } = useTranslationSafe() @@ -103,6 +105,19 @@ export function SequenceViewAbsolute({ sequence, width }: SequenceViewAbsolutePr ) }) + const clusterViews = (mutationPatterns?.results ?? []).flatMap((pattern, patternIndex) => + (pattern.clusters ?? 
[]).map((cluster) => ( + + )), + ) + const frameShiftMarkers = frameShifts.map((frameShift) => ( na.begin).join('-')}`} @@ -114,7 +129,7 @@ export function SequenceViewAbsolute({ sequence, width }: SequenceViewAbsolutePr )) const totalMarkers = - mutationViews.length + deletionViews.length + missingViews.length + frameShiftMarkers.length + insertionViews.length + mutationViews.length + deletionViews.length + missingViews.length + frameShiftMarkers.length + insertionViews.length + clusterViews.length if (totalMarkers > maxNucMarkers) { return ( @@ -164,6 +179,7 @@ export function SequenceViewAbsolute({ sequence, width }: SequenceViewAbsolutePr pixelsPerBase={pixelsPerBase} /> {frameShiftMarkers} + {clusterViews} ) } diff --git a/packages/nextclade-web/src/components/SequenceView/SequenceViewRelative.tsx b/packages/nextclade-web/src/components/SequenceView/SequenceViewRelative.tsx index de99b7c56..f82001f9b 100644 --- a/packages/nextclade-web/src/components/SequenceView/SequenceViewRelative.tsx +++ b/packages/nextclade-web/src/components/SequenceView/SequenceViewRelative.tsx @@ -11,6 +11,7 @@ import { SequenceMarkerMutation } from './SequenceMarkerMutation' import { SequenceMarkerGap } from './SequenceMarkerGap' import { SequenceMarkerAmbiguous } from './SequenceMarkerAmbiguous' import { SequenceMarkerMissing } from './SequenceMarkerMissing' +import { SequenceMarkerCluster } from './SequenceMarkerCluster' import { SequenceMarkerFrameShift } from './SequenceMarkerFrameShift' import { SequenceMarkerInsertion } from './SequenceMarkerInsertion' import { SequenceMarkerUnsequencedEnd, SequenceMarkerUnsequencedStart } from './SequenceMarkerUnsequenced' @@ -23,7 +24,7 @@ export interface SequenceViewRelativeProps { } export function SequenceViewRelative({ sequence, width, refNodeName }: SequenceViewRelativeProps) { - const { index, seqName, missing, alignmentRange, frameShifts, insertions, nucToAaMuts, nonACGTNs } = sequence + const { index, seqName, missing, alignmentRange, 
frameShifts, insertions, nucToAaMuts, nonACGTNs, mutationPatterns } = sequence const { t } = useTranslationSafe() const maxNucMarkers = useRecoilValue(maxNucMarkersAtom) @@ -93,6 +94,19 @@ export function SequenceViewRelative({ sequence, width, refNodeName }: SequenceV /> )) + const clusterViews = (mutationPatterns?.results ?? []).flatMap((pattern, patternIndex) => + (pattern.clusters ?? []).map((cluster) => ( + + )), + ) + const frameShiftMarkers = frameShifts.map((frameShift) => ( na.begin).join('-')}`} @@ -104,7 +118,7 @@ export function SequenceViewRelative({ sequence, width, refNodeName }: SequenceV )) const totalMarkers = - mutationViews.length + deletionViews.length + missingViews.length + frameShiftMarkers.length + insertionViews.length + mutationViews.length + deletionViews.length + missingViews.length + frameShiftMarkers.length + insertionViews.length + clusterViews.length if (totalMarkers > maxNucMarkers) { return ( @@ -155,6 +169,7 @@ export function SequenceViewRelative({ sequence, width, refNodeName }: SequenceV pixelsPerBase={pixelsPerBase} /> {frameShiftMarkers} + {clusterViews} ) } diff --git a/packages/nextclade-web/src/components/Settings/SeqViewSettings.tsx b/packages/nextclade-web/src/components/Settings/SeqViewSettings.tsx index 34fc4e6b5..4aa6ac454 100644 --- a/packages/nextclade-web/src/components/Settings/SeqViewSettings.tsx +++ b/packages/nextclade-web/src/components/Settings/SeqViewSettings.tsx @@ -12,6 +12,7 @@ import { SeqMarkerState, maxNucMarkersAtom, seqMarkerAmbiguousHeightStateAtom, + seqMarkerClusterHeightStateAtom, seqMarkerFrameShiftStateAtom, seqMarkerGapHeightStateAtom, seqMarkerHeightStateFromString, @@ -78,6 +79,10 @@ export function SeqViewSettings() { seqMarkerUnsequencedHeightStateAtom, ) + const [seqMarkerClusterHeightState, setSeqMarkerClusterHeightState] = useSeqMarkerHeightState( + seqMarkerClusterHeightStateAtom, + ) + const [seqMarkerInsertionState, setSeqMarkerInsertionState] = 
useSeqMarkerState(seqMarkerInsertionStateAtom) const [seqMarkerFrameShiftState, setSeqMarkerFrameShiftState] = useSeqMarkerState(seqMarkerFrameShiftStateAtom) @@ -174,6 +179,18 @@ export function SeqViewSettings() { + + + +