Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ rule run_tbprofiler:
output:
touch("data/tbprofiler/flags/{sample}_flag.txt"),
params:
s3_bucket=config["s3_bucket"],
s3_dst_unversioned=config["s3_dst_unversioned"],
tb_output_path="data/tbprofiler/results/{sample}.results.json",
fastq_outdir="data/fastq",
tb_outdir="data/tbprofiler",
Expand All @@ -135,7 +135,7 @@ rule run_tbprofiler:
{params.fastq_outdir} \
{params.tb_outdir} \
{threads} \
{params.s3_bucket} \
{params.s3_dst_unversioned} \
|| echo "tbprofiler failed at sample {wildcards.sample}"
rm -f data/tbprofiler/bam/{wildcards.sample}.bam*
rm -f data/tbprofiler/vcf/{wildcards.sample}.targets.vcf.gz
Expand Down Expand Up @@ -192,7 +192,7 @@ rule run_snippy:
touch("data/snippy/flags/{sample}_flag.txt"),
priority: 1000
params:
s3_bucket=config["s3_bucket"],
s3_dst_unversioned=config["s3_dst_unversioned"],
snippy_output_path="data/snippy/{sample}",
fastq_outdir="data/fastq",
reference=config["files"]["reference_genbank"],
Expand All @@ -212,7 +212,7 @@ rule run_snippy:
{params.fastq_outdir} \
{params.reference} \
{threads} \
{params.s3_bucket} \
{params.s3_dst_unversioned} \
|| echo "snippy failed at sample {wildcards.sample}"
rm -f data/fastq/{wildcards.sample}_*.fastq.gz
rm -fr data/snippy/{wildcards.sample}/reference
Expand Down
2 changes: 1 addition & 1 deletion build-configs/nextstrain-automation/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
custom_rules:
- build-configs/nextstrain-automation/deploy.smk

s3_bucket: "nextstrain-data"
s3_dst_unversioned: "nextstrain-data-unversioned/files/workflows/tb/"
Comment thread
joverlee521 marked this conversation as resolved.
Outdated
deploy_url: "s3://nextstrain-data"
2 changes: 1 addition & 1 deletion defaults/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
strain_id_field: "accession"
s3_bucket: ""
s3_dst_unversioned: ""
files:
exclude: "defaults/dropped_strains.txt"
reference_genbank: "defaults/GCF_000195955.2_ASM19595v2_genomic.gbff"
Expand Down
22 changes: 10 additions & 12 deletions scripts/run_snippy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,16 @@ snippy_output_path="$2"
fastq_outdir="$3"
reference="$4"
threads="$5"
s3_bucket="${6:-}"

s3_path="files/workflows/tb/${snippy_output_path}"
s3_dst_unversioned="${6:-}"

# Check if an S3 destination (bucket plus optional key prefix) is configured and accessible
USE_S3=false
if [[ -n "${s3_bucket}" ]]; then
if aws s3 ls "s3://${s3_bucket}" > /dev/null 2>&1; then
if [[ -n "${s3_dst_unversioned}" ]]; then
if aws s3 ls "s3://${s3_dst_unversioned}" > /dev/null 2>&1; then
USE_S3=true
echo "S3 bucket accessible. Will use S3 caching." >&2
else
echo "Warning: Cannot access s3://${s3_bucket}. Running without S3 caching." >&2
echo "Warning: Cannot access s3://${s3_dst_unversioned}. Running without S3 caching." >&2
fi
else
echo "S3 bucket not specified. Running without S3 caching." >&2
Expand Down Expand Up @@ -45,18 +43,18 @@ upload_zstd() {

# Try to download from S3 if enabled and BOTH expected .zst files exist
if [[ "$USE_S3" == "true" ]] \
&& aws s3 ls "s3://${s3_bucket}/${s3_path}/snps.aligned.fa.zst" >/dev/null 2>&1 \
&& aws s3 ls "s3://${s3_bucket}/${s3_path}/snps.vcf.zst" >/dev/null 2>&1; then
&& aws s3 ls "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.aligned.fa.zst" >/dev/null 2>&1 \
&& aws s3 ls "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.vcf.zst" >/dev/null 2>&1; then
echo "Found snippy results on S3 (.zst). Downloading to ${snippy_output_path} …" >&2
mkdir -p "$(dirname "${snippy_output_path}")" "${snippy_output_path}"

# Download and decompress aligned.fa, then remove local .zst
aws s3 cp "s3://${s3_bucket}/${s3_path}/snps.aligned.fa.zst" "${snippy_output_path}/snps.aligned.fa.zst"
aws s3 cp "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.aligned.fa.zst" "${snippy_output_path}/snps.aligned.fa.zst"
zstd -d -f "${snippy_output_path}/snps.aligned.fa.zst" -o "${snippy_output_path}/snps.aligned.fa"
rm -f "${snippy_output_path}/snps.aligned.fa.zst"

# Download and decompress vcf, then remove local .zst
aws s3 cp "s3://${s3_bucket}/${s3_path}/snps.vcf.zst" "${snippy_output_path}/snps.vcf.zst"
aws s3 cp "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.vcf.zst" "${snippy_output_path}/snps.vcf.zst"
zstd -d -f "${snippy_output_path}/snps.vcf.zst" -o "${snippy_output_path}/snps.vcf"
rm -f "${snippy_output_path}/snps.vcf.zst"

Expand Down Expand Up @@ -110,7 +108,7 @@ else
# Upload to S3 if enabled
if [[ "$USE_S3" == "true" ]]; then
echo "Uploading compressed snippy results to S3…" >&2
upload_zstd "${snippy_output_path}/snps.aligned.fa" "s3://${s3_bucket}/${s3_path}/snps.aligned.fa"
upload_zstd "${snippy_output_path}/snps.vcf" "s3://${s3_bucket}/${s3_path}/snps.vcf"
upload_zstd "${snippy_output_path}/snps.aligned.fa" "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.aligned.fa"
upload_zstd "${snippy_output_path}/snps.vcf" "s3://${s3_dst_unversioned}/${snippy_output_path}/snps.vcf"
fi
fi
16 changes: 7 additions & 9 deletions scripts/run_tbprofiler.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,27 @@ tb_output_path="$2"
fastq_outdir="$3"
tb_outdir="$4"
threads="$5"
s3_bucket="${6:-}"

s3_path="files/workflows/tb/${tb_output_path}" # S3 key for the (compressed) tb-profiler output
s3_dst_unversioned="${6:-}"

# Check if an S3 destination (bucket plus optional key prefix) is configured and accessible
USE_S3=false
if [[ -n "${s3_bucket}" ]]; then
if aws s3 ls "s3://${s3_bucket}" > /dev/null 2>&1; then
if [[ -n "${s3_dst_unversioned}" ]]; then
if aws s3 ls "s3://${s3_dst_unversioned}" > /dev/null 2>&1; then
USE_S3=true
echo "S3 bucket accessible. Will use S3 caching." >&2
else
echo "Warning: Cannot access s3://${s3_bucket}. Running without S3 caching." >&2
echo "Warning: Cannot access s3://${s3_dst_unversioned}. Running without S3 caching." >&2
fi
else
echo "S3 bucket not specified. Running without S3 caching." >&2
fi

# Try to download from S3 if enabled and results exist
if [[ "$USE_S3" == "true" ]] && aws s3 ls "s3://${s3_bucket}/${s3_path}.zst" >/dev/null 2>&1; then
if [[ "$USE_S3" == "true" ]] && aws s3 ls "s3://${s3_dst_unversioned}/${tb_output_path}.zst" >/dev/null 2>&1; then
echo "Found tb-profiler results on S3 (.zst). Downloading to ${tb_output_path} …" >&2
mkdir -p "$(dirname "${tb_output_path}")"

aws s3 cp "s3://${s3_bucket}/${s3_path}.zst" "${tb_output_path}.zst"
aws s3 cp "s3://${s3_dst_unversioned}/${tb_output_path}.zst" "${tb_output_path}.zst"
zstd -d -f "${tb_output_path}.zst" -o "${tb_output_path}"
rm -f "${tb_output_path}.zst"

Expand Down Expand Up @@ -80,7 +78,7 @@ else
echo "Uploading compressed tb-profiler result to S3…" >&2
# Compress -> upload -> remove local .zst (leave plain file locally)
zstd -f -T"${threads}" -19 "${tb_output_path}" -o "${tb_output_path}.zst"
aws s3 cp "${tb_output_path}.zst" "s3://${s3_bucket}/${s3_path}.zst"
aws s3 cp "${tb_output_path}.zst" "s3://${s3_dst_unversioned}/${tb_output_path}.zst"
rm -f "${tb_output_path}.zst"
fi
fi