Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jobs:
strategy:
fail-fast: false
runs-on: ubuntu-latest
env:
SQL_DRIVER: sqlite

steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 8 additions & 6 deletions examples/empty/.tests/unit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os
from pathlib import Path
from subprocess import check_output
from subprocess import check_output, PIPE


cmp_cmds = {
Expand All @@ -28,21 +28,22 @@ def check(self, cmp_cmds=cmp_cmds):
for path, subdirs, files in os.walk(self.data_path)
for f in files
)
print(f"input: {input_files}") # DEBUG
# Workdir files
print(f"input_files: {input_files}") # DEBUG
# Workdir files (ignoring '.snakemake/' and 'config/' folders)
workdir_files = set(
(Path(path) / f).relative_to(self.workdir)
for path, subdirs, files in os.walk(self.workdir)
for f in files
if "/.snakemake" not in path and "/config" not in path
)
print(f"workdir: {workdir_files}") # DEBUG
print(f"workdir_files: {workdir_files}") # DEBUG
# Expected files
expected_files = set(
(Path(path) / f).relative_to(self.expected_path)
for path, subdirs, files in os.walk(self.expected_path)
for f in files
)
print(f"expected: {expected_files}") # DEBUG
print(f"expected_files: {expected_files}") # DEBUG

assert expected_files.issubset(
workdir_files
Expand All @@ -55,5 +56,6 @@ def check(self, cmp_cmds=cmp_cmds):
def compare_files(self, expected_file, generated_file, cmp_cmds):
check_output(
cmp_cmds.get(expected_file.suffix, ["cmp"])
+ [expected_file, generated_file]
+ [expected_file, generated_file],
stderr=PIPE,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@

# - Only tested with Phred33 quality scores

samples: config/samples.tsv

units: config/units.tsv



#############
### READS ###
#############
trim:
trim:
activate: true
tool: adapterremoval
params: "--trimns --maxns 10 --trimqualities --minlength 30 --mask-degenerate-bases --seed 12345"

# Ignored for single-end (SE) data
collapse:
activate: true
params: "--collapse-conservatively"

derep:
extension:
activate: false
k: 16
params: "ibb=t prefilter=0 el=100 er=100 ecc=f ecco=f ignorebadquality extendrollback=0"

derep:
activate: true
# vsearch or seqkit
tool: seqkit
params: ""

low_complex:
params: "entropy=0.7 entropywindow=30 entropyk=4"



#############
### ALIGN ###
#############
prefilter:
taxa: "Bacteria,Archaea,Viruses"

ref:
prok:
n_shards: 2
path: "data/prok.{n_shard}-of-2.fas.gz"
map:
tool: bowtie2
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal -N 1"
bt2l: False
acc2taxid: "data/prok.acc2taxid.gz"
virus:
n_shards: 1
path: "data/virus.1-of-1.fas.gz"
map:
tool: bowtie2
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
bt2l: False
acc2taxid: "data/virus.acc2taxid.gz"

filter:
saturated_reads:
activate: true
n_alns: 10

bam_filter:
reassign:
activate: false
params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0 --reference-lengths genomes.len.map"

filter:
activate: false
params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 94 --reference-lengths genomes.len.map"

lca:
activate: false
params: "--lca-rank genus --reference-lengths genomes.len.map"

taxonomy:
nodes: "data/taxdump/nodes.dmp"
names: "data/taxdump/names.dmp"

metadmg:
damage:
params: "--print_length 15"

lca:
params: "--fix_ncbi 0 --how_many 25 --weight_type 1 --edit_dist_max 10000 --lca_rank genus"

dfit:
params: "--nopt 5 --showfits 2"


euk:
ref:
mitoch:
n_shards: 1
path: "data/mitoch.1-of-1.fas.gz"
map:
tool: bowtie2
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
bt2l: False
acc2taxid: "data/mitoch.acc2taxid.gz"
plastid:
n_shards: 1
path: "data/plastid.1-of-1.fas.gz"
map:
tool: bowtie2
params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
bt2l: False
acc2taxid: "data/plastid.acc2taxid.gz"

filter:
saturated_reads:
activate: true
n_alns: 10

bam_filter:
reassign:
activate: false
params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0"

filter:
activate: false
params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 92"

lca:
activate: false
params: "--lca-rank genus"

taxonomy:
nodes: "data/taxdump/nodes.dmp"
names: "data/taxdump/names.dmp"

metadmg:
damage:
params: "--print_length 15"

lca:
params: "--fix_ncbi 0 --how_many 15 --sim_score_low 0.95 --weight_type 0 --lca_rank genus"

dfit:
params: "--nopt 5 --showfits 2 --seed 12345"


############
## REPORT ##
############
report:
multiqc: "--verbose --cl-config 'custom_logo: data/KU_long.png' --cl-config 'custom_logo_title: CAEG - Center for Ancient Environmental Genomics' --cl-config 'custom_logo_url: https://globe.ku.dk/research/caeg/'"
multiqc_db_url: "sqlite:///test_qc.sqlite"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sample alias group condition
HD827sonic_1 NA NA NA
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Adapter list: https://gist.github.com/photocyte/3edd9401d0b13476e60f8b104c2575f8
sample library barcode flowcell lane seq_type library_type material data machine run_n center platform adapters sample_n date
HD827sonic_1 lib1 ACGGAACAxACGAGAAC HKTG2BGXG L001 PE ds DNA data/empty_L001_R{Read}.fq.gz NDX550220 98 HYDRA_GEN ILLUMINA AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT S1 21-09-2021
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
INFO:DB sqlite:///test_qc.sqlite does not exist.
INFO:Reading file reports/multiqc_data.taxon.zip
INFO:Uploading report v1.33 to DB...
INFO:Adding report record to DB
INFO:Adding report metadata to DB
INFO:Parsing section multiqc_fastqc_prefilter-fastqc
INFO:Parsing section multiqc_fastqc_low_complexity-fastqc
INFO:Parsing section multiqc_fastqc_derep-fastqc
INFO:Parsing section multiqc_nonpareil_merge_lanes-nonpareil
INFO:Parsing section multiqc_fastqc_merge_lanes-fastqc
INFO:Parsing section multiqc_fastqc_trim-fastqc
INFO:Parsing section multiqc_adapter_removal
INFO:Parsing section multiqc_fastqc_raw-fastqc
INFO:Parsing section multiqc_general_stats
INFO:Parsing plot prefilter-fastqc_sequence_counts_plot
INFO:Parsing dataset prefilter-fastqc_sequence_counts_plot
INFO:Parsing plot prefilter-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset prefilter-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset prefilter-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot prefilter-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset prefilter-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot prefilter-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot low_complexity-fastqc_sequence_counts_plot
INFO:Parsing dataset low_complexity-fastqc_sequence_counts_plot
INFO:Parsing plot low_complexity-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset low_complexity-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset low_complexity-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot low_complexity-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset low_complexity-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot low_complexity-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot derep-fastqc_sequence_counts_plot
INFO:Parsing dataset derep-fastqc_sequence_counts_plot
INFO:Parsing plot derep-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset derep-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset derep-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot derep-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset derep-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot derep-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot nonpareil-table
WARNING:Plot type violin plot is not supported
INFO:Parsing plot nonpareil-redundancy-plot
INFO:Parsing dataset nonpareil-redundancy-plot_Combined
INFO:Parsing dataset nonpareil-redundancy-plot_Observed
INFO:Parsing plot merge_lanes-fastqc_sequence_counts_plot
INFO:Parsing dataset merge_lanes-fastqc_sequence_counts_plot
INFO:Parsing plot merge_lanes-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset merge_lanes-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset merge_lanes-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot merge_lanes-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset merge_lanes-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot merge_lanes-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot trim-fastqc_sequence_counts_plot
INFO:Parsing dataset trim-fastqc_sequence_counts_plot
INFO:Parsing plot trim-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset trim-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset trim-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot trim-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset trim-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot trim-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot ar_retained_plot
INFO:Parsing dataset ar_retained_plot
INFO:Parsing plot raw-fastqc_sequence_counts_plot
INFO:Parsing dataset raw-fastqc_sequence_counts_plot
INFO:Parsing plot raw-fastqc_per_sequence_gc_content_plot
INFO:Parsing dataset raw-fastqc_per_sequence_gc_content_plot_Percentages
INFO:Parsing dataset raw-fastqc_per_sequence_gc_content_plot_Counts
INFO:Parsing plot raw-fastqc_sequence_duplication_levels_plot
INFO:Parsing dataset raw-fastqc_sequence_duplication_levels_plot
INFO:Parsing plot raw-fastqc-status-check-heatmap
WARNING:Plot type heatmap is not supported
INFO:Parsing plot general_stats_table
WARNING:Plot type violin plot is not supported
69 changes: 69 additions & 0 deletions examples/empty/.tests/unit/test_multiqc_taxon_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Rule test code for unit testing of rules generated with Snakemake 9.16.4.dev3.
"""

import os
import sys
import shutil
import tempfile
from pathlib import Path
from subprocess import check_output

sys.path.insert(0, os.path.dirname(__file__))


def test_multiqc_taxon_upload(conda_prefix):
    """Run the ``multiqc_taxon_upload`` rule in a scratch workdir and check its output.

    The rule's config and input-data fixtures are copied into a temporary
    directory, Snakemake is invoked there restricted to this single rule, and
    the generated files are compared against the expected fixtures.

    Args:
        conda_prefix: List of extra command-line arguments appended to the
            Snakemake invocation (presumably supplied by a pytest fixture --
            confirm against conftest.py).

    Raises:
        subprocess.CalledProcessError: If the Snakemake run exits non-zero.
    """
    fixtures = Path(".tests/unit/multiqc_taxon_upload")
    data_path = fixtures / "data"
    expected_path = fixtures / "expected"

    with tempfile.TemporaryDirectory() as scratch:
        workdir = Path(scratch) / "workdir"

        # Seed the workdir with the config first (this creates it), then layer
        # the input data on top, which must tolerate the existing directory.
        shutil.copytree(fixtures / "config", workdir)
        shutil.copytree(data_path, workdir, dirs_exist_ok=True)

        # Run only the rule under test, forcing its target to be rebuilt.
        snakemake_cmd = [
            "python",
            "-m",
            "snakemake",
            "stats/reports/multiqc_taxon.upload.flag",
            "--snakefile",
            "../../workflow/Snakefile",
            "-f",
            "--notemp",
            "--show-failed-logs",
            "-j1",
            "--target-files-omit-workdir-adjustment",
            "--allowed-rules",
            "multiqc_taxon_upload",
            "--configfile",
            "config/config.yaml",
            "--software-deployment-method",
            "conda",
            "--directory",
            workdir,
        ]
        check_output(snakemake_cmd + conda_prefix)

        # Compare generated files against the expected ones. By default this is
        # a byte-by-byte check using cmp/zcmp/bzcmp/xzcmp; to change that,
        # subclass common.OutputChecker here and override
        # `compare_files(expected_file, generated_file, cmp_cmds)`
        # (see common.py).
        # Imported here because `common` is only resolvable through the
        # sys.path entry added at module import time.
        import common

        # `.flag` files are compared with diff, skipping lines that match
        # "Uploading" or "tzname" (presumably because they differ between
        # runs/environments -- confirm).
        flag_cmp_cmds = {
            ".flag": [
                "diff",
                "--ignore-matching-lines=Uploading",
                "--ignore-matching-lines=tzname",
            ]
        }
        checker = common.OutputChecker(data_path, expected_path, workdir)
        checker.check(flag_cmp_cmds)
8 changes: 7 additions & 1 deletion examples/empty/.tests/unit/test_taxon_align_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,11 @@ def test_taxon_align_stats(conda_prefix):
import common

common.OutputChecker(data_path, expected_path, workdir).check(
{".txt": ["diff", "--ignore-matching-lines=\\#"]}
{
".txt": [
"diff",
"--ignore-matching-lines=samtools",
"--ignore-matching-lines=command",
]
}
)
1 change: 1 addition & 0 deletions examples/empty/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,4 @@ euk:
############
report:
multiqc: "--verbose --cl-config 'custom_logo: data/KU_long.png' --cl-config 'custom_logo_title: CAEG - Center for Ancient Environmental Genomics' --cl-config 'custom_logo_url: https://globe.ku.dk/research/caeg/'"
multiqc_db_url: "test_qc.sqlite"
Loading