GeoGenetics · fgvieira · Mar 8, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -38,6 +38,8 @@ jobs:
     strategy:
       fail-fast: false
     runs-on: ubuntu-latest
+    env:
+      SQL_DRIVER: sqlite
 
     steps:
       - uses: actions/checkout@v4

diff --git a/examples/empty/.tests/unit/common.py b/examples/empty/.tests/unit/common.py
@@ -4,7 +4,7 @@
 
 import os
 from pathlib import Path
-from subprocess import check_output
+from subprocess import check_output, PIPE
 
 
 cmp_cmds = {
@@ -28,21 +28,22 @@ def check(self, cmp_cmds=cmp_cmds):
             for path, subdirs, files in os.walk(self.data_path)
             for f in files
         )
-        print(f"input: {input_files}")  # DEBUG
-        # Workdir files
+        print(f"input_files: {input_files}")  # DEBUG
+        # Workdir files (ignoring '.snakemake/' and 'config/' folders)
         workdir_files = set(
             (Path(path) / f).relative_to(self.workdir)
             for path, subdirs, files in os.walk(self.workdir)
             for f in files
+            if "/.snakemake" not in path and "/config" not in path
         )
-        print(f"workdir: {workdir_files}")  # DEBUG
+        print(f"workdir_files: {workdir_files}")  # DEBUG
         # Expected files
         expected_files = set(
             (Path(path) / f).relative_to(self.expected_path)
             for path, subdirs, files in os.walk(self.expected_path)
             for f in files
         )
-        print(f"expected: {expected_files}")  # DEBUG
+        print(f"expected_files: {expected_files}")  # DEBUG
 
         assert expected_files.issubset(
             workdir_files
@@ -55,5 +56,6 @@ def check(self, cmp_cmds=cmp_cmds):
     def compare_files(self, expected_file, generated_file, cmp_cmds):
         check_output(
             cmp_cmds.get(expected_file.suffix, ["cmp"])
-            + [expected_file, generated_file]
+            + [expected_file, generated_file],
+            stderr=PIPE,
         )
diff --git a/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/config.yaml b/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/config.yaml
@@ -0,0 +1,155 @@
+
+# - Only tested with Phred33 quality scores
+
+samples: config/samples.tsv
+
+units: config/units.tsv
+
+
+
+#############
+### READS ###
+#############
+trim:
+  trim:
+    activate: true
+    tool: adapterremoval
+    params: "--trimns --maxns 10 --trimqualities --minlength 30 --mask-degenerate-bases --seed 12345"
+
+  # Ignored for SE
+  collapse:
+    activate: true
+    params: "--collapse-conservatively"
+
+derep:
+  extension:
+    activate: false
+    k: 16
+    params: "ibb=t prefilter=0 el=100 er=100 ecc=f ecco=f ignorebadquality extendrollback=0"
+
+  derep:
+    activate: true
+    # vsearch or seqkit
+    tool: seqkit
+    params: ""
+
+  low_complex:
+    params: "entropy=0.7 entropywindow=30 entropyk=4"
+
+
+
+#############
+### ALIGN ###
+#############
+prefilter:
+  taxa: "Bacteria,Archaea,Viruses"
+
+  ref:
+    prok:
+      n_shards: 2
+      path: "data/prok.{n_shard}-of-2.fas.gz"
+      map:
+        tool: bowtie2
+        params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal -N 1"
+        bt2l: False
+      acc2taxid: "data/prok.acc2taxid.gz"
+    virus:
+      n_shards: 1
+      path: "data/virus.1-of-1.fas.gz"
+      map:
+        tool: bowtie2
+        params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
+        bt2l: False
+      acc2taxid: "data/virus.acc2taxid.gz"
+
+  filter:
+    saturated_reads:
+      activate: true
+      n_alns: 10
+
+  bam_filter:
+    reassign:
+      activate: false
+      params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0 --reference-lengths genomes.len.map"
+
+    filter:
+      activate: false
+      params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 94 --reference-lengths genomes.len.map"
+
+    lca:
+      activate: false
+      params: "--lca-rank genus --reference-lengths genomes.len.map"
+
+  taxonomy:
+    nodes: "data/taxdump/nodes.dmp"
+    names: "data/taxdump/names.dmp"
+
+  metadmg:
+    damage:
+      params: "--print_length 15"
+
+    lca:
+      params: "--fix_ncbi 0 --how_many 25 --weight_type 1 --edit_dist_max 10000 --lca_rank genus"
+
+    dfit:
+      params: "--nopt 5 --showfits 2"
+
+
+euk:
+  ref:
+    mitoch:
+      n_shards: 1
+      path: "data/mitoch.1-of-1.fas.gz"
+      map:
+        tool: bowtie2
+        params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
+        bt2l: False
+      acc2taxid: "data/mitoch.acc2taxid.gz"
+    plastid:
+      n_shards: 1
+      path: "data/plastid.1-of-1.fas.gz"
+      map:
+        tool: bowtie2
+        params: "-k 10 -L 22 -i S,1,1.15 --mp 1,1 --rdg 0,1 --rfg 0,1 --score-min L,0,-0.1 --no-unal"
+        bt2l: False
+      acc2taxid: "data/plastid.acc2taxid.gz"
+
+  filter:
+    saturated_reads:
+      activate: true
+      n_alns: 10
+
+  bam_filter:
+    reassign:
+      activate: false
+      params: "--iters 0 --min-read-ani 92 --min-read-count 3 --scale 0"
+
+    filter:
+      activate: false
+      params: "--min-read-ani 92 --min-read-count 3 --min-normalized-entropy 0.6 --min-normalized-gini 0.4 --min-avg-read-ani 92"
+
+    lca:
+      activate: false
+      params: "--lca-rank genus"
+
+  taxonomy:
+    nodes: "data/taxdump/nodes.dmp"
+    names: "data/taxdump/names.dmp"
+
+  metadmg:
+    damage:
+      params: "--print_length 15"
+
+    lca:
+      params: "--fix_ncbi 0 --how_many 15 --sim_score_low 0.95 --weight_type 0 --lca_rank genus"
+
+    dfit:
+      params: "--nopt 5 --showfits 2 --seed 12345"
+
+
+############
+## REPORT ##
+############
+report:
+  multiqc: "--verbose --cl-config 'custom_logo: data/KU_long.png' --cl-config 'custom_logo_title: CAEG - Center for Ancient Environmental Genomics' --cl-config 'custom_logo_url: https://globe.ku.dk/research/caeg/'"
+  multiqc_db_url: "sqlite:///test_qc.sqlite"
diff --git a/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/samples.tsv b/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/samples.tsv
@@ -0,0 +1,2 @@
+sample	alias	group	condition
+HD827sonic_1	NA	NA	NA
diff --git a/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/units.tsv b/examples/empty/.tests/unit/multiqc_taxon_upload/config/config/units.tsv
@@ -0,0 +1,3 @@
+# Adapter list: https://gist.github.com/photocyte/3edd9401d0b13476e60f8b104c2575f8
+sample	library	barcode	flowcell	lane	seq_type	library_type	material	data	machine	run_n	center	platform	adapters	sample_n	date
+HD827sonic_1	lib1	ACGGAACAxACGAGAAC	HKTG2BGXG	L001	PE	ds	DNA	data/empty_L001_R{Read}.fq.gz	NDX550220	98	HYDRA_GEN	ILLUMINA	AGATCGGAAGAGCACACGTCTGAACTCCAGTCA,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT	S1	21-09-2021
diff --git a/examples/empty/.tests/unit/multiqc_taxon_upload/data/reports/multiqc_data.taxon.zip b/examples/empty/.tests/unit/multiqc_taxon_upload/data/reports/multiqc_data.taxon.zip
diff --git a/...s/empty/.tests/unit/multiqc_taxon_upload/expected/stats/reports/multiqc_taxon.upload.flag b/...s/empty/.tests/unit/multiqc_taxon_upload/expected/stats/reports/multiqc_taxon.upload.flag
@@ -0,0 +1,77 @@
+INFO:DB sqlite:///test_qc.sqlite does not exist.
+INFO:Reading file reports/multiqc_data.taxon.zip
+INFO:Uploading report v1.33 to DB...
+INFO:Adding report record to DB
+INFO:Adding report metadata to DB
+INFO:Parsing section multiqc_fastqc_prefilter-fastqc
+INFO:Parsing section multiqc_fastqc_low_complexity-fastqc
+INFO:Parsing section multiqc_fastqc_derep-fastqc
+INFO:Parsing section multiqc_nonpareil_merge_lanes-nonpareil
+INFO:Parsing section multiqc_fastqc_merge_lanes-fastqc
+INFO:Parsing section multiqc_fastqc_trim-fastqc
+INFO:Parsing section multiqc_adapter_removal
+INFO:Parsing section multiqc_fastqc_raw-fastqc
+INFO:Parsing section multiqc_general_stats
+INFO:Parsing plot prefilter-fastqc_sequence_counts_plot
+INFO:Parsing dataset prefilter-fastqc_sequence_counts_plot
+INFO:Parsing plot prefilter-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset prefilter-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset prefilter-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot prefilter-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset prefilter-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot prefilter-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot low_complexity-fastqc_sequence_counts_plot
+INFO:Parsing dataset low_complexity-fastqc_sequence_counts_plot
+INFO:Parsing plot low_complexity-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset low_complexity-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset low_complexity-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot low_complexity-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset low_complexity-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot low_complexity-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot derep-fastqc_sequence_counts_plot
+INFO:Parsing dataset derep-fastqc_sequence_counts_plot
+INFO:Parsing plot derep-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset derep-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset derep-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot derep-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset derep-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot derep-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot nonpareil-table
+WARNING:Plot type violin plot is not supported
+INFO:Parsing plot nonpareil-redundancy-plot
+INFO:Parsing dataset nonpareil-redundancy-plot_Combined
+INFO:Parsing dataset nonpareil-redundancy-plot_Observed
+INFO:Parsing plot merge_lanes-fastqc_sequence_counts_plot
+INFO:Parsing dataset merge_lanes-fastqc_sequence_counts_plot
+INFO:Parsing plot merge_lanes-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset merge_lanes-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset merge_lanes-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot merge_lanes-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset merge_lanes-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot merge_lanes-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot trim-fastqc_sequence_counts_plot
+INFO:Parsing dataset trim-fastqc_sequence_counts_plot
+INFO:Parsing plot trim-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset trim-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset trim-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot trim-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset trim-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot trim-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot ar_retained_plot
+INFO:Parsing dataset ar_retained_plot
+INFO:Parsing plot raw-fastqc_sequence_counts_plot
+INFO:Parsing dataset raw-fastqc_sequence_counts_plot
+INFO:Parsing plot raw-fastqc_per_sequence_gc_content_plot
+INFO:Parsing dataset raw-fastqc_per_sequence_gc_content_plot_Percentages
+INFO:Parsing dataset raw-fastqc_per_sequence_gc_content_plot_Counts
+INFO:Parsing plot raw-fastqc_sequence_duplication_levels_plot
+INFO:Parsing dataset raw-fastqc_sequence_duplication_levels_plot
+INFO:Parsing plot raw-fastqc-status-check-heatmap
+WARNING:Plot type heatmap is not supported
+INFO:Parsing plot general_stats_table
+WARNING:Plot type violin plot is not supported
diff --git a/examples/empty/.tests/unit/test_multiqc_taxon_upload.py b/examples/empty/.tests/unit/test_multiqc_taxon_upload.py
@@ -0,0 +1,69 @@
+"""
+Rule test code for unit testing of rules generated with Snakemake 9.16.4.dev3.
+"""
+
+import os
+import sys
+import shutil
+import tempfile
+from pathlib import Path
+from subprocess import check_output
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+
+def test_multiqc_taxon_upload(conda_prefix):
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        workdir = Path(tmpdir) / "workdir"
+        config_path = Path(".tests/unit/multiqc_taxon_upload/config")
+        data_path = Path(".tests/unit/multiqc_taxon_upload/data")
+        expected_path = Path(".tests/unit/multiqc_taxon_upload/expected")
+
+        # Copy config to the temporary workdir.
+        shutil.copytree(config_path, workdir)
+
+        # Copy data to the temporary workdir.
+        shutil.copytree(data_path, workdir, dirs_exist_ok=True)
+
+        # Run the test job.
+        check_output(
+            [
+                "python",
+                "-m",
+                "snakemake",
+                "stats/reports/multiqc_taxon.upload.flag",
+                "--snakefile",
+                "../../workflow/Snakefile",
+                "-f",
+                "--notemp",
+                "--show-failed-logs",
+                "-j1",
+                "--target-files-omit-workdir-adjustment",
+                "--allowed-rules",
+                "multiqc_taxon_upload",
+                "--configfile",
+                "config/config.yaml",
+                "--software-deployment-method",
+                "conda",
+                "--directory",
+                workdir,
+            ]
+            + conda_prefix
+        )
+
+        # Check the output byte by byte using cmp/zcmp/bzcmp/xzcmp.
+        # To modify this behavior, you can inherit from common.OutputChecker in here
+        # and override the method
+        # `compare_files(expected_file, generated_file, cmp_cmds)`; also see common.py.
+        import common
+
+        common.OutputChecker(data_path, expected_path, workdir).check(
+            {
+                ".flag": [
+                    "diff",
+                    "--ignore-matching-lines=Uploading",
+                    "--ignore-matching-lines=tzname",
+                ]
+            }
+        )
diff --git a/examples/empty/.tests/unit/test_taxon_align_stats.py b/examples/empty/.tests/unit/test_taxon_align_stats.py
@@ -59,5 +59,11 @@ def test_taxon_align_stats(conda_prefix):
         import common
 
         common.OutputChecker(data_path, expected_path, workdir).check(
-            {".txt": ["diff", "--ignore-matching-lines=\\#"]}
+            {
+                ".txt": [
+                    "diff",
+                    "--ignore-matching-lines=samtools",
+                    "--ignore-matching-lines=command",
+                ]
+            }
         )
diff --git a/examples/empty/config/config.yaml b/examples/empty/config/config.yaml
@@ -152,3 +152,4 @@ euk:
 ############
 report:
   multiqc: "--verbose --cl-config 'custom_logo: data/KU_long.png' --cl-config 'custom_logo_title: CAEG - Center for Ancient Environmental Genomics' --cl-config 'custom_logo_url: https://globe.ku.dk/research/caeg/'"
+  multiqc_db_url: "test_qc.sqlite"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		sample alias group condition
		HD827sonic_1 NA NA NA