> For the complete documentation index, see [llms.txt](https://aleksandre80.gitbook.io/stage/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://aleksandre80.gitbook.io/stage/basecalling-avec-dorado/basecalling-dorado-+-demultiplexage.md).

# BaseCalling Dorado + Demultiplexage

### Téléchargement des modèles

```bash
# Download the dorado basecalling models. No --model option is given, so this
# presumably fetches every available model into the current directory
# (check with `dorado download --help`).
$ /home/grid/dorado-0.7.2-linux-x64/bin/dorado download
```

### Lancement du basecalling et du démultiplexage en même temps

```bash
# Basecall the POD5 files found in pod5/ on CUDA device 0 and stream the
# FASTQ output directly into `dorado demux`, which writes the per-barcode
# FASTQ files to ./demultiplexed.
# --no-trim leaves the reads untrimmed before they reach the demux step.
DORADO_HOME="/home/grid/dorado-0.7.2-linux-x64/bin"

"${DORADO_HOME}/dorado" basecaller \
    --device "cuda:0" \
    --min-qscore 7 \
    --no-trim \
    --emit-fastq \
    "${DORADO_HOME}/dna_r10.4.1_e8.2_400bps_hac@v4.2.0" \
    pod5/ \
  | "${DORADO_HOME}/dorado" demux \
    --kit-name SQK-RBK114-24 \
    --emit-fastq \
    --output-dir demultiplexed

```

### Script bash permettant d'automatiser plusieurs basecallings et démultiplexages à la suite

```bash
#!/bin/bash
# Batch basecalling + demultiplexing, one section per POD5 input directory.
# Each section runs `dorado basecaller | dorado demux` once for every value in
# QS_SCORES and writes the result to demultiplexed_q<qscore>/ in the current
# working directory.
#
# NOTE: the output directory name depends only on the Q-score, so two sections
# that use the same Q-score write into the same directory.

# Report a pipeline as failed when ANY stage fails; otherwise a basecaller
# error would be masked by the exit status of `dorado demux`.
set -o pipefail


# Processing C:/path/to/pod5/1
DORADO_BIN="/home/grid/dorado-0.7.2-linux-x64/bin/dorado"
MODEL_PATH="/home/grid/dorado-0.7.2-linux-x64/bin/dna_r10.4.1_e8.2_400bps_hac@v5.0.0"
# REF_GENOME is defined but not used by this basecalling-only script.
REF_GENOME="C:/path/to/References/hg38.mmi"
INPUT_DIR="C:/path/to/pod5/1"
QS_SCORES=(10)

for qscore in "${QS_SCORES[@]}"; do
    OUTPUT_DIR="demultiplexed_q${qscore}"
    mkdir -p "${OUTPUT_DIR}"
    # Every expansion is quoted so that paths containing spaces stay one argument.
    if "${DORADO_BIN}" basecaller -x "cuda:0" --min-qscore "${qscore}" --no-trim --emit-fastq "${MODEL_PATH}" "${INPUT_DIR}" | \
       "${DORADO_BIN}" demux --kit-name "SQK-NBD114-24" --emit-fastq --output-dir "${OUTPUT_DIR}"; then
        echo "Processing complete for ${INPUT_DIR} with Q-score ${qscore}"
    else
        echo "Processing FAILED for ${INPUT_DIR} with Q-score ${qscore}" >&2
    fi
done

# Processing C:/path/to/pod5/2
DORADO_BIN="/home/grid/dorado-0.7.2-linux-x64/bin/dorado"
MODEL_PATH="/home/grid/dorado-0.7.2-linux-x64/bin/dna_r10.4.1_e8.2_400bps_hac@v5.0.0"
# REF_GENOME is defined but not used by this basecalling-only script.
REF_GENOME="C:/path/to/References/hg38.mmi"
INPUT_DIR="C:/path/to/pod5/2"
QS_SCORES=(40)

for qscore in "${QS_SCORES[@]}"; do
    OUTPUT_DIR="demultiplexed_q${qscore}"
    mkdir -p "${OUTPUT_DIR}"
    if "${DORADO_BIN}" basecaller -x "cuda:0" --min-qscore "${qscore}" --no-trim --emit-fastq "${MODEL_PATH}" "${INPUT_DIR}" | \
       "${DORADO_BIN}" demux --kit-name "SQK-NBD114-24" --emit-fastq --output-dir "${OUTPUT_DIR}"; then
        echo "Processing complete for ${INPUT_DIR} with Q-score ${qscore}"
    else
        echo "Processing FAILED for ${INPUT_DIR} with Q-score ${qscore}" >&2
    fi
done
```

### Script Python permettant de créer le script bash via une interface graphique

```python
import tkinter as tk
from tkinter import filedialog, messagebox
import os

def launch_config_ui():
    """Open a Tk form that collects basecalling runs and writes a bash script.

    Each click on "Add Configuration" stores the current field values as one
    run. "Generate Script" writes ``all_configurations_processing.sh`` to the
    current working directory with one ``dorado basecaller | dorado demux``
    section per stored run; the script is written, not executed.

    The call blocks in ``root.mainloop()`` until the window is closed.
    """
    default_cuda_device = "cuda:0"
    default_kit_name = "SQK-NBD114-24"

    root = tk.Tk()
    root.title("Batch Configuration for Genomic Processing")

    configurations = []

    def set_entry_text(entry, text):
        """Replace the whole content of *entry* with *text*."""
        entry.delete(0, tk.END)
        entry.insert(0, text)

    def browse_into(entry, ask):
        """Run the dialog callable *ask* and store its result in *entry*.

        A cancelled dialog returns an empty value; the entry is then left
        untouched instead of being overwritten or appended to.
        """
        chosen = ask()
        if chosen:
            set_entry_text(entry, chosen)

    def bash_escape(value):
        """Escape *value* for use inside a double-quoted bash string."""
        # The backslash must be handled first so later escapes are not doubled.
        for char in '\\"$`':
            value = value.replace(char, "\\" + char)
        return value

    def add_configuration():
        """Validate the form, store it as one run and reset the form."""
        ref_genome = ref_genome_entry.get().strip()
        input_dir = input_dir_entry.get().strip()
        qs_scores = qs_score_entry.get().strip()
        cuda_device = cuda_device_entry.get().strip()
        kit_name = kit_name_entry.get().strip()

        # Values are stripped above so whitespace-only fields count as empty.
        if not all([ref_genome, input_dir, qs_scores, cuda_device, kit_name]):
            messagebox.showerror("Error", "Please fill all fields before adding a configuration.")
            return

        configurations.append({
            "ref_genome": ref_genome,
            "input_dir": input_dir,
            "qs_scores": qs_scores,
            "cuda_device": cuda_device,
            "kit_name": kit_name,
        })

        listbox.insert(tk.END, input_dir)
        for entry in (ref_genome_entry, input_dir_entry, qs_score_entry):
            entry.delete(0, tk.END)
        # Restore the pre-filled defaults rather than leaving these fields blank.
        set_entry_text(cuda_device_entry, default_cuda_device)
        set_entry_text(kit_name_entry, default_kit_name)
        messagebox.showinfo("Success", "Configuration added successfully.")

    def generate_and_run_script():
        """Write all stored runs to the bash script; the script is not run."""
        if not configurations:
            messagebox.showerror("Error", "Add at least one configuration before generating the script.")
            return

        script_path = "all_configurations_processing.sh"
        # newline="\n" forces LF line endings: on Windows the default text mode
        # would emit CRLF, which bash cannot execute.
        with open(script_path, "w", encoding="utf-8", newline="\n") as script_file:
            script_file.write("#!/bin/bash\n")
            # pipefail makes a basecaller failure visible through the pipe.
            script_file.write("set -o pipefail\n")
            for config in configurations:
                input_dir = bash_escape(config['input_dir'])
                ref_genome = bash_escape(config['ref_genome'])
                cuda_device = bash_escape(config['cuda_device'])
                kit_name = bash_escape(config['kit_name'])
                qs_scores_array = ' '.join(
                    f'"{bash_escape(qs)}"' for qs in config['qs_scores'].split()
                )
                script_content = f"""
# Processing {config['input_dir']}
DORADO_BIN="/home/grid/dorado-0.7.2-linux-x64/bin/dorado"
MODEL_PATH="/home/grid/dorado-0.7.2-linux-x64/bin/dna_r10.4.1_e8.2_400bps_hac@v5.0.0"
REF_GENOME="{ref_genome}"
INPUT_DIR="{input_dir}/"
QS_SCORES=({qs_scores_array})

for qscore in "${{QS_SCORES[@]}}"; do
    OUTPUT_DIR="demultiplexed_q${{qscore}}"
    mkdir -p "${{OUTPUT_DIR}}"
    if "${{DORADO_BIN}}" basecaller -x "{cuda_device}" --min-qscore "${{qscore}}" --no-trim --emit-fastq "${{MODEL_PATH}}" "${{INPUT_DIR}}" | \\
       "${{DORADO_BIN}}" demux --kit-name "{kit_name}" --emit-fastq --output-dir "${{OUTPUT_DIR}}"; then
        echo "Processing complete for {input_dir} with Q-score ${{qscore}}"
    else
        echo "Processing FAILED for {input_dir} with Q-score ${{qscore}}" >&2
    fi
done
"""
                script_file.write(script_content)
        messagebox.showinfo("Done", f"All configurations have been written to {script_path}. Please run the script manually.")

    tk.Label(root, text="Select the genome file REF_GENOME (.mmi):").pack()
    ref_genome_entry = tk.Entry(root, width=50)
    ref_genome_entry.pack(padx=20, pady=5)
    tk.Button(
        root,
        text="Browse",
        command=lambda: browse_into(
            ref_genome_entry,
            lambda: filedialog.askopenfilename(filetypes=[("MMI files", "*.mmi")]),
        ),
    ).pack()

    tk.Label(root, text="Select the folder for INPUT_DIR:").pack()
    input_dir_entry = tk.Entry(root, width=50)
    input_dir_entry.pack(padx=20, pady=5)
    tk.Button(
        root,
        text="Browse",
        command=lambda: browse_into(input_dir_entry, filedialog.askdirectory),
    ).pack()

    tk.Label(root, text="Enter Q-scores separated by spaces:").pack()
    qs_score_entry = tk.Entry(root, width=50)
    qs_score_entry.pack(padx=20, pady=5)

    tk.Label(root, text="Specify the CUDA device (e.g., cuda:0):").pack()
    cuda_device_entry = tk.Entry(root, width=50)
    cuda_device_entry.insert(0, default_cuda_device)
    cuda_device_entry.pack(padx=20, pady=5)

    tk.Label(root, text="Enter the kit name (e.g., SQK-NBD114-24):").pack()
    kit_name_entry = tk.Entry(root, width=50)
    kit_name_entry.insert(0, default_kit_name)
    kit_name_entry.pack(padx=20, pady=5)

    tk.Button(root, text="Add Configuration", command=add_configuration).pack(pady=10)

    listbox = tk.Listbox(root, height=6, width=50)
    listbox.pack(pady=10)

    tk.Button(root, text="Generate Script", command=generate_and_run_script).pack(pady=20)

    root.mainloop()

if __name__ == "__main__":
    launch_config_ui()
```

Basecalling et démultiplexage, suivis de l'alignement et de la conversion en BAM :

```bash
#!/bin/bash
# Basecall + demultiplex one POD5 directory with dorado, then align every
# demultiplexed FASTQ file with minimap2 and write a sorted, indexed BAM next
# to it.

# Report a pipeline as failed when ANY stage fails; otherwise a basecaller
# error would be masked by the exit status of `dorado demux`.
set -o pipefail

# minimap2 and samtools are expected to come from the "genomics" conda env.
source ~/miniconda3/etc/profile.d/conda.sh
conda activate genomics

BASE_OUTPUT_DIR="C:/Users/aleks/OneDrive/Bureau/CHU/Test1/BAM"
mkdir -p "${BASE_OUTPUT_DIR}"

DORADO_BIN="/home/grid/dorado-0.7.2-linux-x64/bin/dorado"
MODEL_PATH="/home/grid/dorado-0.7.2-linux-x64/bin/dna_r10.4.1_e8.2_400bps_hac@v5.0.0"
REF_GENOME="C:/Users/aleks/OneDrive/Bureau/CHU/References/hg38.mmi"
INPUT_DIR="C:/Users/aleks/OneDrive/Bureau/CHU/Test1/pod51"
OUTPUT_DIR="${BASE_OUTPUT_DIR}/demultiplexed_q17"
mkdir -p "${OUTPUT_DIR}"

# Every expansion is quoted so that paths containing spaces stay one argument.
if "${DORADO_BIN}" basecaller -x "cuda:0" --min-qscore "17" --no-trim --emit-fastq "${MODEL_PATH}" "${INPUT_DIR}" | \
   "${DORADO_BIN}" demux --kit-name "SQK-NBD114-24" --emit-fastq --output-dir "${OUTPUT_DIR}"; then
    echo "Processing complete for ${INPUT_DIR} with Q-score 17"
    for fastq_file in "${OUTPUT_DIR}"/*.fastq; do
        # Without a match the glob stays literal; skip it instead of passing
        # a non-existent file name to minimap2.
        [ -e "${fastq_file}" ] || continue
        bam_file="${fastq_file%.fastq}.bam"
        echo "Aligning ${fastq_file} to reference genome..."
        minimap2 -ax map-ont "${REF_GENOME}" "${fastq_file}" | samtools sort -o "${bam_file}"
        samtools index "${bam_file}"
        echo "Alignment and BAM conversion completed for ${bam_file}"
    done
else
    # Do not align the possibly truncated output of a failed basecalling run.
    echo "Processing FAILED for ${INPUT_DIR} with Q-score 17; alignment skipped" >&2
fi
echo "All processes are complete."
```

Et en Python, via une interface graphique :

```python
import tkinter as tk
from tkinter import filedialog, messagebox

def launch_config_ui():
    """Open a Tk form that collects runs and writes a basecall + align script.

    Each click on "Add Configuration" stores the current field values as one
    run. "Generate Script" writes ``all_configurations_processing.sh`` to the
    current working directory. For every run and every Q-score the script
    contains a ``dorado basecaller | dorado demux`` step followed by a
    minimap2 / samtools loop over the resulting FASTQ files. The script is
    written, not executed.

    The call blocks in ``root.mainloop()`` until the window is closed.
    """
    default_cuda_device = "cuda:0"
    default_kit_name = "SQK-NBD114-24"

    root = tk.Tk()
    root.title("Batch Configuration for Genomic Processing")

    configurations = []

    def set_entry_text(entry, text):
        """Replace the whole content of *entry* with *text*."""
        entry.delete(0, tk.END)
        entry.insert(0, text)

    def browse_into(entry, ask):
        """Run the dialog callable *ask* and store its result in *entry*.

        A cancelled dialog returns an empty value; the entry is then left
        untouched instead of being overwritten or appended to.
        """
        chosen = ask()
        if chosen:
            set_entry_text(entry, chosen)

    def bash_escape(value):
        """Escape *value* for use inside a double-quoted bash string."""
        # The backslash must be handled first so later escapes are not doubled.
        for char in '\\"$`':
            value = value.replace(char, "\\" + char)
        return value

    def add_configuration():
        """Validate the form, store it as one run and reset the form."""
        base_output_dir = base_output_dir_entry.get().strip()
        input_dir = input_dir_entry.get().strip()
        ref_genome = ref_genome_entry.get().strip()
        qs_scores = qs_score_entry.get().strip()
        cuda_device = cuda_device_entry.get().strip()
        kit_name = kit_name_entry.get().strip()

        # Values are stripped above so whitespace-only fields count as empty.
        if not all([base_output_dir, input_dir, ref_genome, qs_scores, cuda_device, kit_name]):
            messagebox.showerror("Error", "Please fill all fields before adding a configuration.")
            return

        configurations.append({
            "base_output_dir": base_output_dir,
            "input_dir": input_dir,
            "ref_genome": ref_genome,
            "qs_scores": qs_scores,
            "cuda_device": cuda_device,
            "kit_name": kit_name,
        })

        listbox.insert(tk.END, f"Input Dir: {input_dir}, Output Dir: {base_output_dir}, Q-Scores: {qs_scores}")
        for entry in (base_output_dir_entry, input_dir_entry, ref_genome_entry, qs_score_entry):
            entry.delete(0, tk.END)
        # Restore the pre-filled defaults rather than leaving these fields blank.
        set_entry_text(cuda_device_entry, default_cuda_device)
        set_entry_text(kit_name_entry, default_kit_name)
        messagebox.showinfo("Success", "Configuration added successfully.")

    def generate_and_run_script():
        """Write all stored runs to the bash script; the script is not run."""
        if not configurations:
            messagebox.showerror("Error", "Add at least one configuration before generating the script.")
            return

        script_path = "all_configurations_processing.sh"
        # newline="\n" forces LF line endings: on Windows the default text mode
        # would emit CRLF, which bash cannot execute.
        with open(script_path, "w", encoding="utf-8", newline="\n") as script_file:
            script_file.write("#!/bin/bash\n")
            # pipefail makes a basecaller failure visible through the pipe.
            script_file.write("set -o pipefail\n\n")
            script_file.write("source ~/miniconda3/etc/profile.d/conda.sh\n")
            script_file.write("conda activate genomics\n")
            for config in configurations:
                base_output_dir = bash_escape(config['base_output_dir'])
                input_dir = bash_escape(config['input_dir'])
                ref_genome = bash_escape(config['ref_genome'])
                cuda_device = bash_escape(config['cuda_device'])
                kit_name = bash_escape(config['kit_name'])
                for raw_qscore in config['qs_scores'].split():
                    qscore = bash_escape(raw_qscore)
                    # One self-contained section per (run, Q-score) pair.
                    script_file.write(f"""
# Processing {config['input_dir']} with Q-score {raw_qscore}
BASE_OUTPUT_DIR="{base_output_dir}"
mkdir -p "${{BASE_OUTPUT_DIR}}"

DORADO_BIN="/home/grid/dorado-0.7.2-linux-x64/bin/dorado"
MODEL_PATH="/home/grid/dorado-0.7.2-linux-x64/bin/dna_r10.4.1_e8.2_400bps_hac@v5.0.0"
REF_GENOME="{ref_genome}"
INPUT_DIR="{input_dir}"
OUTPUT_DIR="${{BASE_OUTPUT_DIR}}/demultiplexed_q{qscore}"
mkdir -p "${{OUTPUT_DIR}}"
if "${{DORADO_BIN}}" basecaller -x "{cuda_device}" --min-qscore "{qscore}" --no-trim --emit-fastq "${{MODEL_PATH}}" "${{INPUT_DIR}}" | \\
   "${{DORADO_BIN}}" demux --kit-name "{kit_name}" --emit-fastq --output-dir "${{OUTPUT_DIR}}"; then
    echo "Processing complete for {input_dir} with Q-score {qscore}"
    # Alignment and conversion to BAM
    for fastq_file in "${{OUTPUT_DIR}}"/*.fastq; do
        # Without a match the glob stays literal; skip it.
        [ -e "${{fastq_file}}" ] || continue
        bam_file="${{fastq_file%.fastq}}.bam"
        echo "Aligning ${{fastq_file}} to reference genome..."
        minimap2 -ax map-ont "${{REF_GENOME}}" "${{fastq_file}}" | samtools sort -o "${{bam_file}}"
        samtools index "${{bam_file}}"
        echo "Alignment and BAM conversion completed for ${{bam_file}}"
    done
else
    echo "Processing FAILED for {input_dir} with Q-score {qscore}; alignment skipped" >&2
fi
""")
            script_file.write("\necho \"All processes are complete.\"\n")
        messagebox.showinfo("Done", f"All configurations have been written to {script_path}. Please run the script manually.")

    # GUI layout settings
    tk.Label(root, text="Set the base output directory BASE_OUTPUT_DIR:").pack()
    base_output_dir_entry = tk.Entry(root, width=50)
    base_output_dir_entry.pack(padx=20, pady=5)
    tk.Button(
        root,
        text="Browse",
        command=lambda: browse_into(base_output_dir_entry, filedialog.askdirectory),
    ).pack()

    tk.Label(root, text="Select the folder for INPUT_DIR:").pack()
    input_dir_entry = tk.Entry(root, width=50)
    input_dir_entry.pack(padx=20, pady=5)
    tk.Button(
        root,
        text="Browse",
        command=lambda: browse_into(input_dir_entry, filedialog.askdirectory),
    ).pack()

    tk.Label(root, text="Select the genome file REF_GENOME (.mmi):").pack()
    ref_genome_entry = tk.Entry(root, width=50)
    ref_genome_entry.pack(padx=20, pady=5)
    # The filter matches *.mmi (minimap2 index), so it is labelled as such.
    tk.Button(
        root,
        text="Browse",
        command=lambda: browse_into(
            ref_genome_entry,
            lambda: filedialog.askopenfilename(filetypes=[("MMI files", "*.mmi")]),
        ),
    ).pack()

    tk.Label(root, text="Enter Q-scores separated by spaces:").pack()
    qs_score_entry = tk.Entry(root, width=50)
    qs_score_entry.pack(padx=20, pady=5)

    tk.Label(root, text="Specify the CUDA device (e.g., cuda:0):").pack()
    cuda_device_entry = tk.Entry(root, width=50)
    cuda_device_entry.insert(0, default_cuda_device)
    cuda_device_entry.pack(padx=20, pady=5)

    tk.Label(root, text="Enter the kit name (e.g., SQK-NBD114-24):").pack()
    kit_name_entry = tk.Entry(root, width=50)
    kit_name_entry.insert(0, default_kit_name)
    kit_name_entry.pack(padx=20, pady=5)

    tk.Button(root, text="Add Configuration", command=add_configuration).pack(pady=10)

    listbox = tk.Listbox(root, height=6, width=50)
    listbox.pack(pady=10)

    tk.Button(root, text="Generate Script", command=generate_and_run_script).pack(pady=20)

    root.mainloop()

if __name__ == "__main__":
    launch_config_ui()
```


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter, and the optional `goal` query parameter:

```
GET https://aleksandre80.gitbook.io/stage/basecalling-avec-dorado/basecalling-dorado-+-demultiplexage.md?ask=<question>&goal=<endgoal>
```

`ask` is the immediate question: it should be specific, self-contained, and written in natural language.
`goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
