Source code for meganorm.utils.freesurfer

import os
import shutil
import glob
import sys
import subprocess
import argparse
import pickle
import re
import numpy as np
import pandas as pd
from pathlib import Path
import time
import json
from datetime import datetime



[docs]
def get_freesurfer_home(freesurfer_path: str | None) -> str:
    """
    Resolve FreeSurfer installation path:
    - If freesurfer_path is provided, use it.
    - Else read $FREESURFER_HOME from the environment.
    """
    if freesurfer_path:
        return freesurfer_path
    fs_home = os.environ.get("FREESURFER_HOME")
    if not fs_home:
        raise EnvironmentError(
            "FREESURFER_HOME is not set and no freesurfer_path was provided. "
            "Set FREESURFER_HOME or pass freesurfer_path explicitly."
        )
    return fs_home




[docs]
def find_bids_t1w_files(subjects_directory: str, subject_id: str):
    """
    Collect every T1w image of one BIDS subject.

    Both the session-less layout (``<subject>/anat``) and the per-session
    layout (``<subject>/ses-*/anat``) are searched for ``.nii.gz``, ``.nii``
    and ``.mgz`` files. Matches are de-duplicated and sorted by path.

    Returns:
        list[dict]: one dict per image with the keys ``subject_id``,
        ``session`` (``ses-*`` entity or None), ``run`` (``run-*`` entity or
        None), ``t1_path`` and ``job_label`` (subject, session and run joined
        with underscores). Empty if the subject folder does not exist.
    """
    subject_root = Path(subjects_directory) / subject_id
    if not subject_root.is_dir():
        return []

    extensions = (".nii.gz", ".nii", ".mgz")
    layouts = (
        (subject_root / "anat", f"{subject_id}_*T1w"),
        (subject_root / "ses-*" / "anat", f"{subject_id}_ses-*_T1w"),
    )

    matches = set()
    for folder, stem in layouts:
        for ext in extensions:
            matches.update(glob.glob(str(folder / (stem + ext))))

    def describe(found: str) -> dict:
        # Session/run entities are read from the file name only.
        image = Path(found)
        ses_hit = re.search(r"(ses-[^_/\\]+)", image.name)
        run_hit = re.search(r"(run-[^_/\\]+)", image.name)
        session = ses_hit.group(1) if ses_hit else None
        run = run_hit.group(1) if run_hit else None
        label_parts = [subject_id, *(part for part in (session, run) if part)]
        return {
            "subject_id": subject_id,
            "session": session,
            "run": run,
            "t1_path": str(image),
            "job_label": "_".join(label_parts),
        }

    return [describe(found) for found in sorted(matches)]




[docs]
def prepare_mri_data(mri_directory):
    """Reorganise flat ``*.nii`` files into per-subject ``anat`` folders.

    Written to prepare the BTNRH MRI data for recon-all processing: every
    ``<name>.nii`` directly inside ``mri_directory`` is moved to
    ``<mri_directory>/<name>/anat/<name>.nii``.

    The subject name is the file name with only the trailing ``.nii``
    removed, so names that themselves contain dots are kept intact. Existing
    ``<name>/anat`` folders are reused, which makes the function safe to
    re-run after a partial run.

    Args:
        mri_directory (str): Directory to MRI data
    """
    suffix = ".nii"
    for nii_path in sorted(glob.glob(os.path.join(mri_directory, "*" + suffix))):
        # Strip exactly one trailing ".nii"; splitting on the first dot would
        # truncate names such as "sub.02.nii" and break the move below.
        subject = os.path.basename(nii_path)[: -len(suffix)]
        anat_dir = os.path.join(mri_directory, subject, "anat")
        os.makedirs(anat_dir, exist_ok=True)
        shutil.move(nii_path, os.path.join(anat_dir, subject + suffix))




[docs]
def create_slurm_script(
    t1_path,
    job_label,
    results_dir,  # DEFAULT: <BIDS_ROOT>/derivatives/freesurfer
    processing_directory,
    freesurfer_path,
    nodes=1,
    ntasks=1,
    cpus_per_task=1,
    mem="16G",
    time="48:00:00",
    i_option=True,
):
    """
    Write an executable Slurm batch script that runs recon-all on one T1 image.

    The script is saved as ``<processing_directory>/<job_label>_recon_all_slurm.sh``
    and expects the FreeSurfer subject ID as its first positional argument
    (``$1``). Slurm stdout/stderr go to ``<processing_directory>/log``, and
    ``SUBJECTS_DIR`` is set to ``results_dir``. Both directories are created
    if needed.

    With ``i_option=True`` the image is imported (``-i``); otherwise recon-all
    is invoked on the existing subject with ``-no-isrunning``.

    Returns:
        str: path of the generated script.

    Raises:
        FileNotFoundError: if ``t1_path`` is not an existing file.
        EnvironmentError: propagated from ``get_freesurfer_home`` when no
            FreeSurfer location can be resolved.
    """
    if not os.path.isfile(t1_path):
        raise FileNotFoundError(f"T1 path does not exist: {t1_path}")

    freesurfer_home = get_freesurfer_home(freesurfer_path)

    # Make sure the log and output folders exist before the job is queued.
    log_dir = os.path.join(processing_directory, "log")
    for directory in (log_dir, results_dir):
        os.makedirs(directory, exist_ok=True)

    if i_option:
        recon_line = "recon-all -s ${SUBJECT_ID} -i ${VOLUME} -all"
    else:
        recon_line = "recon-all -s ${SUBJECT_ID} -all -no-isrunning"

    script_lines = [
        "#!/bin/bash",
        f"#SBATCH --job-name={job_label}",
        f"#SBATCH --nodes={nodes}",
        f"#SBATCH --ntasks={ntasks}",
        f"#SBATCH --cpus-per-task={cpus_per_task}",
        f"#SBATCH --mem={mem}",
        f"#SBATCH --time={time}",
        f"#SBATCH --output={log_dir}/%x_%j.log",
        f"#SBATCH --error={log_dir}/%x_%j.err",
        "",
        "# FreeSurfer env (from resolved path or $FREESURFER_HOME)",
        f"export FREESURFER_HOME={freesurfer_home}",
        "source $FREESURFER_HOME/SetUpFreeSurfer.sh",
        "",
        "# Output root (BIDS derivatives by default)",
        f"export SUBJECTS_DIR={results_dir}",
        "",
        "# Input T1",
        f'VOLUME="{t1_path}"',
        "",
        "# Keep BIDS subject id as-is (e.g., sub-01)",
        'SUBJECT_ID="$1"',
        "",
        'echo "Running recon-all for ${SUBJECT_ID} with input ${VOLUME}"',
        recon_line,
        "",
        'echo "Done: ${SUBJECT_ID}"',
    ]
    script_text = "\n".join(script_lines) + "\n"

    os.makedirs(processing_directory, exist_ok=True)
    script_path = os.path.join(
        processing_directory, f"{job_label}_recon_all_slurm.sh"
    )
    with open(script_path, "w") as handle:
        handle.write(script_text)
    os.chmod(script_path, 0o755)  # sbatch is handed the script path directly
    return script_path




[docs]
def is_success(
    results_directory: str,
    subject_id: str,
    token: str = "finished without error",
    tail_lines_to_scan: int = 200,
    fresh_minutes: int = 30,
    stalled_hours: int = 24,
) -> bool:
    """
    Tell whether recon-all completed successfully for one subject.

    Thin convenience wrapper around ``classify_subject_status``: returns True
    only when that function reports the status ``"success"``. ``token`` is
    forwarded as its ``success_token``.
    """
    outcome = classify_subject_status(
        results_directory,
        subject_id,
        success_token=token,
        tail_lines_to_scan=tail_lines_to_scan,
        fresh_minutes=fresh_minutes,
        stalled_hours=stalled_hours,
    )
    return outcome[0] == "success"




[docs]
def run_parallel_reconall(
    subjects_directory,
    results_directory=None,
    processing_directory=".",
    freesurfer_path=None,
    file_postfix=".nii",
    skip_completed: bool = True,
    skip_running: bool = True,
    resubmit_statuses: tuple[str, ...] = ("failed", "missing", "stalled"),
    success_token: str = "finished without error",
    tail_lines_to_scan: int = 200,
    fresh_minutes: int = 30,
    stalled_hours: int = 24,
    selected_subjects: list[str] | str | None = None,
    selected_sessions: list[str] | str | None = None,
):
    """
    Submit one Slurm recon-all job per selected T1w image of a BIDS dataset.

    Behavior
    --------
    - Subjects are the ``sub-*`` directories under ``subjects_directory``, or
      the ones named in ``selected_subjects`` (which must all exist there).
    - If ``selected_sessions`` is None, each subject is first classified with
      ``classify_subject_status`` under its plain subject ID and may be
      skipped at that level; then only the first discovered T1w image of the
      subject is used.
    - If ``selected_sessions`` is provided, the subject-level skip is bypassed
      and every T1w image whose ``ses-*`` entity is in the selection is used.
    - The FreeSurfer subject ID passed to recon-all is the entry's job label
      (subject[_session][_run]) when the image has a ``run-*`` entity or when
      ``selected_sessions`` is provided; otherwise it is the plain subject ID.
    - Each entry is classified again under its FreeSurfer subject ID and
      skipped when it is already successful (``skip_completed``), running
      (``skip_running``), or in a state not listed in ``resubmit_statuses``.
    - recon-all imports the image (``-i``) only when the FreeSurfer subject
      folder does not exist yet in ``results_directory``.
    - After the loop, ``submitted_jobs_<timestamp>.json`` and
      ``failed_jobs_<timestamp>.json`` are written to ``processing_directory``.

    Parameters
    ----------
    subjects_directory : str
        BIDS root containing the ``sub-*`` folders.
    results_directory : str or None
        FreeSurfer SUBJECTS_DIR; defaults to
        ``<subjects_directory>/derivatives/freesurfer``. Created if missing.
    processing_directory : str
        Where Slurm scripts, logs and the JSON manifests are written.
    freesurfer_path : str or None
        Forwarded to ``create_slurm_script``.
    file_postfix : str
        Not used anywhere in this function.
        NOTE(review): presumably kept for backward compatibility - confirm.
    skip_completed, skip_running : bool
        Skip targets whose status is "success" / "running".
        NOTE(review): with ``skip_running=False`` a "running" target is still
        skipped by the ``resubmit_statuses`` filter unless "running" is
        listed there - confirm whether that is intended.
    resubmit_statuses : tuple[str, ...]
        Non-success statuses that are eligible for (re)submission.
    success_token, tail_lines_to_scan, fresh_minutes, stalled_hours
        Forwarded to ``classify_subject_status``.
    selected_subjects : list[str] or str or None
        Restrict processing to these subject IDs (a single string is accepted).
    selected_sessions : list[str] or str or None
        Restrict processing to these session labels; they are compared with
        the ``ses-*`` entity found in the file name (e.g. ``"ses-01"``).

    Returns
    -------
    submitted : list[str]
        Submitted job labels.
    failed_job_submissions : list[str]
        Subject IDs (no T1w / no matching session) or job labels (sbatch
        returned non-zero) that could not be submitted.

    Raises
    ------
    RuntimeError
        If a selected subject is not found, or no subject is found at all.
    """

    Path(processing_directory).mkdir(parents=True, exist_ok=True)

    bids_root = Path(subjects_directory)

    if results_directory is None:
        results_directory = str(bids_root / "derivatives" / "freesurfer")
    os.makedirs(results_directory, exist_ok=True)

    # Normalize selected_subjects
    if selected_subjects is None:
        subject_ids = sorted(d.name for d in bids_root.glob("sub-*") if d.is_dir())
    else:
        if isinstance(selected_subjects, str):
            selected_subjects = [selected_subjects]
        subject_ids = sorted(set(selected_subjects))

        # Fail early if any requested subject has no folder in the BIDS root
        available_subjects = {d.name for d in bids_root.glob("sub-*") if d.is_dir()}
        missing_subjects = [sid for sid in subject_ids if sid not in available_subjects]
        if missing_subjects:
            raise RuntimeError(
                f"These selected subjects were not found in {subjects_directory}: {missing_subjects}"
            )

    if not subject_ids:
        raise RuntimeError(f"No BIDS subjects found in {subjects_directory}")

    # Normalize selected_sessions
    if selected_sessions is not None:
        if isinstance(selected_sessions, str):
            selected_sessions = [selected_sessions]
        selected_sessions = set(selected_sessions)

    # Labels returned to the caller
    submitted: list[str] = []
    failed_job_submissions: list[str] = []

    # Detailed per-job records, dumped to JSON manifests at the end
    submitted_records: list[dict] = []
    failed_records: list[dict] = []

    for subject_id in subject_ids:
        # Subject-level status check (under the plain subject ID); it only
        # short-circuits when no explicit session selection was requested.
        status, _info = classify_subject_status(
            results_directory,
            subject_id,
            success_token=success_token,
            tail_lines_to_scan=tail_lines_to_scan,
            fresh_minutes=fresh_minutes,
            stalled_hours=stalled_hours,
        )

        if skip_completed and status == "success" and selected_sessions is None:
            print(f"[SKIP] {subject_id} already finished without error.")
            continue

        if skip_running and status == "running" and selected_sessions is None:
            print(f"[SKIP] {subject_id} currently running.")
            continue

        # "success" is exempt here so that skip_completed=False can resubmit it
        if (
            status not in resubmit_statuses
            and status != "success"
            and selected_sessions is None
        ):
            print(f"[SKIP] {subject_id}: status '{status}' not in resubmit_statuses.")
            continue

        # Discover all T1w inputs for this subject
        t1_entries = find_bids_t1w_files(subjects_directory, subject_id)
        if not t1_entries:
            print(f"[WARN] No T1w for {subject_id}; skipping")
            failed_job_submissions.append(subject_id)
            failed_records.append(
                {
                    "subject_id": subject_id,
                    "reason": "no_T1w_found",
                    "attempt_time": datetime.now().isoformat(timespec="seconds"),
                }
            )
            continue

        # Filter by user-requested sessions if provided
        if selected_sessions is not None:
            t1_entries = [
                entry
                for entry in t1_entries
                if entry.get("session") in selected_sessions
            ]

            if not t1_entries:
                print(
                    f"[WARN] No matching T1w found for {subject_id} in requested sessions."
                )
                failed_job_submissions.append(subject_id)
                failed_records.append(
                    {
                        "subject_id": subject_id,
                        "reason": "no_matching_session_found",
                        "requested_sessions": sorted(selected_sessions),
                        "attempt_time": datetime.now().isoformat(timespec="seconds"),
                    }
                )
                continue

        # No session selection: use only the first discovered T1w image.
        else:
            t1_entries = t1_entries[:1]

        for entry in t1_entries:
            # Decide the FreeSurfer subject ID used by recon-all: the job
            # label for multi-run images or explicit session selections,
            # the plain subject ID otherwise.
            if entry.get("run") is not None:
                fs_subject_id = entry["job_label"]
            elif selected_sessions is not None:
                fs_subject_id = entry["job_label"]
            else:
                fs_subject_id = subject_id

            # Classify the actual output target (may differ from subject_id)
            entry_status, _entry_info = classify_subject_status(
                results_directory,
                fs_subject_id,
                success_token=success_token,
                tail_lines_to_scan=tail_lines_to_scan,
                fresh_minutes=fresh_minutes,
                stalled_hours=stalled_hours,
            )

            if skip_completed and entry_status == "success":
                print(f"[SKIP] {fs_subject_id} already finished without error.")
                continue

            if skip_running and entry_status == "running":
                print(f"[SKIP] {fs_subject_id} currently running.")
                continue

            if entry_status not in resubmit_statuses and entry_status != "success":
                print(
                    f"[SKIP] {fs_subject_id}: status '{entry_status}' not in resubmit_statuses."
                )
                continue

            # Import the T1 (-i) only when the FreeSurfer subject folder does
            # not exist yet; otherwise recon-all is re-run on the existing one.
            subj_fs_dir = Path(results_directory) / fs_subject_id
            i_opt = not subj_fs_dir.exists()

            script_file_path = create_slurm_script(
                t1_path=entry["t1_path"],
                job_label=entry["job_label"],
                results_dir=results_directory,
                processing_directory=processing_directory,
                freesurfer_path=freesurfer_path,
                i_option=i_opt,
            )

            # The FreeSurfer subject ID is passed to the script as "$1"
            cmd = ["sbatch", script_file_path, fs_subject_id]
            print(f"Submitting job: {entry['job_label']} -> {' '.join(cmd)}")

            res = subprocess.run(cmd, check=False, capture_output=True, text=True)

            # Extract the Slurm job id from sbatch's stdout, when present
            job_id = None
            m = re.search(r"Submitted batch job\s+(\d+)", (res.stdout or ""))
            if m:
                job_id = m.group(1)

            if res.returncode == 0:
                submitted.append(entry["job_label"])
                submitted_records.append(
                    {
                        "subject_id": subject_id,
                        "fs_subject_id": fs_subject_id,
                        "job_label": entry["job_label"],
                        "session": entry.get("session"),
                        "run": entry.get("run"),
                        "t1_path": entry["t1_path"],
                        "script_path": script_file_path,
                        "sbatch_cmd": " ".join(cmd),
                        "job_id": job_id,
                        "sbatch_returncode": res.returncode,
                        "sbatch_stdout": (res.stdout or "").strip(),
                        "sbatch_stderr": (res.stderr or "").strip(),
                        "submit_time": datetime.now().isoformat(timespec="seconds"),
                        "pre_status": entry_status,
                    }
                )
            else:
                failed_job_submissions.append(entry["job_label"])
                failed_records.append(
                    {
                        "subject_id": subject_id,
                        "fs_subject_id": fs_subject_id,
                        "job_label": entry["job_label"],
                        "session": entry.get("session"),
                        "run": entry.get("run"),
                        "t1_path": entry["t1_path"],
                        "script_path": script_file_path,
                        "sbatch_cmd": " ".join(cmd),
                        "job_id": job_id,
                        "sbatch_returncode": res.returncode,
                        "sbatch_stdout": (res.stdout or "").strip(),
                        "sbatch_stderr": (res.stderr or "").strip(),
                        "reason": "sbatch_submission_failed",
                        "attempt_time": datetime.now().isoformat(timespec="seconds"),
                        "pre_status": entry_status,
                    }
                )

    # Persist timestamped manifests of what was (not) submitted in this call
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    proc_dir = Path(processing_directory)

    with open(proc_dir / f"submitted_jobs_{ts}.json", "w") as f:
        json.dump(submitted_records, f, indent=2)

    with open(proc_dir / f"failed_jobs_{ts}.json", "w") as f:
        json.dump(failed_records, f, indent=2)

    return submitted, failed_job_submissions




[docs]
def log_tail_lines(path: Path, n: int = 200) -> list[str]:
    """Return the last ``n`` lines of a log file without reading all of it.

    The file is read backwards in 4 KiB blocks until more than ``n`` newlines
    have been seen (or the start of the file is reached). If seeking fails
    with ``OSError`` the whole file is read instead.

    Args:
        path: Log file to inspect.
        n: Maximum number of trailing lines to return.

    Returns:
        The trailing lines decoded as text (undecodable bytes are replaced),
        without line terminators. Empty when the file does not exist or when
        ``n`` is not positive.
    """
    # Guard: with n <= 0 the slice lines[-n:] would not mean "no lines".
    if n <= 0 or not path.exists():
        return []

    block = 4096
    with path.open("rb") as f:
        try:
            f.seek(0, os.SEEK_END)
            end = f.tell()
            chunks: list[bytes] = []
            newlines = 0
            while end > 0 and newlines <= n:
                start = max(0, end - block)
                f.seek(start)
                chunk = f.read(end - start)
                chunks.append(chunk)
                newlines += chunk.count(b"\n")
                end = start
            # Chunks were collected back-to-front; restore file order. Joining
            # before decoding keeps multi-byte characters split across blocks intact.
            data = b"".join(reversed(chunks))
        except OSError:
            f.seek(0)
            data = f.read()

    return [line.decode(errors="replace") for line in data.splitlines()[-n:]]




[docs]
def discover_subjects(results_directory: str, exclude_subjects: set[str]) -> list[str]:
    """
    List candidate subject folders inside a FreeSurfer results directory.

    Hidden folders, names in ``exclude_subjects`` and anything starting with
    ``fsaverage`` are ignored. If at least one remaining folder contains
    ``scripts/recon-all.log``, only the folders with such a log are returned;
    otherwise all remaining folders are returned. The result is sorted, and
    empty when ``results_directory`` is not a directory.
    """
    root = Path(results_directory)
    if not root.is_dir():
        return []

    def is_candidate(entry: Path) -> bool:
        name = entry.name
        ignored = (
            name.startswith(".")
            or name in exclude_subjects
            or name.startswith("fsaverage")
        )
        return entry.is_dir() and not ignored

    candidates = [entry for entry in root.iterdir() if is_candidate(entry)]
    logged = [
        entry.name
        for entry in candidates
        if (entry / "scripts" / "recon-all.log").exists()
    ]
    # Prefer folders that recon-all has written to; fall back to every candidate.
    names = logged if logged else [entry.name for entry in candidates]
    return sorted(names)




[docs]
def classify_subject_status(
    results_directory: str,
    subject_id: str,
    *,
    success_token: str = "finished without error",
    tail_lines_to_scan: int = 200,
    fresh_minutes: int = 30,
    stalled_hours: int = 24,
) -> tuple[str, dict]:
    """
    Derive the recon-all state of one subject from its ``scripts`` folder.

    Decision order:
        1. no ``recon-all.log``                                   -> "missing"
        2. ``success_token`` found in the log tail                -> "success"
        3. log or an ``IsRunning*`` file modified within
           ``fresh_minutes``                                      -> "running"
        4. ``IsRunning*`` file(s) present and both the log and the
           newest of them older than ``stalled_hours``            -> "stalled"
        5. anything else                                          -> "failed"

    Returns:
        (status, info_dict)
        ``info_dict`` holds ``subject_id``, ``log_path``, ``has_log``,
        ``is_running_files``, ``last_mod_time``, ``tail_excerpt``,
        ``error_hints`` (up to the last 10 matching tail lines, filled only
        for "failed") and ``status``.
    """
    scripts_dir = Path(results_directory) / subject_id / "scripts"
    log_path = scripts_dir / "recon-all.log"
    has_log = log_path.exists()

    # Lock files left by recon-all; backup copies are not considered.
    backup_suffixes = (".bak", ".old", "~")
    lock_files = [
        lock
        for lock in scripts_dir.glob("IsRunning*")
        if not lock.name.endswith(backup_suffixes) and lock.exists()
    ]

    now = time.time()
    fresh_window = fresh_minutes * 60
    stall_window = stalled_hours * 3600

    log_mtime = log_path.stat().st_mtime if has_log else 0.0
    newest_lock = max((lock.stat().st_mtime for lock in lock_files), default=0.0)

    info: dict[str, object] = {
        "subject_id": subject_id,
        "log_path": str(log_path),
        "has_log": has_log,
        "is_running_files": [lock.name for lock in lock_files],
        "last_mod_time": log_mtime if has_log else None,
        "tail_excerpt": [],
        "error_hints": [],
    }

    if not has_log:
        info["status"] = "missing"
        return "missing", info

    tail = log_tail_lines(log_path, n=tail_lines_to_scan)
    info["tail_excerpt"] = tail

    for line in tail:
        if success_token in line:
            info["status"] = "success"
            return "success", info

    # Not finished: use modification times to tell running / stalled / failed.
    log_age = now - log_mtime
    lock_age = now - newest_lock
    recently_touched = log_age <= fresh_window or (
        bool(lock_files) and lock_age <= fresh_window
    )

    if recently_touched:
        status = "running"
    elif lock_files and log_age > stall_window and lock_age > stall_window:
        status = "stalled"
    else:
        status = "failed"
        # Collect tail lines that look like error messages as hints.
        error_patterns = [
            re.compile(expr, re.IGNORECASE)
            for expr in (
                r"\bERROR\b",
                r"Segmentation (fault|violation)",
                r"\bKilled\b",
                r"out of memory",
                r"No space left on device",
                r"Bus error",
                r"floating point exception",
                r"Abort",
                r"assertion.*failed",
            )
        ]
        flagged = [
            line.strip()
            for line in tail
            if any(pattern.search(line) for pattern in error_patterns)
        ]
        info["error_hints"] = flagged[-10:]

    info["status"] = status
    return status, info




[docs]
def check_log_for_success(
    results_directory: str,
    subject_ids: list[str] | None = None,
    *,
    processing_directory: str | None = None,  # where to save failed manifests
    write_manifests: bool = True,  # save JSON outputs
    success_token: str = "finished without error",
    consider_running_as_failure: bool = False,
    tail_lines_to_scan: int = 200,
    fresh_minutes: int = 30,
    stalled_hours: int = 24,
    return_details: bool = True,  # return only FAILED/MISSING/STALLED by default
):
    """
    Classify recon-all outcomes for a set of subjects and report the problems.

    Subjects are taken from ``subject_ids`` or, when that is empty/None,
    discovered in ``results_directory`` (FreeSurfer template folders
    excluded). A one-line summary of the status counts is printed. When
    ``write_manifests`` is true and ``processing_directory`` is given, the
    details of the problematic subjects are saved there as
    ``failed_jobs_<timestamp>.json``.

    A subject is problematic when its status is failed, missing or stalled -
    or running, if ``consider_running_as_failure`` is set.

    Returns:
        dict[str, dict] mapping problematic subject IDs to their info dicts
        (default), or list[str] of those subject IDs if return_details=False.
    """
    if subject_ids:
        subjects = subject_ids
    else:
        # Skip the template/example subjects that ship with FreeSurfer.
        subjects = discover_subjects(
            results_directory,
            {
                "fsaverage",
                "fsaverage_sym",
                "fsaverage5",
                "fsaverage6",
                "lh.EC_average",
                "rh.EC_average",
                "bert",
            },
        )

    counts = dict.fromkeys(
        ("success", "running", "stalled", "failed", "missing"), 0
    )
    problem_states = {"failed", "missing", "stalled"}
    failed_ids: list[str] = []
    failed_details: dict[str, dict] = {}

    for sid in subjects:
        status, info = classify_subject_status(
            results_directory,
            sid,
            success_token=success_token,
            tail_lines_to_scan=tail_lines_to_scan,
            fresh_minutes=fresh_minutes,
            stalled_hours=stalled_hours,
        )
        counts[status] += 1

        is_problem = status in problem_states or (
            consider_running_as_failure and status == "running"
        )
        if is_problem:
            failed_ids.append(sid)
            failed_details[sid] = info

    # Summary line, e.g. "(success=3, running=0, ...)".
    breakdown = ", ".join(f"{state}={total}" for state, total in counts.items())
    print(
        f"[check_log_for_success] Checked {sum(counts.values())} subjects "
        f"({breakdown})."
    )

    if write_manifests and processing_directory:
        manifest_dir = Path(processing_directory)
        manifest_dir.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        with open(manifest_dir / f"failed_jobs_{stamp}.json", "w") as f:
            json.dump(failed_details, f, indent=2, default=str)

    if return_details:
        return failed_details
    return failed_ids




[docs]
def retrieve_freesurfer_eulernum(
    freesurfer_dir, subjects=None, save_path=None, verbose=False
):
    """
    Collect the Euler numbers (ENs) of FreeSurfer-processed subjects.

    For every subject the last ``orig.nofix lheno`` line of
    ``scripts/recon-all.log`` is parsed. If the log or that line is missing,
    or parsing fails, ``mris_euler_number`` is run on ``surf/lh.orig.nofix``
    and ``surf/rh.orig.nofix`` instead. Subjects without a folder, or for
    which the recomputation fails, are reported as missing.

    Basic usage::

        ENs, missing_subjects = retrieve_freesurfer_eulernum(freesurfer_dir)

    :param freesurfer_dir: path to the FreeSurfer directory holding one
     folder per subject.
    :param subjects: list of subjects to retrieve the ENs for. If 'None'
     (the default), every sub-directory of 'freesurfer_dir' is used.
    :param save_path: if given, the results are pickled to this path as
     ``{"ENs": dataframe}``. If 'None' (default) nothing is saved.
    :param verbose: if True, print one progress line per subject.

    :outputs: * ENs - dataframe indexed by subject with the columns
                'lh_en', 'rh_en' and 'avg_en'; subjects without values are
                dropped.
              * missing_subjects - the list of missing subjects.

    Developed by S.M. Kia

    """
    if subjects is None:
        subjects = [
            entry
            for entry in os.listdir(freesurfer_dir)
            if os.path.isdir(os.path.join(freesurfer_dir, entry))
        ]

    df = pd.DataFrame(index=subjects, columns=["lh_en", "rh_en", "avg_en"])
    missing_subjects = []

    def store(position, subject, left, right):
        # Write both hemispheres plus their mean into the result table.
        df.at[subject, "lh_en"] = left
        df.at[subject, "rh_en"] = right
        df.at[subject, "avg_en"] = (left + right) / 2.0
        if verbose:
            print(f"{position}: Subject {subject} EN = {df.at[subject,'avg_en']:.3f}")

    def last_number(tokens):
        # The last purely numeric token of stdout is taken as the Euler number.
        # NOTE(review): confirm against the installed mris_euler_number output.
        return float([t for t in tokens if re.fullmatch(r"-?\d+(\.\d+)?", t)][-1])

    for s, sub in enumerate(subjects):
        sub_dir = os.path.join(freesurfer_dir, sub)
        if not os.path.isdir(sub_dir):
            missing_subjects.append(sub)
            if verbose:
                print(f"{s}: Subject {sub} is missing.")
            continue

        log_file = os.path.join(sub_dir, "scripts", "recon-all.log")
        if os.path.exists(log_file):
            with open(log_file) as f:
                hits = [line for line in f if "orig.nofix lheno" in line]
            if hits:
                # Only the last occurrence in the log is used.
                parts = hits[-1].split()
                try:
                    left, right = float(parts[3].rstrip(",")), float(parts[6])
                    store(s, sub, left, right)
                    continue
                except Exception:
                    pass  # fall through to recompute
            if verbose:
                print(f"{s}: Subject {sub} missing EN line, recomputing ...")

        try:
            # recompute with mris_euler_number (both runs first, then parse)
            outputs = [
                subprocess.run(
                    [
                        "mris_euler_number",
                        os.path.join(sub_dir, "surf", f"{hemi}.orig.nofix"),
                    ],
                    capture_output=True,
                    text=True,
                    check=True,
                ).stdout.split()
                for hemi in ("lh", "rh")
            ]
            left = last_number(outputs[0])
            right = last_number(outputs[1])
            store(s, sub, left, right)
        except Exception as e:
            missing_subjects.append(sub)
            if verbose:
                print(f"{s}: QC failed for {sub}: {e}")

    df = df.dropna()
    if save_path is not None:
        with open(save_path, "wb") as f:
            pickle.dump({"ENs": df}, f)

    return df, missing_subjects




[docs]
def freesurfer_QC(results_directory, method="MAD", threshold=3.0, verbose=False):
    """
    Perform Euler-number-based quality control (QC) on FreeSurfer outputs.

    Euler numbers are obtained with ``retrieve_freesurfer_eulernum`` for all
    subjects in ``results_directory``. Each subject is summarised by its
    worst hemisphere (the smaller, i.e. more negative, of the two Euler
    numbers) and outliers are detected on that value across subjects.

    Two QC strategies are available:

    1. MAD-based robust z-score (default)
       ``z = (x - median) / (1.4826 * MAD)``. A subject fails when
       ``z < -threshold``, i.e. only unusually *negative* Euler numbers are
       flagged. If MAD is zero, ``IQR / 1.349`` is used as the scale, and
       ``1.0`` if the IQR is zero as well.

    2. Absolute deviation from median
       A subject fails when ``|x - median| > threshold`` (Euler units).

    Parameters
    ----------
    results_directory : str
        Path to the FreeSurfer results directory containing subject folders.

    method : {'MAD', 'ABS'}, optional
        QC method, compared case-insensitively. Any value other than 'MAD'
        selects the absolute-deviation rule. The default is 'MAD'.

    threshold : float, optional
        Exclusion threshold: robust z-score cutoff for 'MAD', Euler-number
        units otherwise. The default is 3.0.

    verbose : bool, optional
        Forwarded to ``retrieve_freesurfer_eulernum``. The default is False.

    Returns
    -------
    qc_passed_samples : list of str
        Subject IDs that passed QC.

    qc_failed_samples : list of str
        Subject IDs that failed QC.

    missing_samples : list of str
        Subjects for which Euler numbers could not be retrieved, or whose
        hemisphere values were not numeric.

    Notes
    -----
    All subjects in ``results_directory`` are pooled into one distribution,
    so for multi-site studies QC should ideally be run separately per site.

    Examples
    --------
    >>> passed, failed, missing = freesurfer_QC("/path/to/freesurfer_dir")

    >>> passed, failed, missing = freesurfer_QC(
    ...     "/path/to/freesurfer_dir",
    ...     method="MAD",
    ...     threshold=2.5,
    ...     verbose=True
    ... )
    """
    euler_numbers, missing_samples = retrieve_freesurfer_eulernum(
        results_directory, verbose=verbose
    )
    if euler_numbers is None or len(euler_numbers) == 0:
        return [], [], missing_samples

    # Coerce both hemisphere columns to numbers and drop incomplete subjects.
    hemispheres = ["lh_en", "rh_en"]
    table = euler_numbers.copy()
    table[hemispheres] = table[hemispheres].apply(pd.to_numeric, errors="coerce")
    table = table.dropna(subset=hemispheres)

    if table.empty:
        return [], [], list(set(missing_samples) | set(euler_numbers.index))

    # Worst (most negative) hemisphere per subject.
    worst = np.min(table[hemispheres].to_numpy(float), axis=1)
    center = np.median(worst)
    cutoff = float(threshold)

    if method.upper() == "MAD":
        mad = np.median(np.abs(worst - center))
        if mad == 0:
            # Degenerate MAD: fall back to an IQR-based scale, then to 1.0.
            q25, q75 = np.percentile(worst, [25, 75])
            iqr = q75 - q25
            scale = iqr / 1.349 if iqr > 0 else 1.0
        else:
            scale = 1.4826 * mad
        failed_mask = (worst - center) / scale < -cutoff
    else:
        failed_mask = np.abs(worst - center) > cutoff

    qc_failed_samples = list(table.index[failed_mask])
    qc_passed_samples = list(table.index[~failed_mask])

    # Subjects removed during numeric coercion count as missing as well.
    dropped = set(euler_numbers.index) - set(table.index)
    missing_samples = list(set(missing_samples) | dropped)

    return qc_passed_samples, qc_failed_samples, missing_samples



if __name__ == "__main__":
    # Command-line entry point: submit recon-all jobs for every sub-* folder
    # of a BIDS dataset, using the defaults of run_parallel_reconall for all
    # options that are not exposed here.
    parser = argparse.ArgumentParser(
        description="Run FreeSurfer recon-all in parallel on a Slurm cluster."
    )
    parser.add_argument("subjects_directory", type=str, help="BIDS root with sub-*")
    parser.add_argument(
        "processing_directory", type=str, help="Directory to write SLURM scripts/logs"
    )
    parser.add_argument(
        "--results-directory",
        type=str,
        default=None,
        help="SUBJECTS_DIR (default: <BIDS>/derivatives/freesurfer)",
    )
    parser.add_argument(
        "--freesurfer-path",
        type=str,
        default=None,
        help="Path to FreeSurfer install (default: $FREESURFER_HOME)",
    )
    parser.add_argument(
        "--no-skip-completed",
        action="store_true",
        help="Do not skip already successful subjects",
    )
    args = parser.parse_args()

    submitted, failed = run_parallel_reconall(
        subjects_directory=args.subjects_directory,
        results_directory=args.results_directory,
        processing_directory=args.processing_directory,
        freesurfer_path=args.freesurfer_path,
        skip_completed=not args.no_skip_completed,
    )
    # `failed` holds every target that could not be submitted (no T1w found,
    # no matching session, or sbatch returned non-zero), not only missing T1w.
    print(f"Submitted jobs: {len(submitted)}; failed submissions: {len(failed)}")