#!/usr/bin/env bash
#
# Run build/aucdtect over every audio file below a samples directory and
# collect the results into a single CSV report.
#
# Usage: <script> <samples_root> [output.csv]
#   samples_root  Directory searched recursively for audio files. The first
#                 path component below it becomes the row's label; files
#                 directly inside the root are labelled "unlabeled".
#   output.csv    Report path (default: dataset_eval.csv). Overwritten.
#
# Requires an executable build/aucdtect (relative to the current directory)
# and ffmpeg on PATH. Exits 1 on bad arguments or a missing prerequisite.
set -euo pipefail

if [[ $# -lt 1 || $# -gt 2 ]]; then
  echo "Usage: $0 <samples_root> [output.csv]" >&2
  exit 1
fi

samples_root=$1
output_csv=${2:-dataset_eval.csv}

if [[ ! -d "$samples_root" ]]; then
  echo "Samples directory not found: $samples_root" >&2
  exit 1
fi

# Drop trailing slashes so the prefix strip in the loop matches the paths
# find prints ("dir/" would otherwise turn every label into "dir").
while [[ "$samples_root" == */ && "$samples_root" != / ]]; do
  samples_root=${samples_root%/}
done

if [[ ! -x build/aucdtect ]]; then
  echo "CLI binary missing: build/aucdtect" >&2
  echo "Build the project first with: cmake --build build" >&2
  exit 1
fi

if ! command -v ffmpeg >/dev/null 2>&1; then
  echo "ffmpeg is required for dataset evaluation" >&2
  exit 1
fi

# Scratch directory for transcoded WAV files; removed on every exit path.
tmp_root=$(mktemp -d)
trap 'rm -rf -- "$tmp_root"' EXIT

# Write the header once; data rows are appended below.
echo "label,source_file,file,ok,conclusion,accuracy,confidence,sample_rate,channels,bits_per_sample,sample_frames,cutoff_khz,rolloff_khz,lo_cut_hz,hi_cut_hz,lo_boundary_hz,hi_boundary_hz,probable_boundary_hz,phase_nonlinearity,first_order_smoothness,second_order_smoothness,high_band_ratio,very_high_band_ratio,spectral_flatness,analyzed_windows,informative_windows,suspect_windows,genuine_windows,suspect_ratio,error" > "$output_csv"

while IFS= read -r -d '' source_file; do
  # Path relative to the samples root; its first component is the label.
  rel_path=${source_file#"$samples_root"}
  rel_path=${rel_path#/}
  label=${rel_path%%/*}
  if [[ "$label" == "$rel_path" ]]; then
    # No directory component: the file sits directly in the samples root.
    label=unlabeled
  fi

  ext=${source_file##*.}
  ext_lower=$(printf '%s' "$ext" | tr '[:upper:]' '[:lower:]')

  analysis_file=$source_file
  if [[ "$ext_lower" != "wav" ]]; then
    # Non-WAV input is transcoded to 44.1 kHz stereo 16-bit PCM first.
    analysis_file="$tmp_root/$(basename "${source_file%.*}").wav"
    # stdin is redirected so ffmpeg cannot swallow the NUL-delimited file
    # list this loop is reading from.
    ffmpeg -loglevel error -y -i "$source_file" -ar 44100 -ac 2 \
      -c:a pcm_s16le "$analysis_file" </dev/null
  fi

  # NOTE(review): the analyzer invocation was not recoverable from the
  # reviewed text. This assumes `build/aucdtect --csv FILE` prints the data
  # row (columns "file" .. "error" of the header above) as its last stdout
  # line -- TODO confirm the flag and output shape against the CLI.
  row=$(build/aucdtect --csv "$analysis_file" </dev/null | tail -n 1)

  # Append (>>): a plain '>' here would truncate the report, header
  # included, on every iteration.
  printf '%s,%s,%s\n' "$label" "$source_file" "$row" >> "$output_csv"
done < <(
  find "$samples_root" -type f \( \
    -iname '*.wav' -o -iname '*.flac' -o -iname '*.mp3' \
    -o -iname '*.aac' -o -iname '*.m4a' -o -iname '*.ape' \
    -o -iname '*.wv' -o -iname '*.tta' -o -iname '*.tak' \
    \) -print0 | sort -z
)

echo "Saved dataset report to $output_csv"