Files
auCDtect_linux/tools/eval_dataset.sh

54 lines
1.9 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Preflight for the dataset evaluation run.
#
# Usage: eval_dataset.sh <samples-root> [output.csv]
#
# Validates the command line and the tools the rest of the script needs:
#   * <samples-root> must be an existing directory,
#   * build/aucdtect (relative to the current directory) must be executable,
#   * ffmpeg must be on PATH.
# On success it leaves `samples_root` and `output_csv` set for the code below;
# on any failure it prints a message to stderr and exits with status 1.
set -euo pipefail

# Print every argument on its own stderr line, then stop with status 1.
fail() {
  printf '%s\n' "$@" >&2
  exit 1
}

# One mandatory and one optional positional argument; nothing else.
(( $# >= 1 && $# <= 2 )) || fail "Usage: $0 <samples-root> [output.csv]"

samples_root=$1
# The report path falls back to dataset_eval.csv in the current directory.
output_csv=${2:-dataset_eval.csv}

[[ -d "$samples_root" ]] || fail "Samples directory not found: $samples_root"

[[ -x build/aucdtect ]] || fail \
  "CLI binary missing: build/aucdtect" \
  "Build the project first with: cmake --build build"

command -v ffmpeg >/dev/null 2>&1 \
  || fail "ffmpeg is required for dataset evaluation"
# Private scratch directory; the EXIT trap removes it on every exit path.
tmp_root=$(mktemp -d)
remove_tmp_root() {
  rm -rf "$tmp_root"
}
trap remove_tmp_root EXIT

# Column names of the report, in output order. The header line written below
# is these names joined with commas; it truncates any existing report file.
csv_columns=(
  label source_file file ok conclusion accuracy confidence
  sample_rate channels bits_per_sample sample_frames
  cutoff_khz rolloff_khz lo_cut_hz hi_cut_hz
  lo_boundary_hz hi_boundary_hz probable_boundary_hz
  phase_nonlinearity first_order_smoothness second_order_smoothness
  high_band_ratio very_high_band_ratio spectral_flatness
  analyzed_windows informative_windows suspect_windows genuine_windows
  suspect_ratio error
)
printf -v csv_header '%s,' "${csv_columns[@]}"
printf '%s\n' "${csv_header%,}" > "$output_csv"
# Emit one CSV field: the value wrapped in double quotes, with every embedded
# double quote doubled (RFC 4180), so unusual file names cannot break a row.
#   $1 - raw field value
_csv_field() {
  local value=$1
  local dq='"'
  printf '"%s"' "${value//$dq/$dq$dq}"
}

# Print the dataset label of a sample: the first directory component of the
# sample path relative to the samples root, or "unlabeled" when the file sits
# directly in the root. Trailing slashes on the root are ignored, so
# "samples" and "samples/" produce the same labels.
#   $1 - samples root as given on the command line
#   $2 - sample path as printed by find
_dataset_label() {
  local root=$1 path=$2 rel
  while [[ "$root" == */ ]]; do
    root=${root%/}
  done
  rel=${path#"$root"/}
  # Some find implementations keep a doubled slash after the root.
  while [[ "$rel" == /* ]]; do
    rel=${rel#/}
  done
  if [[ "$rel" == */* ]]; then
    printf '%s' "${rel%%/*}"
  else
    printf '%s' unlabeled
  fi
}

# Walk every supported audio file under $samples_root (NUL-delimited and
# sorted, so any file name is handled and the row order is stable) and append
# one row per file to $output_csv.
while IFS= read -r -d '' source_file; do
  label=$(_dataset_label "$samples_root" "$source_file")

  # Compare the extension case-insensitively (".WAV" counts as WAV).
  ext=${source_file##*.}
  ext_lower=$(printf '%s' "$ext" | tr '[:upper:]' '[:lower:]')

  analysis_file=$source_file
  converted=0
  if [[ "$ext_lower" != "wav" ]]; then
    # Anything that is not already WAV is decoded by ffmpeg into a
    # 44.1 kHz, 2-channel, 16-bit PCM WAV inside the scratch directory.
    stem=${source_file##*/}
    analysis_file="$tmp_root/${stem%.*}.wav"
    # stdin is redirected so ffmpeg cannot consume the file list feeding
    # this loop.
    ffmpeg -loglevel error -y -i "$source_file" \
      -ar 44100 -ac 2 -c:a pcm_s16le "$analysis_file" </dev/null
    converted=1
  fi

  # tail -n +2 drops the first output line (presumably the header printed by
  # --dump-features) and keeps the feature row.
  row=$(build/aucdtect --dump-features "$analysis_file" </dev/null | tail -n +2)

  # Drop the decoded copy right away so scratch usage stays at one file
  # instead of growing with the dataset.
  if (( converted )); then
    rm -f -- "$analysis_file"
  fi

  printf '%s,%s,%s\n' \
    "$(_csv_field "$label")" "$(_csv_field "$source_file")" "$row" \
    >> "$output_csv"
done < <(find "$samples_root" -type f \( -iname '*.wav' -o -iname '*.flac' -o -iname '*.mp3' -o -iname '*.aac' -o -iname '*.m4a' -o -iname '*.ape' -o -iname '*.wv' -o -iname '*.tta' -o -iname '*.tak' \) -print0 | sort -z)
echo "Saved dataset report to $output_csv"