fcb_looper/collect.sh
2025-07-16 16:02:59 +02:00

290 lines
7.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -euo pipefail

# Named parameter bundles selectable with -s/--set. A value is a plain
# argument string that is split on whitespace when expanded, and it may
# itself contain further -s references (see "core").
declare -A FILTER_SETS
FILTER_SETS[rust]='-f .*\.(rs|toml)$'
FILTER_SETS[doc]='-f .*\.md$'
FILTER_SETS[core]='-s rust . -s doc .'

# Default output file; replaced by -o/--output ('-' selects stdout).
OUTPUT=./collect.txt

# Verbosity level: 0 = quiet, 1 = progress messages, 2 = per-file messages.
VERBOSE=0
# Print the help text followed by the predefined filter sets (sorted by
# name), then terminate the shell with status 1.
# Globals: FILTER_SETS (read)
usage() {
  local name

  printf '%s\n' \
    "Usage: $0 [OPTIONS] [DIRS...]" \
    "" \
    "Options:" \
    " -f, --filter PATTERN Regex filter for subsequent directories" \
    " -s, --set SETNAME Use predefined set of parameters" \
    " -o, --output FILE Output file (default: ./collect.txt, use '-' for stdout)" \
    " -v, -vv Verbose output" \
    "" \
    "Predefined Filter Sets:"

  # One set name per line so the names can be sorted before printing.
  while IFS= read -r name; do
    [[ -n "$name" ]] || continue
    printf " %-15s %s\n" "$name:" "${FILTER_SETS[$name]}"
  done < <(printf '%s\n' "${!FILTER_SETS[@]}" | sort)

  exit 1
}
# Replace every "-s NAME" / "--set NAME" pair with the parameters stored in
# FILTER_SETS[NAME], repeating until no set reference is left (sets may
# reference other sets).
#
# Globals:   FILTER_SETS (read)
# Arguments: the raw command-line arguments
# Outputs:   stdout - the fully expanded arguments as ONE space-separated
#                     line (the caller splits it on whitespace)
#            stderr - error messages and the usage text
# Returns:   exits 1 when a set name is missing or unknown, or when the
#            expansion nests too deeply (circular set definition)
expand_args() {
  local -a expanded=()
  local -a set_args=()
  local arg set_name
  local has_sets=0

  # Every recursion level adds one entry to FUNCNAME; a set that names
  # itself (directly or indirectly) would otherwise recurse forever.
  if ((${#FUNCNAME[@]} > 64)); then
    echo "Error: Set expansion nested too deeply (circular set definition?)" >&2
    exit 1
  fi

  while (($# > 0)); do
    arg=$1
    shift
    case "$arg" in
      -s|--set)
        has_sets=1
        # Diagnostics go to stderr: stdout is captured by the caller and
        # would otherwise be parsed as arguments.
        if (($# == 0)); then
          echo "Error: No set name specified" >&2
          usage >&2
          exit 1
        fi
        set_name=$1
        shift
        # ${arr[key]+x} tests for existence without re-evaluating the key.
        if [[ -n "$set_name" && -n "${FILTER_SETS[$set_name]+set}" ]]; then
          read -ra set_args <<< "${FILTER_SETS[$set_name]}"
          if ((${#set_args[@]} > 0)); then
            expanded+=("${set_args[@]}")
          fi
        else
          echo "Error: Unknown set '$set_name'" >&2
          usage >&2
          exit 1
        fi
        ;;
      *)
        # Everything else is passed through untouched.
        expanded+=("$arg")
        ;;
    esac
  done

  if ((has_sets)); then
    # The inserted parameters may contain further set references.
    expand_args "${expanded[@]+"${expanded[@]}"}"
  else
    # printf instead of echo: a leading "-n"/"-e" must be emitted verbatim.
    printf '%s\n' "${expanded[*]-}"
  fi
}
# Print a diagnostic line on stderr when VERBOSE is at least the given level.
# Arguments: $1 - minimum verbosity level, remaining - message text
_collect_log() {
  local level=$1
  shift
  if ((VERBOSE >= level)); then
    printf '%s\n' "$*" >&2
  fi
}

# Process the final expanded command line: pair every directory with the
# filter in effect, collect the matching files and write their contents.
#
# Globals:   OUTPUT  (read; overwritten by -o/--output)
#            VERBOSE (read; raised by -v / -vv)
# Arguments: expanded arguments: -f PATTERN, -o FILE, -v, -vv and paths.
#            A path is paired with the most recent -f; several filters for
#            the same path are OR-ed together.
# Outputs:   the collection is written to $OUTPUT ('-' means stdout).
#            Warnings and verbose messages go to stderr so that they never
#            mix with the collection when stdout is the output.
process_command() {
  local filter=""
  local -A dir_filters_map=()   # path -> "|"-joined filter regexes ("" = all)
  local arg
  local verbose_flags=0

  if ((VERBOSE >= 1)); then
    echo "Expanded arguments:" >&2
    for arg in "$@"; do
      echo " $arg" >&2
    done
  fi

  # Process final arguments
  while (($# > 0)); do
    arg=$1
    shift
    case "$arg" in
      -f|--filter)
        (($# > 0)) || break
        filter=$1
        shift
        ;;
      -o|--output)
        (($# > 0)) || break
        OUTPUT=$1
        shift
        ;;
      -v)
        # The caller may already have counted the -v flags; only raise
        # VERBOSE to the number of flags seen here so that nothing is
        # counted twice. Plain assignment also avoids the non-zero status
        # of ((VERBOSE++)) under 'set -e'.
        verbose_flags=$((verbose_flags + 1))
        if ((VERBOSE < verbose_flags)); then
          VERBOSE=$verbose_flags
        fi
        ;;
      -vv)
        if ((VERBOSE < 2)); then
          VERBOSE=2
        fi
        ;;
      -s|--set)
        echo "Warning: Set argument found after expansion" >&2
        # Skip the set name that follows.
        if (($# > 0)); then
          shift
        fi
        ;;
      -*)
        # Skip other options
        ;;
      *)
        # Anything else is a path to collect from.
        if [[ ! -e "$arg" ]]; then
          echo "Warning: Directory '$arg' not found" >&2
        elif [[ -n "$filter" ]]; then
          if [[ -n "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]+="|$filter"
          else
            dir_filters_map["$arg"]=$filter
          fi
          _collect_log 1 "Adding filter: $filter to directory: $arg"
        else
          # No filter active: make sure the path is registered, but keep
          # any filter that was attached to it earlier.
          if [[ -z "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]=""
          fi
          _collect_log 1 "Adding directory with no filter: $arg"
        fi
        ;;
    esac
  done

  # Handle special case for stdout
  local output_file=$OUTPUT
  if [[ "$OUTPUT" == "-" ]]; then
    output_file="/dev/stdout"
  fi

  # Path of the output file relative to the current directory, used to keep
  # the output file itself out of the collection.
  local output_abs_path output_rel_path
  output_abs_path=$(realpath -m -- "$output_file")
  output_rel_path=$(realpath --relative-to=. -- "$output_abs_path")

  # Cache git-ignored files once, keyed by path for constant-time lookup.
  # -z keeps unusual file names unquoted and NUL-separated.
  local -A git_ignored=()
  local ignored_file
  if git rev-parse --is-inside-work-tree &>/dev/null; then
    while IFS= read -r -d '' ignored_file; do
      if [[ -n "$ignored_file" ]]; then
        git_ignored["$ignored_file"]=1
      fi
    done < <(git ls-files -z --others --ignored --exclude-standard 2>/dev/null)
    _collect_log 1 "Cached ${#git_ignored[@]} git-ignored files"
  fi

  # Process directories and collect files
  local -a all_files=()
  local -a dir_files=()
  local -a matching_files=()
  local dir filters file rel_path matched
  for dir in "${!dir_filters_map[@]}"; do
    filters=${dir_filters_map[$dir]}
    _collect_log 1 "Processing directory: $dir with filters: ${filters:-none}"

    # Every regular file below the path, minus .git, the output file and
    # anything git ignores.
    dir_files=()
    while IFS= read -r -d '' file; do
      rel_path=${file#./}
      [[ -z "$rel_path" || "$rel_path" == "." ]] && continue
      [[ "$rel_path" == "$output_rel_path" ]] && continue
      [[ -n "${git_ignored[$rel_path]:-}" ]] && continue
      dir_files+=("$rel_path")
    done < <(find "$dir" \( -path '*/.git' -prune \) -o \( -path "./$output_rel_path" -prune \) -o -type f -print0 2>/dev/null)
    _collect_log 1 " Found ${#dir_files[@]} files in directory"

    if [[ -n "$filters" ]]; then
      # Keep only the paths matching at least one of the filters.
      matching_files=()
      if ((${#dir_files[@]} > 0)); then
        while IFS= read -r matched; do
          [[ -n "$matched" ]] || continue
          matching_files+=("$matched")
          _collect_log 2 " Included: $matched"
        done < <(printf '%s\n' "${dir_files[@]}" | grep -E -- "($filters)")
      fi
      if ((${#matching_files[@]} > 0)); then
        all_files+=("${matching_files[@]}")
      fi
      _collect_log 1 " Matched ${#matching_files[@]} files after filtering"
    elif ((${#dir_files[@]} > 0)); then
      # No filter, add all files
      all_files+=("${dir_files[@]}")
      for file in "${dir_files[@]}"; do
        _collect_log 2 " Included: $file"
      done
    fi
  done

  # Write each distinct file once, in sorted order.
  if ((${#all_files[@]} > 0)); then
    local -a unique_files=()
    local target
    readarray -t unique_files < <(printf '%s\n' "${all_files[@]}" | sort -u)
    {
      printf 'File Contents:\n'
      for file in "${unique_files[@]}"; do
        if [[ -L "$file" ]]; then
          target=$(readlink -f -- "$file" || echo "unknown")
          printf '\n===== SYMLINK: %s -> %s =====\n' "$file" "$target"
        else
          printf '\n===== FILE: %s =====\n' "$file"
          cat -- "$file" 2>/dev/null || echo "Error: Unable to read file"
        fi
      done
    } > "$output_file"
  fi
}
# Entry point: determine the verbosity, expand the set references and run
# the collection.
#
# Globals:   VERBOSE (written), OUTPUT (read after processing)
# Arguments: the script's command-line arguments
# Outputs:   completion message on stderr
# Returns:   exits 1 (after printing usage) without arguments, and exits 1
#            when the set expansion fails
main() {
  local arg expanded_line
  local -a expanded_args=()

  # No arguments? Show usage
  if (($# == 0)); then
    usage
  fi

  # Scan for verbose flags first so that the expansion and parsing steps
  # can already log. Plain assignment instead of ((VERBOSE++)): the latter
  # returns status 1 when VERBOSE is 0 and would abort under 'set -e'.
  for arg in "$@"; do
    case "$arg" in
      -v) VERBOSE=$((VERBOSE + 1)) ;;
      -vv) VERBOSE=2 ;;
    esac
  done

  # Expand all set arguments recursively. The expansion runs in a subshell,
  # so its failure has to be checked here; whatever it printed on stdout is
  # relayed to stderr instead of being parsed as arguments.
  if ! expanded_line=$(expand_args "$@"); then
    if [[ -n "$expanded_line" ]]; then
      printf '%s\n' "$expanded_line" >&2
    fi
    exit 1
  fi
  read -ra expanded_args <<< "$expanded_line"

  # Process the completely expanded arguments
  process_command "${expanded_args[@]+"${expanded_args[@]}"}"

  # OUTPUT reflects any -o option seen during processing.
  if [[ "$OUTPUT" == "-" ]]; then
    echo "Concatenation complete. Output written to stdout" >&2
  else
    echo "Concatenation complete. Output written to $OUTPUT" >&2
  fi
}
# Script entry point: hand all command-line arguments to main.
main "$@"