#!/usr/bin/env bash
#
# Concatenate the contents of files found under one or more directories into
# a single output file (or stdout), each file preceded by a header line.
#
# Directories may be narrowed by an extended regex (-f) that applies to every
# directory named after it on the command line. Named "filter sets" (-s) are
# shorthand for a sequence of arguments and may refer to other sets.
# Files reported by git as ignored, anything under a .git directory, and the
# output file itself are never collected.
#
# Known limitation: file names containing a newline are not supported, because
# the file list is passed through line-oriented tools (grep, sort).

set -euo pipefail

# Predefined argument sets. Each value is split on whitespace and spliced into
# the command line in place of "-s NAME"; values may themselves contain -s.
declare -A FILTER_SETS=(
  ["rust"]="-f .*\\.(rs|toml)$"
  ["doc"]="-f .*\\.md$"
  ["core"]="-s rust . -s doc ."
)
OUTPUT="./collect.txt"
VERBOSE=0

#######################################
# Write a diagnostic line to stderr so it can never mix with collected
# content when the output is stdout.
# Arguments: the message words
#######################################
log() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print the help text and terminate the script.
# Globals:   FILTER_SETS (read)
# Outputs:   help text on stdout (callers reporting an error redirect it)
# Returns:   never; exits with status 1
#######################################
usage() {
  local setname

  echo "Usage: $0 [OPTIONS] [DIRS...]"
  echo
  echo "Options:"
  echo " -f, --filter PATTERN Regex filter for subsequent directories"
  echo " -s, --set SETNAME Use predefined set of parameters"
  echo " -o, --output FILE Output file (default: ./collect.txt, use '-' for stdout)"
  echo " -v, -vv Verbose output"
  echo
  echo "Predefined Filter Sets:"
  while IFS= read -r setname; do
    printf " %-15s %s\n" "$setname:" "${FILTER_SETS[$setname]}"
  done < <(printf '%s\n' "${!FILTER_SETS[@]}" | sort)
  exit 1
}

#######################################
# Replace every "-s NAME" / "--set NAME" pair with the arguments stored in
# FILTER_SETS[NAME], repeating until no set references remain.
# Globals:   FILTER_SETS (read)
# Arguments: the raw command-line arguments
# Outputs:   the expanded arguments on stdout, one per line; errors and the
#            help text on stderr
# Returns:   0 on success; exits 1 on a missing or unknown set name, or when
#            sets nest deeper than the pass limit (which indicates a cycle)
#######################################
expand_args() {
  local -r max_passes=32
  local -a pending=("$@") expanded=() set_args=()
  local arg set_name
  local has_sets=1 passes=0 i

  while (( has_sets )); do
    passes=$((passes + 1))
    if (( passes > max_passes )); then
      log "Error: Filter sets nest too deeply (possible cycle)"
      exit 1
    fi

    has_sets=0
    expanded=()
    i=0
    while (( i < ${#pending[@]} )); do
      arg=${pending[i]}
      case "$arg" in
        -s|--set)
          has_sets=1
          i=$((i + 1))
          if (( i >= ${#pending[@]} )); then
            log "Error: No set name specified"
            usage >&2
          fi
          set_name=${pending[i]}
          # The emptiness test comes first: an empty associative-array
          # subscript is itself an error in bash.
          if [[ -z "$set_name" || -z "${FILTER_SETS[$set_name]+x}" ]]; then
            log "Error: Unknown set '$set_name'"
            usage >&2
          fi
          read -ra set_args <<< "${FILTER_SETS[$set_name]}"
          expanded+=(${set_args[@]+"${set_args[@]}"})
          ;;
        *)
          expanded+=("$arg")
          ;;
      esac
      i=$((i + 1))
    done
    pending=(${expanded[@]+"${expanded[@]}"})
  done

  # One argument per line keeps embedded spaces intact for the caller.
  if (( ${#pending[@]} > 0 )); then
    printf '%s\n' "${pending[@]}"
  fi
}

#######################################
# Interpret the fully expanded command line, gather the matching files and
# write their contents to the output.
# Globals:   OUTPUT (read, written by -o), VERBOSE (read)
# Arguments: the expanded arguments (no -s/--set expected)
# Outputs:   collected content to the output file or stdout; diagnostics
#            on stderr
# Returns:   0 on success; exits 1 when -f or -o lacks its value
#######################################
process_command() {
  local filter="" arg dir filters file rel_path matched target ignored_file
  local output_file output_abs_path output_rel_path
  local -A dir_filters_map=()   # path -> "|"-joined regex alternatives
  local -A git_ignored=()       # set of git-ignored paths, relative to cwd
  local -a all_files=() dir_files=() matching_files=() unique_files=()
  local i=1

  if (( VERBOSE >= 1 )); then
    log "Expanded arguments:"
    for arg in "$@"; do
      log " $arg"
    done
  fi

  while (( i <= $# )); do
    arg="${!i}"
    case "$arg" in
      -f|--filter)
        i=$((i + 1))
        if (( i > $# )); then
          log "Error: Option '$arg' requires an argument"
          usage >&2
        fi
        filter="${!i}"
        ;;
      -o|--output)
        i=$((i + 1))
        if (( i > $# )); then
          log "Error: Option '$arg' requires an argument"
          usage >&2
        fi
        OUTPUT="${!i}"
        ;;
      -v|-vv)
        # Verbosity was already counted by main's pre-scan; counting it
        # again here would turn a single -v into level 2.
        ;;
      -s|--set)
        log "Warning: Set argument found after expansion"
        i=$((i + 1))
        ;;
      -*)
        # Unrecognised options are ignored.
        ;;
      *)
        if [[ ! -e "$arg" ]]; then
          log "Warning: Directory '$arg' not found"
        elif [[ -n "$filter" ]]; then
          # Several filters on the same path are OR-ed together.
          if [[ -n "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]="${dir_filters_map[$arg]}|$filter"
          else
            dir_filters_map["$arg"]="$filter"
          fi
          if (( VERBOSE >= 1 )); then
            log "Adding filter: $filter to directory: $arg"
          fi
        else
          # Register the path without overwriting filters recorded earlier.
          if [[ -z "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]=""
          fi
          if (( VERBOSE >= 1 )); then
            log "Adding directory with no filter: $arg"
          fi
        fi
        ;;
    esac
    i=$((i + 1))
  done

  output_file="$OUTPUT"
  if [[ "$OUTPUT" == "-" ]]; then
    output_file="/dev/stdout"
  fi

  # The cwd-relative form of the output path is what find results are
  # compared against, so the output file never collects itself.
  output_abs_path=$(realpath -m -- "$output_file")
  output_rel_path=$(realpath --relative-to=. -- "$output_abs_path")

  # Cache git-ignored paths once. -z gives raw NUL-terminated names; without
  # it git C-quotes unusual paths and they would never compare equal.
  if git rev-parse --is-inside-work-tree &>/dev/null; then
    while IFS= read -r -d '' ignored_file; do
      git_ignored["$ignored_file"]=1
    done < <(git ls-files -z --others --ignored --exclude-standard 2>/dev/null)
    if (( VERBOSE >= 1 )); then
      log "Cached ${#git_ignored[@]} git-ignored files"
    fi
  fi

  # Nothing usable was named on the command line: nothing to write.
  if (( ${#dir_filters_map[@]} == 0 )); then
    return 0
  fi

  for dir in "${!dir_filters_map[@]}"; do
    filters="${dir_filters_map[$dir]}"
    if (( VERBOSE >= 1 )); then
      log "Processing directory: $dir with filters: ${filters:-none}"
    fi

    dir_files=()
    while IFS= read -r -d '' file; do
      rel_path="${file#./}"
      if [[ -z "$rel_path" || "$rel_path" == "." ]]; then
        continue
      fi
      if [[ "$rel_path" == "$output_rel_path" ]]; then
        continue
      fi
      if [[ -n "${git_ignored[$rel_path]+x}" ]]; then
        continue
      fi
      dir_files+=("$rel_path")
    done < <(find "$dir" \( -path '*/.git' -prune \) -o \
      \( -path "./$output_rel_path" -prune \) -o -type f -print0 2>/dev/null)

    if (( VERBOSE >= 1 )); then
      log " Found ${#dir_files[@]} files in directory"
    fi

    if [[ -n "$filters" ]]; then
      matching_files=()
      if (( ${#dir_files[@]} > 0 )); then
        while IFS= read -r matched; do
          [[ -n "$matched" ]] || continue
          matching_files+=("$matched")
          if (( VERBOSE >= 2 )); then
            log " Included: $matched"
          fi
        done < <(printf '%s\n' "${dir_files[@]}" | grep -E -- "($filters)")
      fi
      if (( ${#matching_files[@]} > 0 )); then
        all_files+=("${matching_files[@]}")
      fi
      if (( VERBOSE >= 1 )); then
        log " Matched ${#matching_files[@]} files after filtering"
      fi
    elif (( ${#dir_files[@]} > 0 )); then
      all_files+=("${dir_files[@]}")
      if (( VERBOSE >= 2 )); then
        for file in "${dir_files[@]}"; do
          log " Included: $file"
        done
      fi
    fi
  done

  if (( ${#all_files[@]} == 0 )); then
    return 0
  fi

  # A path reachable through several directory arguments is emitted once.
  readarray -t unique_files < <(printf '%s\n' "${all_files[@]}" | sort -u)

  {
    echo "File Contents:"
    for file in "${unique_files[@]}"; do
      if [[ -L "$file" ]]; then
        target=$(readlink -f -- "$file" || echo "unknown")
        printf '\n===== SYMLINK: %s → %s =====\n' "$file" "$target"
      else
        printf '\n===== FILE: %s =====\n' "$file"
        cat -- "$file" 2>/dev/null || echo "Error: Unable to read file"
      fi
    done
  } > "$output_file"
}

#######################################
# Entry point: pre-scan verbosity, expand filter sets, run the collection
# and report where the output went.
# Globals:   VERBOSE (written), OUTPUT (read after process_command)
# Arguments: the script's command-line arguments
# Outputs:   completion message on stderr
# Returns:   0 on success; exits 1 on usage errors
#######################################
main() {
  local arg expanded_output
  local -a expanded_args=()

  if (( $# == 0 )); then
    usage
  fi

  # Verbosity is read up front so that it is in effect while the remaining
  # arguments are processed. Plain assignment is used because ((VERBOSE++))
  # returns status 1 when the old value is 0, which aborts under set -e.
  for arg in "$@"; do
    case "$arg" in
      -v) VERBOSE=$((VERBOSE + 1)) ;;
      -vv) VERBOSE=2 ;;
    esac
  done

  # Capture into a variable first so that a failure inside expand_args is
  # seen here instead of being lost in a here-string.
  expanded_output=$(expand_args "$@") || exit 1
  if [[ -n "$expanded_output" ]]; then
    mapfile -t expanded_args <<< "$expanded_output"
  fi

  process_command ${expanded_args[@]+"${expanded_args[@]}"}

  if [[ "$OUTPUT" == "-" ]]; then
    echo "Concatenation complete. Output written to stdout" >&2
  else
    echo "Concatenation complete. Output written to $OUTPUT" >&2
  fi
}

main "$@"