#!/usr/bin/env bash
#
# collect.sh - concatenate selected files of a project into one text file.
#
# Introduced by the patch "Added collect script"; the same patch adds the
# default output file, collect.txt, to .gitignore.
#
# Usage: collect.sh [OPTIONS] [DIRS...]   (run without arguments for the help)
#
# Every DIR (or single file) is scanned with `find`. Anything below a .git
# directory, the output file itself and files that git reports as ignored are
# skipped. A -f/--filter PATTERN applies to the DIRS that follow it, and
# -s/--set NAME is replaced by the arguments stored in FILTER_SETS[NAME].
#
# The collected text goes to the output file (or stdout for "-o -"); warnings
# and verbose progress messages go to stderr so they never mix with it.
#
# Needs bash >= 4.0 and GNU find, grep, sort and realpath (-m, --relative-to).
# Arguments and file names must not contain newlines.

set -euo pipefail

# Named argument bundles for -s/--set. A value is split on whitespace and may
# itself contain -s references; expand_args resolves them.
declare -A FILTER_SETS=(
  ["rust"]='-f .*\.(rs|toml)$'
  ["doc"]='-f .*\.md$'

  ["core"]="-s rust . -s doc ."
)

OUTPUT="./collect.txt" # Target file; "-" selects stdout.
VERBOSE=0              # 0 = quiet, 1 = progress, 2 = also list every file.

#######################################
# Write a diagnostic line to stderr.
# Arguments: the message words.
#######################################
log() {
  printf '%s\n' "$*" >&2
}

#######################################
# Print the help text and the predefined sets to stdout, then exit 1.
# Globals: FILTER_SETS (read)
#######################################
usage() {
  local setname

  echo "Usage: $0 [OPTIONS] [DIRS...]"
  echo
  echo "Options:"
  echo " -f, --filter PATTERN Regex filter for subsequent directories"
  echo " -s, --set SETNAME Use predefined set of parameters"
  echo " -o, --output FILE Output file (default: ./collect.txt, use '-' for stdout)"
  echo " -v, -vv Verbose output"
  echo
  echo "Predefined Filter Sets:"
  while IFS= read -r setname; do
    printf " %-15s %s\n" "$setname:" "${FILTER_SETS[$setname]}"
  done < <(printf '%s\n' "${!FILTER_SETS[@]}" | sort)
  exit 1
}

#######################################
# Replace every "-s NAME" / "--set NAME" pair by the words stored in
# FILTER_SETS[NAME], repeating until no set reference is left.
# Globals:   FILTER_SETS (read)
# Arguments: the raw command-line arguments.
# Outputs:   the expanded arguments on stdout, one per line; errors and the
#            usage text on stderr (stdout is captured by the caller).
# Returns:   exits 1 on a missing or unknown set name, or when the sets
#            reference each other in a cycle.
#######################################
expand_args() {
  local -a pending=("$@") expanded=() set_args=()
  local arg set_name
  local i=0 has_sets=1 depth=0

  while ((has_sets)); do
    # A set that (indirectly) names itself would otherwise never terminate.
    depth=$((depth + 1))
    if ((depth > 32)); then
      log "Error: Set definitions are nested too deeply (circular reference?)"
      exit 1
    fi

    has_sets=0
    expanded=()
    i=0
    while ((i < ${#pending[@]})); do
      arg=${pending[i]}
      case "$arg" in
        -s | --set)
          has_sets=1
          i=$((i + 1))
          if ((i >= ${#pending[@]})); then
            log "Error: No set name specified"
            usage >&2
          fi
          set_name=${pending[i]}
          # The "+set" form tests for key existence without tripping set -u;
          # an empty key would be an invalid subscript, so reject it first.
          if [[ -z "$set_name" || -z "${FILTER_SETS[$set_name]+set}" ]]; then
            log "Error: Unknown set '$set_name'"
            usage >&2
          fi
          set_args=()
          read -ra set_args <<< "${FILTER_SETS[$set_name]}"
          expanded+=(${set_args[@]+"${set_args[@]}"})
          ;;
        *)
          expanded+=("$arg")
          ;;
      esac
      i=$((i + 1))
    done
    pending=(${expanded[@]+"${expanded[@]}"})
  done

  if ((${#pending[@]} > 0)); then
    printf '%s\n' "${pending[@]}"
  fi
}

#######################################
# Interpret the fully expanded arguments, collect the matching files and
# write their contents to the output.
# Globals:   OUTPUT (read, written by -o), VERBOSE (read)
# Arguments: expanded arguments (no -s/--set expected any more).
# Outputs:   the concatenated files to OUTPUT; diagnostics to stderr.
#######################################
process_command() {
  local filter="" arg dir filters file rel_path matched target
  local output_file output_abs_path output_rel_path=""
  local -A dir_filters_map=() # path -> "|"-joined filter regexes ("" = all)
  local -A ignored_set=()     # git-ignored path -> 1, for O(1) lookups
  local -a all_files=() dir_files=() matching_files=() unique_files=()
  local -a find_expr=()

  if ((VERBOSE >= 1)); then
    log "Expanded arguments:"
    for arg in "$@"; do
      log " $arg"
    done
  fi

  # --- Parse the arguments -------------------------------------------------
  while (($# > 0)); do
    arg=$1
    shift
    case "$arg" in
      -f | --filter)
        if (($# == 0)); then
          log "Warning: Option '$arg' requires a value"
          break
        fi
        filter=$1
        shift
        ;;
      -o | --output)
        if (($# == 0)); then
          log "Warning: Option '$arg' requires a value"
          break
        fi
        OUTPUT=$1
        shift
        ;;
      -v | -vv)
        # Verbosity is counted once, by main; counting it again here would
        # silently turn -v into -vv.
        ;;
      -s | --set)
        log "Warning: Set argument found after expansion"
        if (($# > 0)); then
          shift
        fi
        ;;
      -*)
        # Unknown options are ignored.
        ;;
      *)
        if [[ ! -e "$arg" ]]; then
          log "Warning: Directory '$arg' not found"
        elif [[ -n "$filter" ]]; then
          # Several filters for the same path are OR-ed together.
          if [[ -n "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]+="|$filter"
          else
            dir_filters_map["$arg"]=$filter
          fi
          if ((VERBOSE >= 1)); then
            log "Adding filter: $filter to directory: $arg"
          fi
        else
          # Register the path without narrowing an existing filter.
          if [[ -z "${dir_filters_map[$arg]:-}" ]]; then
            dir_filters_map["$arg"]=""
          fi
          if ((VERBOSE >= 1)); then
            log "Adding directory with no filter: $arg"
          fi
        fi
        ;;
    esac
  done

  # --- Resolve the output target -------------------------------------------
  if [[ "$OUTPUT" == "-" ]]; then
    output_file="/dev/stdout"
  else
    output_file=$OUTPUT
    # The path relative to the current directory is what find reports, so it
    # is the form needed to keep the output file out of its own contents.
    output_abs_path=$(realpath -m -- "$output_file")
    output_rel_path=$(realpath --relative-to=. -- "$output_abs_path")
  fi

  find_expr=(\( -path '*/.git' -prune \))
  if [[ -n "$output_rel_path" ]]; then
    find_expr+=(-o \( -path "./$output_rel_path" -prune \))
  fi
  find_expr+=(-o -type f -print0)

  # --- Cache the git-ignored files once ------------------------------------
  if git rev-parse --is-inside-work-tree &> /dev/null; then
    # -z yields raw NUL-terminated names instead of C-quoted ones.
    while IFS= read -r -d '' file; do
      if [[ -n "$file" ]]; then
        ignored_set["$file"]=1
      fi
    done < <(git ls-files -z --others --ignored --exclude-standard 2> /dev/null)

    if ((VERBOSE >= 1)); then
      log "Cached ${#ignored_set[@]} git-ignored files"
    fi
  fi

  # --- Collect the files of every requested path ---------------------------
  if ((${#dir_filters_map[@]} > 0)); then
    for dir in "${!dir_filters_map[@]}"; do
      filters=${dir_filters_map[$dir]}

      if ((VERBOSE >= 1)); then
        log "Processing directory: $dir with filters: ${filters:-none}"
      fi

      dir_files=()
      while IFS= read -r -d '' file; do
        rel_path=${file#./}
        if [[ -z "$rel_path" || "$rel_path" == "." ]]; then
          continue
        fi
        if [[ -n "$output_rel_path" && "$rel_path" == "$output_rel_path" ]]; then
          continue
        fi
        if [[ -n "${ignored_set[$rel_path]+x}" ]]; then
          continue
        fi
        dir_files+=("$rel_path")
      done < <(find "$dir" "${find_expr[@]}" 2> /dev/null)

      if ((VERBOSE >= 1)); then
        log " Found ${#dir_files[@]} files in directory"
      fi

      if [[ -z "$filters" ]]; then
        # No filter: take everything that was found.
        if ((${#dir_files[@]} > 0)); then
          all_files+=("${dir_files[@]}")
          if ((VERBOSE >= 2)); then
            for file in "${dir_files[@]}"; do
              log " Included: $file"
            done
          fi
        fi
        continue
      fi

      matching_files=()
      if ((${#dir_files[@]} > 0)); then
        # grep exiting 1 (no match) inside the process substitution is not
        # an error; it simply produces no lines.
        while IFS= read -r matched; do
          if [[ -z "$matched" ]]; then
            continue
          fi
          matching_files+=("$matched")
          if ((VERBOSE >= 2)); then
            log " Included: $matched"
          fi
        done < <(printf '%s\n' "${dir_files[@]}" | grep -E -- "($filters)")
      fi

      if ((${#matching_files[@]} > 0)); then
        all_files+=("${matching_files[@]}")
      fi

      if ((VERBOSE >= 1)); then
        log " Matched ${#matching_files[@]} files after filtering"
      fi
    done
  fi

  # --- Write the result ----------------------------------------------------
  if ((${#all_files[@]} == 0)); then
    log "Warning: No files collected; nothing written"
    return 0
  fi

  # A path can be reached through several arguments; emit it only once.
  mapfile -t unique_files < <(printf '%s\n' "${all_files[@]}" | sort -u)

  {
    echo "File Contents:"
    for file in "${unique_files[@]}"; do
      # NOTE(review): find is run with -type f, which does not report
      # symlinks, so this branch looks unreachable - confirm the intent.
      if [[ -L "$file" ]]; then
        target=$(readlink -f -- "$file" || echo "unknown")
        printf '\n===== SYMLINK: %s → %s =====\n' "$file" "$target"
      else
        printf '\n===== FILE: %s =====\n' "$file"
        # Unreadable files are reported inline rather than aborting the run.
        cat -- "$file" 2> /dev/null || echo "Error: Unable to read file"
      fi
    done
  } > "$output_file"
}

#######################################
# Entry point: expand the sets, determine the verbosity and run the
# collection.
# Globals:   VERBOSE (written), OUTPUT (read)
# Arguments: the script's command-line arguments.
#######################################
main() {
  local arg expanded_text
  local -a expanded_args=()

  # No arguments? Show usage (exits 1).
  if (($# == 0)); then
    usage
  fi

  # Capture first and check the status explicitly: a failure inside the
  # command substitution must stop the script instead of being parsed as
  # arguments.
  expanded_text=$(expand_args "$@") || exit 1
  if [[ -n "$expanded_text" ]]; then
    mapfile -t expanded_args <<< "$expanded_text"
  fi

  # Count verbosity before processing so that the argument dump is shown.
  # VERBOSE=$((...)) is used because ((VERBOSE++)) returns status 1 when the
  # old value is 0, which aborts the script under `set -e`.
  for arg in ${expanded_args[@]+"${expanded_args[@]}"}; do
    case "$arg" in
      -v) VERBOSE=$((VERBOSE + 1)) ;;
      -vv) VERBOSE=2 ;;
    esac
  done

  process_command ${expanded_args[@]+"${expanded_args[@]}"}

  # Report with the name the user gave, not the resolved path.
  if [[ "$OUTPUT" == "-" ]]; then
    echo "Concatenation complete. Output written to stdout" >&2
  else
    echo "Concatenation complete. Output written to $OUTPUT" >&2
  fi
}

main "$@"