#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tree_gen.py - directory tree generator script (Windows platform).

Features (F013-F020):
- accept a path from the command line
- load an ignore configuration
- walk the directory recursively
- build a directory tree / file tree
- print to the terminal
- save as Markdown
- collect statistics

Tech stack: Python 3.8+ standard library only, no third-party dependencies.
"""

import argparse
import fnmatch
import os
import sys
from pathlib import Path
from datetime import datetime
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple

# =============================================================================
# Module 1: default ignore list & configuration
# =============================================================================

# Entry names that are skipped by exact match.
DEFAULT_IGNORE = {
    ".git",
    ".svn",
    ".hg",
    "node_modules",
    "bower_components",
    "__pycache__",
    ".pytest_cache",
    ".idea",
    ".vscode",
    "dist",
    "build",
    "target",
    ".DS_Store",
    "Thumbs.db",
    "venv",
    ".venv",
    "env",
}

# Wildcard patterns (matched against entry names with fnmatch).
DEFAULT_GLOB_IGNORE = {"*.pyc"}


# =============================================================================
# Module 2: ArgParser
# =============================================================================


class ArgParser:
    """Command-line argument parsing (F013)."""

    @staticmethod
    def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
        """Parse command-line arguments.

        Args:
            args: Argument list to parse; ``None`` makes argparse read
                ``sys.argv[1:]``.

        Returns:
            Namespace with ``path``, ``output``, ``depth``, ``files_only``,
            ``dirs_only`` and ``ignore_file``.
        """
        parser = argparse.ArgumentParser(
            prog="tree_gen.py",
            description="目录树生成脚本 - 生成目录结构和文件列表",
            # Raw formatter keeps the line breaks of the epilog examples.
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
示例:
  python tree_gen.py                        # 当前目录
  python tree_gen.py /path/to/project       # 指定目录
  python tree_gen.py -d 2 src/              # 限制深度为 2
  python tree_gen.py -f -o files.md .       # 仅文件树,输出到 files.md
  python tree_gen.py -D -i .gitignore .     # 仅目录树,使用 .gitignore
""",
        )
        parser.add_argument(
            "path",
            nargs="?",
            default=".",
            help="目标目录路径(默认: 当前目录)",
        )
        parser.add_argument(
            "-o",
            "--output",
            default="tree_output.md",
            help="Markdown 输出文件路径(默认: tree_output.md)",
        )
        parser.add_argument(
            "-d",
            "--depth",
            type=int,
            default=None,
            help="最大递归深度(默认: 无限制)",
        )
        parser.add_argument(
            "-f",
            "--files-only",
            action="store_true",
            help="仅显示文件树",
        )
        parser.add_argument(
            "-D",
            "--dirs-only",
            action="store_true",
            help="仅显示目录树",
        )
        parser.add_argument(
            "-i",
            "--ignore",
            dest="ignore_file",
            default=None,
            help="忽略配置文件路径(默认: 目标目录下的 .treeignore)",
        )
        return parser.parse_args(args)


# =============================================================================
# Module 3: IgnoreLoader
# =============================================================================


class IgnoreLoader:
    """Ignore-configuration loading (F014)."""

    @staticmethod
    def load(ignore_file_path=None, target_dir=None) -> Tuple[Set[str], Set[str]]:
        """Load the ignore configuration.

        The built-in defaults are always included. At most one configuration
        file is merged on top of them, chosen in this order:
            1. the file given on the command line (``ignore_file_path``)
            2. ``.treeignore`` inside ``target_dir``
            3. ``.gitignore`` inside ``target_dir`` (fallback)

        Each non-empty, non-comment line has trailing ``/`` removed; lines
        containing ``*``, ``?`` or ``[`` become glob patterns, all other lines
        become exact names.

        Args:
            ignore_file_path: Explicit configuration file path, or ``None``.
            target_dir: Directory searched for ``.treeignore`` / ``.gitignore``
                when no explicit file is given.

        Returns:
            tuple: ``(ignore_set, glob_ignore_set)``.
        """
        ignore_set = set(DEFAULT_IGNORE)
        glob_ignore_set = set(DEFAULT_GLOB_IGNORE)

        # Decide which configuration file (if any) to read.
        config_path = None
        if ignore_file_path:
            config_path = Path(ignore_file_path)
            if not config_path.is_file():
                # An explicitly requested file that is missing should not be
                # dropped silently.
                print(
                    f"警告: 忽略配置文件不存在: {config_path},使用默认配置",
                    file=sys.stderr,
                )
                config_path = None
        elif target_dir:
            for candidate_name in (".treeignore", ".gitignore"):
                candidate = Path(target_dir) / candidate_name
                if candidate.is_file():
                    config_path = candidate
                    break

        if config_path is None:
            return ignore_set, glob_ignore_set

        try:
            # utf-8-sig transparently drops a BOM, which would otherwise be
            # glued onto the first pattern of the file.
            with open(config_path, "r", encoding="utf-8-sig", errors="replace") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Skip blank lines and comments.
                    if not line or line.startswith("#"):
                        continue
                    # Drop the trailing slash that marks a directory.
                    clean = line.rstrip("/")
                    if not clean:
                        # A line made only of slashes names nothing.
                        continue
                    if any(c in clean for c in "*?["):
                        glob_ignore_set.add(clean)
                    else:
                        ignore_set.add(clean)
        except (IOError, OSError):
            # Reading failed: fall back to whatever was collected so far.
            print(
                f"警告: 无法读取忽略配置文件 {config_path},使用默认配置",
                file=sys.stderr,
            )

        return ignore_set, glob_ignore_set


# =============================================================================
# Module 4: DirectoryScanner
# =============================================================================


class DirectoryScanner:
    """Recursive directory walk that builds a tree of dict nodes (F015)."""

    def __init__(self, ignore_set, glob_ignore_set, max_depth=None):
        self.ignore_set = ignore_set
        self.glob_ignore_set = glob_ignore_set
        self.max_depth = max_depth
        # Real paths of the directories currently being descended into
        # (the active ancestor chain); used to detect symlink cycles.
        self._seen_real_paths = set()

    def should_ignore(self, name: str) -> bool:
        """Return True when ``name`` matches an exact name or a glob pattern."""
        if name in self.ignore_set:
            return True
        return any(fnmatch.fnmatch(name, pattern) for pattern in self.glob_ignore_set)

    def scan(self, root_path) -> Dict[str, Any]:
        """Scan a directory and return its tree.

        Args:
            root_path: Root directory path.

        Returns:
            dict: Root node with keys ``name``, ``path``, ``is_dir``,
            ``children`` and ``size``.

        Raises:
            FileNotFoundError: The path does not exist.
            NotADirectoryError: The path is not a directory.
        """
        root = Path(root_path).resolve()
        if not root.exists():
            raise FileNotFoundError(f"路径不存在: {root}")
        if not root.is_dir():
            raise NotADirectoryError(f"不是目录: {root}")

        # Seed with the root so a link pointing back at it is recognised
        # as a cycle immediately.
        self._seen_real_paths = {str(root)}
        return self._scan_node(root, depth=0, is_root=True)

    @staticmethod
    def _target_exists(entry: Path) -> bool:
        """Return True when the target of ``entry`` can be reached."""
        try:
            return entry.exists()
        except (OSError, ValueError, RuntimeError):
            return False

    def _scan_node(self, path, depth, is_root=False) -> Dict[str, Any]:
        """Recursively scan one node (file or directory)."""
        # A filesystem root has an empty ``name``; show the full path instead.
        name = path.name if path.parent != path else str(path)
        is_dir = path.is_dir()

        node = {
            "name": name,
            "path": path,
            "is_dir": is_dir,
            "children": [],
            "size": 0,
        }

        if not is_dir:
            try:
                node["size"] = path.stat().st_size
            except OSError:
                node["size"] = 0
            return node

        # Depth limit: the directory itself is listed, its content is not.
        if self.max_depth is not None and depth >= self.max_depth:
            return node

        # Cycle detection (the root was registered by scan()).
        real_path = None
        if not is_root:
            try:
                real_path = str(path.resolve())
            except (OSError, ValueError, RuntimeError):
                # RuntimeError: Path.resolve() reports symlink loops this way
                # on Python versions before 3.13.
                print(f"警告: 无法解析路径,跳过: {path}", file=sys.stderr)
                return node
            if real_path in self._seen_real_paths:
                print(f"警告: 检测到符号链接循环,跳过: {path}", file=sys.stderr)
                return node
            self._seen_real_paths.add(real_path)

        try:
            # Directories first, then case-insensitive by name.
            try:
                entries = sorted(
                    path.iterdir(),
                    key=lambda p: (not p.is_dir(), p.name.lower()),
                )
            except PermissionError:
                print(f"警告: 权限不足,跳过目录: {path}", file=sys.stderr)
                return node
            except OSError as e:
                print(f"警告: 读取目录失败 ({e}),跳过: {path}", file=sys.stderr)
                return node

            for entry in entries:
                if self.should_ignore(entry.name):
                    continue
                # Skip symlinks whose target cannot be reached.
                if entry.is_symlink() and not self._target_exists(entry):
                    continue
                node["children"].append(self._scan_node(entry, depth + 1))
        finally:
            # Only ancestors count for cycle detection; leaving this
            # directory removes it from the active chain.
            if real_path is not None:
                self._seen_real_paths.discard(real_path)

        return node


# =============================================================================
# Module 5: TreeFormatter
# =============================================================================


class TreeFormatter:
    """Tree text rendering (F016, F017)."""

    # Tree drawing pieces. All four are 4 columns wide so that every nesting
    # level lines up under its parent's connector.
    BRANCH = "├── "
    LAST_BRANCH = "└── "
    PIPE = "│   "
    SPACE = "    "

    @staticmethod
    def _visible_children(node, dirs_only, files_only):
        """Return the children of ``node`` that pass the dirs/files filter."""
        children = node.get("children", [])
        if dirs_only:
            return [c for c in children if c["is_dir"]]
        if files_only:
            return [c for c in children if not c["is_dir"]]
        return children

    @staticmethod
    def format_tree(tree_node, dirs_only=False, files_only=False, is_root=True) -> List[str]:
        """Render a tree as text lines.

        Args:
            tree_node: Tree node dict.
            dirs_only: Show directories only (takes precedence over
                ``files_only``).
            files_only: At every level keep only non-directory children.
            is_root: Whether ``tree_node`` is the root; for a non-root node
                an empty list is returned (those are rendered by
                ``_format_children``).

        Returns:
            list: Text lines.
        """
        if not is_root:
            return []

        name = tree_node["name"]
        lines = [f"{name}/" if tree_node["is_dir"] else name]

        children = TreeFormatter._visible_children(tree_node, dirs_only, files_only)
        last_index = len(children) - 1
        for i, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(
                    child, "", i == last_index, dirs_only, files_only
                )
            )
        return lines

    @staticmethod
    def _format_children(node, prefix, is_last, dirs_only, files_only) -> List[str]:
        """Recursively render ``node`` and its descendants."""
        connector = TreeFormatter.LAST_BRANCH if is_last else TreeFormatter.BRANCH
        suffix = "/" if node["is_dir"] else ""
        lines = [f"{prefix}{connector}{node['name']}{suffix}"]

        # Below the last sibling nothing continues, so pad with spaces;
        # otherwise draw the vertical bar of the parent level.
        child_prefix = prefix + (TreeFormatter.SPACE if is_last else TreeFormatter.PIPE)

        children = TreeFormatter._visible_children(node, dirs_only, files_only)
        last_index = len(children) - 1
        for i, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(
                    child, child_prefix, i == last_index, dirs_only, files_only
                )
            )
        return lines

    @staticmethod
    def format_file_list(tree_node, dirs_only=False, files_only=False) -> List[Tuple[str, int]]:
        """Build the flat list of entries with full paths (F017).

        Args:
            tree_node: Tree node dict.
            dirs_only: Leave files out.
            files_only: Leave directories out.

        Returns:
            list: ``(path, size)`` tuples; directories carry size 0.
        """
        items = []
        TreeFormatter._collect_files(tree_node, items, dirs_only, files_only)
        return items

    @staticmethod
    def _collect_files(node, items, dirs_only, files_only) -> None:
        """Recursively append files and directories to ``items``."""
        path = str(node["path"])
        if node["is_dir"]:
            if not files_only:
                items.append((path, 0))
            for child in node.get("children", []):
                TreeFormatter._collect_files(child, items, dirs_only, files_only)
        elif not dirs_only:
            items.append((path, node.get("size", 0)))


# =============================================================================
# Module 6: TerminalOutput
# =============================================================================


class TerminalOutput:
    """Terminal output (F018)."""

    @staticmethod
    def setup_encoding() -> None:
        """Switch stdout and stderr to UTF-8 where the stream allows it."""
        for stream in (sys.stdout, sys.stderr):
            try:
                if hasattr(stream, "reconfigure"):
                    stream.reconfigure(encoding="utf-8")
            except (AttributeError, ValueError, OSError):
                # Best effort: keep the stream's current encoding.
                pass

    @staticmethod
    def display(tree_lines, statistics=None) -> None:
        """Print the tree and, optionally, the statistics.

        Args:
            tree_lines: Tree text lines.
            statistics: Statistics dict (optional).
        """
        TerminalOutput.setup_encoding()

        print()
        for line in tree_lines:
            print(line)
        print()

        if statistics:
            print("=" * 50)
            print(f" 目录数: {statistics['dir_count']}")
            print(f" 文件数: {statistics['file_count']}")
            print(f" 总大小: {statistics['total_size_str']}")
            print("=" * 50)


# =============================================================================
# Module 7: MarkdownWriter
# =============================================================================


class MarkdownWriter:
    """Markdown file output (F019)."""

    @staticmethod
    def write(output_path, tree_lines, file_list, statistics, root_path) -> bool:
        """Write the Markdown report.

        Args:
            output_path: Output file path.
            tree_lines: Tree text lines.
            file_list: ``(path, size)`` tuples.
            statistics: Statistics dict.
            root_path: Root directory path (its name goes into the title).

        Returns:
            bool: True on success; False when the file could not be written
            (a warning is printed to stderr in that case).
        """
        try:
            # utf-8-sig writes a BOM at the start of the file.
            with open(output_path, "w", encoding="utf-8-sig") as f:
                # Title
                f.write(f"# 目录树 - {Path(root_path).name}\n\n")
                f.write(f"> 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

                # Directory tree
                f.write("## 目录结构\n\n")
                f.write("```\n")
                for line in tree_lines:
                    f.write(line + "\n")
                f.write("```\n\n")

                # File list
                if file_list:
                    f.write("## 文件列表\n\n")
                    f.write("| 序号 | 文件路径 | 大小 |\n")
                    f.write("|------|----------|------|\n")
                    for idx, (path, size) in enumerate(file_list, 1):
                        size_str = MarkdownWriter._format_size(size) if size > 0 else "-"
                        # A bare pipe would end the table cell.
                        safe_path = path.replace("|", "\\|")
                        f.write(f"| {idx} | `{safe_path}` | {size_str} |\n")
                    f.write("\n")

                # Statistics
                if statistics:
                    f.write("## 统计信息\n\n")
                    f.write(f"- **目录数**: {statistics['dir_count']}\n")
                    f.write(f"- **文件数**: {statistics['file_count']}\n")
                    f.write(f"- **总大小**: {statistics['total_size_str']}\n")
            return True
        except (IOError, OSError) as e:
            print(
                f"警告: 无法写入 Markdown 文件 {output_path} ({e}),回退到仅终端输出",
                file=sys.stderr,
            )
            return False

    @staticmethod
    def _format_size(size_bytes) -> str:
        """Format a byte count as B / KB / MB / GB (1024-based)."""
        if size_bytes < 1024:
            return f"{size_bytes} B"
        elif size_bytes < 1024 * 1024:
            return f"{size_bytes / 1024:.1f} KB"
        elif size_bytes < 1024 * 1024 * 1024:
            return f"{size_bytes / (1024 * 1024):.1f} MB"
        else:
            return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"


# =============================================================================
# Module 8: StatisticsCollector
# =============================================================================


class StatisticsCollector:
    """Statistics collection (F020)."""

    @staticmethod
    def collect(tree_node, dirs_only=False, files_only=False) -> Dict[str, Any]:
        """Collect statistics for a tree.

        Args:
            tree_node: Tree node dict.
            dirs_only: Do not count files (nor their sizes).
            files_only: Do not count directories.

        Returns:
            dict: ``dir_count``, ``file_count``, ``total_size`` and
            ``total_size_str``. The root directory itself is counted.
        """
        stats = {
            "dir_count": 0,
            "file_count": 0,
            "total_size": 0,
            "total_size_str": "0 B",
        }
        StatisticsCollector._count(tree_node, stats, dirs_only, files_only)
        stats["total_size_str"] = MarkdownWriter._format_size(stats["total_size"])
        return stats

    @staticmethod
    def _count(node, stats, dirs_only=False, files_only=False) -> None:
        """Recursively accumulate counts into ``stats``."""
        if node["is_dir"]:
            if not files_only:
                stats["dir_count"] += 1
            for child in node.get("children", []):
                StatisticsCollector._count(child, stats, dirs_only, files_only)
        elif not dirs_only:
            stats["file_count"] += 1
            stats["total_size"] += node.get("size", 0)


# =============================================================================
# Main program
# =============================================================================


def main(argv=None):
    """Program entry point.

    Args:
        argv: Argument list; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 when the target path is missing or not a directory.
    """
    # Configure the console first so that help text and scan warnings
    # are emitted as UTF-8 as well.
    TerminalOutput.setup_encoding()

    # Parse command-line arguments (F013)
    args = ArgParser.parse_args(argv)

    target_path = Path(args.path).resolve()

    if not target_path.exists():
        print(f"错误: 路径不存在: {target_path}", file=sys.stderr)
        sys.exit(1)
    if not target_path.is_dir():
        print(f"错误: 不是目录: {target_path}", file=sys.stderr)
        sys.exit(1)

    # Load ignore configuration (F014)
    ignore_set, glob_ignore_set = IgnoreLoader.load(
        ignore_file_path=args.ignore_file,
        target_dir=target_path,
    )

    # Scan the directory (F015)
    scanner = DirectoryScanner(
        ignore_set=ignore_set,
        glob_ignore_set=glob_ignore_set,
        max_depth=args.depth,
    )
    try:
        tree = scanner.scan(target_path)
    except (FileNotFoundError, NotADirectoryError) as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    # Render the tree (F016, F017)
    tree_lines = TreeFormatter.format_tree(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Flat file list
    file_list = TreeFormatter.format_file_list(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Statistics (F020)
    statistics = StatisticsCollector.collect(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Terminal output (F018)
    TerminalOutput.display(tree_lines, statistics)

    # Markdown output (F019)
    saved = MarkdownWriter.write(
        output_path=args.output,
        tree_lines=tree_lines,
        file_list=file_list,
        statistics=statistics,
        root_path=target_path,
    )
    # Announce the file only when it was actually written; on failure
    # MarkdownWriter.write has already printed a warning.
    if saved:
        if args.files_only:
            label = "文件树 Markdown"
        elif args.dirs_only:
            label = "目录树 Markdown"
        else:
            label = "Markdown"
        print(f"{label} 已保存到: {Path(args.output).resolve()}")


if __name__ == "__main__":
    main()