# tree-generator/Releases/v1.0.0/source/tree_gen.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tree_gen.py - 目录树生成脚本 (Windows 平台)

功能: F013-F020
- 接收路径输入（命令行参数）
- 加载忽略配置
- 递归遍历目录
- 生成目录树/文件树
- 终端输出
- Markdown 保存
- 统计信息

技术选型: Python 3.8+ 标准库，零第三方依赖
"""

import argparse
import fnmatch
import os
import sys
from pathlib import Path
from datetime import datetime


# =============================================================================
# 模块 1: 默认忽略列表 & 配置
# =============================================================================

# Built-in entry names that are always part of the ignore set; the scanner
# compares them against each entry's bare name (exact match).
DEFAULT_IGNORE = {
    ".git", ".svn", ".hg",
    "node_modules", "bower_components",
    "__pycache__", ".pytest_cache",
    ".idea", ".vscode",
    "dist", "build", "target",
    ".DS_Store", "Thumbs.db",
    "venv", ".venv", "env",
}

# Built-in wildcard patterns (matched against entry names with fnmatch)
DEFAULT_GLOB_IGNORE = {"*.pyc"}


# =============================================================================
# 模块 2: ArgParser
# =============================================================================

class ArgParser:
    """Command-line argument parsing (F013)."""

    @staticmethod
    def parse_args(args=None):
        """
        Parse the command line.

        Args:
            args: Optional list of argument strings. ``None`` lets argparse
                read ``sys.argv[1:]``.

        Returns:
            argparse.Namespace: with attributes ``path``, ``output``,
            ``depth``, ``files_only``, ``dirs_only`` and ``ignore_file``.

        Raises:
            SystemExit: raised by argparse for invalid arguments, including
                the combination of ``-f`` and ``-D``.
        """
        parser = argparse.ArgumentParser(
            prog="tree_gen.py",
            description="目录树生成脚本 - 生成目录结构和文件列表",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
示例:
  python tree_gen.py                     # 当前目录
  python tree_gen.py /path/to/project    # 指定目录
  python tree_gen.py -d 2 src/           # 限制深度为 2
  python tree_gen.py -f -o files.md .    # 仅文件树，输出到 files.md
  python tree_gen.py -D -i .gitignore .  # 仅目录树，使用 .gitignore
            """,
        )

        parser.add_argument(
            "path",
            nargs="?",
            default=".",
            help="目标目录路径（默认: 当前目录）",
        )
        parser.add_argument(
            "-o", "--output",
            default="tree_output.md",
            help="Markdown 输出文件路径（默认: tree_output.md）",
        )
        parser.add_argument(
            "-d", "--depth",
            type=int,
            default=None,
            help="最大递归深度（默认: 无限制）",
        )

        # -f and -D select contradictory views. Accepting both used to yield a
        # directory-only tree next to an empty file list and all-zero
        # statistics, so the combination is rejected up front.
        view_mode = parser.add_mutually_exclusive_group()
        view_mode.add_argument(
            "-f", "--files-only",
            action="store_true",
            help="仅显示文件树",
        )
        view_mode.add_argument(
            "-D", "--dirs-only",
            action="store_true",
            help="仅显示目录树",
        )

        parser.add_argument(
            "-i", "--ignore",
            dest="ignore_file",
            default=None,
            help="忽略配置文件路径（默认: 目标目录下的 .treeignore）",
        )

        return parser.parse_args(args)


# =============================================================================
# 模块 3: IgnoreLoader
# =============================================================================

class IgnoreLoader:
    """Ignore-rule loading (F014)."""

    @staticmethod
    def load(ignore_file_path=None, target_dir=None):
        """
        Build the ignore rules used by the scanner.

        The built-in defaults are always included. At most one rule file is
        merged on top of them, chosen in this order:

        1. the file given on the command line (``ignore_file_path``);
        2. ``.treeignore`` inside ``target_dir``;
        3. ``.gitignore`` inside ``target_dir`` (fallback).

        Each non-empty, non-comment line is one rule. A trailing ``/`` is
        dropped; rules containing ``*``, ``?`` or ``[`` are treated as
        fnmatch patterns, all others as exact names.

        Args:
            ignore_file_path: Explicit rule file, or ``None``.
            target_dir: Directory searched for ``.treeignore`` /
                ``.gitignore`` when no explicit file is given.

        Returns:
            tuple: ``(ignore_set, glob_ignore_set)``.
        """
        ignore_set = set(DEFAULT_IGNORE)
        glob_ignore_set = set(DEFAULT_GLOB_IGNORE)

        # Pick the rule file.
        config_path = None
        if ignore_file_path:
            config_path = Path(ignore_file_path)
            if not config_path.is_file():
                # An explicitly requested file that is missing should not be
                # dropped silently; tell the user the defaults are in effect.
                print(f"警告: 忽略配置文件不存在: {config_path}，使用默认配置", file=sys.stderr)
                config_path = None
        elif target_dir:
            for candidate_name in (".treeignore", ".gitignore"):
                candidate = Path(target_dir) / candidate_name
                if candidate.is_file():
                    config_path = candidate
                    break

        if config_path is None:
            return ignore_set, glob_ignore_set

        try:
            # utf-8-sig strips a leading BOM (common for files saved on
            # Windows); with plain utf-8 the BOM stays glued to the first
            # rule and that rule never matches.
            with open(config_path, "r", encoding="utf-8-sig", errors="replace") as f:
                for line in f:
                    rule = line.strip()
                    # Skip blank lines and comments.
                    if not rule or rule.startswith("#"):
                        continue
                    # Drop the trailing slash that marks a directory.
                    rule = rule.rstrip("/")
                    if not rule:
                        # The line consisted only of slashes; nothing to match.
                        continue
                    if any(c in rule for c in "*?["):
                        glob_ignore_set.add(rule)
                    else:
                        ignore_set.add(rule)
        except OSError:
            # Reading failed; rules collected so far (at least the defaults)
            # remain in effect.
            print(f"警告: 无法读取忽略配置文件 {config_path}，使用默认配置", file=sys.stderr)

        return ignore_set, glob_ignore_set


# =============================================================================
# 模块 4: DirectoryScanner
# =============================================================================

class DirectoryScanner:
    """Recursively walk a directory and build a nested-dict tree (F015)."""

    def __init__(self, ignore_set, glob_ignore_set, max_depth=None):
        """
        Args:
            ignore_set: Entry names skipped by exact match.
            glob_ignore_set: fnmatch patterns; entries whose name matches any
                of them are skipped.
            max_depth: Directories at this depth or deeper are reported but
                not expanded (the root is depth 0). ``None`` means no limit.
        """
        self.ignore_set = ignore_set
        self.glob_ignore_set = glob_ignore_set
        self.max_depth = max_depth
        # Resolved paths of the directories currently being expanded, i.e. the
        # ancestor chain of the node under construction. A directory that
        # resolves to one of these is a symlink loop.
        self._seen_real_paths = set()

    def should_ignore(self, name):
        """Return True when ``name`` matches an exact rule or a glob pattern."""
        if name in self.ignore_set:
            return True
        return any(fnmatch.fnmatch(name, pattern) for pattern in self.glob_ignore_set)

    def scan(self, root_path):
        """
        Scan a directory and return its tree.

        Args:
            root_path: Directory to scan.

        Returns:
            dict: node with keys ``name``, ``path``, ``is_dir``, ``children``
            (list of child nodes) and ``size`` (bytes, 0 for directories).

        Raises:
            FileNotFoundError: the path does not exist.
            NotADirectoryError: the path is not a directory.
        """
        root = Path(root_path).resolve()

        if not root.exists():
            raise FileNotFoundError(f"路径不存在: {root}")

        if not root.is_dir():
            raise NotADirectoryError(f"不是目录: {root}")

        # Start every scan with an empty ancestor chain.
        self._seen_real_paths = set()

        return self._scan_node(root, depth=0, is_root=True)

    def _scan_node(self, path, depth, is_root=False):
        """
        Build the node for ``path`` and, for directories, its descendants.

        Args:
            path: ``Path`` of the entry.
            depth: Depth of the entry (root is 0).
            is_root: Kept for backward compatibility. The root now takes part
                in loop detection like any other directory, so links that
                point back at it are recognised.

        Returns:
            dict: the node (see ``scan``).
        """
        # A filesystem root has an empty ``name``; fall back to the full path.
        name = path.name if path.parent != path else str(path)
        is_dir = path.is_dir()

        node = {
            "name": name,
            "path": path,
            "is_dir": is_dir,
            "children": [],
            "size": 0,
        }

        if not is_dir:
            try:
                node["size"] = path.stat().st_size
            except OSError:
                node["size"] = 0
            return node

        # Depth limit: report the directory itself but do not expand it.
        if self.max_depth is not None and depth >= self.max_depth:
            return node

        try:
            real_path = str(path.resolve())
        except (OSError, ValueError, RuntimeError):
            # RuntimeError: pathlib before 3.13 reports symlink loops this way.
            print(f"警告: 无法解析路径，跳过: {path}", file=sys.stderr)
            return node

        # Only an ancestor with the same real path makes a loop. Tracking
        # every directory ever visited would wrongly empty a real directory
        # that happens to be reached after a symlink pointing at it.
        if real_path in self._seen_real_paths:
            print(f"警告: 检测到符号链接循环，跳过: {path}", file=sys.stderr)
            return node

        self._seen_real_paths.add(real_path)
        try:
            self._scan_children(node, path, depth)
        finally:
            # Leaving this directory: it is no longer an ancestor.
            self._seen_real_paths.discard(real_path)

        return node

    def _scan_children(self, node, path, depth):
        """Fill ``node["children"]`` with the non-ignored entries of ``path``."""
        try:
            # Directories first, then files; each group sorted case-insensitively.
            entries = sorted(path.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
        except PermissionError:
            print(f"警告: 权限不足，跳过目录: {path}", file=sys.stderr)
            return
        except OSError as e:
            print(f"警告: 读取目录失败 ({e})，跳过: {path}", file=sys.stderr)
            return

        for entry in entries:
            if self.should_ignore(entry.name):
                continue

            # Skip symlinks whose target cannot be reached. ``exists()``
            # follows the link, so it is False for dangling and
            # self-referential links alike.
            if entry.is_symlink():
                try:
                    if not entry.exists():
                        continue
                except OSError:
                    continue

            node["children"].append(self._scan_node(entry, depth + 1))


# =============================================================================
# 模块 5: TreeFormatter
# =============================================================================

class TreeFormatter:
    """Render a scanned tree as text lines and as a flat path list (F016, F017)."""

    # Fragments used to draw the tree.
    BRANCH = "├── "
    LAST_BRANCH = "└── "
    PIPE = "│   "
    SPACE = "    "

    @staticmethod
    def _visible_children(node, dirs_only, files_only):
        """Return the children of ``node`` that pass the view filter.

        ``dirs_only`` wins when both flags are set.
        """
        kids = node.get("children", [])
        if dirs_only:
            return [kid for kid in kids if kid["is_dir"]]
        if files_only:
            return [kid for kid in kids if not kid["is_dir"]]
        return kids

    @staticmethod
    def format_tree(tree_node, dirs_only=False, files_only=False, is_root=True):
        """
        Turn a tree into display lines.

        Args:
            tree_node: Root node dict.
            dirs_only: Keep only directory nodes.
            files_only: Keep only file nodes. Directory nodes are dropped, so
                files nested inside them are not shown either.
            is_root: When False nothing is rendered and an empty list is
                returned.

        Returns:
            list: text lines, the first one being the root label.
        """
        if not is_root:
            return []

        label = tree_node["name"]
        output = [f"{label}/" if tree_node["is_dir"] else label]

        visible = TreeFormatter._visible_children(tree_node, dirs_only, files_only)
        final_index = len(visible) - 1
        for index, child in enumerate(visible):
            output.extend(
                TreeFormatter._format_children(
                    child, "", index == final_index, dirs_only, files_only
                )
            )
        return output

    @staticmethod
    def _format_children(node, prefix, is_last, dirs_only, files_only):
        """Render ``node`` and its visible descendants below ``prefix``."""
        if is_last:
            connector, continuation = TreeFormatter.LAST_BRANCH, TreeFormatter.SPACE
        else:
            connector, continuation = TreeFormatter.BRANCH, TreeFormatter.PIPE

        label = node["name"]
        suffix = "/" if node["is_dir"] else ""
        rendered = [f"{prefix}{connector}{label}{suffix}"]

        # Descendants are indented one level further than this node.
        deeper_prefix = prefix + continuation
        visible = TreeFormatter._visible_children(node, dirs_only, files_only)
        final_index = len(visible) - 1
        for index, child in enumerate(visible):
            rendered += TreeFormatter._format_children(
                child, deeper_prefix, index == final_index, dirs_only, files_only
            )
        return rendered

    @staticmethod
    def format_file_list(tree_node, dirs_only=False, files_only=False):
        """
        Flatten the tree into path/size pairs (F017).

        Args:
            tree_node: Root node dict.
            dirs_only: Leave file entries out.
            files_only: Leave directory entries out.

        Returns:
            list: ``(path_string, size)`` tuples in pre-order; directories
            carry size 0.
        """
        collected = []
        TreeFormatter._collect_files(tree_node, collected, dirs_only, files_only)
        return collected

    @staticmethod
    def _collect_files(node, items, dirs_only, files_only):
        """Append the entries below ``node`` (inclusive) to ``items`` in pre-order."""
        pending = [node]
        while pending:
            current = pending.pop()
            location = str(current["path"])

            if not current["is_dir"]:
                if not dirs_only:
                    items.append((location, current.get("size", 0)))
                continue

            if not files_only:
                items.append((location, 0))
            # Reversed so the first child is popped (and emitted) first.
            pending.extend(reversed(list(current.get("children", []))))


# =============================================================================
# 模块 6: TerminalOutput
# =============================================================================

class TerminalOutput:
    """Console rendering of the tree and its summary (F018)."""

    @staticmethod
    def setup_encoding():
        """Switch ``sys.stdout`` to UTF-8 when the stream offers ``reconfigure``."""
        reconfigure = getattr(sys.stdout, "reconfigure", None)
        if reconfigure is None:
            return
        try:
            reconfigure(encoding="utf-8")
        except (AttributeError, ValueError):
            # Best effort: keep whatever encoding the stream already has.
            pass

    @staticmethod
    def display(tree_lines, statistics=None):
        """
        Print the tree, framed by blank lines, followed by the statistics box.

        Args:
            tree_lines: Text lines of the rendered tree.
            statistics: Optional dict providing ``dir_count``, ``file_count``
                and ``total_size_str``; nothing more is printed when it is
                empty or ``None``.
        """
        TerminalOutput.setup_encoding()

        # One blank line before and after the tree.
        for text in ("", *tree_lines, ""):
            print(text)

        if not statistics:
            return

        rule = "=" * 50
        print(rule)
        print(f"  目录数: {statistics['dir_count']}")
        print(f"  文件数: {statistics['file_count']}")
        print(f"  总大小: {statistics['total_size_str']}")
        print(rule)


# =============================================================================
# 模块 7: MarkdownWriter
# =============================================================================

class MarkdownWriter:
    """Persist the rendered tree as a Markdown report (F019)."""

    @staticmethod
    def write(output_path, tree_lines, file_list, statistics, root_path):
        """
        Write the report to ``output_path`` (UTF-8 with BOM).

        The report holds a title with the root's name, a timestamp, the tree
        in a fenced block, an optional path table and optional statistics.

        Args:
            output_path: Destination file.
            tree_lines: Text lines of the rendered tree.
            file_list: ``(path, size)`` pairs for the table; the table is
                omitted when this is empty.
            statistics: Dict providing ``dir_count``, ``file_count`` and
                ``total_size_str``; the section is omitted when falsy.
            root_path: Scanned directory; only its last component is used.

        Returns:
            bool: True on success, False when the file could not be written
            (a warning is printed to stderr in that case).
        """
        try:
            with open(output_path, "w", encoding="utf-8-sig") as f:
                # Header
                f.write(f"# 目录树 - {Path(root_path).name}\n\n")
                stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                f.write(f"> 生成时间: {stamp}\n\n")

                # Tree inside a fenced block
                f.write("## 目录结构\n\n")
                f.write("```\n")
                f.writelines(line + "\n" for line in tree_lines)
                f.write("```\n\n")

                # Path table
                if file_list:
                    f.writelines((
                        "## 文件列表\n\n",
                        "| 序号 | 文件路径 | 大小 |\n",
                        "|------|----------|------|\n",
                    ))
                    for row_no, (item_path, item_size) in enumerate(file_list, start=1):
                        if item_size > 0:
                            size_cell = MarkdownWriter._format_size(item_size)
                        else:
                            size_cell = "-"
                        # A raw pipe would end the table cell.
                        escaped = item_path.replace("|", "\\|")
                        f.write(f"| {row_no} | `{escaped}` | {size_cell} |\n")
                    f.write("\n")

                # Statistics
                if statistics:
                    f.write("## 统计信息\n\n")
                    f.write(f"- **目录数**: {statistics['dir_count']}\n")
                    f.write(f"- **文件数**: {statistics['file_count']}\n")
                    f.write(f"- **总大小**: {statistics['total_size_str']}\n")

        except (IOError, OSError) as e:
            print(f"警告: 无法写入 Markdown 文件 {output_path} ({e})，回退到仅终端输出", file=sys.stderr)
            return False
        else:
            return True

    @staticmethod
    def _format_size(size_bytes):
        """Format a byte count as B, KB, MB or GB (1024-based, one decimal)."""
        step = 1024
        if size_bytes < step:
            return f"{size_bytes} B"
        for unit, divisor in (("KB", step), ("MB", step * step)):
            # Stay in this unit while the value is below the next one.
            if size_bytes < divisor * step:
                return f"{size_bytes / divisor:.1f} {unit}"
        return f"{size_bytes / (step * step * step):.1f} GB"


# =============================================================================
# 模块 8: StatisticsCollector
# =============================================================================

class StatisticsCollector:
    """Summary figures for a scanned tree (F020)."""

    @staticmethod
    def collect(tree_node, dirs_only=False, files_only=False):
        """
        Count directories and files and total the file sizes.

        Args:
            tree_node: Root node dict; the root itself is counted.
            dirs_only: Leave files (and their sizes) out of the totals.
            files_only: Leave directories out of the totals.

        Returns:
            dict: ``dir_count``, ``file_count``, ``total_size`` (bytes) and
            ``total_size_str`` (formatted by ``MarkdownWriter._format_size``).
        """
        totals = {
            "dir_count": 0,
            "file_count": 0,
            "total_size": 0,
            "total_size_str": "0 B",
        }
        StatisticsCollector._count(tree_node, totals, dirs_only, files_only)
        totals["total_size_str"] = MarkdownWriter._format_size(totals["total_size"])
        return totals

    @staticmethod
    def _count(node, stats, dirs_only=False, files_only=False):
        """Add the counts for ``node`` and all of its descendants to ``stats``."""
        pending = [node]
        while pending:
            current = pending.pop()

            if not current["is_dir"]:
                if not dirs_only:
                    stats["file_count"] += 1
                    stats["total_size"] += current.get("size", 0)
                continue

            if not files_only:
                stats["dir_count"] += 1
            # Directories are always descended into, whatever the filter.
            pending.extend(current.get("children", []))


# =============================================================================
# 主程序
# =============================================================================

def main():
    """
    Command-line entry point.

    Parses the arguments, validates the target directory, loads the ignore
    rules, scans the directory, prints the tree and statistics to the
    terminal and writes the Markdown report.

    Exits with status 1 when the target path is missing or is not a
    directory. A failed Markdown write is not fatal: the writer prints a
    warning and the terminal output stands on its own.
    """
    # Parse the command line (F013)
    args = ArgParser.parse_args()

    target_path = Path(args.path).resolve()

    if not target_path.exists():
        print(f"错误: 路径不存在: {target_path}", file=sys.stderr)
        sys.exit(1)

    if not target_path.is_dir():
        print(f"错误: 不是目录: {target_path}", file=sys.stderr)
        sys.exit(1)

    # Load the ignore rules (F014)
    ignore_set, glob_ignore_set = IgnoreLoader.load(
        ignore_file_path=args.ignore_file,
        target_dir=target_path,
    )

    # Scan the directory (F015)
    scanner = DirectoryScanner(
        ignore_set=ignore_set,
        glob_ignore_set=glob_ignore_set,
        max_depth=args.depth,
    )

    try:
        tree = scanner.scan(target_path)
    except (FileNotFoundError, NotADirectoryError) as e:
        # The path can still vanish between the checks above and the scan.
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    # Render the tree (F016, F017)
    tree_lines = TreeFormatter.format_tree(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Flat path list for the Markdown table
    file_list = TreeFormatter.format_file_list(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Statistics (F020)
    statistics = StatisticsCollector.collect(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Terminal output (F018)
    TerminalOutput.display(tree_lines, statistics)

    # Markdown report (F019). The call is the same in every mode; only the
    # confirmation message differs.
    saved = MarkdownWriter.write(
        output_path=args.output,
        tree_lines=tree_lines,
        file_list=file_list,
        statistics=statistics,
        root_path=target_path,
    )

    # Confirm only a write that actually succeeded; on failure the writer has
    # already printed its own warning.
    if saved:
        if args.files_only:
            label = "文件树 Markdown"
        elif args.dirs_only:
            label = "目录树 Markdown"
        else:
            label = "Markdown"
        print(f"{label} 已保存到: {Path(args.output).resolve()}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()