Files
tree-generator/Releases/v1.0.0/source/tree_gen.py
2026-05-16 17:34:32 +08:00

655 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tree_gen.py - 目录树生成脚本 (Windows 平台)
功能: F013-F020
- 接收路径输入(命令行参数)
- 加载忽略配置
- 递归遍历目录
- 生成目录树/文件树
- 终端输出
- Markdown 保存
- 统计信息
技术选型: Python 3.8+ 标准库,零第三方依赖
"""
import argparse
import fnmatch
import os
import sys
from pathlib import Path
from datetime import datetime
# =============================================================================
# 模块 1: 默认忽略列表 & 配置
# =============================================================================
# Entry names that are ignored on an exact match.
DEFAULT_IGNORE = {
    # Version-control metadata
    ".git",
    ".svn",
    ".hg",
    # Package-manager download directories
    "node_modules",
    "bower_components",
    # Python caches
    "__pycache__",
    ".pytest_cache",
    # Editor / IDE settings
    ".idea",
    ".vscode",
    # Build output
    "dist",
    "build",
    "target",
    # OS-generated files
    ".DS_Store",
    "Thumbs.db",
    # Virtual environments
    "venv",
    ".venv",
    "env",
}

# Wildcard patterns (matched against entry names with fnmatch).
DEFAULT_GLOB_IGNORE = {"*.pyc"}
# =============================================================================
# 模块 2: ArgParser
# =============================================================================
class ArgParser:
    """Command-line argument parsing (F013)."""

    @staticmethod
    def parse_args(args=None):
        """Parse the command line into an ``argparse.Namespace``.

        Args:
            args: Argument list to parse. ``None`` lets argparse read
                ``sys.argv[1:]``.

        Returns:
            Namespace with the attributes ``path``, ``output``, ``depth``,
            ``files_only``, ``dirs_only`` and ``ignore_file``.

        Raises:
            SystemExit: Raised by argparse on invalid input, including when
                ``-f`` and ``-D`` are given together.
        """
        parser = argparse.ArgumentParser(
            prog="tree_gen.py",
            description="目录树生成脚本 - 生成目录结构和文件列表",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
示例:
python tree_gen.py # 当前目录
python tree_gen.py /path/to/project # 指定目录
python tree_gen.py -d 2 src/ # 限制深度为 2
python tree_gen.py -f -o files.md . # 仅文件树,输出到 files.md
python tree_gen.py -D -i .gitignore . # 仅目录树,使用 .gitignore
""",
        )
        parser.add_argument(
            "path",
            nargs="?",
            default=".",
            help="目标目录路径(默认: 当前目录)",
        )
        parser.add_argument(
            "-o", "--output",
            default="tree_output.md",
            help="Markdown 输出文件路径(默认: tree_output.md)",
        )
        parser.add_argument(
            "-d", "--depth",
            type=int,
            default=None,
            help="最大递归深度(默认: 无限制)",
        )
        # The two view filters contradict each other: combined, the tree shows
        # directories only while the file list and statistics come out empty.
        # Reject the combination up front instead of producing that output.
        view_mode = parser.add_mutually_exclusive_group()
        view_mode.add_argument(
            "-f", "--files-only",
            action="store_true",
            help="仅显示文件树",
        )
        view_mode.add_argument(
            "-D", "--dirs-only",
            action="store_true",
            help="仅显示目录树",
        )
        parser.add_argument(
            "-i", "--ignore",
            dest="ignore_file",
            default=None,
            help="忽略配置文件路径(默认: 目标目录下的 .treeignore)",
        )
        return parser.parse_args(args)
# =============================================================================
# 模块 3: IgnoreLoader
# =============================================================================
class IgnoreLoader:
    """Load the ignore rules used while scanning (F014)."""

    @staticmethod
    def load(ignore_file_path=None, target_dir=None):
        """Return the ignore rules: built-in defaults plus one optional file.

        The configuration file is chosen in this order:
            1. ``ignore_file_path`` when given (command-line option),
            2. ``.treeignore`` inside ``target_dir``,
            3. ``.gitignore`` inside ``target_dir`` as a fallback.
        The built-in defaults are always included.

        Each non-empty, non-comment line is stripped of surrounding
        whitespace and trailing ``/``. Lines containing ``*``, ``?`` or ``[``
        become wildcard patterns; all others become exact names.

        Args:
            ignore_file_path: Explicit ignore file, or ``None``.
            target_dir: Directory searched for ``.treeignore`` /
                ``.gitignore`` when no explicit file is given.

        Returns:
            tuple: ``(ignore_set, glob_ignore_set)``.
        """
        ignore_set = set(DEFAULT_IGNORE)
        glob_ignore_set = set(DEFAULT_GLOB_IGNORE)

        config_path = IgnoreLoader._locate_config(ignore_file_path, target_dir)
        if config_path is None:
            return ignore_set, glob_ignore_set

        # Collect into scratch sets so a failure mid-read really does fall
        # back to the defaults, as the warning below promises.
        extra_names = set()
        extra_globs = set()
        try:
            # utf-8-sig drops a leading BOM (common in files saved by Windows
            # editors); with plain utf-8 the BOM would corrupt the first rule.
            with open(config_path, "r", encoding="utf-8-sig", errors="replace") as f:
                for raw_line in f:
                    rule = raw_line.strip()
                    # Skip blank lines and comments.
                    if not rule or rule.startswith("#"):
                        continue
                    # Drop the trailing slash that marks a directory.
                    rule = rule.rstrip("/")
                    if not rule:
                        # The line consisted only of slashes.
                        continue
                    if any(ch in rule for ch in "*?["):
                        extra_globs.add(rule)
                    else:
                        extra_names.add(rule)
        except OSError:
            # Also reached when an explicitly requested file does not exist,
            # so a mistyped -i path is reported instead of silently ignored.
            print(f"警告: 无法读取忽略配置文件 {config_path},使用默认配置", file=sys.stderr)
            return ignore_set, glob_ignore_set

        return ignore_set | extra_names, glob_ignore_set | extra_globs

    @staticmethod
    def _locate_config(ignore_file_path, target_dir):
        """Pick the ignore file to read, or ``None`` when there is none."""
        if ignore_file_path:
            return Path(ignore_file_path)
        if target_dir:
            for candidate_name in (".treeignore", ".gitignore"):
                candidate = Path(target_dir) / candidate_name
                if candidate.is_file():
                    return candidate
        return None
# =============================================================================
# 模块 4: DirectoryScanner
# =============================================================================
class DirectoryScanner:
    """Recursively walk a directory and build a nested-dict tree (F015).

    Every node is a dict with the keys ``name``, ``path`` (a ``Path``),
    ``is_dir``, ``children`` (list of nodes) and ``size`` (bytes; 0 for
    directories and for files that cannot be stat'ed).
    """

    def __init__(self, ignore_set, glob_ignore_set, max_depth=None):
        """Store the ignore rules and the optional depth limit.

        Args:
            ignore_set: Entry names skipped on an exact match.
            glob_ignore_set: fnmatch patterns matched against entry names.
            max_depth: Directories at this depth or deeper are listed but
                not expanded; ``None`` means unlimited.
        """
        self.ignore_set = ignore_set
        self.glob_ignore_set = glob_ignore_set
        self.max_depth = max_depth
        # Real paths of directories already expanded (symlink-loop guard).
        self._seen_real_paths = set()

    def should_ignore(self, name):
        """Return True when ``name`` matches an exact or wildcard rule."""
        if name in self.ignore_set:
            return True
        return any(fnmatch.fnmatch(name, pattern) for pattern in self.glob_ignore_set)

    def scan(self, root_path):
        """Scan ``root_path`` and return the root node of the tree.

        Args:
            root_path: Directory to scan.

        Returns:
            dict: Root node of the tree.

        Raises:
            FileNotFoundError: The path does not exist.
            NotADirectoryError: The path is not a directory.
        """
        root = Path(root_path).resolve()
        if not root.exists():
            raise FileNotFoundError(f"路径不存在: {root}")
        if not root.is_dir():
            raise NotADirectoryError(f"不是目录: {root}")
        # Seed the guard with the root itself; otherwise a symlink pointing
        # back at the root is expanded one full extra time before the loop
        # is noticed.
        self._seen_real_paths = {str(root)}
        return self._scan_node(root, depth=0, is_root=True)

    def _scan_node(self, path, depth, is_root=False):
        """Build the node for ``path``, recursing into directories."""
        # A filesystem root has an empty ``name``; show the full path instead.
        name = path.name if path.parent != path else str(path)
        is_dir = path.is_dir()
        node = {
            "name": name,
            "path": path,
            "is_dir": is_dir,
            "children": [],
            "size": 0,
        }

        if not is_dir:
            try:
                node["size"] = path.stat().st_size
            except OSError:
                # Unreadable or vanished file: keep it listed with size 0.
                node["size"] = 0
            return node

        # Depth limit: list the directory but do not expand it.
        if self.max_depth is not None and depth >= self.max_depth:
            return node

        # Symlink-loop guard (the root was registered in scan()). A directory
        # reachable through several paths is expanded only the first time.
        if not is_root:
            try:
                real_path = str(path.resolve())
            except (OSError, ValueError, RuntimeError):
                # RuntimeError: resolve() reports symlink loops this way on
                # Python versions before 3.13.
                print(f"警告: 无法解析路径,跳过: {path}", file=sys.stderr)
                return node
            if real_path in self._seen_real_paths:
                print(f"警告: 检测到符号链接循环,跳过: {path}", file=sys.stderr)
                return node
            self._seen_real_paths.add(real_path)

        # Directories first, then case-insensitive by name.
        try:
            entries = sorted(path.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
        except PermissionError:
            print(f"警告: 权限不足,跳过目录: {path}", file=sys.stderr)
            return node
        except OSError as e:
            print(f"警告: 读取目录失败 ({e}),跳过: {path}", file=sys.stderr)
            return node

        for entry in entries:
            if self.should_ignore(entry.name):
                continue
            # Skip symlinks whose target is missing or loops back on itself.
            if entry.is_symlink() and not self._link_target_exists(entry):
                continue
            node["children"].append(self._scan_node(entry, depth + 1))
        return node

    @staticmethod
    def _link_target_exists(link):
        """Return True when the symlink ``link`` points at something real."""
        try:
            # exists() follows the link. A non-strict resolve() would not
            # fail for a dangling target and so cannot be used as the check.
            return link.exists()
        except (OSError, ValueError, RuntimeError):
            return False
# =============================================================================
# 模块 5: TreeFormatter
# =============================================================================
class TreeFormatter:
    """Render a scanned tree as text lines and as a flat list (F016, F017)."""

    # Connectors placed in front of an entry name.
    BRANCH = "├── "
    LAST_BRANCH = "└── "
    # Continuation prefixes for deeper levels. They must be exactly as wide
    # as the connectors (4 columns) or nested entries stop lining up.
    PIPE = "│   "
    SPACE = "    "

    @staticmethod
    def _visible_children(node, dirs_only, files_only):
        """Return the children of ``node`` that pass the view filter.

        ``dirs_only`` takes precedence when both flags are set.
        """
        children = node.get("children", [])
        if dirs_only:
            return [c for c in children if c["is_dir"]]
        if files_only:
            return [c for c in children if not c["is_dir"]]
        return children

    @staticmethod
    def format_tree(tree_node, dirs_only=False, files_only=False, is_root=True):
        """Format a tree as a list of text lines.

        NOTE(review): with ``files_only`` the directories are filtered out
        before recursion, so only files directly under the root are drawn,
        while ``format_file_list`` still reports nested files. Confirm
        whether that is the intended "file tree" view.

        Args:
            tree_node: Root node dict of the tree.
            dirs_only: Show directories only.
            files_only: Show files only.
            is_root: Must be True for output to be produced; non-root nodes
                are rendered by ``_format_children``.

        Returns:
            list: Text lines, the first one being the root name (empty list
            when ``is_root`` is False).
        """
        if not is_root:
            return []

        name = tree_node["name"]
        lines = [f"{name}/" if tree_node["is_dir"] else name]
        children = TreeFormatter._visible_children(tree_node, dirs_only, files_only)
        last_index = len(children) - 1
        for i, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(child, "", i == last_index, dirs_only, files_only)
            )
        return lines

    @staticmethod
    def _format_children(node, prefix, is_last, dirs_only, files_only):
        """Render ``node`` and, recursively, its visible descendants.

        Args:
            node: Node dict to render.
            prefix: Text already accumulated for the ancestor levels.
            is_last: Whether ``node`` is the last visible child of its parent.
            dirs_only: Show directories only.
            files_only: Show files only.

        Returns:
            list: Text lines for ``node`` and its descendants.
        """
        connector = TreeFormatter.LAST_BRANCH if is_last else TreeFormatter.BRANCH
        suffix = "/" if node["is_dir"] else ""
        lines = [f"{prefix}{connector}{node['name']}{suffix}"]

        # Below a last child nothing continues, so pad with blanks; otherwise
        # keep the vertical bar running down to the next sibling.
        child_prefix = prefix + (TreeFormatter.SPACE if is_last else TreeFormatter.PIPE)
        children = TreeFormatter._visible_children(node, dirs_only, files_only)
        last_index = len(children) - 1
        for i, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(
                    child, child_prefix, i == last_index, dirs_only, files_only
                )
            )
        return lines

    @staticmethod
    def format_file_list(tree_node, dirs_only=False, files_only=False):
        """Flatten the tree into ``(path, size)`` tuples (F017).

        Args:
            tree_node: Root node dict of the tree.
            dirs_only: Leave files out of the list.
            files_only: Leave directories out of the list.

        Returns:
            list: ``(path string, size in bytes)`` tuples in depth-first
            order; directories are reported with size 0.
        """
        items = []
        TreeFormatter._collect_files(tree_node, items, dirs_only, files_only)
        return items

    @staticmethod
    def _collect_files(node, items, dirs_only, files_only):
        """Append ``node`` and its descendants to ``items`` depth-first."""
        path = str(node["path"])
        if node["is_dir"]:
            if not files_only:
                items.append((path, 0))
            # Always descend, so nested files are found in files_only mode.
            for child in node.get("children", []):
                TreeFormatter._collect_files(child, items, dirs_only, files_only)
        elif not dirs_only:
            items.append((path, node.get("size", 0)))
# =============================================================================
# 模块 6: TerminalOutput
# =============================================================================
class TerminalOutput:
    """Terminal output (F018)."""

    @staticmethod
    def setup_encoding():
        """Switch ``sys.stdout`` to UTF-8 when the stream supports it."""
        try:
            reconfigure = getattr(sys.stdout, "reconfigure", None)
            if reconfigure is not None:
                reconfigure(encoding="utf-8")
        except (AttributeError, ValueError):
            # Best effort: keep whatever encoding the stream already has.
            pass

    @staticmethod
    def display(tree_lines, statistics=None):
        """Print the tree and, when given, the statistics summary.

        Args:
            tree_lines: Text lines of the rendered tree.
            statistics: Optional dict with ``dir_count``, ``file_count`` and
                ``total_size_str``.
        """
        TerminalOutput.setup_encoding()

        # The tree is framed by one blank line above and one below.
        for text in ("", *tree_lines, ""):
            print(text)

        if not statistics:
            return

        rule = "=" * 50
        print(rule)
        print(f" 目录数: {statistics['dir_count']}")
        print(f" 文件数: {statistics['file_count']}")
        print(f" 总大小: {statistics['total_size_str']}")
        print(rule)
# =============================================================================
# 模块 7: MarkdownWriter
# =============================================================================
class MarkdownWriter:
    """Save the report as a Markdown file (F019)."""

    @staticmethod
    def write(output_path, tree_lines, file_list, statistics, root_path):
        """Write the tree, the file table and the statistics as Markdown.

        Args:
            output_path: Destination file path.
            tree_lines: Text lines of the rendered tree.
            file_list: ``(path, size)`` tuples; the table is left out when
                this is empty.
            statistics: Dict with ``dir_count``, ``file_count`` and
                ``total_size_str``; the section is left out when falsy.
            root_path: Scanned root directory, used for the title.

        Returns:
            bool: True when the file was written, False when opening or
            writing failed (a warning is printed to stderr in that case).
        """
        root = Path(root_path)
        # A filesystem or drive root has an empty ``name``; fall back to the
        # full path so the title is never left blank.
        title = root.name or str(root)

        parts = [
            f"# 目录树 - {title}\n\n",
            f"> 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
            "## 目录结构\n\n",
            "```\n",
        ]
        parts.extend(line + "\n" for line in tree_lines)
        parts.append("```\n\n")

        if file_list:
            parts.append("## 文件列表\n\n")
            parts.append("| 序号 | 文件路径 | 大小 |\n")
            parts.append("|------|----------|------|\n")
            for idx, (path, size) in enumerate(file_list, 1):
                size_str = MarkdownWriter._format_size(size) if size > 0 else "-"
                # A literal pipe would otherwise end the table cell.
                safe_path = path.replace("|", "\\|")
                parts.append(f"| {idx} | `{safe_path}` | {size_str} |\n")
            parts.append("\n")

        if statistics:
            parts.append("## 统计信息\n\n")
            parts.append(f"- **目录数**: {statistics['dir_count']}\n")
            parts.append(f"- **文件数**: {statistics['file_count']}\n")
            parts.append(f"- **总大小**: {statistics['total_size_str']}\n")

        try:
            # utf-8-sig writes a BOM at the start of the file.
            with open(output_path, "w", encoding="utf-8-sig") as f:
                f.writelines(parts)
        except OSError as e:
            print(f"警告: 无法写入 Markdown 文件 {output_path} ({e}),回退到仅终端输出", file=sys.stderr)
            return False
        return True

    @staticmethod
    def _format_size(size_bytes):
        """Format a byte count as ``B``, ``KB``, ``MB`` or ``GB``.

        Values below 1024 are shown as whole bytes; larger values use one
        decimal place. ``GB`` is the largest unit used.
        """
        if size_bytes < 1024:
            return f"{size_bytes} B"
        if size_bytes < 1024 ** 2:
            return f"{size_bytes / 1024:.1f} KB"
        if size_bytes < 1024 ** 3:
            return f"{size_bytes / 1024 ** 2:.1f} MB"
        return f"{size_bytes / 1024 ** 3:.1f} GB"
# =============================================================================
# 模块 8: StatisticsCollector
# =============================================================================
class StatisticsCollector:
    """Statistics collection (F020)."""

    @staticmethod
    def collect(tree_node, dirs_only=False, files_only=False):
        """Count directories and files and total the file sizes.

        Args:
            tree_node: Root node dict of the tree.
            dirs_only: Leave files (and their sizes) out of the totals.
            files_only: Leave directories out of the totals.

        Returns:
            dict: ``dir_count``, ``file_count``, ``total_size`` (bytes) and
            ``total_size_str`` (human-readable size).
        """
        stats = dict(dir_count=0, file_count=0, total_size=0, total_size_str="0 B")
        StatisticsCollector._count(tree_node, stats, dirs_only, files_only)
        # Render the byte total once the whole tree has been visited.
        stats["total_size_str"] = MarkdownWriter._format_size(stats["total_size"])
        return stats

    @staticmethod
    def _count(node, stats, dirs_only=False, files_only=False):
        """Accumulate the counters for ``node`` and all of its descendants."""
        # Explicit work list instead of recursion; the totals do not depend
        # on the order in which nodes are visited.
        pending = [node]
        while pending:
            current = pending.pop()
            if not current["is_dir"]:
                if dirs_only:
                    continue
                stats["file_count"] += 1
                stats["total_size"] += current.get("size", 0)
                continue
            if not files_only:
                stats["dir_count"] += 1
            pending.extend(current.get("children", []))
# =============================================================================
# 主程序
# =============================================================================
def main():
    """Script entry point: parse arguments, scan, print, save as Markdown.

    Exits with status 1 when the target path does not exist or is not a
    directory. A failed Markdown write is not fatal: the terminal output has
    already been produced and the writer prints its own warning.
    """
    # Parse command-line arguments (F013).
    args = ArgParser.parse_args()

    # Validate the target path before doing any work.
    target_path = Path(args.path).resolve()
    if not target_path.exists():
        print(f"错误: 路径不存在: {target_path}", file=sys.stderr)
        sys.exit(1)
    if not target_path.is_dir():
        print(f"错误: 不是目录: {target_path}", file=sys.stderr)
        sys.exit(1)

    # Load the ignore rules (F014).
    ignore_set, glob_ignore_set = IgnoreLoader.load(
        ignore_file_path=args.ignore_file,
        target_dir=target_path,
    )

    # Scan the directory (F015).
    scanner = DirectoryScanner(
        ignore_set=ignore_set,
        glob_ignore_set=glob_ignore_set,
        max_depth=args.depth,
    )
    try:
        tree = scanner.scan(target_path)
    except (FileNotFoundError, NotADirectoryError) as e:
        # The path can still disappear between the check above and the scan.
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    # Render the tree, the flat list and the statistics (F016, F017, F020).
    tree_lines = TreeFormatter.format_tree(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )
    file_list = TreeFormatter.format_file_list(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )
    statistics = StatisticsCollector.collect(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Terminal output (F018).
    TerminalOutput.display(tree_lines, statistics)

    # Markdown output (F019). Every mode writes the same report; only the
    # confirmation message differs.
    if not args.dirs_only and not args.files_only:
        label = "Markdown"
    elif args.files_only:
        label = "文件树 Markdown"
    else:
        label = "目录树 Markdown"

    saved = MarkdownWriter.write(
        output_path=args.output,
        tree_lines=tree_lines,
        file_list=file_list,
        statistics=statistics,
        root_path=target_path,
    )
    # Only claim success when the file was really written; on failure the
    # writer has already printed a warning.
    if saved:
        print(f"{label} 已保存到: {Path(args.output).resolve()}")


if __name__ == "__main__":
    main()