655 lines
20 KiB
Python
655 lines
20 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tree_gen.py - 目录树生成脚本 (Windows 平台)

功能: F013-F020
  - 接收路径输入(命令行参数)
  - 加载忽略配置
  - 递归遍历目录
  - 生成目录树/文件树
  - 终端输出
  - Markdown 保存
  - 统计信息

技术选型: Python 3.8+ 标准库,零第三方依赖
"""
|
||
|
||
import argparse
|
||
import fnmatch
|
||
import os
|
||
import sys
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 1: 默认忽略列表 & 配置
|
||
# =============================================================================
|
||
|
||
# Entry names (files or directories) that are skipped by exact name match.
DEFAULT_IGNORE = {
    # Version-control metadata
    ".git",
    ".svn",
    ".hg",
    # Dependency folders
    "node_modules",
    "bower_components",
    # Python caches
    "__pycache__",
    ".pytest_cache",
    # IDE settings
    ".idea",
    ".vscode",
    # Build output
    "dist",
    "build",
    "target",
    # OS-generated files
    ".DS_Store",
    "Thumbs.db",
    # Virtual environments
    "venv",
    ".venv",
    "env",
}

# Wildcard patterns; entry names are tested against these with fnmatch.
DEFAULT_GLOB_IGNORE = {"*.pyc"}
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 2: ArgParser
|
||
# =============================================================================
|
||
|
||
class ArgParser:
    """Command-line argument parsing for tree_gen.py (F013)."""

    @staticmethod
    def _non_negative_int(value):
        """argparse ``type`` callback that only accepts integers >= 0.

        A negative depth would make the scanner stop at the root without
        any explanation, so it is rejected up front with a usage error.
        """
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(f"无效的整数: {value!r}") from None
        if number < 0:
            raise argparse.ArgumentTypeError(f"深度必须为非负整数: {number}")
        return number

    @staticmethod
    def parse_args(args=None):
        """Parse command-line arguments.

        Args:
            args: Optional list of argument strings. When ``None``, argparse
                reads ``sys.argv[1:]``.

        Returns:
            argparse.Namespace with the attributes ``path``, ``output``,
            ``depth``, ``files_only``, ``dirs_only`` and ``ignore_file``.

        Raises:
            SystemExit: Raised by argparse on invalid input, including a
                negative ``--depth`` or ``-f`` combined with ``-D``.
        """
        parser = argparse.ArgumentParser(
            prog="tree_gen.py",
            description="目录树生成脚本 - 生成目录结构和文件列表",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
示例:
  python tree_gen.py # 当前目录
  python tree_gen.py /path/to/project # 指定目录
  python tree_gen.py -d 2 src/ # 限制深度为 2
  python tree_gen.py -f -o files.md . # 仅文件树,输出到 files.md
  python tree_gen.py -D -i .gitignore . # 仅目录树,使用 .gitignore
""",
        )

        parser.add_argument(
            "path",
            nargs="?",
            default=".",
            help="目标目录路径(默认: 当前目录)",
        )
        parser.add_argument(
            "-o", "--output",
            default="tree_output.md",
            help="Markdown 输出文件路径(默认: tree_output.md)",
        )
        parser.add_argument(
            "-d", "--depth",
            type=ArgParser._non_negative_int,
            default=None,
            help="最大递归深度(默认: 无限制)",
        )

        # The two display modes contradict each other: combined, the tree
        # shows only directories while the file list and statistics end up
        # empty. Let argparse refuse the combination.
        mode = parser.add_mutually_exclusive_group()
        mode.add_argument(
            "-f", "--files-only",
            action="store_true",
            help="仅显示文件树",
        )
        mode.add_argument(
            "-D", "--dirs-only",
            action="store_true",
            help="仅显示目录树",
        )

        parser.add_argument(
            "-i", "--ignore",
            dest="ignore_file",
            default=None,
            help="忽略配置文件路径(默认: 目标目录下的 .treeignore)",
        )

        return parser.parse_args(args)
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 3: IgnoreLoader
|
||
# =============================================================================
|
||
|
||
class IgnoreLoader:
    """Builds the ignore rules used while scanning (F014)."""

    @staticmethod
    def _locate_config(ignore_file_path, target_dir):
        """Return the ignore file to read as a Path, or None if there is none.

        An explicitly requested file is returned without checking that it
        exists, so the caller can report a missing file to the user.
        """
        if ignore_file_path:
            return Path(ignore_file_path)
        if target_dir:
            for candidate_name in (".treeignore", ".gitignore"):
                candidate = Path(target_dir) / candidate_name
                if candidate.is_file():
                    return candidate
        return None

    @staticmethod
    def _parse_line(raw_line):
        """Turn one line of an ignore file into a pattern, or None to skip it."""
        text = raw_line.strip()
        # Blank lines and comments carry no rule.
        if not text or text.startswith("#"):
            return None
        # gitignore negation ("!name") re-includes an entry; the scanner only
        # matches names to exclude, so storing it would add a bogus rule.
        if text.startswith("!"):
            return None
        # A trailing "/" marks a directory and a leading "/" anchors the
        # pattern to the root. Entries are matched by bare name, so both are
        # dropped; a line consisting only of slashes becomes empty.
        text = text.strip("/")
        return text or None

    @staticmethod
    def load(ignore_file_path=None, target_dir=None):
        """Load the ignore configuration.

        Source priority:
            1. The ignore file given on the command line.
            2. ``.treeignore`` inside the target directory.
            3. ``.gitignore`` inside the target directory (fallback).
            4. The built-in defaults only.

        Rules read from a file are added on top of the built-in defaults.
        Patterns are matched against entry names only, so a pattern that
        still contains a path separator after normalisation never matches.

        Args:
            ignore_file_path: Explicit ignore file path, or None.
            target_dir: Directory searched for ``.treeignore`` and
                ``.gitignore`` when no explicit file is given.

        Returns:
            tuple: ``(ignore_set, glob_ignore_set)`` holding exact names and
            fnmatch wildcard patterns respectively.
        """
        ignore_set = set(DEFAULT_IGNORE)
        glob_ignore_set = set(DEFAULT_GLOB_IGNORE)

        config_path = IgnoreLoader._locate_config(ignore_file_path, target_dir)
        if config_path is None:
            return ignore_set, glob_ignore_set

        try:
            # "utf-8-sig" strips a byte-order mark if present; otherwise the
            # BOM would become part of the first pattern.
            with open(config_path, "r", encoding="utf-8-sig", errors="replace") as f:
                for raw_line in f:
                    pattern = IgnoreLoader._parse_line(raw_line)
                    if pattern is None:
                        continue
                    if any(ch in pattern for ch in "*?["):
                        glob_ignore_set.add(pattern)
                    else:
                        ignore_set.add(pattern)
        except OSError:
            # Covers a missing or unreadable explicit file too: warn and keep
            # whatever rules were collected so far.
            print(f"警告: 无法读取忽略配置文件 {config_path},使用默认配置", file=sys.stderr)

        return ignore_set, glob_ignore_set
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 4: DirectoryScanner
|
||
# =============================================================================
|
||
|
||
class DirectoryScanner:
    """Recursively walks a directory and builds a nested dict tree (F015)."""

    def __init__(self, ignore_set, glob_ignore_set, max_depth=None):
        """Store the ignore rules and the optional depth limit.

        Args:
            ignore_set: Entry names skipped by exact match.
            glob_ignore_set: fnmatch patterns tested against entry names.
            max_depth: Directories at this depth or deeper are listed but
                not expanded. None means unlimited.
        """
        self.ignore_set = ignore_set
        self.glob_ignore_set = glob_ignore_set
        self.max_depth = max_depth
        # Real paths of directories already expanded during the current scan.
        self._seen_real_paths = set()

    def should_ignore(self, name):
        """Return True when ``name`` matches an exact or wildcard ignore rule."""
        if name in self.ignore_set:
            return True
        return any(fnmatch.fnmatch(name, pattern) for pattern in self.glob_ignore_set)

    def scan(self, root_path):
        """Scan ``root_path`` and return its tree.

        Args:
            root_path: Root directory to scan.

        Returns:
            dict: Node with the keys ``name``, ``path``, ``is_dir``,
            ``children`` (list of nodes) and ``size`` (bytes, 0 for
            directories).

        Raises:
            FileNotFoundError: The path does not exist.
            NotADirectoryError: The path is not a directory.
        """
        root = Path(root_path).resolve()

        if not root.exists():
            raise FileNotFoundError(f"路径不存在: {root}")
        if not root.is_dir():
            raise NotADirectoryError(f"不是目录: {root}")

        # Register the root so that a link pointing back at it is recognised
        # on first sight instead of being expanded one extra level.
        self._seen_real_paths = {str(root)}

        return self._scan_node(root, depth=0, is_root=True)

    @staticmethod
    def _link_target_exists(entry):
        """Return True when the symlink ``entry`` points at an existing target.

        A non-strict ``resolve()`` succeeds for dangling links, so existence
        is tested directly. Any failure is treated as "target missing".
        """
        try:
            return entry.exists()
        except (OSError, ValueError, RuntimeError):
            return False

    def _scan_node(self, path, depth, is_root=False):
        """Build the node for ``path``, recursing into directories."""
        # A filesystem root has an empty ``name``; fall back to the full path.
        name = path.name if path.parent != path else str(path)
        is_dir = path.is_dir()

        node = {
            "name": name,
            "path": path,
            "is_dir": is_dir,
            "children": [],
            "size": 0,
        }

        if not is_dir:
            try:
                node["size"] = path.stat().st_size
            except OSError:
                node["size"] = 0
            return node

        # Depth limit: the directory is listed but not expanded.
        if self.max_depth is not None and depth >= self.max_depth:
            return node

        # Every directory is expanded at most once per scan (identified by
        # its real path); this is what stops symlink cycles. The root was
        # registered by scan().
        if not is_root:
            try:
                real_path = str(path.resolve())
            except (OSError, ValueError, RuntimeError):
                # pathlib before Python 3.13 reports a symlink loop as
                # RuntimeError rather than OSError.
                print(f"警告: 无法解析路径,跳过: {path}", file=sys.stderr)
                return node
            if real_path in self._seen_real_paths:
                print(f"警告: 检测到符号链接循环,跳过: {path}", file=sys.stderr)
                return node
            self._seen_real_paths.add(real_path)

        try:
            # Directories first, then case-insensitive by name.
            entries = sorted(path.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
        except PermissionError:
            print(f"警告: 权限不足,跳过目录: {path}", file=sys.stderr)
            return node
        except OSError as e:
            print(f"警告: 读取目录失败 ({e}),跳过: {path}", file=sys.stderr)
            return node

        for entry in entries:
            if self.should_ignore(entry.name):
                continue
            # Dangling or self-referential links are left out of the tree.
            if entry.is_symlink() and not self._link_target_exists(entry):
                continue
            node["children"].append(self._scan_node(entry, depth + 1))

        return node
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 5: TreeFormatter
|
||
# =============================================================================
|
||
|
||
class TreeFormatter:
    """Renders a scanned tree as text lines and as a flat path list (F016, F017)."""

    # Drawing elements. All four are exactly four columns wide so that every
    # nesting level lines up under its parent's connector.
    BRANCH = "├── "
    LAST_BRANCH = "└── "
    PIPE = "│   "
    SPACE = "    "

    @staticmethod
    def _visible_children(node, dirs_only, files_only):
        """Return the children of ``node`` that pass the display filter.

        ``dirs_only`` takes precedence when both flags are set.
        """
        children = node.get("children", [])
        if dirs_only:
            return [c for c in children if c["is_dir"]]
        if files_only:
            return [c for c in children if not c["is_dir"]]
        return children

    @staticmethod
    def format_tree(tree_node, dirs_only=False, files_only=False, is_root=True):
        """Format a tree as a list of text lines.

        Args:
            tree_node: Root node dict (keys ``name``, ``is_dir``, ``children``).
            dirs_only: Show directories only.
            files_only: Show files only. Directories are filtered out at
                every level, so only files directly below the root appear.
            is_root: Must be True for rendering to happen; with False an
                empty list is returned (non-root nodes are rendered by
                ``_format_children``).

        Returns:
            list: Text lines, the root line first.
        """
        if not is_root:
            return []

        root_name = tree_node["name"]
        lines = [f"{root_name}/" if tree_node["is_dir"] else root_name]

        children = TreeFormatter._visible_children(tree_node, dirs_only, files_only)
        last_index = len(children) - 1
        for index, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(
                    child, "", index == last_index, dirs_only, files_only
                )
            )
        return lines

    @staticmethod
    def _format_children(node, prefix, is_last, dirs_only, files_only):
        """Render ``node`` and its visible descendants.

        Args:
            node: Node dict to render.
            prefix: Text accumulated from the ancestors' columns.
            is_last: Whether ``node`` is the last visible child of its parent.
            dirs_only: Show directories only.
            files_only: Show files only.

        Returns:
            list: Text lines for ``node`` followed by its subtree.
        """
        connector = TreeFormatter.LAST_BRANCH if is_last else TreeFormatter.BRANCH
        suffix = "/" if node["is_dir"] else ""
        lines = [f"{prefix}{connector}{node['name']}{suffix}"]

        # Below a last child the column is blank; otherwise the vertical bar
        # continues down to the next sibling.
        child_prefix = prefix + (TreeFormatter.SPACE if is_last else TreeFormatter.PIPE)

        children = TreeFormatter._visible_children(node, dirs_only, files_only)
        last_index = len(children) - 1
        for index, child in enumerate(children):
            lines.extend(
                TreeFormatter._format_children(
                    child, child_prefix, index == last_index, dirs_only, files_only
                )
            )
        return lines

    @staticmethod
    def format_file_list(tree_node, dirs_only=False, files_only=False):
        """Build a flat, depth-first list of paths with sizes (F017).

        Unlike ``format_tree``, this always descends into directories, so
        with ``files_only`` it also lists files in nested directories.

        Args:
            tree_node: Root node dict.
            dirs_only: Leave files out.
            files_only: Leave directories out.

        Returns:
            list: ``(path, size)`` tuples; directories are reported with size 0.
        """
        items = []
        TreeFormatter._collect_files(tree_node, items, dirs_only, files_only)
        return items

    @staticmethod
    def _collect_files(node, items, dirs_only, files_only):
        """Append ``node`` and its descendants to ``items`` in pre-order."""
        path = str(node["path"])

        if node["is_dir"]:
            if not files_only:
                items.append((path, 0))
            for child in node.get("children", []):
                TreeFormatter._collect_files(child, items, dirs_only, files_only)
        elif not dirs_only:
            items.append((path, node.get("size", 0)))
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 6: TerminalOutput
|
||
# =============================================================================
|
||
|
||
class TerminalOutput:
    """Prints the tree and its statistics to standard output (F018)."""

    @staticmethod
    def setup_encoding():
        """Switch stdout to UTF-8 when the stream supports reconfiguration."""
        try:
            reconfigure = getattr(sys.stdout, "reconfigure", None)
            if reconfigure is not None:
                reconfigure(encoding="utf-8")
        except (AttributeError, ValueError):
            # Best effort: keep whatever encoding the stream already has.
            pass

    @staticmethod
    def display(tree_lines, statistics=None):
        """Print the tree lines, followed by a statistics box if provided.

        Args:
            tree_lines: Text lines of the rendered tree.
            statistics: Optional dict with the keys ``dir_count``,
                ``file_count`` and ``total_size_str``.
        """
        TerminalOutput.setup_encoding()

        print()
        for text in tree_lines:
            print(text)
        print()

        if not statistics:
            return

        rule = "=" * 50
        print(rule)
        for label, key in (
            ("目录数", "dir_count"),
            ("文件数", "file_count"),
            ("总大小", "total_size_str"),
        ):
            print(f" {label}: {statistics[key]}")
        print(rule)
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 7: MarkdownWriter
|
||
# =============================================================================
|
||
|
||
class MarkdownWriter:
    """Saves the generated report as a Markdown document (F019)."""

    @staticmethod
    def write(output_path, tree_lines, file_list, statistics, root_path):
        """Write the report to ``output_path``.

        The file is written as UTF-8 with a byte-order mark and contains a
        title, a timestamp, the tree in a fenced block, an optional table of
        paths and an optional statistics section.

        Args:
            output_path: Destination file path.
            tree_lines: Text lines of the rendered tree.
            file_list: ``(path, size)`` tuples; the table is skipped when empty.
            statistics: Dict with ``dir_count``, ``file_count`` and
                ``total_size_str``; the section is skipped when falsy.
            root_path: Scanned root; its last path component is used in the title.

        Returns:
            bool: True on success, False when the file could not be written
            (a warning is printed to stderr in that case).
        """
        try:
            with open(output_path, "w", encoding="utf-8-sig") as md:
                # Header
                title = Path(root_path).name
                md.write(f"# 目录树 - {title}\n\n")
                stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                md.write(f"> 生成时间: {stamp}\n\n")

                # Tree inside a fenced code block
                md.write("## 目录结构\n\n")
                md.write("```\n")
                for text in tree_lines:
                    md.write(text + "\n")
                md.write("```\n\n")

                # Table of paths
                if file_list:
                    md.write("## 文件列表\n\n")
                    md.write("| 序号 | 文件路径 | 大小 |\n")
                    md.write("|------|----------|------|\n")
                    for number, (entry_path, entry_size) in enumerate(file_list, start=1):
                        if entry_size > 0:
                            size_text = MarkdownWriter._format_size(entry_size)
                        else:
                            size_text = "-"
                        # A literal pipe would otherwise end the table cell.
                        escaped = entry_path.replace("|", "\\|")
                        md.write(f"| {number} | `{escaped}` | {size_text} |\n")
                    md.write("\n")

                # Statistics
                if statistics:
                    md.write("## 统计信息\n\n")
                    md.write(f"- **目录数**: {statistics['dir_count']}\n")
                    md.write(f"- **文件数**: {statistics['file_count']}\n")
                    md.write(f"- **总大小**: {statistics['total_size_str']}\n")

            return True

        except OSError as e:
            print(f"警告: 无法写入 Markdown 文件 {output_path} ({e}),回退到仅终端输出", file=sys.stderr)
            return False

    @staticmethod
    def _format_size(size_bytes):
        """Return ``size_bytes`` as text in B, KB, MB or GB (1024-based).

        Byte counts are printed as given; larger units use one decimal place.
        """
        if size_bytes < 1024:
            return f"{size_bytes} B"
        for unit, factor in (("KB", 1024), ("MB", 1024 * 1024)):
            if size_bytes < factor * 1024:
                return f"{size_bytes / factor:.1f} {unit}"
        return f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|
||
|
||
|
||
# =============================================================================
|
||
# 模块 8: StatisticsCollector
|
||
# =============================================================================
|
||
|
||
class StatisticsCollector:
    """Counts directories, files and bytes in a scanned tree (F020)."""

    @staticmethod
    def collect(tree_node, dirs_only=False, files_only=False):
        """Collect statistics for the whole tree.

        Args:
            tree_node: Root node dict.
            dirs_only: Leave files (and their sizes) out of the totals.
            files_only: Leave directories out of the totals.

        Returns:
            dict: ``dir_count``, ``file_count``, ``total_size`` (bytes) and
            ``total_size_str`` (formatted by ``MarkdownWriter._format_size``).
            The root directory itself is included in ``dir_count``.
        """
        stats = dict(dir_count=0, file_count=0, total_size=0, total_size_str="0 B")
        StatisticsCollector._count(tree_node, stats, dirs_only, files_only)
        stats["total_size_str"] = MarkdownWriter._format_size(stats["total_size"])
        return stats

    @staticmethod
    def _count(node, stats, dirs_only=False, files_only=False):
        """Add the totals of ``node`` and all its descendants to ``stats``."""
        # Explicit stack instead of recursion; visiting order does not affect sums.
        pending = [node]
        while pending:
            current = pending.pop()
            if current["is_dir"]:
                if not files_only:
                    stats["dir_count"] += 1
                pending.extend(current.get("children", []))
            elif not dirs_only:
                stats["file_count"] += 1
                stats["total_size"] += current.get("size", 0)
|
||
|
||
|
||
# =============================================================================
|
||
# 主程序
|
||
# =============================================================================
|
||
|
||
def main():
    """Program entry point: parse arguments, scan, print, and save Markdown.

    Exits with status 1 when the target path does not exist or is not a
    directory. A failed Markdown write is not fatal: the writer prints a
    warning and the terminal output stands on its own.
    """
    # Parse command-line arguments (F013)
    args = ArgParser.parse_args()

    target_path = Path(args.path).resolve()

    if not target_path.exists():
        print(f"错误: 路径不存在: {target_path}", file=sys.stderr)
        sys.exit(1)

    if not target_path.is_dir():
        print(f"错误: 不是目录: {target_path}", file=sys.stderr)
        sys.exit(1)

    # Load ignore rules (F014)
    ignore_set, glob_ignore_set = IgnoreLoader.load(
        ignore_file_path=args.ignore_file,
        target_dir=target_path,
    )

    # Scan the directory (F015)
    scanner = DirectoryScanner(
        ignore_set=ignore_set,
        glob_ignore_set=glob_ignore_set,
        max_depth=args.depth,
    )

    try:
        tree = scanner.scan(target_path)
    except (FileNotFoundError, NotADirectoryError) as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    # Render the tree and collect the flat path list (F016, F017)
    tree_lines = TreeFormatter.format_tree(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )
    file_list = TreeFormatter.format_file_list(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Collect statistics (F020)
    statistics = StatisticsCollector.collect(
        tree,
        dirs_only=args.dirs_only,
        files_only=args.files_only,
    )

    # Terminal output (F018)
    TerminalOutput.display(tree_lines, statistics)

    # Markdown output (F019). The call is the same in every mode; only the
    # confirmation message differs.
    saved = MarkdownWriter.write(
        output_path=args.output,
        tree_lines=tree_lines,
        file_list=file_list,
        statistics=statistics,
        root_path=target_path,
    )

    # Confirm only when the file was actually written; on failure the writer
    # has already printed its own warning.
    if saved:
        if args.files_only:
            label = "文件树 Markdown"
        elif args.dirs_only:
            label = "目录树 Markdown"
        else:
            label = "Markdown"
        print(f"{label} 已保存到: {Path(args.output).resolve()}")


if __name__ == "__main__":
    main()
|