- main.py: 增加show_banner()启动说明、各阶段[INFO]日志、结果摘要、任意键退出 - file_selector.py: 重写为路径输入→验证→空输入弹窗回退→不存在循环重试 - run.bat: 新建启动脚本(chcp 65001, mode con cols=80 lines=20, color 0B, title固定署名, pause) - Code/docs/modification-assessment.md: 修改需求评估文档
98 lines
3.5 KiB
Python
98 lines
3.5 KiB
Python
"""XLSX reader — pure Python, zero dependencies.
|
|
|
|
Uses ``zipfile`` + ``xml.etree.ElementTree`` to parse an .xlsx file
|
|
and return a cell map matching the xls_reader interface.
|
|
"""
|
|
|
|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
|
|
from models import FileFormatError
|
|
from utils import cell_ref_to_rc
|
|
|
|
# OOXML namespace — the XML uses a default namespace (no prefix),
|
|
# so we build the tag names with the full URI.
|
|
_S = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
|
|
|
|
def _tag(local: str) -> str:
    """Qualify *local* with the SpreadsheetML namespace, e.g. ``{ns}row``."""
    # ElementTree spells namespaced tags in Clark notation: "{uri}name".
    return '{{{ns}}}{name}'.format(ns=_S, name=local)
|
|
|
|
|
|
def read_excel_cells(filepath: str) -> dict[tuple[int, int], str]:
    """Load the cells of the .xlsx file at *filepath*.

    The result maps 0-based ``(row, col)`` pairs to cell text, so the
    spreadsheet cell A1 is found under ``(0, 0)``.
    """
    reader = XLSXReader(filepath)
    return reader.read_all_cells()
|
|
|
|
|
|
class XLSXReader:
    """Read an .xlsx file and return a cell map.

    Only the worksheet part stored at ``xl/worksheets/sheet1.xml`` is read.
    Cell values are returned as text: shared-string and inline-string cells
    are resolved to their visible text, booleans become ``'TRUE'`` /
    ``'FALSE'``, and every other type keeps the raw ``<v>`` text.
    """

    def __init__(self, filepath: str):
        """Remember *filepath*; the archive is not opened until reading."""
        self._filepath = filepath
        self._shared_strings: list[str] = []
        self._cells: dict[tuple[int, int], str] = {}

    def read_all_cells(self) -> dict[tuple[int, int], str]:
        """Return {(row, col): str} for every addressed cell in the sheet.

        Rows and columns are 0-based. Cells that carry no value are still
        reported, with an empty string. A fresh dict is returned on each
        call.

        Raises:
            FileFormatError: the archive has no ``xl/worksheets/sheet1.xml``.
        """
        # Start from a clean slate so a repeated call never mixes in
        # entries left over from an earlier read of the file.
        self._shared_strings = []
        self._cells = {}
        with zipfile.ZipFile(self._filepath, 'r') as zf:
            self._parse_shared_strings(zf)
            self._parse_sheet(zf, 'xl/worksheets/sheet1.xml')
        return dict(self._cells)

    @staticmethod
    def _rich_text(container: ET.Element) -> str:
        """Join the visible text of a string item (``<si>`` or ``<is>``).

        Text lives either in a direct ``<t>`` child or in the ``<t>`` of
        each ``<r>`` (rich-text run) child. Other children, notably
        ``<rPh>`` phonetic runs, also contain ``<t>`` elements but are not
        part of the displayed string, so they are deliberately ignored.
        """
        t_tag = _tag('t')
        r_tag = _tag('r')
        parts: list[str] = []
        for child in container:
            if child.tag == t_tag:
                parts.append(child.text or '')
            elif child.tag == r_tag:
                parts.extend(t.text or '' for t in child.findall(t_tag))
        return ''.join(parts)

    def _parse_shared_strings(self, zf: zipfile.ZipFile):
        """Parse xl/sharedStrings.xml into ``self._shared_strings``.

        A workbook without that part is valid; the table is then left
        untouched.
        """
        try:
            data = zf.read('xl/sharedStrings.xml')
        except KeyError:
            return  # No shared strings table

        root = ET.fromstring(data)
        self._shared_strings = [
            self._rich_text(si) for si in root.findall(_tag('si'))
        ]

    def _cell_text(self, cell_elem: ET.Element) -> str:
        """Return the text value of one ``<c>`` element."""
        cell_type = cell_elem.get('t', '')

        if cell_type == 'inlineStr':
            # Inline strings keep their text in <is>, not in <v>.
            inline = cell_elem.find(_tag('is'))
            if inline is not None:
                return self._rich_text(inline)

        value_elem = cell_elem.find(_tag('v'))
        # An empty <v/> has text None; normalise it so the map only ever
        # holds strings.
        raw = (value_elem.text or '') if value_elem is not None else ''

        if cell_type == 's':
            # Shared string reference: <v> holds an index into the table.
            try:
                idx = int(raw)
            except ValueError:
                return raw
            # Reject negative indexes explicitly; list indexing would
            # otherwise wrap around to the end of the table.
            if 0 <= idx < len(self._shared_strings):
                return self._shared_strings[idx]
            return raw

        if cell_type == 'b':
            return 'TRUE' if raw == '1' else 'FALSE'

        # Numbers, formula strings, errors and untyped cells keep the raw
        # text; any number formatting is left to the caller.
        return raw

    def _parse_sheet(self, zf: zipfile.ZipFile, sheet_path: str):
        """Parse a worksheet XML and populate ``self._cells``.

        Cells without an ``r`` (reference) attribute are skipped because
        their position cannot be read from the element itself.

        Raises:
            FileFormatError: *sheet_path* is not a member of the archive.
        """
        try:
            data = zf.read(sheet_path)
        except KeyError as exc:
            raise FileFormatError(f"Worksheet not found: {sheet_path}") from exc

        sheet_data = ET.fromstring(data).find(_tag('sheetData'))
        if sheet_data is None:
            return

        row_tag = _tag('row')
        cell_tag = _tag('c')
        for row_elem in sheet_data.findall(row_tag):
            for cell_elem in row_elem.findall(cell_tag):
                ref = cell_elem.get('r', '')
                if not ref:
                    continue
                row, col = cell_ref_to_rc(ref)
                self._cells[(row, col)] = self._cell_text(cell_elem)
|