import type { Element, Node } from 'domhandler';

/**
 * Result of {@link parseHtmlTable}.
 *
 * `headers` holds the column names in document order; each entry of `rows`
 * maps every header name to the text of the cell in the same column.
 */
export interface ParsedTable {
  headers: string[];
  rows: Record<string, string>[];
}

/** Narrows a node to an element by checking its `type` discriminant for `'tag'`. */
const isElement = (n: Node): n is Element => n.type === 'tag';

/**
 * Narrows a node to one carrying string `data`, based on `type === 'text'`.
 * Structural on purpose so no additional type has to be imported.
 */
const isTextNode = (n: Node): n is Node & { data: string } => n.type === 'text';

/** Returns the direct element children of `node` (text/comment nodes are skipped). */
const childElements = (node: Node): Element[] =>
  isElement(node) ? node.children.filter(isElement) : [];

/** Returns the `<td>` / `<th>` cells that are direct children of a row. */
const cellsOf = (row: Element): Element[] =>
  childElements(row).filter((n) => n.name === 'td' || n.name === 'th');

/**
 * Extracts the text of a cell: every direct text child is trimmed, empty
 * pieces are dropped, and the rest are concatenated without a separator.
 *
 * NOTE(review): only *direct* text children are read, so text wrapped in
 * nested elements (e.g. `<td><strong>x</strong></td>`) yields `''`.
 * Confirm whether callers need nested text before changing this.
 */
const cellText = (cell: Element): string =>
  cell.children
    .filter(isTextNode)
    .map((t) => t.data.trim())
    .filter(Boolean)
    .join('');

/**
 * Converts a `<table>` element into header names plus one record per data row.
 *
 * Header selection:
 * - If the table has a `<thead>`, its first `<tr>` supplies the headers and
 *   every `<tbody>` row is treated as data.
 * - Otherwise the first `<tbody>` row supplies the headers and is excluded
 *   from the data rows.
 *
 * A header cell without text is named `列N` (N is the 1-based column index).
 * Data cells missing for a header column are filled with `''`; cells beyond
 * the last header column are ignored. If two headers share the same name,
 * the later column overwrites the earlier one in each record.
 *
 * @param table - The element to parse; its `name` must be `'table'`.
 * @returns The parsed headers and rows. Both are empty when the table has no
 *   `<tbody>` or the `<tbody>` contains no `<tr>`.
 * @throws Error when `table` is not a `<table>` element.
 */
export function parseHtmlTable(table: Element): ParsedTable {
  if (table.name !== 'table') {
    throw new Error('parseHtmlTable: node is not <table>');
  }

  // ---- Locate thead / tbody among the table's direct element children ----
  const sections = childElements(table);
  const thead = sections.find((n) => n.name === 'thead');
  const tbody = sections.find((n) => n.name === 'tbody');

  if (!tbody) {
    return { headers: [], rows: [] };
  }

  const bodyRows = childElements(tbody).filter((n) => n.name === 'tr');
  const firstBodyRow = bodyRows[0];
  if (!firstBodyRow) {
    return { headers: [], rows: [] };
  }

  // ---- 1. Headers ----
  let headerCells: Element[];
  if (thead) {
    // A thead is present: use its first row.
    const headerRow = childElements(thead).find((n) => n.name === 'tr');
    headerCells = headerRow ? cellsOf(headerRow) : [];
  } else {
    // No thead: the first tbody row acts as the header row.
    headerCells = cellsOf(firstBodyRow);
  }

  const headers = headerCells.map((cell, i) => cellText(cell) || `列${i + 1}`);

  // ---- 2. Data rows ----
  // Without a thead the first body row was consumed as the header, so skip it.
  const dataRows = thead ? bodyRows : bodyRows.slice(1);

  const rows = dataRows.map((row) => {
    const cells = cellsOf(row);
    const record: Record<string, string> = {};
    headers.forEach((header, i) => {
      const cell = cells[i];
      record[header] = cell ? cellText(cell) : '';
    });
    return record;
  });

  return { headers, rows };
}