chore(turndown-plugin-gfm): align with upstream

219d5bcae3
This commit is contained in:
Elian Doran
2025-03-19 18:22:20 +02:00
parent d3423127d8
commit 2021c8f751
13 changed files with 246 additions and 112 deletions

View File

@@ -2,7 +2,7 @@ export default function strikethrough (turndownService) {
turndownService.addRule('strikethrough', {
filter: ['del', 's', 'strike'],
replacement: function (content) {
return '~' + content + '~'
return '~~' + content + '~~'
}
})
}

View File

@@ -1,6 +1,48 @@
// Borrowed Array.prototype helpers.
// NOTE(review): their call sites are outside this view - presumably applied to
// array-like DOM collections; confirm before removing.
var indexOf = Array.prototype.indexOf
var every = Array.prototype.every
// Registry of turndown rules, keyed by rule name. Every entry is registered
// through turndownService.addRule() when the plugin is installed.
var rules = {}
// Maps a lower-cased alignment keyword to the matching delimiter-row cell.
var alignMap = { left: ':---', right: '---:', center: ':---:' };
// Assigned from turndownService.isCodeBlock when the plugin is installed; null until then.
let isCodeBlock_ = null;
// Assigned from turndownService.options when the plugin is installed; null until then.
let options_ = null;
// We need to cache the result of tableShouldBeSkipped() as it is expensive.
// Caching it means we went from about 9000 ms for rendering down to 90 ms.
// Fixes https://github.com/laurent22/joplin/issues/6736
// Keyed by table node; a WeakMap is used so entries do not keep nodes alive.
const tableShouldBeSkippedCache_ = new WeakMap();
// Returns the lower-cased horizontal alignment declared on a cell, or '' when
// none is declared.
//
// The `align` attribute takes precedence over the inline `text-align` style.
// Nodes that are missing, or that expose neither `getAttribute` nor `style`
// (for example a text node found among a row's childNodes), yield '' instead
// of throwing.
function getAlignment(node) {
  if (!node) return '';
  const fromAttribute = typeof node.getAttribute === 'function' ? node.getAttribute('align') : '';
  const fromStyle = node.style ? node.style.textAlign : '';
  return (fromAttribute || fromStyle || '').toLowerCase();
}
// Returns the delimiter-row cell for a column alignment keyword.
//
// Keywords present in alignMap map to their aligned delimiter; anything else
// (empty string, undefined, or an unsupported keyword such as 'justify')
// falls back to the plain '---' delimiter so the row never contains the
// text "undefined".
function getBorder(alignment) {
  // Own-property check so inherited keys like 'constructor' are not matched.
  return Object.prototype.hasOwnProperty.call(alignMap, alignment) ? alignMap[alignment] : '---';
}
// Decides the alignment of one table column by majority vote across rows.
//
// Each row that has a child at `columnIndex` casts one vote for that child's
// alignment ('left', 'right', 'center' or '' for none). A candidate replaces
// the current winner only when its tally becomes strictly greater, so ties
// keep the earlier winner. Alignment keywords outside the four known ones
// never win. Returns '' when no row reaches the column.
function getColumnAlignment(table, columnIndex) {
  const tally = { left: 0, right: 0, center: 0, '': 0 };
  let winner = '';
  for (let rowIndex = 0; rowIndex < table.rows.length; rowIndex += 1) {
    const cells = table.rows[rowIndex].childNodes;
    if (columnIndex >= cells.length) continue;
    const candidate = getAlignment(cells[columnIndex]);
    // Unknown keywords carry no tally and therefore can never take the lead.
    if (!Object.prototype.hasOwnProperty.call(tally, candidate)) continue;
    tally[candidate] += 1;
    if (tally[candidate] > tally[winner]) winner = candidate;
  }
  return winner;
}
rules.tableCell = {
filter: ['th', 'td'],
@@ -17,22 +59,13 @@ rules.tableRow = {
if (tableShouldBeSkipped(parentTable)) return content;
var borderCells = ''
var alignMap = { left: ':--', right: '--:', center: ':-:' }
if (isHeadingRow(node)) {
const colCount = tableColCount(parentTable);
for (var i = 0; i < colCount; i++) {
const childNode = colCount >= node.childNodes.length ? null : node.childNodes[i];
var border = '---'
var align = childNode ? (childNode.getAttribute('align') || '').toLowerCase() : '';
if (align) border = alignMap[align] || border
if (childNode) {
borderCells += cell(border, node.childNodes[i])
} else {
borderCells += cell(border, null, i);
}
const childNode = i < node.childNodes.length ? node.childNodes[i] : null;
var border = getBorder(getColumnAlignment(parentTable, i));
borderCells += cell(border, childNode, i);
}
}
return '\n' + content + (borderCells ? '\n' + borderCells : '')
@@ -40,33 +73,70 @@ rules.tableRow = {
}
rules.table = {
// Only convert tables with a heading row.
// Tables with no heading row are kept using `keep` (see below).
filter: function (node) {
return node.nodeName === 'TABLE'
filter: function (node, options) {
return node.nodeName === 'TABLE';
},
replacement: function (content, node) {
if (tableShouldBeSkipped(node)) return content;
// Only convert tables that can result in valid Markdown
// Other tables are kept as HTML using `keep` (see below).
if (tableShouldBeHtml(node, options_)) {
let html = node.outerHTML;
let divParent = nodeParentDiv(node)
// Make table in HTML format horizontally scrollable by give table a div parent, so the width of the table is limited to the screen width.
// see https://github.com/laurent22/joplin/pull/10161
// test cases:
// packages/app-cli/tests/html_to_md/preserve_nested_tables.html
// packages/app-cli/tests/html_to_md/table_with_blockquote.html
// packages/app-cli/tests/html_to_md/table_with_code_1.html
// packages/app-cli/tests/html_to_md/table_with_code_2.html
// packages/app-cli/tests/html_to_md/table_with_code_3.html
// packages/app-cli/tests/html_to_md/table_with_heading.html
// packages/app-cli/tests/html_to_md/table_with_hr.html
// packages/app-cli/tests/html_to_md/table_with_list.html
if (divParent === null || !divParent.classList.contains('joplin-table-wrapper')){
return `\n\n<div class="joplin-table-wrapper">${html}</div>\n\n`;
} else {
return html
}
} else {
if (tableShouldBeSkipped(node)) return content;
// Ensure there are no blank lines
content = content.replace(/\n+/g, '\n')
// Ensure there are no blank lines
content = content.replace(/\n+/g, '\n')
// If table has no heading, add an empty one so as to get a valid Markdown table
var secondLine = content.trim().split('\n');
if (secondLine.length >= 2) secondLine = secondLine[1]
var secondLineIsDivider = secondLine.indexOf('| ---') === 0
var columnCount = tableColCount(node);
var emptyHeader = ''
if (columnCount && !secondLineIsDivider) {
emptyHeader = '|' + ' |'.repeat(columnCount) + '\n' + '|' + ' --- |'.repeat(columnCount)
// If table has no heading, add an empty one so as to get a valid Markdown table
var secondLine = content.trim().split('\n');
if (secondLine.length >= 2) secondLine = secondLine[1]
var secondLineIsDivider = /\| :?---/.test(secondLine);
var columnCount = tableColCount(node);
var emptyHeader = ''
if (columnCount && !secondLineIsDivider) {
emptyHeader = '|' + ' |'.repeat(columnCount) + '\n' + '|'
for (var columnIndex = 0; columnIndex < columnCount; ++columnIndex) {
emptyHeader += ' ' + getBorder(getColumnAlignment(node, columnIndex)) + ' |';
}
}
const captionContent = node.caption ? node.caption.textContent || '' : '';
const caption = captionContent ? `${captionContent}\n\n` : '';
const tableContent = `${emptyHeader}${content}`.trimStart();
return `\n\n${caption}${tableContent}\n\n`;
}
return '\n\n' + emptyHeader + content + '\n\n'
}
}
// <caption> elements produce no output of their own; the table rule reads the
// caption text directly from node.caption when it builds the table.
rules.tableCaption = {
  filter: ['caption'],
  replacement: function () {
    return '';
  },
};
// <colgroup> and <col> elements are replaced by an empty string.
rules.tableColgroup = {
  filter: ['colgroup', 'col'],
  replacement: function () {
    return '';
  },
};
rules.tableSection = {
filter: ['thead', 'tbody', 'tfoot'],
replacement: function (content) {
@@ -126,9 +196,57 @@ function nodeContainsTable(node) {
return false;
}
// Depth-first search of `node`'s descendants.
//
// `types` is either a list of node names to look for, or the string 'code',
// in which case a descendant also matches when the isCodeBlock_ callback is
// set and reports it as a code block. Returns true on the first match and
// false when nothing matches or the node has no childNodes.
const nodeContains = (node, types) => {
  const children = node.childNodes;
  if (!children) return false;
  return Array.prototype.some.call(children, (child) => {
    const isCodeMatch = types === 'code' && isCodeBlock_ && isCodeBlock_(child);
    return isCodeMatch || types.includes(child.nodeName) || nodeContains(child, types);
  });
};
// Decides whether a table must be kept as raw HTML instead of being converted
// to a Markdown table.
//
// A table is kept as HTML when it contains a code block, or any of the
// block-level tags listed below.
//
// `options` may be null or undefined (the module-level options start as null
// until the plugin is installed); in that case nested tables are not treated
// as a reason to keep HTML.
const tableShouldBeHtml = (tableNode, options) => {
  const possibleTags = [
    'UL',
    'OL',
    'H1',
    'H2',
    'H3',
    'H4',
    'H5',
    'H6',
    'HR',
    'BLOCKQUOTE',
  ];

  // In general we should leave as HTML tables that include other tables. The
  // exception is with the Web Clipper when we import a web page with a layout
  // that's made of HTML tables. In that case we have this logic of removing the
  // outer table and keeping only the inner ones. For the Rich Text editor
  // however we always want to keep nested tables.
  if (options && options.preserveNestedTables) possibleTags.push('TABLE');

  return nodeContains(tableNode, 'code') ||
    nodeContains(tableNode, possibleTags);
};
// Various conditions under which a table should be skipped - i.e. each cell
// will be rendered one after the other as if they were paragraphs.
//
// This is a memoising wrapper around tableShouldBeSkipped_(): the result for
// each table node is stored in tableShouldBeSkippedCache_ and reused on later
// calls for the same node.
function tableShouldBeSkipped(tableNode) {
  // A WeakMap only accepts objects as keys, so a missing table (null or
  // undefined) cannot be cached. Delegate directly; the uncached check
  // already answers that case.
  if (!tableNode) return tableShouldBeSkipped_(tableNode);

  const cached = tableShouldBeSkippedCache_.get(tableNode);
  if (cached !== undefined) return cached;

  const result = tableShouldBeSkipped_(tableNode);
  tableShouldBeSkippedCache_.set(tableNode, result);
  return result;
}
function tableShouldBeSkipped_(tableNode) {
if (!tableNode) return true;
if (!tableNode.rows) return true;
if (tableNode.rows.length === 1 && tableNode.rows[0].childNodes.length <= 1) return true; // Table with only one cell
@@ -136,6 +254,15 @@ function tableShouldBeSkipped(tableNode) {
return false;
}
// Returns the closest ancestor of `node` whose nodeName is 'DIV', or null when
// there is none - including when the node has no parent at all.
function nodeParentDiv(node) {
  let parent = node.parentNode;
  // Check `parent` before reading nodeName so a detached node or the top of
  // the tree ends the walk instead of throwing.
  while (parent && parent.nodeName !== 'DIV') {
    parent = parent.parentNode;
  }
  return parent || null;
}
function nodeParentTable(node) {
let parent = node.parentNode;
while (parent.nodeName !== 'TABLE') {
@@ -164,8 +291,12 @@ function tableColCount(node) {
}
export default function tables (turndownService) {
isCodeBlock_ = turndownService.isCodeBlock;
options_ = turndownService.options;
turndownService.keep(function (node) {
return node.nodeName === 'TABLE'
})
if (node.nodeName === 'TABLE' && tableShouldBeHtml(node, turndownService.options)) return true;
return false;
});
for (var key in rules) turndownService.addRule(key, rules[key])
}