Skip to content

Fix: target correct link when multiple matches are present #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 49 additions & 38 deletions src/paste-markdown-html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ export function uninstall(el: HTMLElement): void {
el.removeEventListener('paste', onPaste)
}

type MarkdownTransformer = (element: HTMLElement | HTMLAnchorElement, args: string[]) => string

function onPaste(event: ClipboardEvent) {
const transfer = event.clipboardData
// if there is no clipboard data, or
Expand All @@ -20,65 +18,78 @@ function onPaste(event: ClipboardEvent) {
if (!(field instanceof HTMLTextAreaElement)) return

// Get the plaintext and html version of clipboard contents
let text = transfer.getData('text/plain')
let plaintext = transfer.getData('text/plain')
const textHTML = transfer.getData('text/html')
// Replace Unicode equivalent of "&nbsp" with a space
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ')
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ').replace(/\uC2A0/g, ' ')
if (!textHTML) return

text = text.trim()
if (!text) return
plaintext = plaintext.trim()
if (!plaintext) return

// Generate DOM tree from HTML string
const parser = new DOMParser()
const doc = parser.parseFromString(textHTMLClean, 'text/html')
const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT)

const a = doc.getElementsByTagName('a')
const markdown = transform(a, text, linkify as MarkdownTransformer)
const markdown = convertToMarkdown(plaintext, walker)

// If no changes made by transforming
if (markdown === text) return
if (markdown === plaintext) return

event.stopPropagation()
event.preventDefault()

insertText(field, markdown)
}

// Build a markdown string from a DOM tree and plaintext
function transform(
elements: HTMLCollectionOf<HTMLElement>,
text: string,
transformer: MarkdownTransformer,
...args: string[]
): string {
const markdownParts = []
for (const element of elements) {
const textContent = element.textContent || ''
const {part, index} = trimAfter(text, textContent)
if (index >= 0) {
markdownParts.push(part.replace(textContent, transformer(element, args)))
text = text.slice(index)
/**
 * Builds a markdown string from the plaintext clipboard contents by replacing the
 * label of every link found in the pasted HTML with its markdown form.
 *
 * Nodes are visited in the order the walker yields them. A cursor
 * (`markdownIgnoreBeforeIndex`) marks how much of the output has already been
 * accounted for; each node's text is only searched for at or after that cursor, so a
 * label that occurs several times is matched at the occurrence that lines up with
 * the node's position in the document.
 *
 * @param plaintext The `text/plain` version of the clipboard contents.
 * @param walker A walker over the DOM parsed from the `text/html` version.
 * @returns The plaintext with links converted to markdown, or `plaintext` unchanged
 *   when the walk was cut short by the node limit.
 */
function convertToMarkdown(plaintext: string, walker: TreeWalker): string {
  // Cap on the number of nodes visited in a single conversion
  const NODE_LIMIT = 10000

  let currentNode = walker.firstChild()
  let markdown = plaintext
  let markdownIgnoreBeforeIndex = 0
  let index = 0

  // Walk through the DOM tree
  while (currentNode && index < NODE_LIMIT) {
    index++

    // A link is matched as a whole through its textContent. Matching its descendants
    // again would move the cursor past a later occurrence of the same text and make
    // a following link with an identical label impossible to find.
    if (!isInsideLink(currentNode)) {
      const text = isLink(currentNode)
        ? currentNode.textContent || ''
        : (currentNode.firstChild as Text)?.wholeText || ''

      // No need to transform whitespace
      if (!isEmptyString(text)) {
        // Find the index where "text" is found in "markdown" _after_ "markdownIgnoreBeforeIndex"
        const markdownFoundIndex = markdown.indexOf(text, markdownIgnoreBeforeIndex)

        if (markdownFoundIndex >= 0) {
          if (isLink(currentNode)) {
            const markdownLink = linkify(currentNode)
            // Transform 'example link plus more text' into 'example [link](example link) plus more text'
            // Method: 'example [link](example link) plus more text' = 'example ' + '[link](example link)' + ' plus more text'
            markdown =
              markdown.slice(0, markdownFoundIndex) + markdownLink + markdown.slice(markdownFoundIndex + text.length)
            markdownIgnoreBeforeIndex = markdownFoundIndex + markdownLink.length
          } else {
            markdownIgnoreBeforeIndex = markdownFoundIndex + text.length
          }
        }
      }
    }

    currentNode = walker.nextNode()
  }

  // A node still pending here means the limit stopped the walk early; a partial
  // conversion is discarded in favour of the untouched plaintext.
  return currentNode ? plaintext : markdown
}

// Reports whether any ancestor of "node" is a link
function isInsideLink(node: Node): boolean {
  for (let ancestor = node.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (isLink(ancestor)) return true
  }
  return false
}

index += search.length
// Reports whether "text" is empty or made up of whitespace only
function isEmptyString(text: string): boolean {
  if (!text) return true

  const withoutWhitespace = text.trim()
  return withoutWhitespace.length === 0
}

return {
part: text.substring(0, index),
index
}
// Type guard: true when "node" has the tag name "a" (compared case-insensitively)
// and carries an "href" attribute
function isLink(node: Node): node is HTMLAnchorElement {
  const element = node as HTMLElement
  const isAnchorTag = element.tagName?.toLowerCase() === 'a'
  if (!isAnchorTag) return false

  return element.hasAttribute('href')
}

function hasHTML(transfer: DataTransfer): boolean {
Expand Down
34 changes: 22 additions & 12 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ describe('paste-markdown', function () {

it('turns mixed html content containing several links into appropriate markdown', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
const sentence = `<meta charset='utf-8'>
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
<a href="https://github.com/"><span>link</span></a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
<a href="https://github.com/">link</a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>end</span></a></b>`
// eslint-disable-next-line i18n-text/no-en
Expand Down Expand Up @@ -186,19 +186,29 @@ describe('paste-markdown', function () {

it('leaves plaintext links alone', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
const sentence = `<meta charset='utf-8'>
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
<a href="https://github.com/"><span>https://github.com</span></a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>https://github.com/</span></a></b>`
<a href="https://github.com/">link</a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
<br /><a href="https://github.com/">Link</a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>end</span></a></b>`
/* eslint-disable i18n-text/no-en */
const plaintextSentence =
'This is a https://github.com and another link\n\nLink at the beginning, link at the https://github.com/'
const plaintextSentence = 'This is a link and another link\n\nLink at the beginning, link at the end'
/* eslint-enable i18n-text/no-en */
const markdownSentence =
'This is a https://github.com/ and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
'[Link](https://github.com/) at the beginning, link at the https://github.com/'
'This is a [link](https://github.com/) and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
'[Link](https://github.com/) at the beginning, link at the [end](https://github.com/)'

paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
assert.equal(textarea.value, markdownSentence)
})

it('finds the right link when identical labels are present', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><span>example<span> </span>
</span><a href="https://example.com/">example</a>`
const plaintextSentence = 'example example'
const markdownSentence = 'example [example](https://example.com/)'

paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
assert.equal(textarea.value, markdownSentence)
Expand Down