['href', 'title', 'rel', 'target'],
'img' => ['src', 'alt', 'width', 'height', 'loading', 'decoding'],
];
/**
 * Compiles author-supplied Markdown into sanitized HTML.
 *
 * The Markdown is trimmed, raw HTML is neutralized, the text is converted
 * to HTML, and the result is parsed into a DOM tree that is sanitized in
 * place before being serialized back to a string.
 *
 * @param string $markdown Markdown source to compile.
 * @param Media  $media    Media lookup used to resolve image references.
 *
 * @return string Sanitized HTML fragment, or an empty string when the
 *                wrapper element cannot be found after parsing.
 *
 * @throws RuntimeException When the Markdown is empty after trimming, or
 *                          when a referenced image cannot be found.
 */
public function compile(string $markdown, Media $media): string
{
    $markdown = trim($markdown);
    if ($markdown === '') {
        throw new RuntimeException('Ajoute du contenu avant de publier.');
    }

    $markdown = $this->neutralizeRawHtml($markdown);

    // Wrap the converted markup in a single known element so that the
    // fragment has exactly one root that can be looked up by id below.
    $html = '<div id="content">' . Markdown::instance()->convert($markdown) . '</div>';

    $doc = new DOMDocument('1.0', 'UTF-8');
    $previous = libxml_use_internal_errors(true);
    try {
        // The leading XML declaration is an encoding hint: without it the
        // HTML parser assumes ISO-8859-1 and corrupts multi-byte characters.
        $doc->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } finally {
        // Always restore the caller's libxml error mode, even if parsing throws.
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    $root = $doc->getElementById('content');
    if (!$root) {
        return '';
    }

    $this->sanitizeChildren($root, $media);

    // Serialize only the wrapper's children so the wrapper itself is not emitted.
    $out = '';
    foreach (iterator_to_array($root->childNodes) as $child) {
        $out .= $doc->saveHTML($child);
    }

    return trim($out);
}
/**
 * Escapes raw HTML tags and comments embedded in Markdown source.
 *
 * Every substring that looks like an HTML comment or an opening/closing
 * tag has its angle brackets replaced with entities, so the Markdown
 * converter treats it as literal text instead of markup. Text that merely
 * contains "<" or ">" (comparisons, blockquote markers, URL autolinks such
 * as <https://example.com>) does not match the pattern and is left alone.
 *
 * @param string $markdown Markdown source.
 *
 * @return string Markdown with tag-like sequences escaped. If the regex
 *                engine reports an error, the input is returned unchanged.
 */
private function neutralizeRawHtml(string $markdown): string
{
    return preg_replace_callback(
        // Either an HTML comment, or "<" + optional "/" + tag name +
        // optional attributes + optional "/" + ">". The "s" flag lets
        // comments span several lines.
        '~<!--.*?-->|</?[A-Za-z][A-Za-z0-9:-]*(?:\s[^<>]*)?/?>~s',
        static fn(array $match): string => str_replace(['<', '>'], ['&lt;', '&gt;'], $match[0]),
        $markdown
    ) ?? $markdown;
}
/**
 * Recursively sanitizes the element children of a node, in place.
 *
 * - Elements whose tag is not listed in self::TAGS are unwrapped.
 * - Attributes not listed in self::ATTRS for the tag are removed.
 * - Links with a disallowed href are unwrapped; kept links get a fixed
 *   "rel" value and open in a new tab only for absolute http(s) URLs.
 * - Images must reference "media:<file name>"; other images are removed,
 *   and resolved images get their attributes filled from the media item.
 *
 * Each node is visited exactly once. Re-visiting would be harmful: an
 * image whose src has already been rewritten to its final URL no longer
 * starts with "media:" and would be removed on a second pass.
 *
 * @param DOMNode $parent Node whose children are sanitized.
 * @param Media   $media  Media lookup used to resolve image references.
 *
 * @throws RuntimeException When an image references an unknown media file.
 */
private function sanitizeChildren(DOMNode $parent, Media $media): void
{
    $child = $parent->firstChild;
    while ($child !== null) {
        // Remember the successor first: the current node may be detached below.
        $next = $child->nextSibling;

        if (!$child instanceof DOMElement) {
            $child = $next;
            continue;
        }

        $tag = strtolower($child->tagName);
        if (!in_array($tag, self::TAGS, true)) {
            $child = $this->unwrapAndResume($child, $parent, $next);
            continue;
        }

        foreach (iterator_to_array($child->attributes) as $attr) {
            if (!in_array(strtolower($attr->name), self::ATTRS[$tag] ?? [], true)) {
                $child->removeAttributeNode($attr);
            }
        }

        if ($tag === 'a') {
            $href = trim((string) $child->getAttribute('href'));
            if (!$this->allowedHref($href)) {
                $child = $this->unwrapAndResume($child, $parent, $next);
                continue;
            }
            $child->setAttribute('href', $href);
            $child->setAttribute('rel', 'noopener noreferrer');
            if (preg_match('~^https?://~i', $href)) {
                $child->setAttribute('target', '_blank');
            } else {
                $child->removeAttribute('target');
            }
        }

        if ($tag === 'img') {
            $src = trim((string) $child->getAttribute('src'));
            if (!str_starts_with($src, 'media:')) {
                $child->parentNode?->removeChild($child);
                $child = $next;
                continue;
            }
            // Everything after the "media:" prefix is the file name to look up.
            $item = $media->findByFileName(substr($src, 6));
            if (!$item) {
                throw new RuntimeException('Une image utilisée dans le Markdown est introuvable.');
            }
            // Compare against '' explicitly so that an author-provided alt
            // of "0" is kept instead of being treated as empty.
            $alt = trim((string) $child->getAttribute('alt'));
            $child->setAttribute('src', $item['url']);
            $child->setAttribute('alt', $alt !== '' ? $alt : (string) $item['alt']);
            $child->setAttribute('width', (string) $item['width']);
            $child->setAttribute('height', (string) $item['height']);
            $child->setAttribute('loading', 'lazy');
            $child->setAttribute('decoding', 'async');
        }

        $this->sanitizeChildren($child, $media);
        $child = $next;
    }
}

/**
 * Unwraps an element and returns the node at which the sibling walk resumes.
 *
 * When unwrapping hoisted the element's children into $parent, the walk
 * resumes at the first hoisted child so those nodes get sanitized too.
 * When the element was dropped together with its content, the walk
 * resumes at the element's former next sibling.
 *
 * @param DOMElement   $element Element to unwrap; must be a child of $parent.
 * @param DOMNode      $parent  Node currently being walked.
 * @param DOMNode|null $next    Sibling that followed $element before unwrapping.
 *
 * @return DOMNode|null Next node to visit, or null when the walk is finished.
 */
private function unwrapAndResume(DOMElement $element, DOMNode $parent, ?DOMNode $next): ?DOMNode
{
    $firstHoisted = $element->firstChild;
    $this->unwrap($element);

    if ($firstHoisted !== null && $firstHoisted->parentNode?->isSameNode($parent) === true) {
        return $firstHoisted;
    }

    return $next;
}
/**
 * Removes an element from the tree while keeping its content in place.
 *
 * The element's children are moved, in order, to the position the element
 * occupied. The exception is script and style elements, which are removed
 * together with everything they contain. A detached element is ignored.
 *
 * @param DOMElement $node Element to remove.
 */
private function unwrap(DOMElement $node): void
{
    $container = $node->parentNode;
    if ($container === null) {
        return;
    }

    $dropContents = in_array(strtolower($node->tagName), ['script', 'style'], true);
    if (!$dropContents) {
        // Hoist children one at a time; each move shifts the next child to the front.
        for ($inner = $node->firstChild; $inner !== null; $inner = $node->firstChild) {
            $container->insertBefore($inner, $node);
        }
    }

    $container->removeChild($node);
}
/**
 * Decides whether a link target is safe to keep.
 *
 * Accepted: fragment links ("#..."), paths starting with "/", absolute
 * http(s) URLs, mailto links, and relative references without a scheme.
 * Rejected: the empty string, any value containing ASCII control
 * characters, and any other explicit scheme (e.g. "javascript:").
 *
 * The value is expected to be trimmed by the caller.
 *
 * @param string $href Link target to check.
 *
 * @return bool True when the link may be kept.
 */
private function allowedHref(string $href): bool
{
    if ($href === '') {
        return false;
    }

    // Browsers strip tabs, newlines and leading control characters from
    // URLs before resolving them, so "java\tscript:..." would slip past
    // the scheme check below and still execute. Reject such values.
    if (preg_match('~[\x00-\x1F\x7F]~', $href) === 1) {
        return false;
    }

    if (str_starts_with($href, '#') || str_starts_with($href, '/')) {
        return true;
    }

    if (preg_match('~^(?:https?://|mailto:)~i', $href) === 1) {
        return true;
    }

    // Anything else is allowed only when it carries no scheme at all,
    // i.e. it is a relative reference.
    return preg_match('~^[a-z][a-z0-9+.-]*:~i', $href) !== 1;
}
}