convert($markdown); $html = self::sanitizeAndResolve($html, $media); if (trim(strip_tags($html)) === '' && !preg_match('/<(img|video|audio|figure)[\s>]/i', $html)) { $fallback = nl2br(htmlspecialchars($markdown, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')); $html = '

' . str_replace('
', '

', $fallback) . '

'; } return $html; }

    /**
     * Rebuilds an HTML fragment from an allow-list and resolves media references.
     *
     * Allow-list reconstruction: nothing is copied over from the parsed source
     * as-is. Every kept node is re-created in a fresh document, and the
     * descendants of a disallowed tag are sanitized recursively before being
     * re-inserted in its place.
     *
     * @param string $html  HTML fragment to sanitize.
     * @param Media  $media Media lookup used to resolve "media:" image sources.
     *
     * @return string The sanitized fragment, trimmed; '' when nothing could be parsed.
     *
     * @throws RuntimeException When a referenced image cannot be found (see buildSanitizedImage()).
     */
    private static function sanitizeAndResolve(string $html, Media $media): string
    {
        $source = new DOMDocument('1.0', 'UTF-8');
        $clean = new DOMDocument('1.0', 'UTF-8');
        $cleanBody = $clean->createElement('body');
        $clean->appendChild($cleanBody);

        // The fragment is wrapped in a minimal document for three reasons:
        //  - LIBXML_HTML_NOIMPLIED stops libxml from adding <body> itself, so
        //    without the wrapper the lookup below never finds a body element;
        //  - the charset declaration makes libxml read the input as UTF-8;
        //  - loadHTML() rejects an empty string, which the wrapper rules out.
        $document = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
            . $html
            . '</body></html>';

        $previousUseInternalErrors = libxml_use_internal_errors(true);
        try {
            $source->loadHTML($document, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        } finally {
            // Restore the caller's libxml error mode even if parsing throws.
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }

        $sourceBody = $source->getElementsByTagName('body')->item(0);
        if (!$sourceBody instanceof DOMElement) {
            return '';
        }

        self::appendSanitizedChildren($sourceBody, $cleanBody, $clean, $media);

        // Serialize the children only, so the synthetic <body> is not emitted.
        $out = '';
        foreach ($cleanBody->childNodes as $child) {
            $out .= $clean->saveHTML($child);
        }

        return trim($out);
    }

    /**
     * Copies the sanitized form of every child of $sourceParent into $targetParent.
     *
     * Comments are dropped, text nodes are re-created as plain text, elements go
     * through appendSanitizedElement(), and any other node type is ignored.
     *
     * @param DOMNode     $sourceParent Node whose children are read.
     * @param DOMNode     $targetParent Node (owned by $target) receiving the clean copies.
     * @param DOMDocument $target       Document used to create the clean nodes.
     * @param Media       $media        Media lookup forwarded to image handling.
     */
    private static function appendSanitizedChildren(DOMNode $sourceParent, DOMNode $targetParent, DOMDocument $target, Media $media): void
    {
        // The source tree is never modified here, so the node list can be walked directly.
        foreach ($sourceParent->childNodes as $child) {
            if ($child instanceof DOMComment) {
                continue;
            }

            if ($child instanceof DOMText) {
                $targetParent->appendChild($target->createTextNode($child->nodeValue ?? ''));
                continue;
            }

            if ($child instanceof DOMElement) {
                self::appendSanitizedElement($child, $targetParent, $target, $media);
            }
        }
    }

    /**
     * Appends the sanitized counterpart of one source element to $targetParent.
     *
     * - A tag outside self::ALLOWED_TAGS is unwrapped: the tag itself is dropped
     *   and its children are sanitized into $targetParent directly.
     * - <img> is rebuilt by buildSanitizedImage() and skipped when that returns null.
     * - Any other allowed tag is re-created with only the attributes that
     *   sanitizeAttributes() lets through, then its children are processed.
     */
    private static function appendSanitizedElement(DOMElement $sourceElement, DOMNode $targetParent, DOMDocument $target, Media $media): void
    {
        $tag = strtolower($sourceElement->tagName);

        if (!in_array($tag, self::ALLOWED_TAGS, true)) {
            self::appendSanitizedChildren($sourceElement, $targetParent, $target, $media);

            return;
        }

        if ($tag === 'img') {
            $image = self::buildSanitizedImage($sourceElement, $target, $media);
            if ($image !== null) {
                $targetParent->appendChild($image);
            }

            return;
        }

        $cleanElement = $target->createElement($tag);
        self::sanitizeAttributes($sourceElement, $cleanElement);
        $targetParent->appendChild($cleanElement);
        self::appendSanitizedChildren($sourceElement, $cleanElement, $target, $media);
    }

    /**
     * Copies the permitted attributes from $sourceElement onto $targetElement.
     *
     * Only <a> elements receive attributes: a validated "href" (with
     * rel="noopener noreferrer", plus target="_blank" for http/https links)
     * and a cleaned, non-empty "title". Every other element stays attribute-free.
     */
    private static function sanitizeAttributes(DOMElement $sourceElement, DOMElement $targetElement): void
    {
        if ($targetElement->tagName !== 'a') {
            return;
        }

        $href = self::sanitizeHref((string) $sourceElement->getAttribute('href'));
        if ($href !== null) {
            $targetElement->setAttribute('href', $href);
            $targetElement->setAttribute('rel', 'noopener noreferrer');
            if (preg_match('~^https?://~i', $href) === 1) {
                $targetElement->setAttribute('target', '_blank');
            }
        }

        $title = self::sanitizeAttributeValue((string) $sourceElement->getAttribute('title'));
        if ($title !== null) {
            $targetElement->setAttribute('title', $title);
        }
    }

    /**
     * Builds a clean <img> element for a source image using a "media:<file>" source.
     *
     * The file name after the "media:" prefix is looked up through
     * $media->findByFileName(); the resulting item's 'url' becomes the src.
     * NOTE(review): the item is assumed to be array-accessible with 'url' and
     * 'alt' entries - confirm against Media::findByFileName().
     *
     * @return DOMElement|null The new image, or null when the source does not use
     *                         the "media:" scheme or carries an unusable file name.
     *
     * @throws RuntimeException When the referenced media item does not exist.
     */
    private static function buildSanitizedImage(DOMElement $sourceElement, DOMDocument $target, Media $media): ?DOMElement
    {
        $src = trim((string) $sourceElement->getAttribute('src'));
        if ($src === '' || !str_starts_with($src, 'media:')) {
            return null;
        }

        $fileName = substr($src, 6); // strip the "media:" prefix
        // "!== 0" (rather than "=== 1") also rejects the name when the match
        // itself fails, e.g. on malformed UTF-8, instead of letting it through.
        if ($fileName === '' || preg_match('/[\x00-\x1F\x7F]/u', $fileName) !== 0) {
            return null;
        }

        $item = $media->findByFileName($fileName);
        if ($item === null) {
            throw new RuntimeException('Une image utilisée dans le Markdown est introuvable.');
        }

        $image = $target->createElement('img');
        $image->setAttribute('src', (string) $item['url']);
        $image->setAttribute('loading', 'lazy');
        $image->setAttribute('decoding', 'async');

        // Alt text priority: explicit alt from the markup (even when empty),
        // then the media item's own alt, then an empty alt attribute.
        $itemAlt = (string) ($item['alt'] ?? '');
        if ($sourceElement->hasAttribute('alt')) {
            $image->setAttribute('alt', self::sanitizeAttributeValue((string) $sourceElement->getAttribute('alt'), true) ?? '');
        } elseif ($itemAlt !== '') {
            $image->setAttribute('alt', $itemAlt);
        } else {
            $image->setAttribute('alt', '');
        }

        $title = self::sanitizeAttributeValue((string) $sourceElement->getAttribute('title'));
        if ($title !== null) {
            $image->setAttribute('title', $title);
        }

        return $image;
    }

    /**
     * Validates a link target.
     *
     * Accepted: http(s)://, mailto:, tel: and the relative forms allowed by
     * isSafeRelativeHref(). Entities are decoded and the value trimmed first.
     *
     * @return string|null The decoded, trimmed href, or null when it is empty,
     *                     contains control characters, or uses another scheme.
     */
    private static function sanitizeHref(string $href): ?string
    {
        $href = trim(html_entity_decode($href, ENT_QUOTES | ENT_HTML5, 'UTF-8'));

        // Fail closed: a failed match (malformed UTF-8) is treated like a hit.
        if ($href === '' || preg_match('/[\x00-\x1F\x7F]/u', $href) !== 0) {
            return null;
        }

        if (preg_match('~^(https?://|mailto:|tel:)~i', $href) === 1) {
            return $href;
        }

        return self::isSafeRelativeHref($href) ? $href : null;
    }

    /**
     * Tells whether $href is a same-site relative reference.
     *
     * Allowed: "/", "/path", "./path", "../path", "#fragment" and "?query".
     * Rejected: protocol-relative "//host" and its "/\host" spelling, which
     * browsers resolve the same way because they treat "\" as "/" in http(s) URLs.
     */
    private static function isSafeRelativeHref(string $href): bool
    {
        if ($href === '/') {
            return true;
        }

        if (str_starts_with($href, '//')) {
            return false;
        }

        return preg_match('~^(?:/[^/\\\\]|\./|\.\./|#|\?)~', $href) === 1;
    }

    /**
     * Cleans a free-text attribute value (title, alt).
     *
     * Entities are decoded, each run of control characters becomes a single
     * space, and the result is trimmed.
     *
     * @param bool $allowEmpty When false, an empty result is reported as null.
     *
     * @return string|null The cleaned value, or null when empty and not allowed.
     */
    private static function sanitizeAttributeValue(string $value, bool $allowEmpty = false): ?string
    {
        $value = html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        // preg_replace() yields null on failure; the cast turns that into ''.
        $value = trim((string) preg_replace('/[\x00-\x1F\x7F]+/u', ' ', $value));

        if ($value === '' && !$allowEmpty) {
            return null;
        }

        return $value;
    }

    /**
     * Normalizes line endings and isolates lists with blank lines.
     *
     * Outside fenced code blocks, a blank line is inserted before a list item
     * that directly follows a non-list line, and before a non-blank, non-list
     * line that directly follows a list item. Lines inside a fence are copied
     * untouched. A fence is closed only by a line made of the same marker
     * character, at least as long as the opening run, with nothing else on it;
     * a tilde line inside a backtick fence therefore stays part of the code.
     *
     * @return string The normalized Markdown, trimmed.
     */
    private static function normalizeMarkdown(string $markdown): string
    {
        $lines = explode("\n", str_replace(["\r\n", "\r"], "\n", $markdown));
        $listItemPattern = '/^\s*(?:[-+*]|\d+\.)\s+/';

        $normalized = [];
        $openFence = null; // marker run that opened the current fence, or null outside one

        foreach ($lines as $line) {
            if ($openFence !== null) {
                $normalized[] = $line;

                $closesFence = preg_match('/^\s*(`{3,}|~{3,})\s*$/', $line, $closing) === 1
                    && $closing[1][0] === $openFence[0]
                    && strlen($closing[1]) >= strlen($openFence);
                if ($closesFence) {
                    $openFence = null;
                }

                continue;
            }

            if (preg_match('/^\s*(`{3,}|~{3,})/', $line, $opening) === 1) {
                $openFence = $opening[1];
                $normalized[] = $line;

                continue;
            }

            $isBlank = trim($line) === '';
            $isListItem = preg_match($listItemPattern, $line) === 1;

            $previous = $normalized[count($normalized) - 1] ?? null;
            $previousIsBlank = $previous === null || trim($previous) === '';
            $previousIsListItem = $previous !== null && preg_match($listItemPattern, $previous) === 1;

            if ($isListItem && !$previousIsBlank && !$previousIsListItem) {
                $normalized[] = '';
            }

            if (!$isBlank && !$isListItem && $previousIsListItem) {
                $normalized[] = '';
            }

            $normalized[] = $line;
        }

        return trim(implode("\n", $normalized));
    }
}