From 0a49f259b687aba010dd2edfcdf73d040622b561 Mon Sep 17 00:00:00 2001 From: Thomas Lange Date: Thu, 29 Mar 2018 17:32:49 +0200 Subject: Update Parsedown library to version 1.7.1 This commit updates the Parsedown library to the latest version 1.7.1. The patch to prevent that tab indentations in code blocks are being converted to spaces (see 78c5974cd34559d0130d8be509935e2c992cd9ca) is already applied here. In addition, the patch file itself is now shipped within the same directory where the Parsedown.php resides. --- core/namespace/Parsedown.patch | 40 +++++++++ core/namespace/Parsedown.php | 179 +++++++++++++++++++++++++++++++++++------ 2 files changed, 195 insertions(+), 24 deletions(-) create mode 100644 core/namespace/Parsedown.patch diff --git a/core/namespace/Parsedown.patch b/core/namespace/Parsedown.patch new file mode 100644 index 0000000..1a81781 --- /dev/null +++ b/core/namespace/Parsedown.patch @@ -0,0 +1,40 @@ +160,176d159 +< if (strpos($line, "\t") !== false) +< { +< $parts = explode("\t", $line); +< +< $line = $parts[0]; +< +< unset($parts[0]); +< +< foreach ($parts as $part) +< { +< $shortage = 4 - mb_strlen($line, 'utf-8') % 4; +< +< $line .= str_repeat(' ', $shortage); +< $line .= $part; +< } +< } +< +327c310,313 +< if ($Line['indent'] >= 4) +--- +> $conditionA = $Line['indent'] >= 4; +> $conditionB = substr($Line['body'], 0, 1) === "\t"; +> +> if (($conditionA and $remove = 4) or ($conditionB and $remove = 1)) +329c315 +< $text = substr($Line['body'], 4); +--- +> $text = substr($Line['body'], $remove); +348c334,337 +< if ($Line['indent'] >= 4) +--- +> $conditionA = $Line['indent'] >= 4; +> $conditionB = substr($Line['body'], 0, 1) === "\t"; +> +> if (($conditionA and $remove = 4) or ($conditionB and $remove = 1)) +359c348 +< $text = substr($Line['body'], 4); +--- +> $text = substr($Line['body'], $remove); diff --git a/core/namespace/Parsedown.php b/core/namespace/Parsedown.php index ec7bdde..e101bc5 100644 --- a/core/namespace/Parsedown.php +++ b/core/namespace/Parsedown.php @@ -17,7 +17,7 @@ class Parsedown { # ~ - const version = '1.6.0'; + const version = '1.7.1'; # ~ @@ -75,6 +75,32 @@ class Parsedown protected $urlsLinked = true; + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -331,8 +357,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -343,7 +367,7 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } @@ -385,7 +409,7 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) { $Element = array( 'name' => 'code', @@ -446,8 +470,6 @@ class Parsedown { $text = $Block['element']['text']['text']; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $Block['element']['text']['text'] = $text; return $Block; @@ -504,10 +526,10 @@ class Parsedown ), ); - if($name === 'ol') + if($name === 'ol') { $listStart = stristr($matches[0], '.', true); - + if($listStart !== '1') { $Block['element']['attributes'] = array('start' => $listStart); @@ -536,6 +558,8 @@ class Parsedown { $Block['li']['text'] []= ''; + $Block['loose'] = true; + unset($Block['interrupted']); } @@ -584,6 +608,22 @@ class Parsedown } } + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) + { + foreach ($Block['element']['text'] as &$li) + { + if (end($li['text']) !== '') + { + $li['text'] []= ''; + } + } + } + + return $Block; + } + # # Quote @@ -667,12 +707,12 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) { $element = strtolower($matches[1]); @@ -986,7 +1026,7 @@ class Parsedown # ~ # - public function line($text) + public function line($text, $nonNestables=array()) { $markup = ''; @@ -1002,6 +1042,13 @@ class Parsedown foreach ($this->InlineTypes[$marker] as $inlineType) { + # check to see if the current inline type is nestable in the current context + + if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + { + continue; + } + $Inline = $this->{'inline'.$inlineType}($Excerpt); if ( ! isset($Inline)) @@ -1023,6 +1070,13 @@ class Parsedown $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + foreach ($nonNestables as $non_nestable) + { + $Inline['element']['nonNestables'][] = $non_nestable; + } + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); @@ -1063,7 +1117,6 @@ class Parsedown if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? 'a', 'handler' => 'line', + 'nonNestables' => array('Url', 'Link'), 'text' => null, 'attributes' => array( 'href' => null, @@ -1242,8 +1296,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1252,12 +1304,12 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) { return array( 'markup' => $matches[0], @@ -1273,7 +1325,7 @@ class Parsedown ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) { return array( 'markup' => $matches[0], @@ -1332,14 +1384,16 @@ class Parsedown if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1352,7 +1406,7 @@ class Parsedown { if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + $url = $matches[1]; return array( 'extent' => strlen($matches[0]), @@ -1390,6 +1444,11 @@ class Parsedown protected function element(array $Element) { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + $markup = '<'.$Element['name']; if (isset($Element['attributes'])) @@ -1401,7 +1460,7 @@ class Parsedown continue; } - $markup .= ' '.$name.'="'.$value.'"'; + $markup .= ' '.$name.'="'.self::escape($value).'"'; } } @@ -1409,13 +1468,18 @@ class Parsedown { $markup .= '>'; + if (!isset($Element['nonNestables'])) + { + $Element['nonNestables'] = array(); + } + if (isset($Element['handler'])) { - $markup .= $this->{$Element['handler']}($Element['text']); + $markup .= $this->{$Element['handler']}($Element['text'], $Element['nonNestables']); } else { - $markup .= $Element['text']; + $markup .= self::escape($Element['text'], true); } $markup .= ''; @@ -1474,10 +1538,77 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) -- cgit v1.2.3