Upgrade framework

This commit is contained in:
2023-11-14 16:54:35 +01:00
parent 1648a5cd42
commit 4fcf6fffcc
10548 changed files with 693138 additions and 466698 deletions

View File

@@ -0,0 +1,47 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Node\Block\AbstractBlock;
/**
 * Convenience base class for block continue parsers.
 *
 * Provides "do nothing / answer no" defaults for the optional parts of
 * BlockContinueParserInterface so that concrete parsers only override what
 * they need. getBlock() and tryContinue() are intentionally left for the
 * subclass to implement.
 */
abstract class AbstractBlockContinueParser implements BlockContinueParserInterface
{
    /**
     * Default: child blocks are never accepted.
     */
    public function canContain(AbstractBlock $childBlock): bool
    {
        return false;
    }

    /**
     * Default: the block being parsed is not a container.
     */
    public function isContainer(): bool
    {
        return false;
    }

    /**
     * Default: lazy continuation lines are not supported.
     */
    public function canHaveLazyContinuationLines(): bool
    {
        return false;
    }

    /**
     * Default: lines handed to this parser are ignored.
     */
    public function addLine(string $line): void
    {
        // Intentionally empty; override to collect line content.
    }

    /**
     * Default: nothing needs to happen when the block is closed.
     */
    public function closeBlock(): void
    {
        // Intentionally empty; override to finalize the block.
    }
}

View File

@@ -0,0 +1,73 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\CursorState;
/**
 * Outcome returned by a block parser when asked whether it can continue.
 *
 * The constructor is private; use the static factories none(), at() and
 * finished() to express the three possible outcomes.
 *
 * @psalm-immutable
 */
final class BlockContinue
{
    /** @psalm-readonly */
    private ?CursorState $cursorState = null;

    /** @psalm-readonly */
    private bool $finalize;

    private function __construct(?CursorState $cursorState = null, bool $finalize = false)
    {
        $this->finalize    = $finalize;
        $this->cursorState = $cursorState;
    }

    /**
     * Signal that we cannot continue here
     *
     * @return null
     */
    public static function none(): ?self
    {
        return null;
    }

    /**
     * Signal that we're continuing at the given position
     *
     * The cursor's state is captured at the moment of the call.
     */
    public static function at(Cursor $cursor): self
    {
        $snapshot = $cursor->saveState();

        return new self($snapshot, false);
    }

    /**
     * Signal that we want to finalize and close the block
     *
     * The resulting instance carries no cursor state.
     */
    public static function finished(): self
    {
        return new self(null, true);
    }

    /**
     * @return CursorState|null The state captured by at(), or null for finished()
     */
    public function getCursorState(): ?CursorState
    {
        return $this->cursorState;
    }

    /**
     * @return bool True only for instances created via finished()
     */
    public function isFinalize(): bool
    {
        return $this->finalize;
    }
}

View File

@@ -0,0 +1,64 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Parser\Cursor;
/**
* Interface for a block continuation parser
*
* A block continue parser can only handle a single block instance. The current block being parsed is stored within this parser and
* can be returned once parsing has completed. If you need to parse multiple block continuations, instantiate a new parser for each one.
*/
interface BlockContinueParserInterface
{
/**
* Return the current block being parsed by this parser
*/
public function getBlock(): AbstractBlock;
/**
* Return whether we are parsing a container block
*/
public function isContainer(): bool;
/**
* Return whether we are interested in possibly lazily parsing any subsequent lines
*/
public function canHaveLazyContinuationLines(): bool;
/**
* Determine whether the current block being parsed can contain the given child block
*
* @param AbstractBlock $childBlock The candidate child block
*/
public function canContain(AbstractBlock $childBlock): bool;
/**
* Attempt to parse the given line
*
* @param Cursor $cursor Cursor over the line being examined
* @param BlockContinueParserInterface $activeBlockParser NOTE(review): presumably the innermost parser that is currently open - confirm against the caller
*
* @return BlockContinue|null Null (BlockContinue::none()) when this block cannot continue on this line; otherwise
* a result created with BlockContinue::at() (continue at a position) or
* BlockContinue::finished() (finalize and close the block)
*/
public function tryContinue(Cursor $cursor, BlockContinueParserInterface $activeBlockParser): ?BlockContinue;
/**
* Add the given line of text to the current block
*
* @param string $line The line content to add
*/
public function addLine(string $line): void;
/**
* Close and finalize the current block
*/
public function closeBlock(): void;
}

View File

@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Parser\InlineParserEngineInterface;
/**
* Block continue parser whose block can additionally have its inline content parsed.
*
* Adds a single parseInlines() hook on top of BlockContinueParserInterface.
*/
interface BlockContinueParserWithInlinesInterface extends BlockContinueParserInterface
{
/**
* Parse any inlines inside of the current block
*
* @param InlineParserEngineInterface $inlineParser Engine used to parse the block's inline content
*/
public function parseInlines(InlineParserEngineInterface $inlineParser): void;
}

View File

@@ -0,0 +1,124 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\CursorState;
/**
 * Outcome returned by a block start parser; build one with the static
 * factories (of(), none(), abort()) and refine it with the fluent setters.
 */
final class BlockStart
{
    /**
     * @var BlockContinueParserInterface[]
     *
     * @psalm-readonly
     */
    private array $blockParsers;

    /** @psalm-readonly-allow-private-mutation */
    private ?CursorState $cursorState = null;

    /** @psalm-readonly-allow-private-mutation */
    private bool $replaceActiveBlockParser = false;

    private bool $isAborting = false;

    private function __construct(BlockContinueParserInterface ...$blockParsers)
    {
        $this->blockParsers = $blockParsers;
    }

    /**
     * Signal that we'd like to register the given parser(s) so they can parse the current block
     */
    public static function of(BlockContinueParserInterface ...$blockParsers): self
    {
        return new self(...$blockParsers);
    }

    /**
     * Signal that we cannot parse whatever is here
     *
     * @return null
     */
    public static function none(): ?self
    {
        return null;
    }

    /**
     * Signal that the block parsing process should be aborted (no other block starts should be checked)
     *
     * The returned instance carries no block parsers.
     *
     * @internal
     */
    public static function abort(): self
    {
        $start = new self();

        $start->isAborting = true;

        return $start;
    }

    /**
     * Signal that we want to parse at the given cursor position
     *
     * The cursor's state is captured at the moment of the call.
     *
     * @return $this
     */
    public function at(Cursor $cursor): self
    {
        $this->cursorState = $cursor->saveState();

        return $this;
    }

    /**
     * Signal that we want to replace the active block parser with this one
     *
     * @return $this
     */
    public function replaceActiveBlockParser(): self
    {
        $this->replaceActiveBlockParser = true;

        return $this;
    }

    /**
     * @return BlockContinueParserInterface[]
     */
    public function getBlockParsers(): iterable
    {
        return $this->blockParsers;
    }

    /**
     * @return CursorState|null The state recorded by at(), or null if at() was never called
     */
    public function getCursorState(): ?CursorState
    {
        return $this->cursorState;
    }

    /**
     * @return bool Whether replaceActiveBlockParser() has been called on this instance
     */
    public function isReplaceActiveBlockParser(): bool
    {
        return $this->replaceActiveBlockParser;
    }

    /**
     * @internal
     */
    public function isAborting(): bool
    {
        return $this->isAborting;
    }
}

View File

@@ -0,0 +1,33 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\MarkdownParserStateInterface;
/**
* Interface for a block parser which identifies block starts.
*
* An implementation reports a match by returning a BlockStart (for example one built with BlockStart::of())
* and declines by returning null (BlockStart::none()).
*/
interface BlockStartParserInterface
{
/**
* Check whether we should handle the block at the current position
*
* @param Cursor $cursor A cloned copy of the cursor at the current parsing location
* @param MarkdownParserStateInterface $parserState Additional information about the state of the Markdown parser
*
* @return BlockStart|null The BlockStart that has been identified, or null if the block doesn't match here
*/
public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserState): ?BlockStart;
}

View File

@@ -0,0 +1,53 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Node\Block\Document;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Reference\ReferenceMapInterface;
/**
 * Block parser for the root-level Document.
 *
 * It owns the Document node, accepts every child block and always continues,
 * which ensures everything is added to the root-level Document.
 */
final class DocumentBlockParser extends AbstractBlockContinueParser
{
    /** @psalm-readonly */
    private Document $document;

    /**
     * @param ReferenceMapInterface $referenceMap Reference map handed to the newly created Document
     */
    public function __construct(ReferenceMapInterface $referenceMap)
    {
        $this->document = new Document($referenceMap);
    }

    /**
     * @return Document The document created by this parser
     */
    public function getBlock(): Document
    {
        return $this->document;
    }

    /**
     * The document is always a container.
     */
    public function isContainer(): bool
    {
        return true;
    }

    /**
     * The document accepts any child block.
     */
    public function canContain(AbstractBlock $childBlock): bool
    {
        return true;
    }

    /**
     * The document always continues, at the cursor's current state.
     */
    public function tryContinue(Cursor $cursor, BlockContinueParserInterface $activeBlockParser): ?BlockContinue
    {
        $continuation = BlockContinue::at($cursor);

        return $continuation;
    }
}

View File

@@ -0,0 +1,87 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Node\Block\Paragraph;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\InlineParserEngineInterface;
use League\CommonMark\Reference\ReferenceInterface;
use League\CommonMark\Reference\ReferenceParser;
/**
 * Block parser for paragraphs.
 *
 * Every line is fed through a ReferenceParser; whatever that parser reports
 * as paragraph content is what later gets inline-parsed.
 */
final class ParagraphParser extends AbstractBlockContinueParser implements BlockContinueParserWithInlinesInterface
{
    /** @psalm-readonly */
    private Paragraph $block;

    /** @psalm-readonly */
    private ReferenceParser $referenceParser;

    public function __construct()
    {
        $this->referenceParser = new ReferenceParser();
        $this->block           = new Paragraph();
    }

    public function getBlock(): Paragraph
    {
        return $this->block;
    }

    /**
     * Paragraphs accept lazy continuation lines.
     */
    public function canHaveLazyContinuationLines(): bool
    {
        return true;
    }

    /**
     * A paragraph continues on any non-blank line and cannot continue on a blank one.
     */
    public function tryContinue(Cursor $cursor, BlockContinueParserInterface $activeBlockParser): ?BlockContinue
    {
        return $cursor->isBlank()
            ? BlockContinue::none()
            : BlockContinue::at($cursor);
    }

    /**
     * Hand the line to the reference parser.
     */
    public function addLine(string $line): void
    {
        $this->referenceParser->parse($line);
    }

    /**
     * Detach the paragraph from the tree when references were found and no paragraph content remains.
     */
    public function closeBlock(): void
    {
        if (! $this->referenceParser->hasReferences()) {
            return;
        }

        if ($this->referenceParser->getParagraphContent() !== '') {
            return;
        }

        $this->block->detach();
    }

    /**
     * Inline-parse the paragraph content into the paragraph block; empty content is skipped.
     */
    public function parseInlines(InlineParserEngineInterface $inlineParser): void
    {
        $text = $this->getContentString();
        if ($text === '') {
            return;
        }

        $inlineParser->parse($text, $this->block);
    }

    /**
     * @return string The paragraph content as reported by the reference parser
     */
    public function getContentString(): string
    {
        return $this->referenceParser->getParagraphContent();
    }

    /**
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        return $this->referenceParser->getReferences();
    }
}

View File

@@ -0,0 +1,45 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Block;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\MarkdownParserStateInterface;
use League\CommonMark\Util\RegexHelper;
/**
 * @internal
 *
 * This "parser" is actually a performance optimization.
 *
 * Most lines in a typical Markdown document probably won't match a block start. This is especially true for lines
 * starting with letters - nothing in the core CommonMark spec or our supported extensions will match those lines as
 * blocks. Therefore, if we can identify those lines and skip block start parsing, we can optimize performance by ~10%.
 *
 * Previously this optimization was hard-coded in the MarkdownParser but did not allow users to override this behavior.
 * By implementing this optimization as a block parser instead, users wanting custom blocks starting with letters
 * can instead register their block parser with a higher priority to ensure their parser is always called first.
 */
final class SkipLinesStartingWithLettersParser implements BlockStartParserInterface
{
    /**
     * Abort block-start detection for non-indented lines whose first non-space character is a letter.
     *
     * @return BlockStart|null An aborting BlockStart when the line qualifies (the cursor is first moved past
     *                         leading spaces/tabs); null otherwise
     */
    public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserState): ?BlockStart
    {
        if ($cursor->isIndented()) {
            return BlockStart::none();
        }

        if (! RegexHelper::isLetter($cursor->getNextNonSpaceCharacter())) {
            return BlockStart::none();
        }

        $cursor->advanceToNextNonSpaceOrTab();

        return BlockStart::abort();
    }
}

View File

@@ -0,0 +1,481 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Exception\UnexpectedEncodingException;
/**
 * Character-oriented cursor over a single line of UTF-8 (or ASCII) text.
 *
 * Positions are measured in characters, not bytes. Columns additionally
 * account for tabs, which advance to the next multiple of 4.
 */
class Cursor
{
    public const INDENT_LEVEL = 4;

    /** @psalm-readonly */
    private string $line;

    /**
     * Length of the line in characters.
     *
     * @psalm-readonly
     */
    private int $length;

    /**
     * @var int
     *
     * It's possible for this to be 1 char past the end, meaning we've parsed all chars and have
     * reached the end. In this state, any character-returning method MUST return null.
     */
    private int $currentPosition = 0;

    /** Column of the current position, with tabs expanded */
    private int $column = 0;

    /** Columns of whitespace between the current position and the next non-space character */
    private int $indent = 0;

    /** Position before the most recent advance */
    private int $previousPosition = 0;

    /** Memoized result of getNextNonSpacePosition(); null when it must be recomputed */
    private ?int $nextNonSpaceCache = null;

    /** Whether the cursor currently sits part-way through the columns of a tab */
    private bool $partiallyConsumedTab = false;

    /** @psalm-readonly */
    private bool $lineContainsTabs;

    /** @psalm-readonly */
    private bool $isMultibyte;

    /** @var array<int, string> */
    private array $charCache = [];

    /**
     * @param string $line The line being parsed (ASCII or UTF-8)
     *
     * @throws UnexpectedEncodingException if the line is not valid UTF-8
     */
    public function __construct(string $line)
    {
        if (! \mb_check_encoding($line, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $characterCount = \mb_strlen($line, 'UTF-8') ?: 0;

        $this->line             = $line;
        $this->length           = $characterCount;
        $this->isMultibyte      = $characterCount !== \strlen($line);
        $this->lineContainsTabs = \strpos($line, "\t") !== false;
    }

    /**
     * Returns the position of the next character which is not a space (or tab)
     *
     * Also refreshes the cached indent. The result is memoized until the cursor moves.
     */
    public function getNextNonSpacePosition(): int
    {
        if ($this->nextNonSpaceCache !== null) {
            return $this->nextNonSpaceCache;
        }

        $index   = $this->currentPosition;
        $columns = $this->column;
        $char    = $this->getCharacter($index);

        while ($char === ' ' || $char === "\t") {
            // A space is one column wide; a tab runs to the next multiple of 4
            $columns += $char === ' ' ? 1 : 4 - ($columns % 4);
            $index++;
            $char = $this->getCharacter($index);
        }

        $this->indent            = $columns - $this->column;
        $this->nextNonSpaceCache = $char === null ? $this->length : $index;

        return $this->nextNonSpaceCache;
    }

    /**
     * Returns the next character which isn't a space (or tab)
     */
    public function getNextNonSpaceCharacter(): ?string
    {
        $position = $this->getNextNonSpacePosition();

        return $this->getCharacter($position);
    }

    /**
     * Calculates the current indent (number of spaces after current position)
     */
    public function getIndent(): int
    {
        $this->refreshIndentIfStale();

        return $this->indent;
    }

    /**
     * Whether the cursor is indented to INDENT_LEVEL
     */
    public function isIndented(): bool
    {
        $this->refreshIndentIfStale();

        return $this->indent >= self::INDENT_LEVEL;
    }

    /**
     * Recompute the indent (via getNextNonSpacePosition()) when nothing is cached.
     */
    private function refreshIndentIfStale(): void
    {
        if ($this->nextNonSpaceCache === null) {
            $this->getNextNonSpacePosition();
        }
    }

    /**
     * Returns the character at the given index, or at the current position when no index is given.
     *
     * @return string|null Null when the index is out of bounds
     */
    public function getCharacter(?int $index = null): ?string
    {
        $index ??= $this->currentPosition;

        // Index out-of-bounds, or we're at the end
        if ($index < 0 || $index >= $this->length) {
            return null;
        }

        if (! $this->isMultibyte) {
            return $this->line[$index];
        }

        return $this->charCache[$index] ??= \mb_substr($this->line, $index, 1, 'UTF-8');
    }

    /**
     * Slightly-optimized version of getCharacter(null)
     */
    public function getCurrentCharacter(): ?string
    {
        $position = $this->currentPosition;

        if ($position >= $this->length) {
            return null;
        }

        if (! $this->isMultibyte) {
            return $this->line[$position];
        }

        return $this->charCache[$position] ??= \mb_substr($this->line, $position, 1, 'UTF-8');
    }

    /**
     * Returns the next character (or null, if none) without advancing forwards
     */
    public function peek(int $offset = 1): ?string
    {
        return $this->getCharacter($this->currentPosition + $offset);
    }

    /**
     * Whether the remainder is blank
     */
    public function isBlank(): bool
    {
        if ($this->nextNonSpaceCache === $this->length) {
            return true;
        }

        return $this->getNextNonSpacePosition() === $this->length;
    }

    /**
     * Move the cursor forwards
     */
    public function advance(): void
    {
        $this->advanceBy(1);
    }

    /**
     * Move the cursor forwards
     *
     * @param int  $characters       Number of characters to advance by
     * @param bool $advanceByColumns Whether to advance by columns instead of spaces
     */
    public function advanceBy(int $characters, bool $advanceByColumns = false): void
    {
        $this->previousPosition  = $this->currentPosition;
        $this->nextNonSpaceCache = null;

        // Optimization to avoid tab handling logic if we have no tabs
        if (! $this->lineContainsTabs) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        if ($this->isMultibyte) {
            $upcoming = \mb_substr($this->line, $this->currentPosition, $characters, 'UTF-8');
        } else {
            $upcoming = \substr($this->line, $this->currentPosition, $characters);
        }

        if ($upcoming === '') {
            return;
        }

        // Optimization to avoid tab handling logic if we have no tabs
        if (\strpos($upcoming, "\t") === false) {
            $this->advanceWithoutTabCharacters($characters);

            return;
        }

        if ($characters === 1) {
            $pieces = [$upcoming];
        } elseif ($this->isMultibyte) {
            /** @var string[] $pieces */
            $pieces = \mb_str_split($upcoming, 1, 'UTF-8');
        } else {
            $pieces = \str_split($upcoming);
        }

        foreach ($pieces as $piece) {
            if ($piece !== "\t") {
                // Ordinary character: one position, one column
                $this->partiallyConsumedTab = false;
                $this->currentPosition++;
                $this->column++;
                $characters--;
            } elseif (! $advanceByColumns) {
                // Whole tab consumed as a single character
                $this->partiallyConsumedTab = false;
                $this->column              += 4 - ($this->column % 4);
                $this->currentPosition++;
                $characters--;
            } else {
                // Column mode: the tab may only be partly consumed
                $columnsToTabStop           = 4 - ($this->column % 4);
                $step                       = \min($columnsToTabStop, $characters);
                $this->partiallyConsumedTab = $columnsToTabStop > $characters;
                $this->column              += $step;
                if (! $this->partiallyConsumedTab) {
                    $this->currentPosition++;
                }

                $characters -= $step;
            }

            if ($characters <= 0) {
                break;
            }
        }
    }

    /**
     * Advance position and column together, never moving past the end of the line.
     */
    private function advanceWithoutTabCharacters(int $characters): void
    {
        $remaining = $this->length - $this->currentPosition;
        $distance  = \min($characters, $remaining);

        $this->partiallyConsumedTab = false;
        $this->currentPosition     += $distance;
        $this->column              += $distance;
    }

    /**
     * Advances the cursor by a single space or tab, if present
     */
    public function advanceBySpaceOrTab(): bool
    {
        $current = $this->getCurrentCharacter();

        if ($current !== ' ' && $current !== "\t") {
            return false;
        }

        $this->advanceBy(1, true);

        return true;
    }

    /**
     * Parse zero or more space/tab characters
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrTab(): int
    {
        $target = $this->nextNonSpaceCache ?? $this->getNextNonSpacePosition();

        if ($target === $this->currentPosition) {
            return 0;
        }

        $this->advanceBy($target - $this->currentPosition);
        $this->partiallyConsumedTab = false;

        // We've just advanced to where that non-space is,
        // so any subsequent calls to find the next one will
        // always return the current position.
        $this->nextNonSpaceCache = $this->currentPosition;
        $this->indent            = 0;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     *
     * Tab characters are not parsed with this function.
     *
     * @return int Number of positions moved
     */
    public function advanceToNextNonSpaceOrNewline(): int
    {
        $remainder = $this->getRemainder();

        // Optimization: Avoid the regex if we know there are no spaces or newlines
        $startsWithSpaceOrNewline = $remainder !== '' && ($remainder[0] === ' ' || $remainder[0] === "\n");
        if (! $startsWithSpaceOrNewline) {
            $this->previousPosition = $this->currentPosition;

            return 0;
        }

        $matches = [];
        \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);

        // Each match entry is [matched text, index of that match]
        [$matchedText, $matchOffset] = $matches[0];

        $this->advanceBy($matchOffset + \strlen($matchedText));

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Move the position to the very end of the line
     *
     * @return int The number of characters moved
     */
    public function advanceToEnd(): int
    {
        $start = $this->currentPosition;

        $this->previousPosition  = $start;
        $this->nextNonSpaceCache = null;
        $this->currentPosition   = $this->length;

        return $this->currentPosition - $this->previousPosition;
    }

    /**
     * Returns everything from the current position to the end of the line.
     *
     * When a tab has been partially consumed, its remaining columns are returned as spaces.
     */
    public function getRemainder(): string
    {
        if ($this->currentPosition >= $this->length) {
            return '';
        }

        $padding = '';
        $from    = $this->currentPosition;

        if ($this->partiallyConsumedTab) {
            // Skip the tab itself and stand in spaces for its unconsumed columns
            $from++;
            $padding = \str_repeat(' ', 4 - ($this->column % 4));
        }

        if ($this->isMultibyte) {
            $rest = \mb_substr($this->line, $from, null, 'UTF-8');
        } else {
            $rest = \substr($this->line, $from);
        }

        return $padding . $rest;
    }

    /**
     * @return string The full line this cursor was created with
     */
    public function getLine(): string
    {
        return $this->line;
    }

    /**
     * @return bool Whether the current position is at (or past) the end of the line
     */
    public function isAtEnd(): bool
    {
        return $this->currentPosition >= $this->length;
    }

    /**
     * Try to match a regular expression
     *
     * Returns the matching text and advances to the end of that match
     */
    public function match(string $regex): ?string
    {
        $subject = $this->getRemainder();
        $matches = [];

        if (! \preg_match($regex, $subject, $matches, \PREG_OFFSET_CAPTURE)) {
            return null;
        }

        // Each match entry is [matched text, byte index of that match]
        [$matchedText, $byteOffset] = $matches[0];

        if ($this->isMultibyte) {
            // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
            $offset      = \mb_strlen(\substr($subject, 0, $byteOffset), 'UTF-8');
            $matchLength = \mb_strlen($matchedText, 'UTF-8');
        } else {
            $offset      = $byteOffset;
            $matchLength = \strlen($matchedText);
        }

        $this->advanceBy($offset + $matchLength);

        return $matchedText;
    }

    /**
     * Encapsulates the current state of this cursor in case you need to rollback later.
     *
     * WARNING: Do not parse or use the return value for ANYTHING except for
     * passing it back into restoreState(), as the number of values and their
     * contents may change in any future release without warning.
     */
    public function saveState(): CursorState
    {
        // The order here must mirror the destructuring in restoreState()
        $snapshot = [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ];

        return new CursorState($snapshot);
    }

    /**
     * Restore the cursor to a previous state.
     *
     * Pass in the value previously obtained by calling saveState().
     */
    public function restoreState(CursorState $state): void
    {
        // The order here must mirror the array built in saveState()
        [
            $this->currentPosition,
            $this->previousPosition,
            $this->nextNonSpaceCache,
            $this->indent,
            $this->column,
            $this->partiallyConsumedTab,
        ] = $state->toArray();
    }

    /**
     * @return int The current position, in characters
     */
    public function getPosition(): int
    {
        return $this->currentPosition;
    }

    /**
     * @return string The text between the previous position and the current position
     */
    public function getPreviousText(): string
    {
        $span = $this->currentPosition - $this->previousPosition;

        return \mb_substr($this->line, $this->previousPosition, $span, 'UTF-8');
    }

    /**
     * Returns part of the line, addressed in characters.
     *
     * @param int      $start  Character offset to start at
     * @param int|null $length Number of characters, or null for the rest of the line
     */
    public function getSubstring(int $start, ?int $length = null): string
    {
        if ($this->isMultibyte) {
            return \mb_substr($this->line, $start, $length, 'UTF-8');
        }

        return $length === null
            ? \substr($this->line, $start)
            : \substr($this->line, $start, $length);
    }

    /**
     * @return int The current column
     */
    public function getColumn(): int
    {
        return $this->column;
    }
}

View File

@@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
/**
 * Opaque snapshot of a cursor's state, used to roll a cursor back later.
 *
 * WARNING: Do not attempt to use this class for ANYTHING except for
 * type hinting and passing this object back into restoreState().
 * The constructor, methods, and inner contents may change in any
 * future release without warning!
 *
 * @internal
 *
 * @psalm-immutable
 */
final class CursorState
{
    /**
     * The captured values, stored exactly as they were given.
     *
     * @var array<int, mixed>
     *
     * @psalm-readonly
     */
    private array $state;

    /**
     * @internal
     *
     * @param array<int, mixed> $state Values to capture
     */
    public function __construct(array $state)
    {
        $this->state = $state;
    }

    /**
     * @internal
     *
     * @return array<int, mixed> The values passed to the constructor, unchanged
     */
    public function toArray(): array
    {
        $captured = $this->state;

        return $captured;
    }
}

View File

@@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Inline;
use League\CommonMark\Parser\InlineParserContext;
/**
* Interface for a parser that handles one kind of inline syntax.
*/
interface InlineParserInterface
{
/**
* Describe the text this parser wants to be offered
*
* @return InlineParserMatch Definition whose regex is used to locate candidate positions in the content
*/
public function getMatchDefinition(): InlineParserMatch;
/**
* Attempt to parse the text at the current position
*
* @param InlineParserContext $inlineContext Context for the block being parsed, carrying the matches found for this parser's definition
*
* @return bool True if this parser handled the text; false to let other parsers (or plain-text handling) deal with it
*/
public function parse(InlineParserContext $inlineContext): bool;
}

View File

@@ -0,0 +1,83 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Inline;
/**
 * Describes the text an inline parser wants to match.
 *
 * Internally this is a partial regular expression (no delimiters or flags)
 * plus a case-sensitivity switch. Instances are created through the static
 * factories; matching is case-insensitive unless caseSensitive() is called.
 */
final class InlineParserMatch
{
    /** Partial regular expression, without delimiters or flags */
    private string $regex;

    private bool $caseSensitive;

    private function __construct(string $regex, bool $caseSensitive = false)
    {
        $this->regex         = $regex;
        $this->caseSensitive = $caseSensitive;
    }

    /**
     * Make this definition case-sensitive
     *
     * Note that this modifies the current instance rather than returning a copy.
     *
     * @return $this
     */
    public function caseSensitive(): self
    {
        $this->caseSensitive = true;

        return $this;
    }

    /**
     * @internal
     *
     * @return string The complete pattern: the partial regex wrapped in `/` delimiters, with the `i` flag
     *                appended unless the definition is case-sensitive
     */
    public function getRegex(): string
    {
        $flags = $this->caseSensitive ? '' : 'i';

        return '/' . $this->regex . '/' . $flags;
    }

    /**
     * Match the given string (case-insensitive)
     */
    public static function string(string $str): self
    {
        return new self(\preg_quote($str, '/'));
    }

    /**
     * Match any of the given strings (case-insensitive)
     */
    public static function oneOf(string ...$str): self
    {
        // Quote each alternative individually so regex metacharacters are matched literally
        $alternatives = \array_map(
            static fn (string $candidate): string => \preg_quote($candidate, '/'),
            $str
        );

        return new self(\implode('|', $alternatives));
    }

    /**
     * Match a partial regular expression without starting/ending delimiters, anchors, or flags
     *
     * The expression is used verbatim between `/` delimiters, so it is not quoted or escaped here.
     */
    public static function regex(string $regex): self
    {
        return new self($regex);
    }

    /**
     * Combine several definitions into one that matches them in sequence
     *
     * Each definition is wrapped in its own capturing group and the groups are concatenated
     * in the order given. With no definitions, the result is an empty, case-insensitive pattern.
     *
     * @throws \LogicException if the definitions do not all share the same case-sensitivity
     */
    public static function join(self ...$definitions): self
    {
        $regex         = '';
        $caseSensitive = null;

        foreach ($definitions as $definition) {
            $regex .= '(' . $definition->regex . ')';

            if ($caseSensitive === null) {
                // The first definition decides the case-sensitivity of the whole
                $caseSensitive = $definition->caseSensitive;
            } elseif ($caseSensitive !== $definition->caseSensitive) {
                throw new \LogicException('Case-sensitive and case-insensitive definitions cannot be combined');
            }
        }

        return new self($regex, $caseSensitive ?? false);
    }
}

View File

@@ -0,0 +1,53 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser\Inline;
use League\CommonMark\Node\Inline\Newline;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\InlineParserContext;
/**
 * Inline parser that turns a newline character into a Newline node.
 *
 * Trailing spaces on the preceding Text node are stripped; two or more of
 * them produce a hard break, otherwise a soft break.
 */
final class NewlineParser implements InlineParserInterface
{
    public function getMatchDefinition(): InlineParserMatch
    {
        return InlineParserMatch::regex('\\n');
    }

    /**
     * Consume the newline and append the corresponding Newline node to the container.
     *
     * @return bool Always true
     */
    public function parse(InlineParserContext $inlineContext): bool
    {
        $inlineContext->getCursor()->advanceBy(1);

        $container      = $inlineContext->getContainer();
        $previous       = $container->lastChild();
        $trailingSpaces = 0;

        // Check previous inline for trailing spaces
        if ($previous instanceof Text) {
            $literal        = $previous->getLiteral();
            $trimmed        = \rtrim($literal, ' ');
            $trailingSpaces = \strlen($literal) - \strlen($trimmed);

            if ($trailingSpaces !== 0) {
                $previous->setLiteral($trimmed);
            }
        }

        $breakType = $trailingSpaces >= 2 ? Newline::HARDBREAK : Newline::SOFTBREAK;
        $container->appendChild(new Newline($breakType));

        return true;
    }
}

View File

@@ -0,0 +1,120 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Delimiter\DelimiterStack;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Reference\ReferenceMapInterface;
/**
 * Everything an inline parser needs while working on one block: the container
 * block, the reference map, the cursor over the content, a delimiter stack,
 * and the regex matches for the parser currently being invoked.
 */
final class InlineParserContext
{
    /** @psalm-readonly */
    private AbstractBlock $container;

    /** @psalm-readonly */
    private ReferenceMapInterface $referenceMap;

    /** @psalm-readonly */
    private Cursor $cursor;

    /** @psalm-readonly */
    private DelimiterStack $delimiterStack;

    /**
     * Not set by the constructor; only populated on the copies produced by withMatches().
     *
     * @var string[]
     * @psalm-var non-empty-array<string>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $matches;

    /**
     * @param Cursor                $contents     Cursor over the content being parsed
     * @param AbstractBlock         $container    Block that parsed inlines belong to
     * @param ReferenceMapInterface $referenceMap Reference map made available to parsers
     */
    public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap)
    {
        $this->cursor         = $contents;
        $this->container      = $container;
        $this->referenceMap   = $referenceMap;
        $this->delimiterStack = new DelimiterStack();
    }

    public function getContainer(): AbstractBlock
    {
        return $this->container;
    }

    public function getReferenceMap(): ReferenceMapInterface
    {
        return $this->referenceMap;
    }

    public function getCursor(): Cursor
    {
        return $this->cursor;
    }

    public function getDelimiterStack(): DelimiterStack
    {
        return $this->delimiterStack;
    }

    /**
     * @return string The full text that matched the InlineParserMatch definition
     */
    public function getFullMatch(): string
    {
        return $this->matches[0];
    }

    /**
     * @return int The length of the full match (in characters, not bytes)
     */
    public function getFullMatchLength(): int
    {
        $fullMatch = $this->matches[0];

        return \mb_strlen($fullMatch, 'UTF-8');
    }

    /**
     * @return string[] Similar to preg_match(), index 0 will contain the full match, and any other array elements will be captured sub-matches
     *
     * @psalm-return non-empty-array<string>
     */
    public function getMatches(): array
    {
        return $this->matches;
    }

    /**
     * @return string[] The matches without the full match at index 0
     */
    public function getSubMatches(): array
    {
        return \array_slice($this->matches, 1);
    }

    /**
     * Return a shallow copy of this context carrying the given matches; this instance is left unchanged.
     *
     * @param string[] $matches
     *
     * @psalm-param non-empty-array<string> $matches
     */
    public function withMatches(array $matches): InlineParserContext
    {
        $copy          = clone $this;
        $copy->matches = $matches;

        return $copy;
    }
}

View File

@@ -0,0 +1,177 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Environment\EnvironmentInterface;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Node\Inline\AdjacentTextMerger;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Reference\ReferenceMapInterface;
/**
 * Runs the environment's inline parsers over a block's text and appends the resulting inline nodes to that block.
 *
 * @internal
 */
final class InlineParserEngine implements InlineParserEngineInterface
{
    /** @psalm-readonly */
    private EnvironmentInterface $environment;

    /** @psalm-readonly */
    private ReferenceMapInterface $referenceMap;

    /**
     * One entry per inline parser: the parser itself, its match regex, and whether that regex contains multibyte characters.
     *
     * @var array<int, InlineParserInterface|string|bool>
     * @psalm-var list<array{0: InlineParserInterface, 1: string, 2: bool}>
     * @phpstan-var array<int, array{0: InlineParserInterface, 1: string, 2: bool}>
     */
    private array $parsers = [];

    /**
     * Collects every inline parser from the environment together with its regex, so the regexes are only built once.
     */
    public function __construct(EnvironmentInterface $environment, ReferenceMapInterface $referenceMap)
    {
        $this->environment  = $environment;
        $this->referenceMap = $referenceMap;

        foreach ($environment->getInlineParsers() as $inlineParser) {
            \assert($inlineParser instanceof InlineParserInterface);

            $pattern = $inlineParser->getMatchDefinition()->getRegex();

            // Byte length and character length only differ when the pattern contains multibyte characters
            $patternIsMultibyte = \mb_strlen($pattern, 'UTF-8') !== \strlen($pattern);

            $this->parsers[] = [$inlineParser, $pattern, $patternIsMultibyte];
        }
    }

    /**
     * Parses the trimmed contents as inlines and appends the resulting nodes to the given block.
     *
     * Positions no parser handles are added as plain text. Afterwards the delimiter stack is processed and
     * cleared, and adjacent text nodes are merged.
     */
    public function parse(string $contents, AbstractBlock $block): void
    {
        $contents = \trim($contents);
        $cursor   = new Cursor($contents);
        $context  = new InlineParserContext($cursor, $block, $this->referenceMap);

        // Visit, in order, every position at least one parser showed interest in
        foreach ($this->matchParsers($contents) as $position => $candidates) {
            $gap = $position - $cursor->getPosition();

            // The cursor is already beyond this position
            if ($gap < 0) {
                continue;
            }

            // Nobody was interested in the text between the cursor and this position, so it becomes plain text
            if ($gap > 0) {
                $cursor->advanceBy($gap);
                $this->addPlainText($cursor->getPreviousText(), $block);
            }

            // Offer the position to each interested parser; the first one that succeeds wins
            $handled = false;
            foreach ($candidates as [$candidate, $matches]) {
                \assert($candidate instanceof InlineParserInterface);
                if ($candidate->parse($context->withMatches($matches))) {
                    $handled = true;
                    break;
                }
            }

            // No parser accepted the position: emit the current character as plain text and move on
            if (! $handled) {
                $this->addPlainText((string) $cursor->getCurrentCharacter(), $block);
                $cursor->advance();
            }
        }

        // Whatever is left after the last position is plain text
        if (! $cursor->isAtEnd()) {
            $this->addPlainText($cursor->getRemainder(), $block);
        }

        // Resolve the delimiters collected while parsing, then clear the stack
        $delimiterStack = $context->getDelimiterStack();
        $delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());
        $delimiterStack->removeAll();

        // Merge adjacent text nodes into one
        AdjacentTextMerger::mergeChildNodes($block);
    }

    /**
     * Adds plain text to the container: appended to the last child when that is a Text node without 'delim' data,
     * otherwise added as a new Text node.
     */
    private function addPlainText(string $text, AbstractBlock $container): void
    {
        $tail = $container->lastChild();

        if (! ($tail instanceof Text) || $tail->data->has('delim')) {
            $container->appendChild(new Text($text));

            return;
        }

        $tail->append($text);
    }

    /**
     * Runs every parser's regex against the line and groups the hits by character position.
     *
     * The result is keyed by character offset (sorted ascending). Each value lists the parsers that matched at
     * that offset along with the matched texts, so the parsers do not need to run their regex again.
     *
     * @return array<array<int, InlineParserInterface|string>>
     *
     * @psalm-return array<int, list<array{0: InlineParserInterface, 1: non-empty-array<string>}>>
     *
     * @phpstan-return array<int, array<int, array{0: InlineParserInterface, 1: non-empty-array<string>}>>
     */
    private function matchParsers(string $contents): array
    {
        $contents        = \trim($contents);
        $textIsMultibyte = \strlen($contents) !== \mb_strlen($contents, 'UTF-8');

        $positions = [];

        foreach ($this->parsers as [$parser, $pattern, $patternIsMultibyte]) {
            // Multibyte text or a multibyte pattern needs the "u" modifier
            $regex = ($textIsMultibyte || $patternIsMultibyte) ? $pattern . 'u' : $pattern;

            // Skip parsers whose regex matches nowhere in the line
            if (! \preg_match_all($regex, $contents, $hits, \PREG_OFFSET_CAPTURE | \PREG_SET_ORDER)) {
                continue;
            }

            foreach ($hits as $hit) {
                $byteOffset = $hit[0][1];

                // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character offset for multibyte text
                $charOffset = $textIsMultibyte
                    ? \mb_strlen(\substr($contents, 0, $byteOffset), 'UTF-8')
                    : \intval($byteOffset);

                // Keep only the matched texts, dropping their offsets
                $texts = \array_column($hit, 0);
                if ($texts !== []) {
                    $positions[$charOffset][] = [$parser, $texts];
                }
            }
        }

        // Positions must be visited from left to right
        \ksort($positions);

        return $positions;
    }
}

View File

@@ -0,0 +1,27 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Node\Block\AbstractBlock;
/**
* Parser for inline content (text, links, emphasized text, etc).
*
* Implementations turn the string content of a block into inline nodes attached to that block.
*/
interface InlineParserEngineInterface
{
/**
* Parse the given contents as inlines and insert them into the given block
*
* @param string        $contents Text to scan for inline content
* @param AbstractBlock $block    Block that receives the parsed inlines
*/
public function parse(string $contents, AbstractBlock $block): void;
}

View File

@@ -0,0 +1,339 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
* - (c) John MacFarlane
*
* Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java)
* - (c) Atlassian Pty Ltd
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Environment\EnvironmentInterface;
use League\CommonMark\Event\DocumentParsedEvent;
use League\CommonMark\Event\DocumentPreParsedEvent;
use League\CommonMark\Input\MarkdownInput;
use League\CommonMark\Node\Block\Document;
use League\CommonMark\Node\Block\Paragraph;
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface;
use League\CommonMark\Parser\Block\BlockStart;
use League\CommonMark\Parser\Block\BlockStartParserInterface;
use League\CommonMark\Parser\Block\DocumentBlockParser;
use League\CommonMark\Parser\Block\ParagraphParser;
use League\CommonMark\Reference\ReferenceInterface;
use League\CommonMark\Reference\ReferenceMap;
/**
 * Parses Markdown text into a Document: every input line is run through the block parsers, then the closed
 * blocks that support inlines have their inline content parsed.
 */
final class MarkdownParser implements MarkdownParserInterface
{
    /** @psalm-readonly */
    private EnvironmentInterface $environment;

    /** @psalm-readonly-allow-private-mutation */
    private int $maxNestingLevel;

    /** @psalm-readonly-allow-private-mutation */
    private ReferenceMap $referenceMap;

    /** @psalm-readonly-allow-private-mutation */
    private int $lineNumber = 0;

    /** @psalm-readonly-allow-private-mutation */
    private Cursor $cursor;

    /**
     * Stack of open block parsers; the last element is the active one.
     *
     * @var array<int, BlockContinueParserInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $activeBlockParsers = [];

    /**
     * Closed block parsers remembered for the inline parsing pass.
     *
     * @var array<int, BlockContinueParserWithInlinesInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $closedBlockParsers = [];

    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Resets all per-document state and reads the 'max_nesting_level' configuration value.
     */
    private function initialize(): void
    {
        $this->referenceMap       = new ReferenceMap();
        $this->activeBlockParsers = [];
        $this->closedBlockParsers = [];
        $this->lineNumber         = 0;
        $this->maxNestingLevel    = $this->environment->getConfiguration()->get('max_nesting_level');
    }

    /**
     * Parses the given Markdown and returns the document block.
     *
     * A DocumentPreParsedEvent is dispatched before the lines are read (the lines come from that event's
     * markdown input) and a DocumentParsedEvent is dispatched once parsing has finished.
     *
     * @throws \RuntimeException
     */
    public function parse(string $input): Document
    {
        $this->initialize();

        $rootParser = new DocumentBlockParser($this->referenceMap);
        $this->activateBlockParser($rootParser);

        $preParsed = new DocumentPreParsedEvent($rootParser->getBlock(), new MarkdownInput($input));
        $this->environment->dispatch($preParsed);

        foreach ($preParsed->getMarkdown()->getLines() as $number => $text) {
            $this->lineNumber = $number;
            $this->parseLine($text);
        }

        // Close every block that is still open, then parse the inline content of the closed blocks
        $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
        $this->processInlines();

        $this->environment->dispatch(new DocumentParsedEvent($rootParser->getBlock()));

        return $rootParser->getBlock();
    }

    /**
     * Processes a single line of input: continues the open blocks, starts new blocks where possible, and hands
     * the remaining text to the appropriate block parser.
     */
    private function parseLine(string $line): void
    {
        $this->cursor = new Cursor($line);

        $matchedCount = $this->parseBlockContinuation();
        if ($matchedCount === null) {
            // A block parser finalized itself on this line; nothing more to do
            return;
        }

        $unmatchedCount = \count($this->activeBlockParsers) - $matchedCount;
        $tip            = $this->activeBlockParsers[$matchedCount - 1];
        $openedBlock    = false;

        // New block starts are only attempted when the last matched block is a paragraph or a container;
        // new blocks become children of the last matched container
        $keepTrying = $tip->getBlock() instanceof Paragraph || $tip->isContainer();
        while ($keepTrying) {
            // Shortcut for blank remainders: jump to the end without consulting the block start parsers
            if ($this->cursor->isBlank()) {
                $this->cursor->advanceToEnd();
                break;
            }

            // Do not nest deeper than the configured maximum
            if ($tip->getBlock()->getDepth() >= $this->maxNestingLevel) {
                break;
            }

            $start = $this->findBlockStart($tip);
            if ($start === null || $start->isAborting()) {
                $this->cursor->advanceToNextNonSpaceOrTab();
                break;
            }

            $startState = $start->getCursorState();
            if ($startState !== null) {
                $this->cursor->restoreState($startState);
            }

            $openedBlock = true;

            // A new block begins here, so blocks that did not match this line end on the previous line
            if ($unmatchedCount > 0) {
                $this->closeBlockParsers($unmatchedCount, $this->lineNumber - 1);
                $unmatchedCount = 0;
            }

            if ($start->isReplaceActiveBlockParser()) {
                $this->prepareActiveBlockParserForReplacement();
            }

            foreach ($start->getBlockParsers() as $childParser) {
                $tip        = $this->addChild($childParser);
                $keepTrying = $childParser->isContainer();
            }
        }

        // What remains at the cursor is a line of text. First check for a lazy paragraph continuation:
        $isLazyContinuation = ! $openedBlock
            && ! $this->cursor->isBlank()
            && $this->getActiveBlockParser()->canHaveLazyContinuationLines();

        if ($isLazyContinuation) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        // Finalize any blocks that did not match this line
        if ($unmatchedCount > 0) {
            $this->closeBlockParsers($unmatchedCount, $this->lineNumber);
        }

        if ($tip->isContainer()) {
            // A container cannot hold the text itself; non-blank text goes into a new paragraph
            if ($this->cursor->isBlank()) {
                return;
            }

            $this->addChild(new ParagraphParser());
        }

        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
    }

    /**
     * Asks each open block parser (after the document parser, which always matches) whether it continues on the current line.
     *
     * @return int|null Number of block parsers that matched, or null when a parser asked to be finalized
     */
    private function parseBlockContinuation(): ?int
    {
        // Index 0 is the document, which always matches, so counting starts at 1.
        // The counter doubles as the index of the next parser to try.
        $matched = 1;

        while ($matched < \count($this->activeBlockParsers)) {
            $candidate = $this->activeBlockParsers[$matched];
            $result    = $candidate->tryContinue(clone $this->cursor, $this->getActiveBlockParser());

            if ($result === null) {
                break;
            }

            if ($result->isFinalize()) {
                // Close this parser and everything nested inside it
                $this->closeBlockParsers(\count($this->activeBlockParsers) - $matched, $this->lineNumber);

                return null;
            }

            $continueState = $result->getCursorState();
            if ($continueState !== null) {
                $this->cursor->restoreState($continueState);
            }

            $matched++;
        }

        return $matched;
    }

    /**
     * Returns the first non-null result from the environment's block start parsers, or null when none of them
     * produces one.
     */
    private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
    {
        $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

        foreach ($this->environment->getBlockStartParsers() as $startParser) {
            \assert($startParser instanceof BlockStartParserInterface);

            // Each parser works on its own copy of the cursor
            $blockStart = $startParser->tryStart(clone $this->cursor, $parserState);
            if ($blockStart !== null) {
                return $blockStart;
            }
        }

        return null;
    }

    /**
     * Closes the given number of block parsers, starting with the active one, and remembers those that support
     * inline parsing.
     */
    private function closeBlockParsers(int $count, int $endLineNumber): void
    {
        for ($remaining = $count; $remaining > 0; $remaining--) {
            $closed = $this->deactivateBlockParser();
            $this->finalize($closed, $endLineNumber);

            // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
            if ($closed instanceof BlockContinueParserWithInlinesInterface) {
                // Keep it for the inline parsing pass
                $this->closedBlockParsers[] = $closed;
            }
        }
    }

    /**
     * Finalizes a block: paragraph parsers contribute their references to the reference map, then the block
     * gets its end line and the parser's closeBlock() is called.
     */
    private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
    {
        if ($blockParser instanceof ParagraphParser) {
            $this->updateReferenceMap($blockParser->getReferences());
        }

        $block = $blockParser->getBlock();
        $block->setEndLine($endLineNumber);

        $blockParser->closeBlock();
    }

    /**
     * Lets every remembered closed block parser parse its inlines using one shared inline parser engine.
     */
    private function processInlines(): void
    {
        $engine = new InlineParserEngine($this->environment, $this->referenceMap);

        foreach ($this->closedBlockParsers as $closedParser) {
            $closedParser->parseInlines($engine);
        }
    }

    /**
     * Adds the parser's block as a child of the active block and makes the parser the active one.
     *
     * Active parsers that cannot contain the new block are closed first, ending on the previous line.
     */
    private function addChild(BlockContinueParserInterface $blockParser): BlockContinueParserInterface
    {
        $blockParser->getBlock()->setStartLine($this->lineNumber);

        $previousLine = $this->lineNumber - 1;
        while (! $this->getActiveBlockParser()->canContain($blockParser->getBlock())) {
            $this->closeBlockParsers(1, $previousLine);
        }

        $this->getActiveBlockParser()->getBlock()->appendChild($blockParser->getBlock());
        $this->activateBlockParser($blockParser);

        return $blockParser;
    }

    /**
     * Pushes the parser onto the stack of open block parsers.
     */
    private function activateBlockParser(BlockContinueParserInterface $blockParser): void
    {
        $this->activeBlockParsers[] = $blockParser;
    }

    /**
     * Pops the active block parser off the stack and returns it.
     *
     * @throws \RuntimeException when the stack is empty
     */
    private function deactivateBlockParser(): BlockContinueParserInterface
    {
        $last = \array_pop($this->activeBlockParsers);
        if ($last !== null) {
            return $last;
        }

        throw new \RuntimeException('The last block parser should not be deactivated');
    }

    /**
     * Removes the active block parser because it is about to be replaced. The block is neither finalized nor
     * inline-parsed; it is detached, and a paragraph parser's references are kept.
     */
    private function prepareActiveBlockParserForReplacement(): void
    {
        $replaced = $this->deactivateBlockParser();

        if ($replaced instanceof ParagraphParser) {
            $this->updateReferenceMap($replaced->getReferences());
        }

        $replaced->getBlock()->detach();
    }

    /**
     * Adds the references to the reference map, skipping any whose label is already present.
     *
     * @param ReferenceInterface[] $references
     */
    private function updateReferenceMap(iterable $references): void
    {
        foreach ($references as $reference) {
            if ($this->referenceMap->contains($reference->getLabel())) {
                continue;
            }

            $this->referenceMap->add($reference);
        }
    }

    /**
     * Returns the last block parser on the stack of open block parsers.
     *
     * @throws \RuntimeException when the stack is empty
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        $last = \end($this->activeBlockParsers);
        if ($last !== false) {
            return $last;
        }

        throw new \RuntimeException('No active block parsers are available');
    }
}

View File

@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Node\Block\Document;
/**
* Contract for parsing Markdown text into a Document.
*/
interface MarkdownParserInterface
{
/**
* Parse the given Markdown text and return the resulting Document
*
* @param string $input Markdown text to parse
*
* @throws \RuntimeException
*/
public function parse(string $input): Document;
}

View File

@@ -0,0 +1,57 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
use League\CommonMark\Parser\Block\ParagraphParser;
/**
 * Holds the active block parser and the last matched block parser.
 *
 * @internal You should rely on the interface instead
 */
final class MarkdownParserState implements MarkdownParserStateInterface
{
    /** @psalm-readonly */
    private BlockContinueParserInterface $activeBlockParser;

    /** @psalm-readonly */
    private BlockContinueParserInterface $lastMatchedBlockParser;

    public function __construct(BlockContinueParserInterface $activeBlockParser, BlockContinueParserInterface $lastMatchedBlockParser)
    {
        $this->lastMatchedBlockParser = $lastMatchedBlockParser;
        $this->activeBlockParser      = $activeBlockParser;
    }

    /**
     * Returns the active block parser given to the constructor.
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        return $this->activeBlockParser;
    }

    /**
     * Returns the last matched block parser given to the constructor.
     */
    public function getLastMatchedBlockParser(): BlockContinueParserInterface
    {
        return $this->lastMatchedBlockParser;
    }

    /**
     * Returns the content string of the last matched block parser when it is a paragraph parser.
     *
     * @return string|null Null when the last matched parser is not a paragraph parser or its content is empty
     */
    public function getParagraphContent(): ?string
    {
        $matched = $this->lastMatchedBlockParser;

        if ($matched instanceof ParagraphParser) {
            $text = $matched->getContentString();
            if ($text !== '') {
                return $text;
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace League\CommonMark\Parser;
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
/**
* Exposes the block parser state: the deepest open block parser, the block parser last matched during the
* continue phase, and the current paragraph content (if any).
*/
interface MarkdownParserStateInterface
{
/**
* Returns the deepest open block parser
*/
public function getActiveBlockParser(): BlockContinueParserInterface;
/**
* Open block parser that was last matched during the continue phase. This is different from the currently active
* block parser, as an unmatched block is only closed when a new block is started.
*/
public function getLastMatchedBlockParser(): BlockContinueParserInterface;
/**
* Returns the current content of the paragraph if the matched block is a paragraph. The content can be multiple
* lines separated by newlines.
*
* @return string|null The paragraph content, or null (MarkdownParserState returns null when the last matched
*                     block parser is not a paragraph parser or when its content is an empty string)
*/
public function getParagraphContent(): ?string;
}