<?php
/**
 * Main WordPress Formatting API.
 *
 * Handles many functions for formatting output.
 *
 * @package WordPress
 */

/**
 * Replaces common plain text characters with formatted entities.
 *
 * Returns given text with transformations of quotes into smart quotes, apostrophes,
 * dashes, ellipses, the trademark symbol, and the multiplication symbol.
 *
 * As an example,
 *
 *     'cause today's effort makes it worth tomorrow's "holiday" ...
 *
 * Becomes:
 *
 *     &#8217;cause today&#8217;s effort makes it worth tomorrow&#8217;s &#8220;holiday&#8221; &#8230;
 *
 * Code within certain HTML blocks are skipped.
 *
 * Do not use this function before the {@see 'init'} action hook; everything will break.
 *
 * If the text cannot be split into elements because the underlying preg_split() call
 * fails, the text is returned unchanged.
 *
 * @since 0.71
 *
 * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases.
 * @global array $shortcode_tags
 *
 * @param string $text  The text to be formatted.
 * @param bool   $reset Set to true for unit testing. Translated patterns will reset.
 * @return string The string replaced with HTML entities.
 */
function wptexturize( $text, $reset = false ) {
	global $wp_cockneyreplace, $shortcode_tags;

	/*
	 * The replacement tables are built once and cached in statics.
	 * The three *_flag values are HTML-comment placeholders that the dynamic patterns
	 * insert; they are swapped for the real entities after wptexturize_primes() has run.
	 */
	static $static_characters            = null,
		$static_replacements             = null,
		$dynamic_characters              = null,
		$dynamic_replacements            = null,
		$default_no_texturize_tags       = null,
		$default_no_texturize_shortcodes = null,
		$run_texturize                   = true,
		$apos                            = null,
		$prime                           = null,
		$double_prime                    = null,
		$opening_quote                   = null,
		$closing_quote                   = null,
		$opening_single_quote            = null,
		$closing_single_quote            = null,
		$open_q_flag                     = '<!--oq-->',
		$open_sq_flag                    = '<!--osq-->',
		$apos_flag                       = '<!--apos-->';

	// If there's nothing to do, just stop.
	if ( empty( $text ) || false === $run_texturize ) {
		return $text;
	}

	// Set up static variables. Run once only.
	if ( $reset || ! isset( $static_characters ) ) {
		/**
		 * Filters whether to skip running wptexturize().
		 *
		 * Returning false from the filter will effectively short-circuit wptexturize()
		 * and return the original text passed to the function instead.
		 *
		 * The filter runs only once, the first time wptexturize() is called.
		 *
		 * @since 4.0.0
		 *
		 * @see wptexturize()
		 *
		 * @param bool $run_texturize Whether to short-circuit wptexturize().
		 */
		$run_texturize = apply_filters( 'run_wptexturize', $run_texturize );
		if ( false === $run_texturize ) {
			return $text;
		}

		/* translators: Opening curly double quote. */
		$opening_quote = _x( '&#8220;', 'opening curly double quote' );
		/* translators: Closing curly double quote. */
		$closing_quote = _x( '&#8221;', 'closing curly double quote' );

		/* translators: Apostrophe, for example in 'cause or can't. */
		$apos = _x( '&#8217;', 'apostrophe' );

		/* translators: Prime, for example in 9' (nine feet). */
		$prime = _x( '&#8242;', 'prime' );
		/* translators: Double prime, for example in 9" (nine inches). */
		$double_prime = _x( '&#8243;', 'double prime' );

		/* translators: Opening curly single quote. */
		$opening_single_quote = _x( '&#8216;', 'opening curly single quote' );
		/* translators: Closing curly single quote. */
		$closing_single_quote = _x( '&#8217;', 'closing curly single quote' );

		/* translators: En dash. */
		$en_dash = _x( '&#8211;', 'en dash' );
		/* translators: Em dash. */
		$em_dash = _x( '&#8212;', 'em dash' );

		$default_no_texturize_tags       = array( 'pre', 'code', 'kbd', 'style', 'script', 'tt' );
		$default_no_texturize_shortcodes = array( 'code' );

		// If a plugin has provided an autocorrect array, use it.
		if ( isset( $wp_cockneyreplace ) ) {
			$cockney        = array_keys( $wp_cockneyreplace );
			$cockneyreplace = array_values( $wp_cockneyreplace );
		} else {
			/*
			 * translators: This is a comma-separated list of words that defy the syntax of quotations in normal use,
			 * for example... 'We do not have enough words yet'... is a typical quoted phrase. But when we write
			 * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes.
			 */
			$cockney = explode(
				',',
				_x(
					"'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em",
					'Comma-separated list of words to texturize in your language'
				)
			);

			$cockneyreplace = explode(
				',',
				_x(
					'&#8217;tain&#8217;t,&#8217;twere,&#8217;twas,&#8217;tis,&#8217;twill,&#8217;til,&#8217;bout,&#8217;nuff,&#8217;round,&#8217;cause,&#8217;em',
					'Comma-separated list of replacement words in your language'
				)
			);
		}

		$static_characters   = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney );
		$static_replacements = array_merge( array( '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );

		/*
		 * Pattern-based replacements of characters.
		 * Sort the remaining patterns into several arrays for performance tuning.
		 */
		$dynamic_characters   = array(
			'apos'  => array(),
			'quote' => array(),
			'dash'  => array(),
		);
		$dynamic_replacements = array(
			'apos'  => array(),
			'quote' => array(),
			'dash'  => array(),
		);
		$dynamic              = array();
		$spaces               = wp_spaces_regexp();

		// '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
		if ( "'" !== $apos || "'" !== $closing_single_quote ) {
			$dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
		}
		if ( "'" !== $apos || '"' !== $closing_quote ) {
			$dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|&gt;|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
		}

		// '99 '99s '99's (apostrophe)  But never '9 or '99% or '999 or '99.0.
		if ( "'" !== $apos ) {
			$dynamic['/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/'] = $apos_flag;
		}

		// Quoted numbers like '0.42'.
		if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
			$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
		}

		// Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
		if ( "'" !== $opening_single_quote ) {
			$dynamic[ '/(?<=\A|[([{"\-]|&lt;|' . $spaces . ')\'/' ] = $open_sq_flag;
		}

		// Apostrophe in a word. No spaces, double apostrophes, or other punctuation.
		if ( "'" !== $apos ) {
			$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
		}

		$dynamic_characters['apos']   = array_keys( $dynamic );
		$dynamic_replacements['apos'] = array_values( $dynamic );
		$dynamic                      = array();

		// Quoted numbers like "42".
		if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
			$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
		}

		// Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
		if ( '"' !== $opening_quote ) {
			$dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
		}

		$dynamic_characters['quote']   = array_keys( $dynamic );
		$dynamic_replacements['quote'] = array_values( $dynamic );
		$dynamic                       = array();

		// Dashes and spaces.
		$dynamic['/---/'] = $em_dash;
		$dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/' ] = $em_dash;
		$dynamic['/(?<!xn)--/']                                       = $en_dash;
		$dynamic[ '/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/' ]  = $en_dash;

		$dynamic_characters['dash']   = array_keys( $dynamic );
		$dynamic_replacements['dash'] = array_values( $dynamic );
	}

	// Must do this every time in case plugins use these filters in a context sensitive manner.
	/**
	 * Filters the list of HTML elements not to texturize.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $default_no_texturize_tags An array of HTML element names.
	 */
	$no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
	/**
	 * Filters the list of shortcodes not to texturize.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $default_no_texturize_shortcodes An array of shortcode names.
	 */
	$no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );

	$no_texturize_tags_stack       = array();
	$no_texturize_shortcodes_stack = array();

	// Look for shortcodes and HTML elements.

	preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches );
	$tagnames         = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
	$found_shortcodes = ! empty( $tagnames );
	$shortcode_regex  = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';
	$regex            = _get_wptexturize_split_regex( $shortcode_regex );

	$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

	/*
	 * preg_split() returns false when PCRE fails. Iterating and imploding a
	 * non-array would raise errors, so hand the text back untouched instead.
	 */
	if ( false === $textarr ) {
		return $text;
	}

	foreach ( $textarr as &$curl ) {
		// Only call _wptexturize_pushpop_element if $curl is a delimiter.
		$first = $curl[0];
		if ( '<' === $first ) {
			if ( str_starts_with( $curl, '<!--' ) ) {
				// This is an HTML comment delimiter.
				continue;
			} else {
				// This is an HTML element delimiter.

				// Replace each & with &#038; unless it already looks like an entity.
				$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );

				_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
			}
		} elseif ( '' === trim( $curl ) ) {
			// This is a newline between delimiters. Performance improves when we check this.
			continue;

		} elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
			// This is a shortcode delimiter.

			if ( ! str_starts_with( $curl, '[[' ) && ! str_ends_with( $curl, ']]' ) ) {
				// Looks like a normal shortcode.
				_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
			} else {
				// Looks like an escaped shortcode.
				continue;
			}
		} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
			// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.

			$curl = str_replace( $static_characters, $static_replacements, $curl );

			if ( str_contains( $curl, "'" ) ) {
				$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
				$curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
				// The placeholders are only resolved after the prime/quote decision has been made.
				$curl = str_replace( $apos_flag, $apos, $curl );
				$curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
			}
			if ( str_contains( $curl, '"' ) ) {
				$curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
				$curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
				$curl = str_replace( $open_q_flag, $opening_quote, $curl );
			}
			if ( str_contains( $curl, '-' ) ) {
				$curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
			}

			// 9x9 (times), but never 0x9999.
			if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) {
				// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
				$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
			}

			// Replace each & with &#038; unless it already looks like an entity.
			$curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $curl );
		}
	}
	// Break the reference so later writes to $curl cannot alter the last element.
	unset( $curl );

	return implode( '', $textarr );
}

/**
 * Implements a logic tree to determine whether or not "7'." represents seven feet,
 * then converts the special char into either a prime char or a closing quote char.
 *
 * @since 4.3.0
 *
 * @param string $haystack    The plain text to be searched.
 * @param string $needle      The character to search for such as ' or ".
 * @param string $prime       The prime char to use for replacement.
 * @param string $open_quote  The opening quote char. Opening quote replacement must be
 *                            accomplished already.
 * @param string $close_quote The closing quote char to use for replacement.
 * @return string The $haystack value after primes and quotes replacements.
*/
function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
	$spaces           = wp_spaces_regexp();
	$flag             = '<!--wp-prime-or-quote-->';
	$quote_pattern    = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|&gt;|" . $spaces . ')/';
	$prime_pattern    = "/(?<=\\d)$needle/";
	$flag_after_digit = "/(?<=\\d)$flag/";
	$flag_no_digit    = "/(?<!\\d)$flag/";

	// Every piece after the first one starts right behind an opening quote.
	$sentences = explode( $open_quote, $haystack );

	foreach ( $sentences as $key => &$sentence ) {
		if ( ! str_contains( $sentence, $needle ) ) {
			continue;
		} elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) {
			// Mark every closing-quote candidate with a temporary flag and count them.
			$sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count );
			if ( $count > 1 ) {
				// This sentence appears to have multiple closing quotes. Attempt Vulcan logic.
				$sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 );
				if ( 0 === $count2 ) {
					// Try looking for a quote followed by a period.
					$count2 = substr_count( $sentence, "$flag." );
					if ( $count2 > 0 ) {
						// Assume the rightmost quote-period match is the end of quotation.
						$pos = strrpos( $sentence, "$flag." );
					} else {
						/*
						 * When all else fails, make the rightmost candidate a closing quote.
						 * This is most likely to be problematic in the context of bug #18549.
						 */
						$pos = strrpos( $sentence, $flag );
					}
					$sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) );
				}
				// Use conventional replacement on any remaining primes and quotes.
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
				$sentence = preg_replace( $flag_after_digit, $prime, $sentence );
				$sentence = str_replace( $flag, $close_quote, $sentence );
			} elseif ( 1 === $count ) {
				// Found only one closing quote candidate, so give it priority over primes.
				$sentence = str_replace( $flag, $close_quote, $sentence );
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			} else {
				// No closing quotes found. Just run primes pattern.
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			}
		} else {
			$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			$sentence = preg_replace( $quote_pattern, $close_quote, $sentence );
		}

		// Any double quote still left over is turned into a closing quote.
		if ( '"' === $needle && str_contains( $sentence, '"' ) ) {
			$sentence = str_replace( '"', $close_quote, $sentence );
		}
	}
	// Break the reference so later writes to $sentence cannot alter the last element.
	unset( $sentence );

	return implode( $open_quote, $sentences );
}

/**
 * Searches for disabled element tags. Pushes element to stack on tag open
 * and pops on tag close.
 *
 * Assumes first char of `$text` is tag opening and last char is tag closing.
 * Assumes second char of `$text` is optionally `/` to indicate closing as in `</html>`.
 *
 * The tag name ends at the first ASCII whitespace character (space, tab, line feed,
 * carriage return or form feed). When `$text` contains no whitespace after the name,
 * the name runs up to, but not including, the last character of `$text`.
 *
 * @since 2.9.0
 * @access private
 *
 * @param string   $text              Text to check. Must be a tag like `<html>` or `[shortcode]`.
 * @param string[] $stack             Array of open tag elements. Passed by reference and
 *                                    modified in place.
 * @param string[] $disabled_elements Array of tag names to match against. Spaces are not allowed in tag names.
 */
function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) {
	// Is it an opening tag or closing tag?
	if ( isset( $text[1] ) && '/' !== $text[1] ) {
		$opening_tag = true;
		$name_offset = 1;
	} elseif ( 0 === count( $stack ) ) {
		// Stack is empty. Just stop.
		return;
	} else {
		$opening_tag = false;
		$name_offset = 2;
	}

	/*
	 * Parse out the tag name.
	 *
	 * Attributes may be separated from the name by any ASCII whitespace, not only a
	 * space: `<pre\nclass="x">` must still be recognized as `pre`, and `</pre\n>` must
	 * still pop the stack.
	 */
	$text_length = strlen( $text );
	$name_length = 0;

	// Only scan when the offset lies inside the string.
	if ( $name_offset < $text_length ) {
		$name_length = strcspn( $text, " \t\n\r\f", $name_offset );
	}

	if ( $name_offset + $name_length >= $text_length ) {
		// No whitespace after the name: take everything except the closing delimiter.
		$name_length = -1;
	}

	$tag = substr( $text, $name_offset, $name_length );

	// Handle disabled tags.
	if ( in_array( $tag, $disabled_elements, true ) ) {
		if ( $opening_tag ) {
			/*
			 * This disables texturize until we find a closing tag of our type
			 * (e.g. <pre>) even if there was invalid nesting before that.
			 *
			 * Example: in the case <pre>sadsadasd</code>"baba"</pre>
			 *          "baba" won't be texturized.
			 */

			array_push( $stack, $tag );
		} elseif ( end( $stack ) === $tag ) {
			array_pop( $stack );
		}
	}
}

/**
 * Replaces double line breaks with paragraph elements.
*
 * A group of regex replaces used to identify text formatted with newlines and
 * replace double line breaks with HTML paragraph tags. The remaining line breaks
 * after conversion become `<br />` tags, unless `$br` is set to '0' or 'false'.
 *
 * @since 0.71
 *
 * @param string $text The text which has to be formatted.
 * @param bool   $br   Optional. If set, this will convert all remaining line breaks
 *                     after paragraphing. Line breaks within `<script>`, `<style>`,
 *                     `<svg>`, and `<math>` tags are not affected. Default true.
 * @return string Text which has been converted into correct paragraph tags.
 */
function wpautop( $text, $br = true ) {
	// Maps each `<pre>` placeholder to the original `<pre>...</pre>` markup it stands for.
	$pre_tags = array();

	if ( trim( $text ) === '' ) {
		return '';
	}

	// Just to make things a little easier, pad the end.
	$text = $text . "\n";

	/*
	 * Pre tags shouldn't be touched by autop.
	 * Replace pre tags with placeholders and bring them back after autop.
	 */
	if ( str_contains( $text, '<pre' ) ) {
		$text_parts = explode( '</pre>', $text );
		$last_part  = array_pop( $text_parts );
		$text       = '';
		$i          = 0;

		foreach ( $text_parts as $text_part ) {
			$start = strpos( $text_part, '<pre' );

			// Malformed HTML?
			if ( false === $start ) {
				$text .= $text_part;
				continue;
			}

			$name              = "<pre wp-pre-tag-$i></pre>";
			$pre_tags[ $name ] = substr( $text_part, $start ) . '</pre>';

			$text .= substr( $text_part, 0, $start ) . $name;
			++$i;
		}

		$text .= $last_part;
	}
	// Change multiple <br>'s into two line breaks, which will turn into paragraphs.
	$text = preg_replace( '|<br\s*/?>\s*<br\s*/?>|', "\n\n", $text );

	// Alternation of the element names treated as block-level by the patterns below.
	$allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';

	// Add a double line break above block-level opening tags.
	$text = preg_replace( '!(<' . $allblocks . '[\s/>])!', "\n\n$1", $text );

	// Add a double line break below block-level closing tags.
	$text = preg_replace( '!(</' . $allblocks . '>)!', "$1\n\n", $text );

	// Add a double line break after hr tags, which are self closing.
	$text = preg_replace( '!(<hr\s*?/?>)!', "$1\n\n", $text );

	// Standardize newline characters to "\n".
	$text = str_replace( array( "\r\n", "\r" ), "\n", $text );

	// Find newlines in all elements and add placeholders.
	$text = wp_replace_in_html_tags( $text, array( "\n" => ' <!-- wpnl --> ' ) );

	// Collapse line breaks before and after <option> elements so they don't get autop'd.
	if ( str_contains( $text, '<option' ) ) {
		$text = preg_replace( '|\s*<option|', '<option', $text );
		$text = preg_replace( '|</option>\s*|', '</option>', $text );
	}

	/*
	 * Collapse line breaks inside <object> elements, before <param> and <embed> elements
	 * so they don't get autop'd.
	 */
	if ( str_contains( $text, '</object>' ) ) {
		$text = preg_replace( '|(<object[^>]*>)\s*|', '$1', $text );
		$text = preg_replace( '|\s*</object>|', '</object>', $text );
		$text = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $text );
	}

	/*
	 * Collapse line breaks inside <audio> and <video> elements,
	 * before and after <source> and <track> elements.
	 */
	if ( str_contains( $text, '<source' ) || str_contains( $text, '<track' ) ) {
		$text = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $text );
		$text = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $text );
		$text = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $text );
	}

	// Collapse line breaks before and after <figcaption> elements.
	if ( str_contains( $text, '<figcaption' ) ) {
		$text = preg_replace( '|\s*(<figcaption[^>]*>)|', '$1', $text );
		$text = preg_replace( '|</figcaption>\s*|', '</figcaption>', $text );
	}

	// Remove more than two contiguous line breaks.
	$text = preg_replace( "/\n\n+/", "\n\n", $text );

	// Split up the contents into an array of strings, separated by double line breaks.
	$paragraphs = preg_split( '/\n\s*\n/', $text, -1, PREG_SPLIT_NO_EMPTY );

	// Reset $text prior to rebuilding.
	$text = '';

	// Rebuild the content as a string, wrapping every bit with a <p>.
	foreach ( $paragraphs as $paragraph ) {
		$text .= '<p>' . trim( $paragraph, "\n" ) . "</p>\n";
	}

	// Under certain strange conditions it could create a P of entirely whitespace.
	$text = preg_replace( '|<p>\s*</p>|', '', $text );

	// Add a closing <p> inside <div>, <address>, or <form> tag if missing.
	$text = preg_replace( '!<p>([^<]+)</(div|address|form)>!', '<p>$1</p></$2>', $text );

	// If an opening or closing block element tag is wrapped in a <p>, unwrap it.
	$text = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $text );

	// In some cases <li> may get wrapped in <p>, fix them.
	$text = preg_replace( '|<p>(<li.+?)</p>|', '$1', $text );

	// If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
	$text = preg_replace( '|<p><blockquote([^>]*)>|i', '<blockquote$1><p>', $text );
	$text = str_replace( '</blockquote></p>', '</p></blockquote>', $text );

	// If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
	$text = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)!', '$1', $text );

	// If an opening or closing block element tag is followed by a closing <p> tag, remove it.
	$text = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $text );

	// Optionally insert line breaks.
	if ( $br ) {
		// Replace newlines that shouldn't be touched with a placeholder.
		$text = preg_replace_callback( '/<(script|style|svg|math).*?<\/\\1>/s', '_autop_newline_preservation_helper', $text );

		// Normalize <br>
		$text = str_replace( array( '<br>', '<br/>' ), '<br />', $text );

		// Replace any new line characters that aren't preceded by a <br /> with a <br />.
		$text = preg_replace( '|(?<!<br />)\s*\n|', "<br />\n", $text );

		// Replace newline placeholders with newlines.
		$text = str_replace( '<WPPreserveNewline />', "\n", $text );
	}

	// If a <br /> tag is after an opening or closing block tag, remove it.
	$text = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*<br />!', '$1', $text );

	// If a <br /> tag is before a subset of opening or closing block tags, remove it.
	$text = preg_replace( '!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $text );
	// Drop a newline that sits directly before a closing </p> at the end of the text.
	$text = preg_replace( "|\n</p>$|", '</p>', $text );

	// Replace placeholder <pre> tags with their original content.
	if ( ! empty( $pre_tags ) ) {
		$text = str_replace( array_keys( $pre_tags ), array_values( $pre_tags ), $text );
	}

	// Restore newlines in all elements.
	if ( str_contains( $text, '<!-- wpnl -->' ) ) {
		$text = str_replace( array( ' <!-- wpnl --> ', '<!-- wpnl -->' ), "\n", $text );
	}

	return $text;
}

/**
 * Separates HTML elements and comments from the text.
 *
 * The split keeps the delimiters (PREG_SPLIT_DELIM_CAPTURE), so the matched
 * elements are part of the returned array alongside the text between them.
 *
 * @since 4.2.4
 *
 * @param string $input The text which has to be formatted.
 * @return string[] Array of the formatted text.
 */
function wp_html_split( $input ) {
	return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE );
}

/**
 * Retrieves the regular expression for an HTML element.
 *
 * The expression is assembled on the first call and cached in a static variable.
 *
 * @since 4.4.0
 *
 * @return string The regular expression
 */
function get_html_split_regex() {
	static $regex;

	if ( ! isset( $regex ) ) {
		// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
		$comments =
			'!'             // Start of comment, after the <.
			. '(?:'         // Unroll the loop: Consume everything until --> is found.
			.     '-(?!->)' // Dash not followed by end of comment.
			.     '[^\-]*+' // Consume non-dashes.
			. ')*+'         // Loop possessively.
			. '(?:-->)?';   // End of comment. If not found, match all input.

		$cdata =
			'!\[CDATA\['    // Start of CDATA section, after the <.
			. '[^\]]*+'     // Consume non-].
			. '(?:'         // Unroll the loop: Consume everything until ]]> is found.
			.     '](?!]>)' // One ] not followed by end of CDATA section.
			.     '[^\]]*+' // Consume non-].
			. ')*+'         // Loop possessively.
			. '(?:]]>)?';   // End of CDATA section. If not found, match all input.

		$escaped =
			'(?='             // Is the element escaped?
			.    '!--'
			. '|'
			.    '!\[CDATA\['
			. ')'
			. '(?(?=!-)'      // If yes, which type?
			.     $comments
			. '|'
			.     $cdata
			. ')';

		$regex =
			'/('                // Capture the entire match.
			.     '<'           // Find start of element.
			.     '(?'          // Conditional expression follows.
			.         $escaped  // Find end of escaped element.
			.     '|'           // ...else...
			.         '[^>]*>?' // Find end of normal element.
			.     ')'
			. ')/';
		// phpcs:enable
	}

	return $regex;
}

/**
 * Retrieves the combined regular expression for HTML and shortcodes.
 *
 * The HTML part is built once and cached in a static variable; the shortcode
 * part, when given, is appended as an alternative on every call.
 *
 * @access private
 * @ignore
 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
 * @since 4.4.0
 *
 * @param string $shortcode_regex Optional. The result from _get_wptexturize_shortcode_regex().
 * @return string The regular expression
 */
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
	static $html_regex;

	if ( ! isset( $html_regex ) ) {
		// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
		$comment_regex =
			'!'             // Start of comment, after the <.
			. '(?:'         // Unroll the loop: Consume everything until --> is found.
			.     '-(?!->)' // Dash not followed by end of comment.
			.     '[^\-]*+' // Consume non-dashes.
			. ')*+'         // Loop possessively.
			. '(?:-->)?';   // End of comment. If not found, match all input.

		$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
			'<'                  // Find start of element.
			. '(?(?=!--)'        // Is this a comment?
			.     $comment_regex // Find end of comment.
			. '|'
			.     '[^>]*>?'      // Find end of element. If not found, match all input.
			. ')';
		// phpcs:enable
	}

	if ( empty( $shortcode_regex ) ) {
		$regex = '/(' . $html_regex . ')/';
	} else {
		$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/';
	}

	return $regex;
}

/**
 * Retrieves the regular expression for shortcodes.
 *
 * @access private
 * @ignore
 * @since 4.4.0
 *
 * @param string[] $tagnames Array of shortcodes to find.
* @return string The regular expression
 */
function _get_wptexturize_shortcode_regex( $tagnames ) {
	// The result is embedded in `/`-delimited patterns, so the delimiter must be escaped as well.
	$quoted_tagnames = array_map(
		static function ( $tagname ) {
			return preg_quote( (string) $tagname, '/' );
		},
		$tagnames
	);

	$tagregexp = implode( '|', $quoted_tagnames );
	$tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex().
	// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation
	$regex =
		'\['                // Find start of shortcode.
		. '[\/\[]?'         // Shortcodes may begin with [/ or [[.
		. $tagregexp        // Only match registered shortcodes, because performance.
		. '(?:'
		.     '[^\[\]<>]+'  // Shortcodes do not contain other shortcodes. Quantifier critical.
		. '|'
		.     '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
		. ')*+'             // Possessive critical.
		. '\]'              // Find end of shortcode.
		. '\]?';            // Shortcodes may end with ]].
	// phpcs:enable

	return $regex;
}

/**
 * Replaces characters or phrases within HTML elements only.
 *
 * Text between the elements is left untouched.
 *
 * @since 4.2.3
 *
 * @param string $haystack      The text which has to be formatted.
 * @param array  $replace_pairs In the form array('from' => 'to', ...).
 * @return string The formatted text.
 */
function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
	// Find all elements.
	$textarr = wp_html_split( $haystack );
	$changed = false;

	// Optimize when searching for one item.
	if ( 1 === count( $replace_pairs ) ) {
		// Extract $needle and $replace.
		$needle  = array_key_first( $replace_pairs );
		$replace = $replace_pairs[ $needle ];

		// Loop through delimiters (elements) only.
		for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) {
			if ( str_contains( $textarr[ $i ], $needle ) ) {
				$textarr[ $i ] = str_replace( $needle, $replace, $textarr[ $i ] );
				$changed       = true;
			}
		}
	} else {
		// Extract all $needles.
		$needles = array_keys( $replace_pairs );

		// Loop through delimiters (elements) only.
		for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) {
			foreach ( $needles as $needle ) {
				if ( str_contains( $textarr[ $i ], $needle ) ) {
					$textarr[ $i ] = strtr( $textarr[ $i ], $replace_pairs );
					$changed       = true;
					// After one strtr() break out of the foreach loop and look at next element.
					break;
				}
			}
		}
	}

	// Only rebuild the string when at least one element was modified.
	if ( $changed ) {
		$haystack = implode( $textarr );
	}

	return $haystack;
}

/**
 * Newline preservation help function for wpautop().
 *
 * Replaces every newline in the matched text with a `<WPPreserveNewline />` placeholder.
 *
 * @since 3.1.0
 * @access private
 *
 * @param array $matches preg_replace_callback matches array
 * @return string
 */
function _autop_newline_preservation_helper( $matches ) {
	return str_replace( "\n", '<WPPreserveNewline />', $matches[0] );
}

/**
 * Don't auto-p wrap shortcodes that stand alone.
 *
 * Ensures that shortcodes are not wrapped in `<p>...</p>`.
 *
 * If the replacement fails inside the regular expression engine, the content is
 * returned unchanged.
 *
 * @since 2.9.0
 *
 * @global array $shortcode_tags
 *
 * @param string $text The content.
 * @return string The filtered content.
 */
function shortcode_unautop( $text ) {
	global $shortcode_tags;

	if ( empty( $shortcode_tags ) || ! is_array( $shortcode_tags ) ) {
		return $text;
	}

	// The pattern below is `/`-delimited, so the delimiter must be escaped in the names as well.
	$quoted_tagnames = array_map(
		static function ( $tagname ) {
			return preg_quote( (string) $tagname, '/' );
		},
		array_keys( $shortcode_tags )
	);

	$tagregexp = implode( '|', $quoted_tagnames );
	$spaces    = wp_spaces_regexp();

	// phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound,Universal.WhiteSpace.PrecisionAlignment.Found -- don't remove regex indentation
	$pattern =
		'/'
		. '<p>'                              // Opening paragraph.
		. '(?:' . $spaces . ')*+'            // Optional leading whitespace.
		. '('                                // 1: The shortcode.
		.     '\\['                          // Opening bracket.
		.     "($tagregexp)"                 // 2: Shortcode name.
		.     '(?![\\w-])'                   // Not followed by word character or hyphen.
											 // Unroll the loop: Inside the opening shortcode tag.
		.     '[^\\]\\/]*'                   // Not a closing bracket or forward slash.
		.     '(?:'
		.         '\\/(?!\\])'               // A forward slash not followed by a closing bracket.
		.         '[^\\]\\/]*'               // Not a closing bracket or forward slash.
		.     ')*?'
		.     '(?:'
		.         '\\/\\]'                   // Self closing tag and closing bracket.
		.     '|'
		.         '\\]'                      // Closing bracket.
		.         '(?:'                      // Unroll the loop: Optionally, anything between the opening and closing shortcode tags.
		.             '[^\\[]*+'             // Not an opening bracket.
		.             '(?:'
		.                 '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag.
		.                 '[^\\[]*+'         // Not an opening bracket.
		.             ')*+'
		.             '\\[\\/\\2\\]'         // Closing shortcode tag.
		.         ')?'
		.     ')'
		. ')'
		. '(?:' . $spaces . ')*+'            // Optional trailing whitespace.
		. '<\\/p>'                           // Closing paragraph.
		. '/';
	// phpcs:enable

	$unwrapped = preg_replace( $pattern, '$1', $text );

	// preg_replace() returns null when PCRE fails; keep the content rather than discarding it.
	return null === $unwrapped ? $text : $unwrapped;
}

/**
 * Checks to see if a string is utf8 encoded.
 *
 * Only the byte structure is inspected: each lead byte must be followed by the
 * number of `10bbbbbb` continuation bytes that its bit pattern announces.
 *
 * NOTE: This function also accepts the 5- and 6-byte sequence forms, although
 * UTF8 has byte sequences with a maximum length of 4.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8( $str ) {
	// Measure the length with the binary-safe encoding active, then restore the previous one.
	mbstring_binary_safe_encoding();
	$length = strlen( $str );
	reset_mbstring_encoding();

	for ( $i = 0; $i < $length; $i++ ) {
		$c = ord( $str[ $i ] );

		// $n is the number of continuation bytes announced by the lead byte.
		if ( $c < 0x80 ) {
			$n = 0; // 0bbbbbbb
		} elseif ( ( $c & 0xE0 ) === 0xC0 ) {
			$n = 1; // 110bbbbb
		} elseif ( ( $c & 0xF0 ) === 0xE0 ) {
			$n = 2; // 1110bbbb
		} elseif ( ( $c & 0xF8 ) === 0xF0 ) {
			$n = 3; // 11110bbb
		} elseif ( ( $c & 0xFC ) === 0xF8 ) {
			$n = 4; // 111110bb
		} elseif ( ( $c & 0xFE ) === 0xFC ) {
			$n = 5; // 1111110b
		} else {
			return false; // Does not match any model.
		}

		for ( $j = 0; $j < $n; $j++ ) { // n bytes matching 10bbbbbb follow ?
			if ( ( ++$i === $length ) || ( ( ord( $str[ $i ] ) & 0xC0 ) !== 0x80 ) ) {
				return false;
			}
		}
	}

	return true;
}

/**
 * Converts a number of special characters into their HTML entities.
 *
 * Specifically deals with: `&`, `<`, `>`, `"`, and `'`.
 *
 * `$quote_style` can be set to ENT_COMPAT to encode `"` to
 * `&quot;`, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.2.2
 * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
* @access private * * @param string $text The text which is to be encoded. * @param int|string $quote_style Optional. Converts double quotes if set to ENT_COMPAT, * both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. * Converts single and double quotes, as well as converting HTML * named entities (that are not also XML named entities) to their * code points if set to ENT_XML1. Also compatible with old values; * converting single quotes if set to 'single', * double if set to 'double' or both if otherwise set. * Default is ENT_NOQUOTES. * @param false|string $charset Optional. The character encoding of the string. Default false. * @param bool $double_encode Optional. Whether to encode existing HTML entities. Default false. * @return string The encoded text with HTML entities. */ function _wp_specialchars( $text, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { $text = (string) $text; if ( 0 === strlen( $text ) ) { return ''; } // Don't bother if there are no specialchars - saves some processing. if ( ! preg_match( '/[&<>"\']/', $text ) ) { return $text; } // Account for the previous behavior of the function when the $quote_style is not an accepted value. if ( empty( $quote_style ) ) { $quote_style = ENT_NOQUOTES; } elseif ( ENT_XML1 === $quote_style ) { $quote_style = ENT_QUOTES | ENT_XML1; } elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) { $quote_style = ENT_QUOTES; } $charset = _canonical_charset( $charset ? $charset : get_option( 'blog_charset' ) ); $_quote_style = $quote_style; if ( 'double' === $quote_style ) { $quote_style = ENT_COMPAT; $_quote_style = ENT_COMPAT; } elseif ( 'single' === $quote_style ) { $quote_style = ENT_NOQUOTES; } if ( ! $double_encode ) { /* * Guarantee every &entity; is valid, convert &garbage; into &amp;garbage; * This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable. 
/**
 * Converts a number of HTML entities into their special characters.
 *
 * Specifically deals with: `&`, `<`, `>`, `"`, and `'`.
 *
 * `$quote_style` can be set to ENT_COMPAT to decode `"` entities,
 * or ENT_QUOTES to do both `"` and `'`. Default is ENT_NOQUOTES where no quotes are decoded.
 *
 * @since 2.8.0
 *
 * @param string     $text        The text which is to be decoded.
 * @param string|int $quote_style Optional. Converts double quotes if set to ENT_COMPAT,
 *                                both single and double if set to ENT_QUOTES or
 *                                none if set to ENT_NOQUOTES.
 *                                Also compatible with old _wp_specialchars() values;
 *                                converting single quotes if set to 'single',
 *                                double if set to 'double' or both if otherwise set.
 *                                Default is ENT_NOQUOTES.
 * @return string The decoded text without HTML entities.
 */
function wp_specialchars_decode( $text, $quote_style = ENT_NOQUOTES ) {
	$text = (string) $text;

	if ( '' === $text ) {
		return '';
	}

	// Without an ampersand there is no entity to decode - saves a lot of processing.
	if ( ! str_contains( $text, '&' ) ) {
		return $text;
	}

	// Match the previous behavior of _wp_specialchars() when the $quote_style is not an accepted value.
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	/*
	 * Entity tables, more complete than get_html_translation_table( HTML_SPECIALCHARS ).
	 * 'normalize' maps a pattern for zero-padded numeric entities onto the canonical
	 * spelling; 'decode' maps the canonical spellings onto the raw character.
	 */
	$entity_groups = array(
		'single' => array(
			'normalize' => array(
				'/&#0*39;/'   => '&#039;',
				'/&#x0*27;/i' => '&#x27;',
			),
			'decode'    => array(
				'&#039;' => '\'',
				'&#x27;' => '\'',
			),
		),
		'double' => array(
			'normalize' => array(
				'/&#0*34;/'   => '&#034;',
				'/&#x0*22;/i' => '&#x22;',
			),
			'decode'    => array(
				'&quot;' => '"',
				'&#034;' => '"',
				'&#x22;' => '"',
			),
		),
		'others' => array(
			'normalize' => array(
				'/&#0*60;/'   => '&#060;',
				'/&#0*62;/'   => '&#062;',
				'/&#0*38;/'   => '&#038;',
				'/&#x0*26;/i' => '&#x26;',
			),
			'decode'    => array(
				'&lt;'   => '<',
				'&#060;' => '<',
				'&gt;'   => '>',
				'&#062;' => '>',
				'&amp;'  => '&',
				'&#038;' => '&',
				'&#x26;' => '&',
			),
		),
	);

	// Pick the quote groups requested by $quote_style; the non-quote group always applies.
	$active_groups = array();

	if ( ENT_QUOTES === $quote_style || 'single' === $quote_style ) {
		$active_groups[] = 'single';
	}

	if ( ENT_QUOTES === $quote_style || ENT_COMPAT === $quote_style || 'double' === $quote_style ) {
		$active_groups[] = 'double';
	}

	$active_groups[] = 'others';

	$patterns    = array();
	$canonical   = array();
	$translation = array();

	foreach ( $active_groups as $group ) {
		foreach ( $entity_groups[ $group ]['normalize'] as $pattern => $entity ) {
			$patterns[]  = $pattern;
			$canonical[] = $entity;
		}

		$translation += $entity_groups[ $group ]['decode'];
	}

	// Remove zero padding on numeric entities.
	$text = preg_replace( $patterns, $canonical, $text );

	// Replace characters according to translation table.
	return strtr( $text, $translation );
}
/**
 * Checks for invalid UTF8 in a string.
 *
 * Valid text is returned untouched. Text containing invalid byte sequences is
 * replaced by an empty string or, when `$strip` is true, returned with the
 * invalid bytes removed.
 *
 * The check is skipped, and the text returned as is, when is_utf8_charset()
 * reports a non-UTF-8 site or when the installed PCRE library lacks UTF-8 support.
 *
 * @since 2.8.0
 *
 * @param string $text  The text which is to be checked.
 * @param bool   $strip Optional. Whether to attempt to strip out invalid UTF8. Default false.
 * @return string The checked text.
 */
function wp_check_invalid_utf8( $text, $strip = false ) {
	$text = (string) $text;

	if ( 0 === strlen( $text ) ) {
		return '';
	}

	// Cache the charset check in a static so it is only evaluated once.
	static $is_utf8 = null;
	if ( ! isset( $is_utf8 ) ) {
		$is_utf8 = is_utf8_charset();
	}
	if ( ! $is_utf8 ) {
		return $text;
	}

	// Check for support for utf8 in the installed PCRE library once and store the result in a static.
	static $utf8_pcre = null;
	if ( ! isset( $utf8_pcre ) ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}
	// We can't demand utf8 in the PCRE installation, so just return the string in those cases.
	if ( ! $utf8_pcre ) {
		return $text;
	}

	// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $text.
	if ( 1 === @preg_match( '/^./us', $text ) ) {
		return $text;
	}

	if ( ! $strip ) {
		return '';
	}

	/*
	 * Strip the bad bytes if requested (not recommended).
	 *
	 * iconv( 'utf-8', 'utf-8', $text ) cannot do this: without `//IGNORE` it raises
	 * a notice and returns false on the first illegal byte, which would leak a
	 * non-string to callers. Instead, keep every well-formed UTF-8 sequence
	 * (capture group 1) and drop any other single byte. The pattern runs without
	 * the `u` modifier on purpose, so that it can see the invalid bytes.
	 */
	$well_formed = '[\x00-\x7F]++'             // ASCII.
		. '|[\xC2-\xDF][\x80-\xBF]'            // 2-byte sequences, no overlongs.
		. '|\xE0[\xA0-\xBF][\x80-\xBF]'        // 3-byte sequences, excluding overlongs.
		. '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // 3-byte sequences, straight.
		. '|\xED[\x80-\x9F][\x80-\xBF]'        // 3-byte sequences, excluding surrogates.
		. '|\xF0[\x90-\xBF][\x80-\xBF]{2}'     // Planes 1-3.
		. '|[\xF1-\xF3][\x80-\xBF]{3}'         // Planes 4-15.
		. '|\xF4[\x80-\x8F][\x80-\xBF]{2}';    // Plane 16.

	$stripped = preg_replace( '/(' . $well_formed . ')|./s', '$1', $text );

	// preg_replace() yields null on a PCRE error; fall back to the "invalid" result.
	return is_string( $stripped ) ? $stripped : '';
}
/**
 * Encodes the Unicode values to be used in the URI.
 *
 * Bytes below 128 are copied through (or passed through rawurlencode() when
 * `$encode_ascii_characters` is true). Bytes of 128 and above are collected
 * into 2-, 3- or 4-byte groups according to the first byte of the group and
 * emitted as lowercase `%xx` triplets once the group is complete.
 *
 * When `$length` is non-zero, encoding stops before the first character whose
 * encoded form would push the result past that many bytes.
 *
 * @since 1.5.0
 * @since 5.8.3 Added the `encode_ascii_characters` parameter.
 *
 * @param string $utf8_string             String to encode.
 * @param int    $length                  Max length of the string
 * @param bool   $encode_ascii_characters Whether to encode ascii characters such as < " '
 * @return string String with Unicode encoded for URI.
 */
function utf8_uri_encode( $utf8_string, $length = 0, $encode_ascii_characters = false ) {
	$encoded        = '';
	$encoded_length = 0;
	$pending_bytes  = array();
	$expected_bytes = 1;

	mbstring_binary_safe_encoding();
	$total_bytes = strlen( $utf8_string );
	reset_mbstring_encoding();

	$offset = 0;

	while ( $offset < $total_bytes ) {
		$byte = ord( $utf8_string[ $offset ] );
		++$offset;

		if ( $byte < 128 ) {
			$piece = chr( $byte );

			if ( $encode_ascii_characters ) {
				$piece = rawurlencode( $piece );
			}

			$piece_length = strlen( $piece );

			if ( $length && $encoded_length + $piece_length > $length ) {
				break;
			}

			$encoded        .= $piece;
			$encoded_length += $piece_length;
			continue;
		}

		// The first byte of a group decides how many bytes the group holds.
		if ( empty( $pending_bytes ) ) {
			if ( $byte >= 240 ) {
				$expected_bytes = 4;
			} elseif ( $byte >= 224 ) {
				$expected_bytes = 3;
			} else {
				$expected_bytes = 2;
			}
		}

		$pending_bytes[] = $byte;

		// Every byte of the group becomes a three-character "%xx" triplet.
		$group_length = $expected_bytes * 3;

		if ( $length && $encoded_length + $group_length > $length ) {
			break;
		}

		if ( count( $pending_bytes ) !== $expected_bytes ) {
			continue;
		}

		foreach ( $pending_bytes as $pending_byte ) {
			$encoded .= '%' . dechex( $pending_byte );
		}

		$encoded_length += $group_length;
		$pending_bytes   = array();
		$expected_bytes  = 1;
	}

	return $encoded;
}
| * | U+00CB | Ë | E | Latin capital letter E with diaeresis | * | U+00CC | Ì | I | Latin capital letter I with grave | * | U+00CD | Í | I | Latin capital letter I with acute | * | U+00CE | Î | I | Latin capital letter I with circumflex | * | U+00CF | Ï | I | Latin capital letter I with diaeresis | * | U+00D0 | Ð | D | Latin capital letter Eth | * | U+00D1 | Ñ | N | Latin capital letter N with tilde | * | U+00D2 | Ò | O | Latin capital letter O with grave | * | U+00D3 | Ó | O | Latin capital letter O with acute | * | U+00D4 | Ô | O | Latin capital letter O with circumflex | * | U+00D5 | Õ | O | Latin capital letter O with tilde | * | U+00D6 | Ö | O | Latin capital letter O with diaeresis | * | U+00D8 | Ø | O | Latin capital letter O with stroke | * | U+00D9 | Ù | U | Latin capital letter U with grave | * | U+00DA | Ú | U | Latin capital letter U with acute | * | U+00DB | Û | U | Latin capital letter U with circumflex | * | U+00DC | Ü | U | Latin capital letter U with diaeresis | * | U+00DD | Ý | Y | Latin capital letter Y with acute | * | U+00DE | Þ | TH | Latin capital letter Thorn | * | U+00DF | ß | s | Latin small letter sharp s | * | U+00E0 | à | a | Latin small letter a with grave | * | U+00E1 | á | a | Latin small letter a with acute | * | U+00E2 | â | a | Latin small letter a with circumflex | * | U+00E3 | ã | a | Latin small letter a with tilde | * | U+00E4 | ä | a | Latin small letter a with diaeresis | * | U+00E5 | å | a | Latin small letter a with ring above | * | U+00E6 | æ | ae | Latin small letter ae | * | U+00E7 | ç | c | Latin small letter c with cedilla | * | U+00E8 | è | e | Latin small letter e with grave | * | U+00E9 | é | e | Latin small letter e with acute | * | U+00EA | ê | e | Latin small letter e with circumflex | * | U+00EB | ë | e | Latin small letter e with diaeresis | * | U+00EC | ì | i | Latin small letter i with grave | * | U+00ED | í | i | Latin small letter i with acute | * | U+00EE | î | i | Latin small letter i with circumflex | * 
| U+00EF | ï | i | Latin small letter i with diaeresis | * | U+00F0 | ð | d | Latin small letter Eth | * | U+00F1 | ñ | n | Latin small letter n with tilde | * | U+00F2 | ò | o | Latin small letter o with grave | * | U+00F3 | ó | o | Latin small letter o with acute | * | U+00F4 | ô | o | Latin small letter o with circumflex | * | U+00F5 | õ | o | Latin small letter o with tilde | * | U+00F6 | ö | o | Latin small letter o with diaeresis | * | U+00F8 | ø | o | Latin small letter o with stroke | * | U+00F9 | ù | u | Latin small letter u with grave | * | U+00FA | ú | u | Latin small letter u with acute | * | U+00FB | û | u | Latin small letter u with circumflex | * | U+00FC | ü | u | Latin small letter u with diaeresis | * | U+00FD | ý | y | Latin small letter y with acute | * | U+00FE | þ | th | Latin small letter Thorn | * | U+00FF | ÿ | y | Latin small letter y with diaeresis | * * Decompositions for Latin Extended-A: * * | Code | Glyph | Replacement | Description | * | ------- | ----- | ----------- | ------------------------------------------------- | * | U+0100 | Ā | A | Latin capital letter A with macron | * | U+0101 | ā | a | Latin small letter a with macron | * | U+0102 | Ă | A | Latin capital letter A with breve | * | U+0103 | ă | a | Latin small letter a with breve | * | U+0104 | Ą | A | Latin capital letter A with ogonek | * | U+0105 | ą | a | Latin small letter a with ogonek | * | U+0106 | Ć | C | Latin capital letter C with acute | * | U+0107 | ć | c | Latin small letter c with acute | * | U+0108 | Ĉ | C | Latin capital letter C with circumflex | * | U+0109 | ĉ | c | Latin small letter c with circumflex | * | U+010A | Ċ | C | Latin capital letter C with dot above | * | U+010B | ċ | c | Latin small letter c with dot above | * | U+010C | Č | C | Latin capital letter C with caron | * | U+010D | č | c | Latin small letter c with caron | * | U+010E | Ď | D | Latin capital letter D with caron | * | U+010F | ď | d | Latin small letter d with caron | * | U+0110 | 
Đ | D | Latin capital letter D with stroke | * | U+0111 | đ | d | Latin small letter d with stroke | * | U+0112 | Ē | E | Latin capital letter E with macron | * | U+0113 | ē | e | Latin small letter e with macron | * | U+0114 | Ĕ | E | Latin capital letter E with breve | * | U+0115 | ĕ | e | Latin small letter e with breve | * | U+0116 | Ė | E | Latin capital letter E with dot above | * | U+0117 | ė | e | Latin small letter e with dot above | * | U+0118 | Ę | E | Latin capital letter E with ogonek | * | U+0119 | ę | e | Latin small letter e with ogonek | * | U+011A | Ě | E | Latin capital letter E with caron | * | U+011B | ě | e | Latin small letter e with caron | * | U+011C | Ĝ | G | Latin capital letter G with circumflex | * | U+011D | ĝ | g | Latin small letter g with circumflex | * | U+011E | Ğ | G | Latin capital letter G with breve | * | U+011F | ğ | g | Latin small letter g with breve | * | U+0120 | Ġ | G | Latin capital letter G with dot above | * | U+0121 | ġ | g | Latin small letter g with dot above | * | U+0122 | Ģ | G | Latin capital letter G with cedilla | * | U+0123 | ģ | g | Latin small letter g with cedilla | * | U+0124 | Ĥ | H | Latin capital letter H with circumflex | * | U+0125 | ĥ | h | Latin small letter h with circumflex | * | U+0126 | Ħ | H | Latin capital letter H with stroke | * | U+0127 | ħ | h | Latin small letter h with stroke | * | U+0128 | Ĩ | I | Latin capital letter I with tilde | * | U+0129 | ĩ | i | Latin small letter i with tilde | * | U+012A | Ī | I | Latin capital letter I with macron | * | U+012B | ī | i | Latin small letter i with macron | * | U+012C | Ĭ | I | Latin capital letter I with breve | * | U+012D | ĭ | i | Latin small letter i with breve | * | U+012E | Į | I | Latin capital letter I with ogonek | * | U+012F | į | i | Latin small letter i with ogonek | * | U+0130 | İ | I | Latin capital letter I with dot above | * | U+0131 | ı | i | Latin small letter dotless i | * | U+0132 | IJ | IJ | Latin capital ligature IJ | * | 
U+0133 | ĳ | ij | Latin small ligature ij | * | U+0134 | Ĵ | J | Latin capital letter J with circumflex | * | U+0135 | ĵ | j | Latin small letter j with circumflex | * | U+0136 | Ķ | K | Latin capital letter K with cedilla | * | U+0137 | ķ | k | Latin small letter k with cedilla | * | U+0138 | ĸ | k | Latin small letter Kra | * | U+0139 | Ĺ | L | Latin capital letter L with acute | * | U+013A | ĺ | l | Latin small letter l with acute | * | U+013B | Ļ | L | Latin capital letter L with cedilla | * | U+013C | ļ | l | Latin small letter l with cedilla | * | U+013D | Ľ | L | Latin capital letter L with caron | * | U+013E | ľ | l | Latin small letter l with caron | * | U+013F | Ŀ | L | Latin capital letter L with middle dot | * | U+0140 | ŀ | l | Latin small letter l with middle dot | * | U+0141 | Ł | L | Latin capital letter L with stroke | * | U+0142 | ł | l | Latin small letter l with stroke | * | U+0143 | Ń | N | Latin capital letter N with acute | * | U+0144 | ń | n | Latin small letter n with acute | * | U+0145 | Ņ | N | Latin capital letter N with cedilla | * | U+0146 | ņ | n | Latin small letter n with cedilla | * | U+0147 | Ň | N | Latin capital letter N with caron | * | U+0148 | ň | n | Latin small letter n with caron | * | U+0149 | ŉ | n | Latin small letter n preceded by apostrophe | * | U+014A | Ŋ | N | Latin capital letter Eng | * | U+014B | ŋ | n | Latin small letter Eng | * | U+014C | Ō | O | Latin capital letter O with macron | * | U+014D | ō | o | Latin small letter o with macron | * | U+014E | Ŏ | O | Latin capital letter O with breve | * | U+014F | ŏ | o | Latin small letter o with breve | * | U+0150 | Ő | O | Latin capital letter O with double acute | * | U+0151 | ő | o | Latin small letter o with double acute | * | U+0152 | Œ | OE | Latin capital ligature OE | * | U+0153 | œ | oe | Latin small ligature oe | * | U+0154 | Ŕ | R | Latin capital letter R with acute | * | U+0155 | ŕ | r | Latin small letter r with acute | * | U+0156 | Ŗ | R | Latin 
capital letter R with cedilla | * | U+0157 | ŗ | r | Latin small letter r with cedilla | * | U+0158 | Ř | R | Latin capital letter R with caron | * | U+0159 | ř | r | Latin small letter r with caron | * | U+015A | Ś | S | Latin capital letter S with acute | * | U+015B | ś | s | Latin small letter s with acute | * | U+015C | Ŝ | S | Latin capital letter S with circumflex | * | U+015D | ŝ | s | Latin small letter s with circumflex | * | U+015E | Ş | S | Latin capital letter S with cedilla | * | U+015F | ş | s | Latin small letter s with cedilla | * | U+0160 | Š | S | Latin capital letter S with caron | * | U+0161 | š | s | Latin small letter s with caron | * | U+0162 | Ţ | T | Latin capital letter T with cedilla | * | U+0163 | ţ | t |