![]() |
TYPO3
7.6
|
Public Member Functions | |
__construct () | |
parse_charset ($charset) | |
get_locale_charset ($locale) | |
conv ($inputString, $fromCharset, $toCharset, $useEntityForNoChar=false) | |
convArray (&$array, $fromCharset, $toCharset, $useEntityForNoChar=false) | |
utf8_encode ($str, $charset) | |
utf8_decode ($str, $charset, $useEntityForNoChar=false) | |
utf8_to_entities ($str) | |
entities_to_utf8 ($str, $alsoStdHtmlEnt=false) | |
utf8_to_numberarray ($str, $convEntities=false, $retChar=false) | |
UnumberToChar ($unicodeInteger) | |
utf8CharToUnumber ($str, $hex=false) | |
initCharset ($charset) | |
initUnicodeData ($mode=null) | |
initCaseFolding ($charset) | |
initToASCII ($charset) | |
substr ($charset, $string, $start, $len=null) | |
strlen ($charset, $string) | |
crop ($charset, $string, $len, $crop= '') | |
strtrunc ($charset, $string, $len) | |
conv_case ($charset, $string, $case) | |
convCaseFirst ($charset, $string, $case) | |
specCharsToASCII ($charset, $string) | |
getPreferredClientLanguage ($languageCodesList) | |
sb_char_mapping ($str, $charset, $mode, $opt= '') | |
utf8_substr ($str, $start, $len=null) | |
utf8_strlen ($str) | |
utf8_strtrunc ($str, $len) | |
utf8_strpos ($haystack, $needle, $offset=0) | |
utf8_strrpos ($haystack, $needle) | |
utf8_char2byte_pos ($str, $pos) | |
utf8_byte2char_pos ($str, $pos) | |
utf8_char_mapping ($str, $mode, $opt= '') | |
euc_strtrunc ($str, $len, $charset) | |
euc_substr ($str, $start, $charset, $len=null) | |
euc_strlen ($str, $charset) | |
euc_char2byte_pos ($str, $pos, $charset) | |
euc_char_mapping ($str, $charset, $mode, $opt= '') | |
Public Attributes | |
$noCharByteVal = 63 | |
$parsedCharsets = array() | |
$caseFolding = array() | |
$toASCII = array() | |
$twoByteSets | |
$fourByteSets | |
$eucBasedSets | |
$synonyms | |
$lang_to_script | |
$script_to_charset_unix | |
$script_to_charset_windows | |
$locale_to_charset | |
$charSetArray | |
Protected Member Functions | |
cropMbstring ($charset, $string, $len, $crop= '') | |
Protected Attributes | |
$locales | |
Notes on UTF-8
Functions working on UTF-8 strings:
Functions nearly working on UTF-8 strings:
Functions NOT working on UTF-8 strings:
Definition at line 53 of file CharsetConverter.php.
__construct | ( | ) |
Constructor
Definition at line 527 of file CharsetConverter.php.
References GeneralUtility\makeInstance().
conv | ( | $inputString, | |
$fromCharset, | |||
$toCharset, | |||
$useEntityForNoChar = false |
|||
) |
Convert from one charset to another charset.
string | $inputString | Input string |
string | $fromCharset | From charset (the current charset of the string) |
string | $toCharset | To charset (the output charset wanted) |
bool | $useEntityForNoChar | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 604 of file CharsetConverter.php.
References $GLOBALS, CharsetConverter\utf8_decode(), and CharsetConverter\utf8_encode().
Referenced by CharsetConverter\convArray().
conv_case | ( | $charset, | |
$string, | |||
$case | |||
) |
Translates all characters of a string into their respective case values. Unlike strtolower() and strtoupper(), this method is locale independent. Note that the string length may change, e.g. lower case German "ß" (sharp S) becomes upper case "SS". Unit-tested by Kasper. Real case folding is language dependent; this method ignores this fact.
string | $charset | Character set of string |
string | $string | Input string to convert case for |
string | $case | Case keyword: "toLower" means lowercase conversion, anything else is uppercase (use "toUpper" ) |
Definition at line 1641 of file CharsetConverter.php.
References $GLOBALS, elseif, CharsetConverter\euc_char_mapping(), CharsetConverter\sb_char_mapping(), and CharsetConverter\utf8_char_mapping().
Referenced by CharsetConverter\convCaseFirst().
convArray | ( | & | $array, |
$fromCharset, | |||
$toCharset, | |||
$useEntityForNoChar = false |
|||
) |
Convert all elements in ARRAY with type string from one charset to another charset. NOTICE: Array is passed by reference!
array | $array | Input array, possibly multidimensional |
string | $fromCharset | From charset (the current charset of the string) |
string | $toCharset | To charset (the output charset wanted) |
bool | $useEntityForNoChar | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 653 of file CharsetConverter.php.
References CharsetConverter\conv(), and elseif.
convCaseFirst | ( | $charset, | |
$string, | |||
$case | |||
) |
Equivalent of lcfirst/ucfirst but using character set.
string | $charset | |
string | $string | |
string | $case |
Definition at line 1669 of file CharsetConverter.php.
References CharsetConverter\conv_case(), and CharsetConverter\substr().
crop | ( | $charset, | |
$string, | |||
$len, | |||
$crop = '' |
|||
) |
Truncates a string and pre-/appends a string. Unit tested by Kasper
string | $charset | The character set |
string | $string | Character string |
int | $len | Length (in characters) |
string | $crop | Crop signifier |
Definition at line 1555 of file CharsetConverter.php.
References $GLOBALS, CharsetConverter\cropMbstring(), elseif, CharsetConverter\euc_char2byte_pos(), CharsetConverter\strlen(), CharsetConverter\substr(), and CharsetConverter\utf8_char2byte_pos().
cropMbstring | ( | $charset, | |
$string, | |||
$len, | |||
$crop = '' |
|||
) |
protected |
Method to crop strings using the mb_substr function.
string | $charset | The character set |
string | $string | String to be cropped |
int | $len | Crop length (in characters) |
string | $crop | Crop signifier |
Definition at line 1531 of file CharsetConverter.php.
Referenced by CharsetConverter\crop().
entities_to_utf8 | ( | $str, | |
$alsoStdHtmlEnt = false |
|||
) |
Converts numeric entities (UNICODE, e.g. decimal (&#1234;) or hexadecimal (&#x1b;)) to UTF-8 multibyte chars.
string | $str | Input string, UTF-8 |
bool | $alsoStdHtmlEnt | If set, then all string-HTML entities (like &amp; or &pound;) will be converted as well |
Definition at line 842 of file CharsetConverter.php.
References elseif, CharsetConverter\substr(), and CharsetConverter\UnumberToChar().
Referenced by CharsetConverter\utf8_to_numberarray().
euc_char2byte_pos | ( | $str, | |
$pos, | |||
$charset | |||
) |
Translates a character position into an 'absolute' byte position.
string | $str | EUC multibyte character string |
int | $pos | Character position (negative values start from the end) |
string | $charset | The charset |
Definition at line 2182 of file CharsetConverter.php.
References CharsetConverter\strlen().
Referenced by CharsetConverter\crop(), and CharsetConverter\euc_substr().
euc_char_mapping | ( | $str, | |
$charset, | |||
$mode, | |||
$opt = '' |
|||
) |
Maps all characters of a string in the EUC charset family.
string | $str | EUC multibyte character string |
string | $charset | The charset |
string | $mode | Mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
string | $opt | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 2229 of file CharsetConverter.php.
References CharsetConverter\initCaseFolding(), CharsetConverter\initToASCII(), and CharsetConverter\substr().
Referenced by CharsetConverter\conv_case(), and CharsetConverter\specCharsToASCII().
euc_strlen | ( | $str, | |
$charset | |||
) |
Counts the number of characters of a string in the EUC charset family.
string | $str | EUC multibyte character string |
string | $charset | The charset |
Definition at line 2154 of file CharsetConverter.php.
Referenced by CharsetConverter\strlen().
euc_strtrunc | ( | $str, | |
$len, | |||
$charset | |||
) |
Cuts a string in the EUC charset family short at a given byte length.
string | $str | EUC multibyte character string |
int | $len | The byte length |
string | $charset | The charset |
Definition at line 2089 of file CharsetConverter.php.
References CharsetConverter\substr().
Referenced by CharsetConverter\strtrunc().
euc_substr | ( | $str, | |
$start, | |||
$charset, | |||
$len = null |
|||
) |
Returns a part of a string in the EUC charset family.
string | $str | EUC multibyte character string |
int | $start | Start position (character position) |
string | $charset | The charset |
int | $len | Length (in characters) |
Definition at line 2125 of file CharsetConverter.php.
References CharsetConverter\euc_char2byte_pos(), and CharsetConverter\substr().
Referenced by CharsetConverter\substr().
get_locale_charset | ( | $locale | ) |
Get the charset of a locale.
Supported locale string formats:
ln | language |
ln_CN | language / country |
ln_CN.cs | language / country / charset |
ln_CN.cs@mod | language / country / charset / modifier |
string | $locale | Locale string |
Definition at line 558 of file CharsetConverter.php.
References CharsetConverter\parse_charset().
getPreferredClientLanguage | ( | $languageCodesList | ) |
Converts the language codes that we get from the client (usually HTTP_ACCEPT_LANGUAGE) into a TYPO3-readable language code
string | $languageCodesList | List of language codes. something like 'de,en-us;q=0.9,de-de;q=0.7,es-cl;q=0.6,en;q=0.4,es;q=0.3,zh;q=0.1' |
Definition at line 1704 of file CharsetConverter.php.
References GeneralUtility\trimExplode().
initCaseFolding | ( | $charset | ) |
This function initializes the folding table for a charset other than UTF-8. This function is automatically called by the case folding functions.
string | $charset | Charset for which to initialize case folding. |
Definition at line 1338 of file CharsetConverter.php.
References GeneralUtility\getFileAbsFileName(), GeneralUtility\getUrl(), CharsetConverter\initCharset(), CharsetConverter\initUnicodeData(), CharsetConverter\utf8_decode(), and GeneralUtility\writeFileToTypo3tempDir().
Referenced by CharsetConverter\euc_char_mapping(), and CharsetConverter\sb_char_mapping().
initCharset | ( | $charset | ) |
This will initialize a charset for use if it's defined in the 'typo3/sysext/core/Resources/Private/Charsets/csconvtbl/' folder. This function is automatically called by the conversion functions.
PLEASE SEE: http://www.unicode.org/Public/MAPPINGS/
string | $charset | The charset to be initialized. Use lowercase charset always (the charset must match exactly with a filename in the csconvtbl/ folder: [charset].tbl) |
Definition at line 1034 of file CharsetConverter.php.
References elseif, ExtensionManagementUtility\extPath(), GeneralUtility\getFileAbsFileName(), GeneralUtility\getUrl(), CharsetConverter\substr(), GeneralUtility\trimExplode(), CharsetConverter\UnumberToChar(), GeneralUtility\validPathStr(), and GeneralUtility\writeFileToTypo3tempDir().
Referenced by CharsetConverter\initCaseFolding(), CharsetConverter\initToASCII(), CharsetConverter\utf8_decode(), and CharsetConverter\utf8_encode().
initToASCII | ( | $charset | ) |
This function initializes the to-ASCII conversion table for a charset other than UTF-8. This function is automatically called by the ASCII transliteration functions.
string | $charset | Charset for which to initialize conversion. |
Definition at line 1400 of file CharsetConverter.php.
References GeneralUtility\getFileAbsFileName(), GeneralUtility\getUrl(), CharsetConverter\initCharset(), CharsetConverter\initUnicodeData(), CharsetConverter\utf8_decode(), and GeneralUtility\writeFileToTypo3tempDir().
Referenced by CharsetConverter\euc_char_mapping(), and CharsetConverter\sb_char_mapping().
initUnicodeData | ( | $mode = null | ) |
This function initializes all UTF-8 character data tables.
PLEASE SEE: http://www.unicode.org/Public/UNIDATA/
string | $mode | Mode ("case", "ascii", ...) |
Definition at line 1100 of file CharsetConverter.php.
References elseif, ExtensionManagementUtility\extPath(), GeneralUtility\getFileAbsFileName(), GeneralUtility\getUrl(), GeneralUtility\trimExplode(), CharsetConverter\UnumberToChar(), GeneralUtility\validPathStr(), and GeneralUtility\writeFileToTypo3tempDir().
Referenced by CharsetConverter\initCaseFolding(), CharsetConverter\initToASCII(), and CharsetConverter\utf8_char_mapping().
parse_charset | ( | $charset | ) |
Normalize - changes input character set to lowercase letters.
string | $charset | Input charset |
Definition at line 538 of file CharsetConverter.php.
Referenced by CharsetConverter\get_locale_charset().
sb_char_mapping | ( | $str, | |
$charset, | |||
$mode, | |||
$opt = '' |
|||
) |
Maps all characters of a string in a single byte charset.
string | $str | The string |
string | $charset | The charset |
string | $mode | Mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
string | $opt | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 1765 of file CharsetConverter.php.
References CharsetConverter\initCaseFolding(), and CharsetConverter\initToASCII().
Referenced by CharsetConverter\conv_case(), and CharsetConverter\specCharsToASCII().
specCharsToASCII | ( | $charset, | |
$string | |||
) |
Converts special chars (like æøåÆØÅ, umlauts etc) to ascii equivalents (usually double-bytes, like æ => ae etc.)
string | $charset | Character set of string |
string | $string | Input string to convert |
Definition at line 1684 of file CharsetConverter.php.
References elseif, CharsetConverter\euc_char_mapping(), CharsetConverter\sb_char_mapping(), and CharsetConverter\utf8_char_mapping().
strlen | ( | $charset, | |
$string | |||
) |
Counts the number of characters. Unit-tested by Kasper (single byte charsets only)
string | $charset | The character set |
string | $string | Character string |
Definition at line 1502 of file CharsetConverter.php.
References $GLOBALS, elseif, CharsetConverter\euc_strlen(), and CharsetConverter\utf8_strlen().
Referenced by CharsetConverter\crop(), CharsetConverter\euc_char2byte_pos(), CharsetConverter\utf8_char2byte_pos(), CharsetConverter\utf8_decode(), CharsetConverter\utf8_encode(), CharsetConverter\utf8_to_entities(), and CharsetConverter\utf8_to_numberarray().
strtrunc | ( | $charset, | |
$string, | |||
$len | |||
) |
Cuts a string short at a given byte length.
string | $charset | The character set |
string | $string | Character string |
int | $len | The byte length |
Definition at line 1603 of file CharsetConverter.php.
References $GLOBALS, elseif, CharsetConverter\euc_strtrunc(), CharsetConverter\substr(), and CharsetConverter\utf8_strtrunc().
substr | ( | $charset, | |
$string, | |||
$start, | |||
$len = null |
|||
) |
Returns a part of a string. Unit-tested by Kasper (single byte charsets only)
string | $charset | The character set |
string | $string | Character string |
int | $start | Start position (character position) |
int | $len | Length (in characters) |
Definition at line 1449 of file CharsetConverter.php.
References $GLOBALS, elseif, CharsetConverter\euc_substr(), and CharsetConverter\utf8_substr().
Referenced by CharsetConverter\convCaseFirst(), CharsetConverter\crop(), CharsetConverter\entities_to_utf8(), CharsetConverter\euc_char_mapping(), CharsetConverter\euc_strtrunc(), CharsetConverter\euc_substr(), CharsetConverter\initCharset(), CharsetConverter\strtrunc(), CharsetConverter\utf8_char_mapping(), CharsetConverter\utf8_decode(), CharsetConverter\utf8_encode(), CharsetConverter\utf8_strtrunc(), CharsetConverter\utf8_substr(), CharsetConverter\utf8_to_entities(), CharsetConverter\utf8_to_numberarray(), and CharsetConverter\utf8CharToUnumber().
UnumberToChar | ( | $unicodeInteger | ) |
Converts a UNICODE number to a UTF-8 multibyte character. Algorithm based on a script found at http://czyborra.com/utf/ (unit-tested by Kasper).
The binary representation of the character's integer value is thus simply spread across the bytes and the number of high bits set in the lead byte announces the number of bytes in the multibyte sequence:
bytes | bits | representation |
1 | 7 | 0vvvvvvv |
2 | 11 | 110vvvvv 10vvvvvv |
3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv |
4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv |
5 | 26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv |
6 | 31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv |
int | $unicodeInteger | UNICODE integer |
Definition at line 947 of file CharsetConverter.php.
References elseif.
Referenced by CharsetConverter\entities_to_utf8(), CharsetConverter\initCharset(), and CharsetConverter\initUnicodeData().
utf8_byte2char_pos | ( | $str, | |
$pos | |||
) |
Translates an 'absolute' byte position into a character position. Unit tested by Kasper.
string | $str | UTF-8 string |
int | $pos | Byte position |
Definition at line 2000 of file CharsetConverter.php.
References elseif.
Referenced by CharsetConverter\utf8_strpos(), and CharsetConverter\utf8_strrpos().
utf8_char2byte_pos | ( | $str, | |
$pos | |||
) |
Translates a character position into an 'absolute' byte position. Unit tested by Kasper.
string | $str | UTF-8 string |
int | $pos | Character position (negative values start from the end) |
Definition at line 1953 of file CharsetConverter.php.
References elseif, and CharsetConverter\strlen().
Referenced by CharsetConverter\crop(), CharsetConverter\utf8_strpos(), and CharsetConverter\utf8_substr().
utf8_char_mapping | ( | $str, | |
$mode, | |||
$opt = '' |
|||
) |
Maps all characters of an UTF-8 string.
string | $str | UTF-8 string |
string | $mode | Mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
string | $opt | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 2029 of file CharsetConverter.php.
References elseif, CharsetConverter\initUnicodeData(), and CharsetConverter\substr().
Referenced by CharsetConverter\conv_case(), and CharsetConverter\specCharsToASCII().
utf8_decode | ( | $str, | |
$charset, | |||
$useEntityForNoChar = false |
|||
) |
Converts $str from UTF-8 to $charset
string | $str | String in UTF-8 to convert to local charset |
string | $charset | Charset, lowercase. Must be found in csconvtbl/ folder. |
bool | $useEntityForNoChar | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 731 of file CharsetConverter.php.
References elseif, CharsetConverter\initCharset(), CharsetConverter\strlen(), CharsetConverter\substr(), and CharsetConverter\utf8CharToUnumber().
Referenced by CharsetConverter\conv(), CharsetConverter\initCaseFolding(), and CharsetConverter\initToASCII().
utf8_encode | ( | $str, | |
$charset | |||
) |
Converts $str from $charset to UTF-8
string | $str | String in local charset to convert to UTF-8 |
string | $charset | Charset, lowercase. Must be found in csconvtbl/ folder. |
Definition at line 671 of file CharsetConverter.php.
References elseif, CharsetConverter\initCharset(), CharsetConverter\strlen(), and CharsetConverter\substr().
Referenced by CharsetConverter\conv().
utf8_strlen | ( | $str | ) |
Counts the number of characters of a string in UTF-8. Unit-tested by Kasper and works 100% like strlen() / mb_strlen()
string | $str | UTF-8 multibyte character string |
Definition at line 1847 of file CharsetConverter.php.
References elseif.
Referenced by CharsetConverter\strlen().
utf8_strpos | ( | $haystack, | |
$needle, | |||
$offset = 0 |
|||
) |
Find position of first occurrence of a string, both arguments are in UTF-8.
string | $haystack | UTF-8 string to search in |
string | $needle | UTF-8 string to search for |
int | $offset | Position to start the search |
Definition at line 1902 of file CharsetConverter.php.
References $GLOBALS, elseif, CharsetConverter\utf8_byte2char_pos(), and CharsetConverter\utf8_char2byte_pos().
utf8_strrpos | ( | $haystack, | |
$needle | |||
) |
Find position of last occurrence of a char in a string, both arguments are in UTF-8.
string | $haystack | UTF-8 string to search in |
string | $needle | UTF-8 character to search for (single character) |
Definition at line 1930 of file CharsetConverter.php.
References $GLOBALS, elseif, and CharsetConverter\utf8_byte2char_pos().
utf8_strtrunc | ( | $str, | |
$len | |||
) |
Truncates a string in UTF-8 short at a given byte length.
string | $str | UTF-8 multibyte character string |
int | $len | The byte length |
Definition at line 1871 of file CharsetConverter.php.
References CharsetConverter\substr().
Referenced by CharsetConverter\strtrunc().
utf8_substr | ( | $str, | |
$start, | |||
$len = null |
|||
) |
Returns a part of a UTF-8 string. Unit-tested by Kasper and works 100% like substr() / mb_substr() for full range of $start/$len
string | $str | UTF-8 string |
int | $start | Start position (character position) |
int | $len | Length (in characters) |
Definition at line 1812 of file CharsetConverter.php.
References CharsetConverter\substr(), and CharsetConverter\utf8_char2byte_pos().
Referenced by CharsetConverter\substr().
utf8_to_entities | ( | $str | ) |
Converts all chars > 127 to numeric entities.
string | $str | Input string |
Definition at line 797 of file CharsetConverter.php.
References CharsetConverter\strlen(), CharsetConverter\substr(), and CharsetConverter\utf8CharToUnumber().
utf8_to_numberarray | ( | $str, | |
$convEntities = false , |
|||
$retChar = false |
|||
) |
Converts all chars in the input UTF-8 string into integer numbers returned in an array
string | $str | Input string, UTF-8 |
bool | $convEntities | If set, then all HTML entities (like &amp; or &pound; or &#123; or &#x3f5d;) will be detected as characters. |
bool | $retChar | If set, then instead of integer numbers the real UTF-8 char is returned. |
Definition at line 884 of file CharsetConverter.php.
References CharsetConverter\$noCharByteVal, CharsetConverter\entities_to_utf8(), CharsetConverter\strlen(), CharsetConverter\substr(), and CharsetConverter\utf8CharToUnumber().
utf8CharToUnumber | ( | $str, | |
$hex = false |
|||
) |
Converts a UTF-8 multibyte character to a UNICODE number. Unit-tested by Kasper.
string | $str | UTF-8 multibyte character string |
bool | $hex | If set, then a hex. number is returned. |
Definition at line 993 of file CharsetConverter.php.
References CharsetConverter\substr().
Referenced by CharsetConverter\utf8_decode(), CharsetConverter\utf8_to_entities(), and CharsetConverter\utf8_to_numberarray().
$caseFolding = array() |
Definition at line 79 of file CharsetConverter.php.
$charSetArray |
Definition at line 455 of file CharsetConverter.php.
$eucBasedSets |
Definition at line 112 of file CharsetConverter.php.
$fourByteSets |
Definition at line 102 of file CharsetConverter.php.
$lang_to_script |
Definition at line 215 of file CharsetConverter.php.
$locale_to_charset |
Definition at line 439 of file CharsetConverter.php.
$locales |
protected |
Definition at line 58 of file CharsetConverter.php.
$noCharByteVal = 63 |
Definition at line 65 of file CharsetConverter.php.
Referenced by CharsetConverter\utf8_to_numberarray().
$parsedCharsets = array() |
Definition at line 72 of file CharsetConverter.php.
$script_to_charset_unix |
Definition at line 385 of file CharsetConverter.php.
$script_to_charset_windows |
Definition at line 412 of file CharsetConverter.php.
$synonyms |
Definition at line 125 of file CharsetConverter.php.
$toASCII = array() |
Definition at line 86 of file CharsetConverter.php.
$twoByteSets |
Definition at line 93 of file CharsetConverter.php.