Charsets.php 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * MySQL charset metadata and manipulations
  5. *
  6. * @package PhpMyAdmin
  7. */
  8. namespace PhpMyAdmin;
  9. use PhpMyAdmin\DatabaseInterface;
  10. use PhpMyAdmin\Util;
  /**
   * Loads, caches and describes MySQL character sets and collations.
   *
   * Charset and collation metadata is fetched from the server once and then
   * kept in static properties for the rest of the request.
   *
   * @package PhpMyAdmin
   */
  class Charsets
  {
      /**
       * Maps lowercase encoding labels (e.g. 'iso-8859-1', 'utf-8') to the
       * name of the corresponding MySQL character set.
       *
       * @var array
       */
      public static $mysql_charset_map = array(
          'big5'         => 'big5',
          'cp-866'       => 'cp866',
          'euc-jp'       => 'ujis',
          'euc-kr'       => 'euckr',
          'gb2312'       => 'gb2312',
          'gbk'          => 'gbk',
          'iso-8859-1'   => 'latin1',
          'iso-8859-2'   => 'latin2',
          'iso-8859-7'   => 'greek',
          'iso-8859-8'   => 'hebrew',
          'iso-8859-8-i' => 'hebrew',
          'iso-8859-9'   => 'latin5',
          'iso-8859-13'  => 'latin7',
          'iso-8859-15'  => 'latin1',
          'koi8-r'       => 'koi8r',
          'shift_jis'    => 'sjis',
          'tis-620'      => 'tis620',
          'utf-8'        => 'utf8',
          'windows-1250' => 'cp1250',
          'windows-1251' => 'cp1251',
          'windows-1252' => 'latin1',
          'windows-1256' => 'cp1256',
          'windows-1257' => 'cp1257',
      );

      /**
       * Cached charset names, sorted as strings.
       *
       * @var array
       */
      private static $_charsets = array();

      /**
       * Cached charset descriptions, keyed by charset name.
       *
       * @var array
       */
      private static $_charsets_descriptions = array();

      /**
       * Cached collation names: charset name => sorted list of collations.
       *
       * @var array
       */
      private static $_collations = array();

      /**
       * Cached default collation of each charset, keyed by charset name.
       *
       * @var array
       */
      private static $_default_collations = array();

      /**
       * Loads charset data from the MySQL server.
       *
       * Does nothing when the charset cache is already populated.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return void
       */
      private static function loadCharsets(DatabaseInterface $dbi, $disableIs)
      {
          /* Data already loaded */
          if (count(self::$_charsets) > 0) {
              return;
          }

          if ($disableIs) {
              $sql = 'SHOW CHARACTER SET';
          } else {
              // Alias the columns so both queries yield the same row keys.
              $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `DESCRIPTION` AS `Description`'
                  . ' FROM `information_schema`.`CHARACTER_SETS`';
          }
          $res = $dbi->query($sql);

          // Reset both caches together so they can never disagree.
          self::$_charsets = array();
          self::$_charsets_descriptions = array();
          while ($row = $dbi->fetchAssoc($res)) {
              $name = $row['Charset'];
              self::$_charsets[] = $name;
              self::$_charsets_descriptions[$name] = $row['Description'];
          }
          $dbi->freeResult($res);

          sort(self::$_charsets, SORT_STRING);
      }

      /**
       * Loads collation data from the MySQL server.
       *
       * Does nothing when the collation cache is already populated.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return void
       */
      private static function loadCollations(DatabaseInterface $dbi, $disableIs)
      {
          /* Data already loaded */
          if (count(self::$_collations) > 0) {
              return;
          }

          if ($disableIs) {
              $sql = 'SHOW COLLATION';
          } else {
              // Alias the columns so both queries yield the same row keys.
              $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `COLLATION_NAME` AS `Collation`, `IS_DEFAULT` AS `Default`'
                  . ' FROM `information_schema`.`COLLATIONS`';
          }

          $res = $dbi->query($sql);
          while ($row = $dbi->fetchAssoc($res)) {
              $char_set_name = $row['Charset'];
              $name = $row['Collation'];
              self::$_collations[$char_set_name][] = $name;
              // The default flag is accepted both as 'Yes' and as '1'.
              if ($row['Default'] == 'Yes' || $row['Default'] == '1') {
                  self::$_default_collations[$char_set_name] = $name;
              }
          }
          $dbi->freeResult($res);

          foreach (array_keys(self::$_collations) as $char_set_name) {
              sort(self::$_collations[$char_set_name], SORT_STRING);
          }
      }

      /**
       * Get MySQL charsets
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array sorted list of charset names
       */
      public static function getMySQLCharsets(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCharsets($dbi, $disableIs);
          return self::$_charsets;
      }

      /**
       * Get MySQL charsets descriptions
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array descriptions keyed by charset name
       */
      public static function getMySQLCharsetsDescriptions(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCharsets($dbi, $disableIs);
          return self::$_charsets_descriptions;
      }

      /**
       * Get MySQL collations
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array sorted collation lists keyed by charset name
       */
      public static function getMySQLCollations(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCollations($dbi, $disableIs);
          return self::$_collations;
      }

      /**
       * Get MySQL default collations
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array default collation names keyed by charset name
       */
      public static function getMySQLCollationsDefault(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCollations($dbi, $disableIs);
          return self::$_default_collations;
      }

      /**
       * Builds the opening part shared by both dropdown boxes: the <select>
       * tag, the optional label option and the empty option.
       *
       * @param string      $name           Element name
       * @param string|null $id             Element id, attribute omitted when empty
       * @param bool        $submitOnChange Whether to add the autosubmit class
       * @param string|null $labelText      Label option text, null for no label
       *
       * @return string
       */
      private static function getSelectStart($name, $id, $submitOnChange, $labelText)
      {
          $html = '<select lang="en" dir="ltr" name="'
              . htmlspecialchars($name) . '"'
              . (empty($id) ? '' : ' id="' . htmlspecialchars($id) . '"')
              . ($submitOnChange ? ' class="autosubmit"' : '') . '>' . "\n";
          if ($labelText !== null) {
              $html .= '<option value="">' . $labelText . '</option>' . "\n";
          }
          return $html . '<option value=""></option>' . "\n";
      }

      /**
       * Builds a single <option> element with escaped value, title and text.
       *
       * @param string $value    Option value, also used as the visible text
       * @param string $title    Tooltip text
       * @param bool   $selected Whether the option is preselected
       *
       * @return string
       */
      private static function getOption($value, $title, $selected)
      {
          $escaped = htmlspecialchars($value);
          return '<option value="' . $escaped
              . '" title="' . htmlspecialchars($title) . '"'
              . ($selected ? ' selected="selected"' : '') . '>'
              . $escaped . '</option>' . "\n";
      }

      /**
       * Returns the cached description of a charset, falling back to the
       * charset name when no description is available.
       *
       * @param string $charset Charset name
       *
       * @return string
       */
      private static function getCharsetTitle($charset)
      {
          return empty(self::$_charsets_descriptions[$charset])
              ? $charset
              : self::$_charsets_descriptions[$charset];
      }

      /**
       * Generate charset dropdown box
       *
       * @param DatabaseInterface $dbi            DatabaseInterface instance
       * @param boolean           $disableIs      Disable use of INFORMATION_SCHEMA
       * @param string            $name           Element name
       * @param string            $id             Element id
       * @param null|string       $default        Default value
       * @param bool              $label          Label
       * @param bool              $submitOnChange Submit on change
       *
       * @return string
       */
      public static function getCharsetDropdownBox(
          DatabaseInterface $dbi,
          $disableIs,
          $name = null,
          $id = null,
          $default = null,
          $label = true,
          $submitOnChange = false
      ) {
          self::loadCharsets($dbi, $disableIs);
          if (empty($name)) {
              $name = 'character_set';
          }

          $return_str = self::getSelectStart(
              $name,
              $id,
              $submitOnChange,
              $label ? __('Charset') : null
          );
          foreach (self::$_charsets as $current_charset) {
              $return_str .= self::getOption(
                  $current_charset,
                  self::getCharsetTitle($current_charset),
                  $default == $current_charset
              );
          }
          $return_str .= '</select>' . "\n";

          return $return_str;
      }

      /**
       * Generate collation dropdown box
       *
       * Collations are grouped into one <optgroup> per charset.
       *
       * @param DatabaseInterface $dbi            DatabaseInterface instance
       * @param boolean           $disableIs      Disable use of INFORMATION_SCHEMA
       * @param string            $name           Element name
       * @param string            $id             Element id
       * @param null|string       $default        Default value
       * @param bool              $label          Label
       * @param bool              $submitOnChange Submit on change
       *
       * @return string
       */
      public static function getCollationDropdownBox(
          DatabaseInterface $dbi,
          $disableIs,
          $name = null,
          $id = null,
          $default = null,
          $label = true,
          $submitOnChange = false
      ) {
          self::loadCharsets($dbi, $disableIs);
          self::loadCollations($dbi, $disableIs);
          if (empty($name)) {
              $name = 'collation';
          }

          $return_str = self::getSelectStart(
              $name,
              $id,
              $submitOnChange,
              $label ? __('Collation') : null
          );
          foreach (self::$_charsets as $current_charset) {
              $return_str .= '<optgroup label="' . htmlspecialchars($current_charset)
                  . '" title="'
                  . htmlspecialchars(self::getCharsetTitle($current_charset))
                  . '">' . "\n";
              // A charset may have no collation rows at all; render an empty
              // group instead of reading an undefined array key.
              $collations = isset(self::$_collations[$current_charset])
                  ? self::$_collations[$current_charset]
                  : array();
              foreach ($collations as $current_collation) {
                  $return_str .= self::getOption(
                      $current_collation,
                      self::getCollationDescr($current_collation),
                      $default == $current_collation
                  );
              }
              $return_str .= '</optgroup>' . "\n";
          }
          $return_str .= '</select>' . "\n";

          return $return_str;
      }

      /**
       * Returns description for given collation
       *
       * The collation name is split on '_' and its parts are consumed by a
       * small state machine: charset (level 0), language (1), German variant
       * (2), Spanish variant (3), algorithm version (4) and finally any number
       * of suffixes (5). A part that is not recognised at its level falls
       * through to the next one.
       *
       * @param string $collation MySQL collation string
       *
       * @return string collation description
       */
      public static function getCollationDescr($collation)
      {
          $parts = explode('_', $collation);

          $name = __('Unknown');
          $variant = null;
          $suffixes = array();
          $unicode = false;
          $unknown = false;

          $level = 0;
          foreach ($parts as $part) {
              if ($level === 0) {
                  /* Next will be language */
                  $level = 1;
                  /* First should be charset */
                  switch ($part) {
                      case 'binary':
                          $name = _pgettext('Collation', 'Binary');
                          break;
                      // Unicode charsets
                      case 'utf8mb4':
                          $variant = 'UCA 4.0.0';
                          // Fall through to other unicode
                      case 'ucs2':
                      case 'utf8':
                      case 'utf16':
                      case 'utf16le':
                      case 'utf16be':
                      case 'utf32':
                          $name = _pgettext('Collation', 'Unicode');
                          $unicode = true;
                          break;
                      // West European charsets
                      case 'ascii':
                      case 'cp850':
                      case 'dec8':
                      case 'hp8':
                      case 'latin1':
                      case 'macroman':
                          $name = _pgettext('Collation', 'West European');
                          break;
                      // Central European charsets
                      case 'cp1250':
                      case 'cp852':
                      case 'latin2':
                      case 'macce':
                          $name = _pgettext('Collation', 'Central European');
                          break;
                      // Russian charsets
                      case 'cp866':
                      case 'koi8r':
                          $name = _pgettext('Collation', 'Russian');
                          break;
                      // Simplified Chinese charsets
                      case 'gb2312':
                      case 'gbk':
                          $name = _pgettext('Collation', 'Simplified Chinese');
                          break;
                      // Japanese charsets
                      case 'sjis':
                      case 'ujis':
                      case 'cp932':
                      case 'eucjpms':
                          $name = _pgettext('Collation', 'Japanese');
                          break;
                      // Baltic charsets
                      case 'cp1257':
                      case 'latin7':
                          $name = _pgettext('Collation', 'Baltic');
                          break;
                      // Other
                      case 'armscii8':
                      case 'armscii':
                          $name = _pgettext('Collation', 'Armenian');
                          break;
                      case 'big5':
                          $name = _pgettext('Collation', 'Traditional Chinese');
                          break;
                      case 'cp1251':
                          $name = _pgettext('Collation', 'Cyrillic');
                          break;
                      case 'cp1256':
                          $name = _pgettext('Collation', 'Arabic');
                          break;
                      case 'euckr':
                          $name = _pgettext('Collation', 'Korean');
                          break;
                      case 'hebrew':
                          $name = _pgettext('Collation', 'Hebrew');
                          break;
                      case 'geostd8':
                          $name = _pgettext('Collation', 'Georgian');
                          break;
                      case 'greek':
                          $name = _pgettext('Collation', 'Greek');
                          break;
                      case 'keybcs2':
                          $name = _pgettext('Collation', 'Czech-Slovak');
                          break;
                      case 'koi8u':
                          $name = _pgettext('Collation', 'Ukrainian');
                          break;
                      case 'latin5':
                          $name = _pgettext('Collation', 'Turkish');
                          break;
                      case 'swe7':
                          $name = _pgettext('Collation', 'Swedish');
                          break;
                      case 'tis620':
                          $name = _pgettext('Collation', 'Thai');
                          break;
                      default:
                          $name = _pgettext('Collation', 'Unknown');
                          $unknown = true;
                          break;
                  }
                  continue;
              }

              if ($level === 1) {
                  /* Next will be variant unless changed later */
                  $level = 4;
                  /* Locale name or code */
                  $found = true;
                  switch ($part) {
                      case 'general':
                          break;
                      case 'bulgarian':
                      case 'bg':
                          $name = _pgettext('Collation', 'Bulgarian');
                          break;
                      case 'chinese':
                      case 'cn':
                          // Chinese charsets already carry a more specific name.
                          if ($unicode) {
                              $name = _pgettext('Collation', 'Chinese');
                          }
                          break;
                      case 'croatian':
                      case 'hr':
                          $name = _pgettext('Collation', 'Croatian');
                          break;
                      case 'czech':
                      case 'cs':
                          $name = _pgettext('Collation', 'Czech');
                          break;
                      case 'danish':
                      case 'da':
                          $name = _pgettext('Collation', 'Danish');
                          break;
                      case 'english':
                      case 'en':
                          $name = _pgettext('Collation', 'English');
                          break;
                      case 'esperanto':
                      case 'eo':
                          $name = _pgettext('Collation', 'Esperanto');
                          break;
                      case 'estonian':
                      case 'et':
                          $name = _pgettext('Collation', 'Estonian');
                          break;
                      case 'german1':
                          $name = _pgettext('Collation', 'German (dictionary order)');
                          break;
                      case 'german2':
                          $name = _pgettext('Collation', 'German (phone book order)');
                          break;
                      case 'german':
                      case 'de':
                          /* Name is set later */
                          $level = 2;
                          break;
                      case 'hungarian':
                      case 'hu':
                          $name = _pgettext('Collation', 'Hungarian');
                          break;
                      case 'icelandic':
                      case 'is':
                          $name = _pgettext('Collation', 'Icelandic');
                          break;
                      case 'japanese':
                      case 'ja':
                          $name = _pgettext('Collation', 'Japanese');
                          break;
                      case 'la':
                          $name = _pgettext('Collation', 'Classical Latin');
                          break;
                      case 'latvian':
                      case 'lv':
                          $name = _pgettext('Collation', 'Latvian');
                          break;
                      case 'lithuanian':
                      case 'lt':
                          $name = _pgettext('Collation', 'Lithuanian');
                          break;
                      case 'korean':
                      case 'ko':
                          $name = _pgettext('Collation', 'Korean');
                          break;
                      case 'myanmar':
                      case 'my':
                          $name = _pgettext('Collation', 'Burmese');
                          break;
                      case 'persian':
                          $name = _pgettext('Collation', 'Persian');
                          break;
                      case 'polish':
                      case 'pl':
                          $name = _pgettext('Collation', 'Polish');
                          break;
                      case 'roman':
                          $name = _pgettext('Collation', 'West European');
                          break;
                      case 'romanian':
                      case 'ro':
                          $name = _pgettext('Collation', 'Romanian');
                          break;
                      case 'si':
                      case 'sinhala':
                          $name = _pgettext('Collation', 'Sinhalese');
                          break;
                      case 'slovak':
                      case 'sk':
                          // 'sk' is the Slovak language code; 'sl' belongs
                          // to Slovenian and is handled by the next case.
                          $name = _pgettext('Collation', 'Slovak');
                          break;
                      case 'slovenian':
                      case 'sl':
                          $name = _pgettext('Collation', 'Slovenian');
                          break;
                      case 'spanish':
                          $name = _pgettext('Collation', 'Spanish (modern)');
                          break;
                      case 'es':
                          /* Name is set later */
                          $level = 3;
                          break;
                      case 'spanish2':
                          $name = _pgettext('Collation', 'Spanish (traditional)');
                          break;
                      case 'swedish':
                          $name = _pgettext('Collation', 'Swedish');
                          break;
                      case 'thai':
                      case 'th':
                          $name = _pgettext('Collation', 'Thai');
                          break;
                      case 'turkish':
                      case 'tr':
                          $name = _pgettext('Collation', 'Turkish');
                          break;
                      case 'ukrainian':
                      case 'uk':
                          $name = _pgettext('Collation', 'Ukrainian');
                          break;
                      case 'vietnamese':
                      case 'vi':
                          $name = _pgettext('Collation', 'Vietnamese');
                          break;
                      case 'unicode':
                          // Only replaces the name of an unrecognised charset.
                          if ($unknown) {
                              $name = _pgettext('Collation', 'Unicode');
                          }
                          break;
                      default:
                          $found = false;
                  }
                  if ($found) {
                      continue;
                  }
                  // Not parsed token, fall to next level
              }

              if ($level === 2) {
                  /* Next will be variant */
                  $level = 4;
                  /* German variant */
                  if ($part == 'pb') {
                      $name = _pgettext('Collation', 'German (phone book order)');
                      continue;
                  }
                  $name = _pgettext('Collation', 'German (dictionary order)');
                  // Not parsed token, fall to next level
              }

              if ($level === 3) {
                  /* Next will be variant */
                  $level = 4;
                  /* Spanish variant */
                  if ($part == 'trad') {
                      $name = _pgettext('Collation', 'Spanish (traditional)');
                      continue;
                  }
                  $name = _pgettext('Collation', 'Spanish (modern)');
                  // Not parsed token, fall to next level
              }

              if ($level === 4) {
                  /* Next will be suffix */
                  $level = 5;
                  /* Variant */
                  $found = true;
                  switch ($part) {
                      case '0900':
                          $variant = 'UCA 9.0.0';
                          break;
                      case '520':
                          $variant = 'UCA 5.2.0';
                          break;
                      case 'mysql561':
                          $variant = 'MySQL 5.6.1';
                          break;
                      case 'mysql500':
                          $variant = 'MySQL 5.0.0';
                          break;
                      default:
                          $found = false;
                  }
                  if ($found) {
                      continue;
                  }
                  // Not parsed token, fall to next level
              }

              if ($level === 5) {
                  /* Suffixes */
                  switch ($part) {
                      case 'ci':
                          $suffixes[] = _pgettext('Collation variant', 'case-insensitive');
                          break;
                      case 'cs':
                          $suffixes[] = _pgettext('Collation variant', 'case-sensitive');
                          break;
                      case 'ai':
                          $suffixes[] = _pgettext('Collation variant', 'accent-insensitive');
                          break;
                      case 'as':
                          $suffixes[] = _pgettext('Collation variant', 'accent-sensitive');
                          break;
                      case 'w2':
                      case 'l2':
                          $suffixes[] = _pgettext('Collation variant', 'multi-level');
                          break;
                      case 'bin':
                          $suffixes[] = _pgettext('Collation variant', 'binary');
                          break;
                  }
              }
          }

          $result = $name;
          if ($variant !== null) {
              $result .= ' (' . $variant . ')';
          }
          if (count($suffixes) > 0) {
              $result .= ', ' . implode(', ', $suffixes);
          }
          return $result;
      }
  }