fix:修复BUG/升级1.1.6版本
This commit is contained in:
96
vendor/overtrue/pinyin/bin/build
vendored
Normal file
96
vendor/overtrue/pinyin/bin/build
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
require __DIR__ . '/utils.php';
|
||||
|
||||
// Source data locations. The "Souce"/"Pathes" spellings are kept on purpose:
// the rest of this script refers to these exact variable names and paths.
$polyphones = explode(',', file_get_contents(__DIR__ . '/../sources/polyphones.txt'));
$charsSouce = __DIR__ . '/../sources/chars.txt';
$charsPathes = __DIR__.'/../sources/pathes/chars.txt';
$wordsSouce = __DIR__ . '/../sources/words.txt';
$wordsPathes = __DIR__ . '/../sources/pathes/words.txt';
$surnamesSource = file(__DIR__.'/../sources/surnames.txt');

// Upstream files that are fetched once when the local copy is missing.
$downloads = [
    $charsSouce => 'https://raw.githubusercontent.com/mozillazg/pinyin-data/master/pinyin.txt',
    $wordsSouce => 'https://raw.githubusercontent.com/mozillazg/phrase-pinyin-data/master/large_pinyin.txt',
];

foreach ($downloads as $target => $url) {
    if (file_exists($target)) {
        continue;
    }

    $content = file_get_contents($url);

    // A failed download must not be written out: an empty file would satisfy
    // file_exists() on every later run and silently produce an empty dictionary.
    if ($content === false || $content === '') {
        throw new RuntimeException("Unable to download {$url}");
    }

    file_put_contents($target, $content);
}
|
||||
|
||||
|
||||
// ------------------------------------------------
|
||||
// Surname dictionary: surname => tab-delimited pinyin with a leading and a
// trailing tab (e.g. "\tpin\tyin\t").
$surnames = [];

foreach ($surnamesSource as $line) {
    $entry = trim($line);

    // Blank lines carry no "surname,pinyin" pair; destructuring them would
    // raise an undefined-index warning and register an empty-string surname.
    if ($entry === '') {
        continue;
    }

    [$surname, $pinyin] = explode(',', $entry);

    $surnames[trim($surname)] = join("\t", ["", ...preg_split('/\s+/', trim($pinyin)), ""]);
}
|
||||
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
// Single characters with every parsed reading.
$charWithPolyphones = [];
// Single characters reduced to their first parsed reading, wrapped in tabs.
$chars = [];

// Wraps the first reading of a parsed entry in tabs.
$firstReading = static fn (array $readings): string => "\t{$readings[0]}\t";

foreach (parse_chars($charsSouce) as $char => $pinyin) {
    $charWithPolyphones[$char] = $pinyin;
    $chars[$char] = $firstReading($pinyin);
}

// Patch entries: these overwrite the first-reading table only.
foreach (parse_chars($charsPathes, $firstReading) as $char => $pinyin) {
    $chars[$char] = $pinyin;
}
|
||||
|
||||
// ------------------------------------------------
|
||||
|
||||
// Word dictionary: word => tab-delimited pinyin with a leading and a trailing tab.
$words = [];

foreach (parse_words($wordsSouce) as $word => $pinyin) {
    $wordChars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

    try {
        // NOTE(review): array_combine() keys by character, so a character that
        // occurs more than once in the word keeps only its last reading.
        $pinyinSegments = array_combine($wordChars, $pinyin);
    } catch (Throwable $e) {
        // Report the entry that actually failed (character count and reading
        // count differ) and keep the original error as the previous exception.
        throw new Exception("行解析错误:{$word}: " . implode(' ', $pinyin), 0, $e);
    }

    // Polyphone handling.
    // NOTE(review): array_intersect_key() compares array KEYS, and both arrays
    // are lists, so this matches positions (0, 1, 2, ...) rather than
    // characters. If the intent is "characters of the word that are listed in
    // $polyphones", array_intersect() would be the matching call. Left as-is
    // because changing it alters the generated dictionary; confirm first.
    $polyphoneChars = array_intersect_key($wordChars, $polyphones);

    foreach ($polyphoneChars as $char) {
        // Keep the word only if at least one of these characters is read
        // differently inside the word than its first listed reading;
        // otherwise the word is dropped.
        if (isset($charWithPolyphones[$char]) && $pinyinSegments[$char] !== $charWithPolyphones[$char][0]) {
            $words[$word] = join("\t", ["", ...$pinyin, ""]);
            break;
        }
    }
}

// Patch entries are always added and overwrite words collected above.
foreach (parse_words($wordsPathes) as $word => $pinyin) {
    $words[$word] = join("\t", ["", ...$pinyin, ""]);
}
|
||||
|
||||
// Clean up: remove everything in the data directory. The directory is quoted
// so a path containing spaces or shell metacharacters cannot break the
// command; the trailing /* stays unquoted so the shell still expands it.
exec('rm -rf ' . escapeshellarg(__DIR__ . '/../data') . '/*');

// Surnames
file_put_contents(__DIR__ . '/../data/surnames.php', "<?php\nreturn ".var_export($surnames, true).";\n");
echo count($surnames)." surnames saved.\n";

// Single characters, with all readings. The count reports the table that was
// actually written ($charWithPolyphones), not the first-reading table.
file_put_contents(__DIR__ . '/../data/chars.php', "<?php\nreturn ".var_export($charWithPolyphones, true).";\n");
echo count($charWithPolyphones)." chars saved.\n";

// Words plus single characters, ordered by key byte length, longest first.
$words = array_merge($words, $chars);
uksort($words, fn ($a, $b) => strlen($b) <=> strlen($a));

// Written in chunks of 8000 entries, one file per chunk, keys preserved.
foreach (array_chunk($words, 8000, true) as $index => $group) {
    file_put_contents(__DIR__ . "/../data/words-{$index}.php", "<?php\nreturn ".var_export($group, true).";\n");
    echo count($group)." words saved in ".__DIR__ . "/../data/words-{$index}.php \n";
}
|
||||
84
vendor/overtrue/pinyin/bin/pinyin
vendored
Normal file
84
vendor/overtrue/pinyin/bin/pinyin
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
require __DIR__ . '/../vendor/autoload.php';
|
||||
|
||||
use Overtrue\Pinyin\Pinyin;
|
||||
|
||||
// First command-line argument, or null when none was given.
$input = $argv[1] ?? null;

// Method names recognised as a bare command-line argument.
$methods = [
    'name',
    'phrase',
    'permalink',
    'polyphones',
    'chars',
    'nameAbbr',
    'abbr',
    'sentence',
];

// Method used when none is selected on the command line.
$method = 'sentence';

// Parsed "-x" / "--name[=value]" options, keyed by option name.
$inputOptions = [];
|
||||
$help = <<<"HELP"
|
||||
Usage:
|
||||
./pinyin [chinese] [method] [options]
|
||||
Options:
|
||||
-j, --json 输出 JSON 格式.
|
||||
-c, --compact 不格式化输出 JSON.
|
||||
-m, --method=[method] 转换方式,可选:name/phrase/permalink/polyphones/chars/nameAbbr/abbr/sentence.
|
||||
--no-tone 不使用音调.
|
||||
--tone-style=[style] 音调风格,可选值:default/none/number.
|
||||
-h, --help 显示帮助.
|
||||
|
||||
HELP;
|
||||
|
||||
// Walk every argument after the script name: a known method name selects the
// method; anything starting with "-" is recorded as an option.
foreach (array_slice($argv, 1) as $argument) {
    if (in_array($argument, $methods, true)) {
        $method = $argument;
        continue;
    }

    if (!str_starts_with($argument, '-')) {
        continue;
    }

    // "--name=value" splits at the first "="; an option without "=" gets null.
    $parts = explode('=', $argument, 2);
    $inputOptions[trim($parts[0])] = isset($parts[1]) ? trim($parts[1]) : null;
}
|
||||
|
||||
/**
 * Tells whether an option is present in the global $inputOptions map.
 *
 * Presence is decided by key, so an option recorded with a null value
 * (given without "=value") still counts as present.
 *
 * @param string      $option Option name to look up, e.g. "--json".
 * @param string|null $alias  Optional second name to accept, e.g. "-j".
 *
 * @return bool True when $option, or $alias if given, is a key of $inputOptions.
 */
function has_option(string $option, ?string $alias = null): bool
{
    global $inputOptions;

    // Tolerate the map not being initialised yet instead of raising a TypeError.
    $options = $inputOptions ?? [];

    if (array_key_exists($option, $options)) {
        return true;
    }

    // Compare explicitly so an alias such as "0" is not discarded as falsy.
    return $alias !== null && $alias !== '' && array_key_exists($alias, $options);
}
|
||||
|
||||
/**
 * Reads an option value from the global $inputOptions map.
 *
 * @param string      $option  Option name to look up, e.g. "--tone-style".
 * @param string|null $default Value returned when neither name yields a non-null value.
 * @param string|null $alias   Optional second name consulted when $option has no value.
 *
 * @return string|null The value stored under $option, else under $alias, else $default.
 */
function get_option(string $option, ?string $default = null, ?string $alias = null): ?string
{
    global $inputOptions;

    $value = $inputOptions[$option] ?? null;

    // Compare explicitly so an alias such as "0" is not discarded as falsy.
    if ($value === null && $alias !== null && $alias !== '') {
        $value = $inputOptions[$alias] ?? null;
    }

    return $value ?? $default;
}
|
||||
|
||||
// Show the help text when no text was given or help was requested.
// Compared explicitly because empty() would also reject the input "0".
if ($input === null || $input === '' || has_option('--help', '-h')) {
    echo $help;
    exit(0);
}

// --method / -m overrides a method given as a bare argument. The alias is
// passed to get_option() as well, and the current method is the fallback, so
// "-m=phrase" works and a value-less "--method" does not leave $method null.
if (has_option('--method', '-m')) {
    $method = get_option('--method', $method, '-m');
}

// Fail with a readable message instead of a fatal "undefined method" error.
if (!is_callable([Pinyin::class, $method])) {
    fwrite(STDERR, "Unknown method: {$method}\n");
    exit(1);
}

$toneStyle = has_option('--no-tone') ? 'none' : get_option('--tone-style', 'default');

// For "permalink" the second argument is the literal "-" rather than a tone style.
$result = Pinyin::$method($input, $method === 'permalink' ? '-' : $toneStyle);

// "polyphones" results are always printed as JSON.
$toJson = has_option('--json', '-j') || $method === 'polyphones';

if ($toJson) {
    // --compact only turns off pretty printing; characters stay unescaped so
    // tone marks remain readable in both output modes.
    $options = JSON_UNESCAPED_UNICODE;

    if (!has_option('--compact', '-c')) {
        $options |= JSON_PRETTY_PRINT;
    }

    $result = json_encode($result, $options);
}

echo $result, "\n";
|
||||
44
vendor/overtrue/pinyin/bin/utils.php
vendored
Normal file
44
vendor/overtrue/pinyin/bin/utils.php
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
/**
 * Parses a character source file and yields one entry per matching line.
 *
 * A data line looks like:
 * <pre>
 * U+4E2D: zhōng,zhòng  # 中
 * </pre>
 * The token after "#" becomes the key. The comma-separated readings are split
 * into a list, passed through $fn, and yielded as the value. Blank lines and
 * lines starting with "#" are skipped.
 *
 * @param string        $path Path of the file to read.
 * @param callable|null $fn   Optional transformer applied to the list of readings;
 *                            the list is yielded unchanged when omitted.
 *
 * @return Generator Keys are characters, values are whatever $fn returns.
 *
 * @throws Exception When a non-blank line neither matches the format nor starts with "#".
 */
function parse_chars(string $path, ?callable $fn = null): Generator
{
    $fn ??= fn ($p) => $p;

    // file() returns false when the path cannot be read; treat that as no lines.
    $lines = file($path) ?: [];

    foreach ($lines as $line) {
        // A blank line is not a malformed entry, so it must not abort the run.
        if (trim($line) === '') {
            continue;
        }

        preg_match('/^U\+(?<code>[0-9A-Z]+):\s+(?<pinyin>\S+)\s+#\s*(?<char>\S+)/', $line, $matched);

        if ($matched && !empty($matched['pinyin'])) {
            yield $matched['char'] => $fn(explode(',', $matched['pinyin']));
        } elseif (!str_starts_with($line, '#')) {
            throw new Exception("行解析错误:$line");
        }
    }
}
|
||||
|
||||
/**
 * Parses a word source file and yields one entry per matching line.
 *
 * A data line looks like:
 * <pre>
 * 㞎㞎: bǎ ba # comment
 * </pre>
 * The text before ":" becomes the key. The space-separated readings are split
 * into a list, passed through $fn, and yielded as the value. Lines that do not
 * match the pattern are skipped silently.
 *
 * @param string        $path Path of the file to read.
 * @param callable|null $fn   Optional transformer applied to the list of readings;
 *                            the list is yielded unchanged when omitted.
 *
 * @return Generator Keys are words, values are whatever $fn returns.
 */
function parse_words(string $path, ?callable $fn = null): Generator
{
    $fn ??= fn ($p) => $p;

    // file() returns false when the path cannot be read; treat that as no lines.
    $lines = file($path) ?: [];

    foreach ($lines as $line) {
        preg_match('/^(?<word>[^#\s]+):\s+(?<pinyin>[\p{L} ]+)#?/u', $line, $matched);

        if ($matched && !empty($matched['pinyin'])) {
            yield $matched['word'] => $fn(explode(' ', trim($matched['pinyin'])));
        }
    }
}
|
||||
Reference in New Issue
Block a user