Skip to content

Commit f34a4c5

Browse files
committed
完善代码、说明、demo
1 parent 9674035 commit f34a4c5

File tree

3 files changed

+202
-11
lines changed

3 files changed

+202
-11
lines changed

README.md

Lines changed: 178 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,180 @@
11
# ChineseUtil
22
PHP 中文工具类,支持汉字转拼音、拼音分词、简繁互转。
3-
PHP Chinese Tool class, support Chinese pinyin, pinyin participle, simplified and traditional conversion
3+
4+
A PHP utility class for Chinese text, supporting Chinese-to-pinyin conversion, pinyin word segmentation, and Simplified/Traditional Chinese conversion.
5+
6+
由于中文博大精深,汉字中存在多音字,简体字和繁体字之间也存在一对多的对应关系,因此本类库返回的所有结果均为包含所有可能组合的数组。
7+
8+
本类库的字典数据加载后会占用 40+ MB 内存。如果需要在访问量大的接口中使用本类库的汉字转拼音、简繁互转功能,推荐用 Swoole 开发一个异步服务程序,这样只需加载一次数据,就可以持续高效地提供服务。
9+
10+
## 功能
11+
12+
### 汉字转拼音
13+
14+
```php
15+
use \Yurun\Util\Chinese;
use \Yurun\Util\Chinese\Pinyin;
16+
$string = '恭喜發財!把我翻译成拼音看下?';
17+
echo $string, PHP_EOL;
18+
19+
echo '所有结果:', PHP_EOL;
20+
var_dump(Chinese::toPinyin($string));
21+
22+
echo '全拼:', PHP_EOL;
23+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN));
24+
25+
echo '首字母:', PHP_EOL;
26+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN_FIRST));
27+
28+
echo '读音:', PHP_EOL;
29+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN_SOUND));
30+
31+
echo '读音数字:', PHP_EOL;
32+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN_SOUND_NUMBER));
33+
34+
echo '自选 + 自定义分隔符:', PHP_EOL;
35+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN | Pinyin::CONVERT_MODE_PINYIN_SOUND_NUMBER, '/'));
36+
37+
/**
38+
输出结果:
39+
array(4) {
40+
["pinyin"]=>
41+
array(1) {
42+
[0]=>
43+
string(58) "gong xi fa cai ! ba wo fan yi cheng pin yin kan xia ? "
44+
}
45+
["pinyinSound"]=>
46+
array(4) {
47+
[0]=>
48+
string(63) "gōng xǐ fā cái bǎ wǒ fān yì chéng pīn yīn kàn xià "
49+
[1]=>
50+
string(63) "gōng xǐ fā cái bà wǒ fān yì chéng pīn yīn kàn xià "
51+
[2]=>
52+
string(63) "gōng xǐ fā cái bǎ wǒ fān yì chéng pīn yīn kān xià "
53+
[3]=>
54+
string(63) "gōng xǐ fā cái bà wǒ fān yì chéng pīn yīn kān xià "
55+
}
56+
["pinyinSoundNumber"]=>
57+
array(4) {
58+
[0]=>
59+
string(63) "gong1 xi3 fa1 cai2 ba3 wo3 fan1 yi4 cheng2 pin1 yin1 kan4 xia4 "
60+
[1]=>
61+
string(63) "gong1 xi3 fa1 cai2 ba4 wo3 fan1 yi4 cheng2 pin1 yin1 kan4 xia4 "
62+
[2]=>
63+
string(63) "gong1 xi3 fa1 cai2 ba3 wo3 fan1 yi4 cheng2 pin1 yin1 kan1 xia4 "
64+
[3]=>
65+
string(63) "gong1 xi3 fa1 cai2 ba4 wo3 fan1 yi4 cheng2 pin1 yin1 kan1 xia4 "
66+
}
67+
["pinyinFirst"]=>
68+
array(1) {
69+
[0]=>
70+
string(34) "g x f c ! b w f y c p y k x ? "
71+
}
72+
}
73+
全拼:
74+
array(1) {
75+
["pinyin"]=>
76+
array(1) {
77+
[0]=>
78+
string(58) "gong xi fa cai ! ba wo fan yi cheng pin yin kan xia ? "
79+
}
80+
}
81+
首字母:
82+
array(1) {
83+
["pinyinFirst"]=>
84+
array(1) {
85+
[0]=>
86+
string(34) "g x f c ! b w f y c p y k x ? "
87+
}
88+
}
89+
读音:
90+
array(1) {
91+
["pinyinSound"]=>
92+
array(4) {
93+
[0]=>
94+
string(63) "gōng xǐ fā cái bǎ wǒ fān yì chéng pīn yīn kàn xià "
95+
[1]=>
96+
string(63) "gōng xǐ fā cái bà wǒ fān yì chéng pīn yīn kàn xià "
97+
[2]=>
98+
string(63) "gōng xǐ fā cái bǎ wǒ fān yì chéng pīn yīn kān xià "
99+
[3]=>
100+
string(63) "gōng xǐ fā cái bà wǒ fān yì chéng pīn yīn kān xià "
101+
}
102+
}
103+
读音数字:
104+
array(1) {
105+
["pinyinSoundNumber"]=>
106+
array(4) {
107+
[0]=>
108+
string(63) "gong1 xi3 fa1 cai2 ba3 wo3 fan1 yi4 cheng2 pin1 yin1 kan4 xia4 "
109+
[1]=>
110+
string(63) "gong1 xi3 fa1 cai2 ba4 wo3 fan1 yi4 cheng2 pin1 yin1 kan4 xia4 "
111+
[2]=>
112+
string(63) "gong1 xi3 fa1 cai2 ba3 wo3 fan1 yi4 cheng2 pin1 yin1 kan1 xia4 "
113+
[3]=>
114+
string(63) "gong1 xi3 fa1 cai2 ba4 wo3 fan1 yi4 cheng2 pin1 yin1 kan1 xia4 "
115+
}
116+
}
117+
自选 + 自定义分隔符:
118+
array(2) {
119+
["pinyin"]=>
120+
array(1) {
121+
[0]=>
122+
string(58) "gong/xi/fa/cai/!/ba/wo/fan/yi/cheng/pin/yin/kan/xia/?/"
123+
}
124+
["pinyinSoundNumber"]=>
125+
array(4) {
126+
[0]=>
127+
string(63) "gong1/xi3/fa1/cai2/ba3/wo3/fan1/yi4/cheng2/pin1/yin1/kan4/xia4/"
128+
[1]=>
129+
string(63) "gong1/xi3/fa1/cai2/ba4/wo3/fan1/yi4/cheng2/pin1/yin1/kan4/xia4/"
130+
[2]=>
131+
string(63) "gong1/xi3/fa1/cai2/ba3/wo3/fan1/yi4/cheng2/pin1/yin1/kan1/xia4/"
132+
[3]=>
133+
string(63) "gong1/xi3/fa1/cai2/ba4/wo3/fan1/yi4/cheng2/pin1/yin1/kan1/xia4/"
134+
}
135+
}
136+
*/
137+
```
138+
139+
### 拼音分词
140+
141+
```php
142+
use \Yurun\Util\Chinese;
143+
$string2 = 'xianggang';
144+
echo '"', $string2, '"的分词结果:', PHP_EOL;
145+
var_dump(Chinese::splitPinyin($string2));
146+
/**
147+
输出结果:
148+
"xianggang"的分词结果:
149+
array(2) {
150+
[0]=>
151+
string(12) "xi ang gang "
152+
[1]=>
153+
string(11) "xiang gang "
154+
}
155+
*/
156+
```
157+
158+
### 简繁互转
159+
160+
```php
161+
use \Yurun\Util\Chinese;
162+
$string3 = '中华人民共和国!恭喜發財!';
163+
echo '"', $string3, '"的简体转换:', PHP_EOL;
164+
var_dump(Chinese::toSimplified($string3));
165+
echo '"', $string3, '"的繁体转换:', PHP_EOL;
166+
var_dump(Chinese::toTraditional($string3));
167+
/**
168+
输出结果:
169+
"中华人民共和国!恭喜發財!"的简体转换:
170+
array(1) {
171+
[0]=>
172+
string(39) "中华人民共和国!恭喜发财!"
173+
}
174+
"中华人民共和国!恭喜發財!"的繁体转换:
175+
array(1) {
176+
[0]=>
177+
string(39) "中華人民共和國!恭喜發財!"
178+
}
179+
*/
180+
```

demo/test.php

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
namespace Yurun\Util;
33
require_once dirname(__DIR__) . '/vendor/autoload.php';
44
use \Yurun\Util\Chinese\Pinyin;
5-
use \Yurun\Util\Chinese\PinyinSplit;
6-
use \Yurun\Util\Chinese\SimplifiedAndTraditional;
75
// 信息
6+
$mem1 = memory_get_usage();
87
$info = Chinese::info();
8+
$mem2 = memory_get_usage();
99
echo '总共收录 ', $info['chars'], ' 个汉字,', $info['scCount'], ' 个简体字,', $info['tcCount'], ' 个繁体字,', $info['otherCount'], ' 个其它汉字。', PHP_EOL;
10+
echo '加载数据字典前内存占用:', $mem1, ',加载数据字典后内存占用:', $mem2, PHP_EOL;
1011
// 汉字转拼音
1112
$string = '恭喜發財!把我翻译成拼音看下?';
1213
echo $string, PHP_EOL;
@@ -20,15 +21,15 @@
2021
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN_SOUND));
2122
echo '读音数字:', PHP_EOL;
2223
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN_SOUND_NUMBER));
23-
echo '自选:', PHP_EOL;
24-
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN | Pinyin::CONVERT_MODE_PINYIN_SOUND_NUMBER));
24+
echo '自选 + 自定义分隔符:', PHP_EOL;
25+
var_dump(Chinese::toPinyin($string, Pinyin::CONVERT_MODE_PINYIN | Pinyin::CONVERT_MODE_PINYIN_SOUND_NUMBER, '/'));
2526
// 拼音分词
2627
$string2 = 'xianggang';
27-
echo '"', $string, '"的分词结果:', PHP_EOL;
28-
var_dump(PinyinSplit::split($string));
29-
// 繁体简体转换
28+
echo '"', $string2, '"的分词结果:', PHP_EOL;
29+
var_dump(Chinese::splitPinyin($string2));
30+
// 简繁互转
3031
$string3 = '中华人民共和国!恭喜發財!';
3132
echo '"', $string3, '"的简体转换:', PHP_EOL;
32-
var_dump(SimplifiedAndTraditional::toSimplified($string3));
33+
var_dump(Chinese::toSimplified($string3));
3334
echo '"', $string3, '"的繁体转换:', PHP_EOL;
34-
var_dump(SimplifiedAndTraditional::toTraditional($string3));
35+
var_dump(Chinese::toTraditional($string3));

src/Chinese.php

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use \Yurun\Util\Chinese\Pinyin;
55
use \Yurun\Util\Chinese\PinyinSplit;
6+
use \Yurun\Util\Chinese\SimplifiedAndTraditional;
67

78
class Chinese
89
{
@@ -53,7 +54,17 @@ public static function toPinyin($string, $mode = Pinyin::CONVERT_MODE_FULL, $wor
5354
{
5455
static::init();
5556
}
56-
return Pinyin::convert($string, $mode);
57+
return Pinyin::convert($string, $mode, $wordSplit);
58+
}
59+
60+
/**
61+
* 拼音分词
62+
* @param string $string
63+
* @return array
64+
*/
65+
public static function splitPinyin($string)
66+
{
67+
return PinyinSplit::split($string);
5768
}
5869

5970
/**
@@ -67,6 +78,7 @@ public static function toSimplified($string)
6778
{
6879
static::init();
6980
}
81+
return SimplifiedAndTraditional::toSimplified($string);
7082
}
7183

7284
/**
@@ -80,6 +92,7 @@ public static function toTraditional($string)
8092
{
8193
static::init();
8294
}
95+
return SimplifiedAndTraditional::toTraditional($string);
8396
}
8497

8598
/**

0 commit comments

Comments
 (0)