Skip to content

Commit 978295f

Browse files
authored
Merge pull request #149 from bact/dev
Number converters: convert more than one digit at a time
2 parents 3a0cc37 + ace9aee commit 978295f

File tree

5 files changed

+181
-75
lines changed

5 files changed

+181
-75
lines changed

pythainlp/number/__init__.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,23 @@
33
Number conversions between Thai digits, Arabic digits, and Thai words
44
"""
55

6-
from .thainum import (
7-
bahttext,
8-
num_to_thaiword,
6+
from .digitconv import (
7+
arabic_digit_to_thai_digit,
8+
digit_to_text,
9+
text_to_arabic_digit,
10+
text_to_thai_digit,
11+
thai_digit_to_arabic_digit,
912
)
13+
from .numtoword import bahttext, num_to_thaiword
1014
from .wordtonum import thaiword_to_num
1115

12-
__all__ = ["bahttext", "num_to_thaiword", "thaiword_to_num"]
16+
__all__ = [
17+
"bahttext",
18+
"num_to_thaiword",
19+
"thaiword_to_num",
20+
"arabic_digit_to_thai_digit",
21+
"digit_to_text",
22+
"text_to_arabic_digit",
23+
"text_to_thai_digit",
24+
"thai_digit_to_arabic_digit",
25+
]

pythainlp/number/digitconv.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Convert digits
4+
"""
5+
6+
# Arabic digit character -> Thai digit character.
_arabic_thai = {
    "0": "๐",
    "1": "๑",
    "2": "๒",
    "3": "๓",
    "4": "๔",
    "5": "๕",
    "6": "๖",
    "7": "๗",
    "8": "๘",
    "9": "๙",
}

# Thai digit character -> Arabic digit character.
# Derived from the forward table so the two directions cannot drift apart.
_thai_arabic = {thai: arabic for arabic, thai in _arabic_thai.items()}

# Arabic digit character -> that digit spelled out in Thai.
_digit_spell = {
    "0": "ศูนย์",
    "1": "หนึ่ง",
    "2": "สอง",
    "3": "สาม",
    "4": "สี่",
    "5": "ห้า",
    "6": "หก",
    "7": "เจ็ด",
    "8": "แปด",
    "9": "เก้า",
}

# Digit spelled out in Thai -> Arabic digit character.
# Derived from the forward table for the same reason as above.
_spell_digit = {word: digit for digit, word in _digit_spell.items()}
57+
58+
59+
def thai_digit_to_arabic_digit(text):
    """
    Replace every Thai digit in a text with the matching Arabic digit.

    Characters that are not Thai digits are copied through unchanged.

    :param str text: Text with Thai digits such as '๑', '๒', '๓'
    :return: Text with Thai digits being converted to Arabic digits such as '1', '2', '3'; an empty string when text is empty or None
    """
    if text:
        # Fall back to the character itself when it is not a Thai digit.
        return "".join(_thai_arabic.get(char, char) for char in text)

    return ""
75+
76+
77+
def arabic_digit_to_thai_digit(text):
    """
    Replace every Arabic digit in a text with the matching Thai digit.

    Characters that are not Arabic digits are copied through unchanged.

    :param str text: Text with Arabic digits such as '1', '2', '3'
    :return: Text with Arabic digits being converted to Thai digits such as '๑', '๒', '๓'; an empty string when text is empty or None
    """
    if text:
        # Fall back to the character itself when it is not an Arabic digit.
        return "".join(_arabic_thai.get(char, char) for char in text)

    return ""
93+
94+
95+
def digit_to_text(text):
    """
    Spell out every digit in a text, one digit at a time, in Thai.

    Both Arabic and Thai digits are recognised; all other characters are
    copied through unchanged.

    :param str text: Text with digits such as '1', '2', '๓', '๔'
    :return: Text with digits being spelled out in Thai; an empty string when text is empty or None
    """
    if text:
        # Stage 1: normalise Thai digits to Arabic digits.
        normalised = (_thai_arabic.get(char, char) for char in text)
        # Stage 2: spell out each Arabic digit, leaving other characters alone.
        return "".join(_digit_spell.get(char, char) for char in normalised)

    return ""
114+
115+
116+
def text_to_arabic_digit(text):
    """
    Convert a single digit spelled out in Thai to its Arabic digit.

    :param text: A digit spelled out in Thai
    :return: An Arabic digit such as '1', '2', '3'; an empty string when text is empty, None, or not a known digit spelling
    """
    if text:
        # Unknown spellings map to the empty string rather than raising.
        return _spell_digit.get(text, "")

    return ""
125+
126+
127+
def text_to_thai_digit(text):
    """
    Convert a single digit spelled out in Thai to its Thai digit.

    The spelling is first resolved to an Arabic digit, which is then
    converted to the Thai digit.

    :param text: A digit spelled out in Thai
    :return: A Thai digit such as '๑', '๒', '๓'; an empty string when text is empty, None, or not a known digit spelling
    """
    arabic_digit = text_to_arabic_digit(text)
    return arabic_digit_to_thai_digit(arabic_digit)

pythainlp/number/thainum.py renamed to pythainlp/number/numtoword.py

Lines changed: 1 addition & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Convert Thai numbers
3+
Convert number value to Thai read out
44
55
Adapted from
66
http://justmindthought.blogspot.com/2012/12/code-php.html
@@ -9,74 +9,6 @@
99

1010
__all__ = ["bahttext", "num_to_thaiword"]
1111

12-
_p = [
13-
["ภาษาไทย", "ตัวเลข", "เลขไทย"],
14-
["หนึ่ง", "1", "๑"],
15-
["สอง", "2", "๒"],
16-
["สาม", "3", "๓"],
17-
["สี่", "4", "๔"],
18-
["ห้า", "5", "๕"],
19-
["หก", "6", "๖"],
20-
["หก", "7", "๗"],
21-
["แปด", "8", "๘"],
22-
["เก้า", "9", "๙"],
23-
]
24-
25-
26-
# เลขไทยสู่เลขอารบิก
27-
def thai_num_to_num(text):
28-
"""
29-
:param str text: Thai number characters such as '๑', '๒', '๓'
30-
:return: universal numbers such as '1', '2', '3'
31-
"""
32-
thaitonum = dict((x[2], x[1]) for x in _p[1:])
33-
return thaitonum[text]
34-
35-
36-
def thai_num_to_text(text):
37-
"""
38-
:param str text: Thai number characters such as '๑', '๒', '๓'
39-
:return: Thai numbers, spelled out in Thai
40-
"""
41-
thaitonum = dict((x[2], x[0]) for x in _p[1:])
42-
return thaitonum[text]
43-
44-
45-
def num_to_thai_num(text):
46-
"""
47-
:param text: universal numbers such as '1', '2', '3'
48-
:return: Thai number characters such as '๑', '๒', '๓'
49-
"""
50-
thaitonum = dict((x[1], x[2]) for x in _p[1:])
51-
return thaitonum[text]
52-
53-
54-
def num_to_text(text):
55-
"""
56-
:param text: universal numbers such as '1', '2', '3'
57-
:return: Thai numbers, spelled out in Thai
58-
"""
59-
thaitonum = dict((x[1], x[0]) for x in _p[1:])
60-
return thaitonum[text]
61-
62-
63-
def text_to_num(text):
64-
"""
65-
:param text: Thai numbers, spelled out in Thai
66-
:return: universal numbers such as '1', '2', '3'
67-
"""
68-
thaitonum = dict((x[0], x[1]) for x in _p[1:])
69-
return thaitonum[text]
70-
71-
72-
def text_to_thai_num(text):
73-
"""
74-
:param text: Thai numbers, spelled out in Thai
75-
:return: Thai numbers such as '๑', '๒', '๓'
76-
"""
77-
thaitonum = dict((x[0], x[2]) for x in _p[1:])
78-
return thaitonum[text]
79-
8012

8113
def bahttext(number):
8214
"""

pythainlp/number/wordtonum.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Convert number in words to a computablenumber value
3+
Convert number in words to a computable number value
44
55
Adapted from Korakot Chaovavanich's notebook
66
https://colab.research.google.com/drive/148WNIeclf0kOU6QxKd6pcfwpSs8l-VKD#scrollTo=EuVDd0nNuI8Q

tests/__init__.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,16 @@
2121
from pythainlp.keywords import find_keyword
2222
from pythainlp.ner import ThaiNameRecognizer
2323
from pythainlp.ner.locations import tag_provinces
24-
from pythainlp.number import bahttext, num_to_thaiword, thaiword_to_num
24+
from pythainlp.number import (
25+
arabic_digit_to_thai_digit,
26+
bahttext,
27+
digit_to_text,
28+
num_to_thaiword,
29+
text_to_arabic_digit,
30+
text_to_thai_digit,
31+
thai_digit_to_arabic_digit,
32+
thaiword_to_num,
33+
)
2534
from pythainlp.rank import rank
2635
from pythainlp.romanization import romanize
2736
from pythainlp.sentiment import sentiment
@@ -172,6 +181,26 @@ def test_number(self):
172181
self.assertEqual(thaiword_to_num(""), None)
173182
self.assertEqual(thaiword_to_num(None), None)
174183

184+
self.assertEqual(arabic_digit_to_thai_digit("ไทยแลนด์ 4.0"), "ไทยแลนด์ ๔.๐")
185+
self.assertEqual(arabic_digit_to_thai_digit(""), "")
186+
self.assertEqual(arabic_digit_to_thai_digit(None), "")
187+
188+
self.assertEqual(thai_digit_to_arabic_digit("๔๐๔ Not Found"), "404 Not Found")
189+
self.assertEqual(thai_digit_to_arabic_digit(""), "")
190+
self.assertEqual(thai_digit_to_arabic_digit(None), "")
191+
192+
self.assertEqual(digit_to_text("RFC 7258"), "RFC เจ็ดสองห้าแปด")
193+
self.assertEqual(digit_to_text(""), "")
194+
self.assertEqual(digit_to_text(None), "")
195+
196+
self.assertEqual(text_to_arabic_digit("เจ็ด"), "7")
197+
self.assertEqual(text_to_arabic_digit(""), "")
198+
self.assertEqual(text_to_arabic_digit(None), "")
199+
200+
self.assertEqual(text_to_thai_digit("เก้า"), "๙")
201+
self.assertEqual(text_to_thai_digit(""), "")
202+
self.assertEqual(text_to_thai_digit(None), "")
203+
175204
# ### pythainlp.rank
176205

177206
def test_rank(self):

0 commit comments

Comments
 (0)