Word Characters
import re
# Show which single characters the regex class \w matches: first with the
# default matching for str patterns, then again with the re.ASCII flag.
values = [
    '2',
    '٣',  # Arabic 3
    '½',  # unicode 1/2
    '②',  # unicode circled 2
    '߄',  # NKO 4 (a writing system for the Manding languages of West Africa)
    '६',  # Devanagari aka. Nagari (Indian) 6
    '_',  # underscore
    '-',  # dash
    'a',  # Latin a
    'á',  # Hungarian
    'א',  # Hebrew aleph
]

# Compile both variants once, outside the loop, instead of handing the raw
# pattern to re.search() twice per value.
word_unicode = re.compile(r'\w')
word_ascii = re.compile(r'\w', re.ASCII)

for val in values:
    print(val)

    # Default behaviour for str patterns.
    match = word_unicode.search(val)
    if match:
        print('Match ', match.group(0))

    # Same pattern with re.ASCII applied.
    match = word_ascii.search(val)
    if match:
        print('Match ASCII ', match.group(0))
Output:
2
Match 2
Match ASCII 2
٣
Match ٣
½
Match ½
②
Match ②
߄
Match ߄
६
Match ६
_
Match _
Match ASCII _
-
a
Match a
Match ASCII a
á
Match á
א
Match א