Word Characters

import re

# Sample single-character strings: digits from several scripts, two
# punctuation marks, and letters from different alphabets.
values = [
    '2',  # ASCII digit 2
    '٣',  # Arabic-Indic digit 3
    '½',  # Unicode fraction one half
    '②',  # Unicode circled digit 2
    '߄',  # NKo digit 4 (NKo is a writing system for the Manding languages of West Africa)
    '६',  # Devanagari (a.k.a. Nagari, Indian) digit 6
    '_',  # underscore
    '-',  # dash
    'a',  # Latin a
    'á',  # a with acute accent (Hungarian)
    'א',  # Hebrew aleph
]

# Each entry pairs the label that is printed with the flags passed to
# re.search: first the default matching for str patterns, then the same
# pattern restricted to ASCII by re.ASCII.
modes = (
    ('Match ', 0),
    ('Match ASCII ', re.ASCII),
)

for val in values:
    # Always echo the character, then report every mode in which \w matches it.
    print(val)
    for label, flags in modes:
        match = re.search(r'\w', val, flags)
        if match:
            print(label, match.group(0))

Output:

2
Match  2
Match ASCII  2
٣
Match  ٣
½
Match  ½
②
Match  ②
߄
Match  ߄
६
Match  ६
_
Match  _
Match ASCII  _
-
a
Match  a
Match ASCII  a
á
Match  á
א
Match  א

Keyboard shortcuts

Regular Expressions in Python

Word Characters