# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

import io as pyio
import json
import os
import unittest

from .. import InFile
from ..comment_stripper import CommentStripper
from ..filtration import Filter

EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_VARIANT",
    "af_ZA_VARIANT",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME_VARIANT",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "yue",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh"
]
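
# The stems above are the locale IDs exercised by _check_filter() below, which
# wraps each one as a "<tree>/<stem>.txt" InFile path and asks the filter under
# test whether it matches; the files themselves are never opened by these tests.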


class TestIO(object):
    def __init__(self):
        pass

    def read_locale_deps(self, tree):
        if tree not in ("brkitr", "locales", "rbnf"):
            return None
        with pyio.open(os.path.join(
                    os.path.dirname(__file__),
                    "sample_data",
                    tree,
                    "LOCALE_DEPS.json"
                ), "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))
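
# TestIO stands in for the I/O object that Filter.create_from_json expects:
# locale-based filters presumably call read_locale_deps() to obtain the
# dependency data stored in sample_data/<tree>/LOCALE_DEPS.json. Wrapping the
# file in CommentStripper suggests those JSON files are allowed to contain
# comments.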


class FiltrationTest(unittest.TestCase):

    def test_exclude(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO()), [
        ])

    def test_default_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans"
        ])

    def test_default_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), expected_matches)

    def test_language_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO()), [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    def test_language_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_VARIANT")
        expected_matches.remove("af_ZA_VARIANT")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO()), expected_matches)

    def test_regex_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    def test_regex_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), expected_matches)

    def test_locale_basic(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO()), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])
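
    # A reading of test_locale_basic (not an authoritative statement of the
    # Filter API): a "locale" filter appears to keep root, every ancestor of a
    # requested locale, its children, and the default-script variants
    # (sr => Cyrl, vai => Vaii, zh => Hans), while leaving out scripts that
    # were not requested either explicitly or by default.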

    def test_locale_no_children(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    def test_locale_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    def test_locale_no_children_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    def test_union(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])
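
    # As exercised above, a "union" filter appears to match a file whenever any
    # of its "unionOf" sub-filters matches it, so the expected set is simply
    # the union of the two sub-filters' match sets.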

    def test_hk_deps_normal(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ])

    def test_hk_deps_rbnf(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ], "rbnf")

    def test_no_alias_parent_structure(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_HK",
            "zh",
        ], "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        for file_stem in EXAMPLE_FILE_STEMS:
            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
            expected_match = file_stem in expected_matches
            self.assertEqual(is_match, expected_match, file_stem)


# Export the test for the runner
suite = unittest.makeSuite(FiltrationTest)
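
# A minimal convenience entry point, added here as a sketch rather than part of
# the original runner contract: because of the relative imports above, the
# module can only be executed as part of its package (e.g. via
# `python -m <package>.test.filtration_test`, where <package> is a placeholder
# for whatever package this test lives in), not as a plain script. Note that
# unittest.makeSuite() is deprecated in newer Python releases in favor of
# unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest).
if __name__ == "__main__":
    unittest.main()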