diff --git a/libunicode-table.h b/libunicode-table.h index 0120ea9..67df6b3 100644 --- a/libunicode-table.h +++ b/libunicode-table.h @@ -3130,6 +3130,7 @@ typedef enum { } UnicodeScriptEnum; static const char unicode_script_name_table[] = + "Unknown,Zzzz" "\0" "Adlam,Adlm" "\0" "Ahom,Ahom" "\0" "Anatolian_Hieroglyphs,Hluw" "\0" diff --git a/libunicode.c b/libunicode.c index b4a0206..3791523 100644 --- a/libunicode.c +++ b/libunicode.c @@ -1285,8 +1285,6 @@ int unicode_script(CharRange *cr, script_idx = unicode_find_name(unicode_script_name_table, script_name); if (script_idx < 0) return -2; - /* Note: we remove the "Unknown" Script */ - script_idx += UNICODE_SCRIPT_Unknown + 1; is_common = (script_idx == UNICODE_SCRIPT_Common || script_idx == UNICODE_SCRIPT_Inherited); @@ -1316,17 +1314,21 @@ int unicode_script(CharRange *cr, n |= *p++; n += 96 + (1 << 12); } - if (type == 0) - v = 0; - else - v = *p++; c1 = c + n + 1; - if (v == script_idx) { - if (cr_add_interval(cr1, c, c1)) - goto fail; + if (type != 0) { + v = *p++; + if (v == script_idx || script_idx == UNICODE_SCRIPT_Unknown) { + if (cr_add_interval(cr1, c, c1)) + goto fail; + } } c = c1; } + if (script_idx == UNICODE_SCRIPT_Unknown) { + /* Unknown is all the characters outside scripts */ + if (cr_invert(cr1)) + goto fail; + } if (is_ext) { /* add the script extensions */ diff --git a/unicode_gen.c b/unicode_gen.c index 1b43538..c793ba1 100644 --- a/unicode_gen.c +++ b/unicode_gen.c @@ -2087,10 +2087,9 @@ void build_script_table(FILE *f) fprintf(f, " UNICODE_SCRIPT_COUNT,\n"); fprintf(f, "} UnicodeScriptEnum;\n\n"); - i = 1; dump_name_table(f, "unicode_script_name_table", - unicode_script_name + i, SCRIPT_COUNT - i, - unicode_script_short_name + i); + unicode_script_name, SCRIPT_COUNT, + unicode_script_short_name); dbuf_init(dbuf); #ifdef DUMP_TABLE_SIZE