1 files changed, 200 insertions, 0 deletions
diff --git a/plugins/Pcre16/src/ucp.h b/plugins/Pcre16/src/ucp.h
new file mode 100644
index 0000000000..d8b34bfcc5
--- /dev/null
+++ b/plugins/Pcre16/src/ucp.h
@@ -0,0 +1,200 @@
+/*************************************************
+*          Unicode Property Table handler        *
+*************************************************/
+
+#ifndef _UCP_H
+#define _UCP_H
+
+/* This file contains definitions of the property values that are returned by
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors.
+
+ALSO: The specific values of the first two enums are assumed for the table
+called catposstab in pcre_compile.c. */
+
+/* These are the general character categories. */
+
+enum {
+  ucp_C,     /* Other */
+  ucp_L,     /* Letter */
+  ucp_M,     /* Mark */
+  ucp_N,     /* Number */
+  ucp_P,     /* Punctuation */
+  ucp_S,     /* Symbol */
+  ucp_Z      /* Separator */
+};
+
+/* These are the particular character categories. */
+
+enum {
+  ucp_Cc,    /* Control */
+  ucp_Cf,    /* Format */
+  ucp_Cn,    /* Unassigned */
+  ucp_Co,    /* Private use */
+  ucp_Cs,    /* Surrogate */
+  ucp_Ll,    /* Lower case letter */
+  ucp_Lm,    /* Modifier letter */
+  ucp_Lo,    /* Other letter */
+  ucp_Lt,    /* Title case letter */
+  ucp_Lu,    /* Upper case letter */
+  ucp_Mc,    /* Spacing mark */
+  ucp_Me,    /* Enclosing mark */
+  ucp_Mn,    /* Non-spacing mark */
+  ucp_Nd,    /* Decimal number */
+  ucp_Nl,    /* Letter number */
+  ucp_No,    /* Other number */
+  ucp_Pc,    /* Connector punctuation */
+  ucp_Pd,    /* Dash punctuation */
+  ucp_Pe,    /* Close punctuation */
+  ucp_Pf,    /* Final punctuation */
+  ucp_Pi,    /* Initial punctuation */
+  ucp_Po,    /* Other punctuation */
+  ucp_Ps,    /* Open punctuation */
+  ucp_Sc,    /* Currency symbol */
+  ucp_Sk,    /* Modifier symbol */
+  ucp_Sm,    /* Mathematical symbol */
+  ucp_So,    /* Other symbol */
+  ucp_Zl,    /* Line separator */
+  ucp_Zp,    /* Paragraph separator */
+  ucp_Zs     /* Space separator */
+};
+
+/* These are grapheme break properties. Note that the code for processing them
+assumes that the values are less than 16. If more values are added that take
+the number to 16 or more, the code will have to be rewritten. */
+
+enum {
+  ucp_gbCR,                /*  0 */
+  ucp_gbLF,                /*  1 */
+  ucp_gbControl,           /*  2 */
+  ucp_gbExtend,            /*  3 */
+  ucp_gbPrepend,           /*  4 */
+  ucp_gbSpacingMark,       /*  5 */
+  ucp_gbL,                 /*  6 Hangul syllable type L */
+  ucp_gbV,                 /*  7 Hangul syllable type V */
+  ucp_gbT,                 /*  8 Hangul syllable type T */
+  ucp_gbLV,                /*  9 Hangul syllable type LV */
+  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
+  ucp_gbRegionalIndicator, /* 11 */
+  ucp_gbOther              /* 12 */
+};
+
+/* These are the script identifications. */
+
+enum {
+  ucp_Arabic,
+  ucp_Armenian,
+  ucp_Bengali,
+  ucp_Bopomofo,
+  ucp_Braille,
+  ucp_Buginese,
+  ucp_Buhid,
+  ucp_Canadian_Aboriginal,
+  ucp_Cherokee,
+  ucp_Common,
+  ucp_Coptic,
+  ucp_Cypriot,
+  ucp_Cyrillic,
+  ucp_Deseret,
+  ucp_Devanagari,
+  ucp_Ethiopic,
+  ucp_Georgian,
+  ucp_Glagolitic,
+  ucp_Gothic,
+  ucp_Greek,
+  ucp_Gujarati,
+  ucp_Gurmukhi,
+  ucp_Han,
+  ucp_Hangul,
+  ucp_Hanunoo,
+  ucp_Hebrew,
+  ucp_Hiragana,
+  ucp_Inherited,
+  ucp_Kannada,
+  ucp_Katakana,
+  ucp_Kharoshthi,
+  ucp_Khmer,
+  ucp_Lao,
+  ucp_Latin,
+  ucp_Limbu,
+  ucp_Linear_B,
+  ucp_Malayalam,
+  ucp_Mongolian,
+  ucp_Myanmar,
+  ucp_New_Tai_Lue,
+  ucp_Ogham,
+  ucp_Old_Italic,
+  ucp_Old_Persian,
+  ucp_Oriya,
+  ucp_Osmanya,
+  ucp_Runic,
+  ucp_Shavian,
+  ucp_Sinhala,
+  ucp_Syloti_Nagri,
+  ucp_Syriac,
+  ucp_Tagalog,
+  ucp_Tagbanwa,
+  ucp_Tai_Le,
+  ucp_Tamil,
+  ucp_Telugu,
+  ucp_Thaana,
+  ucp_Thai,
+  ucp_Tibetan,
+  ucp_Tifinagh,
+  ucp_Ugaritic,
+  ucp_Yi,
+  /* New for Unicode 5.0: */
+  ucp_Balinese,
+  ucp_Cuneiform,
+  ucp_Nko,
+  ucp_Phags_Pa,
+  ucp_Phoenician,
+  /* New for Unicode 5.1: */
+  ucp_Carian,
+  ucp_Cham,
+  ucp_Kayah_Li,
+  ucp_Lepcha,
+  ucp_Lycian,
+  ucp_Lydian,
+  ucp_Ol_Chiki,
+  ucp_Rejang,
+  ucp_Saurashtra,
+  ucp_Sundanese,
+  ucp_Vai,
+  /* New for Unicode 5.2: */
+  ucp_Avestan,
+  ucp_Bamum,
+  ucp_Egyptian_Hieroglyphs,
+  ucp_Imperial_Aramaic,
+  ucp_Inscriptional_Pahlavi,
+  ucp_Inscriptional_Parthian,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Lisu,
+  ucp_Meetei_Mayek,
+  ucp_Old_South_Arabian,
+  ucp_Old_Turkic,
+  ucp_Samaritan,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
+  /* New for Unicode 6.0.0: */
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Mandaic,
+  /* New for Unicode 6.1.0: */
+  ucp_Chakma,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sharada,
+  ucp_Sora_Sompeng,
+  ucp_Takri
+};
+
+#endif
+
+/* End of ucp.h */