summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-25 18:19:11 +0300
committer Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-25 18:19:11 +0300
commit 753dbaeeb0be5fb3d0d4337d7854dcf4f3a30fe1 (patch)
tree afedc73d7d8b8032f5c2057aec8bdff95e6601df
parent 8b436a84c53de4c5a8eaf9be72cdd82324da2eeb (diff)
bitnet: remove iq1_bn lookup table storing +/- signs
The AVX2 implementation was the only one left using it, so I decided to see if we can get a performant implementation using the 0, 1, 2 lookup table. It turns out we can, and it is even slightly faster than the sign-based table. We now get PP-512 = 275 t/s and TG-128 = 57.7 t/s with 16 threads on the Ryzen-7950X. With only one lookup table left for iq1_bn, I renamed it to iq1bn_grid_u16.
-rw-r--r-- ggml-common.h 210
-rw-r--r-- ggml-cuda/convert.cu 2
-rw-r--r-- ggml-cuda/vecdotq.cuh 6
-rw-r--r-- ggml-metal.metal 6
-rw-r--r-- iqk-quantize.cpp 6
-rw-r--r-- iqk_mul_mat.cpp 42
6 files changed, 33 insertions, 239 deletions
diff --git a/ggml-common.h b/ggml-common.h
index 2fa69f64..bf95da2a 100644
--- a/ggml-common.h
+++ b/ggml-common.h
@@ -1847,215 +1847,7 @@ GGML_TABLE_BEGIN(uint32_t, iq1s_grid_gpu, NGRID_IQ1S)
GGML_TABLE_END()
#endif
-GGML_TABLE_BEGIN(uint16_t, iq1bn_grid_xxx, 3281)
- 0xff00, 0xfe00, 0xfe01, 0xfd00, 0xfc00, 0xfc01, 0xfd02, 0xfc02, 0xfc03, 0xfb00, 0xfa00, 0xfa01, 0xf900, 0xf800, 0xf801, 0xf902,
- 0xf802, 0xf803, 0xfb04, 0xfa04, 0xfa05, 0xf904, 0xf804, 0xf805, 0xf906, 0xf806, 0xf807, 0xf700, 0xf600, 0xf601, 0xf500, 0xf400,
- 0xf401, 0xf502, 0xf402, 0xf403, 0xf300, 0xf200, 0xf201, 0xf100, 0xf000, 0xf001, 0xf102, 0xf002, 0xf003, 0xf304, 0xf204, 0xf205,
- 0xf104, 0xf004, 0xf005, 0xf106, 0xf006, 0xf007, 0xf708, 0xf608, 0xf609, 0xf508, 0xf408, 0xf409, 0xf50a, 0xf40a, 0xf40b, 0xf308,
- 0xf208, 0xf209, 0xf108, 0xf008, 0xf009, 0xf10a, 0xf00a, 0xf00b, 0xf30c, 0xf20c, 0xf20d, 0xf10c, 0xf00c, 0xf00d, 0xf10e, 0xf00e,
- 0xf00f, 0xef00, 0xee00, 0xee01, 0xed00, 0xec00, 0xec01, 0xed02, 0xec02, 0xec03, 0xeb00, 0xea00, 0xea01, 0xe900, 0xe800, 0xe801,
- 0xe902, 0xe802, 0xe803, 0xeb04, 0xea04, 0xea05, 0xe904, 0xe804, 0xe805, 0xe906, 0xe806, 0xe807, 0xe700, 0xe600, 0xe601, 0xe500,
- 0xe400, 0xe401, 0xe502, 0xe402, 0xe403, 0xe300, 0xe200, 0xe201, 0xe100, 0xe000, 0xe001, 0xe102, 0xe002, 0xe003, 0xe304, 0xe204,
- 0xe205, 0xe104, 0xe004, 0xe005, 0xe106, 0xe006, 0xe007, 0xe708, 0xe608, 0xe609, 0xe508, 0xe408, 0xe409, 0xe50a, 0xe40a, 0xe40b,
- 0xe308, 0xe208, 0xe209, 0xe108, 0xe008, 0xe009, 0xe10a, 0xe00a, 0xe00b, 0xe30c, 0xe20c, 0xe20d, 0xe10c, 0xe00c, 0xe00d, 0xe10e,
- 0xe00e, 0xe00f, 0xef10, 0xee10, 0xee11, 0xed10, 0xec10, 0xec11, 0xed12, 0xec12, 0xec13, 0xeb10, 0xea10, 0xea11, 0xe910, 0xe810,
- 0xe811, 0xe912, 0xe812, 0xe813, 0xeb14, 0xea14, 0xea15, 0xe914, 0xe814, 0xe815, 0xe916, 0xe816, 0xe817, 0xe710, 0xe610, 0xe611,
- 0xe510, 0xe410, 0xe411, 0xe512, 0xe412, 0xe413, 0xe310, 0xe210, 0xe211, 0xe110, 0xe010, 0xe011, 0xe112, 0xe012, 0xe013, 0xe314,
- 0xe214, 0xe215, 0xe114, 0xe014, 0xe015, 0xe116, 0xe016, 0xe017, 0xe718, 0xe618, 0xe619, 0xe518, 0xe418, 0xe419, 0xe51a, 0xe41a,
- 0xe41b, 0xe318, 0xe218, 0xe219, 0xe118, 0xe018, 0xe019, 0xe11a, 0xe01a, 0xe01b, 0xe31c, 0xe21c, 0xe21d, 0xe11c, 0xe01c, 0xe01d,
- 0xe11e, 0xe01e, 0xe01f, 0xdf00, 0xde00, 0xde01, 0xdd00, 0xdc00, 0xdc01, 0xdd02, 0xdc02, 0xdc03, 0xdb00, 0xda00, 0xda01, 0xd900,
- 0xd800, 0xd801, 0xd902, 0xd802, 0xd803, 0xdb04, 0xda04, 0xda05, 0xd904, 0xd804, 0xd805, 0xd906, 0xd806, 0xd807, 0xd700, 0xd600,
- 0xd601, 0xd500, 0xd400, 0xd401, 0xd502, 0xd402, 0xd403, 0xd300, 0xd200, 0xd201, 0xd100, 0xd000, 0xd001, 0xd102, 0xd002, 0xd003,
- 0xd304, 0xd204, 0xd205, 0xd104, 0xd004, 0xd005, 0xd106, 0xd006, 0xd007, 0xd708, 0xd608, 0xd609, 0xd508, 0xd408, 0xd409, 0xd50a,
- 0xd40a, 0xd40b, 0xd308, 0xd208, 0xd209, 0xd108, 0xd008, 0xd009, 0xd10a, 0xd00a, 0xd00b, 0xd30c, 0xd20c, 0xd20d, 0xd10c, 0xd00c,
- 0xd00d, 0xd10e, 0xd00e, 0xd00f, 0xcf00, 0xce00, 0xce01, 0xcd00, 0xcc00, 0xcc01, 0xcd02, 0xcc02, 0xcc03, 0xcb00, 0xca00, 0xca01,
- 0xc900, 0xc800, 0xc801, 0xc902, 0xc802, 0xc803, 0xcb04, 0xca04, 0xca05, 0xc904, 0xc804, 0xc805, 0xc906, 0xc806, 0xc807, 0xc700,
- 0xc600, 0xc601, 0xc500, 0xc400, 0xc401, 0xc502, 0xc402, 0xc403, 0xc300, 0xc200, 0xc201, 0xc100, 0xc000, 0xc001, 0xc102, 0xc002,
- 0xc003, 0xc304, 0xc204, 0xc205, 0xc104, 0xc004, 0xc005, 0xc106, 0xc006, 0xc007, 0xc708, 0xc608, 0xc609, 0xc508, 0xc408, 0xc409,
- 0xc50a, 0xc40a, 0xc40b, 0xc308, 0xc208, 0xc209, 0xc108, 0xc008, 0xc009, 0xc10a, 0xc00a, 0xc00b, 0xc30c, 0xc20c, 0xc20d, 0xc10c,
- 0xc00c, 0xc00d, 0xc10e, 0xc00e, 0xc00f, 0xcf10, 0xce10, 0xce11, 0xcd10, 0xcc10, 0xcc11, 0xcd12, 0xcc12, 0xcc13, 0xcb10, 0xca10,
- 0xca11, 0xc910, 0xc810, 0xc811, 0xc912, 0xc812, 0xc813, 0xcb14, 0xca14, 0xca15, 0xc914, 0xc814, 0xc815, 0xc916, 0xc816, 0xc817,
- 0xc710, 0xc610, 0xc611, 0xc510, 0xc410, 0xc411, 0xc512, 0xc412, 0xc413, 0xc310, 0xc210, 0xc211, 0xc110, 0xc010, 0xc011, 0xc112,
- 0xc012, 0xc013, 0xc314, 0xc214, 0xc215, 0xc114, 0xc014, 0xc015, 0xc116, 0xc016, 0xc017, 0xc718, 0xc618, 0xc619, 0xc518, 0xc418,
- 0xc419, 0xc51a, 0xc41a, 0xc41b, 0xc318, 0xc218, 0xc219, 0xc118, 0xc018, 0xc019, 0xc11a, 0xc01a, 0xc01b, 0xc31c, 0xc21c, 0xc21d,
- 0xc11c, 0xc01c, 0xc01d, 0xc11e, 0xc01e, 0xc01f, 0xdf20, 0xde20, 0xde21, 0xdd20, 0xdc20, 0xdc21, 0xdd22, 0xdc22, 0xdc23, 0xdb20,
- 0xda20, 0xda21, 0xd920, 0xd820, 0xd821, 0xd922, 0xd822, 0xd823, 0xdb24, 0xda24, 0xda25, 0xd924, 0xd824, 0xd825, 0xd926, 0xd826,
- 0xd827, 0xd720, 0xd620, 0xd621, 0xd520, 0xd420, 0xd421, 0xd522, 0xd422, 0xd423, 0xd320, 0xd220, 0xd221, 0xd120, 0xd020, 0xd021,
- 0xd122, 0xd022, 0xd023, 0xd324, 0xd224, 0xd225, 0xd124, 0xd024, 0xd025, 0xd126, 0xd026, 0xd027, 0xd728, 0xd628, 0xd629, 0xd528,
- 0xd428, 0xd429, 0xd52a, 0xd42a, 0xd42b, 0xd328, 0xd228, 0xd229, 0xd128, 0xd028, 0xd029, 0xd12a, 0xd02a, 0xd02b, 0xd32c, 0xd22c,
- 0xd22d, 0xd12c, 0xd02c, 0xd02d, 0xd12e, 0xd02e, 0xd02f, 0xcf20, 0xce20, 0xce21, 0xcd20, 0xcc20, 0xcc21, 0xcd22, 0xcc22, 0xcc23,
- 0xcb20, 0xca20, 0xca21, 0xc920, 0xc820, 0xc821, 0xc922, 0xc822, 0xc823, 0xcb24, 0xca24, 0xca25, 0xc924, 0xc824, 0xc825, 0xc926,
- 0xc826, 0xc827, 0xc720, 0xc620, 0xc621, 0xc520, 0xc420, 0xc421, 0xc522, 0xc422, 0xc423, 0xc320, 0xc220, 0xc221, 0xc120, 0xc020,
- 0xc021, 0xc122, 0xc022, 0xc023, 0xc324, 0xc224, 0xc225, 0xc124, 0xc024, 0xc025, 0xc126, 0xc026, 0xc027, 0xc728, 0xc628, 0xc629,
- 0xc528, 0xc428, 0xc429, 0xc52a, 0xc42a, 0xc42b, 0xc328, 0xc228, 0xc229, 0xc128, 0xc028, 0xc029, 0xc12a, 0xc02a, 0xc02b, 0xc32c,
- 0xc22c, 0xc22d, 0xc12c, 0xc02c, 0xc02d, 0xc12e, 0xc02e, 0xc02f, 0xcf30, 0xce30, 0xce31, 0xcd30, 0xcc30, 0xcc31, 0xcd32, 0xcc32,
- 0xcc33, 0xcb30, 0xca30, 0xca31, 0xc930, 0xc830, 0xc831, 0xc932, 0xc832, 0xc833, 0xcb34, 0xca34, 0xca35, 0xc934, 0xc834, 0xc835,
- 0xc936, 0xc836, 0xc837, 0xc730, 0xc630, 0xc631, 0xc530, 0xc430, 0xc431, 0xc532, 0xc432, 0xc433, 0xc330, 0xc230, 0xc231, 0xc130,
- 0xc030, 0xc031, 0xc132, 0xc032, 0xc033, 0xc334, 0xc234, 0xc235, 0xc134, 0xc034, 0xc035, 0xc136, 0xc036, 0xc037, 0xc738, 0xc638,
- 0xc639, 0xc538, 0xc438, 0xc439, 0xc53a, 0xc43a, 0xc43b, 0xc338, 0xc238, 0xc239, 0xc138, 0xc038, 0xc039, 0xc13a, 0xc03a, 0xc03b,
- 0xc33c, 0xc23c, 0xc23d, 0xc13c, 0xc03c, 0xc03d, 0xc13e, 0xc03e, 0xc03f, 0xbf00, 0xbe00, 0xbe01, 0xbd00, 0xbc00, 0xbc01, 0xbd02,
- 0xbc02, 0xbc03, 0xbb00, 0xba00, 0xba01, 0xb900, 0xb800, 0xb801, 0xb902, 0xb802, 0xb803, 0xbb04, 0xba04, 0xba05, 0xb904, 0xb804,
- 0xb805, 0xb906, 0xb806, 0xb807, 0xb700, 0xb600, 0xb601, 0xb500, 0xb400, 0xb401, 0xb502, 0xb402, 0xb403, 0xb300, 0xb200, 0xb201,
- 0xb100, 0xb000, 0xb001, 0xb102, 0xb002, 0xb003, 0xb304, 0xb204, 0xb205, 0xb104, 0xb004, 0xb005, 0xb106, 0xb006, 0xb007, 0xb708,
- 0xb608, 0xb609, 0xb508, 0xb408, 0xb409, 0xb50a, 0xb40a, 0xb40b, 0xb308, 0xb208, 0xb209, 0xb108, 0xb008, 0xb009, 0xb10a, 0xb00a,
- 0xb00b, 0xb30c, 0xb20c, 0xb20d, 0xb10c, 0xb00c, 0xb00d, 0xb10e, 0xb00e, 0xb00f, 0xaf00, 0xae00, 0xae01, 0xad00, 0xac00, 0xac01,
- 0xad02, 0xac02, 0xac03, 0xab00, 0xaa00, 0xaa01, 0xa900, 0xa800, 0xa801, 0xa902, 0xa802, 0xa803, 0xab04, 0xaa04, 0xaa05, 0xa904,
- 0xa804, 0xa805, 0xa906, 0xa806, 0xa807, 0xa700, 0xa600, 0xa601, 0xa500, 0xa400, 0xa401, 0xa502, 0xa402, 0xa403, 0xa300, 0xa200,
- 0xa201, 0xa100, 0xa000, 0xa001, 0xa102, 0xa002, 0xa003, 0xa304, 0xa204, 0xa205, 0xa104, 0xa004, 0xa005, 0xa106, 0xa006, 0xa007,
- 0xa708, 0xa608, 0xa609, 0xa508, 0xa408, 0xa409, 0xa50a, 0xa40a, 0xa40b, 0xa308, 0xa208, 0xa209, 0xa108, 0xa008, 0xa009, 0xa10a,
- 0xa00a, 0xa00b, 0xa30c, 0xa20c, 0xa20d, 0xa10c, 0xa00c, 0xa00d, 0xa10e, 0xa00e, 0xa00f, 0xaf10, 0xae10, 0xae11, 0xad10, 0xac10,
- 0xac11, 0xad12, 0xac12, 0xac13, 0xab10, 0xaa10, 0xaa11, 0xa910, 0xa810, 0xa811, 0xa912, 0xa812, 0xa813, 0xab14, 0xaa14, 0xaa15,
- 0xa914, 0xa814, 0xa815, 0xa916, 0xa816, 0xa817, 0xa710, 0xa610, 0xa611, 0xa510, 0xa410, 0xa411, 0xa512, 0xa412, 0xa413, 0xa310,
- 0xa210, 0xa211, 0xa110, 0xa010, 0xa011, 0xa112, 0xa012, 0xa013, 0xa314, 0xa214, 0xa215, 0xa114, 0xa014, 0xa015, 0xa116, 0xa016,
- 0xa017, 0xa718, 0xa618, 0xa619, 0xa518, 0xa418, 0xa419, 0xa51a, 0xa41a, 0xa41b, 0xa318, 0xa218, 0xa219, 0xa118, 0xa018, 0xa019,
- 0xa11a, 0xa01a, 0xa01b, 0xa31c, 0xa21c, 0xa21d, 0xa11c, 0xa01c, 0xa01d, 0xa11e, 0xa01e, 0xa01f, 0x9f00, 0x9e00, 0x9e01, 0x9d00,
- 0x9c00, 0x9c01, 0x9d02, 0x9c02, 0x9c03, 0x9b00, 0x9a00, 0x9a01, 0x9900, 0x9800, 0x9801, 0x9902, 0x9802, 0x9803, 0x9b04, 0x9a04,
- 0x9a05, 0x9904, 0x9804, 0x9805, 0x9906, 0x9806, 0x9807, 0x9700, 0x9600, 0x9601, 0x9500, 0x9400, 0x9401, 0x9502, 0x9402, 0x9403,
- 0x9300, 0x9200, 0x9201, 0x9100, 0x9000, 0x9001, 0x9102, 0x9002, 0x9003, 0x9304, 0x9204, 0x9205, 0x9104, 0x9004, 0x9005, 0x9106,
- 0x9006, 0x9007, 0x9708, 0x9608, 0x9609, 0x9508, 0x9408, 0x9409, 0x950a, 0x940a, 0x940b, 0x9308, 0x9208, 0x9209, 0x9108, 0x9008,
- 0x9009, 0x910a, 0x900a, 0x900b, 0x930c, 0x920c, 0x920d, 0x910c, 0x900c, 0x900d, 0x910e, 0x900e, 0x900f, 0x8f00, 0x8e00, 0x8e01,
- 0x8d00, 0x8c00, 0x8c01, 0x8d02, 0x8c02, 0x8c03, 0x8b00, 0x8a00, 0x8a01, 0x8900, 0x8800, 0x8801, 0x8902, 0x8802, 0x8803, 0x8b04,
- 0x8a04, 0x8a05, 0x8904, 0x8804, 0x8805, 0x8906, 0x8806, 0x8807, 0x8700, 0x8600, 0x8601, 0x8500, 0x8400, 0x8401, 0x8502, 0x8402,
- 0x8403, 0x8300, 0x8200, 0x8201, 0x8100, 0x8000, 0x8001, 0x8102, 0x8002, 0x8003, 0x8304, 0x8204, 0x8205, 0x8104, 0x8004, 0x8005,
- 0x8106, 0x8006, 0x8007, 0x8708, 0x8608, 0x8609, 0x8508, 0x8408, 0x8409, 0x850a, 0x840a, 0x840b, 0x8308, 0x8208, 0x8209, 0x8108,
- 0x8008, 0x8009, 0x810a, 0x800a, 0x800b, 0x830c, 0x820c, 0x820d, 0x810c, 0x800c, 0x800d, 0x810e, 0x800e, 0x800f, 0x8f10, 0x8e10,
- 0x8e11, 0x8d10, 0x8c10, 0x8c11, 0x8d12, 0x8c12, 0x8c13, 0x8b10, 0x8a10, 0x8a11, 0x8910, 0x8810, 0x8811, 0x8912, 0x8812, 0x8813,
- 0x8b14, 0x8a14, 0x8a15, 0x8914, 0x8814, 0x8815, 0x8916, 0x8816, 0x8817, 0x8710, 0x8610, 0x8611, 0x8510, 0x8410, 0x8411, 0x8512,
- 0x8412, 0x8413, 0x8310, 0x8210, 0x8211, 0x8110, 0x8010, 0x8011, 0x8112, 0x8012, 0x8013, 0x8314, 0x8214, 0x8215, 0x8114, 0x8014,
- 0x8015, 0x8116, 0x8016, 0x8017, 0x8718, 0x8618, 0x8619, 0x8518, 0x8418, 0x8419, 0x851a, 0x841a, 0x841b, 0x8318, 0x8218, 0x8219,
- 0x8118, 0x8018, 0x8019, 0x811a, 0x801a, 0x801b, 0x831c, 0x821c, 0x821d, 0x811c, 0x801c, 0x801d, 0x811e, 0x801e, 0x801f, 0x9f20,
- 0x9e20, 0x9e21, 0x9d20, 0x9c20, 0x9c21, 0x9d22, 0x9c22, 0x9c23, 0x9b20, 0x9a20, 0x9a21, 0x9920, 0x9820, 0x9821, 0x9922, 0x9822,
- 0x9823, 0x9b24, 0x9a24, 0x9a25, 0x9924, 0x9824, 0x9825, 0x9926, 0x9826, 0x9827, 0x9720, 0x9620, 0x9621, 0x9520, 0x9420, 0x9421,
- 0x9522, 0x9422, 0x9423, 0x9320, 0x9220, 0x9221, 0x9120, 0x9020, 0x9021, 0x9122, 0x9022, 0x9023, 0x9324, 0x9224, 0x9225, 0x9124,
- 0x9024, 0x9025, 0x9126, 0x9026, 0x9027, 0x9728, 0x9628, 0x9629, 0x9528, 0x9428, 0x9429, 0x952a, 0x942a, 0x942b, 0x9328, 0x9228,
- 0x9229, 0x9128, 0x9028, 0x9029, 0x912a, 0x902a, 0x902b, 0x932c, 0x922c, 0x922d, 0x912c, 0x902c, 0x902d, 0x912e, 0x902e, 0x902f,
- 0x8f20, 0x8e20, 0x8e21, 0x8d20, 0x8c20, 0x8c21, 0x8d22, 0x8c22, 0x8c23, 0x8b20, 0x8a20, 0x8a21, 0x8920, 0x8820, 0x8821, 0x8922,
- 0x8822, 0x8823, 0x8b24, 0x8a24, 0x8a25, 0x8924, 0x8824, 0x8825, 0x8926, 0x8826, 0x8827, 0x8720, 0x8620, 0x8621, 0x8520, 0x8420,
- 0x8421, 0x8522, 0x8422, 0x8423, 0x8320, 0x8220, 0x8221, 0x8120, 0x8020, 0x8021, 0x8122, 0x8022, 0x8023, 0x8324, 0x8224, 0x8225,
- 0x8124, 0x8024, 0x8025, 0x8126, 0x8026, 0x8027, 0x8728, 0x8628, 0x8629, 0x8528, 0x8428, 0x8429, 0x852a, 0x842a, 0x842b, 0x8328,
- 0x8228, 0x8229, 0x8128, 0x8028, 0x8029, 0x812a, 0x802a, 0x802b, 0x832c, 0x822c, 0x822d, 0x812c, 0x802c, 0x802d, 0x812e, 0x802e,
- 0x802f, 0x8f30, 0x8e30, 0x8e31, 0x8d30, 0x8c30, 0x8c31, 0x8d32, 0x8c32, 0x8c33, 0x8b30, 0x8a30, 0x8a31, 0x8930, 0x8830, 0x8831,
- 0x8932, 0x8832, 0x8833, 0x8b34, 0x8a34, 0x8a35, 0x8934, 0x8834, 0x8835, 0x8936, 0x8836, 0x8837, 0x8730, 0x8630, 0x8631, 0x8530,
- 0x8430, 0x8431, 0x8532, 0x8432, 0x8433, 0x8330, 0x8230, 0x8231, 0x8130, 0x8030, 0x8031, 0x8132, 0x8032, 0x8033, 0x8334, 0x8234,
- 0x8235, 0x8134, 0x8034, 0x8035, 0x8136, 0x8036, 0x8037, 0x8738, 0x8638, 0x8639, 0x8538, 0x8438, 0x8439, 0x853a, 0x843a, 0x843b,
- 0x8338, 0x8238, 0x8239, 0x8138, 0x8038, 0x8039, 0x813a, 0x803a, 0x803b, 0x833c, 0x823c, 0x823d, 0x813c, 0x803c, 0x803d, 0x813e,
- 0x803e, 0x803f, 0xbf40, 0xbe40, 0xbe41, 0xbd40, 0xbc40, 0xbc41, 0xbd42, 0xbc42, 0xbc43, 0xbb40, 0xba40, 0xba41, 0xb940, 0xb840,
- 0xb841, 0xb942, 0xb842, 0xb843, 0xbb44, 0xba44, 0xba45, 0xb944, 0xb844, 0xb845, 0xb946, 0xb846, 0xb847, 0xb740, 0xb640, 0xb641,
- 0xb540, 0xb440, 0xb441, 0xb542, 0xb442, 0xb443, 0xb340, 0xb240, 0xb241, 0xb140, 0xb040, 0xb041, 0xb142, 0xb042, 0xb043, 0xb344,
- 0xb244, 0xb245, 0xb144, 0xb044, 0xb045, 0xb146, 0xb046, 0xb047, 0xb748, 0xb648, 0xb649, 0xb548, 0xb448, 0xb449, 0xb54a, 0xb44a,
- 0xb44b, 0xb348, 0xb248, 0xb249, 0xb148, 0xb048, 0xb049, 0xb14a, 0xb04a, 0xb04b, 0xb34c, 0xb24c, 0xb24d, 0xb14c, 0xb04c, 0xb04d,
- 0xb14e, 0xb04e, 0xb04f, 0xaf40, 0xae40, 0xae41, 0xad40, 0xac40, 0xac41, 0xad42, 0xac42, 0xac43, 0xab40, 0xaa40, 0xaa41, 0xa940,
- 0xa840, 0xa841, 0xa942, 0xa842, 0xa843, 0xab44, 0xaa44, 0xaa45, 0xa944, 0xa844, 0xa845, 0xa946, 0xa846, 0xa847, 0xa740, 0xa640,
- 0xa641, 0xa540, 0xa440, 0xa441, 0xa542, 0xa442, 0xa443, 0xa340, 0xa240, 0xa241, 0xa140, 0xa040, 0xa041, 0xa142, 0xa042, 0xa043,
- 0xa344, 0xa244, 0xa245, 0xa144, 0xa044, 0xa045, 0xa146, 0xa046, 0xa047, 0xa748, 0xa648, 0xa649, 0xa548, 0xa448, 0xa449, 0xa54a,
- 0xa44a, 0xa44b, 0xa348, 0xa248, 0xa249, 0xa148, 0xa048, 0xa049, 0xa14a, 0xa04a, 0xa04b, 0xa34c, 0xa24c, 0xa24d, 0xa14c, 0xa04c,
- 0xa04d, 0xa14e, 0xa04e, 0xa04f, 0xaf50, 0xae50, 0xae51, 0xad50, 0xac50, 0xac51, 0xad52, 0xac52, 0xac53, 0xab50, 0xaa50, 0xaa51,
- 0xa950, 0xa850, 0xa851, 0xa952, 0xa852, 0xa853, 0xab54, 0xaa54, 0xaa55, 0xa954, 0xa854, 0xa855, 0xa956, 0xa856, 0xa857, 0xa750,
- 0xa650, 0xa651, 0xa550, 0xa450, 0xa451, 0xa552, 0xa452, 0xa453, 0xa350, 0xa250, 0xa251, 0xa150, 0xa050, 0xa051, 0xa152, 0xa052,
- 0xa053, 0xa354, 0xa254, 0xa255, 0xa154, 0xa054, 0xa055, 0xa156, 0xa056, 0xa057, 0xa758, 0xa658, 0xa659, 0xa558, 0xa458, 0xa459,
- 0xa55a, 0xa45a, 0xa45b, 0xa358, 0xa258, 0xa259, 0xa158, 0xa058, 0xa059, 0xa15a, 0xa05a, 0xa05b, 0xa35c, 0xa25c, 0xa25d, 0xa15c,
- 0xa05c, 0xa05d, 0xa15e, 0xa05e, 0xa05f, 0x9f40, 0x9e40, 0x9e41, 0x9d40, 0x9c40, 0x9c41, 0x9d42, 0x9c42, 0x9c43, 0x9b40, 0x9a40,
- 0x9a41, 0x9940, 0x9840, 0x9841, 0x9942, 0x9842, 0x9843, 0x9b44, 0x9a44, 0x9a45, 0x9944, 0x9844, 0x9845, 0x9946, 0x9846, 0x9847,
- 0x9740, 0x9640, 0x9641, 0x9540, 0x9440, 0x9441, 0x9542, 0x9442, 0x9443, 0x9340, 0x9240, 0x9241, 0x9140, 0x9040, 0x9041, 0x9142,
- 0x9042, 0x9043, 0x9344, 0x9244, 0x9245, 0x9144, 0x9044, 0x9045, 0x9146, 0x9046, 0x9047, 0x9748, 0x9648, 0x9649, 0x9548, 0x9448,
- 0x9449, 0x954a, 0x944a, 0x944b, 0x9348, 0x9248, 0x9249, 0x9148, 0x9048, 0x9049, 0x914a, 0x904a, 0x904b, 0x934c, 0x924c, 0x924d,
- 0x914c, 0x904c, 0x904d, 0x914e, 0x904e, 0x904f, 0x8f40, 0x8e40, 0x8e41, 0x8d40, 0x8c40, 0x8c41, 0x8d42, 0x8c42, 0x8c43, 0x8b40,
- 0x8a40, 0x8a41, 0x8940, 0x8840, 0x8841, 0x8942, 0x8842, 0x8843, 0x8b44, 0x8a44, 0x8a45, 0x8944, 0x8844, 0x8845, 0x8946, 0x8846,
- 0x8847, 0x8740, 0x8640, 0x8641, 0x8540, 0x8440, 0x8441, 0x8542, 0x8442, 0x8443, 0x8340, 0x8240, 0x8241, 0x8140, 0x8040, 0x8041,
- 0x8142, 0x8042, 0x8043, 0x8344, 0x8244, 0x8245, 0x8144, 0x8044, 0x8045, 0x8146, 0x8046, 0x8047, 0x8748, 0x8648, 0x8649, 0x8548,
- 0x8448, 0x8449, 0x854a, 0x844a, 0x844b, 0x8348, 0x8248, 0x8249, 0x8148, 0x8048, 0x8049, 0x814a, 0x804a, 0x804b, 0x834c, 0x824c,
- 0x824d, 0x814c, 0x804c, 0x804d, 0x814e, 0x804e, 0x804f, 0x8f50, 0x8e50, 0x8e51, 0x8d50, 0x8c50, 0x8c51, 0x8d52, 0x8c52, 0x8c53,
- 0x8b50, 0x8a50, 0x8a51, 0x8950, 0x8850, 0x8851, 0x8952, 0x8852, 0x8853, 0x8b54, 0x8a54, 0x8a55, 0x8954, 0x8854, 0x8855, 0x8956,
- 0x8856, 0x8857, 0x8750, 0x8650, 0x8651, 0x8550, 0x8450, 0x8451, 0x8552, 0x8452, 0x8453, 0x8350, 0x8250, 0x8251, 0x8150, 0x8050,
- 0x8051, 0x8152, 0x8052, 0x8053, 0x8354, 0x8254, 0x8255, 0x8154, 0x8054, 0x8055, 0x8156, 0x8056, 0x8057, 0x8758, 0x8658, 0x8659,
- 0x8558, 0x8458, 0x8459, 0x855a, 0x845a, 0x845b, 0x8358, 0x8258, 0x8259, 0x8158, 0x8058, 0x8059, 0x815a, 0x805a, 0x805b, 0x835c,
- 0x825c, 0x825d, 0x815c, 0x805c, 0x805d, 0x815e, 0x805e, 0x805f, 0x9f60, 0x9e60, 0x9e61, 0x9d60, 0x9c60, 0x9c61, 0x9d62, 0x9c62,
- 0x9c63, 0x9b60, 0x9a60, 0x9a61, 0x9960, 0x9860, 0x9861, 0x9962, 0x9862, 0x9863, 0x9b64, 0x9a64, 0x9a65, 0x9964, 0x9864, 0x9865,
- 0x9966, 0x9866, 0x9867, 0x9760, 0x9660, 0x9661, 0x9560, 0x9460, 0x9461, 0x9562, 0x9462, 0x9463, 0x9360, 0x9260, 0x9261, 0x9160,
- 0x9060, 0x9061, 0x9162, 0x9062, 0x9063, 0x9364, 0x9264, 0x9265, 0x9164, 0x9064, 0x9065, 0x9166, 0x9066, 0x9067, 0x9768, 0x9668,
- 0x9669, 0x9568, 0x9468, 0x9469, 0x956a, 0x946a, 0x946b, 0x9368, 0x9268, 0x9269, 0x9168, 0x9068, 0x9069, 0x916a, 0x906a, 0x906b,
- 0x936c, 0x926c, 0x926d, 0x916c, 0x906c, 0x906d, 0x916e, 0x906e, 0x906f, 0x8f60, 0x8e60, 0x8e61, 0x8d60, 0x8c60, 0x8c61, 0x8d62,
- 0x8c62, 0x8c63, 0x8b60, 0x8a60, 0x8a61, 0x8960, 0x8860, 0x8861, 0x8962, 0x8862, 0x8863, 0x8b64, 0x8a64, 0x8a65, 0x8964, 0x8864,
- 0x8865, 0x8966, 0x8866, 0x8867, 0x8760, 0x8660, 0x8661, 0x8560, 0x8460, 0x8461, 0x8562, 0x8462, 0x8463, 0x8360, 0x8260, 0x8261,
- 0x8160, 0x8060, 0x8061, 0x8162, 0x8062, 0x8063, 0x8364, 0x8264, 0x8265, 0x8164, 0x8064, 0x8065, 0x8166, 0x8066, 0x8067, 0x8768,
- 0x8668, 0x8669, 0x8568, 0x8468, 0x8469, 0x856a, 0x846a, 0x846b, 0x8368, 0x8268, 0x8269, 0x8168, 0x8068, 0x8069, 0x816a, 0x806a,
- 0x806b, 0x836c, 0x826c, 0x826d, 0x816c, 0x806c, 0x806d, 0x816e, 0x806e, 0x806f, 0x8f70, 0x8e70, 0x8e71, 0x8d70, 0x8c70, 0x8c71,
- 0x8d72, 0x8c72, 0x8c73, 0x8b70, 0x8a70, 0x8a71, 0x8970, 0x8870, 0x8871, 0x8972, 0x8872, 0x8873, 0x8b74, 0x8a74, 0x8a75, 0x8974,
- 0x8874, 0x8875, 0x8976, 0x8876, 0x8877, 0x8770, 0x8670, 0x8671, 0x8570, 0x8470, 0x8471, 0x8572, 0x8472, 0x8473, 0x8370, 0x8270,
- 0x8271, 0x8170, 0x8070, 0x8071, 0x8172, 0x8072, 0x8073, 0x8374, 0x8274, 0x8275, 0x8174, 0x8074, 0x8075, 0x8176, 0x8076, 0x8077,
- 0x8778, 0x8678, 0x8679, 0x8578, 0x8478, 0x8479, 0x857a, 0x847a, 0x847b, 0x8378, 0x8278, 0x8279, 0x8178, 0x8078, 0x8079, 0x817a,
- 0x807a, 0x807b, 0x837c, 0x827c, 0x827d, 0x817c, 0x807c, 0x807d, 0x817e, 0x807e, 0x807f, 0x7f00, 0x7e00, 0x7e01, 0x7d00, 0x7c00,
- 0x7c01, 0x7d02, 0x7c02, 0x7c03, 0x7b00, 0x7a00, 0x7a01, 0x7900, 0x7800, 0x7801, 0x7902, 0x7802, 0x7803, 0x7b04, 0x7a04, 0x7a05,
- 0x7904, 0x7804, 0x7805, 0x7906, 0x7806, 0x7807, 0x7700, 0x7600, 0x7601, 0x7500, 0x7400, 0x7401, 0x7502, 0x7402, 0x7403, 0x7300,
- 0x7200, 0x7201, 0x7100, 0x7000, 0x7001, 0x7102, 0x7002, 0x7003, 0x7304, 0x7204, 0x7205, 0x7104, 0x7004, 0x7005, 0x7106, 0x7006,
- 0x7007, 0x7708, 0x7608, 0x7609, 0x7508, 0x7408, 0x7409, 0x750a, 0x740a, 0x740b, 0x7308, 0x7208, 0x7209, 0x7108, 0x7008, 0x7009,
- 0x710a, 0x700a, 0x700b, 0x730c, 0x720c, 0x720d, 0x710c, 0x700c, 0x700d, 0x710e, 0x700e, 0x700f, 0x6f00, 0x6e00, 0x6e01, 0x6d00,
- 0x6c00, 0x6c01, 0x6d02, 0x6c02, 0x6c03, 0x6b00, 0x6a00, 0x6a01, 0x6900, 0x6800, 0x6801, 0x6902, 0x6802, 0x6803, 0x6b04, 0x6a04,
- 0x6a05, 0x6904, 0x6804, 0x6805, 0x6906, 0x6806, 0x6807, 0x6700, 0x6600, 0x6601, 0x6500, 0x6400, 0x6401, 0x6502, 0x6402, 0x6403,
- 0x6300, 0x6200, 0x6201, 0x6100, 0x6000, 0x6001, 0x6102, 0x6002, 0x6003, 0x6304, 0x6204, 0x6205, 0x6104, 0x6004, 0x6005, 0x6106,
- 0x6006, 0x6007, 0x6708, 0x6608, 0x6609, 0x6508, 0x6408, 0x6409, 0x650a, 0x640a, 0x640b, 0x6308, 0x6208, 0x6209, 0x6108, 0x6008,
- 0x6009, 0x610a, 0x600a, 0x600b, 0x630c, 0x620c, 0x620d, 0x610c, 0x600c, 0x600d, 0x610e, 0x600e, 0x600f, 0x6f10, 0x6e10, 0x6e11,
- 0x6d10, 0x6c10, 0x6c11, 0x6d12, 0x6c12, 0x6c13, 0x6b10, 0x6a10, 0x6a11, 0x6910, 0x6810, 0x6811, 0x6912, 0x6812, 0x6813, 0x6b14,
- 0x6a14, 0x6a15, 0x6914, 0x6814, 0x6815, 0x6916, 0x6816, 0x6817, 0x6710, 0x6610, 0x6611, 0x6510, 0x6410, 0x6411, 0x6512, 0x6412,
- 0x6413, 0x6310, 0x6210, 0x6211, 0x6110, 0x6010, 0x6011, 0x6112, 0x6012, 0x6013, 0x6314, 0x6214, 0x6215, 0x6114, 0x6014, 0x6015,
- 0x6116, 0x6016, 0x6017, 0x6718, 0x6618, 0x6619, 0x6518, 0x6418, 0x6419, 0x651a, 0x641a, 0x641b, 0x6318, 0x6218, 0x6219, 0x6118,
- 0x6018, 0x6019, 0x611a, 0x601a, 0x601b, 0x631c, 0x621c, 0x621d, 0x611c, 0x601c, 0x601d, 0x611e, 0x601e, 0x601f, 0x5f00, 0x5e00,
- 0x5e01, 0x5d00, 0x5c00, 0x5c01, 0x5d02, 0x5c02, 0x5c03, 0x5b00, 0x5a00, 0x5a01, 0x5900, 0x5800, 0x5801, 0x5902, 0x5802, 0x5803,
- 0x5b04, 0x5a04, 0x5a05, 0x5904, 0x5804, 0x5805, 0x5906, 0x5806, 0x5807, 0x5700, 0x5600, 0x5601, 0x5500, 0x5400, 0x5401, 0x5502,
- 0x5402, 0x5403, 0x5300, 0x5200, 0x5201, 0x5100, 0x5000, 0x5001, 0x5102, 0x5002, 0x5003, 0x5304, 0x5204, 0x5205, 0x5104, 0x5004,
- 0x5005, 0x5106, 0x5006, 0x5007, 0x5708, 0x5608, 0x5609, 0x5508, 0x5408, 0x5409, 0x550a, 0x540a, 0x540b, 0x5308, 0x5208, 0x5209,
- 0x5108, 0x5008, 0x5009, 0x510a, 0x500a, 0x500b, 0x530c, 0x520c, 0x520d, 0x510c, 0x500c, 0x500d, 0x510e, 0x500e, 0x500f, 0x4f00,
- 0x4e00, 0x4e01, 0x4d00, 0x4c00, 0x4c01, 0x4d02, 0x4c02, 0x4c03, 0x4b00, 0x4a00, 0x4a01, 0x4900, 0x4800, 0x4801, 0x4902, 0x4802,
- 0x4803, 0x4b04, 0x4a04, 0x4a05, 0x4904, 0x4804, 0x4805, 0x4906, 0x4806, 0x4807, 0x4700, 0x4600, 0x4601, 0x4500, 0x4400, 0x4401,
- 0x4502, 0x4402, 0x4403, 0x4300, 0x4200, 0x4201, 0x4100, 0x4000, 0x4001, 0x4102, 0x4002, 0x4003, 0x4304, 0x4204, 0x4205, 0x4104,
- 0x4004, 0x4005, 0x4106, 0x4006, 0x4007, 0x4708, 0x4608, 0x4609, 0x4508, 0x4408, 0x4409, 0x450a, 0x440a, 0x440b, 0x4308, 0x4208,
- 0x4209, 0x4108, 0x4008, 0x4009, 0x410a, 0x400a, 0x400b, 0x430c, 0x420c, 0x420d, 0x410c, 0x400c, 0x400d, 0x410e, 0x400e, 0x400f,
- 0x4f10, 0x4e10, 0x4e11, 0x4d10, 0x4c10, 0x4c11, 0x4d12, 0x4c12, 0x4c13, 0x4b10, 0x4a10, 0x4a11, 0x4910, 0x4810, 0x4811, 0x4912,
- 0x4812, 0x4813, 0x4b14, 0x4a14, 0x4a15, 0x4914, 0x4814, 0x4815, 0x4916, 0x4816, 0x4817, 0x4710, 0x4610, 0x4611, 0x4510, 0x4410,
- 0x4411, 0x4512, 0x4412, 0x4413, 0x4310, 0x4210, 0x4211, 0x4110, 0x4010, 0x4011, 0x4112, 0x4012, 0x4013, 0x4314, 0x4214, 0x4215,
- 0x4114, 0x4014, 0x4015, 0x4116, 0x4016, 0x4017, 0x4718, 0x4618, 0x4619, 0x4518, 0x4418, 0x4419, 0x451a, 0x441a, 0x441b, 0x4318,
- 0x4218, 0x4219, 0x4118, 0x4018, 0x4019, 0x411a, 0x401a, 0x401b, 0x431c, 0x421c, 0x421d, 0x411c, 0x401c, 0x401d, 0x411e, 0x401e,
- 0x401f, 0x5f20, 0x5e20, 0x5e21, 0x5d20, 0x5c20, 0x5c21, 0x5d22, 0x5c22, 0x5c23, 0x5b20, 0x5a20, 0x5a21, 0x5920, 0x5820, 0x5821,
- 0x5922, 0x5822, 0x5823, 0x5b24, 0x5a24, 0x5a25, 0x5924, 0x5824, 0x5825, 0x5926, 0x5826, 0x5827, 0x5720, 0x5620, 0x5621, 0x5520,
- 0x5420, 0x5421, 0x5522, 0x5422, 0x5423, 0x5320, 0x5220, 0x5221, 0x5120, 0x5020, 0x5021, 0x5122, 0x5022, 0x5023, 0x5324, 0x5224,
- 0x5225, 0x5124, 0x5024, 0x5025, 0x5126, 0x5026, 0x5027, 0x5728, 0x5628, 0x5629, 0x5528, 0x5428, 0x5429, 0x552a, 0x542a, 0x542b,
- 0x5328, 0x5228, 0x5229, 0x5128, 0x5028, 0x5029, 0x512a, 0x502a, 0x502b, 0x532c, 0x522c, 0x522d, 0x512c, 0x502c, 0x502d, 0x512e,
- 0x502e, 0x502f, 0x4f20, 0x4e20, 0x4e21, 0x4d20, 0x4c20, 0x4c21, 0x4d22, 0x4c22, 0x4c23, 0x4b20, 0x4a20, 0x4a21, 0x4920, 0x4820,
- 0x4821, 0x4922, 0x4822, 0x4823, 0x4b24, 0x4a24, 0x4a25, 0x4924, 0x4824, 0x4825, 0x4926, 0x4826, 0x4827, 0x4720, 0x4620, 0x4621,
- 0x4520, 0x4420, 0x4421, 0x4522, 0x4422, 0x4423, 0x4320, 0x4220, 0x4221, 0x4120, 0x4020, 0x4021, 0x4122, 0x4022, 0x4023, 0x4324,
- 0x4224, 0x4225, 0x4124, 0x4024, 0x4025, 0x4126, 0x4026, 0x4027, 0x4728, 0x4628, 0x4629, 0x4528, 0x4428, 0x4429, 0x452a, 0x442a,
- 0x442b, 0x4328, 0x4228, 0x4229, 0x4128, 0x4028, 0x4029, 0x412a, 0x402a, 0x402b, 0x432c, 0x422c, 0x422d, 0x412c, 0x402c, 0x402d,
- 0x412e, 0x402e, 0x402f, 0x4f30, 0x4e30, 0x4e31, 0x4d30, 0x4c30, 0x4c31, 0x4d32, 0x4c32, 0x4c33, 0x4b30, 0x4a30, 0x4a31, 0x4930,
- 0x4830, 0x4831, 0x4932, 0x4832, 0x4833, 0x4b34, 0x4a34, 0x4a35, 0x4934, 0x4834, 0x4835, 0x4936, 0x4836, 0x4837, 0x4730, 0x4630,
- 0x4631, 0x4530, 0x4430, 0x4431, 0x4532, 0x4432, 0x4433, 0x4330, 0x4230, 0x4231, 0x4130, 0x4030, 0x4031, 0x4132, 0x4032, 0x4033,
- 0x4334, 0x4234, 0x4235, 0x4134, 0x4034, 0x4035, 0x4136, 0x4036, 0x4037, 0x4738, 0x4638, 0x4639, 0x4538, 0x4438, 0x4439, 0x453a,
- 0x443a, 0x443b, 0x4338, 0x4238, 0x4239, 0x4138, 0x4038, 0x4039, 0x413a, 0x403a, 0x403b, 0x433c, 0x423c, 0x423d, 0x413c, 0x403c,
- 0x403d, 0x413e, 0x403e, 0x403f, 0x3f00, 0x3e00, 0x3e01, 0x3d00, 0x3c00, 0x3c01, 0x3d02, 0x3c02, 0x3c03, 0x3b00, 0x3a00, 0x3a01,
- 0x3900, 0x3800, 0x3801, 0x3902, 0x3802, 0x3803, 0x3b04, 0x3a04, 0x3a05, 0x3904, 0x3804, 0x3805, 0x3906, 0x3806, 0x3807, 0x3700,
- 0x3600, 0x3601, 0x3500, 0x3400, 0x3401, 0x3502, 0x3402, 0x3403, 0x3300, 0x3200, 0x3201, 0x3100, 0x3000, 0x3001, 0x3102, 0x3002,
- 0x3003, 0x3304, 0x3204, 0x3205, 0x3104, 0x3004, 0x3005, 0x3106, 0x3006, 0x3007, 0x3708, 0x3608, 0x3609, 0x3508, 0x3408, 0x3409,
- 0x350a, 0x340a, 0x340b, 0x3308, 0x3208, 0x3209, 0x3108, 0x3008, 0x3009, 0x310a, 0x300a, 0x300b, 0x330c, 0x320c, 0x320d, 0x310c,
- 0x300c, 0x300d, 0x310e, 0x300e, 0x300f, 0x2f00, 0x2e00, 0x2e01, 0x2d00, 0x2c00, 0x2c01, 0x2d02, 0x2c02, 0x2c03, 0x2b00, 0x2a00,
- 0x2a01, 0x2900, 0x2800, 0x2801, 0x2902, 0x2802, 0x2803, 0x2b04, 0x2a04, 0x2a05, 0x2904, 0x2804, 0x2805, 0x2906, 0x2806, 0x2807,
- 0x2700, 0x2600, 0x2601, 0x2500, 0x2400, 0x2401, 0x2502, 0x2402, 0x2403, 0x2300, 0x2200, 0x2201, 0x2100, 0x2000, 0x2001, 0x2102,
- 0x2002, 0x2003, 0x2304, 0x2204, 0x2205, 0x2104, 0x2004, 0x2005, 0x2106, 0x2006, 0x2007, 0x2708, 0x2608, 0x2609, 0x2508, 0x2408,
- 0x2409, 0x250a, 0x240a, 0x240b, 0x2308, 0x2208, 0x2209, 0x2108, 0x2008, 0x2009, 0x210a, 0x200a, 0x200b, 0x230c, 0x220c, 0x220d,
- 0x210c, 0x200c, 0x200d, 0x210e, 0x200e, 0x200f, 0x2f10, 0x2e10, 0x2e11, 0x2d10, 0x2c10, 0x2c11, 0x2d12, 0x2c12, 0x2c13, 0x2b10,
- 0x2a10, 0x2a11, 0x2910, 0x2810, 0x2811, 0x2912, 0x2812, 0x2813, 0x2b14, 0x2a14, 0x2a15, 0x2914, 0x2814, 0x2815, 0x2916, 0x2816,
- 0x2817, 0x2710, 0x2610, 0x2611, 0x2510, 0x2410, 0x2411, 0x2512, 0x2412, 0x2413, 0x2310, 0x2210, 0x2211, 0x2110, 0x2010, 0x2011,
- 0x2112, 0x2012, 0x2013, 0x2314, 0x2214, 0x2215, 0x2114, 0x2014, 0x2015, 0x2116, 0x2016, 0x2017, 0x2718, 0x2618, 0x2619, 0x2518,
- 0x2418, 0x2419, 0x251a, 0x241a, 0x241b, 0x2318, 0x2218, 0x2219, 0x2118, 0x2018, 0x2019, 0x211a, 0x201a, 0x201b, 0x231c, 0x221c,
- 0x221d, 0x211c, 0x201c, 0x201d, 0x211e, 0x201e, 0x201f, 0x1f00, 0x1e00, 0x1e01, 0x1d00, 0x1c00, 0x1c01, 0x1d02, 0x1c02, 0x1c03,
- 0x1b00, 0x1a00, 0x1a01, 0x1900, 0x1800, 0x1801, 0x1902, 0x1802, 0x1803, 0x1b04, 0x1a04, 0x1a05, 0x1904, 0x1804, 0x1805, 0x1906,
- 0x1806, 0x1807, 0x1700, 0x1600, 0x1601, 0x1500, 0x1400, 0x1401, 0x1502, 0x1402, 0x1403, 0x1300, 0x1200, 0x1201, 0x1100, 0x1000,
- 0x1001, 0x1102, 0x1002, 0x1003, 0x1304, 0x1204, 0x1205, 0x1104, 0x1004, 0x1005, 0x1106, 0x1006, 0x1007, 0x1708, 0x1608, 0x1609,
- 0x1508, 0x1408, 0x1409, 0x150a, 0x140a, 0x140b, 0x1308, 0x1208, 0x1209, 0x1108, 0x1008, 0x1009, 0x110a, 0x100a, 0x100b, 0x130c,
- 0x120c, 0x120d, 0x110c, 0x100c, 0x100d, 0x110e, 0x100e, 0x100f, 0x0f00, 0x0e00, 0x0e01, 0x0d00, 0x0c00, 0x0c01, 0x0d02, 0x0c02,
- 0x0c03, 0x0b00, 0x0a00, 0x0a01, 0x0900, 0x0800, 0x0801, 0x0902, 0x0802, 0x0803, 0x0b04, 0x0a04, 0x0a05, 0x0904, 0x0804, 0x0805,
- 0x0906, 0x0806, 0x0807, 0x0700, 0x0600, 0x0601, 0x0500, 0x0400, 0x0401, 0x0502, 0x0402, 0x0403, 0x0300, 0x0200, 0x0201, 0x0100, 0x0000,
-GGML_TABLE_END()
-
-GGML_TABLE_BEGIN(uint16_t, iq1bn_grid_zzz, 3281)
+GGML_TABLE_BEGIN(uint16_t, iq1bn_grid_u16, 3281)
0x0000, 0x0001, 0x0002, 0x0100, 0x0101, 0x0102, 0x0200, 0x0201, 0x0202, 0x0004, 0x0005, 0x0006, 0x0104, 0x0105, 0x0106, 0x0204,
0x0205, 0x0206, 0x0008, 0x0009, 0x000a, 0x0108, 0x0109, 0x010a, 0x0208, 0x0209, 0x020a, 0x0400, 0x0401, 0x0402, 0x0500, 0x0501,
0x0502, 0x0600, 0x0601, 0x0602, 0x0404, 0x0405, 0x0406, 0x0504, 0x0505, 0x0506, 0x0604, 0x0605, 0x0606, 0x0408, 0x0409, 0x040a,
diff --git a/ggml-cuda/convert.cu b/ggml-cuda/convert.cu
index 888c8452..0e1cde9b 100644
--- a/ggml-cuda/convert.cu
+++ b/ggml-cuda/convert.cu
@@ -433,7 +433,7 @@ static __global__ void dequantize_block_iq1_bn(const void * __restrict__ vx, dst
if (i >= nb64) return;
ib = ib%(QK_IQ1BN/32);
uint16_t idx = x[i].ql[4*ib + il] | ((x[i].qh[2*ib + il/2] << (8 - 4*(il%2))) & 0x0f00);
- uint16_t val = x[i].extra & (1 << (4*ib + il)) ? 0xaaaa - iq1bn_grid_zzz[idx] : iq1bn_grid_zzz[idx];
+ uint16_t val = x[i].extra & (1 << (4*ib + il)) ? 0xaaaa - iq1bn_grid_u16[idx] : iq1bn_grid_u16[idx];
uint32_t aux32[2];
const int8_t * aux8 = (const int8_t *)aux32;
aux32[0] = val | (val << 14);
diff --git a/ggml-cuda/vecdotq.cuh b/ggml-cuda/vecdotq.cuh
index bce2c154..1e2b4b7a 100644
--- a/ggml-cuda/vecdotq.cuh
+++ b/ggml-cuda/vecdotq.cuh
@@ -1086,8 +1086,8 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1(
for (int l = 0; l < 2; ++l) {
uint16_t idx1 = bq1->ql[4*iqs + 2*l+0] | ((bq1->qh[2*iqs + l] << 8) & 0x0f00);
uint16_t idx2 = bq1->ql[4*iqs + 2*l+1] | ((bq1->qh[2*iqs + l] << 4) & 0x0f00);
- uint16_t val1 = extra & 1 ? 0xaaaa - iq1bn_grid_zzz[idx1] : iq1bn_grid_zzz[idx1];
- uint16_t val2 = extra & 2 ? 0xaaaa - iq1bn_grid_zzz[idx2] : iq1bn_grid_zzz[idx2];
+ uint16_t val1 = extra & 1 ? 0xaaaa - iq1bn_grid_u16[idx1] : iq1bn_grid_u16[idx1];
+ uint16_t val2 = extra & 2 ? 0xaaaa - iq1bn_grid_u16[idx2] : iq1bn_grid_u16[idx2];
val32 = val1 | (val1 << 14);
v1 = __vsub4(val32 & 0x03030303, 0x01010101);
v2 = __vsub4((val32 >> 4) & 0x03030303, 0x01010101);
@@ -1104,7 +1104,7 @@ static __device__ __forceinline__ float vec_dot_iq1_bn_q8_1(
const int8_t * q8 = bq8_1[iqs].qs;
for (int l = 0; l < 4; ++l) {
uint16_t idx = bq1->ql[4*iqs + l] | ((bq1->qh[2*iqs + l/2] << (8 - 4*(l%2))) & 0x0f00);
- uint16_t val = extra & 1 ? 0xaaaa - iq1bn_grid_zzz[idx] : iq1bn_grid_zzz[idx];
+ uint16_t val = extra & 1 ? 0xaaaa - iq1bn_grid_u16[idx] : iq1bn_grid_u16[idx];
aux32[0] = val | (val << 14);
aux32[1] = (aux32[0] >> 4) & 0x03030303;
aux32[0] &= 0x03030303;
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 4ec98e11..12ab9cca 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -5084,7 +5084,7 @@ void kernel_mul_mv_iq1_bn_f32_impl(
uint8_t signs = extra[0] >> (4*ib + ir);
- uint32_t v = iq1bn_grid_zzz[ql[0] | ((qh[0] << (8 - 4*(ir%2))) & 0x0f00)];
+ uint32_t v = iq1bn_grid_u16[ql[0] | ((qh[0] << (8 - 4*(ir%2))) & 0x0f00)];
uint32_t v32 = v | (v << 14);
aux32[0] = v32 & 0x03030303; aux32[1] = (v32 >> 4) & 0x03030303;
float4 acc4 = yl[0] * float4{values[aux8[0]], values[aux8[1]], values[aux8[2]], values[aux8[3]]}
@@ -5958,8 +5958,8 @@ void dequantize_iq1_bn(device const block_iq1_bn * xb, short il, thread type4x4
uint16_t idx1 = xb->ql[2*il+0] | ((xb->qh[il] << 8) & 0x0f00);
uint16_t idx2 = xb->ql[2*il+1] | ((xb->qh[il] << 4) & 0x0f00);
- uint16_t val1 = gs & 1 ? 0xaaaa - iq1bn_grid_zzz[idx1] : iq1bn_grid_zzz[idx1];
- uint16_t val2 = gs & 2 ? 0xaaaa - iq1bn_grid_zzz[idx2] : iq1bn_grid_zzz[idx2];
+ uint16_t val1 = gs & 1 ? 0xaaaa - iq1bn_grid_u16[idx1] : iq1bn_grid_u16[idx1];
+ uint16_t val2 = gs & 2 ? 0xaaaa - iq1bn_grid_u16[idx2] : iq1bn_grid_u16[idx2];
uint32_t v = val1 | (val1 << 14);
uint32_t aux32;
diff --git a/iqk-quantize.cpp b/iqk-quantize.cpp
index f5840778..b8d91bcf 100644
--- a/iqk-quantize.cpp
+++ b/iqk-quantize.cpp
@@ -190,7 +190,7 @@ void dequantize_row_iq1_bn(const block_iq1_bn * x, float * y, int64_t k) {
auto ql = x[i].ql;
for (int k = 0; k < QK_IQ1BN/8; ++k) {
uint16_t idx = ql[k] | ((qh[k/2] << (8 - 4*(k%2))) & 0x0f00);
- uint16_t val = extra & 1 ? 0xaaaa - iq1bn_grid_zzz[idx] : iq1bn_grid_zzz[idx];
+ uint16_t val = extra & 1 ? 0xaaaa - iq1bn_grid_u16[idx] : iq1bn_grid_u16[idx];
aux32[0] = val | (val << 14);
aux32[1] = (aux32[0] >> 4) & 0x03030303;
aux32[0] &= 0x03030303;
@@ -270,8 +270,8 @@ void ggml_vec_dot_iq1_bn_q8_K64(int n, float * s, size_t bs, const void * vx, si
for (int j = 0; j < QK_IQ1BN/16; ++j) {
uint16_t idx1 = ql[2*j+0] | ((qh[j] << 8) & 0x0f00);
uint16_t idx2 = ql[2*j+1] | ((qh[j] << 4) & 0x0f00);
- uint16_t val1 = extra & 1 ? k_magic - iq1bn_grid_zzz[idx1] : iq1bn_grid_zzz[idx1];
- uint16_t val2 = extra & 2 ? k_magic - iq1bn_grid_zzz[idx2] : iq1bn_grid_zzz[idx2];
+ uint16_t val1 = extra & 1 ? k_magic - iq1bn_grid_u16[idx1] : iq1bn_grid_u16[idx1];
+ uint16_t val2 = extra & 2 ? k_magic - iq1bn_grid_u16[idx2] : iq1bn_grid_u16[idx2];
extra >>= 2;
aux32[0] = val1 | (val1 << 14);
aux32[1] = (aux32[0] >> 4) & 0x03030303;
diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index d4354343..8c46cfc7 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -1342,33 +1342,35 @@ template <int nrc> struct Q8_K64 {
struct DequantizerIQ1BN {
const __m256i m1_8 = _mm256_set1_epi8(1);
- const __m256i shuff1 = _mm256_set_epi64x(0x0808080808080808, 0x0000000000000000, 0x0808080808080808, 0x0000000000000000);
- const __m256i shuff2 = _mm256_add_epi8(shuff1, m1_8);
+ const __m256i shuff1 = _mm256_set_epi64x(0x0908090809080908, 0x0100010001000100, 0x0908090809080908, 0x0100010001000100);
#if defined __AVX512F__ && defined __AVX512VL__
- const __m256i minus1 = _mm256_set1_epi64x(0xffff);
+ const __m256i minus1 = _mm256_set1_epi64x(0xaaaa);
+ const __m256i shifts = _mm256_set1_epi64x(0x0006000400020000);
#else
const __m256i shuff3 = _mm256_set_epi64x(0x0303030303030303, 0x0202020202020202, 0x0101010101010101, 0x0000000000000000);
const __m256i shuff4 = _mm256_set_epi64x(0x0707070707070707, 0x0606060606060606, 0x0505050505050505, 0x0404040404040404);
-#endif
const __m256i mask1 = _mm256_set1_epi64x(0x8040201008040201);
+#endif
+ const __m256i qmask = _mm256_set1_epi8(0x03);
IQK_ALWAYS_INLINE void prepare_iq1bn_quants(uint8_t extra, const uint8_t * ql, const uint8_t * qh, __m256i& v1, __m256i& v2) {
- auto aux1 = _mm256_set_epi64x(iq1bn_grid_xxx[ql[3] | ((qh[1] << 4) & 0x0f00)], iq1bn_grid_xxx[ql[2] | ((qh[1] << 8) & 0x0f00)],
- iq1bn_grid_xxx[ql[1] | ((qh[0] << 4) & 0x0f00)], iq1bn_grid_xxx[ql[0] | ((qh[0] << 8) & 0x0f00)]);
- auto aux2 = _mm256_set_epi64x(iq1bn_grid_xxx[ql[7] | ((qh[3] << 4) & 0x0f00)], iq1bn_grid_xxx[ql[6] | ((qh[3] << 8) & 0x0f00)],
- iq1bn_grid_xxx[ql[5] | ((qh[2] << 4) & 0x0f00)], iq1bn_grid_xxx[ql[4] | ((qh[2] << 8) & 0x0f00)]);
+ auto aux1 = _mm256_set_epi64x(iq1bn_grid_u16[ql[3] | ((qh[1] << 4) & 0x0f00)], iq1bn_grid_u16[ql[2] | ((qh[1] << 8) & 0x0f00)],
+ iq1bn_grid_u16[ql[1] | ((qh[0] << 4) & 0x0f00)], iq1bn_grid_u16[ql[0] | ((qh[0] << 8) & 0x0f00)]);
+ auto aux2 = _mm256_set_epi64x(iq1bn_grid_u16[ql[7] | ((qh[3] << 4) & 0x0f00)], iq1bn_grid_u16[ql[6] | ((qh[3] << 8) & 0x0f00)],
+ iq1bn_grid_u16[ql[5] | ((qh[2] << 4) & 0x0f00)], iq1bn_grid_u16[ql[4] | ((qh[2] << 8) & 0x0f00)]);
#if defined __AVX512F__ && defined __AVX512VL__
aux1 = _mm256_mask_sub_epi64(aux1, extra & 0xf, minus1, aux1);
aux2 = _mm256_mask_sub_epi64(aux2, extra >> 4, minus1, aux2);
-#endif
-
- v1 = _mm256_sub_epi8(_mm256_cmpeq_epi8(_mm256_and_si256(_mm256_shuffle_epi8(aux1, shuff2), mask1), mask1),
- _mm256_cmpeq_epi8(_mm256_and_si256(_mm256_shuffle_epi8(aux1, shuff1), mask1), mask1));
- v2 = _mm256_sub_epi8(_mm256_cmpeq_epi8(_mm256_and_si256(_mm256_shuffle_epi8(aux2, shuff2), mask1), mask1),
- _mm256_cmpeq_epi8(_mm256_and_si256(_mm256_shuffle_epi8(aux2, shuff1), mask1), mask1));
-
-#if !(defined __AVX512F__ && defined __AVX512VL__)
+ v1 = _mm256_sub_epi8(_mm256_and_si256(_mm256_srlv_epi16(_mm256_shuffle_epi8(aux1, shuff1), shifts), qmask), m1_8);
+ v2 = _mm256_sub_epi8(_mm256_and_si256(_mm256_srlv_epi16(_mm256_shuffle_epi8(aux2, shuff1), shifts), qmask), m1_8);
+#else
+ aux1 = _mm256_or_si256(aux1, _mm256_slli_epi64(aux1, 14));
+ aux2 = _mm256_or_si256(aux2, _mm256_slli_epi64(aux2, 14));
+ aux1 = _mm256_or_si256(aux1, _mm256_slli_epi64(aux1, 28));
+ aux2 = _mm256_or_si256(aux2, _mm256_slli_epi64(aux2, 28));
+ v1 = _mm256_sub_epi8(_mm256_and_si256(aux1, qmask), m1_8);
+ v2 = _mm256_sub_epi8(_mm256_and_si256(aux2, qmask), m1_8);
auto all_signs = _mm256_set1_epi8(extra);
all_signs = _mm256_or_si256(_mm256_cmpeq_epi8(_mm256_and_si256(all_signs, mask1), mask1), m1_8);
v1 = _mm256_sign_epi8(v1, _mm256_shuffle_epi8(all_signs, shuff3));
@@ -4373,10 +4375,10 @@ struct DequantizerIQ1BN {
aux32[0] &= 0x0f0f0f0f;
const uint8_t * h = (const uint8_t *)aux32;
- a.val[0] = uint64x2_t{iq1bn_grid_zzz[ql[0] | (h[0] << 8)], iq1bn_grid_zzz[ql[1] | (h[4] << 4)]};
- a.val[1] = uint64x2_t{iq1bn_grid_zzz[ql[2] | (h[1] << 8)], iq1bn_grid_zzz[ql[3] | (h[5] << 4)]};
- a.val[2] = uint64x2_t{iq1bn_grid_zzz[ql[4] | (h[2] << 8)], iq1bn_grid_zzz[ql[5] | (h[6] << 4)]};
- a.val[3] = uint64x2_t{iq1bn_grid_zzz[ql[6] | (h[3] << 8)], iq1bn_grid_zzz[ql[7] | (h[7] << 4)]};
+ a.val[0] = uint64x2_t{iq1bn_grid_u16[ql[0] | (h[0] << 8)], iq1bn_grid_u16[ql[1] | (h[4] << 4)]};
+ a.val[1] = uint64x2_t{iq1bn_grid_u16[ql[2] | (h[1] << 8)], iq1bn_grid_u16[ql[3] | (h[5] << 4)]};
+ a.val[2] = uint64x2_t{iq1bn_grid_u16[ql[4] | (h[2] << 8)], iq1bn_grid_u16[ql[5] | (h[6] << 4)]};
+ a.val[3] = uint64x2_t{iq1bn_grid_u16[ql[6] | (h[3] << 8)], iq1bn_grid_u16[ql[7] | (h[7] << 4)]};
v.val[0] = vsubq_s8(vandq_u8(vshlq_u16(vqtbl1q_u8(vreinterpretq_u8_u64(a.val[0]), shuff1), shift), qmask), m1);
v.val[1] = vsubq_s8(vandq_u8(vshlq_u16(vqtbl1q_u8(vreinterpretq_u8_u64(a.val[1]), shuff1), shift), qmask), m1);