"""Generate caseconvert.h (run with python3).

For every scanned Unicode code point the results of str.lower() and
str.upper() are compared with the code point itself.  Whenever the first
character of the converted text differs, a ``case`` label is emitted in the
generated C++ header that returns the converted text as a NUL-terminated
UTF-8 byte sequence.  Conversions that expand to several characters or to
multi-byte UTF-8 sequences are included.
"""

# Exclusive upper bound of the scanned code points, i.e. 0x0 .. 0x1FFFE.
CODE_POINT_LIMIT = 0x1FFFF

# Text written before the upper-case ``case`` labels.
HEADER_PROLOGUE = r'''/** This file is generated by python3 caseconvert.py. DO NOT EDIT! */

#ifndef CASECONVERT_H
#define CASECONVERT_H

#include <cstdint>
#include <string>

#define BSEQ(...) { static unsigned char s[] = { __VA_ARGS__, 0x00 }; \
                    return reinterpret_cast<const char *>(s); }

inline const char *convertUnicodeToUpper(uint32_t code)
{
  switch(code)
  {
'''

# Text written between the upper-case and the lower-case ``case`` labels.
HEADER_MIDDLE = r'''    default: return nullptr;
  }
}

inline const char *convertUnicodeToLower(uint32_t code)
{
  switch(code)
  {
'''

# Text written after the lower-case ``case`` labels.
HEADER_EPILOGUE = r'''    default: return nullptr;
  }
}

#endif
'''


def writeMapping(file, mapping):
    """Write one C++ ``case`` line per entry of *mapping* to *file*.

    file    -- text stream with a ``write`` method.
    mapping -- dict from a one-character string to its converted text.

    Entries are written in sorted key order.  Each line carries the code
    point of the key in hex, the key itself as a comment, the UTF-8 bytes of
    the converted text wrapped in BSEQ(...), and the converted text as a
    comment.
    """
    for k, v in sorted(mapping.items()):
        codePoint = hex(ord(k[0]))
        utf8Bytes = ",".join(f"0x{b:02x}" for b in v.encode('utf-8'))
        file.write(f"    case {codePoint} /* {k} */: BSEQ({utf8Bytes}) /* {v} */;\n")


def buildMappings(limit=CODE_POINT_LIMIT):
    """Return ``(toupper, tolower)`` for the code points in ``range(limit)``.

    Both results are dicts keyed by the one-character string of a code
    point.  A code point gets an entry only when the first character of its
    str.upper() / str.lower() result differs from the code point itself; the
    stored value is the complete converted text, which may be longer than
    one character.
    """
    upper = {}
    lower = {}
    for codeValue in range(limit):
        s = chr(codeValue)
        sl = s.lower()
        su = s.upper()
        # Only the first converted character decides whether an entry is made.
        if sl[0] != s:
            lower[s] = sl
        if su[0] != s:
            upper[s] = su
    return upper, lower


def writeHeader(file, upper, lower):
    """Write the complete header text to the text stream *file*.

    upper -- mapping used for the cases of convertUnicodeToUpper().
    lower -- mapping used for the cases of convertUnicodeToLower().
    """
    file.write(HEADER_PROLOGUE)
    writeMapping(file, upper)
    file.write(HEADER_MIDDLE)
    writeMapping(file, lower)
    file.write(HEADER_EPILOGUE)


# Mappings of characters whose upper and lower case differ.  They are kept as
# module-level names so that they remain available to anything that used them.
toupper, tolower = buildMappings()


def main():
    """Generate caseconvert.h in the current working directory."""
    # The with-block guarantees that the file is flushed and closed.
    # newline="\n" disables newline translation, so the output bytes are the
    # same on every platform (codecs.open did not translate newlines either).
    with open("caseconvert.h", "w", encoding="utf-8", newline="\n") as file:
        writeHeader(file, toupper, tolower)


if __name__ == "__main__":
    main()