# Parse the Unicode block description file into BlockNames.
# NOTE(review): this listing carries embedded line numbers that skip, so the
# surrounding control flow (guards, error handlers, the initialisation of
# BlockNames) is not visible here; comments below describe only what the
# visible statements do.

# Space-separated names of the two data files read by this script; the string
# is interpolated into the generated file banners via "% (date, sources)".
6 sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
# Open the block description file for reading.
9 blocks = open("Blocks-4.txt", "r")
# Presumably the failure path of the open() above -- TODO confirm against the
# full file, the guarding statement is not shown.
11 print "Missing Blocks-4.txt, aborting ..."
# Process the file one line at a time.
15 for line in blocks.readlines():
# Drop leading/trailing whitespace (including the newline).
18 line = string.strip(line)
# Records are ';'-separated: field 0 is a "start..end" range, field 1 a name.
22 fields = string.split(line, ';')
# NOTE: the local name `range` shadows the builtin of the same name.
23 range = string.strip(fields[0])
# Split the range text on ".." into its two bounds (kept as strings).
24 (start, end) = string.split(range, "..")
25 name = string.strip(fields[1])
# Remove spaces from the block name before it is used as a dictionary key.
26 name = string.replace(name, ' ', '')
# Diagnostic for a line that could not be handled; the condition that leads
# here is not visible in this listing.
28 print "Failed to process line: %s" % (line)
# Store the bounds as strings with a "0x" prefix prepended, keyed by name.
30 BlockNames[name] = ("0x"+start, "0x"+end)
# Report how many distinct block names were collected.
32 print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
# Parse the Unicode character data file into Categories, a mapping from a
# category key to a list of numeric values.
# NOTE(review): the embedded line numbers skip, so loop headers, guards and
# the assignments of `value`, `name` and `nbchar` are not visible here.

# Open the character data file for reading.
35 data = open("UnicodeData-3.1.0.txt", "r")
# Presumably the failure path of the open() above -- TODO confirm.
37 print "Missing UnicodeData-3.1.0.txt, aborting ..."
# Process the file one line at a time.
42 for line in data.readlines():
# Drop leading/trailing whitespace (including the newline).
45 line = string.strip(line)
# Records are ';'-separated; field 0 is read as the code point text.
49 fields = string.split(line, ';')
50 point = string.strip(fields[0])
# Convert the first character of `point` from a hexadecimal digit
# ('0'-'9', 'A'-'F' or 'a'-'f') to its numeric value 0..15 and add it to
# `value`. The enclosing loop that advances through `point` is not shown.
54 if point[0] >= '0' and point[0] <= '9':
55 value = value + ord(point[0]) - ord('0')
56 elif point[0] >= 'A' and point[0] <= 'F':
57 value = value + 10 + ord(point[0]) - ord('A')
58 elif point[0] >= 'a' and point[0] <= 'f':
59 value = value + 10 + ord(point[0]) - ord('a')
# Diagnostic for a line that could not be handled; the triggering condition
# is not visible in this listing.
63 print "Failed to process line: %s" % (line)
# Record `value` under the full key `name`: append to the existing list ...
68 Categories[name].append(value)
# ... or start a new one-element list (presumably when the key is new).
71 Categories[name] = [value]
73 print "Failed to process line: %s" % (line)
# Record the same value again under the first character of `name`, so each
# value is filed under both the full key and its one-letter prefix.
75 Categories[name[0]].append(value)
78 Categories[name[0]] = [value]
80 print "Failed to process line: %s" % (line)
# Summary: `nbchar` (maintained on lines not shown) and the number of keys.
83 print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
# Replace each Categories entry (a list of values) with a list of 2-tuples.
# NOTE(review): most of this section is missing from the listing (embedded
# line numbers jump from 86 to 100); the initialisation of `ranges`, `start`
# and `prev` and the tests choosing between the appends are not visible.
84 #reduce the number list into ranges
85 for cat in Categories.keys():
# NOTE: the local name `list` shadows the builtin of the same name.
86 list = Categories[cat]
# (prev, prev): a tuple whose two bounds are the same value.
100 ranges.append((prev, prev))
# (start, prev): a tuple whose bounds may differ -- presumably a run of
# consecutive values; TODO confirm against the full file.
105 ranges.append((start, prev))
110 ranges.append((prev, prev))
112 ranges.append((start, prev))
# Overwrite the entry for this key with the list of tuples built above.
113 Categories[cat] = ranges
# NOTE(review): the guards around the two open() calls are not visible in
# this listing (embedded line numbers skip); the print statements below are
# presumably their failure paths -- TODO confirm.
116 # Generate the resulting files
# Open (creating or truncating) the C header that receives the prototypes.
119 header = open("xmlunicode.h", "w")
121 print "Failed to open xmlunicode.h"
# Open (creating or truncating) the C source that receives the function bodies.
125 output = open("xmlunicode.c", "w")
127 print "Failed to open xmlunicode.c"
# Current local time formatted as text; interpolated into the
# "Generation date: %s" line of the banners written to both files.
130 date = time.asctime(time.localtime(time.time()))
134 * xmlunicode.h: this header exports interfaces for the Unicode character APIs
136 * This file is automatically generated from the
137 * UCS description files of the Unicode Character Database
138 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
139 * using the genUnicode.py Python script.
141 * Generation date: %s
143 * Daniel Veillard <veillard@redhat.com>
146 #ifndef __XML_UNICODE_H__
147 #define __XML_UNICODE_H__
153 """ % (date, sources));
156 * xmlunicode.c: this module implements the Unicode character APIs
158 * This file is automatically generated from the
159 * UCS description files of the Unicode Character Database
160 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
161 * using the genUnicode.py Python script.
163 * Generation date: %s
165 * Daniel Veillard <veillard@redhat.com>
171 #ifdef LIBXML_UNICODE_ENABLED
174 #include <libxml/xmlversion.h>
175 #include <libxml/xmlunicode.h>
177 """ % (date, sources));
# Emit the per-block C predicates xmlUCSIs<Name>() and the name-based
# dispatcher xmlUCSIsBlock().
# NOTE(review): the loop headers iterating over `keys` (and any sorting of
# them) are not visible in this listing; two write() calls also continue onto
# lines that are not shown.
179 keys = BlockNames.keys()
# Bounds were stored as "0x"-prefixed strings, so they can be pasted into the
# generated C code unchanged.
182 (start, end) = BlockNames[block]
# Remove '-' from the block name before using it in the C function name.
183 name = string.replace(block, '-', '')
# Prototype line for the header file.
184 header.write("int\txmlUCSIs%s\t(int code);\n" % name)
# Documentation comment preceding the generated function.
185 output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
186 output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
188 output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
# Function definition: a single inclusive range test on `code`.
189 output.write("int\nxmlUCSIs%s(int code) {\n" % name)
190 output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
191 output.write("}\n\n")
# Dispatcher taking the block name as a C string: prototype, documentation
# comment, then the function itself.
193 header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
194 output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
195 output.write(" * @block: UCS block name\n")
196 output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
197 output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
198 output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
199 keys = BlockNames.keys()
# Same '-' removal as above so the call matches the generated function name.
202 name = string.replace(block, '-', '')
# One strcmp() test per block, each forwarding to the matching predicate.
203 output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
# Fallthrough of the generated function: -1 when no name matched.
205 output.write(" return(-1);\n}\n\n")
# Emit the per-category C predicates xmlUCSIsCat<Name>() and the name-based
# dispatcher xmlUCSIsCat().
# NOTE(review): the loop headers over `keys` and over `ranges`, and the tests
# selecting between the writes below, are not visible in this listing.
208 keys = Categories.keys()
# By this point each Categories entry holds a list of (begin, end) tuples.
211 ranges = Categories[name]
# Prototype line for the header file.
212 header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
# Documentation comment preceding the generated function.
213 output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
214 output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
216 output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
217 output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
# NOTE: `range` here is a loop variable name that shadows the builtin.
220 (begin, end) = range;
# Opening of the return expression (presumably written once, before the first
# term -- TODO confirm).
222 output.write(" return(");
# Separator joining successive terms with a C logical OR.
225 output.write(" ||\n ");
# Term form for a single value: equality test against hex(begin).
227 output.write("(code == %s)" % (hex(begin)))
# Term form for a span: inclusive range test between hex(begin) and hex(end).
229 output.write("((code >= %s) && (code <= %s))" % (
230 hex(begin), hex(end)))
# Close the return expression and the function body.
231 output.write(");\n}\n\n")
# Dispatcher taking the category name as a C string: prototype, documentation
# comment, then the function itself.
233 header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
234 output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
235 output.write(" * @cat: UCS Category name\n")
236 output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
237 output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
238 output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
239 keys = Categories.keys()
# One strcmp() test per category, each forwarding to the matching predicate.
242 output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
# Fallthrough of the generated function: -1 when no name matched.
244 output.write(" return(-1);\n}\n\n")
250 #endif /* __XML_UNICODE_H__ */
253 #endif /* LIBXML_UNICODE_ENABLED */