3 # Parses the output of gtk-doc declaration files and builds
4 # a reusable XML description from them
26 ##################################################################
28 # Indexer to generate the word index
30 ##################################################################
34 def indexString(id, str):
37 str = string.replace(str, "'", ' ')
38 str = string.replace(str, '"', ' ')
39 str = string.replace(str, "/", ' ')
40 str = string.replace(str, '*', ' ')
41 str = string.replace(str, "[", ' ')
42 str = string.replace(str, "]", ' ')
43 str = string.replace(str, "(", ' ')
44 str = string.replace(str, ")", ' ')
45 str = string.replace(str, "<", ' ')
46 str = string.replace(str, '>', ' ')
47 str = string.replace(str, "&", ' ')
48 str = string.replace(str, '#', ' ')
49 str = string.replace(str, ",", ' ')
50 str = string.replace(str, '.', ' ')
51 str = string.replace(str, ';', ' ')
52 tokens = string.split(str)
56 if string.find(string.letters, c) < 0:
61 lower = string.lower(token)
62 # TODO: generalize this a bit
63 if lower == 'and' or lower == 'the':
65 elif index.has_key(token):
66 index[token].append(id)
74 ##################################################################
76 # Parsing: libxml-decl.txt
78 ##################################################################
79 def mormalizeTypeSpaces(raw, function):
82 tokens = string.split(raw)
86 type = type + ' ' + token
89 if types.has_key(type):
90 types[type].append(function)
92 types[type] = [function]
95 def removeComments(raw):
96 while string.find(raw, '/*') > 0:
97 e = string.find(raw, '/*')
100 e = string.find(raw, '*/')
102 raw = tmp + raw[e + 2:]
107 def extractArgs(raw, function):
108 raw = removeComments(raw)
109 raw = string.replace(raw, '\n', ' ')
110 raw = string.replace(raw, '\r', ' ')
111 list = string.split(raw, ",")
119 while string.find(string.letters, c) >= 0 or \
120 string.find(string.digits, c) >= 0 or c == '_':
126 while string.find(string.whitespace, c) >= 0:
131 type = mormalizeTypeSpaces(arg[0:i+1], function)
132 if name == 'void' and type == '':
135 ret.append([type, name, ''])
139 def extractTypes(raw, function):
142 tokens = string.split(raw)
146 type = type + ' ' + token
149 if ret_types.has_key(type):
150 ret_types[type].append(function)
152 ret_types[type] = [function]
162 line = input.readline()[:-1]
163 while line != "</MACRO>":
164 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
166 elif string.find(line, "#define") >= 0:
168 line = input.readline()[:-1]
171 variables[name] = ['', ''] # type, info
172 identifiers_type[name] = "variable"
174 macros[name] = [[], ''] # args, info
175 identifiers_type[name] = "macro"
181 line = input.readline()[:-1]
182 while line != "</STRUCT>":
183 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
185 line = input.readline()[:-1]
188 identifiers_type[name] = "struct"
194 line = input.readline()[:-1]
195 while line != "</TYPEDEF>":
196 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
198 line = input.readline()[:-1]
201 identifiers_type[name] = "typedef"
207 line = input.readline()[:-1]
209 while line != "</ENUM>":
210 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
212 elif string.find(line, 'enum') >= 0:
214 elif string.find(line, '{') >= 0:
216 elif string.find(line, '}') >= 0:
218 elif string.find(line, ';') >= 0:
221 comment = string.find(line, '/*')
223 line = line[0:comment]
224 decls = string.split(line, ",")
226 val = string.split(decl, "=")[0]
227 tokens = string.split(val)
230 if string.find(string.letters, token[0]) >= 0:
232 identifiers_type[token] = "const"
233 line = input.readline()[:-1]
235 enums[name] = [consts, '']
236 identifiers_type[name] = "enum"
238 def parseStaticFunction():
240 global user_functions
242 line = input.readline()[:-1]
245 while line != "</USER_FUNCTION>":
246 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
248 elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
249 type = extractTypes(line[9:-10], name)
251 signature = signature + line
252 line = input.readline()[:-1]
254 args = extractArgs(signature, name)
255 user_functions[name] = [[type, ''] , args, '']
256 identifiers_type[name] = "functype"
262 line = input.readline()[:-1]
265 while line != "</FUNCTION>":
266 if line[0:6] == "<NAME>" and line[-7:] == "</NAME>":
268 elif line[0:9] == "<RETURNS>" and line[-10:] == "</RETURNS>":
269 type = extractTypes(line[9:-10], name)
271 signature = signature + line
272 line = input.readline()[:-1]
274 args = extractArgs(signature, name)
275 functions[name] = [[type, ''] , args, '']
276 identifiers_type[name] = "function"
278 print "Parsing: libxml-decl.txt"
279 input = open('libxml-decl.txt')
281 line = input.readline()
285 if line == "<MACRO>":
287 elif line == "<ENUM>":
289 elif line == "<FUNCTION>":
291 elif line == "<STRUCT>":
293 elif line == "<TYPEDEF>":
295 elif line == "<USER_FUNCTION>":
296 parseStaticFunction()
297 elif len(line) >= 1 and line[0] == "<":
298 print "unhandled %s" % (line)
300 print "Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
301 len(macros.keys()), len(structs.keys()), len(typedefs.keys()),
304 for enum in enums.keys():
305 consts = enums[enum][0]
307 print " %d variables, %d constants, %d functions and %d functypes" % (
308 len(variables.keys()), c, len(functions.keys()),
309 len(user_functions.keys()))
310 print "The functions manipulates %d different types" % (len(types.keys()))
311 print "The functions returns %d different types" % (len(ret_types.keys()))
313 ##################################################################
315 # Parsing: libxml-decl-list.txt
317 ##################################################################
322 global identifiers_file
325 line = input.readline()[:-1]
326 while line != "</SECTION>":
327 if line[0:6] == "<FILE>" and line[-7:] == "</FILE>":
331 line = input.readline()[:-1]
333 sections.append(name)
336 identifiers_file[token] = name
338 # Enum values inherit the file of the enum that declares them
340 if enums.has_key(token):
341 for const in enums[token][0]:
342 identifiers_file[const] = name
344 print "Parsing: libxml-decl-list.txt"
345 input = open('libxml-decl-list.txt')
347 line = input.readline()
351 if line == "<SECTION>":
353 elif len(line) >= 1 and line[0] == "<":
354 print "unhandled %s" % (line)
356 print "Parsed: %d files %d identifiers" % (len(files), len(identifiers_file.keys()))
357 ##################################################################
360 # To enrich the existing info with extracted comments
362 ##################################################################
366 def insertParameterComment(id, name, value, is_param):
369 indexString(id, value)
370 if functions.has_key(id):
372 args = functions[id][1]
379 if found == 0 and name != '...':
380 print "Arg %s not found on function %s description" % (name, id)
383 ret = functions[id][0]
385 elif user_functions.has_key(id):
387 args = user_functions[id][1]
394 if found == 0 and name != '...':
395 print "Arg %s not found on functype %s description" % (name, id)
399 ret = user_functions[id][0]
401 elif macros.has_key(id):
411 args.append([name, value])
413 print "Return info for macro %s: %s" % (id, value)
414 # ret = macros[id][0]
417 print "lost specific comment %s: %s: %s" % (id, name, value)
419 nbcomments = nbcomments + 1
421 def insertComment(name, title, value, id):
425 indexString(name, value)
426 if functions.has_key(name):
427 functions[name][2] = value
429 elif typedefs.has_key(name):
430 typedefs[name] = value
432 elif macros.has_key(name):
433 macros[name][1] = value
435 elif variables.has_key(name):
436 variables[name][1] = value
438 elif structs.has_key(name):
439 structs[name] = value
441 elif enums.has_key(name):
442 enums[name][1] = value
444 elif user_functions.has_key(name):
445 user_functions[name][2] = value
446 return "user_function"
448 print "lost comment %s: %s" % (name, value)
450 nbcomments = nbcomments + 1
457 def analyzeXMLDescriptionRow(doc, desc, id, row):
458 if doc == None or desc == None or id == None or row == None:
460 ctxt = doc.xpathNewContext()
461 ctxt.setContextNode(row)
462 param = ctxt.xpathEval("entry[1]/parameter")
463 entries = ctxt.xpathEval("entry")
468 name = param[0].content
470 str = entries[1].content
471 str = string.replace(str, '\n', ' ')
472 str = string.replace(str, '\r', ' ')
473 str = string.replace(str, ' ', ' ')
474 str = string.replace(str, ' ', ' ')
475 str = string.replace(str, ' ', ' ')
476 while len(str) >= 1 and str[0] == ' ':
479 insertParameterComment(id, name, str, is_param)
483 def analyzeXMLDescription(doc, desc):
484 if doc == None or desc == None:
486 ctxt = doc.xpathNewContext()
487 ctxt.setContextNode(desc)
490 # get the function name
493 title = ctxt.xpathEval("title")[0].content
496 old_id = ctxt.xpathEval("string(title/anchor/@id)")
497 id = string.replace(title, '(', ' ');
498 id = string.replace(id, ')', ' ');
499 id = string.split(id) [0]
502 # get the function comments
505 paras = ctxt.xpathEval("para")
508 str = string.replace(str, '\n', ' ')
509 str = string.replace(str, '\r', ' ')
510 str = string.replace(str, ' ', ' ')
511 str = string.replace(str, ' ', ' ')
512 str = string.replace(str, ' ', ' ')
513 while len(str) >= 1 and str[0] == ' ':
516 comment = comment + str
518 insertComment(id, title, comment, old_id)
520 rows = ctxt.xpathEval("informaltable/tgroup/tbody/row")
522 analyzeXMLDescriptionRow(doc, desc, id, row)
524 def analyzeXMLDoc(doc):
527 ctxt = doc.xpathNewContext()
528 descriptions = ctxt.xpathEval("//refsect2")
529 print len(descriptions)
530 for description in descriptions:
531 analyzeXMLDescription(doc, description)
534 filenames = os.listdir("xml")
535 for filename in filenames:
538 doc = libxml2.parseFile("xml/" + filename)
541 xmlfiles = xmlfiles + 1
543 print "failed to parse XML description %s" % ("xml/" + filename)
546 print "Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments)
548 ##################################################################
550 # Saving: libxml2-api.xml
552 ##################################################################
555 raw = string.replace(raw, '&', '&')
556 raw = string.replace(raw, '<', '<')
557 raw = string.replace(raw, '>', '>')
558 raw = string.replace(raw, "'", ''')
559 raw = string.replace(raw, '"', '"')
562 print "Saving XML description libxml2-api.xml"
563 output = open("libxml2-api.xml", "w")
564 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
565 output.write("<api name='libxml2'>\n")
566 output.write(" <files>\n")
567 for file in files.keys():
568 output.write(" <file name='%s'>\n" % file)
569 for symbol in files[file]:
570 output.write(" <exports symbol='%s'/>\n" % (symbol))
571 output.write(" </file>\n")
572 output.write(" </files>\n")
574 output.write(" <symbols>\n")
575 symbols=macros.keys()
576 for i in structs.keys(): symbols.append(i)
577 for i in variables.keys(): variables.append(i)
578 for i in typedefs.keys(): symbols.append(i)
579 for i in enums.keys():
581 for j in enums[i][0]:
583 for i in functions.keys(): symbols.append(i)
584 for i in user_functions.keys(): symbols.append(i)
589 # print "Symbol %s redefined" % (i)
593 if identifiers_type.has_key(i):
594 type = identifiers_type[i]
596 if identifiers_file.has_key(i):
597 file = identifiers_file[i]
601 output.write(" <%s name='%s'" % (type, i))
603 output.write(" file='%s'" % (file))
604 if type == "function":
606 (ret, args, doc) = functions[i]
607 if doc != None and doc != '':
608 output.write(" <info>%s</info>\n" % (escape(doc)))
609 if ret[1] != None and ret[1] != '':
610 output.write(" <return type='%s' info='%s'/>\n" % (
611 ret[0], escape(ret[1])))
613 if ret[0] != 'void' and\
614 ret[0][0:4] != 'void': # This one is actually a bug in GTK Doc
615 print "Description for return on %s is missing" % (i)
616 output.write(" <return type='%s'/>\n" % (ret[0]))
618 if arg[2] != None and arg[2] != '':
619 output.write(" <arg name='%s' type='%s' info='%s'/>\n" %
620 (arg[1], arg[0], escape(arg[2])))
623 print "Description for %s on %s is missing" % (arg[1], i)
624 output.write(" <arg name='%s' type='%s'/>\n" % (
626 output.write(" </%s>\n" % (type));
627 elif type == 'functype':
629 (ret, args, doc) = user_functions[i]
630 if doc != None and doc != '':
631 output.write(" <info>%s</info>\n" % (escape(doc)))
632 if ret[1] != None and ret[1] != '':
633 output.write(" <return type='%s' info='%s'/>\n" % (
634 ret[0], escape(ret[1])))
636 if ret[0] != 'void' and\
637 ret[0][0:4] != 'void': # This one is actually a bug in GTK Doc
638 print "Description for return on %s is missing" % (i)
639 output.write(" <return type='%s'/>\n" % (ret[0]))
641 if arg[2] != None and arg[2] != '':
642 output.write(" <arg name='%s' type='%s' info='%s'/>\n" %
643 (arg[1], arg[0], escape(arg[2])))
646 print "Description for %s on %s is missing" % (arg[1], i)
647 output.write(" <arg name='%s' type='%s'/>\n" % (
649 output.write(" </%s>\n" % (type));
650 elif type == 'macro':
652 if macros[i][1] != None and macros[i][1] != '':
653 output.write(" <info>%s</info>\n" % (escape(macros[i][1])))
655 print "Description for %s is missing" % (i)
658 if arg[1] != None and arg[1] != '':
659 output.write(" <arg name='%s' info='%s'/>\n" %
660 (arg[0], escape(arg[1])))
662 print "Description for %s on %s is missing" % (arg[1], i)
663 output.write(" <arg name='%s'/>\n" % (arg[0]))
664 output.write(" </%s>\n" % (type));
665 elif type == 'struct':
666 if structs[i] != None and structs[i] != '':
667 output.write(" info='%s'/>\n" % (escape(structs[i])))
669 output.write("/>\n");
670 elif type == 'variable':
671 if variables[i][1] != None and variables[i][1] != '':
672 output.write(" info='%s'/>\n" % (escape(variables[i])))
674 output.write("/>\n");
675 elif type == 'typedef':
676 if typedefs[i] != None and typedefs[i] != '':
677 output.write(" info='%s'/>\n" % (escape(typedefs[i])))
679 output.write("/>\n");
681 output.write("/>\n");
683 print "Symbol %s not found in identifiers list" % (i)
684 output.write(" </symbols>\n")
685 output.write("</api>\n")
687 print "generated XML for %d symbols" % (len(symbols))
689 ##################################################################
691 # Saving: libxml2-refs.xml
693 ##################################################################
696 for file in files.keys():
697 for symbol in files[file]:
702 target = string.upper(ids[id])
704 target = string.upper(id)
706 module = string.lower(hash[id])
709 file = 'html/libxml-' + module + '.html';
710 return file + '#' + target
712 print "Saving XML crossreferences libxml2-refs.xml"
713 output = open("libxml2-refs.xml", "w")
714 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
715 output.write("<apirefs name='libxml2'>\n")
716 output.write(" <references>\n")
720 output.write(" <reference name='%s' href='%s'/>\n" % (id, link(id)))
721 output.write(" </references>\n")
722 output.write(" <alpha>\n")
729 output.write(" </letter>\n")
731 output.write(" <letter name='%s'>\n" % (letter))
732 output.write(" <ref name='%s'/>\n" % (id))
734 output.write(" </letter>\n")
735 output.write(" </alpha>\n")
736 output.write(" <constructors>\n")
737 typ = ret_types.keys()
740 if type == '' or type == 'void' or type == "int" or type == "char *" or \
741 type == "const char *" :
743 output.write(" <type name='%s'>\n" % (type))
744 ids = ret_types[type]
746 output.write(" <ref name='%s'/>\n" % (id))
747 output.write(" </type>\n")
748 output.write(" </constructors>\n")
749 output.write(" <functions>\n")
753 if type == '' or type == 'void' or type == "int" or type == "char *" or \
754 type == "const char *" :
756 output.write(" <type name='%s'>\n" % (type))
759 output.write(" <ref name='%s'/>\n" % (id))
760 output.write(" </type>\n")
761 output.write(" </functions>\n")
763 output.write(" <files>\n")
767 output.write(" <file name='%s'>\n" % (file))
768 for id in files[file]:
769 output.write(" <ref name='%s'/>\n" % (id))
770 output.write(" </file>\n")
771 output.write(" </files>\n")
773 output.write(" <index>\n")
781 if len(index[id]) > 30:
784 if letter == None or count > 200:
786 output.write(" </letter>\n")
787 output.write(" </chunk>\n")
789 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
790 output.write(" <chunk name='chunk%s'>\n" % (chunk))
794 output.write(" </letter>\n")
796 output.write(" <letter name='%s'>\n" % (letter))
797 output.write(" <word name='%s'>\n" % (id))
801 for token in index[id]:
805 output.write(" <ref name='%s'/>\n" % (token))
807 output.write(" </word>\n")
809 output.write(" </letter>\n")
810 output.write(" </chunk>\n")
811 output.write(" <chunks>\n")
813 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
814 ch[0], ch[1], ch[2]))
815 output.write(" </chunks>\n")
816 output.write(" </index>\n")
818 output.write("</apirefs>\n")