# gtkmm - DocsParser module
#
# Copyright 2001 Free Software Foundation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Based on XML::Parser tutorial found at http://www.devshed.com/Server_Side/Perl/PerlXML/PerlXML1/page1.html
# This module isn't properly Object Orientated because the XML Parser needs global callbacks.

package DocsParser;
use XML::Parser;
use strict;
use warnings;
use feature 'state';

use Util;
use Function;
use GtkDefs;
use Object;

BEGIN {
  use Exporter   ();
  our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

  # set the version for version checking
  $VERSION     = 1.00;
  @ISA         = qw(Exporter);
  @EXPORT      = ( );
  %EXPORT_TAGS = ( );
  # your exported package globals go here,
  # as well as any optionally exported functions
  @EXPORT_OK   = ( );
}
our @EXPORT_OK;

#####################################

use strict;
use warnings;

#####################################

# Parser state shared by the XML::Parser callbacks (which must be plain
# functions, hence the package globals).
$DocsParser::CurrentFile = "";           # Path of the file currently being parsed.
$DocsParser::refAppendTo = undef;        # String reference to store the character data into.
$DocsParser::currentParam = undef;       # Name of the <parameter> currently being parsed.

$DocsParser::objCurrentFunction = undef; # Function currently being parsed.
%DocsParser::hasharrayFunctions = ();    # Function elements, keyed by name.

%DocsParser::type_names = ();               # Type names (e.g. enums) with non-standard C-to-C++ translation.
%DocsParser::enumerator_name_prefixes = (); # Enumerator name prefixes with non-standard C-to-C++ translation.
%DocsParser::enumerator_names = ();         # Enumerator names with non-standard C-to-C++ translation.
# Delimiters used by lookup_documentation() to build a Doxygen comment block.
$DocsParser::commentStart = " /** ";
$DocsParser::commentMiddleStart = " * ";
$DocsParser::commentEnd = " */";

# Parses $DocsParser::CurrentFile with $objParser.
# Returns 1 on success. On a parse error the error is reported on STDERR
# (without the trailing "at /path ..." location) and 0 is returned.
sub _parse_current_file
{
  my ($objParser) = @_;

  eval { $objParser->parsefile($DocsParser::CurrentFile) };
  return 1 unless $@;

  my $error = $@;
  $error =~ s/at \/.*?$//s;
  print STDERR "\nError in \"" . $DocsParser::CurrentFile . "\":$error\n";
  return 0;
}

# void read_defs($path, $filename, $filename_override)
# Reads the C documentation in "$path/$filename" and then, if it exists and is
# readable, the C++ override documentation in "$path/$filename_override".
# The parsed data ends up in %DocsParser::hasharrayFunctions and the
# substitution tables. Problems are reported on STDERR; nothing is returned.
sub read_defs($$$)
{
  my ($path, $filename, $filename_override) = @_;

  my $objParser = XML::Parser->new(ErrorContext => 0);
  $objParser->setHandlers(Start => \&parse_on_start,
                          End => \&parse_on_end,
                          Char => \&parse_on_cdata);

  # C documentation:
  $DocsParser::CurrentFile = "$path/$filename";

  if ( ! -r $DocsParser::CurrentFile)
  {
    print STDERR "DocsParser.pm: Warning: Can't read file \"" . $DocsParser::CurrentFile . "\".\n";
    return;
  }

  return unless _parse_current_file($objParser);

  # C++ override documentation:
  $DocsParser::CurrentFile = $path . '/' . $filename_override;

  # It is not an error if the documentation override file does not exist.
  return unless (-r $DocsParser::CurrentFile);

  _parse_current_file($objParser);
  return;
}

# XML::Parser "Start" handler.
# Opens a new (or reuses an existing) Function for <function>, <signal>,
# <property> and <enum>, and prepares the destination for the character data
# of the nested tags. Croaks on unknown tags.
sub parse_on_start($$%)
{
  my ($objParser, $tag, %attr) = @_;

  $tag = lc($tag);

  if($tag eq "function" or $tag eq "signal" or $tag eq "property" or $tag eq "enum")
  {
    if(defined $DocsParser::objCurrentFunction)
    {
      $objParser->xpcroak(
        "\nClose a function, signal, property or enum tag before you open another one.");
    }

    my $functionName = $attr{name};

    # Change signal name from Class::a-signal-name to Class::a_signal_name
    # and property name from Class:a-property-name to Class:a_property_name
    $functionName =~ s/-/_/g if ($tag eq "signal" or $tag eq "property");

    #Reuse existing Function, if it exists:
    #(For instance, if this is the override parse)
    $DocsParser::objCurrentFunction = $DocsParser::hasharrayFunctions{$functionName};
    if(!$DocsParser::objCurrentFunction)
    {
      #Make a new one if necessary:
      $DocsParser::objCurrentFunction = Function::new_empty();
      # The idea is to change the policy a bit:
      # If a function is redefined in a later parsing run only values which are redefined
      # will be overwritten. For the name this is trivial. The description is simply rewritten.
      # Same goes for the return description and the class mapping. Only exception is the
      # parameter list. Everytime we enter a <parameters> tag the list is emptied again.
      $$DocsParser::objCurrentFunction{name} = $functionName;
      $$DocsParser::objCurrentFunction{description} = "";
      $$DocsParser::objCurrentFunction{param_names} = [];
      # An empty hash reference; "= ()" would store undef in the scalar slot.
      $$DocsParser::objCurrentFunction{param_descriptions} = {};
      $$DocsParser::objCurrentFunction{return_description} = "";
      $$DocsParser::objCurrentFunction{mapped_class} = "";
    }
  }
  elsif($tag eq "parameters")
  {
    $$DocsParser::objCurrentFunction{param_names} = [];
    $$DocsParser::objCurrentFunction{param_descriptions} = {};
  }
  elsif($tag eq "parameter")
  {
    $DocsParser::currentParam = $attr{name};
    $$DocsParser::objCurrentFunction{param_descriptions}->{$DocsParser::currentParam} = "";
  }
  elsif($tag eq "description")
  {
    $$DocsParser::objCurrentFunction{description} = "";
    # Set destination for parse_on_cdata().
    $DocsParser::refAppendTo = \$$DocsParser::objCurrentFunction{description};
  }
  elsif($tag eq "parameter_description")
  {
    # Set destination for parse_on_cdata().
    $DocsParser::refAppendTo =
      \$$DocsParser::objCurrentFunction{param_descriptions}->{$DocsParser::currentParam};
  }
  elsif($tag eq "return")
  {
    $$DocsParser::objCurrentFunction{return_description} = "";
    # Set destination for parse_on_cdata().
    $DocsParser::refAppendTo = \$$DocsParser::objCurrentFunction{return_description};
  }
  elsif($tag eq "mapping")
  {
    $$DocsParser::objCurrentFunction{mapped_class} = $attr{class};
  }
  elsif($tag eq "substitute_type_name")
  {
    $DocsParser::type_names{$attr{from}} = $attr{to};
  }
  elsif($tag eq "substitute_enumerator_name")
  {
    if (exists $attr{from_prefix})
    {
      $DocsParser::enumerator_name_prefixes{$attr{from_prefix}} = $attr{to_prefix};
    }
    if (exists $attr{from})
    {
      $DocsParser::enumerator_names{$attr{from}} = $attr{to};
    }
  }
  elsif($tag ne "root")
  {
    $objParser->xpcroak("\nUnknown tag \"$tag\".");
  }
}

# XML::Parser "End" handler.
# Stores a finished Function in %DocsParser::hasharrayFunctions and finishes
# the bookkeeping of a <parameter>.
sub parse_on_end($$)
{
  my ($parser, $tag) = @_;

  # Clear destination for parse_on_cdata().
  $DocsParser::refAppendTo = undef;

  $tag = lc($tag);

  if($tag eq "function" or $tag eq "signal" or $tag eq "property" or $tag eq "enum")
  {
    # Store the Function structure in the array:
    my $functionName = $$DocsParser::objCurrentFunction{name};
    $DocsParser::hasharrayFunctions{$functionName} = $DocsParser::objCurrentFunction;
    $DocsParser::objCurrentFunction = undef;
  }
  elsif($tag eq "parameter")
  {
    # <parameter name="returns"> and <return> means the same.
    if($DocsParser::currentParam eq "returns")
    {
      # Move the description from the parameter table to the return description.
      $$DocsParser::objCurrentFunction{return_description} =
        delete $$DocsParser::objCurrentFunction{param_descriptions}->{"returns"};
    }
    else
    {
      # Append to list of parameters.
      push(@{$$DocsParser::objCurrentFunction{param_names}}, $DocsParser::currentParam);
    }

    $DocsParser::currentParam = undef;
  }
}

# XML::Parser "Char" handler.
# Appends $data to the string selected by parse_on_start(), if any.
sub parse_on_cdata($$)
{
  my ($parser, $data) = @_;

  if(defined $DocsParser::refAppendTo)
  {
    # Dispatch $data to the current destination string.
    $$DocsParser::refAppendTo .= $data;
  }
}

# $strCommentBlock lookup_enum_documentation($c_enum_name, $cpp_enum_name, $indent,
#   $ref_subst_in, $ref_subst_out, $deprecation_docs, $newin)
# Returns the documentation of an enum and its enumerators, without the
# leading "/**" and trailing "*/", or "" if the enum has no parsed docs.
# @$ref_subst_in and @$ref_subst_out are parallel lists of regex patterns and
# replacements that are applied to each enumerator name and description.
sub lookup_enum_documentation($$$$$$$)
{
  my ($c_enum_name, $cpp_enum_name, $indent, $ref_subst_in, $ref_subst_out,
    $deprecation_docs, $newin) = @_;

  my $objFunction = $DocsParser::hasharrayFunctions{$c_enum_name};
  if(!$objFunction)
  {
    #print "DocsParser.pm: Warning: enum not found: $enum_name\n";
    return ""
  }

  my $docs = "";

  my @param_names = @{$$objFunction{param_names}};
  my $param_descriptions = $$objFunction{param_descriptions} // {};

  # Append the param docs first so that the enum description can come last and
  # the possible flag docs that the m4 _ENUM() macro appends goes in the right
  # place.
  foreach my $param (@param_names)
  {
    # An enumerator without a description is treated like an empty description.
    my $desc = $$param_descriptions{$param} // "";

    # Remove the initial prefix in the name of the enum constant. Would be something like GTK_.
    $param =~ s/\b[A-Z]+_//;

    # Now apply custom substitutions.
    for my $i (0 .. $#$ref_subst_in)
    {
      $param =~ s/$$ref_subst_in[$i]/$$ref_subst_out[$i]/;
      $desc  =~ s/$$ref_subst_in[$i]/$$ref_subst_out[$i]/;
    }

    # Skip this element, if its name has been deleted.
    next if($param eq "");

    # Strip a trailing underscore from the name.
    $param =~ s/([a-zA-Z0-9]*(_[a-zA-Z0-9]+)*)_?/$1/g;
    if(length($desc) > 0)
    {
      # Chop off leading and trailing whitespace.
      $desc =~ s/^\s+//;
      $desc =~ s/\s+$//;
      $desc .= '.' unless($desc =~ /(?:^|\.)$/);
      $docs .= "\@var $cpp_enum_name ${param}\n\u${desc}\n\n"; # \u = Convert next char to uppercase
    }
  }

  # Replace @newin in the enum description, but don't in the element descriptions.
  my $description = "\@enum $cpp_enum_name\n";
  $description .= $$objFunction{description};
  DocsParser::convert_docs_to_cpp($c_enum_name, \$description);
  DocsParser::replace_or_add_newin(\$description, $newin);

  # Add note about deprecation if we have specified that in our _WRAP_ENUM(),
  # _WRAP_ENUM_DOCS_ONLY() or _WRAP_GERROR() call:
  if($deprecation_docs ne "")
  {
    $description .= "\n\@deprecated $deprecation_docs\n";
  }

  # Append the enum description docs.
  DocsParser::convert_docs_to_cpp($c_enum_name, \$docs);
  $docs .= "\n\n$description";
  DocsParser::add_m4_quotes(\$docs);

  # Escape the space after "i.e." or "e.g." in the brief description.
  $docs =~ s/^([^.]*\b(?:i\.e\.|e\.g\.))\s/$1\\ /;

  remove_example_code($c_enum_name, \$docs);

  # Add indentation and an asterisk on all lines except the first.
  # $docs does not contain leading "/**" and trailing "*/".
  $docs =~ s/\n/\n${indent}\* /g;

  return $docs;
}

# $strCommentBlock lookup_documentation($strFunctionName, $deprecation_docs,
#   $newin, $objCppfunc, $errthrow, $voidreturn)
# The parameters from objCppfunc are optional. If objCppfunc is passed, it is used for
# - deciding if the final C parameter shall be omitted if the C++ method
#   has a slot parameter,
# - converting C parameter names to C++ parameter names in the documentation,
#   if they differ,
# - deciding if the @return section shall be omitted.
sub lookup_documentation($$$;$$$)
{
  my ($functionName, $deprecation_docs, $newin, $objCppfunc, $errthrow, $voidreturn) = @_;

  my $objFunction = $DocsParser::hasharrayFunctions{$functionName};
  if(!$objFunction)
  {
    #print "DocsParser.pm: Warning: function not found: $functionName\n";
    return ""
  }

  my $text = $$objFunction{description} // "";

  # length() yields a number; compare it numerically.
  if(length($text) == 0)
  {
    print "DocsParser.pm: Warning: No C docs for: \"$functionName\"\n";
  }

  DocsParser::convert_docs_to_cpp($functionName, \$text);
  DocsParser::replace_or_add_newin(\$text, $newin);

  # A blank line, marking the end of a paragraph, is needed after @newin.
  # Most @newins are at the end of a function description.
  $text .= "\n";

  # Add note about deprecation if we have specified that in our _WRAP_METHOD(),
  # _WRAP_SIGNAL(), _WRAP_PROPERTY() or _WRAP_CHILD_PROPERTY() call:
  if($deprecation_docs ne "")
  {
    $text .= "\n\@deprecated $deprecation_docs\n";
  }

  my %param_name_mappings = DocsParser::append_parameter_docs($objFunction, \$text, $objCppfunc);

  # Omit the @return section for methods that return void.
  unless ((defined($objCppfunc) && $$objCppfunc{rettype} eq "void") || $voidreturn)
  {
    DocsParser::append_return_docs($objFunction, \$text);
  }

  DocsParser::add_throws(\$text, $errthrow);

  # Convert C parameter names to C++ parameter names where they differ.
  # The name is quoted so that it is always matched literally.
  foreach my $key (keys %param_name_mappings)
  {
    $text =~ s/\@(param|a) \Q$key\E\b/\@$1 $param_name_mappings{$key}/g;
  }

  # Remove leading and trailing white space.
  $text = string_trim($text);

  DocsParser::add_m4_quotes(\$text);

  # Escape the space after "i.e." or "e.g." in the brief description.
  $text =~ s/^([^.]*\b(?:i\.e\.|e\.g\.))\s/$1\\ /;

  remove_example_code($functionName, \$text);

  # Convert to Doxygen-style comment.
  $text =~ s/\n/\n${DocsParser::commentMiddleStart}/g;
  $text = $DocsParser::commentStart . $text;
  $text .= "\n${DocsParser::commentEnd}\n";

  return $text;
}

# void convert_value_to_cpp(\$text)
# Converts e.g. a property's default value.
sub convert_value_to_cpp($)
{
  my ($text) = @_;

  $$text =~ s"\bFALSE\b"false"g;
  $$text =~ s"\bTRUE\b"true"g;
  $$text =~ s"\bNULL\b"nullptr"g;

  # Enumerator names
  $$text =~ s/\b([A-Z]+)_([A-Z\d_]+)\b/&DocsParser::substitute_enumerator_name($1, $2)/eg;
}

# void remove_example_code($obj_name, \$text)
# Removes example code from the text of docs (passed by reference).
sub remove_example_code($$)
{
  my ($obj_name, $text) = @_;

  # Remove C example code.
  # NOTE(review): the tag names in the first two patterns were lost from this
  # copy of the file (all <...> markup had been stripped) and are restored
  # from the DocBook elements gtk-doc uses for examples; confirm against the
  # upstream file.
  my $example_removals =
    ($$text =~ s"<informalexample>.*?</informalexample>"[C example ellipted]"sg);
  $example_removals +=
    ($$text =~ s"<programlisting>.*?</programlisting>"\n[C example ellipted]"sg);
  $example_removals += ($$text =~ s"\|\[.*?]\|"\n[C example ellipted]"sg);

  # See "MS Visual Studio" comment in gmmproc.in.
  print STDERR "gmmproc, $main::source, $obj_name: Example code discarded.\n"
    if ($example_removals);
}

# void add_m4_quotes(\$text)
# Protects the m4 quote characters in the text and wraps the text in m4 quotes.
sub add_m4_quotes($)
{
  my ($text) = @_;

  # __BT__ and __FT__ are M4 macros defined in the base.m4 file that produce
  # a "`" and a "'" resp. without M4 errors.
  my %m4_quotes = (
    "`" => "'__BT__`",
    "'" => "'__FT__`",
  );

  $$text =~ s/([`'])/$m4_quotes{$1}/g;
  $$text = "`" . $$text . "'";
}

# %param_name_mappings append_parameter_docs($obj_function, \$text, $objCppfunc)
# Appends a "@param" line to $$text for each documented parameter and returns
# a hash that maps C parameter names to differing C++ parameter names.
# The final objCppfunc is optional. If passed, it is used to determine
# if the final C parameter should be omitted if the C++ method has a
# slot parameter. It is also used for converting C parameter names to
# C++ parameter names in the documentation, if they differ.
sub append_parameter_docs($$;$)
{
  my ($obj_function, $text, $objCppfunc) = @_;

  my @docs_param_names = @{$$obj_function{param_names}};
  my $param_descriptions = $$obj_function{param_descriptions} // {};
  my $defs_method = GtkDefs::lookup_method_dont_mark($$obj_function{name});
  my @c_param_names = $defs_method ? @{$$defs_method{param_names}} : @docs_param_names;

  # The information in
  # $obj_function comes from the docs.xml file,
  # $objCppfunc comes from _WRAP_METHOD() or _WRAP_SIGNAL() in the .hg file,
  # $defs_method comes from the methods.defs file.

  # Ideally @docs_param_names and @c_param_names are identical.
  # In the real world the parameters in the C documentation are sometimes not
  # listed in the same order as the arguments in the C function declaration.
  # We try to handle that case to some extent. If no argument name is misspelt
  # in either the docs or the C function declaration, it usually succeeds for
  # methods, but not for signals. For signals there is no C function declaration
  # to compare with. If the docs of some method or signal get badly distorted
  # due to imperfections in the C docs, and it's difficult to get the C docs
  # corrected, correct docs can be added to the docs_override.xml file.

  # Skip first param if this is a signal.
  if ($$obj_function{name} =~ /\w+::/)
  {
    shift(@docs_param_names);
    shift(@c_param_names);
  }
  # Skip first parameter if this is a non-static method.
  elsif (defined($objCppfunc))
  {
    if (!$$objCppfunc{static})
    {
      shift(@docs_param_names);
      shift(@c_param_names);
    }
  }
  # The second alternative is for use with method-mappings meaning:
  # this function is mapped into this Gtk::class.
  elsif (($defs_method && $$defs_method{class} ne "") ||
         $$obj_function{mapped_class} ne "")
  {
    shift(@docs_param_names);
    shift(@c_param_names);
  }

  # Skip the last param if there is a slot because it would be a
  # gpointer user_data parameter.
  if (defined($objCppfunc) && $$objCppfunc{slot_name})
  {
    pop(@docs_param_names);
    pop(@c_param_names);
  }

  # Skip the last param if it's an error output param.
  if (scalar @docs_param_names && $docs_param_names[-1] eq "error")
  {
    pop(@docs_param_names);
    pop(@c_param_names);
  }

  my $cpp_param_names;
  my $param_mappings;
  my $out_param_index = 1000; # No method has that many arguments, hopefully.
  if (defined($objCppfunc))
  {
    $cpp_param_names = $$objCppfunc{param_names};
    $param_mappings = $$objCppfunc{param_mappings}; # C name -> C++ index
    if (exists $$param_mappings{OUT})
    {
      $out_param_index = $$param_mappings{OUT};
    }
  }
  my %param_name_mappings; # C name -> C++ name

  for (my $i = 0; $i < @docs_param_names; ++$i)
  {
    my $param = $docs_param_names[$i];
    # A parameter without a description is treated like an empty description.
    my $desc = $$param_descriptions{$param} // "";

    my $param_without_trailing_underscore = $param;
    $param_without_trailing_underscore =~ s/([a-zA-Z0-9]*(_[a-zA-Z0-9]+)*)_?/$1/g;

    if (defined($objCppfunc))
    {
      # If the C++ name is not equal to the C name, mark that the name
      # shall be changed in the documentation.
      my $cpp_name = $param;
      if (exists $$param_mappings{$param})
      {
        # Rename and/or reorder declaration ({c_name} or {.}) in _WRAP_*().
        $cpp_name = $$cpp_param_names[$$param_mappings{$param}];
      }
      elsif (defined($c_param_names[$i]) && $c_param_names[$i] eq $param)
      {
        # Location in docs coincides with location in C declaration.
        my $cpp_index = $i;
        $cpp_index++ if ($i >= $out_param_index);
        $cpp_name = $$cpp_param_names[$cpp_index];
      }
      else
      {
        # Search for the param in the C declaration.
        for (my $j = 0; $j < @c_param_names; ++$j)
        {
          if ($c_param_names[$j] eq $param)
          {
            my $cpp_index = $j;
            $cpp_index++ if ($j >= $out_param_index);
            $cpp_name = $$cpp_param_names[$cpp_index];
            last;
          }
        }
      }

      # If the C++ declaration has no parameter at the computed index,
      # keep the C name instead of mapping the parameter to nothing.
      $cpp_name = $param unless defined($cpp_name);

      if ($cpp_name ne $param)
      {
        $param_name_mappings{$param_without_trailing_underscore} = $cpp_name;
      }
    }
    elsif ($param eq "callback")
    {
      # Deal with callback parameters converting the docs to a slot
      # compatible format.
      $param_name_mappings{$param} = "slot";
    }

    DocsParser::convert_docs_to_cpp($$obj_function{name}, \$desc);
    if(length($desc) > 0)
    {
      $desc .= '.' unless($desc =~ /(?:^|\.)$/);
      $$text .= "\n\@param ${param_without_trailing_underscore} \u${desc}";
    }
  }
  return %param_name_mappings;
}

# void append_return_docs($obj_function, \$text)
# Appends a "@return" line to $$text, if the function has a return description.
sub append_return_docs($$)
{
  my ($obj_function, $text) = @_;

  my $desc = $$obj_function{return_description};
  DocsParser::convert_docs_to_cpp($$obj_function{name}, \$desc);

  # The period is added back below, so the text always ends with exactly one.
  $desc  =~ s/\.$//;
  $$text .= "\n\@return \u${desc}." unless($desc eq "");
}

# void convert_docs_to_cpp($doc_func, \$text)
# Converts the C documentation in $$text to C++ documentation.
# $doc_func is the name of the C function (or enum) that the docs belong to.
sub convert_docs_to_cpp($$)
{
  my ($doc_func, $text) = @_;

  # Chop off leading and trailing whitespace.
  $$text =~ s/^\s+//;
  $$text =~ s/\s+$//;

  # Convert C documentation to C++.
  DocsParser::remove_c_memory_handling_info($text);
  DocsParser::convert_tags_to_doxygen($text);
  DocsParser::substitute_identifiers($doc_func, $text);

  # The "%" stops Doxygen from turning the word into a link.
  $$text =~ s/\bX\s+Window\b/X \%Window/g;
  $$text =~ s/\bWindow\s+manager/\%Window manager/g;
}

# void remove_c_memory_handling_info(\$text)
# Removes a sentence or clause that tells the reader to free memory with one
# of the C memory handling functions.
sub remove_c_memory_handling_info($)
{
  my ($text) = @_;

  # These C memory handling functions are removed, in most cases:
  # g_free, g_strfreev, g_list_free, g_slist_free
  my $mem_funcs = '\\bg_(?:free|strfreev|s?list_free)\\b';

  return if ($$text !~ /$mem_funcs/);

  # The text contains $mem_funcs. That's usually not relevant to C++ programmers.
  # Try to remove irrelevant text without removing too much.

  # This function is called separately for the description of each method,
  # parameter and return value. Let's assume that only one removal is necessary.

  # Don't modify the text, if $mem_funcs is part of example code.
  # remove_c_memory_handling_info() is called before remove_example_code().
  # NOTE(review): the tag names were lost from this copy of the file and are
  # restored to mirror the three patterns in remove_example_code(); confirm
  # against the upstream file.
  return if ($$text =~ m"(?:<informalexample>|<programlisting>|\|\[).*?$mem_funcs.*?(?:</informalexample>|</programlisting>|]\|)"s);

  # First try to remove the sentence containing $mem_funcs.
  # For simplicity, assume that a sentence is any string ending with a period.
  my $tmp = $$text;
  if ($tmp =~ s/[^.]*$mem_funcs.*?(?:\.|$)//s)
  {
    if ($tmp =~ /\w/)
    {
      # A sentence contains $mem_funcs, and it's not the only sentence in the text.
      # Remove that sentence.
      $$text = $tmp;
      return;
    }
  }

  $tmp = $$text;
  if ($tmp =~ s/[^.,]*$mem_funcs.*?(?:\.|,|$)//s)
  {
    if ($tmp =~ /\w/)
    {
      # A clause, delimited by comma or period, contains $mem_funcs,
      # and it's not the only clause in the text. Remove that clause.
      $tmp =~ s/,\s*$/./;
      $$text = $tmp;
      return;
    }
  }

  # Last attempt. If this doesn't remove anything, don't modify the text.
  $$text =~ s/ that (?:must|should) be freed with g_free(?:\(\))?//;
}

# void convert_tags_to_doxygen(\$text)
# Converts gtk-doc/DocBook markup in $$text to markup that Doxygen understands.
sub convert_tags_to_doxygen($)
{
  my ($text) = @_;

  for($$text)
  {
    # Replace format tags.
    s"<(/?)(?:emphasis|replaceable)>"<$1em>"g;
    s"<(/?)(?:constant|envar|filename|function|guimenuitem|literal|option|structfield|varname)>"<$1tt>"g;

    # Some argument names are suffixed by "_" -- strip this.
    # gtk-doc uses @thearg, but doxygen uses @a thearg.
    s" ?\@([a-zA-Z0-9]*(_[a-zA-Z0-9]+)*)_?\b" \@a $1"g;

    # Don't convert Doxygen's @throw, @throws and @param, so these can be used
    # in the docs_override.xml.
    # Also don't convert @enum and @var which are used for enum documentation.
    s" \@a (throws?|param|enum|var)\b" \@$1"g;

    s"^Note ?\d?: "\@note "mg;

    # NOTE(review): two further tag-removing substitutions followed here, but
    # their patterns were lost from this copy of the file (only s"""g was
    # left, which would re-apply the last successful pattern). They are left
    # out until the patterns have been restored from the upstream file.

    # Remove all link tags.
    # NOTE(review): pattern reconstructed from the comment above; confirm
    # against the upstream file.
    s"</?u?link[^>]*>""g;

    # Remove all para tags and simpara tags (simple paragraph).
    s"</?(?:sim)?para>""g;

    # Convert <simplelist>, <itemizedlist> and <variablelist> to something that
    # Doxygen understands.
    s"<simplelist>\n?(.*?)</simplelist>\n?"&DocsParser::convert_simplelist($1)"esg;
    s"<itemizedlist>\n?(.*?)</itemizedlist>\n?"&DocsParser::convert_itemizedlist($1)"esg;
    s"<variablelist>\n?(.*?)</variablelist>\n?"&DocsParser::convert_variablelist($1)"esg;

    # Use our Doxygen @newin alias.
    # Accept "Since" with or without a following colon.
    # Require the Since clause to be
    # - at the end of the string,
    # - at the end of a line and followed by a blank line, or
    # - followed by "Deprecated".
    # If none of these requirements is met, "Since" may be embedded inside
    # a function description, referring to only a part of the description.
    # See e.g. g_date_time_format() and gdk_cursor_new_from_pixbuf().
    # Doxygen assumes that @newin is followed by a paragraph that describes
    # what is new, but we don't use it that way.
    my $first_part = '\bSince[:\h]\h*(\d+)\.(\d+)'; # \h == [\t ] (horizontal whitespace)
    my $last_part = '\.?(\s*$|\h*\n\h*\n|\s+Deprecated)';
    s/$first_part\.(\d+)$last_part/\@newin{$1,$2,$3}$4/g;
    s/$first_part$last_part/\@newin{$1,$2}$3/g;

    # Doxygen is too dumb to handle &mdash;
    # NOTE(review): the entity had been decoded to a literal dash in this copy
    # of the file; confirm against the upstream file.
    s"&mdash;" \@htmlonly&mdash;\@endhtmlonly "g;

    s"\%?\bFALSE\b"false"g;
    s"\%?\bTRUE\b"true"g;
    s"\%?\bNULL\b"nullptr"g;

    s"#?\bgboolean\b"bool"g;
    s"#?\bg(int|short|long)\b"$1"g;
    s"#?\bgu(int|short|long)\b"unsigned $1"g;

    # Escape all backslashes, except in \throw, \throws and \param, which can
    # be Doxygen commands in the docs_override.xml.
    s"\\"\\\\"g;
    s"\\\\(throws?|param)\b"\\$1"g
  }
}

# void replace_or_add_newin(\$text, $newin)
# If $newin is not empty, replace the version numbers in an existing @newin
# Doxygen alias, or add one if there is none.
sub replace_or_add_newin($$)
{
  my ($text, $newin) = @_;

  return if ($newin eq "");

  if (!($$text =~ s/\@newin\{[\d,]+\}/\@newin{$newin}/))
  {
    $$text .= "\n\n\@newin{$newin}";
  }
}

# void add_throws(\$text, $errthrow)
# If $errthrow is defined and not empty, and $$text does not contain a @throw,
# @throws or @exception Doxygen command, add one or more @throws commands.
sub add_throws($$)
{
  my ($text, $errthrow) = @_;

  return if (!defined($errthrow) or $errthrow eq "");

  if (!($$text =~ /[\@\\](throws?|exception)\b/))
  {
    # Each comma, not preceded by backslash, creates a new @throws command.
    $errthrow =~ s/([^\\]),\s*/$1\n\@throws /g;
    $errthrow =~ s/\\,/,/g; # Delete backslash before comma
    $$text .= "\n\n\@throws $errthrow";
  }
}

# Convert <simplelist> tags to a list of newline-separated elements.
# $text is the content between the <simplelist> tags.
sub convert_simplelist($)
{
  my ($text) = @_;

  $text =~ s"<member>(.*?)(\n?)</member>(\n?)"$1<br>\n"sg;
  return "<br>\n" . $text . "<br>\n";
}

# Convert <itemizedlist> tags to Doxygen format.
# $text is the content between the <itemizedlist> tags.
sub convert_itemizedlist($)
{
  my ($text) = @_;

  $text =~ s"<listitem>(.*?)(\n?)</listitem>(\n?)"- $1\n"sg;
  return $text;
}

# Convert <variablelist> tags to an HTML definition list.
# $text is the content between the <variablelist> tags.
sub convert_variablelist($)
{
  my ($text) = @_;

  $text =~ s"</?varlistentry>\n?""g;
  $text =~ s"<(/?)term>"<$1dt>"g;
  $text =~ s"<(/?)listitem>"<$1dd>"g;
  return "<dl>\n" . $text . "</dl>\n";
}

# void substitute_identifiers($doc_func, \$text)
# Replaces C identifiers (properties, signals, types, enumerators, callbacks
# and functions) in $$text with their C++ counterparts.
sub substitute_identifiers($$)
{
  my ($doc_func, $text) = @_;

  for($$text)
  {
    # TODO: handle more than one namespace

    # Convert property names to C++.
    # The standard (and correct) gtk-doc way of referring to properties.
    s/(#[A-Z]\w+):([a-z\d-]+)/my $name = "$1::property_$2()"; $name =~ s"-"_"g; "$name";/ge;
    # This is an incorrect format but widely used so correctly treat as a
    # property.
    s/(\s)::([a-z\d-]+)(\s+property)/my $name = "$1property_$2()$3"; $name =~ s"-"_"g; "$name";/ge;
    # This one catches properties written in the gtk-doc block as for example
    # '#GtkActivatable::related-action property'. The correct way to write it
    # would be 'GtkActivatable:related-action' (with a single colon and not
    # two because the double colons are specifically for signals -- see the
    # gtk-doc docs:
    # http://developer.gnome.org/gtk-doc-manual/unstable/documenting_symbols.html.en)
    # but a few are written with the double colon in the gtk+ docs so this
    # protects against those errors.
    s/([A-Z]\w+)::([a-z\d-]+)(\s+property)/my $name = "$1::property_$2()$3"; $name =~ s"-"_"g; "$name";/ge;

    # Convert signal names to C++.
    s/(^|\s)::([a-z\d-]+)(\(\))*([^:\w]|$)/my $name = "$1signal_$2()$4"; $name =~ s"-"_"g; "$name";/ge;
    s/(#[A-Z]\w+)::([a-z\d-]+)(\(\))*([^:\w]|$)/my $name = "$1::signal_$2()$4"; $name =~ s"-"_"g; "$name";/ge;

    # Type names
    s/[#%]([A-Z][a-z]*)([A-Z][A-Za-z]+)\b/&DocsParser::substitute_type_name($1, $2)/eg;

    # Enumerator names
    s/[#%]([A-Z]+)_([A-Z\d_]+)\b/&DocsParser::substitute_enumerator_name($1, $2)/eg;

    s/\bG:://g; #Rename G::Something to Something.

    # Substitute callback types to slot types.
    s/(\b\w+)Callback/Slot$1/g;

    # Replace C function names with C++ counterparts.
    s/\b([a-z]+_[a-z][a-z\d_]+) ?\(\)/&DocsParser::substitute_function($doc_func, $1)/eg;
  }
}

# $cpp_name substitute_type_name($module, $name)
# Returns the C++ name of the C type "$module$name": the entry in
# %DocsParser::type_names if there is one, else "${module}::${name}".
sub substitute_type_name($$)
{
  my ($module, $name) = @_;

  my $c_name = $module . $name;

  if (exists $DocsParser::type_names{$c_name})
  {
    return $DocsParser::type_names{$c_name};
  }
  #print "DocsParser.pm: Assuming the type $c_name shall become " . (($module eq "G") ? "" : "${module}::") . "$name.\n";
  return $module . "::" . $name;
}

# $cpp_name substitute_enumerator_name($module, $name)
# Returns the C++ name of the C enumerator "${module}_${name}", using
# %DocsParser::enumerator_names, then %DocsParser::enumerator_name_prefixes,
# then a default translation.
sub substitute_enumerator_name($$)
{
  state $first_call = 1;
  state @sorted_keys;

  my ($module, $name) = @_;

  my $c_name = $module . "_" . $name;

  if (exists $DocsParser::enumerator_names{$c_name})
  {
    return $DocsParser::enumerator_names{$c_name};
  }

  if ($first_call)
  {
    # Sort only once, on the first call.
    # "state @sorted_keys = ...;" is not possible. Only a scalar variable
    # can have a one-time assignment in its defining "state" statement.
    $first_call = 0;
    @sorted_keys = reverse sort keys(%DocsParser::enumerator_name_prefixes);
  }

  # This is a linear search through the keys of %DocsParser::enumerator_name_prefixes.
  # It's inefficient if %DocsParser::enumerator_name_prefixes contains many values.
  #
  # If one key is part of another key (e.g. G_REGEX_MATCH_ and G_REGEX_),
  # search for a match against the longer key before the shorter key.
  foreach my $key (@sorted_keys)
  {
    # The key is a literal prefix, not a pattern; quote it.
    if ($c_name =~ m/^\Q$key\E/)
    {
      # $c_name begins with $key. Replace that part of $c_name with the C++ analogue.
      $c_name =~ s/^\Q$key\E/$DocsParser::enumerator_name_prefixes{$key}/;
      return $c_name; # Now it's the C++ name.
    }
  }

  # Don't apply the default substitution to these module names.
  # They are not really modules.
  if (grep {$module eq $_} qw(HAS NO O SO AF))
  {
    return $c_name;
  }

  my $cxx_name = (($module eq "G") ? "" : (ucfirst(lc($module)) . "::")) . $name;

  #print "DocsParser.pm: Assuming the enumerator $c_name shall become $cxx_name.\n";
  return $cxx_name;
}

# $cpp_name substitute_function($doc_func, $name)
# Returns the C++ counterpart of the C function $name, followed by "()".
# $doc_func is the name of the C function whose docs are being converted.
sub substitute_function($$)
{
  my ($doc_func, $name) = @_;

  if(my $defs_method = GtkDefs::lookup_method_dont_mark($name))
  {
    if(my $defs_object = DocsParser::lookup_object_of_method($$defs_method{class}, $name))
    {
      my $module = $$defs_object{module};
      my $class  = $$defs_object{name};

      DocsParser::build_method_name($doc_func, $module, $class, \$name);
    }
    else
    {
      print STDERR "Documentation: Class/Namespace for $name not found\n";
    }
  }
  else
  {
    # Not perfect, but better than nothing.
    $name =~ s/^g_/Glib::/;
  }

  return $name . "()";
}

# $defs_object lookup_object_of_method($object, $name)
# Returns the object (as known to GtkDefs) that the C function $name belongs
# to, or undef if none is found. $object is the C object name from the defs
# file; it may be empty.
sub lookup_object_of_method($$)
{
  my ($object, $name) = @_;

  if($object ne "")
  {
    my $result = GtkDefs::lookup_object($object);

    # We already know the C object name, because $name is a non-static method.
    if(defined($result) and ($result ne ""))
    {
      return $result;
    }
    else
    {
      print "DocsParser.pm: lookup_object_of_method(): Warning: GtkDefs::lookup_object() failed for object name=" . $object . ", function name=" . $name . "\n";
      print " This may be a missing define-object in a *.defs file.\n"
    }
  }

  # Guess the object name from the function name, minus its last word.
  my @parts = split(/_/, $name);
  pop(@parts);

  # (gtk, foo, bar) -> (Gtk, Foo, Bar)
  foreach(@parts) { $_ = (length > 2) ? ucfirst : uc; }

  # Do a bit of try'n'error.
  while($#parts >= 1)
  {
    my $try = join("", @parts);

    if(my $defs_object = GtkDefs::lookup_object($try))
    {
      return $defs_object;
    }

    pop(@parts);
  }

  return undef;
}

# void build_method_name($doc_func, $module, $class, \$name)
# If the C function name $$name starts with the C prefix of $module$class
# (e.g. "gtk_widget_" for Gtk and Widget), that prefix is removed. It is
# replaced by "${module}::${class}::", unless $doc_func has the same prefix.
sub build_method_name($$$$)
{
  my ($doc_func, $module, $class, $name) = @_;

  # GtkWidget -> gtk_widget_
  my $prefix = $module . $class;

  $prefix =~ s/([a-z])([A-Z])/$1_$2/g;
  $prefix = lc($prefix) . '_';

  if($$name =~ m/^\Q$prefix\E/)
  {
    my $scope = "";
    $scope = "${module}::${class}::" unless($doc_func =~ m/^\Q$prefix\E/);

    substr($$name, 0, length($prefix)) = $scope;
  }
}

1; # indicate proper module load.