| #!/usr/bin/perl |
| |
| # |
| #//===----------------------------------------------------------------------===// |
| #// |
| #// The LLVM Compiler Infrastructure |
| #// |
| #// This file is dual licensed under the MIT and the University of Illinois Open |
| #// Source Licenses. See LICENSE.txt for details. |
| #// |
| #//===----------------------------------------------------------------------===// |
| # |
| |
| use strict; |
| use warnings; |
| |
| use File::Glob ":glob"; |
| use Encode qw{ encode }; |
| |
| use FindBin; |
| use lib "$FindBin::Bin/lib"; |
| |
| use tools; |
| |
| our $VERSION = "0.04"; |
| my $escape = qr{%}; |
| my $placeholder = qr{(\d)\$(s|l?[du])}; |
| my $target_os; |
| |
| my $sections = |
| { |
| meta => { short => "prp" }, # "prp" stands for "property". |
| strings => { short => "str" }, |
| formats => { short => "fmt" }, |
| messages => { short => "msg" }, |
| hints => { short => "hnt" }, |
| }; |
| my @sections = qw{ meta strings formats messages hints }; |
| # Assign section properties: long name, set number, base number. |
| map( $sections->{ $sections[ $_ ] }->{ long } = $sections[ $_ ], ( 0 .. @sections - 1 ) ); |
| map( $sections->{ $sections[ $_ ] }->{ set } = ( $_ + 1 ), ( 0 .. @sections - 1 ) ); |
| map( $sections->{ $sections[ $_ ] }->{ base } = ( ( $_ + 1 ) << 16 ), ( 0 .. @sections - 1 ) ); |
| |
| # Properties of Meta section. |
| my @properties = qw{ Language Country LangId Version Revision }; |
| |
| |
| sub _generate_comment($$$) { |
| |
| my ( $data, $open, $close ) = @_; |
| my $bulk = |
| $open . " Do not edit this file! " . $close . "\n" . |
| $open . " The file was generated from " . get_file( $data->{ "%meta" }->{ source } ) . |
| " by " . $tool . " on " . localtime() . ". " . $close . "\n"; |
| return $bulk; |
| |
| }; # sub _generate_comment |
| |
| |
| sub msg2sgn($) { |
| |
| # Convert message string to signature. Signature is a list of placeholders in sorted order. |
| # For example, signature of "%1$s value \"%2$s\" is invalid." is "%1$s %2$s". |
| |
| my ( $msg ) = @_; |
| my @placeholders; |
| pos( $msg ) = 0; |
| while ( $msg =~ m{\G.*?$escape$placeholder}g ) { |
| $placeholders[ $1 - 1 ] = "%$1\$$2"; |
| }; # while |
| for ( my $i = 1; $i <= @placeholders; ++ $i ) { |
| if ( not defined( $placeholders[ $i - 1 ] ) ) { |
| $placeholders[ $i - 1 ] = "%$i\$-"; |
| }; # if |
| }; # for $i |
| return join( " ", @placeholders ); |
| |
| }; # sub msg2sgn |
| |
| |
| sub msg2src($) { |
| |
| # Convert message string to a C string constant. |
| |
| my ( $msg ) = @_; |
| if ( $target_os eq "win" ) { |
| $msg =~ s{$escape$placeholder}{\%$1!$2!}g; |
| }; # if |
| return $msg; |
| |
| }; # sub msg2src |
| |
| |
| my $special = |
| { |
| "n" => "\n", |
| "t" => "\t", |
| }; |
| |
| sub msg2mc($) { |
| my ( $msg ) = @_; |
| $msg = msg2src( $msg ); # Get windows style placeholders. |
| $msg =~ s{\\(.)}{ exists( $special->{ $1 } ) ? $special->{ $1 } : $1 }ge; |
| return $msg; |
| }; # sub msg2mc |
| |
| |
| |
| sub parse_message($) { |
| |
| my ( $msg ) = @_; |
| pos( $msg ) = 0; |
| for ( ; ; ) { |
| if ( $msg !~ m{\G.*?$escape}gc ) { |
| last; |
| } |
| if ( $msg !~ m{\G$placeholder}gc ) { |
| return "Bad %-sequence near \"%" . substr( $msg, pos( $msg ), 7 ) . "\""; |
| }; # if |
| }; # forever |
| return undef; |
| |
| }; # sub parse_message |
| |
| |
| sub parse_source($) { |
| |
| my ( $name ) = @_; |
| |
| my @bulk = read_file( $name, -layer => ":utf8" ); |
| my $data = {}; |
| |
| my $line; |
| my $n = 0; # Line number. |
| my $obsolete = 0; # Counter of obsolete entries. |
| my $last_idx; |
| my %idents; |
| my $section; |
| |
| my $error = |
| sub { |
| my ( $n, $line, $msg ) = @_; |
| runtime_error( "Error parsing $name line $n: " . "$msg:\n" . " $line" ); |
| }; # sub |
| |
| foreach $line ( @bulk ) { |
| ++ $n; |
| # Skip empty lines and comments. |
| if ( $line =~ m{\A\s*(\n|#)} ) { |
| $last_idx = undef; |
| next; |
| }; # if |
| # Parse section header. |
| if ( $line =~ m{\A-\*-\s*([A-Z_]*)\s*-\*-\s*\n\z}i ) { |
| $section = ( lc( $1 ) ); |
| if ( not grep( $section eq $_, @sections ) ) { |
| $error->( $n, $line, "Unknown section \"$section\" specified" ); |
| }; # if |
| if ( exists( $data->{ $section } ) ) { |
| $error->( $n, $line, "Multiple sections of the same type specified" ); |
| }; # if |
| %idents = (); # Clean list of known message identifiers. |
| next; |
| }; # if |
| if ( not defined( $section ) ) { |
| $error->( $n, $line, "Section heading expected" ); |
| }; # if |
| # Parse section body. |
| if ( $section eq "meta" ) { |
| if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) { |
| # Parse meta properties (such as Language, Country, and LangId). |
| my ( $property, $value ) = ( $1, $2 ); |
| if ( not grep( $_ eq $property , @properties ) ) { |
| $error->( $n, $line, "Unknown property \"$property\" specified" ); |
| }; # if |
| if ( exists( $data->{ "%meta" }->{ $property } ) ) { |
| $error->( $n, $line, "Property \"$property\" has already been specified" ); |
| }; # if |
| $data->{ "%meta" }->{ $property } = $value; |
| $last_idx = undef; |
| next; |
| }; # if |
| $error->( $n, $line, "Property line expected" ); |
| }; # if |
| # Parse message. |
| if ( $line =~ m{\A([A-Z_][A-Z_0-9]*)\s+"(.*)"\s*?\n?\z}i ) { |
| my ( $ident, $message ) = ( $1, $2 ); |
| if ( $ident eq "OBSOLETE" ) { |
| # If id is "OBSOLETE", add a unique suffix. It provides convenient way to mark |
| # obsolete messages. |
| ++ $obsolete; |
| $ident .= $obsolete; |
| }; # if |
| if ( exists( $idents{ $ident } ) ) { |
| $error->( $n, $line, "Identifier \"$ident\" is redefined" ); |
| }; # if |
| # Check %-sequences. |
| my $err = parse_message( $message ); |
| if ( $err ) { |
| $error->( $n, $line, $err ); |
| }; # if |
| # Save message. |
| push( @{ $data->{ $section } }, [ $ident, $message ] ); |
| $idents{ $ident } = 1; |
| $last_idx = @{ $data->{ $section } } - 1; |
| next; |
| }; # if |
| # Parse continuation line. |
| if ( $line =~ m{\A\s*"(.*)"\s*\z} ) { |
| my $message = $1; |
| if ( not defined( $last_idx ) ) { |
| $error->( $n, $line, "Unexpected continuation line" ); |
| }; # if |
| # Check %-sequences. |
| my $err = parse_message( $message ); |
| if ( $err ) { |
| $error->( $n, $line, $err ); |
| }; # if |
| # Save continuation. |
| $data->{ $section }->[ $last_idx ]->[ 1 ] .= $message; |
| next; |
| }; # if |
| $error->( $n, $line, "Message definition expected" ); |
| }; # foreach |
| $data->{ "%meta" }->{ source } = $name; |
| foreach my $section ( @sections ) { |
| if ( not exists( $data->{ $section } ) ) { |
| $data->{ $section } = []; |
| }; # if |
| }; # foreach $section |
| |
| foreach my $property ( @properties ) { |
| if ( not defined( $data->{ "%meta" }->{ $property } ) ) { |
| runtime_error( |
| "Error parsing $name: " . |
| "Required \"$property\" property is not specified" |
| ); |
| }; # if |
| push( @{ $data->{ meta } }, [ $property, $data->{ "%meta" }->{ $property } ] ); |
| }; # foreach |
| |
| return $data; |
| |
| }; # sub parse_source |
| |
| |
| sub generate_enum($$$) { |
| |
| my ( $data, $file, $prefix ) = @_; |
| my $bulk = ""; |
| |
| $bulk = |
| _generate_comment( $data, "//", "//" ) . |
| "\n" . |
| "enum ${prefix}_id {\n\n" . |
| " // A special id for absence of message.\n" . |
| " ${prefix}_null = 0,\n\n"; |
| |
| foreach my $section ( @sections ) { |
| my $props = $sections->{ $section }; # Section properties. |
| my $short = $props->{ short }; # Short section name, frequently used. |
| $bulk .= |
| " // Set #$props->{ set }, $props->{ long }.\n" . |
| " ${prefix}_${short}_first = $props->{ base },\n"; |
| foreach my $item ( @{ $data->{ $section } } ) { |
| my ( $ident, undef ) = @$item; |
| $bulk .= " ${prefix}_${short}_${ident},\n"; |
| }; # foreach |
| $bulk .= " ${prefix}_${short}_last,\n\n"; |
| }; # foreach $type |
| $bulk .= " ${prefix}_xxx_lastest\n\n"; |
| |
| $bulk .= |
| "}; // enum ${prefix}_id\n" . |
| "\n" . |
| "typedef enum ${prefix}_id ${prefix}_id_t;\n" . |
| "\n"; |
| |
| $bulk .= |
| "\n" . |
| "// end of file //\n"; |
| |
| write_file( $file, \$bulk ); |
| |
| }; # sub generate_enum |
| |
| |
| sub generate_signature($$) { |
| |
| my ( $data, $file ) = @_; |
| my $bulk = ""; |
| |
| $bulk .= "// message catalog signature file //\n\n"; |
| |
| foreach my $section ( @sections ) { |
| my $props = $sections->{ $section }; # Section properties. |
| my $short = $props->{ short }; # Short section name, frequently used. |
| $bulk .= "-*- " . uc( $props->{ long } ) . "-*-\n\n"; |
| foreach my $item ( @{ $data->{ $section } } ) { |
| my ( $ident, $msg ) = @$item; |
| $bulk .= sprintf( "%-40s %s\n", $ident, msg2sgn( $msg ) ); |
| }; # foreach |
| $bulk .= "\n"; |
| }; # foreach $type |
| |
| $bulk .= "// end of file //\n"; |
| |
| write_file( $file, \$bulk ); |
| |
| }; # sub generate_signature |
| |
| |
| sub generate_default($$$) { |
| |
| my ( $data, $file, $prefix ) = @_; |
| my $bulk = ""; |
| |
| $bulk .= |
| _generate_comment( $data, "//", "//" ) . |
| "\n"; |
| |
| foreach my $section ( @sections ) { |
| $bulk .= |
| "static char const *\n" . |
| "__${prefix}_default_${section}" . "[] =\n" . |
| " {\n" . |
| " NULL,\n"; |
| foreach my $item ( @{ $data->{ $section } } ) { |
| my ( undef, $msg ) = @$item; |
| $bulk .= " \"" . msg2src( $msg ) . "\",\n"; |
| }; # while |
| $bulk .= |
| " NULL\n" . |
| " };\n" . |
| "\n"; |
| }; # foreach $type |
| |
| $bulk .= |
| "struct kmp_i18n_section {\n" . |
| " int size;\n" . |
| " char const ** str;\n" . |
| "}; // struct kmp_i18n_section\n" . |
| "typedef struct kmp_i18n_section kmp_i18n_section_t;\n" . |
| "\n" . |
| "static kmp_i18n_section_t\n" . |
| "__${prefix}_sections[] =\n" . |
| " {\n" . |
| " { 0, NULL },\n"; |
| foreach my $section ( @sections ) { |
| $bulk .= |
| " { " . @{ $data->{ $section } } . ", __${prefix}_default_${section} },\n"; |
| }; # foreach $type |
| $bulk .= |
| " { 0, NULL }\n" . |
| " };\n" . |
| "\n"; |
| |
| $bulk .= |
| "struct kmp_i18n_table {\n" . |
| " int size;\n" . |
| " kmp_i18n_section_t * sect;\n" . |
| "}; // struct kmp_i18n_table\n" . |
| "typedef struct kmp_i18n_table kmp_i18n_table_t;\n" . |
| "\n" . |
| "static kmp_i18n_table_t __kmp_i18n_default_table =\n" . |
| " {\n" . |
| " " . @sections . ",\n" . |
| " __kmp_i18n_sections\n" . |
| " };\n" . |
| "\n" . |
| "// end of file //\n"; |
| |
| write_file( $file, \$bulk ); |
| |
| }; # sub generate_default |
| |
| |
| sub generate_message_unix($$) { |
| |
| my ( $data, $file ) = @_; |
| my $bulk = ""; |
| |
| $bulk .= |
| _generate_comment( $data, "\$", "\$" ) . |
| "\n" . |
| "\$quote \"\n\n"; |
| |
| foreach my $section ( @sections ) { |
| $bulk .= |
| "\$ " . ( "-" x 78 ) . "\n\$ $section\n\$ " . ( "-" x 78 ) . "\n\n" . |
| "\$set $sections->{ $section }->{ set }\n" . |
| "\n"; |
| my $n = 0; |
| foreach my $item ( @{ $data->{ $section } } ) { |
| my ( undef, $msg ) = @$item; |
| ++ $n; |
| $bulk .= "$n \"" . msg2src( $msg ) . "\"\n"; |
| }; # foreach |
| $bulk .= "\n"; |
| }; # foreach $type |
| |
| $bulk .= |
| "\n" . |
| "\$ end of file \$\n"; |
| |
| write_file( $file, \$bulk, -layer => ":utf8" ); |
| |
| }; # sub generate_message_linux |
| |
| |
| sub generate_message_windows($$) { |
| |
| my ( $data, $file ) = @_; |
| my $bulk = ""; |
| my $language = $data->{ "%meta" }->{ Language }; |
| my $langid = $data->{ "%meta" }->{ LangId }; |
| |
| $bulk .= |
| _generate_comment( $data, ";", ";" ) . |
| "\n" . |
| "LanguageNames = ($language=$langid:msg_$langid)\n" . |
| "\n"; |
| |
| $bulk .= |
| "FacilityNames=(\n"; |
| foreach my $section ( @sections ) { |
| my $props = $sections->{ $section }; # Section properties. |
| $bulk .= |
| " $props->{ short }=" . $props->{ set } ."\n"; |
| }; # foreach $section |
| $bulk .= |
| ")\n\n"; |
| |
| foreach my $section ( @sections ) { |
| my $short = $sections->{ $section }->{ short }; |
| my $n = 0; |
| foreach my $item ( @{ $data->{ $section } } ) { |
| my ( undef, $msg ) = @$item; |
| ++ $n; |
| $bulk .= |
| "MessageId=$n\n" . |
| "Facility=$short\n" . |
| "Language=$language\n" . |
| msg2mc( $msg ) . "\n.\n\n"; |
| }; # foreach $item |
| }; # foreach $section |
| |
| $bulk .= |
| "\n" . |
| "; end of file ;\n"; |
| |
| $bulk = encode( "UTF-16LE", $bulk ); # Convert text to UTF-16LE used in Windows* OS. |
| write_file( $file, \$bulk, -binary => 1 ); |
| |
| }; # sub generate_message_windows |
| |
| |
| # |
| # Parse command line. |
| # |
| |
| my $input_file; |
| my $enum_file; |
| my $signature_file; |
| my $default_file; |
| my $message_file; |
| my $id; |
| my $prefix = ""; |
| get_options( |
| "os=s" => \$target_os, |
| "enum-file=s" => \$enum_file, |
| "signature-file=s" => \$signature_file, |
| "default-file=s" => \$default_file, |
| "message-file=s" => \$message_file, |
| "id|lang-id" => \$id, |
| "prefix=s" => \$prefix, |
| ); |
| if ( @ARGV == 0 ) { |
| cmdline_error( "No source file specified -- nothing to do" ); |
| }; # if |
| if ( @ARGV > 1 ) { |
| cmdline_error( "Too many source files specified" ); |
| }; # if |
| $input_file = $ARGV[ 0 ]; |
| |
| |
| my $generate_message; |
| if ( $target_os =~ m{\A(?:lin|mac)\z} ) { |
| $generate_message = \&generate_message_unix; |
| } elsif ( $target_os eq "win" ) { |
| $generate_message = \&generate_message_windows; |
| } else { |
| runtime_error( "OS \"$target_os\" is not supported" ); |
| }; # if |
| |
| |
| # |
| # Do the work. |
| # |
| |
| my $data = parse_source( $input_file ); |
| if ( defined( $id ) ) { |
| print( $data->{ "%meta" }->{ LangId }, "\n" ); |
| }; # if |
| if ( defined( $enum_file ) ) { |
| generate_enum( $data, $enum_file, $prefix ); |
| }; # if |
| if ( defined( $signature_file ) ) { |
| generate_signature( $data, $signature_file ); |
| }; # if |
| if ( defined( $default_file ) ) { |
| generate_default( $data, $default_file, $prefix ); |
| }; # if |
| if ( defined( $message_file ) ) { |
| $generate_message->( $data, $message_file ); |
| }; # if |
| |
| exit( 0 ); |
| |
| __END__ |
| |
| =pod |
| |
| =head1 NAME |
| |
| B<message-converter.pl> -- Convert message catalog source file into another text forms. |
| |
| =head1 SYNOPSIS |
| |
| B<message-converter.pl> I<option>... <file> |
| |
| =head1 OPTIONS |
| |
| =over |
| |
| =item B<--enum-file=>I<file> |
| |
| Generate enum file named I<file>. |
| |
| =item B<--default-file=>I<file> |
| |
| Generate default messages file named I<file>. |
| |
| =item B<--lang-id> |
| |
| Print language identifier of the message catalog source file. |
| |
| =item B<--message-file=>I<file> |
| |
| Generate message file. |
| |
| =item B<--signature-file=>I<file> |
| |
| Generate signature file. |
| |
| Signatures are used for checking compatibility. For example, to check a primary |
| catalog and its translation to another language, signatures of both catalogs should be generated |
| and compared. If signatures are identical, catalogs are compatible. |
| |
| =item B<--prefix=>I<prefix> |
| |
| Prefix to be used for all C identifiers (type and variable names) in enum and default messages |
| files. |
| |
| =item B<--os=>I<str> |
| |
| Specify OS name the message formats to be converted for. If not specified expolicitly, value of |
| LIBOMP_OS environment variable is used. If LIBOMP_OS is not defined, host OS is detected. |
| |
| Depending on OS, B<message-converter.pl> converts message formats to GNU style or MS style. |
| |
| =item Standard Options |
| |
| =over |
| |
| =item B<--doc> |
| |
| =item B<--manual> |
| |
| Print full documentation and exit. |
| |
| =item B<--help> |
| |
| Print short help message and exit. |
| |
| =item B<--version> |
| |
| Print version string and exit. |
| |
| =back |
| |
| =back |
| |
| =head1 ARGUMENTS |
| |
| =over |
| |
| =item I<file> |
| |
| A name of input file. |
| |
| =back |
| |
| =head1 DESCRIPTION |
| |
| =head2 Message Catalog File Format |
| |
| It is plain text file in UTF-8 encoding. Empty lines and lines beginning with sharp sign (C<#>) are |
| ignored. EBNF syntax of content: |
| |
| catalog = { section }; |
| section = header body; |
| header = "-*- " section-id " -*-" "\n"; |
| body = { message }; |
| message = message-id string "\n" { string "\n" }; |
| section-id = identifier; |
| message-id = "OBSOLETE" | identifier; |
| identifier = letter { letter | digit | "_" }; |
| string = """ { character } """; |
| |
| Identifier starts with letter, with following letters, digits, and underscores. Identifiers are |
| case-sensitive. Setion identifiers are fixed: C<META>, C<STRINGS>, C<FORMATS>, C<MESSAGES> and |
| C<HINTS>. Message identifiers must be unique within section. Special C<OBSOLETE> pseudo-identifier |
| may be used many times. |
| |
| String is a C string literal which must not cross line boundaries. |
| Long messages may occupy multiple lines, a string per line. |
| |
| Message may include printf-like GNU-style placeholders for arguments: C<%I<n>$I<t>>, |
| where I<n> is argument number (C<1>, C<2>, ...), |
| I<t> -- argument type, C<s> (string) or C<d> (32-bit integer). |
| |
| See also comments in F<i18n/en_US.txt>. |
| |
| =head2 Output Files |
| |
| This script can generate 3 different text files from single source: |
| |
| =over |
| |
| =item Enum file. |
| |
| Enum file is a C include file, containing definitions of message identifiers, e. g.: |
| |
| enum kmp_i18n_id { |
| |
| // Set #1, meta. |
| kmp_i18n_prp_first = 65536, |
| kmp_i18n_prp_Language, |
| kmp_i18n_prp_Country, |
| kmp_i18n_prp_LangId, |
| kmp_i18n_prp_Version, |
| kmp_i18n_prp_Revision, |
| kmp_i18n_prp_last, |
| |
| // Set #2, strings. |
| kmp_i18n_str_first = 131072, |
| kmp_i18n_str_Error, |
| kmp_i18n_str_UnknownFile, |
| kmp_i18n_str_NotANumber, |
| ... |
| |
| // Set #3, fotrmats. |
| ... |
| |
| kmp_i18n_xxx_lastest |
| |
| }; // enum kmp_i18n_id |
| |
| typedef enum kmp_i18n_id kmp_i18n_id_t; |
| |
| =item Default messages file. |
| |
| Default messages file is a C include file containing default messages to be embedded into |
| application (and used if external message catalog does not exist or could not be open): |
| |
| static char const * |
| __kmp_i18n_default_meta[] = |
| { |
| NULL, |
| "English", |
| "USA", |
| "1033", |
| "2", |
| "20090806", |
| NULL |
| }; |
| |
| static char const * |
| __kmp_i18n_default_strings[] = |
| { |
| "Error", |
| "(unknown file)", |
| "not a number", |
| ... |
| NULL |
| }; |
| |
| ... |
| |
| =item Message file. |
| |
| Message file is an input for message compiler, F<gencat> on Linux* OS and OS X*, or F<mc.exe> on |
| Windows* OS. |
| |
| Here is the example of Linux* OS message file: |
| |
| $quote " |
| 1 "Japanese" |
| 2 "Japan" |
| 3 "1041" |
| 4 "2" |
| 5 "Based on Enlish message catalog revision 20090806" |
| ... |
| |
| Example of Windows* OS message file: |
| |
| LanguageNames = (Japanese=10041:msg_1041) |
| |
| FacilityNames = ( |
| prp=1 |
| str=2 |
| fmt=3 |
| ... |
| ) |
| |
| MessageId=1 |
| Facility=prp |
| Language=Japanese |
| Japanese |
| . |
| |
| ... |
| |
| =item Signature. |
| |
| Signature is a processed source file: comments stripped, strings deleted, but placeholders kept and |
| sorted. |
| |
| -*- FORMATS-*- |
| |
| Info %1$d %2$s |
| Warning %1$d %2$s |
| Fatal %1$d %2$s |
| SysErr %1$d %2$s |
| Hint %1$- %2$s |
| Pragma %1$s %2$s %3$s %4$s |
| |
| The purpose of signatures -- compare two message source files for compatibility. If signatures of |
| two message sources are the same, binary message catalogs will be compatible. |
| |
| =back |
| |
| =head1 EXAMPLES |
| |
| Generate include file containing message identifiers: |
| |
| $ message-converter.pl --enum-file=kmp_i18n_id.inc en_US.txt |
| |
| Generate include file contating default messages: |
| |
| $ message-converter.pl --default-file=kmp_i18n_default.inc en_US.txt |
| |
| Generate input file for message compiler, Linux* OS example: |
| |
| $ message-converter.pl --message-file=ru_RU.UTF-8.msg ru_RU.txt |
| |
| Generate input file for message compiler, Windows* OS example: |
| |
| > message-converter.pl --message-file=ru_RU.UTF-8.mc ru_RU.txt |
| |
| =cut |
| |
| # end of file # |
| |