#!/vol/perl/bin/perl -w
# sampa to praat ipa font converter
# By Thorsten Trippel
# University of Bielefeld
# ttrippel@spectrum.uni-bielefeld.de
# May 2001
#
# This program requires perl 5.6 or higher
# (originally tested on perl 5.6.0 built for sun4-solaris).
#
# This program is a pure font converter: it takes SAMPA-IPA symbols as
# input, read from the input file (or STDIN when no file is given), and
# produces the corresponding PRAAT-IPA notation, which consists of ASCII
# combinations, sent to STDOUT.
#
# Usage: sampa2praat.pl INPUTFILE
#
# How the conversion works
# ------------------------
# Every input line is scanned once, from left to right.  At each position
# the rules of the translation table are tried in the order in which they
# are listed and the first one that matches is applied; a character that no
# rule matches is copied to the output unchanged.  Because the scan always
# looks at the original SAMPA text (never at PRAAT text that has already
# been produced), a PRAAT code such as "\as" can never be mistaken for SAMPA
# input by a later rule, and the result does not depend on hash ordering.
#
# Rules for longer SAMPA sequences are listed before rules for their
# prefixes (e.g. "J\_<" before "J\" before "J").  This replaces the former
# preprocessing step that rewrote "J\" to a placeholder in order to avoid
# multiple-matching substrings.
#
# The PRAAT manual does not specify the following characters (which exist
# in the IPA chart and have symbols in SAMPA notation):
#     O\ |\ !\ =\ |\|\ p_> t_> k_> s_> x\ @\ 3\
# They are translated to the literal text "notspecified".

use strict;
use warnings;

# Beginning of translation table
#
# Each rule is a pair [ SAMPA pattern, PRAAT replacement ].  ORDER MATTERS:
# the first rule that matches at the current input position wins.
my @RULES = (

    # --- SAMPA sequences of three or more characters ----------------------
    [ qr/_}\(t_}\)/, 'd\\cn' ],
    [ qr/_r\(e_r\)/, 'o\\T^' ],
    [ qr/_oe_o/,     'o\\Tv' ],
    # Pattern kept exactly as found: backtick, any character, "@",
    # backtick, any character.
    # NOTE(review): X-SAMPA writes the rhotacised schwa as "@`" -- confirm
    # that this pattern is really what was intended.
    [ qr/`.\@`./,    '\\sr' ],
    [ qr/J\\_</,     '\\j^' ],
    [ qr/G_</,       '\\G^' ],
    [ qr/b_</,       '\\b^' ],
    [ qr/d_</,       '\\d^' ],
    [ qr/g_</,       '\\g^' ],
    [ qr/k_>/,       'notspecified' ],
    [ qr/p_>/,       'notspecified' ],
    [ qr/s_>/,       'notspecified' ],
    [ qr/t_>/,       'notspecified' ],

    # --- two-character SAMPA sequences ------------------------------------
    [ qr/K\\/,  '\\lz' ],
    [ qr/L\\/,  '\\lc' ],
    [ qr/<\\/,  '\\9-' ],
    [ qr/\?\\/, '\\9e' ],
    [ qr/B\\/,  '\\bc' ],
    [ qr/s\\/,  '\\cc' ],
    [ qr/p\\/,  '\\f2' ],
    [ qr/G\\/,  '\\gc' ],
    [ qr/X\\/,  '\\h-' ],
    [ qr/j\\/,  '\\jc' ],
    [ qr/>\\/,  '\\?-' ],
    [ qr/M\\/,  '\\ml' ],
    [ qr/N\\/,  '\\nc' ],
    [ qr/R\\/,  '\\rc' ],
    [ qr/z\\/,  '\\zc' ],
    # "J\" used to be rewritten to this placeholder by a preprocessing step;
    # no surviving table entry turns the placeholder into a PRAAT code.
    # NOTE(review): the entry that did so was presumably among the damaged
    # ones listed at the end of this table -- restore the real PRAAT code.
    [ qr/J\\/,  'jotbackslash' ],
    [ qr/!\\/,  'notspecified' ],
    [ qr/3\\/,  'notspecified' ],
    [ qr/=\\/,  'notspecified' ],
    [ qr/\@\\/, 'notspecified' ],
    [ qr/O\\/,  'notspecified' ],
    [ qr/x\\/,  'notspecified' ],
    [ qr/\|\\/, 'notspecified' ],
    [ qr/l`/,   '\\l.' ],
    [ qr/d`/,   '\\d.' ],
    [ qr/r`/,   '\\f.' ],
    [ qr/n`/,   '\\n.' ],
    [ qr/t`/,   '\\t.' ],
    [ qr/z`/,   '\\z.' ],
    [ qr/_=/,   'n\\|v' ],

    # --- single characters guarded by a look-ahead ------------------------
    [ qr/K(?!\\)/,     '\\l-' ],
    [ qr/L(?!\\)/,     '\\yt' ],
    [ qr/X(?!\\)/,     '\\ci' ],
    # The bare "^" inside the look-ahead is a start-of-string anchor and can
    # never be true after a "G"; it is kept as found in the original table.
    [ qr/G(?!\\|^|_)/, '\\ga' ],
    # "?" not followed by "-" or by a space.
    [ qr/\?(?!-|\ )/,  '\\?g' ],
    [ qr/\@(?!`)/,     '\\sw' ],

    # --- plain single characters ------------------------------------------
    [ qr/C/,  '\\c,' ],
    [ qr/Q/,  '\\ab' ],
    [ qr/\{/, '\\ae' ],
    [ qr/A/,  '\\as' ],
    [ qr/6/,  '\\at' ],
    [ qr/B/,  '\\be' ],
    [ qr/O/,  '\\ct' ],
    [ qr/D/,  '\\dh' ],
    [ qr/E/,  '\\ep' ],
    [ qr/3/,  '\\er' ],
    [ qr/4/,  '\\fh' ],
    [ qr/U/,  '\\hs' ],
    [ qr/H/,  '\\ht' ],
    [ qr/1/,  '\\i-' ],
    [ qr/I/,  '\\ic' ],
    [ qr/5/,  '\\l~' ],
    [ qr/F/,  '\\mj' ],
    [ qr/M/,  '\\mt' ],
    [ qr/N/,  '\\ng' ],
    [ qr/J/,  '\\nj' ],
    [ qr/8/,  '\\o-' ],
    [ qr/7/,  '\\rh' ],
    [ qr/R/,  '\\ri' ],
    [ qr/S/,  '\\sh' ],
    [ qr/V/,  '\\vt' ],
    [ qr/W/,  '\\wt' ],
    [ qr/Y/,  '\\yc' ],
    [ qr/Z/,  '\\zh' ],

    # The letters a b c d e f h i j k l m n o p q r s t u v w x y z are the
    # same in both notations.  They need no rule: unmatched characters are
    # copied through unchanged.

    # FIXME: the following entries were truncated in the copy of this script
    # that was available when it was reformatted (the text from what looks
    # like the start of a "(?<..." look-behind up to the next "=>" is
    # missing).  They are left disabled rather than guessed; restore them
    # from an intact copy and insert each one in the section matching the
    # length of its SAMPA sequence.
    #
    #   PRAAT key survives, SAMPA pattern lost:
    #       \h^    \oe    \rl    \te    \gs    \o/
    #   SAMPA pattern survives, PRAAT key lost:
    #       H\\    r\\`    s`    P    r\\(?!`)    _(?!`|^|<|r|})
    #   Two further entries (one after \te, one after \gs) are lost entirely.
);
# End of translation table

# sampa_to_praat($text)
#
# Convert one string from SAMPA to PRAAT notation and return the result.
# The string is scanned once from left to right; at each position the first
# matching rule of @RULES is applied, otherwise one character (including a
# newline) is copied unchanged.  An empty string yields an empty string.
sub sampa_to_praat {
    my ($text) = @_;
    my $converted = '';

    # \G anchors every match at the current scan position and /c keeps that
    # position when a rule fails, so pos($text) only ever moves forward.
    pos($text) = 0;
    POSITION:
    while (pos($text) < length $text) {
        for my $rule (@RULES) {
            my ($sampa, $praat) = @$rule;
            if ($text =~ /\G$sampa/gc) {
                $converted .= $praat;
                next POSITION;
            }
        }

        # No rule applies here: pass the character through as it is.
        if ($text =~ /\G(.)/gcs) {
            $converted .= $1;
        }
    }

    return $converted;
}

# Processing loop
# Reads the input line by line (files named on the command line, or STDIN)
# and prints the converted text to STDOUT.
sub main {
    while (my $line = <>) {
        print sampa_to_praat($line);
    }
    return;
}

# Run the converter only when executed as a program, so that the file can
# also be loaded (e.g. by a test) without consuming STDIN.
main() unless caller;

1;
# EOF