# Copyright (c) 2009 by Grzegorz Kondrak
# purpose: recover phonetic morpho boundaries from the encoded file
# usage: cut -d'\' -f6 LDC96L14/english/epl/epl.cd |paste - epl_e |perl .p
# or:    cut -d'\' -f7 LDC96L14/english/epw/epw.cd | paste - epw_e | perl .p
#
# Input:  one record per line, "<symbols>TAB<code>", where <code> holds one
#         character per symbol and a '1' marks a boundary before that symbol.
# Output: one line per record with the boundary marker inserted; a blank
#         line is printed for records that carry no code at all.

use strict;
use warnings;

my $mb = "+";    # morpho boundary

# Insert $boundary before every symbol whose code character is '1'.
#   $symb     - symbol string, one character per symbol
#   $code     - flag string aligned with $symb character by character
#   $boundary - marker to insert
# Returns the symbol string with markers inserted.  A flag on the first
# symbol is ignored (no leading marker), and symbols without a matching
# code character are copied through unchanged.
sub insert_boundaries {
    my ($symb, $code, $boundary) = @_;

    my @symbols = split //, $symb;
    my @flags   = split //, $code;

    my $out = "";
    for my $i (0 .. $#symbols) {
        # String comparison: code characters are flags, not numbers, and
        # the code may be shorter than the symbol string.
        if ($i > 0 && defined $flags[$i] && $flags[$i] eq '1') {
            $out .= $boundary;
        }
        $out .= $symbols[$i];
    }
    return $out;
}

# Read records from the files named on the command line (or stdin) and
# print the decoded form of each one.
sub main {
    while (my $line = <>) {
        chomp $line;
        my ($symb, $code) = split /\t/, $line;

        # Test definedness/length rather than truth: "0" is a legal value.
        die "missing symbol string at input line $.\n"
            unless defined $symb && length $symb;

        # Drop syllable/stress punctuation, which the code does not cover.
        $symb =~ s/[-'"]//g;

        if (!defined $code || $code eq '') {
            print "\n";
            next;
        }

        print insert_boundaries($symb, $code, $mb), "\n";
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without consuming stdin.
main() unless caller;