# Copyright (c) 2009 by Grzegorz Kondrak
#
# purpose: recover lexical morpho boundaries from the encoded file eml_le
# usage: cut -d'\' -f2 LDC96L14/english/eml/eml.cd | paste - eml_le | perl .p
#    or: cut -d'\' -f2 LDC96L14/english/emw/emw.cd | paste - emw_e | perl .p
#
# Input (stdin, or files named on the command line): one record per line,
# "<symbols><TAB><code>", where <code> is read one character per symbol.
# Output (stdout): one line per record -- the symbols with the boundary
# marker inserted before every non-initial symbol whose code character is
# "1", or an empty line when the record carries no code at all.

use strict;
use warnings;

my $mb = "+";    # morpho boundary

# insert_boundaries($symb, $code)
#
# Returns $symb with $mb inserted in front of every symbol, other than the
# first, whose same-position character in $code is "1".
#
#   $symb - string of symbols; split into individual characters
#   $code - string of per-symbol flags; split into individual characters
#
# A flag at position 0 is ignored, so the result never starts with the
# marker. If $code is shorter than $symb, the symbols without a flag are
# copied through with no boundary.
sub insert_boundaries {
    my ($symb, $code) = @_;

    my @symbols = split //, $symb;
    my @flags   = split //, $code;

    my $out = "";
    for my $i (0 .. $#symbols) {
        my $flag = $flags[$i];

        # Each flag is a single character, so a string comparison is
        # equivalent to the numeric test and does not warn on non-digits
        # or on a missing flag.
        $out .= $mb if $i > 0 && defined $flag && $flag eq '1';
        $out .= $symbols[$i];
    }
    return $out;
}

# Reads records from <> and prints one output line per record.
# Dies (reporting script and input line numbers) if a record has no symbol
# field.
sub main {
    while (my $line = <>) {
        chomp $line;
        my ($symb, $code) = split /\t/, $line;

        # Test definedness and length rather than truth, so that a field
        # consisting of the single character "0" is accepted.
        die "missing symbol field" unless defined $symb && length $symb;

        # No code for this record: emit an empty line to keep the output
        # aligned line-for-line with the input. A code of "0" is a real
        # code (one symbol, no boundary) and must not be taken for absent.
        if (!defined $code || !length $code) {
            print "\n";
            next;
        }

        print insert_boundaries($symb, $code), "\n";
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require/do (e.g. by a test) without consuming input.
main() unless caller;