DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.
Text Synonymizer In Perl - Unintelligent Text Rewriter
It is very scrappy and silly, but you get some funny results. It uses the great Lingua::EN::Tagger for POS (part-of-speech) tagging and WordNet::QueryData to look up synonyms.
use WordNet::QueryData;
use Lingua::EN::Tagger;
use strict;
use warnings;

# Text synonymizer: reads the file named by the first command-line argument,
# replaces nouns, adjectives and verbs with a WordNet synonym where one is
# found, and prints the rewritten text to STDOUT.

my $t  = Lingua::EN::Tagger->new;    # part-of-speech tagger
my $wn = WordNet::QueryData->new;    # WordNet lookup handle

my $path = $ARGV[0];
die "Usage: $0 FILE\n" unless defined $path;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and a failed open is reported instead of
# silently producing empty output.
open(my $fh, '<', $path) or die "Cannot open '$path': $!\n";
my $text = do { local $/; <$fh> };    # slurp the whole file
close($fh);
$text = '' unless defined $text;

my $tagged = $t->add_tags($text);

# Each match is one tagged token of the form <tag>word<...>; only tokens made
# up entirely of word characters are considered.
while ($tagged =~ /\<(.+?)\>(\w+)\<.+?\>/g) {
    my $sense = $1;
    my $word  = $2;

    # Map the tagger's tag to the part-of-speech letter used in the WordNet
    # query. The tests are applied in order, so a later match wins.
    my $newsense = "";
    $newsense = "n" if ($sense =~ /nn/i);
    $newsense = "a" if ($sense =~ /jj/i);
    $newsense = "v" if ($sense =~ /vb/i);
    next unless $newsense;

    for my $candidate ($wn->querySense($word . "#" . $newsense . "#1", "syns")) {
        # Keep only the part before the first '#' (presumably the results
        # come back in the same word#pos#sense form as the query).
        (my $synonym = $candidate) =~ s/\#.+//;

        # Skip candidates that merely contain the original word; compared
        # case-insensitively so "Dog" is not "replaced" by "dog".
        next if $synonym =~ /\Q$word\E/i;

        # Replace the first whole-word occurrence only. The word boundaries
        # stop the substitution from landing inside a longer word.
        $text =~ s/\b\Q$word\E\b/$synonym/;
        last;
    }
}

print $text;
exit;

Or, to do it to a web page / URL, use HTML::Parser like so:
use WordNet::QueryData;
use Lingua::EN::Tagger;
use HTML::Parser;
use LWP::Simple;
# Web-page synonymizer: fetches a page, and for every text segment the HTML
# parser reports, prints the original and the synonymized version.

# File-level handles used by synonymize().
my $t  = Lingua::EN::Tagger->new;    # part-of-speech tagger
my $wn = WordNet::QueryData->new;    # WordNet lookup handle

# Call text() for each text event, passing it the segment's text.
my $p = HTML::Parser->new( text_h => [\&text, "text"] );

my $url  = "http://www.petercooper.co.uk/";
my $html = get($url);

# get() yields undef when the fetch fails; stop with a clear message rather
# than handing undef to the parser.
die "Could not fetch $url\n" unless defined $html;

$p->parse($html);
$p->eof;    # signal end of document so any buffered text is flushed
exit;
# Text-event handler for the HTML parser.
#
# Takes one text segment, collapses every run of whitespace to a single
# space and, if the segment contains at least five consecutive word
# characters, prints it followed by its synonymized form. Shorter segments
# are ignored. Returns nothing useful.
sub text {
    my $text = shift;
    return unless defined $text;

    $text =~ s/\s+/ /g;

    # Skip segments with no run of five word characters.
    return unless $text =~ /\w{5}/;

    print "WAS: " . $text . "\n\n";
    print "BECOMES: " . synonymize($text) . "\n\n\n\n";
    return;
}
# Rewrite a piece of text by swapping nouns, adjectives and verbs for a
# WordNet synonym where one is found.
#
# Parameters:
#   $text - the text to rewrite.
# Returns:
#   The rewritten text; words without a usable synonym are left unchanged.
#
# Uses the file-level tagger ($t) and WordNet handle ($wn).
sub synonymize {
    my $text = shift;
    my $tagged = $t->add_tags($text);

    # Each match is one tagged token of the form <tag>word<...>; only tokens
    # made up entirely of word characters are considered.
    while ($tagged =~ /\<(.+?)\>(\w+)\<.+?\>/g) {
        my $sense = $1;
        my $word  = $2;

        # Map the tagger's tag to the part-of-speech letter used in the
        # WordNet query. The tests are applied in order, so a later match wins.
        my $newsense = "";
        $newsense = "n" if ($sense =~ /nn/i);
        $newsense = "a" if ($sense =~ /jj/i);
        $newsense = "v" if ($sense =~ /vb/i);
        next unless $newsense;

        for my $candidate ($wn->querySense($word . "#" . $newsense . "#1", "syns")) {
            # Keep only the part before the first '#' (presumably the results
            # come back in the same word#pos#sense form as the query).
            (my $synonym = $candidate) =~ s/\#.+//;

            # Skip candidates that merely contain the original word; compared
            # case-insensitively so "Dog" is not "replaced" by "dog".
            next if $synonym =~ /\Q$word\E/i;

            # Replace the first whole-word occurrence only. The word
            # boundaries stop the substitution from landing inside a longer
            # word.
            $text =~ s/\b\Q$word\E\b/$synonym/;
            last;
        }
    }

    return $text;
}




