#!/usr/bin/perl
#
# Written by Marc Liyanage (http://www.entropy.ch)
#
# Filter: reads all input (files named on the command line, or standard
# input), remaps Mac OS Roman high-bit bytes to their ISO-8859-1
# equivalents, re-encodes every high-bit byte as a two-byte UTF-8 sequence
# and prints the result to standard output.
#

use strict;
use warnings;

# Only run the filter when this file is executed as a script. This lets the
# conversion subs below be loaded (e.g. with require) without reading input.
main() unless caller;

# Slurp all input, convert it and print it.
sub main {
    print iso2utf8(mac2iso(join("", <>)));
    return;
}

# mac2iso($string)
#
# Returns a copy of $string in which every Mac OS Roman byte that has an
# ISO-8859-1 counterpart is replaced by that counterpart. Bytes that are not
# listed in the table (7-bit characters, bytes that are identical in both
# encodings, and Mac characters with no ISO-8859-1 equivalent) pass through
# unchanged.
#
sub mac2iso {
    my ($string) = @_;

    # The two lists are positional: the Nth byte of the first list is
    # replaced by the Nth byte of the second. tr/// translates every
    # character exactly once, so an output byte that also appears in the
    # search list is not translated a second time. Do not split this into
    # several tr/// statements -- that would chain the mappings.
    #
    # The table includes 0xCD -> 0xD5 (capital O with tilde); without that
    # pair the character would come out as 0xCD, which is capital I with
    # acute in ISO-8859-1.
    $string =~ tr{\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa1\xa4\xa6\xa7\xa8\xab\xac\xae\xaf\xb4\xbb\xbc\xbe\xbf\xc0\xc1\xc2\xc7\xc8\xca\xcb\xcc\xcd\xd6\xd8\xdb\xe1\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf1\xf2\xf3\xf4\xf8\xfc}
                 {\xc4\xc5\xc7\xc9\xd1\xd6\xdc\xe1\xe0\xe2\xe4\xe3\xe5\xe7\xe9\xe8\xea\xeb\xed\xec\xee\xef\xf1\xf3\xf2\xf4\xf6\xf5\xfa\xf9\xfb\xfc\xb0\xa7\xb6\xdf\xae\xb4\xa8\xc6\xd8\xa5\xaa\xba\xe6\xf8\xbf\xa1\xac\xab\xbb\xa0\xc0\xc3\xd5\xf7\xff\xa4\xb7\xc2\xca\xc1\xcb\xc8\xcd\xce\xcf\xcc\xd3\xd4\xd2\xda\xdb\xd9\xaf\xb8};

    return $string;
}

# iso2utf8($string)
#
# Returns a copy of $string in which every character in the range
# 0x80-0xff is replaced by its two-byte UTF-8 encoding. 7-bit characters
# are left untouched.
#
sub iso2utf8 {
    my ($string) = @_;

    # Per-call memo: each distinct high-bit character is encoded only once.
    # An encoded value is always a non-empty two-character string, so the
    # truthiness test performed by ||= is safe here.
    my %utf8_for;

    $string =~ s/([\x80-\xff])/$utf8_for{$1} ||= encode_iso($1)/eg;

    return $string;
}

# encode_iso($char)
#
# Takes a single ISO-8859-1 character and returns its UTF-8 encoding as a
# string of bytes. Characters in 0x80-0xff yield a two-byte sequence;
# a 7-bit character (or an empty string) is returned unchanged, because it
# is already valid UTF-8 and a two-byte form would be an overlong encoding.
# Code points above 0xff are not supported.
#
sub encode_iso {
    my ($char) = @_;

    my $iso = ord($char);
    return $char if $iso < 0x80;

    # Leading byte: the two high bits of the value, shifted down by six,
    # OR-ed with the two-byte-sequence marker 110xxxxx (0xc0).
    my $lead = (($iso & 0xc0) >> 6) | 0xc0;

    # Trailing byte: the six low bits of the value OR-ed with the
    # continuation marker 10xxxxxx (0x80).
    my $trail = ($iso & 0x3f) | 0x80;

    return chr($lead) . chr($trail);
}

1;