[Oberon] Cyrillic encodings (KOI8-R, KOI8-U and CP1251) support

Yaroslav Romanchenko mail at sage.com.ua
Wed Jun 21 21:51:33 CEST 2006


Hallo, All!

It would be nice if Bluebottle supported the following Cyrillic
encodings: KOI8-R, KOI8-U and cp1251 ("windows-1251" in MIME).
They are especially widely used on the Web, so it would be really great to
support these encodings in the WebBrowser application too. The encodings
cp866 and ISO-8859-5 are used considerably more rarely, but support for them
possibly needs to be implemented as well.

A code example implementing these encodings for the AosTextUtilities.Mod module
and the corresponding settings for the AosConfig.XML file are provided
below.

How can such encoding conversions be done in a more efficient way? I've used
CASE statements. In my code examples only the Cyrillic letters are
converted. As described in the corresponding RFCs (KOI8-R - RFC1489, KOI8-U -
RFC2319), the special characters (represented in Unicode) must be converted too.
Maybe a better solution for the decoders would be to use arrays of LONGINT with
256 pre-initialized values - the Unicode values corresponding to the input
character codes, which serve as the index.

A good resource with all of the encoding tables described above:
http://czyborra.com/charsets/cyrillic.html


Piece of code:

	(* Text decoder that reads a CP-1251 ("windows-1251") byte stream into an AosTexts.Text.
		Only the Cyrillic letters listed in the CASE statement of Open are mapped to their
		Unicode code points; every other byte value is stored unchanged.
		CR, LF and CR LF line ends are all stored as a single LF. *)
	CP1251Decoder = OBJECT(AosCodecs.TextDecoder)
	VAR errors : BOOLEAN;	(* TRUE once Error has been called since the last Open *)
		in : AosIO.Reader;	(* reader passed to the last successful Open *)
		text : AosTexts.Text;	(* text built by Open, returned by GetText *)

		(* Write an error message to the kernel log and remember that an error occurred. *)
		PROCEDURE Error(x : ARRAY OF CHAR);
		BEGIN
			AosOut.String("CP-1251 Decoder Error: ");
			AosOut.String(x); AosOut.Ln;
			errors := TRUE
		END Error;

		(* Write an informational message to the kernel log. *)
		PROCEDURE Log(x : ARRAY OF CHAR);
		BEGIN
			AosOut.String("CP-1251 Decoder Info: ");
			AosOut.String(x); AosOut.Ln;
		END Log;

		(* Read the complete stream "in", convert it and store the result in a new text.
			res is -1 if "in" is NIL and 0 otherwise. *)
		PROCEDURE Open*(in : AosIO.Reader; VAR res : LONGINT);
		VAR i, m: LONGINT;
			tempUCS32 : ARRAY 1024 OF Char32;
			ch, last : CHAR;
		BEGIN
			errors := FALSE;
			res := -1;
			IF in = NIL THEN Error("Input Stream is NIL"); RETURN; END;
			SELF.in := in;

			NEW(text);
			text.AcquireWrite;
			m := LEN(tempUCS32) - 1;	(* last index, reserved for the 0 terminator *)
			i := 0;
			last := 0X;	(* no previous character yet; must be defined before the CR LF test below *)
			REPEAT
				in.Char(ch);
				(* buffer full: terminate it, append it to the text and start over *)
				IF i = m THEN tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32); i := 0 END;
				(* skip the LF of a CR LF pair, the CR has already been stored as LF *)
				IF (last # CR) OR (ch # LF) THEN
					IF ch = CR THEN tempUCS32[i] := ORD(LF)
					ELSE
						CASE ORD(ch) OF
							0C0H..0FFH : tempUCS32[i] := ORD(ch) + 350H; (* 0C0H..0FFH -> 410H..44FH *)

						|	0B8H : tempUCS32[i] := 451H; (* CYRILLIC SMALL LETTER IO *)
						|	0A8H : tempUCS32[i] := 401H; (* CYRILLIC CAPITAL LETTER IO *)

						|	0B4H : tempUCS32[i] := 491H; (* CYRILLIC SMALL LETTER UKRAINIAN KGE (WITH UPTURN) *)
						|	0A5H : tempUCS32[i] := 490H; (* CYRILLIC CAPITAL LETTER UKRAINIAN KGE (WITH UPTURN) *)

						|	0BAH : tempUCS32[i] := 454H; (* CYRILLIC SMALL LETTER UKRAINIAN IE *)
						|	0AAH : tempUCS32[i] := 404H; (* CYRILLIC CAPITAL LETTER UKRAINIAN IE *)

						|	0BFH : tempUCS32[i] := 457H; (* CYRILLIC SMALL LETTER UKRAINIAN YI *)
						|	0AFH : tempUCS32[i] := 407H; (* CYRILLIC CAPITAL LETTER UKRAINIAN YI *)

						|	0B3H : tempUCS32[i] := 456H; (* CYRILLIC SMALL LETTER BELORUSSIAN-UKRAINIAN I *)
						|	0B2H : tempUCS32[i] := 406H; (* CYRILLIC CAPITAL LETTER BELORUSSIAN-UKRAINIAN I *)

						ELSE
							(* all other byte values are passed through without conversion *)
							tempUCS32[i] := ORD(ch);
						END;
					END;
					INC(i)
				END;
				last := ch
			UNTIL (in.res # AosIO.Ok);
			(* NOTE(review): the character delivered by the failing read is stored as well;
				presumably it is 0X and therefore only acts as a terminator - confirm against AosIO *)
			tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32);
			res := 0;
			text.ReleaseWrite
		END Open;

		(* Return the text created by the last call of Open. *)
		PROCEDURE GetText*() : AosTexts.Text;
		BEGIN
			RETURN text;
		END GetText;

	END CP1251Decoder;

	(* Text encoder that writes an AosTexts.Text as a CP-1251 ("windows-1251") byte stream.
		Only the Cyrillic letters listed in the CASE statement of WriteText are converted.
		Code points below 256 are written unchanged, all remaining code points are dropped. *)
	CP1251Encoder = OBJECT(AosCodecs.TextEncoder)
	VAR out: AosIO.Writer;	(* writer passed to the last successful Open *)

		(* Set the writer that receives the encoded bytes. A NIL writer is rejected with a log message. *)
		PROCEDURE Open*(out : AosIO.Writer);
		BEGIN
			IF out = NIL THEN
				(* the literal must stay on one line, Oberon strings cannot span lines *)
				AosOut.String("CP-1251 Encoder Error: output stream is NIL"); AosOut.Ln;
			ELSE SELF.out := out;
			END;
		END Open;

		(* Encode the whole text and write it to the writer given to Open.
			res is 0 on success and -1 if no writer has been set. *)
		PROCEDURE WriteText*(text : AosTexts.Text; VAR res : LONGINT);
		VAR r : AosTexts.TextReader; ch : AosTexts.Char32; i : LONGINT;
		BEGIN
			res :=  -1;
			IF out = NIL THEN RETURN END;	(* Open was not called or was called with NIL *)
			text.AcquireRead;
			NEW(r, text);
			FOR i := 0 TO text.GetLength() - 1 DO
				r.ReadCh(ch);

				CASE ch OF

					410H..44FH : ch := ch - 350H; (* 410H..44FH -> 0C0H..0FFH *)

				|	451H : ch := 0B8H; (* CYRILLIC SMALL LETTER IO *)
				|	401H : ch := 0A8H; (* CYRILLIC CAPITAL LETTER IO *)

				|	491H : ch := 0B4H; (* CYRILLIC SMALL LETTER UKRAINIAN KGE (WITH UPTURN) *)
				|	490H : ch := 0A5H; (* CYRILLIC CAPITAL LETTER UKRAINIAN KGE (WITH UPTURN) *)

				|	454H : ch := 0BAH; (* CYRILLIC SMALL LETTER UKRAINIAN IE *)
				|	404H : ch := 0AAH; (* CYRILLIC CAPITAL LETTER UKRAINIAN IE *)

				|	457H : ch := 0BFH; (* CYRILLIC SMALL LETTER UKRAINIAN YI *)
				|	407H : ch := 0AFH; (* CYRILLIC CAPITAL LETTER UKRAINIAN YI *)

				|	456H : ch := 0B3H; (* CYRILLIC SMALL LETTER BELORUSSIAN-UKRAINIAN I *)
				|	406H : ch := 0B2H; (* CYRILLIC CAPITAL LETTER BELORUSSIAN-UKRAINIAN I *)

				ELSE
					(* no conversion for any other code point *)
				END;

				(* NOTE(review): unconverted code points 80H..0FFH are written as raw bytes although
					these bytes denote other characters in CP-1251 - confirm whether this is intended *)
				IF (ch >= 0) & (ch < 256) THEN out.Char(CHR(ch)) END
			END;
			out.Update;
			text.ReleaseRead;
			res := 0;
		END WriteText;

	END CP1251Encoder;

	(* Text decoder that reads a KOI8-R byte stream into an AosTexts.Text.
		Only the Cyrillic letters listed in KOI8RToUCS32 are mapped to their Unicode
		code points; every other byte value is stored unchanged.
		CR, LF and CR LF line ends are all stored as a single LF. *)
	KOI8RDecoder = OBJECT(AosCodecs.TextDecoder)
	VAR errors : BOOLEAN;	(* TRUE once Error has been called since the last Open *)
		in : AosIO.Reader;	(* reader passed to the last successful Open *)
		text : AosTexts.Text;	(* text built by Open, returned by GetText *)

		(* map KOI8-R to UCS32; values without a table entry are returned unchanged *)
		PROCEDURE KOI8RToUCS32(ch : LONGINT) : LONGINT;
		VAR ret : LONGINT;
		BEGIN

			CASE ch OF

				(* capital letters, 410H..42FH *)
				0E1H :	 ret := 410H;
			|	0E2H :	 ret := 411H;
			|	0F7H :	 ret := 412H;
			|	0E7H :	 ret := 413H;

			|	0E4H :	 ret := 414H;
			|	0E5H :	 ret := 415H;
			|	0F6H :	 ret := 416H;
			|	0FAH :	 ret := 417H;

			|	0E9H :	 ret := 418H;
			|	0EAH :	 ret := 419H;
			|	0EBH :	 ret := 41AH;
			|	0ECH :	 ret := 41BH;

			|	0EDH :	 ret := 41CH;
			|	0EEH :	 ret := 41DH;
			|	0EFH :	 ret := 41EH;
			|	0F0H :	 ret := 41FH;

			|	0F2H :	 ret := 420H;
			|	0F3H :	 ret := 421H;
			|	0F4H :	 ret := 422H;
			|	0F5H :	 ret := 423H;

			|	0E6H :	 ret := 424H;
			|	0E8H :	 ret := 425H;
			|	0E3H :	 ret := 426H;
			|	0FEH :	 ret := 427H;

			|	0FBH :	 ret := 428H;
			|	0FDH :	 ret := 429H;
			|	0FFH :	 ret := 42AH;
			|	0F9H :	 ret := 42BH;

			|	0F8H :	 ret := 42CH;
			|	0FCH :	 ret := 42DH;
			|	0E0H :	 ret := 42EH;
			|	0F1H :	 ret := 42FH;

				(* small letters, 430H..44FH *)
			|	0C1H :	 ret := 430H;
			|	0C2H :	 ret := 431H;
			|	0D7H :	 ret := 432H;
			|	0C7H :	 ret := 433H;

			|	0C4H :	 ret := 434H;
			|	0C5H :	 ret := 435H;
			|	0D6H :	 ret := 436H;
			|	0DAH :	 ret := 437H;

			|	0C9H :	 ret := 438H;
			|	0CAH :	 ret := 439H;
			|	0CBH :	 ret := 43AH;
			|	0CCH :	 ret := 43BH;

			|	0CDH :	 ret := 43CH;
			|	0CEH :	 ret := 43DH;
			|	0CFH :	 ret := 43EH;
			|	0D0H :	 ret := 43FH;

			|	0D2H :	 ret := 440H;
			|	0D3H :	 ret := 441H;
			|	0D4H :	 ret := 442H;
			|	0D5H :	 ret := 443H;

			|	0C6H :	 ret := 444H;
			|	0C8H :	 ret := 445H;
			|	0C3H :	 ret := 446H;
			|	0DEH :	 ret := 447H;

			|	0DBH :	 ret := 448H;
			|	0DDH :	 ret := 449H;
			|	0DFH :	 ret := 44AH;
			|	0D9H :	 ret := 44BH;

			|	0D8H :	 ret := 44CH;
			|	0DCH :	 ret := 44DH;
			|	0C0H :	 ret := 44EH;
			|	0D1H :	 ret := 44FH;

			|	0A3H :	 ret := 451H; (* CYRILLIC SMALL LETTER IO *)
			|	0B3H :	 ret := 401H; (* CYRILLIC CAPITAL LETTER IO *)

			ELSE
				ret := ch
			END;

			RETURN ret
		END KOI8RToUCS32;

		(* Write an error message to the kernel log and remember that an error occurred. *)
		PROCEDURE Error(x : ARRAY OF CHAR);
		BEGIN
			AosOut.String("KOI8-R Decoder Error: ");
			AosOut.String(x); AosOut.Ln;
			errors := TRUE
		END Error;

		(* Write an informational message to the kernel log. *)
		PROCEDURE Log(x : ARRAY OF CHAR);
		BEGIN
			AosOut.String("KOI8-R Decoder Info: ");
			AosOut.String(x); AosOut.Ln;
		END Log;


		(* Read the complete stream "in", convert it and store the result in a new text.
			res is -1 if "in" is NIL and 0 otherwise. *)
		PROCEDURE Open*(in : AosIO.Reader; VAR res : LONGINT);
		VAR i, m: LONGINT;
			tempUCS32 : ARRAY 1024 OF Char32;
			ch, last : CHAR;
		BEGIN
			errors := FALSE;
			res := -1;
			IF in = NIL THEN Error("Input Stream is NIL"); RETURN; END;
			SELF.in := in;

			NEW(text);
			text.AcquireWrite;
			m := LEN(tempUCS32) - 1;	(* last index, reserved for the 0 terminator *)
			i := 0;
			last := 0X;	(* no previous character yet; must be defined before the CR LF test below *)
			REPEAT
				in.Char(ch);
				(* buffer full: terminate it, append it to the text and start over *)
				IF i = m THEN tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32); i := 0 END;
				(* skip the LF of a CR LF pair, the CR has already been stored as LF *)
				IF (last # CR) OR (ch # LF) THEN
					IF ch = CR THEN tempUCS32[i] := ORD(LF)
					ELSE
						tempUCS32[i] := KOI8RToUCS32(ORD(ch));
					END;
					INC(i)
				END;
				last := ch
			UNTIL (in.res # AosIO.Ok);
			(* NOTE(review): the character delivered by the failing read is stored as well;
				presumably it is 0X and therefore only acts as a terminator - confirm against AosIO *)
			tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32);
			res := 0;
			text.ReleaseWrite
		END Open;

		(* Return the text created by the last call of Open. *)
		PROCEDURE GetText*() : AosTexts.Text;
		BEGIN
			RETURN text;
		END GetText;

	END KOI8RDecoder;

	(* Text encoder that writes an AosTexts.Text as a KOI8-R byte stream.
		Only the Cyrillic letters listed in UCS32ToKOI8R are converted.
		Code points below 256 are written unchanged, all remaining code points are dropped. *)
	KOI8REncoder = OBJECT(AosCodecs.TextEncoder)
	VAR out: AosIO.Writer;	(* writer passed to the last successful Open *)

		(* map UCS32 to KOI8-R; values without a table entry are returned unchanged *)
		PROCEDURE UCS32ToKOI8R(ch : LONGINT) : LONGINT;
		VAR ret : LONGINT;
		BEGIN

			CASE ch OF

				(* capital letters, 410H..42FH *)
				410H : ret := 0E1H;
			|	411H : ret := 0E2H;
			|	412H : ret := 0F7H;
			|	413H : ret := 0E7H;

			|	414H : ret := 0E4H;
			|	415H : ret := 0E5H;
			|	416H : ret := 0F6H;
			|	417H : ret := 0FAH;

			|	418H : ret := 0E9H;
			|	419H : ret := 0EAH;
			|	41AH : ret := 0EBH;
			|	41BH : ret := 0ECH;

			|	41CH : ret := 0EDH;
			|	41DH : ret := 0EEH;
			|	41EH : ret := 0EFH;
			|	41FH : ret := 0F0H;

			|	420H : ret := 0F2H;
			|	421H : ret := 0F3H;
			|	422H : ret := 0F4H;
			|	423H : ret := 0F5H;

			|	424H : ret := 0E6H;
			|	425H : ret := 0E8H;
			|	426H : ret := 0E3H;
			|	427H : ret := 0FEH;

			|	428H : ret := 0FBH;
			|	429H : ret := 0FDH;
			|	42AH : ret := 0FFH;
			|	42BH : ret := 0F9H;

			|	42CH : ret := 0F8H;
			|	42DH : ret := 0FCH;
			|	42EH : ret := 0E0H;
			|	42FH : ret := 0F1H;

				(* small letters, 430H..44FH *)
			|	430H : ret := 0C1H;
			|	431H : ret := 0C2H;
			|	432H : ret := 0D7H;
			|	433H : ret := 0C7H;

			|	434H : ret := 0C4H;
			|	435H : ret := 0C5H;
			|	436H : ret := 0D6H;
			|	437H : ret := 0DAH;

			|	438H : ret := 0C9H;
			|	439H : ret := 0CAH;
			|	43AH : ret := 0CBH;
			|	43BH : ret := 0CCH;

			|	43CH : ret := 0CDH;
			|	43DH : ret := 0CEH;
			|	43EH : ret := 0CFH;
			|	43FH : ret := 0D0H;

			|	440H : ret := 0D2H;
			|	441H : ret := 0D3H;
			|	442H : ret := 0D4H;
			|	443H : ret := 0D5H;

			|	444H : ret := 0C6H;
			|	445H : ret := 0C8H;
			|	446H : ret := 0C3H;
			|	447H : ret := 0DEH;

			|	448H : ret := 0DBH;
			|	449H : ret := 0DDH;
			|	44AH : ret := 0DFH;
			|	44BH : ret := 0D9H;

			|	44CH : ret := 0D8H;
			|	44DH : ret := 0DCH;
			|	44EH : ret := 0C0H;
			|	44FH : ret := 0D1H;

			|	451H : ret := 0A3H; (* CYRILLIC SMALL LETTER IO *)
			|	401H : ret := 0B3H; (* CYRILLIC CAPITAL LETTER IO *)

			ELSE
				ret := ch
			END;

			RETURN ret
		END UCS32ToKOI8R;

		(* Set the writer that receives the encoded bytes. A NIL writer is rejected with a log message. *)
		PROCEDURE Open*(out : AosIO.Writer);
		BEGIN
			IF out = NIL THEN
				(* the literal must stay on one line, Oberon strings cannot span lines *)
				AosOut.String("KOI8-R Encoder Error: output stream is NIL"); AosOut.Ln;
			ELSE SELF.out := out;
			END;
		END Open;

		(* Encode the whole text and write it to the writer given to Open.
			res is 0 on success and -1 if no writer has been set. *)
		PROCEDURE WriteText*(text : AosTexts.Text; VAR res : LONGINT);
		VAR r : AosTexts.TextReader; ch : AosTexts.Char32; i : LONGINT;
		BEGIN
			res :=  -1;
			IF out = NIL THEN RETURN END;	(* Open was not called or was called with NIL *)
			text.AcquireRead;
			NEW(r, text);
			FOR i := 0 TO text.GetLength() - 1 DO
				r.ReadCh(ch);
				ch := UCS32ToKOI8R(ch);
				(* NOTE(review): unconverted code points 80H..0FFH are written as raw bytes although
					these bytes denote other characters in KOI8-R - confirm whether this is intended *)
				IF (ch >= 0) & (ch < 256) THEN out.Char(CHR(ch)) END
			END;
			out.Update;
			text.ReleaseRead;
			res := 0;
		END WriteText;

	END KOI8REncoder;

	(* Text decoder that reads a KOI8-U byte stream into an AosTexts.Text.
		Only the Cyrillic letters listed in KOI8UToUCS32 are mapped to their Unicode
		code points; every other byte value is stored unchanged.
		CR, LF and CR LF line ends are all stored as a single LF. *)
	KOI8UDecoder = OBJECT(AosCodecs.TextDecoder)
	VAR errors : BOOLEAN;	(* TRUE once Error has been called since the last Open *)
		in : AosIO.Reader;	(* reader passed to the last successful Open *)
		text : AosTexts.Text;	(* text built by Open, returned by GetText *)

		(* map KOI8-U to UCS32; values without a table entry are returned unchanged *)
		PROCEDURE KOI8UToUCS32(ch : LONGINT) : LONGINT;
		VAR ret : LONGINT;
		BEGIN

			CASE ch OF

				(* capital letters, 410H..42FH *)
				0E1H : ret := 410H;
			|	0E2H : ret := 411H;
			|	0F7H : ret := 412H;
			|	0E7H : ret := 413H;

			|	0E4H : ret := 414H;
			|	0E5H : ret := 415H;
			|	0F6H : ret := 416H;
			|	0FAH : ret := 417H;

			|	0E9H : ret := 418H;
			|	0EAH : ret := 419H;
			|	0EBH : ret := 41AH;
			|	0ECH : ret := 41BH;

			|	0EDH : ret := 41CH;
			|	0EEH : ret := 41DH;
			|	0EFH : ret := 41EH;
			|	0F0H : ret := 41FH;

			|	0F2H : ret := 420H;
			|	0F3H : ret := 421H;
			|	0F4H : ret := 422H;
			|	0F5H : ret := 423H;

			|	0E6H : ret := 424H;
			|	0E8H : ret := 425H;
			|	0E3H : ret := 426H;
			|	0FEH : ret := 427H;

			|	0FBH : ret := 428H;
			|	0FDH : ret := 429H;
			|	0FFH : ret := 42AH;
			|	0F9H : ret := 42BH;

			|	0F8H : ret := 42CH;
			|	0FCH : ret := 42DH;
			|	0E0H : ret := 42EH;
			|	0F1H : ret := 42FH;

				(* small letters, 430H..44FH *)
			|	0C1H : ret := 430H;
			|	0C2H : ret := 431H;
			|	0D7H : ret := 432H;
			|	0C7H : ret := 433H;

			|	0C4H : ret := 434H;
			|	0C5H : ret := 435H;
			|	0D6H : ret := 436H;
			|	0DAH : ret := 437H;

			|	0C9H : ret := 438H;
			|	0CAH : ret := 439H;
			|	0CBH : ret := 43AH;
			|	0CCH : ret := 43BH;

			|	0CDH : ret := 43CH;
			|	0CEH : ret := 43DH;
			|	0CFH : ret := 43EH;
			|	0D0H : ret := 43FH;

			|	0D2H : ret := 440H;
			|	0D3H : ret := 441H;
			|	0D4H : ret := 442H;
			|	0D5H : ret := 443H;

			|	0C6H : ret := 444H;
			|	0C8H : ret := 445H;
			|	0C3H : ret := 446H;
			|	0DEH : ret := 447H;

			|	0DBH : ret := 448H;
			|	0DDH : ret := 449H;
			|	0DFH : ret := 44AH;
			|	0D9H : ret := 44BH;

			|	0D8H : ret := 44CH;
			|	0DCH : ret := 44DH;
			|	0C0H : ret := 44EH;
			|	0D1H : ret := 44FH;

			|	0A3H : ret := 451H; (* CYRILLIC SMALL LETTER IO *)
			|	0B3H : ret := 401H; (* CYRILLIC CAPITAL LETTER IO *)

			|	0ADH : ret := 491H; (* CYRILLIC SMALL LETTER UKRAINIAN KGE (WITH UPTURN) *)
			|	0BDH : ret := 490H; (* CYRILLIC CAPITAL LETTER UKRAINIAN KGE (WITH UPTURN) *)

			|	0A4H : ret := 454H; (* CYRILLIC SMALL LETTER UKRAINIAN IE *)
			|	0B4H : ret := 404H; (* CYRILLIC CAPITAL LETTER UKRAINIAN IE *)

			|	0A7H : ret := 457H; (* CYRILLIC SMALL LETTER UKRAINIAN YI *)
			|	0B7H : ret := 407H; (* CYRILLIC CAPITAL LETTER UKRAINIAN YI *)

			|	0A6H : ret := 456H; (* CYRILLIC SMALL LETTER BELORUSSIAN-UKRAINIAN I *)
			|	0B6H : ret := 406H; (* CYRILLIC CAPITAL LETTER BELORUSSIAN-UKRAINIAN I *)

			ELSE
				ret := ch
			END;

			RETURN ret
		END KOI8UToUCS32;

		(* Write an error message to the kernel log and remember that an error occurred. *)
		PROCEDURE Error(x : ARRAY OF CHAR);
		BEGIN
			(* prefix names this decoder; it used to say KOI8-R, copied from KOI8RDecoder *)
			AosOut.String("KOI8-U Decoder Error: ");
			AosOut.String(x); AosOut.Ln;
			errors := TRUE
		END Error;

		(* Write an informational message to the kernel log. *)
		PROCEDURE Log(x : ARRAY OF CHAR);
		BEGIN
			AosOut.String("KOI8-U Decoder Info: ");
			AosOut.String(x); AosOut.Ln;
		END Log;


		(* Read the complete stream "in", convert it and store the result in a new text.
			res is -1 if "in" is NIL and 0 otherwise. *)
		PROCEDURE Open*(in : AosIO.Reader; VAR res : LONGINT);
		VAR i, m: LONGINT;
			tempUCS32 : ARRAY 1024 OF Char32;
			ch, last : CHAR;
		BEGIN
			errors := FALSE;
			res := -1;
			IF in = NIL THEN Error("Input Stream is NIL"); RETURN; END;
			SELF.in := in;

			NEW(text);
			text.AcquireWrite;
			m := LEN(tempUCS32) - 1;	(* last index, reserved for the 0 terminator *)
			i := 0;
			last := 0X;	(* no previous character yet; must be defined before the CR LF test below *)
			REPEAT
				in.Char(ch);
				(* buffer full: terminate it, append it to the text and start over *)
				IF i = m THEN tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32); i := 0 END;
				(* skip the LF of a CR LF pair, the CR has already been stored as LF *)
				IF (last # CR) OR (ch # LF) THEN
					IF ch = CR THEN tempUCS32[i] := ORD(LF)
					ELSE
						tempUCS32[i] := KOI8UToUCS32(ORD(ch));
					END;
					INC(i)
				END;
				last := ch
			UNTIL (in.res # AosIO.Ok);
			(* NOTE(review): the character delivered by the failing read is stored as well;
				presumably it is 0X and therefore only acts as a terminator - confirm against AosIO *)
			tempUCS32[i] := 0; text.InsertUCS32(text.GetLength(), tempUCS32);
			res := 0;
			text.ReleaseWrite
		END Open;

		(* Return the text created by the last call of Open. *)
		PROCEDURE GetText*() : AosTexts.Text;
		BEGIN
			RETURN text;
		END GetText;

	END KOI8UDecoder;

	(* Text encoder that writes an AosTexts.Text as a KOI8-U byte stream.
		Only the Cyrillic letters listed in UCS32ToKOI8U are converted.
		Code points below 256 are written unchanged, all remaining code points are dropped. *)
	KOI8UEncoder = OBJECT(AosCodecs.TextEncoder)
	VAR out: AosIO.Writer;	(* writer passed to the last successful Open *)

		(* map UCS32 to KOI8-U; values without a table entry are returned unchanged *)
		PROCEDURE UCS32ToKOI8U(ch : LONGINT) : LONGINT;
		VAR ret : LONGINT;
		BEGIN

			CASE ch OF

				(* capital letters, 410H..42FH *)
				410H : ret := 0E1H;
			|	411H : ret := 0E2H;
			|	412H : ret := 0F7H;
			|	413H : ret := 0E7H;

			|	414H : ret := 0E4H;
			|	415H : ret := 0E5H;
			|	416H : ret := 0F6H;
			|	417H : ret := 0FAH;

			|	418H : ret := 0E9H;
			|	419H : ret := 0EAH;
			|	41AH : ret := 0EBH;
			|	41BH : ret := 0ECH;

			|	41CH : ret := 0EDH;
			|	41DH : ret := 0EEH;
			|	41EH : ret := 0EFH;
			|	41FH : ret := 0F0H;

			|	420H : ret := 0F2H;
			|	421H : ret := 0F3H;
			|	422H : ret := 0F4H;
			|	423H : ret := 0F5H;

			|	424H : ret := 0E6H;
			|	425H : ret := 0E8H;
			|	426H : ret := 0E3H;
			|	427H : ret := 0FEH;

			|	428H : ret := 0FBH;
			|	429H : ret := 0FDH;
			|	42AH : ret := 0FFH;
			|	42BH : ret := 0F9H;

			|	42CH : ret := 0F8H;
			|	42DH : ret := 0FCH;
			|	42EH : ret := 0E0H;
			|	42FH : ret := 0F1H;

				(* small letters, 430H..44FH *)
			|	430H : ret := 0C1H;
			|	431H : ret := 0C2H;
			|	432H : ret := 0D7H;
			|	433H : ret := 0C7H;

			|	434H : ret := 0C4H;
			|	435H : ret := 0C5H;
			|	436H : ret := 0D6H;
			|	437H : ret := 0DAH;

			|	438H : ret := 0C9H;
			|	439H : ret := 0CAH;
			|	43AH : ret := 0CBH;
			|	43BH : ret := 0CCH;

			|	43CH : ret := 0CDH;
			|	43DH : ret := 0CEH;
			|	43EH : ret := 0CFH;
			|	43FH : ret := 0D0H;

			|	440H : ret := 0D2H;
			|	441H : ret := 0D3H;
			|	442H : ret := 0D4H;
			|	443H : ret := 0D5H;

			|	444H : ret := 0C6H;
			|	445H : ret := 0C8H;
			|	446H : ret := 0C3H;
			|	447H : ret := 0DEH;

			|	448H : ret := 0DBH;
			|	449H : ret := 0DDH;
			|	44AH : ret := 0DFH;
			|	44BH : ret := 0D9H;

			|	44CH : ret := 0D8H;
			|	44DH : ret := 0DCH;
			|	44EH : ret := 0C0H;
			|	44FH : ret := 0D1H;

			|	451H : ret := 0A3H; (* CYRILLIC SMALL LETTER IO *)
			|	401H : ret := 0B3H; (* CYRILLIC CAPITAL LETTER IO *)

			|	491H : ret := 0ADH; (* CYRILLIC SMALL LETTER UKRAINIAN KGE (WITH UPTURN) *)
			|	490H : ret := 0BDH; (* CYRILLIC CAPITAL LETTER UKRAINIAN KGE (WITH UPTURN) *)

			|	454H : ret := 0A4H; (* CYRILLIC SMALL LETTER UKRAINIAN IE *)
			|	404H : ret := 0B4H; (* CYRILLIC CAPITAL LETTER UKRAINIAN IE *)

			|	457H : ret := 0A7H; (* CYRILLIC SMALL LETTER UKRAINIAN YI *)
			|	407H : ret := 0B7H; (* CYRILLIC CAPITAL LETTER UKRAINIAN YI *)

			|	456H : ret := 0A6H; (* CYRILLIC SMALL LETTER BELORUSSIAN-UKRAINIAN I *)
			|	406H : ret := 0B6H; (* CYRILLIC CAPITAL LETTER BELORUSSIAN-UKRAINIAN I *)

			ELSE
				ret := ch
			END;

			RETURN ret
		END UCS32ToKOI8U;

		(* Set the writer that receives the encoded bytes. A NIL writer is rejected with a log message. *)
		PROCEDURE Open*(out : AosIO.Writer);
		BEGIN
			IF out = NIL THEN
				(* the literal must stay on one line, Oberon strings cannot span lines *)
				AosOut.String("KOI8-U Encoder Error: output stream is NIL"); AosOut.Ln;
			ELSE SELF.out := out;
			END;
		END Open;

		(* Encode the whole text and write it to the writer given to Open.
			res is 0 on success and -1 if no writer has been set. *)
		PROCEDURE WriteText*(text : AosTexts.Text; VAR res : LONGINT);
		VAR r : AosTexts.TextReader; ch : AosTexts.Char32; i : LONGINT;
		BEGIN
			res :=  -1;
			IF out = NIL THEN RETURN END;	(* Open was not called or was called with NIL *)
			text.AcquireRead;
			NEW(r, text);
			FOR i := 0 TO text.GetLength() - 1 DO
				r.ReadCh(ch);
				ch := UCS32ToKOI8U(ch);
				(* NOTE(review): unconverted code points 80H..0FFH are written as raw bytes although
					these bytes denote other characters in KOI8-U - confirm whether this is intended *)
				IF (ch >= 0) & (ch < 256) THEN out.Char(CHR(ch)) END
			END;
			out.Update;
			text.ReleaseRead;
			res := 0;
		END WriteText;

	END KOI8UEncoder;

(*  CP1251 File Format *)
(* Factory procedure: allocates and returns a new CP1251Decoder. The parameter is not used. *)
PROCEDURE CP1251DecoderFactory*(par : ANY) : ANY;
VAR decoder : CP1251Decoder;
BEGIN
	NEW(decoder);
	RETURN decoder
END CP1251DecoderFactory;

(* Factory procedure: allocates and returns a new CP1251Encoder. The parameter is not used. *)
PROCEDURE CP1251EncoderFactory*(par : ANY) : ANY;
VAR encoder : CP1251Encoder;
BEGIN
	NEW(encoder);
	RETURN encoder
END CP1251EncoderFactory;

(* KOI8R File Format *)
(* Factory procedure: allocates and returns a new KOI8RDecoder. The parameter is not used. *)
PROCEDURE KOI8RDecoderFactory*(par : ANY) : ANY;
VAR decoder : KOI8RDecoder;
BEGIN
	NEW(decoder);
	RETURN decoder
END KOI8RDecoderFactory;

(* Factory procedure: allocates and returns a new KOI8REncoder. The parameter is not used. *)
PROCEDURE KOI8REncoderFactory*(par : ANY) : ANY;
VAR encoder : KOI8REncoder;
BEGIN
	NEW(encoder);
	RETURN encoder
END KOI8REncoderFactory;

(* KOI8U File Format *)
(* Factory procedure: allocates and returns a new KOI8UDecoder. The parameter is not used. *)
PROCEDURE KOI8UDecoderFactory*(par : ANY) : ANY;
VAR decoder : KOI8UDecoder;
BEGIN
	NEW(decoder);
	RETURN decoder
END KOI8UDecoderFactory;

(* Factory procedure: allocates and returns a new KOI8UEncoder. The parameter is not used. *)
PROCEDURE KOI8UEncoderFactory*(par : ANY) : ANY;
VAR encoder : KOI8UEncoder;
BEGIN
	NEW(encoder);
	RETURN encoder
END KOI8UEncoderFactory;


AosConfig.XML "Encoders" section
<Setting name="KOI8-R" value="AosTextUtilities.KOI8REncoderFactory"/>
<Setting name="KOI8-U" value="AosTextUtilities.KOI8UEncoderFactory"/>
<Setting name="CP-1251" value="AosTextUtilities.CP1251EncoderFactory"/>

AosConfig.XML "Decoders" section
<Setting name="KOI8-R" value="AosTextUtilities.KOI8RDecoderFactory"/>
<Setting name="KOI8-U" value="AosTextUtilities.KOI8UDecoderFactory"/>
<Setting name="CP-1251" value="AosTextUtilities.CP1251DecoderFactory"/>





---
Cheers, SAGE
http://sage.h15.ru/

----------------------------------------------------------------
This message was sent using NBI WebMail service



More information about the Oberon mailing list